# Matplotlib and Quick intro to GeoPandas

Data visualization library that includes:
- Image plot, contour plots, scatter plots, line plots, 3D plots
- Variety of hardcopy formats
- Interactive environments

`matplotlib` has a vast number of functions. Don't worry, it is hard to remember all of them.

We can always guide our work by adapting code examples from the official gallery: https://matplotlib.org/stable/gallery/index.html





In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import geopandas
import pyproj
import rasterio
import rasterstats
import os
import json

As we saw in class, we can plot either by **the plt way** or **the fig and axes way**. 

In [None]:
# Draw two independent columns of 100 standard-normal samples each.
x = np.random.randn(100, 1)
y = np.random.randn(100, 1)

# Put the two columns side by side, giving one (100, 2) array.
the_array = np.hstack((x, y))

# Wrap the array in a DataFrame with named columns for the plots below.
df = pd.DataFrame(data=the_array, columns=["X", "Y"])

In the object oriented way (fig and axes), we edit the objects that we want

In [None]:
fig, ax = plt.subplots()  # Create a figure containing a single axes.

In [None]:
fig, ax = plt.subplots(2,2) 

In [None]:
fig, ax = plt.subplots(2,1) 

The syntax for plotting is: `ax.plot(data, linestyle, color, marker)` 

In [None]:
fig, ax = plt.subplots()  # Create a figure containing a single axes.
ax.plot(np.linspace(100,200,100), df['Y'])  # Plot some data on the axes.

In [None]:
fig, ax = plt.subplots()  # Create a figure containing a single axes.
ax.plot(np.linspace(100,200,100), df['Y'], linestyle="--",color="darkcyan",marker="p");


In [None]:
# Object-oriented workflow: create the axes, draw the line, then label it.
fig, ax = plt.subplots()
x_values = np.linspace(100, 200, 100)
ax.plot(x_values, df['Y'], marker="p", color="darkcyan", linestyle="--")
# Axes.set applies the title and both axis labels in a single call.
ax.set(
    title="Title of the plot",
    xlabel="this is the x-axis label",
    ylabel="this is the y-axis label",
);


In [None]:
# Two stacked panels: a line plot on top and a histogram underneath.
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 5))
top_panel, bottom_panel = axes

# Top panel: the Y column drawn against an evenly spaced x range.
top_panel.plot(np.linspace(100, 200, 100), df['Y'], color="darkred")
top_panel.set_title("Top Plot", fontsize=15)
top_panel.set_ylabel("Y label top")

# Bottom panel: distribution of the same Y column in 10 bins.
bottom_panel.hist(df['Y'], bins=10, color="darkcyan")
bottom_panel.set_title("Bottom Plot", fontsize=15)
bottom_panel.set_ylabel("Y label bottom");
# Uncomment the next line to export the figure to disk.
#fig.savefig("export_figure.pdf")

What if we need different data in the same plot:

In [None]:
# Overlay three random walks on one axes, each with its own line and marker.
fig, ax = plt.subplots(1, figsize=(10, 6))
series_styles = [
    ("first", "--", "."),
    ("second", ":", "o"),
    ("third", "-.", "s"),
]
for series_label, line_style, marker_style in series_styles:
    # Each series is the cumulative sum of 100 fresh standard-normal draws.
    ax.plot(
        np.random.randn(100).cumsum(),
        linestyle=line_style,
        marker=marker_style,
        label=series_label,
    )
ax.legend(loc="best", fontsize=15)
ax.set_title("multiple plots in one subplot", fontsize=18);

We can also add some text inside the subplot with:
- `ax.text(x, y, "text", fontsize)`: Inserts text into subplot
- `ax.annotate("text", xy, xytext, arrowprops)`: inserts text with an arrow pointing to a location. The arrow head points at `xy`, and the bottom-left corner of the text is placed at `xytext`.

In [None]:
# Same three random walks as before, now with text and an arrow annotation.
fig, ax = plt.subplots(1, figsize=(10, 5))
walk_specs = {
    "first": {"linestyle": "--", "marker": "."},
    "second": {"linestyle": ":", "marker": "o"},
    "third": {"linestyle": "-.", "marker": "s"},
}
for walk_label, walk_style in walk_specs.items():
    ax.plot(np.random.randn(100).cumsum(), label=walk_label, **walk_style)
ax.legend(loc="best", fontsize=15)
ax.set_title("multiple plots in one subplot", fontsize=18)

# Plain text anchored at data coordinates (0, 0).
ax.text(0, 0, "here", fontsize=20)
# Annotation: the arrow head sits at xy, the label is placed at xytext.
arrow_style = {"facecolor": "black"}
ax.annotate("there", xy=(0, 0), xytext=(60, 0), fontsize=20, arrowprops=arrow_style)

#### Best practice: an overview
1. Step 1: Create a figure object and subplots:
2. Step 2: Plot data
3. Step 3: Set colors, markers and line styles
4. Step 4: Set title, axis labels and ticks
5. Step 5: Add legend or other parameters
6. Step 6: Save plot.

If you want to read some data from a file and plot it, the same steps apply.

In [None]:
pwd

In [None]:
data = pd.read_csv('pyintro_resources/fivethirtyeight/alcohol-consumption/drinks.csv')

In [None]:
data.head()

In [None]:
data.sort_values(['total_litres_of_pure_alcohol'], ascending=False).head(10)

In [None]:
# Step 1: create the figure and a 2x2 grid of subplots.
fig, ax = plt.subplots(2, 2, figsize=(12, 7))

# Step 2 and 3: plot data, decide style, colors.
# The x-axis is simply the row position of each country. Derive it from the
# table instead of hard-coding the row count, so the cell keeps working if the
# CSV gains or loses rows.
x_positions = range(len(data))

ax[0,0].scatter(x_positions, data['beer_servings'], color="lightcoral", alpha=0.5);
ax[0,1].plot(x_positions, data['spirit_servings'], color="goldenrod", linewidth=3);
ax[1,0].plot(x_positions, data['wine_servings'], color="skyblue", marker="+");
ax[1,1].plot(x_positions, data['total_litres_of_pure_alcohol'], color="g", linestyle="--", label = "total litres");


# Step 4: set titles and other labels on the subfigures.

ax[0,0].set_title("Scatter plot - Beer");
ax[0,1].set_title("Plot Spirit");
ax[1,0].set_title("Wine");
ax[1,1].set_title("Total litres");

# Step 5: add other parameters (legend and an overall figure title).

ax[1,1].legend(fontsize=10);
fig.suptitle('This is a long figure title', fontsize=16)
# Step 6: save the plot to disk.

fig.savefig("myplot.pdf")

## Okay, so that's neat, I guess. But what the heck is on the x-axis??

Well... That's simply just the index for the country, so it is essentially meaningless. 

To draw some meaning, let's first look at some of the highest values.. 

In [None]:
#top 10 wine drinking countries in the world 
data[['country','wine_servings']].sort_values(by='wine_servings', ascending=False).head(10)
#Look at the wine graph above, we see a jump around the x-value 61 where France is

In [None]:
#Top 10 beer drinking countries in the world
data[['country','beer_servings']].sort_values(by='beer_servings', ascending=False).head(10)

In [None]:
data.columns

In [None]:
#Top 10 spirit drinking countries in the world
# Query `data` here: `drink_geo` is only created later in the notebook (after
# the merge with the world boundaries), so using it at this point raises a
# NameError when the cells are run top to bottom.
data[['country','spirit_servings']].sort_values(by='spirit_servings', ascending=False).head(10)

In [None]:
#Top 10 total liters of pure alcohol drinking countries in the world
# Query `data` here: `drink_geo` is only created later in the notebook (after
# the merge with the world boundaries), so using it at this point raises a
# NameError when the cells are run top to bottom.
data[['country','total_litres_of_pure_alcohol']].sort_values(by='total_litres_of_pure_alcohol', ascending=False).head(10)

### Ideally with countries, we would like to see them mapped given they are a geographic feature.

# Geopandas!

Main library to manage geospatial data: 
- Geopandas https://geopandas.org/index.html

In [None]:
pwd

In [None]:
the_world="pyintro_resources/Longitude_Graticules_and_World_Countries_Boundaries.geojson"
world=geopandas.read_file(the_world)

## Check that Coordinate Reference System (CRS)!

In [None]:
world.crs

In [None]:
world.head()

In [None]:
world.plot();

In [None]:
len(world)

In [None]:
len(data)

## Let's merge these two data together! 

In [None]:
#What about data types??
print(type(data))
print(type(world))

In [None]:
data.columns

In [None]:
world.columns

### We have to have something to merge on....

In [None]:
world_drink=data.merge(world, how='left',left_on='country', right_on='CNTRY_NAME')

In [None]:
len(world_drink)

In [None]:
world_drink.head()

In [None]:
world_drink.head()

In [None]:
type(world_drink)

In [None]:
drink_geo=geopandas.GeoDataFrame(world_drink, geometry=world_drink.geometry)

In [None]:
type(drink_geo)

In [None]:
drink_geo.plot()

Wait... Where is the USA & Russia??

Let's look at the original data sources to see what may have been going on.

In [None]:
#First, let's start with the US
world.loc[world['CNTRY_NAME'].str.contains("States")]

In [None]:
data.loc[data['country'].str.contains("USA")]

When we merged, Python did not know to treat USA and United States as the same country name. This is often resolved by having a country-code column, but we have to deal with just the names for now. Let's change one of the names and re-run some of what we did above.

In [None]:
#code to replace one of the country names
# Assign the result back rather than calling replace(..., inplace=True) on the
# selected column: the in-place form acts on an intermediate object, which
# pandas warns about and which leaves `data` unchanged under Copy-on-Write.
data['country'] = data['country'].replace({'USA': "United States"})

In [None]:
data.loc[data.country=='United States']

In [None]:
#Second, let's look at Russia
world.loc[world['CNTRY_NAME'].str.contains("Russia")]

In [None]:
data.loc[data['country'].str.contains("Russia")]

When we merged, Python did not know to treat Russia and Russian Federation as the same country name.

In [None]:
#code to replace one of the country names
# Assign the result back rather than calling replace(..., inplace=True) on the
# selected column: the in-place form acts on an intermediate object, which
# pandas warns about and which leaves `data` unchanged under Copy-on-Write.
data['country'] = data['country'].replace({'Russian Federation': "Russia"})

In [None]:
data.loc[data.country=='Russia']

In [None]:
# Re-run the left merge of the drinks table onto the world boundaries,
# matching the drinks `country` column against the boundaries' `CNTRY_NAME`.
world_drink = pd.merge(
    data,
    world,
    how='left',
    left_on='country',
    right_on='CNTRY_NAME',
)
# Turn the merged table back into a GeoDataFrame using its geometry column.
drink_geo = geopandas.GeoDataFrame(world_drink, geometry=world_drink['geometry'])
# Draw the map again.
drink_geo.plot()

In [None]:
drink_geo.columns

In [None]:
f,a =plt.subplots(2,2)

In [None]:
drink_geo.plot()

### How to plot world beer consumption?

In [None]:
# Single choropleth of beer servings with a shrunken colour bar.
f, a = plt.subplots(figsize=(10, 7))
drink_geo.plot(ax=a, column='beer_servings', legend=True, legend_kwds={'shrink': 0.3})
# Call the methods on the map axes directly instead of going through pyplot's
# "current axes": the legend adds a second (colour bar) axes to the figure, and
# the explicit form always targets the map itself.
a.axis("off")
a.set_title("World Beer Consumption", fontsize=20)

## What about wine, spirits and pure alcohol??

### Let's slowly build a graph that contains all of this info..

In [None]:
# Start the 2x2 grid and fill in only the first (top-left) panel for now.
fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(25, 7))
beer_panel = ax[0, 0]
drink_geo.plot(
    column='beer_servings',
    ax=beer_panel,
    legend=True,
    legend_kwds={'shrink': 0.75},
)
beer_panel.set_title('World Beer Consumption')
beer_panel.axis('off');

In [None]:
drink_geo.columns

In [None]:
# Fill the top row of the 2x2 grid: beer on the left, spirits on the right.
fig, ax = plt.subplots(2, 2, constrained_layout=True, figsize=(25, 15))
top_row = [
    ('beer_servings', 'Beer Consumption'),
    ('spirit_servings', 'Spirit servings'),
]
for panel_ax, (column_name, panel_title) in zip(ax[0], top_row):
    drink_geo.plot(ax=panel_ax, column=column_name, legend=True, legend_kwds={'shrink': 0.5})
    panel_ax.set_title(panel_title, fontsize=30)
    panel_ax.axis('off');



In [None]:
# Four-panel choropleth: one map per beverage measure.
fig, ax = plt.subplots(2, 2, constrained_layout=True, figsize=(25, 15))
panels = [
    (ax[0, 0], 'beer_servings', 'Beer Consumption'),                       # Beer
    (ax[0, 1], 'spirit_servings', 'Spirit servings'),                      # Spirits
    (ax[1, 0], 'wine_servings', 'Wine servings'),                          # Wine
    (ax[1, 1], 'total_litres_of_pure_alcohol', 'Total liters of Pure Alcohol'),  # Pure alcohol
]
for panel_ax, column_name, panel_title in panels:
    drink_geo.plot(ax=panel_ax, column=column_name, legend=True, legend_kwds={'shrink': 0.5})
    panel_ax.set_title(panel_title, fontsize=30)
    panel_ax.axis('off')
# Overall title for the whole figure.
fig.suptitle('World Alcohol Consumption by type', fontsize=50);


# It is kind of bland with all of the same colors
(could be good that way as well)

Different colormap options for choropleth maps can be found at this [link](https://matplotlib.org/stable/tutorials/colors/colormaps.html)

In [None]:
# Four-panel choropleth again, this time with a distinct colormap per panel.
fig, ax = plt.subplots(2, 2, constrained_layout=True, figsize=(25, 15))
colored_panels = [
    # (grid position, column to map, panel title, colormap)
    ((0, 0), 'beer_servings', 'Beer Consumption', 'Purples'),
    ((0, 1), 'spirit_servings', 'Spirit servings', 'Greens'),
    ((1, 0), 'wine_servings', 'Wine servings', 'Reds'),
    ((1, 1), 'total_litres_of_pure_alcohol', 'Total liters of Pure Alcohol', 'Oranges'),
]
for grid_pos, column_name, panel_title, color_map in colored_panels:
    panel_ax = ax[grid_pos]
    drink_geo.plot(
        ax=panel_ax,
        column=column_name,
        legend=True,
        legend_kwds={'shrink': 0.5},
        cmap=color_map,
    )
    panel_ax.set_title(panel_title, fontsize=30)
    panel_ax.axis('off')
# Overall title for the whole figure.
fig.suptitle('World Alcohol Consumption by type', fontsize=50);


### Are different kinds of alcohol complements or substitutes?

In [None]:
corr_drink=drink_geo[['beer_servings','spirit_servings','wine_servings']]

In [None]:
corr_drink.corr()