# More Matplotlib

The goal here is to replicate the figures you see as closely as possible.

In order to get some data, you will have to run the code in the cells below. **Don't worry about how this code works.** In the end, it will give you some numpy arrays, which you will use in your plots.

### Problem 1: Line plots

In this problem, we will plot some daily weather data from a NOAA station in [Millbrook, NY](https://www.ncdc.noaa.gov/cdo-web/datasets/GHCND/stations/GHCND:US1NYDT0008/detail). A full description of this dataset is available at: <https://www.ncdc.noaa.gov/data-access/land-based-station-data>

The cell below uses pandas to download the data and populate a bunch of numpy arrays (`t_daily_min`, `t_daily_max`, etc.). Run the cell and then use the numpy arrays to try to re-create the plot you see.

![figure](https://earth-env-data-science.github.io/_images/fig1.png)

In [None]:
import pooch
# Data fetcher for the station files: downloads are cached locally under the
# OS cache directory "noaa-data" and checked against the md5 hashes below.
POOCH = pooch.create(
    path=pooch.os_cache("noaa-data"),
    # DOI-based base URL (note: this DOI points at a Zenodo record)
    base_url="doi:10.5281/zenodo.5553029/",
    # Maps each downloadable file name to its expected md5 checksum
    registry={
        "HEADERS.txt": "md5:2a306ca225fe3ccb72a98953ded2f536",
        "CRND0103-2016-NY_Millbrook_3_W.txt": "md5:eb69811d14d0573ffa69f70dd9c768d9",
        "CRND0103-2017-NY_Millbrook_3_W.txt": "md5:b911da727ba1bdf26a34a775f25d1088",
        "CRND0103-2018-NY_Millbrook_3_W.txt": "md5:5b61bc687261596eba83801d7080dc56",
        "CRND0103-2019-NY_Millbrook_3_W.txt": "md5:9b814430612cd8a770b72020ca4f2b7d",
        "CRND0103-2020-NY_Millbrook_3_W.txt": "md5:cd8de6d5445024ce35fcaafa9b0e7b64"
    },
)


import pandas as pd

# Read the header description file. Its second line holds the space-separated
# column names; `lines` is kept as a global because a later cell splits the
# third line into the matching units.
with open(POOCH.fetch("HEADERS.txt")) as fp:
    data = fp.read()
lines = data.split('\n')
headers = lines[1].split(' ')

# Load one whitespace-delimited file per year. The range end is exclusive, so
# this reads 2016, 2017 and 2018 only (the registry also lists 2019 and 2020).
dframes = []
for year in range(2016, 2019):
    fname = f'CRND0103-{year}-NY_Millbrook_3_W.txt'
    yearly = pd.read_csv(
        POOCH.fetch(fname),
        parse_dates=[1],               # column 1 is parsed as a date
        names=headers,
        header=None,
        sep=r'\s+',                    # raw string: '\s' is an invalid escape otherwise
        na_values=[-9999.0, -99.0],    # sentinel values treated as missing
    )
    dframes.append(yearly)

# Stack the yearly tables and index the result by date.
df = pd.concat(dframes).set_index('LST_DATE')

#########################################################
#### BELOW ARE THE VARIABLES YOU SHOULD USE IN THE PLOTS!
#### (numpy arrays)  
#### NO PANDAS ALLOWED!
#########################################################

t_daily_min = df.T_DAILY_MIN.values
t_daily_max = df.T_DAILY_MAX.values
t_daily_mean = df.T_DAILY_MEAN.values
p_daily_calc = df.P_DAILY_CALC.values
soil_moisture_5 = df.SOIL_MOISTURE_5_DAILY.values
soil_moisture_10 = df.SOIL_MOISTURE_10_DAILY.values
soil_moisture_20 = df.SOIL_MOISTURE_20_DAILY.values
soil_moisture_50 = df.SOIL_MOISTURE_50_DAILY.values
soil_moisture_100 = df.SOIL_MOISTURE_100_DAILY.values
date = df.index.values

In [None]:
# The third header line lists the unit of every column, in column order.
units = lines[2].split(' ')

# Print each column name next to its unit.
for column, unit_label in zip(headers, units):
    print(f'{column}: {unit_label}')

In [None]:
# Preview the first rows of the combined, date-indexed table.
df.head()

In [None]:
import matplotlib.pyplot as plt
import numpy as np  # needed here for the NaN-aware min/max below

# Three stacked panels sharing one date axis:
# temperature, precipitation and soil moisture.
# Only the numpy arrays from the data cell are used (no pandas objects).
fig, axes = plt.subplots(ncols=1, nrows=3, figsize=(12, 12), sharex=True)
ax0, ax1, ax2 = axes

# --- Panel 1: daily temperature --------------------------------------------
# Mean as a line, with the min..max envelope shaded behind it.
ax0.plot(date, t_daily_mean, color='m', label='daily mean')
ax0.fill_between(date, t_daily_max, t_daily_min,
                 color='lightgray', label='daily range')
# Clamp the shared x axis to the first and last date in the record.
ax0.set_xlim(date[0], date[-1])
ax0.set_ylabel('Temperature in Celsius')
# Labels are attached to the artists, so the legend does not depend on the
# order in which they were drawn.
ax0.legend()

# --- Panel 2: daily precipitation ------------------------------------------
ax1.plot(date, p_daily_calc, color='royalblue')
# The array may contain NaN for missing days, so use NaN-aware min and max.
ax1.set_ylim(np.nanmin(p_daily_calc), np.nanmax(p_daily_calc))
# The label belongs on ax1; setting it on ax0 would overwrite the temperature label.
ax1.set_ylabel('Precipitation in mm')

# --- Panel 3: soil moisture at each sensor depth ---------------------------
soil_series = (
    (soil_moisture_5, '5 cm', 'blue'),
    (soil_moisture_10, '10 cm', 'orange'),
    (soil_moisture_20, '20 cm', 'green'),
    (soil_moisture_50, '50 cm', 'red'),
    (soil_moisture_100, '100 cm', 'magenta'),
)
for values, depth_label, line_color in soil_series:
    ax2.plot(date, values, color=line_color, label=depth_label)

ax2.set_ylabel('Soil Moisture in m^3/m^3')
ax2.legend()

plt.tight_layout()
plt.show()

### Problem 2: Contour Plots

Now we will visualize some global temperature data from the NCEP-NCAR atmospheric reanalysis.
![fig2](https://earth-env-data-science.github.io/_images/fig2.png)

In [None]:
import xarray as xr
# Remote location of the NCEP-NCAR CDAS-1 monthly surface temperature field.
ds_url = 'http://iridl.ldeo.columbia.edu/SOURCES/.NOAA/.NCEP-NCAR/.CDAS-1/.MONTHLY/.Diagnostic/.surface/.temp/dods'

# Open the dataset and leave the time coordinate undecoded.
ds = xr.open_dataset(ds_url, decode_times=False)

#########################################################
#### BELOW ARE THE VARIABLES YOU SHOULD USE IN THE PLOTS!
#### (numpy arrays) 
#### NO XARRAY ALLOWED!
#########################################################

# Last entry along the leading dimension, shifted by 273.15
# (presumably Kelvin -> degrees Celsius; the plots label this field in °C).
temp = ds['temp'][-1].values - 273.15
lon = ds['X'].values
lat = ds['Y'].values

In [None]:
import numpy as np

# 2-D coordinate grids with the same (lat, lon) layout as `temp`.
mesh_lon, mesh_lat = np.meshgrid(lon, lat)

fig = plt.figure(figsize=(15, 8))

# Both axes are placed on the same 2x3 grid: the map takes the two left
# columns and the zonal-mean profile takes the right column, each spanning
# both rows.
ax0 = plt.subplot2grid(shape=(2, 3), loc=(0, 0), rowspan=2, colspan=2)
ax1 = plt.subplot2grid(shape=(2, 3), loc=(0, 2), rowspan=2, colspan=1)

# --- Panel 1: global temperature map ---------------------------------------
# Filled contours from -30 to 40; values outside that range use the
# extended end colours.
contour = ax0.contourf(mesh_lon, mesh_lat, temp,
                       levels=np.linspace(-30, 40, 60),
                       extend='both', cmap='magma')

# White line marking the 0-degree isotherm.
ax0.contour(mesh_lon, mesh_lat, temp, levels=[0], colors='white')

# Bind the colorbar to the map axes explicitly; without `ax=` it can be
# attached to whichever axes is current (ax1, the last one created).
cbar = fig.colorbar(contour, ax=ax0, shrink=0.5)
cbar.set_ticks(np.arange(-30, 41, 10))
cbar.set_label('\u00b0 C')

ax0.set_xlabel('Longitude')
ax0.set_ylabel('Latitude')
ax0.set_title('Current Global Temperature')

# --- Panel 2: zonal mean profile -------------------------------------------
# Averaging over axis 1 (longitude) leaves one mean value per latitude.
zonal_mean_temp = np.nanmean(temp, axis=1)

ax1.plot(zonal_mean_temp, lat, color='black')
ax1.set_yticks(np.arange(-80, 81, 20))   # -80 .. 80 in steps of 20
ax1.set_xticks(np.arange(-60, 21, 20))   # -60 .. 20 in steps of 20
ax1.set_xlim(left=-65)                   # only the lower x limit is pinned
ax1.set_xlabel('\u00b0 C')
ax1.set_ylabel('Latitude')
ax1.set_title('Zonal Mean Temperature')
ax1.grid(which='major')

# tight_layout must be called, not just referenced, for it to take effect.
plt.tight_layout()
plt.show()


### Problem 3: Scatter plots
Here we will make a map plot of earthquakes from a USGS catalog of historic large earthquakes. Color the earthquakes by log10(depth) and adjust the marker size to be magnitude$^4$/100.

![fig3](https://earth-env-data-science.github.io/_images/fig3.png)

In [None]:
# Download (or reuse the cached copy of) the zipped catalog and unpack it;
# the first extracted member is the tab-separated table.
unzipped_members = pooch.retrieve(
    "https://rabernat.github.io/research_computing/signif.txt.tsv.zip",
    known_hash='22b9f7045bf90fb99e14b95b24c81da3c52a0b4c79acf95d72fbe3a257001dbb',
    processor=pooch.Unzip()
)
fname = unzipped_members[0]

# Parse the whole table as floats and pull out the four columns used in the plot.
earthquakes = np.genfromtxt(fname, delimiter='\t')
depth, magnitude, latitude, longitude = (
    earthquakes[:, column] for column in (8, 9, 20, 21)
)

In [None]:
# Explicit figure/axes pair for the earthquake map.
fig, ax = plt.subplots(figsize=(15, 7))

# Colour encodes log10(depth); marker size is magnitude^4 / 100.
log_depth = np.log10(depth)
size = (magnitude**4) / 100

scatter = ax.scatter(longitude, latitude, s=size, c=log_depth)

# Colorbar with ticks at whole exponents, rendered as powers of ten (10^x).
cbar = fig.colorbar(scatter, ax=ax, format="10$^%d$")
cbar.set_ticks([0, 1, 2])
cbar.set_label('Depth [m]')

# Major grid lines on the map axes.
ax.grid(which='major')
plt.show()