# Chapter 7 - Example: Atmospheric Data 
### Analyze wind at 10m for a given month

In this chapter we exemplify the use of an atmospheric data set, in this case wind vectors at 10 m from the ERA-5 reanalysis dataset. We characterize its variability over a given region, and calculate linear trends.

[ERA-5 (ECMWF)](https://registry.opendata.aws/ecmwf-era5/) reanalysis incorporates satellite and in-situ data, and its output includes ocean, land and atmospheric variables. Therefore, this script can be easily modified to analyze data other than wind.

In [None]:
# libraries
import warnings
warnings.simplefilter('ignore') # filter some warning messages

import numpy as np
import pandas as pd
import xarray as xr
from calendar import monthrange
import matplotlib.pyplot as plt 
import hvplot.pandas
import hvplot.xarray
import fsspec
import s3fs
import dask
from dask.distributed import performance_report, Client, progress
import os

***
## Select the region, month and years to analyze

In [None]:
# Select the region by defining its latitude and longitude ranges. ERA-5 data has a 1/4 degree (0.25) resolution.
# No validation is done further down (to keep the code simple), so follow the ordering shown here.
latr = [39, 40] # latitude range [south, north]: smaller value first (lat1 < lat2)
lonr = [-125, -123] # longitude range [west, east]: smaller value first (lon1 < lon2), using the -180 : 180 convention
# NOTE(review): the download cell adds 360 to these longitudes before selecting, so it appears to expect negative (western) values - confirm before using eastern longitudes
# time selection
mon = 5 # month to analyze (1-12)
iyr = 1979 # initial year (inclusive). by default, we set it to the start year of ERA5 dataset
fyr = 2021 # final year (inclusive). by default, we set it to the end year of ERA5 dataset


## Obtain data from the AWS cloud

In this case, files are stored in a different format (per year, per month), so files have to be accessed individually.

In [None]:
# The files are stored as one zarr store per variable, per year and per month,
# so every (variable, year) combination has to be opened individually.
ERA5_ROOT = 'https://era5-pds.s3.us-east-1.amazonaws.com/zarr/'


def _monthly_mean_wind(variable, year, month, lat_range, lon_range):
    """Return the time-averaged field of one variable for one month and region.

    Parameters
    ----------
    variable : str
        Name of the zarr store, which is also the name of the data array
        inside it (e.g. 'northward_wind_at_10_metres').
    year, month : int
        Year and month (1-12) of the store to open.
    lat_range : sequence of two numbers
        Latitude limits, smaller value first.
    lon_range : sequence of two numbers
        Longitude limits, smaller value first; 360 is added to both before
        selecting.

    Returns
    -------
    xarray.DataArray
        The selected data averaged over the 'time0' dimension and loaded
        into memory.
    """
    month_str = str(month).zfill(2)
    # location includes: bucket name (era5-pds), year, month, and the variable name with extension zarr
    file_location = ERA5_ROOT + str(year) + '/' + month_str + '/data/' + variable + '.zarr'
    ds = xr.open_zarr(file_location, consolidated=True)  # open access to data (lazy)

    # time frame to obtain: first to last day of the selected month
    dte1 = str(year) + '-' + month_str + '-01'
    dte2 = str(year) + '-' + month_str + '-' + str(monthrange(year, month)[1])

    # latitude is sliced from the larger to the smaller value
    # (presumably because the stored latitudes run north to south - TODO confirm)
    selection = ds[variable].sel(time0=slice(dte1, dte2),
                                 lat=slice(lat_range[1], lat_range[0]),
                                 lon=slice(lon_range[0] + 360, lon_range[1] + 360))
    return selection.mean(dim='time0').load()


years = range(iyr, fyr + 1)
if len(years) == 0:
    raise ValueError('fyr must be greater than or equal to iyr')

# list to store the time range: one stamp (first day of the month) per year
tdt = [str(y) + '-' + str(mon).zfill(2) + '-01' for y in years]

for iy, y in enumerate(years):
    vds = _monthly_mean_wind('northward_wind_at_10_metres', y, mon, latr, lonr)  # v, meridional component
    uds = _monthly_mean_wind('eastward_wind_at_10_metres', y, mon, latr, lonr)   # u, zonal component

    if iy == 0:  # if the first year, create the arrays to store data (the grid size is only known now)
        v10_dt = np.full((len(years), vds.shape[0], vds.shape[1]), np.nan)
        u10_dt = np.full((len(years), uds.shape[0], uds.shape[1]), np.nan)

    v10_dt[iy, :, :] = vds.data  # store selected data
    u10_dt[iy, :, :] = uds.data

# Delete the output file left over from a previous run, to clear space.
# On a first run the file does not exist yet, which is not an error.
try:
    os.remove('ERA5_wind10m_mon' + str(mon).zfill(2) + '.nc')
except FileNotFoundError:
    pass

In [None]:
# Assemble the yearly fields into a single xarray Dataset
wind_vars = {
    'u10m': (['time', 'lat', 'lon'], u10_dt),
    'v10m': (['time', 'lat', 'lon'], v10_dt),
}
grid_coords = {
    'time': tdt,
    'lat': vds.lat.values,
    'lon': vds.lon.values - 360,  # shift longitudes back by the 360 added when selecting
}
mw10 = xr.Dataset(data_vars=wind_vars, coords=grid_coords, attrs=vds.attrs)

# wind speed: magnitude of the (u, v) vector
mw10['wsp10m'] = np.sqrt(mw10.u10m**2 + mw10.v10m**2)

# keep a copy on disk for future use, so the data does not have to be fetched again
nc_name = 'ERA5_wind10m_mon' + str(mon).zfill(2) + '.nc'
mw10.to_netcdf(nc_name)

mw10 # taking a peek


## Plotting the data

As before, there is a simple way to plot the data, and also a way to make the plot ready for sharing or publication.

In [None]:
# Quick look at the first year of data, drawing the vectors with matplotlib's quiver
x, y = np.meshgrid(mw10.lon, mw10.lat) # 2-D grids with the position of every vector
first_u = mw10['u10m'].isel(time=0)
first_v = mw10['v10m'].isel(time=0)
plt.quiver(x, y, first_u, first_v)
plt.show()

In [None]:
# now a more presentable plot
from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter
import cartopy.feature as cfeature
import cartopy.crs as ccrs
from calendar import month_abbr

# Plot window: the selected region padded by a margin on every side
margin = 0.5 # extra space for the plot
lat_lo, lat_hi = latr[0] - margin, latr[1] + margin
lon_lo, lon_hi = lonr[0] - margin, lonr[1] + margin
region = np.array([[lat_lo, lat_hi], [lon_lo, lon_hi]]) # first row: latitude bounds, second row: longitude bounds

# Figure and map axes
map_crs = ccrs.PlateCarree() # projection type - this one is easy to use
fig = plt.figure(figsize=(8, 5))
ax = plt.axes(projection=map_crs)
ax.coastlines(resolution='50m', linewidth=2, color='black')
ax.add_feature(cfeature.LAND, color='grey', alpha=0.3)
ax.set_extent([lon_lo, lon_hi, lat_lo, lat_hi], crs=map_crs)

# One tick per degree on each axis, labelled as longitude / latitude
lon_ticks = list(np.arange(lon_lo, lon_hi + 1, 1))
lat_ticks = list(np.arange(lat_lo, lat_hi + 1, 1))
ax.set_xticks(lon_ticks, crs=map_crs)
ax.set_yticks(lat_ticks, crs=map_crs)
ax.xaxis.set_major_formatter(LongitudeFormatter(zero_direction_label=True))
ax.yaxis.set_major_formatter(LatitudeFormatter())

# Average wind over all years for the selected month; the arrows are colored by wind speed
mean_u = mw10.u10m.mean(dim='time')
mean_v = mw10.v10m.mean(dim='time')
mean_speed = mw10.wsp10m.mean(dim='time')
plt.quiver(x, y, mean_u, mean_v, mean_speed, cmap='jet')
cbar = plt.colorbar()
cbar.set_label('m/s') # color bar label
plt.title('Wind for '+month_abbr[mon]+' ('+str(iyr)+'-'+str(fyr)+')')
# To save the figure, call fig.savefig(<filename>) here; the format is recognized from the filename extension.
plt.show()

*** 
## To analyze the data in time, this time we'll select one point in space. 

For spatial averages use .mean(axis=(1,2)) on the variables.

In [None]:
# List the grid coordinates available in the dataset
for label, coord in (('Latitude values: ', mw10.lat), ('Longitude values: ', mw10.lon)):
    print(label, coord.values)

In [None]:
# Pick one grid point (latitude, longitude) from the values printed above
slat, slon = 39, -124

In [None]:
# Time series at the selected location: one simple plot for each wind
# component and one for the wind speed
plot_years = range(iyr, fyr+1)
series_specs = [
    # (variable, title prefix, line/marker format, extra plot options)
    ('v10m', 'Meridional wind (v)', 'bd-', {}),
    ('u10m', 'Zonal wind (u)', 'go-', {}),
    ('wsp10m', 'Wind speed', 's-', {'c': 'darkorange'}),
]

for var_name, title_prefix, fmt, extra in series_specs:
    point_series = mw10[var_name].sel(lat=slat, lon=slon)
    plt.plot(plot_years, point_series, fmt, zorder=2, **extra)
    plt.axhline(y=0, c='k', alpha=0.4) # zero reference line
    plt.ylabel('Wind speed (m/s)')
    plt.title(title_prefix + ', Lat=' + str(slat) + ', Lon=' + str(slon))
    plt.grid(zorder=0)
    plt.show()

## Calculate a linear trend using a linear regression

In [None]:
from sklearn.preprocessing import PolynomialFeatures
import statsmodels.api as sm

var = 'v10m' # variable whose trend is estimated

# Predictor: the years, reshaped into a single column
x = np.arange(iyr, fyr+1).reshape(-1, 1)

# Response: the selected variable at the chosen point, also as a single column
point_values = mw10[var].sel(lat=slat, lon=slon).values
y = point_values.reshape(-1, 1)

# Order-1 polynomial features: the years plus a constant (dummy) column
polf = PolynomialFeatures(degree=1)
xp = polf.fit_transform(x)

# Ordinary least squares fit, stored in mods
mods = sm.OLS(y, xp).fit()
print(mods.summary())

# Resources