# Chapter 7 - Example: Atmospheric Data 
### Analyze monthly wind at 10m for a selected region

In this chapter, we exemplify the use of an atmospheric/climate data set, the reanalysis dataset ERA-5, to analyze changes in wind vectors at 10 m. We characterize their variability over a given region, plot the field, and calculate linear trends.

[ERA-5 (ECMWF)](https://registry.opendata.aws/ecmwf-era5/) reanalysis incorporates satellite and in-situ data, and its output variables include ocean, land and atmospheric ones. Therefore, this script can be easily modified for other data. 

In [None]:
# Libraries
import warnings

warnings.simplefilter("ignore")  # filter some warning messages

import os  # library to interact with the operating system
from calendar import (
    month_abbr,  # function that gives you the abbreviated name of a month
)
from calendar import monthrange  # gives the number of day in a month

import dask
import fsspec
import hvplot.pandas
import hvplot.xarray
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import s3fs
import xarray as xr
from dask.distributed import Client, performance_report, progress

xr.set_options(keep_attrs=True)

In [None]:
import s3fs


@dask.delayed
def s3open(path):
    """Build a key-value mapper over the S3 location ``path``.

    Because of the ``dask.delayed`` decorator, calling this function only
    records the work; the file system and mapper are created when the delayed
    object is computed.

    Parameters
    ----------
    path : str
        Bucket-relative location of the store to map.

    Returns
    -------
    s3fs.S3Map
        Mapper over ``path`` backed by an anonymous S3 file system.
    """
    # Anonymous access, no fill cache, and a pool of up to 20 connections.
    pool_config = {"max_pool_connections": 20}
    anonymous_fs = s3fs.S3FileSystem(
        anon=True,
        default_fill_cache=False,
        config_kwargs=pool_config,
    )
    return s3fs.S3Map(path, s3=anonymous_fs)

***
## For this example we select a region, and also a specific month and a range of years to analyze

In [None]:
# Select region by defining latitude and longitude range. ERA-5 data has a 1/4 degree resolution.
latr = [
    39,
    40,
]  # Latitude range [lat1, lat2]. Make sure lat1 < lat2: the values are used directly as slice bounds below and no reordering is done, to simplify the code. resolution 0.25 degrees
lonr = [-125, -123]  # Longitude range [lon1, lon2] with lon1 < lon2. Use the range -180 : 180 (360 is added to these values when the data is selected below)
# time selection
mon = 5  # month to analyze, as a month number (it is used to index calendar.month_abbr)
start_year = 1979  # you can select the initial year. by default, we set it to the start year of ERA5 dataset
end_year = 2020  # you can select the final year. by default, we set it to the end year of ERA5 dataset

# Metadata for the derived wind-speed variable; it is assigned as that
# variable's attributes once the speed has been computed from the two components.
speed_attributes = {
    "long_name": "10 metre wind speed",
    "nameCDM": "10_metre_wind_speed_surface",
    "nameECMWF": "10 metre wind speed",
    "product_type": "analysis",
    "shortNameECMWF": "10m",
    "standard_name": "wind_speed",
    "units": "m s**-1",
}

***
## Acquire data from the AWS cloud

In this case, files are stored in a different format than SST. Now they are monthly files (of daily data) organized by yearly folders. Then, files have to be accessed individually.

In [None]:
%%time

# Location template of one store: one per variable, per month, per year
file_pattern = "era5-pds/zarr/{year}/{month}/data/{var}.zarr/"
years = list(np.arange(start_year, end_year + 1))
months = [f"{month_number:02d}" for month_number in range(1, 13)]
var_names = ["northward_wind_at_10_metres", "eastward_wind_at_10_metres"]

# Options shared by every multi-file open: stores are concatenated in the
# order given along the "time0" dimension
open_options = dict(
    engine="zarr",
    concat_dim="time0",
    combine="nested",
    coords="minimal",
    compat="override",
    parallel=True,
)

# one monthly-mean dataset per variable is collected here
monthly_fields = []

for var in var_names:
    # every (year, month) store of this variable, years outermost
    store_paths = [
        file_pattern.format(year=year, month=month, var=var)
        for year in years
        for month in months
    ]
    files_mapper = [s3open(store_path) for store_path in store_paths]

    # open all stores as a single dataset
    ds = xr.open_mfdataset(files_mapper, **open_options)

    # sort latitudes in ascending order (-90 to 90), then average each month
    ds = ds.sortby(ds.lat)
    ds_month = ds.resample(time0="1M").mean(dim="time0")
    monthly_fields.append(ds_month)

# both components side by side in one dataset
ds_era = xr.merge(monthly_fields)

# wind speed from the two components, tagged with its own attributes
meridional = ds_era["northward_wind_at_10_metres"]
zonal = ds_era["eastward_wind_at_10_metres"]
ds_era["wind_speed"] = np.sqrt(meridional ** 2 + zonal ** 2)
ds_era["wind_speed"].attrs = speed_attributes

In [None]:
# Cut the selected region out of the monthly dataset. We keep a Dataset rather
# than a single DataArray because the wind vector needs both components.
# The longitude bounds in `lonr` are given in -180 : 180, so 360 is added to
# them before they are used as slice bounds.
#
# Optional: to avoid fetching the data again later, the result can be saved,
# e.g. mw10.to_netcdf("ERA5_wind10m_mon" + str(mon).zfill(2) + ".nc")
lat_window = slice(latr[0], latr[1])
lon_window = slice(lonr[0] + 360, lonr[1] + 360)
mw10 = ds_era.sel(lat=lat_window, lon=lon_window)

mw10  # taking a peek

***
## Plotting the data

As before, there is a simple way to plot the data for quick inspection, and also a way to make the plot ready for sharing or publication.

In [None]:
# simple plot of data, using the matplotlib function quiver to plot vectors
x, y = np.meshgrid(mw10.lon, mw10.lat)  # generate a lon/lat grid to plot the vectors

# First time step only. The dataset variables are named after the wind
# components (there is no `u10m` / `v10m` in `mw10`). quiver expects the
# eastward (u) component first and the northward (v) component second, each
# shaped (lat, lon) like the grid above.
first_step = mw10.isel(time0=0)
plt.quiver(
    x,
    y,
    first_step.eastward_wind_at_10_metres.transpose("lat", "lon"),
    first_step.northward_wind_at_10_metres.transpose("lat", "lon"),
)

In [None]:
# Now a more presentable plot
from calendar import month_abbr

import cartopy.crs as ccrs
import cartopy.feature as cfeature
from cartopy.mpl.ticker import LatitudeFormatter, LongitudeFormatter

# Select a region of our data, giving it a margin
margin = 0.5  # extra space for the plot
region = np.array(
    [[latr[0] - margin, latr[1] + margin], [lonr[0] - margin, lonr[1] + margin]]
)  # numpy array that specifies the lat/lon boundaries of our selected region

# Average all occurrences of the selected month over the available years, so
# the field matches what the title announces
is_selected_month = mw10.time0.dt.month == mon
month_mean = mw10.sel(time0=is_selected_month).mean(dim="time0").load()

# The map extent below is given in -180 : 180, while the data was selected
# with longitudes shifted by +360; wrap the grid longitudes back so the
# arrows land inside the extent
plot_lon = (mw10.lon + 180) % 360 - 180
x, y = np.meshgrid(plot_lon, mw10.lat)

# Create and set the figure context
fig = plt.figure(
    figsize=(8, 5)
)  # create a figure object, and assign it a variable name fig
ax = plt.axes(
    projection=ccrs.PlateCarree()
)  # projection type - this one is easy to use
ax.coastlines(resolution="50m", linewidth=2, color="black")
ax.add_feature(cfeature.LAND, color="grey", alpha=0.3)
ax.set_extent(
    [region[1, 0], region[1, 1], region[0, 0], region[0, 1]], crs=ccrs.PlateCarree()
)
ax.set_xticks(
    [*np.arange(region[1, 0], region[1, 1] + 1, 1)], crs=ccrs.PlateCarree()
)  # customize ticks and labels to longitude
ax.set_yticks(
    [*np.arange(region[0, 0], region[0, 1] + 1, 1)], crs=ccrs.PlateCarree()
)  # customize ticks and labels to latitude
ax.xaxis.set_major_formatter(LongitudeFormatter(zero_direction_label=True))
ax.yaxis.set_major_formatter(LatitudeFormatter())

# Plot average wind for the selected month, color is the wind speed.
# quiver takes the eastward (u) component first, then the northward (v) one;
# every field is put in (lat, lon) order to match the grid.
plt.quiver(
    x,
    y,
    month_mean.eastward_wind_at_10_metres.transpose("lat", "lon"),
    month_mean.northward_wind_at_10_metres.transpose("lat", "lon"),
    month_mean.wind_speed.transpose("lat", "lon"),
    cmap="jet",
)
cbar = plt.colorbar()
cbar.set_label("m/s")  # color bar label
plt.title(
    "Wind for " + month_abbr[mon] + " (" + str(start_year) + "-" + str(end_year) + ")"
)
# fig.savefig('filename') # save your figure by using the method .savefig. python recognizes the format from the filename extension.

*** 
## To analyze the data in time, we select only one point in space. 
But if you want to analyze the entire field, you can:
- Average spatially using .mean(axis=(1,2)) on the variables
- Repeat the analysis for each point (using a `for` loop)
- Use `xarray` methods to apply a function to the array

In [None]:
# List the grid coordinates available inside the selected region
for label, coordinate in (
    ("Latitude values: ", mw10.lat),
    ("Longitude values: ", mw10.lon),
):
    print(label, coordinate.values)

In [None]:
# Pick one grid point out of the latitude and longitude values listed above
slat = 39  # selected latitude
slon = -124  # selected longitude (-180 : 180; 360 is added for the lookup)

# Pull the point's time series into memory so the analysis below runs on
# loaded data
grid_point = mw10.sel(lat=slat, lon=slon + 360)
subset = grid_point.load()

# yearly means of the monthly series
subset_year = subset.resample(time0="1Y").mean(dim="time0")

subset

In [None]:
# Time series at the selected location: one panel per variable, with the
# monthly values and the annual means (red) drawn on top of each other
plt.figure(figsize=(12, 8))

# (subplot position, variable, monthly line format, extra monthly line
#  options, zorder of the annual line, first part of the title)
panels = (
    (1, "northward_wind_at_10_metres", "bd-", {}, 2, "Meridional wind (v)"),
    (2, "eastward_wind_at_10_metres", "go-", {}, 3, "Zonal wind (u)"),
    (3, "wind_speed", "s-", {"c": "darkorange"}, 2, "Wind speed"),
)

for position, variable, monthly_format, monthly_extra, annual_zorder, heading in panels:
    plt.subplot(2, 2, position)
    plt.plot(subset.time0, subset[variable], monthly_format, zorder=1, **monthly_extra)
    plt.plot(subset_year.time0, subset_year[variable], "rd-", zorder=annual_zorder)
    plt.axhline(y=0, c="k", alpha=0.4)  # zero reference line
    plt.ylabel("Wind speed (m/s)")
    plt.title(heading + ", Lat=" + str(slat) + ", Lon=" + str(slon))
    plt.grid(zorder=0)

plt.tight_layout()

***
## Now, let's calculate the temporal trend of each wind variable, using a first-degree (linear) polynomial fit

In [None]:
%%time
# Fit a degree-1 polynomial along time0 to every variable of the point
# series, then evaluate the fitted polynomials at the original time stamps
results = subset.polyfit(dim="time0", deg=1)
trend = xr.polyval(coord=subset.time0, coeffs=results)

## Plot data again with trends

In [None]:
# Same panels as before (monthly values and annual means at the selected
# location), now with the fitted linear trend drawn as a thick magenta line
plt.figure(figsize=(12, 8))

# (subplot position, variable, monthly line format, extra monthly line
#  options, zorder of the annual line, first part of the title)
panels = (
    (1, "northward_wind_at_10_metres", "bd-", {}, 2, "Meridional wind (v)"),
    (2, "eastward_wind_at_10_metres", "go-", {}, 3, "Zonal wind (u)"),
    (3, "wind_speed", "s-", {"c": "darkorange"}, 2, "Wind speed"),
)

for position, variable, monthly_format, monthly_extra, annual_zorder, heading in panels:
    plt.subplot(2, 2, position)
    plt.plot(subset.time0, subset[variable], monthly_format, zorder=1, **monthly_extra)
    plt.plot(subset_year.time0, subset_year[variable], "rd-", zorder=annual_zorder)
    # the evaluated fit is stored under "<variable>_polyfit_coefficients"
    plt.plot(
        trend.time0,
        trend[variable + "_polyfit_coefficients"],
        "m",
        zorder=2,
        lw=4,
    )
    plt.axhline(y=0, c="k", alpha=0.4)  # zero reference line
    plt.ylabel("Wind speed (m/s)")
    plt.title(heading + ", Lat=" + str(slat) + ", Lon=" + str(slon))
    plt.grid(zorder=0)

plt.tight_layout()

***
# Resources
**Data**
- AWS [ERA-5 (ECMWF)](https://registry.opendata.aws/ecmwf-era5/) reanalysis data.
This page also has links to other tutorials that use other libraries.
- [List of data available](https://github.com/planet-os/notebooks/blob/master/aws/era5-pds.md) on ERA5 and details on how the files are organized.
- Google Earth Engine ERA-5 data. [[Monthly]](https://developers.google.com/earth-engine/datasets/catalog/ECMWF_ERA5_MONTHLY#bands) [[Daily]](https://developers.google.com/earth-engine/datasets/catalog/ECMWF_ERA5_DAILY).

**More on the libraries:**
- [xarray apply](https://www.programcreek.com/python/example/123575/xarray.apply_ufunc) Examples on how to apply a function to an xarray structure
- [scikit-learn (sklearn)](https://scikit-learn.org/stable/) a library for machine learning functions
- [statsmodels](https://www.statsmodels.org/stable/user-guide.html) a library to calculate statistical models.


