# Chapter 6 - Ocean Data Example
### Quantify area of ocean temperature above a given threshold

In this chapter we illustrate the use of Sea Surface Temperature (SST) data by analyzing an area of the ocean and determining the percentage of that area above a given threshold. This could be used to study marine heatwaves, or adapted by choosing a threshold relevant to a marine species of interest.

In [None]:
# libraries
import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt 
import hvplot.pandas
import hvplot.xarray
import fsspec
import s3fs
import dask
from dask.distributed import performance_report, Client, progress
xr.set_options(display_style="html")  # display xarray objects as HTML in the notebook

# this library helps to make your code execution less messy
# NOTE: simplefilter('ignore') with no category silences every warning,
# including deprecation notices from the libraries imported above.
import warnings
warnings.simplefilter('ignore') # filter some warning messages

In [None]:
# input parameters
latr = 20.5 # latitude of the point of interest (used as the `lat` label when selecting data)
lonr = -158.2 # longitude of the point of interest (used as the `lon` label when selecting data)
# time frame
dater = ['2013-01-01','2019-12-31'] # [start, end] dates on the format 'YYYY-MM-DD' as string


***
### Read data from the Cloud

In [None]:
# Open the cloud-hosted Zarr store: this reads the metadata and sets up
# `ds_sst` as the access 'link' (variable) used by the cells below.
# Registry entry: https://registry.opendata.aws/mur/
# Bucket:         s3://mur-sst/zarr/
mur_zarr_url = 'https://mur-sst.s3.us-west-2.amazonaws.com/zarr-v1'
ds_sst = xr.open_zarr(mur_zarr_url, consolidated=True)

# look at the data, description and attributes
# (click on ... in the rendered output for information)
ds_sst

In [None]:
# load the data - only the selected time range at the selected point
# The time range is a label slice. The point is picked with a
# nearest-neighbour lookup so that `latr`/`lonr` do not have to coincide
# exactly with a grid node (an exact float label match raises KeyError
# otherwise). The two selections are separate `.sel` calls because
# `method` is not meant to be combined with slice indexers.
sst = (
    ds_sst['analysed_sst']
    .sel(time=slice(dater[0], dater[1]))
    .sel(lat=latr, lon=lonr, method='nearest')
    .load()
)

sst = sst - 273.15  # into degrees Celsius
sst.to_netcdf('sst_example.nc')  # local copy, read back by the next section
sst.plot()

***
### Read the data from a file - useful when there is no access to the cloud, or to avoid reading from the cloud again

In [None]:
# in the meantime....
# Read the local netCDF copy. The variable is loaded into memory inside the
# `with` block, so `sst` does not depend on the file handle after the
# dataset is closed on leaving the block.
with xr.open_dataset('./sst_example.nc') as ds_file:
    sst = ds_file['analysed_sst'].load()

***
### Let's plot the data, with two different libraries.
#### 1. matplotlib, which we have already learned. It makes nice static figures.
#### 2. hvplot, a more interactive library for web display.

In [None]:
# matplotlib #
# this is all you need

print('matplotlib')
sst.plot()

# all stuff here to make it look good
# (raw strings keep the LaTeX '\circ' from being read as a Python escape sequence)
plt.ylabel(r'SST ($^\circ$C)')
plt.xlabel('Year')
# take the hemisphere letter from the sign of the coordinate, so that a
# negative longitude is shown as e.g. 158.2 W rather than -158.2 W
lat_hemi = 'N' if latr >= 0 else 'S'
lon_hemi = 'E' if lonr >= 0 else 'W'
plt.title('Location: ' + str(abs(latr)) + r'$^\circ$' + lat_hemi + ', '
          + str(abs(lonr)) + r'$^\circ$' + lon_hemi)
plt.grid(True, alpha=0.3)
plt.show()

# hvplot #
print('hovplot. you can examine your data while plotting: try hovering your mouse over the plot and data details are displayed.')
# hvplot.pandas works on DataFrames, so wrap the series in one indexed by date
df = pd.DataFrame(data=sst.data, index=sst.time.data,columns=['SST (C)'])
df.index.name = 'Date'
df.hvplot(grid=True)

### Now let's do some analysis on our data.
#### First, the classic climatology and anomalies, mainly to use our new hvplot tool.

In [None]:
# Climatology: mean and standard deviation of SST for each day of the year
by_doy = sst.groupby('time.dayofyear')
sst_climatology = by_doy.mean('time', keep_attrs=True, skipna=False)
sst_climstd = by_doy.std('time', keep_attrs=True, skipna=False)

# Table holding the mean and the mean +/- one standard deviation,
# indexed by day of year, for the interactive plot
clim_mean = sst_climatology.data
clim_spread = sst_climstd.data
df = pd.DataFrame(
    {
        'SST (C)': clim_mean,
        '+Std': clim_spread + clim_mean,
        '-Std': -clim_spread + clim_mean,
    },
    index=sst_climatology.dayofyear.data,
)
df.index.name = 'Day of Year'
df.hvplot(color=['k','grey','grey'])

In [None]:
# Anomalies: each day minus the climatological mean for that day of year
sst_anomaly = sst.groupby('time.dayofyear')-sst_climatology

# Monthly means of the anomalies, labelled 15 days after the start of each month.
# The `loffset` argument of `resample` is deprecated (and removed in recent
# xarray releases), so the offset is added to the resampled time coordinate.
sst_anomaly_monthly = sst_anomaly.resample(time='1MS').mean(keep_attrs=True,skipna=False)
sst_anomaly_monthly['time'] = sst_anomaly_monthly.get_index('time') + pd.Timedelta(days=15)

# Daily anomalies as a DataFrame indexed by date, for the interactive plot
df2 = pd.DataFrame(data=sst_anomaly.data, index=sst.time.data,columns=['SSTa (C)'])
df2.index.name = 'Date'
df2.hvplot(grid=True)

In [None]:
# another way to plot it: the same anomaly series drawn as a filled area
anomaly_area = df2.hvplot.area(x='Date', y='SSTa (C)', grid=True)
anomaly_area

### Now, using the plots above to further our analysis.
### _For example_, hovering over the climatology I can see that, normally, SST doesn't get above 28.22C (on average), or 28.82C (with 1 standard deviation). Considering any day with SST below or equal to 28.8C as normal (even if hot), when do we see higher SST values?

In [None]:
# now, let's choose a threshold
thr = df['+Std'].max() # highest value of the climatology plus one standard deviation
print('Max normal SST = ',thr,'C')

# plotting data, highlighting values above threshold
plt.plot(sst.time,sst.data, lw=1)
a=sst>=thr  # boolean mask of the days at or above the threshold
plt.plot(sst.time[a], sst.data[a],'.r')
# all stuff here to make it look good
# (raw strings keep the LaTeX '\circ' from being read as a Python escape sequence)
plt.ylabel(r'SST ($^\circ$C)')
plt.xlabel('Year')
# take the hemisphere letter from the sign of the coordinate, so that a
# negative longitude is shown as e.g. 158.2 W rather than -158.2 W
lat_hemi = 'N' if latr >= 0 else 'S'
lon_hemi = 'E' if lonr >= 0 else 'W'
plt.title('Location: ' + str(abs(latr)) + r'$^\circ$' + lat_hemi + ', '
          + str(abs(lonr)) + r'$^\circ$' + lon_hemi)
plt.grid(True, alpha=0.3)
plt.show()

# how many days per year are at or above the threshold
dts = sst[sst>=thr].time
hot_days = dts.groupby('time.year').count()
plt.bar(hot_days.year, hot_days)
# x axis spans the years of the requested time frame
plt.xlim(int(dater[0][:4]), int(dater[1][:4])+1)
plt.show()
hot_days




### Now let's try a different definition of hot days: marine heat waves, defined by SST hotter than the 90th percentile of SST anomalies within a period.

In [None]:
# plotting the anomalies, highlighting values above threshold

plt.plot(sst_anomaly.time,sst_anomaly.data, lw=1)
plt.axhline(y=0, c='k', zorder=0, alpha=0.5)  # zero-anomaly reference line
# 90th percentile of the anomalies is the threshold
thr = np.percentile(sst_anomaly, 90)
a=sst_anomaly>=thr  # boolean mask of the days at or above the threshold
plt.plot(sst_anomaly.time[a], sst_anomaly.data[a],'.r')
# all stuff here to make it look good
# (raw strings keep the LaTeX '\circ' from being read as a Python escape sequence)
plt.ylabel(r'SST anomaly ($^\circ$C)')  # this panel shows anomalies, not SST
plt.xlabel('Year')
# take the hemisphere letter from the sign of the coordinate, so that a
# negative longitude is shown as e.g. 158.2 W rather than -158.2 W
lat_hemi = 'N' if latr >= 0 else 'S'
lon_hemi = 'E' if lonr >= 0 else 'W'
plt.title('Location: ' + str(abs(latr)) + r'$^\circ$' + lat_hemi + ', '
          + str(abs(lonr)) + r'$^\circ$' + lon_hemi)
plt.grid(True, alpha=0.3)
plt.show()

# how many days per year have values at or above that threshold
dts = sst_anomaly[sst_anomaly>=thr].time
mhw = dts.groupby('time.year').count()
plt.bar(mhw.year,mhw)
plt.show()
mhw

## Finally, let's see how the SST field around this point looks for a hot day identified above.

In [None]:
# Plan for this part:
#   - select a time (month and year)
#   - make plot and contour values above threshold
#   - save figure

# input parameters: centre of the region to look at
latr = 20.5
lonr = -158.2
# time frame
dater = ['2019-07-16','2019-07-16'] # dates on the format 'YYYY-MM-DD' as string

# Label-based selection: the requested dates and a box reaching 2 units of
# lat/lon on each side of the centre point; all variables are loaded.
region = {
    'time': slice(dater[0], dater[1]),
    'lat': slice(latr - 2, latr + 2),
    'lon': slice(lonr - 2, lonr + 2),
}
sst2 = ds_sst.sel(**region).load()


In [None]:
# Keep only the cells whose mask flag is below 2; every other cell becomes NaN
keep_cells = sst2['mask'] < 2
mask = sst2['mask'].where(keep_cells)

# Shift the temperature by 273.15 (degrees Celsius) and apply the mask by multiplication
sst3 = sst2['analysed_sst'] - 273.15
sst3 = sst3 * mask

# Interactive map of the masked field with coastlines drawn on top
sst3.hvplot.quadmesh(x='lon', y='lat', coastline=True, clabel='T [C]', cmap='coolwarm')

# resources