# Calculate extreme indices

In [1]:
import xarray as xr, matplotlib.pyplot as plt
from importlib import reload # need to use this if I edit a function file
import os
import numpy as np
import pandas as pd
import cartopy.crs as ccrs # to add in continents and change map projections 
from matplotlib.colors import LinearSegmentedColormap # to change colour bar????
import dask.diagnostics # dask allows you to check how long something is taking to load
import climtas # needed to count event statistics with a specified duration
import seaborn as sns
import scipy.stats # to calculate correlation

In [2]:
# import custom functions
import sys 
sys.path.append('/home/563/kb6999/Functions') # use this if the function file is in a different directory to the notebook

import frequently_used_functions as func
import plotting_functions as fplot
import model_functions as funcM
import reanalysis_functions as funcR
import Extreme_indices_functions as funcX
# import obs_functions as funcO

In [3]:
# define path where obs data is stored
# (keep the trailing slash: file names are appended directly to this string in the f-strings below)
path = '/g/data/w48/kb6999/Observations/obs_netcdfs_T/'

In [4]:
# open daily T data from 5 stations
# (its Tmax and Tmin variables and its station coordinate are used further down)
daily_T = xr.open_dataset(f'{path}Daily_T_Aus_5S.nc')
daily_T

Here are the periods of data around the Krakatoa eruption that seem to be homogeneous, based on analysis of the monthly records. There are no breakpoints near Krakatoa (with the possible exception of Adelaide), so they shouldn't affect us. But we need to acknowledge that there may be remaining quality issues with the observations, just as there are with the reanalysis! The joys of working with the real world!
 
Adelaide: Tmax and Tmin [1872-1887] - clim [1872-1887] (less than 30 years)

Cape Otway: Tmax [1864-1898], Tmin [1867-1888] - overall [1867-1888] (less than 30 years)

Eversleigh: Tmax and Tmin [1877-1922] - clim [1880-1910 or 1890-1920]

Melbourne: Tmax [1855-1941], Tmin [1870-1908] - overall [1870-1908] - clim [1870-1900]

Sydney: Tmax [1869-1912], Tmin [1866-1919] - overall [1869-1912] - clim [1870-1900 or 1880-1910]

Overall good period: 1877-1888 (ten years around Krakatoa)

In [5]:
# years to take as base period for percentiles, one entry per station
# the values follow the periods listed in the notes above, in the order
# Adelaide, Cape Otway, Eversleigh, Melbourne, Sydney
# NOTE(review): entries are matched to daily_T.station by position further down,
# so this assumes the stations are stored in that same order - confirm
start_dates = [1872, 1867, 1880, 1870, 1870]
end_dates = [1887, 1888, 1910, 1900, 1900]

In [6]:
# length of each station's base period, taken as end year minus start year
for first_year, last_year in zip(start_dates, end_dates):
    period = last_year - first_year
    print(period)

15
21
30
30
30


# extremes by month

In [39]:
# re-import the extreme indices function file so that edits to it are picked up
funcX=reload(funcX)

In [40]:
# passed unchanged to funcX.extreme_indices in the per-station loop below
# NOTE(review): presumably a resample frequency ('M') and the matching groupby key - confirm in Extreme_indices_functions
time_group = ['M', 'time.month']

In [42]:
# calculate extreme indices for each station
# start_dates / end_dates are indexed by the station's position, so every
# station is processed with its own base period (passed as strings)
ds = [
    funcX.extreme_indices(
        daily_T.sel(station=station),
        time_group,
        f'{start_dates[s]}',
        f'{end_dates[s]}',
    )
    for s, station in enumerate(daily_T.station)
]

In [43]:
# join the per-station results into a single object along a 'station' dimension
ex_indices_m = xr.concat(ds, dim='station', coords='minimal')
ex_indices_m

In [44]:
# save as netcdf (written next to the observation files under `path`)
ex_indices_m.to_netcdf(f'{path}Obs_extreme_indices_m.nc')

# seasonal 

In [None]:
# define a new coordinate (seasonyear) so that december gets counted with the adjoining jan and feb:
# month // 12 is 1 for December and 0 for every other month, so December is labelled with the following year
seasonyear = ex_indices_m.time.dt.year + (ex_indices_m.time.dt.month // 12)
# assign_coords returns a new object, so ex_indices_m itself is left untouched
# (a plain `ex_index_s = ex_indices_m` only creates a second name for the same object,
# and setting .coords[...] on it would then modify ex_indices_m as well)
ex_index_s = ex_indices_m.assign_coords(seasonyear=seasonyear)
ex_index_s

In [None]:
ds_Tmax = daily_T.Tmax
ds_Tmin = daily_T.Tmin


def _by_seasonyear(index_name, reducer):
    """Run `reducer` on every seasonyear group of one variable of ex_index_s."""
    # GroupBy.map is the current name of this operation; GroupBy.apply is only
    # kept as a backward-compatible alias and warns about pending deprecation
    return ex_index_s[index_name].groupby('seasonyear').map(reducer)


def _per_station_index(index_func, daily):
    """Call `index_func` for each station with that station's base period.

    `index_func` receives the daily data of one station plus the start and
    end year of the base period as strings; the per-station results are
    concatenated along a 'station' dimension.
    """
    per_station = [
        index_func(daily.sel(station=station), f'{start_dates[s]}', f'{end_dates[s]}')
        for s, station in enumerate(daily_T.station)
    ]
    return xr.concat(per_station, dim='station', coords='minimal')


# take the sum for these indices
FD = _by_seasonyear('FD', func.seasonal_sum)
SU = _by_seasonyear('SU', func.seasonal_sum)
TR = _by_seasonyear('TR', func.seasonal_sum)
ID = _by_seasonyear('ID', func.seasonal_sum)

# take the max/min for these indices
TXx = _by_seasonyear('TXx', func.seasonal_max)
TNx = _by_seasonyear('TNx', func.seasonal_max)
TNn = _by_seasonyear('TNn', func.seasonal_min)
TXn = _by_seasonyear('TXn', func.seasonal_min)

# percentile indices are calculated from the daily data, station by station,
# because each station has its own base period
TN10p = _per_station_index(funcX.seasonal_10p, ds_Tmin)
TX10p = _per_station_index(funcX.seasonal_10p, ds_Tmax)
TN90p = _per_station_index(funcX.seasonal_90p, ds_Tmin)
TX90p = _per_station_index(funcX.seasonal_90p, ds_Tmax)

# DTR comes from the daily Tmin/Tmax of all stations at once
DTR = funcX.seasonal_DTR(ds_Tmin, ds_Tmax)
# ETR is the seasonal TXx minus the seasonal TNn
ETR = TXx - TNn

In [None]:
# put all indices into one xarray dataset, one variable per index
ex_indices_s = xr.Dataset(
    {
        'FD': FD,
        'SU': SU,
        'ID': ID,
        'TR': TR,
        'TXx': TXx,
        'TNx': TNx,
        'TNn': TNn,
        'TXn': TXn,
        'TN10p': TN10p,
        'TX10p': TX10p,
        'TN90p': TN90p,
        'TX90p': TX90p,
        'DTR': DTR,
        'ETR': ETR,
    }
)
ex_indices_s

In [None]:
# save as netcdf (written next to the observation files under `path`)
ex_indices_s.to_netcdf(f'{path}Obs_extreme_indices_s.nc')

In [None]:
# open netcdf of extreme indices
# NOTE(review): this rebinds ex_index_s, which earlier in the notebook holds the monthly indices with
# the seasonyear coordinate; once this cell has run, the seasonal calculation cells above can only be
# re-run after re-running the cell that builds ex_index_s from ex_indices_m
ex_index_s = xr.open_dataset(f'{path}Obs_extreme_indices_s.nc')
ex_index_s