# Seasonal Amplitude and Phase

In [None]:
# Import modules
%matplotlib inline
%run /g/data/w40/ri9247/code/aus_precip_benchmarking/master_functions_bmf.ipynb
import xarray as xr
import numpy as np
import fnmatch
import pandas as pd
from astropy.stats import circcorrcoef
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.ticker as mticker
import matplotlib.gridspec as gridspec
from matplotlib.patches import Rectangle
import cartopy as cartopy
import cartopy.crs as ccrs
from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter

## Define Spatiotemporal Boundaries

In [None]:
# Spatial domain: latitude/longitude bounds covering all of Australia
lat_slice = slice(-44.5, -10)
lon_slice = slice(112, 156.25)

# Analysis period
time_slice = slice("1976-01-01", "2005-12-30")

# Temporal scale for the SPI variable (1 = 3-months, 2 = 6-months, 3 = 12-months);
# leave as None when the SPI variable is not used
iscale = None

# Season as a list of month numbers (None = no seasonal breakdown) and its display name
season = None
season_name = 'Annual'

# Open the mask dataset
qc_mask_ds = xr.open_dataset(
    '/g/data/w40/ri9247/CORDEX-Australasia/data/obs/AUS-44i_grid/no_indices/mask/AUS-44i_combined_quality_mask_no_oceans.nc'
)

# Subset the station mask to the region defined above
qc_mask = qc_mask_ds.station_mask.sel(lon=lon_slice, lat=lat_slice)

## Define Benchmarking Thresholds for Metrics for Amplitude

In [None]:
# Amplitude benchmark: models whose amplitude NRMSE is at or below this value
# are highlighted as passing in the amplitude panel plot
amplitude_threshold = 0.6

## Define Keywords for Data

Keywords needed are bracketed {}. <br><br>
**RCM** database is organized as follows: <br>
**Climpact Indices:** <br>
parent_directory/{time_period}/{index-keyword}/{variable}_{time_average} _dataset_file.nc 
<br><br>
**Observations** database is organized as follows: <br>
obs_parent_directory/{grid_type}/climpact/{variable}/{variable} _{time_average}_agcd_historical_v1_1950-2020.nc <br><br>
**Keyword Options:** <br>
grid_type: 'AUS-44i_grid', 'native_grid', 'one_degree_grid' <br>
time_period: 'historical', 'rcp85' <br>
variable: See list of Climpact Indices at: https://climpact-sci.org/indices; must be all-lowercase <br>
time_average: 'ANN', 'MON'

## Define list for subset of models

In [None]:
# Names of the simulations in the model subset, one per line.
# The internal spacing of each name is kept exactly as written.
subset_names = [
    "ACCESS1-0   CCAM-1704",
    "ACCESS1-0   CCAM-2008",
    "ACCESS1-0   WRF360J",
    "ACCESS1-0   WRF360K",
    "CanESM2   CCAM-2008",
    "CanESM2   WRF360J",
    "CNRM-CM5   CCAM-1704",
    "GFDL-ESM2M   CCAM-1704",
    "GFDL-ESM2M   CCAM-2008",
    "HadGEM2-CC   CCAM-1704",
    "HadGEM2-ES   CCLM5-0-15",
    "HadGEM2-ES   REMO2015",
    "MIROC5   CCAM-1704",
    "MIROC5   CCAM-2008",
    "MPI-ESM-LR   CCLM5-0-15",
    "MPI-ESM-LR   REMO2015",
    "MPI-ESM-MR   RegCM4-7",
    "NorESM1-M   CCAM-1704",
    "NorESM1-M   RegCM4-7",
    "NorESM1-M   REMO2015",
]

### Using Climpact Indices

In [None]:
# Keywords substituted into the dataset paths below
time_period = 'historical'
variable = 'prcptot'
time_average = 'MON'
grid_type = 'AUS-44i_grid'

# Root directories for the model and observation databases
model_master_path = '/g/data/ks32/CLEX_Data/CORDEX_Australasia_Indices/v1-0/'
obs_master_path = '/g/data/w40/ri9247/CORDEX-Australasia/data/obs/'

# Directory holding the model files for the chosen period and variable
model_data_path = f'{model_master_path}{time_period}/{variable}/'

# AGCD observation file for the chosen grid, variable and time average
agcd_data_path = (
    f'{obs_master_path}{grid_type}/climpact/{variable}/'
    f'{variable}_{time_average}_agcd_historical_v1_1950-2020.nc'
)

## Get Pandas Dataframe with list of dataset file paths for subset of model simulations

In [None]:
# Do not truncate long file paths when a DataFrame is displayed
pd.set_option('display.max_colwidth', None)

# File paths for the full ensemble (stored in a Pandas DataFrame)
model_paths = get_model_files(model_data_path, time_average)

# Restrict the ensemble to the simulations named in subset_names
model_paths_subset = get_model_files_subset(model_paths, subset_names)

# Display the subset to confirm the correct files were picked up
model_paths_subset

## Calculate Amplitude and Phase of the Seasonal Cycle at each Grid Point

## Amplitude

### Observations (AGCD)

In [None]:
# Data type flag handed to the amplitude helper for the observational product
data_type = "obs"

# Amplitude of the annual cycle for the AGCD observations
obs_amplitude = get_amplitude_of_annual_cycle(
    agcd_data_path,
    variable,
    time_slice,
    lat_slice,
    lon_slice,
    data_type,
    iscale,
    qc_mask,
)

In [None]:
# Optional sanity check: quick-look plot of the observed amplitude
# using the object's default plotting method
obs_amplitude.plot()

### Models (stored in a dictionary)

In [None]:
# Data type flag for the models, so units are properly converted if daily or monthly mean data
data_type = 'model_climpact'

# Dictionary mapping each simulation name to its amplitude result
amplitude_model_dict = {}

# Loop through the RCM simulations and calculate the amplitude of the annual cycle.
# Column 0 of model_paths_subset is used as the simulation name and column 1 as
# the file path. The columns are read positionally and row by row, rather than
# feeding the index label from iterrows() into .iloc, so the right rows are used
# even if the subset DataFrame keeps a non-contiguous index.
for dataset_name, dataset_path in zip(model_paths_subset.iloc[:, 0], model_paths_subset.iloc[:, 1]):
    amplitude_model_dict[dataset_name] = get_amplitude_of_annual_cycle(
        dataset_path, variable, time_slice, lat_slice, lon_slice, data_type, iscale, qc_mask
    )

# Option to print the dictionary keys after completion
print(amplitude_model_dict.keys())

### Calculate NRMSE of Amplitude

In [None]:
# Collect one record per simulation and build the DataFrame once. This avoids
# concatenating onto an empty DataFrame inside the loop, which is quadratic
# and relies on deprecated empty-frame dtype handling in pandas.
amp_nrmse_records = []

# Loop through the model amplitudes and score each one against the observations
for model_name, model_amplitude in amplitude_model_dict.items():

    # NRMSE of the model amplitude against the observed amplitude
    amp_nrmse = get_nrmse_mean_ppdata(obs_amplitude, model_amplitude)

    amp_nrmse_records.append({'dataset_name': f'{model_name}', 'amp_nrmse': amp_nrmse.item(0)})

# Table of NRMSE scores, sorted ascending (lowest NRMSE first) for plotting
model_master_nrmse_df = pd.DataFrame(
    amp_nrmse_records, columns=['dataset_name', 'amp_nrmse']
).sort_values('amp_nrmse', ascending=True)

# Option to print the DataFrame for a sanity check
model_master_nrmse_df

## Phase

### Observations (AGCD)

In [None]:
# Phase of the annual cycle for the AGCD observations
obs_phase = get_phase_of_annual_cycle(
    agcd_data_path,
    variable,
    time_slice,
    lat_slice,
    lon_slice,
    iscale,
    qc_mask,
)

### Models (stored in dictionary)

In [None]:
# Dictionary mapping each simulation name to its phase result
phase_model_dict = {}

# Loop through the RCM simulations and calculate the phase of the annual cycle.
# Column 0 of model_paths_subset is used as the simulation name and column 1 as
# the file path. The columns are read positionally and row by row, rather than
# feeding the index label from iterrows() into .iloc, so the right rows are used
# even if the subset DataFrame keeps a non-contiguous index.
for dataset_name, dataset_path in zip(model_paths_subset.iloc[:, 0], model_paths_subset.iloc[:, 1]):
    phase_model_dict[dataset_name] = get_phase_of_annual_cycle(
        dataset_path, variable, time_slice, lat_slice, lon_slice, iscale, qc_mask
    )

# Option to print the dictionary keys after completion
print(phase_model_dict.keys())

### Convert Phase to Radians

In [None]:
# Map phase values onto angles around a circle:
# add 1, divide by 12 to get a fraction of the year, then multiply by 2*pi
obs_radians = ((obs_phase + 1) / 12) * 2 * np.pi

# Apply the same conversion to every model, keyed by simulation name
model_radians_dict = {
    model_name: ((model_phase + 1) / 12) * 2 * np.pi
    for model_name, model_phase in phase_model_dict.items()
}

# Option to print the dictionary keys after completion
print(model_radians_dict.keys())

### Calculate Circular Correlation Coefficient and Bootstrapped Confidence Interval (95th Percentile) Against AGCD

In [None]:
# Resampling percentage handed to paired_bootstrap
resample_percentage = 0.6

# Number of bootstrap replicates used for the confidence interval
n_bootstrap = 5000

# Observed phase (radians) as a NumPy array, NaNs still included
obs_alpha_i = np.asarray(obs_radians)

# Collect one record per simulation and build the DataFrame once (avoids
# concatenating onto an empty DataFrame inside the loop)
circ_corr_records = []

# Loop through the models and score each phase field against the observations
for model_name, model_radians in model_radians_dict.items():

    model_alpha_i = np.asarray(model_radians)

    # Keep only grid points that are valid in BOTH fields. Dropping NaNs from
    # each array independently would misalign the obs/model pairs (or give
    # arrays of different length) whenever the two NaN patterns differ.
    valid = ~(np.isnan(obs_alpha_i) | np.isnan(model_alpha_i))
    obs_alpha = obs_alpha_i[valid]
    model_alpha = model_alpha_i[valid]

    # Circular correlation coefficient between observed and modelled phase
    phase_circ_corr = circcorrcoef(obs_alpha, model_alpha)

    # 95% confidence interval (2.5th to 97.5th percentile) from bootstrapping
    bootstrap_results = np.array([
        paired_bootstrap(obs_alpha, model_alpha, resample_percentage)
        for _ in range(n_bootstrap)
    ])
    lower_ci, upper_ci = np.percentile(bootstrap_results, [2.5, 97.5])

    circ_corr_records.append({
        'dataset_name': f'{model_name}',
        'phase_circ_corr': phase_circ_corr.item(0),
        'lower_ci': lower_ci,
        'upper_ci': upper_ci,
    })

# Table of scores, sorted descending (highest correlation first) for plotting
model_master_circular_cor_df = pd.DataFrame(
    circ_corr_records, columns=['dataset_name', 'phase_circ_corr', 'lower_ci', 'upper_ci']
).sort_values('phase_circ_corr', ascending=False)

# Option to print the DataFrame showing the confidence intervals
model_master_circular_cor_df

## Set Up Panel Plot Sorted by Spatial Area Average

### Set up mapping constants

In [None]:
# Map extent as [lon_min, lon_max, lat_min, lat_max] and the map projection
extent = [112, 156.25, -44.5, -10]
proj = ccrs.PlateCarree()

# Indices of the axes (5x4 panel layout) that get latitude labels, longitude labels, or both
lat_label = [0, 1, 5, 9, 13]
lon_label = [18, 19, 20]
both_label = [17]

# Latitude/longitude values shown as tick marks
lat_ticks = [-10, -20, -30, -40]
lon_ticks = [115, 125, 135, 145, 155]

# Twelve colors for the phase colormap
# colorblind-friendly colors available at: https://gist.github.com/thriveth/8560036
my_colors_four_seasons = [
    '#4b92c3', '#a5c8e1',
    '#ce660a', '#ff8b26', '#ffbf86',
    '#c02324', '#e26768', '#eea8a9',
    '#238023', '#56b356', '#bfe2bf',
    '#185f90',
]
cmap_phase = mpl.colors.ListedColormap(my_colors_four_seasons, name='colorblind_friendly_4seasons')

# Color limits for phase (12 months)
mmin, mmax = 0, 11

# Display the colormap
cmap_phase

In [None]:
# Colormap for the masked area: a reversed 5-level gray map,
# truncated to its 0.5-1 range by the truncate_colormap helper
cmap_mask_i = plt.get_cmap('gray', 5).reversed()
cmap_mask = truncate_colormap(cmap_mask_i, 0.5, 1)

# Option to display the colormap
cmap_mask

## Create Sorted Panel Plots - Portrait (for Paper)

## PHASE

In [None]:
# Figure and title
fig = plt.figure(figsize=(12, 16))
fig.suptitle("Climatological Rainfall Phase (1976-2005)", fontsize=16, y=0.99)

# 7x4 grid: row 0 holds AGCD, rows 1-5 hold the models, the thin row 6 holds the colorbar
gs = gridspec.GridSpec(7, 4, width_ratios=[1, 1, 1, 1], height_ratios=[1, 1, 1, 1, 1, 1, 0.1])
n_cols = 4

axs = {}

# AGCD is the only dataset on the top row
axs['AGCD'] = fig.add_subplot(gs[0, 0], projection=proj)

# Place the models row by row, starting on grid row 1, in the order of the
# sorted score table (highest circular correlation first). The name is read
# by column label; positional access on a row Series (row[0]) is deprecated.
for position, dataset_name in enumerate(model_master_circular_cor_df['dataset_name']):
    grid_row, grid_col = divmod(position, n_cols)
    axs[f'{dataset_name}'] = fig.add_subplot(gs[grid_row + 1, grid_col], projection=proj)

for name, ax in axs.items():
    ax.set_title(name, fontsize=13)

# Grey overlay showing where the mask equals 0.
# NOTE(review): the original referenced `mask`, which is not defined in this
# notebook; `qc_mask` (the mask loaded above and passed to the amplitude/phase
# helpers) is used instead -- confirm this is the intended mask.
mask_overlay = qc_mask.where(qc_mask == 0.)

# Scores indexed by dataset name, so each model is looked up once
phase_scores = model_master_circular_cor_df.set_index('dataset_name')

# Dictionary of plot handles
plots = {}

# Plot the phase map for AGCD and add the grey mask
plots['AGCD'] = obs_phase.plot(ax=axs['AGCD'], add_labels=False, add_colorbar=False, vmin=mmin, vmax=mmax, cmap=cmap_phase)
mask_overlay.plot(ax=axs['AGCD'], add_labels=False, add_colorbar=False, cmap=cmap_mask)

# Plot the phase map for each model, add the grey mask and annotate the score
for model_name, model_phase in phase_model_dict.items():

    # Circular correlation coefficient and confidence interval, rounded to 2 decimals
    score_row = phase_scores.loc[f'{model_name}']
    phase_cir_corr_score = round(float(score_row['phase_circ_corr']), 2)
    phase_lower_ci = round(float(score_row['lower_ci']), 2)
    phase_upper_ci = round(float(score_row['upper_ci']), 2)

    # Plot data and add grey mask
    plots[model_name] = model_phase.plot(ax=axs[model_name], add_labels=False, add_colorbar=False, vmin=mmin, vmax=mmax, cmap=cmap_phase)
    mask_overlay.plot(ax=axs[model_name], add_labels=False, add_colorbar=False, cmap=cmap_mask)

    # Benchmark: the score lies within its bootstrapped confidence interval and is positive
    meets_benchmark = (phase_lower_ci <= phase_cir_corr_score <= phase_upper_ci) and (phase_cir_corr_score > 0)

    # Passing models get a highlighted box around the score
    text_kwargs = dict(fontsize=12.0, transform=axs[model_name].transAxes)
    if meets_benchmark:
        text_kwargs['bbox'] = dict(facecolor='lavender', edgecolor='blueviolet', boxstyle='round')
    axs[model_name].text(0.06, 0.06, 'CCor:\n%.2f' % (phase_cir_corr_score), **text_kwargs)

# Add coastlines
for ax in axs.values():
    ax.coastlines()

# Tick marks on every map; tick labels only on the axes listed in
# lat_label / lon_label / both_label (first column and bottom row)
for i, ax in enumerate(fig.axes):
    ax.set_xticks(lon_ticks, crs=ccrs.PlateCarree())
    ax.set_yticks(lat_ticks, crs=ccrs.PlateCarree())
    ax.xaxis.set_ticklabels([])
    ax.yaxis.set_ticklabels([])

    show_lat = i in lat_label or i in both_label
    show_lon = i in lon_label or i in both_label

    if show_lat:
        ax.set_yticks(lat_ticks, crs=ccrs.PlateCarree())
        ax.yaxis.set_major_formatter(LatitudeFormatter())
    if show_lon:
        ax.set_xticks(lon_ticks, crs=ccrs.PlateCarree())
        ax.xaxis.set_major_formatter(LongitudeFormatter(zero_direction_label=True))

# Colorbar in the bottom grid row. The AGCD handle is used as the mappable:
# every panel shares the same vmin/vmax/cmap, and unlike a hard-coded model
# name this key always exists regardless of the chosen model subset.
cbar_gs = gridspec.GridSpecFromSubplotSpec(1, 1, subplot_spec=gs[6, :], hspace=0.5)
cbar_ax = fig.add_subplot(cbar_gs[0, 0])
tick_pos = np.linspace(0.5, 10.5, 12)
plt.colorbar(plots['AGCD'], cax=cbar_ax, ticks=tick_pos, orientation='horizontal')

# Label the colorbar ticks as months and set the colorbar title
cbar_ax.set_xticklabels(['J', 'F', 'M', 'A', 'M', 'J', 'J', 'A', 'S', 'O', 'N', 'D'])
cbar_ax.set_xlabel("Month of Maximum Rainfall", fontsize=12.0)

# Plot with tight layout
plt.tight_layout()

## AMPLITUDE

In [None]:
# 16-level colormap for the amplitude maps
cmap = plt.get_cmap('YlGnBu', 16)

# Color limits for amplitude
mmin, mmax = 0, 200

# Display the colormap
cmap

In [None]:
# Figure and title
fig = plt.figure(figsize=(12, 16))
fig.suptitle("Climatological Rainfall Amplitude (1976-2005)", fontsize=16, y=0.99)

# 7x4 grid: row 0 holds AGCD, rows 1-5 hold the models, the thin row 6 holds the colorbar
gs = gridspec.GridSpec(7, 4, width_ratios=[1, 1, 1, 1], height_ratios=[1, 1, 1, 1, 1, 1, 0.1])
n_cols = 4

axs = {}

# AGCD is the only dataset on the top row
axs['AGCD'] = fig.add_subplot(gs[0, 0], projection=proj)

# Place the models row by row, starting on grid row 1, in the order of the
# sorted score table (lowest NRMSE first). The name is read by column label;
# positional access on a row Series (row[0]) is deprecated.
for position, dataset_name in enumerate(model_master_nrmse_df['dataset_name']):
    grid_row, grid_col = divmod(position, n_cols)
    axs[f'{dataset_name}'] = fig.add_subplot(gs[grid_row + 1, grid_col], projection=proj)

for name, ax in axs.items():
    ax.set_title(name, fontsize=13)

# Grey overlay showing where the mask equals 0.
# NOTE(review): the original referenced `mask`, which is not defined in this
# notebook; `qc_mask` (the mask loaded above and passed to the amplitude/phase
# helpers) is used instead -- confirm this is the intended mask.
mask_overlay = qc_mask.where(qc_mask == 0.)

# NRMSE scores indexed by dataset name, so each model is looked up once
amplitude_scores = model_master_nrmse_df.set_index('dataset_name')['amp_nrmse']

# Dictionary of plot handles
plots = {}

# Plot the amplitude map for AGCD and add the grey mask
plots['AGCD'] = obs_amplitude.plot(ax=axs['AGCD'], add_labels=False, add_colorbar=False, vmin=mmin, vmax=mmax, cmap=cmap)
mask_overlay.plot(ax=axs['AGCD'], add_labels=False, add_colorbar=False, cmap=cmap_mask)

# Plot the amplitude map for each model, add the grey mask and annotate the score
for model_name, model_amplitude in amplitude_model_dict.items():

    # NRMSE for this model
    amplitude_score = float(amplitude_scores.loc[f'{model_name}'])

    # Plot data and add grey mask
    plots[model_name] = model_amplitude.plot(ax=axs[model_name], add_labels=False, add_colorbar=False, vmin=mmin, vmax=mmax, cmap=cmap)
    mask_overlay.plot(ax=axs[model_name], add_labels=False, add_colorbar=False, cmap=cmap_mask)

    # The score is always annotated; models at or below the amplitude threshold
    # get a highlighted box. (The original if/elif pair left the panel
    # unlabelled when the score was NaN.)
    text_kwargs = dict(fontsize=12.0, transform=axs[model_name].transAxes)
    if amplitude_score <= amplitude_threshold:
        text_kwargs['bbox'] = dict(facecolor='lavender', edgecolor='blueviolet', boxstyle='round')
    axs[model_name].text(0.06, 0.06, 'NRMSE:\n%.2f' % (amplitude_score), **text_kwargs)

# Add coastlines
for ax in axs.values():
    ax.coastlines()

# Tick marks on every map; tick labels only on the axes listed in
# lat_label / lon_label / both_label (first column and bottom row)
for i, ax in enumerate(fig.axes):
    ax.set_xticks(lon_ticks, crs=ccrs.PlateCarree())
    ax.set_yticks(lat_ticks, crs=ccrs.PlateCarree())
    ax.xaxis.set_ticklabels([])
    ax.yaxis.set_ticklabels([])

    show_lat = i in lat_label or i in both_label
    show_lon = i in lon_label or i in both_label

    if show_lat:
        ax.set_yticks(lat_ticks, crs=ccrs.PlateCarree())
        ax.yaxis.set_major_formatter(LatitudeFormatter())
    if show_lon:
        ax.set_xticks(lon_ticks, crs=ccrs.PlateCarree())
        ax.xaxis.set_major_formatter(LongitudeFormatter(zero_direction_label=True))

# Colorbar in the bottom grid row. The AGCD handle is used as the mappable:
# every panel shares the same vmin/vmax/cmap, and unlike a hard-coded model
# name this key always exists regardless of the chosen model subset.
cbar_gs = gridspec.GridSpecFromSubplotSpec(1, 1, subplot_spec=gs[6, :], hspace=0.5)
cbar_ax = fig.add_subplot(cbar_gs[0, 0])
plt.colorbar(plots['AGCD'], cax=cbar_ax, orientation='horizontal', extend='max')
cbar_ax.set_xlabel("Amplitude (mm)", fontsize=12.0)

# Plot with tight layout
plt.tight_layout()