In [1]:
# import cell

import xarray as xr # to work with multi-dimensional arrays
import numpy as np # to work with multi-dimensional arrays
import regionmask # to work with predefined data masks
import pandas as pd # to work with tables
import xesmf as xe # to regrid data
import glob # to find file pathways
import netCDF4 # use to save xarrays as nc files
import matplotlib
import matplotlib.pyplot as plt # for plotting
import matplotlib.colors as colors # for custom colourbars
import cartopy # use for geographic map projections
import cartopy.crs as ccrs # use for geographic map projections
import time # to add a creation timestamp to Datasets
import pickle # to save DataFrame objects efficiently

### Spatial Mean and the Standard Deviation of Annual Number of Tropical Nights

Each year's number of tropical nights across the pre-industrial, current, and future scenario periods has been computed using JASMIN resources, resulting in a DataArray object at the original model resolution with a third dimension of N years (the pre-industrial, current, and future scenario periods span 50, 30, and 30 years respectively).

Metacode:

1. Define file pathway and load DataSet object.
2. Compute the spatial mean annual number of tropical nights. (Averaging over year dimension and not longitude and latitude, so no weighting required.)
3. Compute the standard deviation of the annual number of tropical nights across years. (Inter-annual variability in annual tropical nights; as above, no weighting is required because the statistic is taken over the year dimension.)
4. Form a Dataset object from two DataArray objects.
5. Save Dataset object.
6. Repeat for all models with available data.

In [2]:
def abs_mean(f_pw):
    '''
    Compute the mean and standard deviation, over the "year" dimension, of the
    annual number of tropical nights stored in one file and save both fields
    together as a NetCDF Dataset.

    Parameters
    ----------
    f_pw : str
        Path to a NetCDF file whose "tasmin" variable holds the annual number
        of tropical nights along a "year" dimension. The variable must carry
        the attributes "years_aggregated", "model", "modelling_group",
        "realisation", "experiment" and "period".

    Returns
    -------
    None
        The Dataset (variables "mean_ann_tn" and "std_ann_tn") is written to
        disk and the output path is printed.
    '''
    # load in annual tropical nights variable; incorrectly named "tasmin"
    # (read into memory inside the context manager so that the file handle is
    # released before any computation and even if an error is raised)
    with xr.open_dataset(f_pw, use_cftime = True) as x:
        data = x.tasmin.load()

    # compute the spatial mean annual number of tropical nights
    mean_tn = data.mean(dim = 'year', skipna = True, keep_attrs = True)

    # compute the spatial standard deviation of the annual number of tropical nights
    std_tn = data.std(dim = 'year', skipna = True, keep_attrs = True)

    # update DataArray attributes; each statistic gets its own title/long_name
    # (the standard deviation was previously labelled as the mean)
    created_on = time.ctime()
    labels = ((mean_tn, 'mean_ann_trop_nights',
               'spatial mean annual number of tropical nights'),
              (std_tn, 'std_dev_ann_trop_nights',
               'spatial standard deviation of annual number of tropical nights'))
    for stat, title, long_name in labels:
        stat.attrs['years_sampled'] = stat.attrs.pop('years_aggregated')
        stat.attrs.pop('aggregated_on', None) # timestamp of the input no longer applies
        stat.attrs['title'] = title
        stat.attrs['long_name'] = long_name
        stat.attrs['created_on'] = created_on

    # create Dataset object from two DataArray objects
    ds = xr.Dataset({'mean_ann_tn': mean_tn, 'std_ann_tn': std_tn})
    ds.attrs['model'], ds.attrs['modelling_group'] = mean_tn.model, mean_tn.modelling_group
    ds.attrs['realisation'], ds.attrs['experiment'] = mean_tn.realisation, mean_tn.experiment
    ds.attrs['years_sampled'], ds.attrs['period'] = mean_tn.years_sampled, mean_tn.period
    ds.attrs['created_on'] = created_on

    # save Dataset object; the period attribute supplies the first and last
    # year through its first and last four characters
    save_pw = '/home/ucfagtj/DATA/Dissertation/Data/trop_nights/' + \
              f'{ds.experiment}/{ds.modelling_group}/{ds.model}/' + \
              f'{ds.experiment}_{ds.model}_absolute_ann_tn_{ds.period[:4]}_{ds.period[-4:]}.nc'
    ds.to_netcdf(save_pw, mode = 'w')

    # close Dataset object
    ds.close()

    print('File saved at:\n' + save_pw)

In [None]:
# compute the spatial mean and standard deviation of absolute tropical nights
# (glob returns paths in arbitrary order; sort so that files are processed and
# logged in the same order on every run)
f_pws = sorted(glob.glob('/home/ucfagtj/DATA/Dissertation/Data/trop_nights/working_files/*/*'))
for pw in f_pws:
    abs_mean(pw)

### Spatial Mean and the Standard Deviation of Annual Number of Tropical Nights Anomaly

Each year's number of tropical nights across the pre-industrial, current, and future scenario periods has been computed using JASMIN resources, resulting in a DataArray object at the original model resolution with a third dimension of N years (the pre-industrial, current, and future scenario periods span 50, 30, and 30 years respectively).

Metacode:

1. Define file pathway and load DataSet object containing annual number of tropical nights.
2. Define the baseline period to obtain pre-industrial tropical nights norm from. (1851-1900.)
3. Compute the spatial mean annual number of tropical nights for baseline period. (Load in pre-industrial absolute data.)
4. Compute the annual tropical nights anomalies by subtracting the pre-industrial norm from each year.
5. Compute the spatial mean annual tropical nights anomaly.
6. Compute the standard deviation of the tropical nights anomalies. (Inter-annual variability.)
7. Form a Dataset object from two DataArray objects.
8. Save Dataset object.
9. Repeat for all models with available data.

In [205]:
def anom_mean(pw):
    '''
    Compute the mean and standard deviation, over the "year" dimension, of the
    annual tropical nights anomaly relative to the pre-industrial mean of the
    same model, and save both fields together as a NetCDF Dataset.

    Parameters
    ----------
    pw : str
        Path to a NetCDF file whose "tasmin" variable holds the annual number
        of tropical nights along a "year" dimension and carries the attributes
        "model", "modelling_group", "realisation", "experiment" and "period".
        The directory three levels above the file is searched for the
        pre-industrial baseline under pre_ind/<modelling_group>/<model>/.

    Returns
    -------
    None
        The Dataset (variables "mean_ann_tn_anom" and "std_ann_tn_anom") is
        written to disk and the output path is printed.

    Raises
    ------
    FileNotFoundError
        If no original-resolution pre-industrial "absolute" file exists for
        the model.
    '''
    # load in annual tropical nights variable; incorrectly named "tasmin"
    # (read into memory so the file handle can be released straight away)
    with xr.open_dataset(pw, use_cftime = True) as x:
        data = x.tasmin.load()

    # restrict upper limit to 365 days; ignore extra day from leap year
    # written as ~(data > 365) rather than (data < 365) so that missing values
    # stay NaN instead of being replaced by 365
    data = data.where(~(data > 365.), 365.)

    # locate pre-industrial spatial mean annual number of tropical nights for
    # same model; regridded copies (names ending in "_res.nc") live in the same
    # directory and must be skipped because they are on a different grid
    pre_ind_dir = f'{pw.rsplit("/", 3)[0]}/pre_ind/{data.modelling_group}/{data.model}'
    pre_ind_pw = sorted(p for p in glob.glob(f'{pre_ind_dir}/*absolute*')
                        if not p.endswith('_res.nc'))
    if not pre_ind_pw:
        raise FileNotFoundError(f'No pre-industrial absolute file found in: {pre_ind_dir}')

    with xr.open_dataset(pre_ind_pw[0], use_cftime = True) as x:
        bl = x.mean_ann_tn.load()
    bl = bl.where(~(bl > 365.), 365.) # same 365-day cap as above, NaN preserved

    # compute the spatial annual number of tropical nights anomaly for each year
    anomalies = data - bl

    # attributes shared by both statistics and by the Dataset itself
    shared_attrs = {'model': data.model, 'modelling_group': data.modelling_group,
                    'realisation': data.realisation, 'experiment': data.experiment,
                    'period': data.period, 'years_sampled': len(anomalies.year),
                    'baseline': '1851 - 1900', 'created_on': time.ctime()}

    # compute the spatial mean tropical nights anomaly
    mean_tn_anom = anomalies.mean(dim = 'year', skipna = True, keep_attrs = True)

    # update spatial mean anomaly DataArray attributes
    mean_tn_anom.attrs['title'] = 'mean_ann_trop_nights_anomaly'
    mean_tn_anom.attrs['long_name'] = 'spatial mean anomaly in annual number of tropical nights relative to 1851-1900'
    mean_tn_anom.attrs.update(shared_attrs)

    # compute the spatial standard deviation of the tropical nights anomalies
    std_tn_anom = anomalies.std(dim = 'year', skipna = True, keep_attrs = True)

    # update spatial standard deviation anomaly DataArray attributes
    std_tn_anom.attrs['title'] = 'std_dev_ann_trop_nights_anomaly'
    std_tn_anom.attrs['long_name'] = 'spatial standard deviation of anomaly in annual ' + \
                                     'number of tropical nights relative to 1851-1900'
    std_tn_anom.attrs.update(shared_attrs)

    # create Dataset object from two DataArray objects
    ds = xr.Dataset({'mean_ann_tn_anom': mean_tn_anom, 'std_ann_tn_anom': std_tn_anom})
    ds.attrs.update(shared_attrs)

    # save Dataset object; the period attribute supplies the first and last
    # year through its first and last four characters
    save_pw = '/home/ucfagtj/DATA/Dissertation/Data/trop_nights/' + \
              f'{ds.experiment}/{ds.modelling_group}/{ds.model}/' + \
              f'{ds.experiment}_{ds.model}_anomaly_ann_tn_{ds.period[:4]}_{ds.period[-4:]}.nc'
    ds.to_netcdf(save_pw, mode = 'w')

    # close Dataset object
    ds.close()

    print('File saved at:\n' + save_pw)

In [None]:
# compute the spatial mean and standard deviation of annual tropical nights anomaly
# (glob returns paths in arbitrary order; sort so that files are processed and
# logged in the same order on every run)
f_pws = sorted(glob.glob('/home/ucfagtj/DATA/Dissertation/Data/trop_nights/working_files/current/*') + \
               glob.glob('/home/ucfagtj/DATA/Dissertation/Data/trop_nights/working_files/ssp*/*'))
for pw in f_pws:
    anom_mean(pw)

### Regridding of Model Output: 1.0x1.0 and 2.5x3.75 degree latitude-longitude grids

Metacode:
1. Define desired resolution of data to be regridded to.
2. Read in original resolution data.
3. Compute a "regridder" which will transform the data from one resolution to another.
4. Save regridded data to the directory containing the original resolution data.

Data regridded to both 1x1 and 2.5x3.75 grids

In [4]:
# define absolute and anomaly tropical nights data to regrid; both end in '0.nc'
# (sorted so that files are processed in the same order on every run)
f_pws = sorted(glob.glob('/home/ucfagtj/DATA/Dissertation/Data/trop_nights/*/*/*/*0.nc'))

# define the desired output grid/resolution; 1.0 x 1.0 degrees
grid_out = xr.Dataset({'lat': (['lat'], np.arange(-89.5, 90.5, 1.)),
                       'lon': (['lon'], np.arange(-179.5, 180.5, 1.))})

for pw in f_pws:

    # load in annual tropical nights data in the original resolution; the
    # context manager releases the file even if regridding or saving fails
    with xr.open_dataset(pw) as grid_in:

        # compute the "regridder" file which will define the weighting to apply
        regridder = xe.Regridder(grid_in, grid_out,
                                 method = 'bilinear', # interpolation method
                                 periodic = True) # required for global grids; prevents blank data on meridian

        try:
            # apply the weighting matrix to transform the data to new resolution
            rg_data = regridder(grid_in)
        finally:
            # clear regridder file from being saved, also when regridding fails
            regridder.clean_weight_file()

        # define some attributes for the new resolution Dataset object;
        # the data type ("absolute"/"anomaly") is the fifth-from-last
        # underscore-separated field of the file name
        d_type = pw.split('/')[-1].split('_')[-5]
        rg_data.attrs['data_type'] = f'{d_type} values'
        rg_data.attrs['description'] = 'mean and standard deviation of annual number of tropical nights'
        rg_data.attrs['model'], rg_data.attrs['modelling_group'] = grid_in.model, grid_in.modelling_group
        rg_data.attrs['realisation'], rg_data.attrs['experiment'] = grid_in.realisation, grid_in.experiment
        rg_data.attrs['years_sampled'], rg_data.attrs['period'] = grid_in.years_sampled, grid_in.period
        rg_data.attrs['resolution'], rg_data.attrs['created_on'] = '1deg_x_1deg', time.ctime()

        # anomaly files record their baseline period; carry it over when present
        if 'baseline' in grid_in.attrs:
            rg_data.attrs['baseline'] = grid_in.attrs['baseline']

        # save regridded Dataset object
        save_pw = pw[: -3] + '_1x1_res.nc'
        rg_data.to_netcdf(save_pw, mode = 'w')
        rg_data.close()

    print(f'File saved: {save_pw.rsplit("/", 1)[-1]}')

# the target grid is shared by every iteration, so close it once at the end
grid_out.close()

Create weight file: bilinear_160x320_180x360_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn as the horizontal dimensions for this dataset.
Remove file bilinear_160x320_180x360_peri.nc
File saved: ssp126_BCC-CSM2-MR_absolute_ann_tn_2071_2100_1x1_res.nc
Create weight file: bilinear_160x320_180x360_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn_anom as the horizontal dimensions for this dataset.
Remove file bilinear_160x320_180x360_peri.nc
File saved: ssp126_BCC-CSM2-MR_anomaly_ann_tn_2071_2100_1x1_res.nc
Create weight file: bilinear_64x128_180x360_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn as the horizontal dimensions for this dataset.
Remove file bilinear_64x128_180x360_peri.nc
File saved: ssp126_CanESM5_absolute_ann_tn_2071_2100_1x1_res.nc
Create weight file: bilinear_64x128_180x360_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn_anom as the horizontal dimensions for this dataset.
Remove 

using dimensions ('lat', 'lon') from data variable mean_ann_tn as the horizontal dimensions for this dataset.
Remove file bilinear_160x320_180x360_peri.nc
File saved: ssp245_BCC-CSM2-MR_absolute_ann_tn_2071_2100_1x1_res.nc
Create weight file: bilinear_160x320_180x360_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn_anom as the horizontal dimensions for this dataset.
Remove file bilinear_160x320_180x360_peri.nc
File saved: ssp245_BCC-CSM2-MR_anomaly_ann_tn_2071_2100_1x1_res.nc
Create weight file: bilinear_64x128_180x360_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn as the horizontal dimensions for this dataset.
Remove file bilinear_64x128_180x360_peri.nc
File saved: ssp245_CanESM5_absolute_ann_tn_2071_2100_1x1_res.nc
Create weight file: bilinear_64x128_180x360_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn_anom as the horizontal dimensions for this dataset.
Remove file bilinear_64x128_180x360_peri.nc
File saved: ssp2

using dimensions ('lat', 'lon') from data variable mean_ann_tn as the horizontal dimensions for this dataset.
Remove file bilinear_160x320_180x360_peri.nc
File saved: ssp370_BCC-CSM2-MR_absolute_ann_tn_2071_2100_1x1_res.nc
Create weight file: bilinear_160x320_180x360_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn_anom as the horizontal dimensions for this dataset.
Remove file bilinear_160x320_180x360_peri.nc
File saved: ssp370_BCC-CSM2-MR_anomaly_ann_tn_2071_2100_1x1_res.nc
Create weight file: bilinear_64x128_180x360_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn as the horizontal dimensions for this dataset.
Remove file bilinear_64x128_180x360_peri.nc
File saved: ssp370_CanESM5_absolute_ann_tn_2071_2100_1x1_res.nc
Create weight file: bilinear_64x128_180x360_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn_anom as the horizontal dimensions for this dataset.
Remove file bilinear_64x128_180x360_peri.nc
File saved: ssp3

using dimensions ('lat', 'lon') from data variable mean_ann_tn as the horizontal dimensions for this dataset.
Remove file bilinear_160x320_180x360_peri.nc
File saved: ssp585_BCC-CSM2-MR_absolute_ann_tn_2071_2100_1x1_res.nc
Create weight file: bilinear_160x320_180x360_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn_anom as the horizontal dimensions for this dataset.
Remove file bilinear_160x320_180x360_peri.nc
File saved: ssp585_BCC-CSM2-MR_anomaly_ann_tn_2071_2100_1x1_res.nc
Create weight file: bilinear_64x128_180x360_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn as the horizontal dimensions for this dataset.
Remove file bilinear_64x128_180x360_peri.nc
File saved: ssp585_CanESM5_absolute_ann_tn_2071_2100_1x1_res.nc
Create weight file: bilinear_64x128_180x360_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn_anom as the horizontal dimensions for this dataset.
Remove file bilinear_64x128_180x360_peri.nc
File saved: ssp5

using dimensions ('lat', 'lon') from data variable mean_ann_tn as the horizontal dimensions for this dataset.
Remove file bilinear_160x320_180x360_peri.nc
File saved: pre_ind_BCC-CSM2-MR_absolute_ann_tn_1851_1900_1x1_res.nc
Create weight file: bilinear_64x128_180x360_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn as the horizontal dimensions for this dataset.
Remove file bilinear_64x128_180x360_peri.nc
File saved: pre_ind_CanESM5_absolute_ann_tn_1851_1900_1x1_res.nc
Create weight file: bilinear_128x256_180x360_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn as the horizontal dimensions for this dataset.
Remove file bilinear_128x256_180x360_peri.nc
File saved: pre_ind_CNRM-CM6-1_absolute_ann_tn_1851_1900_1x1_res.nc
Create weight file: bilinear_145x192_180x360_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn as the horizontal dimensions for this dataset.
Remove file bilinear_145x192_180x360_peri.nc
File saved: pre_ind_A

using dimensions ('lat', 'lon') from data variable mean_ann_tn_anom as the horizontal dimensions for this dataset.
Remove file bilinear_143x144_180x360_peri.nc
File saved: current_IPSL-CM6A-LR_anomaly_ann_tn_1981_2010_1x1_res.nc
Create weight file: bilinear_128x256_180x360_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn as the horizontal dimensions for this dataset.
Remove file bilinear_128x256_180x360_peri.nc
File saved: current_MIROC6_absolute_ann_tn_1981_2010_1x1_res.nc
Create weight file: bilinear_128x256_180x360_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn_anom as the horizontal dimensions for this dataset.
Remove file bilinear_128x256_180x360_peri.nc
File saved: current_MIROC6_anomaly_ann_tn_1981_2010_1x1_res.nc
Create weight file: bilinear_144x192_180x360_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn as the horizontal dimensions for this dataset.
Remove file bilinear_144x192_180x360_peri.nc
File saved: cur

In [11]:
# define absolute and anomaly tropical nights data to regrid; both end in '0.nc'
# (sorted so that files are processed in the same order on every run)
f_pws = sorted(glob.glob('/home/ucfagtj/DATA/Dissertation/Data/trop_nights/*/*/*/*0.nc'))

# define the desired output grid/resolution; 2.5 x 3.75 degree latitude-longitude; same as HadEX2
grid_out = xr.Dataset({'lat': (['lat'], np.arange(-90., 92.5, 2.5)),
                       'lon': (['lon'], np.arange(-180., 180., 3.75))})

for pw in f_pws:

    # load in annual tropical nights data in the original resolution; the
    # context manager releases the file even if regridding or saving fails
    with xr.open_dataset(pw) as grid_in:

        # compute the "regridder" file which will define the weighting to apply
        regridder = xe.Regridder(grid_in, grid_out,
                                 method = 'bilinear', # interpolation method
                                 periodic = True) # required for global grids; prevents blank data on meridian

        try:
            # apply the weighting matrix to transform the data to new resolution
            rg_data = regridder(grid_in)
        finally:
            # clear regridder file from being saved, also when regridding fails
            regridder.clean_weight_file()

        # define some attributes for the new resolution Dataset object;
        # the data type ("absolute"/"anomaly") is the fifth-from-last
        # underscore-separated field of the file name
        d_type = pw.split('/')[-1].split('_')[-5]
        rg_data.attrs['data_type'] = f'{d_type} values'
        rg_data.attrs['description'] = 'mean and standard deviation of annual number of tropical nights'
        rg_data.attrs['model'], rg_data.attrs['modelling_group'] = grid_in.model, grid_in.modelling_group
        rg_data.attrs['realisation'], rg_data.attrs['experiment'] = grid_in.realisation, grid_in.experiment
        rg_data.attrs['years_sampled'], rg_data.attrs['period'] = grid_in.years_sampled, grid_in.period
        rg_data.attrs['resolution'], rg_data.attrs['created_on'] = 'lat-lon: 2.5x3.75 degrees', time.ctime()

        # anomaly files record their baseline period; carry it over when present
        if 'baseline' in grid_in.attrs:
            rg_data.attrs['baseline'] = grid_in.attrs['baseline']

        # save regridded Dataset object
        save_pw = pw[: -3] + '_2_5x3_75_res.nc'
        rg_data.to_netcdf(save_pw, mode = 'w')
        rg_data.close()

    print(f'File saved: {save_pw.rsplit("/", 1)[-1]}')

# the target grid is shared by every iteration, so close it once at the end
grid_out.close()

Create weight file: bilinear_160x320_73x96_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn as the horizontal dimensions for this dataset.
Remove file bilinear_160x320_73x96_peri.nc
File saved: ssp126_BCC-CSM2-MR_absolute_ann_tn_2071_2100_2_5x3_75_res.nc
Create weight file: bilinear_160x320_73x96_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn_anom as the horizontal dimensions for this dataset.
Remove file bilinear_160x320_73x96_peri.nc
File saved: ssp126_BCC-CSM2-MR_anomaly_ann_tn_2071_2100_2_5x3_75_res.nc
Create weight file: bilinear_64x128_73x96_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn as the horizontal dimensions for this dataset.
Remove file bilinear_64x128_73x96_peri.nc
File saved: ssp126_CanESM5_absolute_ann_tn_2071_2100_2_5x3_75_res.nc
Create weight file: bilinear_64x128_73x96_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn_anom as the horizontal dimensions for this dataset.
Remove

Create weight file: bilinear_160x320_73x96_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn as the horizontal dimensions for this dataset.
Remove file bilinear_160x320_73x96_peri.nc
File saved: ssp245_BCC-CSM2-MR_absolute_ann_tn_2071_2100_2_5x3_75_res.nc
Create weight file: bilinear_160x320_73x96_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn_anom as the horizontal dimensions for this dataset.
Remove file bilinear_160x320_73x96_peri.nc
File saved: ssp245_BCC-CSM2-MR_anomaly_ann_tn_2071_2100_2_5x3_75_res.nc
Create weight file: bilinear_64x128_73x96_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn as the horizontal dimensions for this dataset.
Remove file bilinear_64x128_73x96_peri.nc
File saved: ssp245_CanESM5_absolute_ann_tn_2071_2100_2_5x3_75_res.nc
Create weight file: bilinear_64x128_73x96_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn_anom as the horizontal dimensions for this dataset.
Remove

using dimensions ('lat', 'lon') from data variable mean_ann_tn as the horizontal dimensions for this dataset.
Remove file bilinear_160x320_73x96_peri.nc
File saved: ssp370_BCC-CSM2-MR_absolute_ann_tn_2071_2100_2_5x3_75_res.nc
Create weight file: bilinear_160x320_73x96_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn_anom as the horizontal dimensions for this dataset.
Remove file bilinear_160x320_73x96_peri.nc
File saved: ssp370_BCC-CSM2-MR_anomaly_ann_tn_2071_2100_2_5x3_75_res.nc
Create weight file: bilinear_64x128_73x96_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn as the horizontal dimensions for this dataset.
Remove file bilinear_64x128_73x96_peri.nc
File saved: ssp370_CanESM5_absolute_ann_tn_2071_2100_2_5x3_75_res.nc
Create weight file: bilinear_64x128_73x96_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn_anom as the horizontal dimensions for this dataset.
Remove file bilinear_64x128_73x96_peri.nc
File saved: ssp

using dimensions ('lat', 'lon') from data variable mean_ann_tn as the horizontal dimensions for this dataset.
Remove file bilinear_160x320_73x96_peri.nc
File saved: ssp585_BCC-CSM2-MR_absolute_ann_tn_2071_2100_2_5x3_75_res.nc
Create weight file: bilinear_160x320_73x96_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn_anom as the horizontal dimensions for this dataset.
Remove file bilinear_160x320_73x96_peri.nc
File saved: ssp585_BCC-CSM2-MR_anomaly_ann_tn_2071_2100_2_5x3_75_res.nc
Create weight file: bilinear_64x128_73x96_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn as the horizontal dimensions for this dataset.
Remove file bilinear_64x128_73x96_peri.nc
File saved: ssp585_CanESM5_absolute_ann_tn_2071_2100_2_5x3_75_res.nc
Create weight file: bilinear_64x128_73x96_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn_anom as the horizontal dimensions for this dataset.
Remove file bilinear_64x128_73x96_peri.nc
File saved: ssp

using dimensions ('lat', 'lon') from data variable mean_ann_tn as the horizontal dimensions for this dataset.
Remove file bilinear_160x320_73x96_peri.nc
File saved: pre_ind_BCC-CSM2-MR_absolute_ann_tn_1851_1900_2_5x3_75_res.nc
Create weight file: bilinear_64x128_73x96_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn as the horizontal dimensions for this dataset.
Remove file bilinear_64x128_73x96_peri.nc
File saved: pre_ind_CanESM5_absolute_ann_tn_1851_1900_2_5x3_75_res.nc
Create weight file: bilinear_128x256_73x96_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn as the horizontal dimensions for this dataset.
Remove file bilinear_128x256_73x96_peri.nc
File saved: pre_ind_CNRM-CM6-1_absolute_ann_tn_1851_1900_2_5x3_75_res.nc
Create weight file: bilinear_145x192_73x96_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn as the horizontal dimensions for this dataset.
Remove file bilinear_145x192_73x96_peri.nc
File saved: pre_ind_

using dimensions ('lat', 'lon') from data variable mean_ann_tn_anom as the horizontal dimensions for this dataset.
Remove file bilinear_143x144_73x96_peri.nc
File saved: current_IPSL-CM6A-LR_anomaly_ann_tn_1981_2010_2_5x3_75_res.nc
Create weight file: bilinear_128x256_73x96_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn as the horizontal dimensions for this dataset.
Remove file bilinear_128x256_73x96_peri.nc
File saved: current_MIROC6_absolute_ann_tn_1981_2010_2_5x3_75_res.nc
Create weight file: bilinear_128x256_73x96_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn_anom as the horizontal dimensions for this dataset.
Remove file bilinear_128x256_73x96_peri.nc
File saved: current_MIROC6_anomaly_ann_tn_1981_2010_2_5x3_75_res.nc
Create weight file: bilinear_144x192_73x96_peri.nc
using dimensions ('lat', 'lon') from data variable mean_ann_tn as the horizontal dimensions for this dataset.
Remove file bilinear_144x192_73x96_peri.nc
File saved: cu

### Ensemble Spatial Mean and Standard Deviation of Annual Tropical Nights

Metacode:
1. Define pathway for spatial annual tropical nights data.
2. For each period/scenario, concatenate all model output along a new dimension. (Model output is in the form of two DataArray objects: the spatial mean annual tropical nights, and the standard deviation of annual tropical nights, i.e. the inter-annual variability.)
3. Compute the spatial mean along this new dimension for both DataArray objects (ensemble spatial means of annual tropical nights and of inter-annual tropical nights variability).
4. Compute the spatial standard deviation along this new dimension for spatial mean DataArray object (measure of ensemble variance).
5. Form Dataset object from ensemble means and standard deviation DataArray objects from all periods/scenarios.
6. Save Dataset object to suitable directory.

The above is applied to both the absolute and the anomaly tropical nights data at 1x1 degree resolution.

In [2]:
# root directory holding the per-period/scenario tropical nights model output
TN_DATA_DIR = '/home/ucfagtj/DATA/Dissertation/Data/trop_nights'


def find_tn_files(experiment, data_type):
    '''
    Summary:
    --------
    Lists the 1x1 degree resolution model output files of one period/scenario.

    Parameters:
    -----------
    experiment: str
                name of the period/scenario sub-directory (e.g. 'current', 'ssp126')

    data_type: str
               fragment of the file name identifying the data type ('anomaly' or 'abso')

    Returns:
    --------
    f_pws: list of str
           matching file pathways in sorted order; glob itself returns matches in arbitrary
           order, so sorting keeps the order of the models reproducible between runs

    '''
    return sorted(glob.glob(f'{TN_DATA_DIR}/{experiment}/*/*/*{data_type}*_1x1*'))


# anomaly data (relative to pre-industrial, so no pre-industrial anomaly exists)
anom_cur = find_tn_files('current', 'anomaly')
anom_126 = find_tn_files('ssp126', 'anomaly')
anom_245 = find_tn_files('ssp245', 'anomaly')
anom_370 = find_tn_files('ssp370', 'anomaly')
anom_585 = find_tn_files('ssp585', 'anomaly')

# absolute data
abso_pre = find_tn_files('pre_ind', 'abso')
abso_cur = find_tn_files('current', 'abso')
abso_126 = find_tn_files('ssp126', 'abso')
abso_245 = find_tn_files('ssp245', 'abso')
abso_370 = find_tn_files('ssp370', 'abso')
abso_585 = find_tn_files('ssp585', 'abso')

# one list of file pathways per period/scenario and data type
f_pws_list = [anom_cur, anom_126, anom_245, anom_370, anom_585,
              abso_pre, abso_cur, abso_126, abso_245, abso_370, abso_585]

In [6]:
# create Dataset object to store the spatial ensemble annual tropical nights variables
ens_ds = xr.Dataset()

for pws_group in f_pws_list:

    # a glob pattern that matched nothing would otherwise only surface as an opaque IndexError
    if not pws_group:
        raise FileNotFoundError('no model output files found for a period/scenario group.')

    # open every model output of the group; the order of this list defines the order of the
    # new 'model' dimension and of the 'model_index' attribute
    members = [xr.open_dataset(pw, use_cftime = True) for pw in pws_group]
    models = [member.model for member in members] # note models used

    # concatenate all model outputs along a new dimension in a single call
    # (avoids re-copying the growing Dataset once per model)
    x0 = xr.concat(members, dim = 'model')

    # update attribute information to concatenated Dataset
    x0.attrs['model_index'] = models # add names of model concatenated in order of concatenation;
                                     # index of model dimension will match names in list
    del(x0.attrs['model'], x0.attrs['modelling_group'], x0.attrs['realisation'])

    # short label of the data type ('anom' or 'abso') used in file and variable names
    tag = x0.data_type[0: 4]

    # save Dataset object containing the concatenated model output for specfic group
    save_pw = '/home/ucfagtj/DATA/Dissertation/Data/trop_nights/processed/' + \
              f'{x0.experiment}_{tag}_all_models.nc'
    x0.to_netcdf(save_pw, 'w')
    print(f'File saved: {save_pw.split("/")[-1]}')

    # create a dictionary object storing attribute information for new objects
    # NOTE: the key 'regird_method' (sic) is kept as is, since it is part of the saved metadata
    attrs_info = {'models': models, 'models_used': len(models), 'regird_method': x0.regrid_method,
                  'data_type': x0.data_type, 'experiment': x0.experiment,
                  'year_sampled': x0.years_sampled, 'period': x0.period, 'resolution': x0.resolution}

    # pick the per-model mean and standard deviation variables matching the data type
    if 'anomaly' in x0.data_type:
        model_mean, model_std = x0.mean_ann_tn_anom, x0.std_ann_tn_anom

    elif 'absolute' in x0.data_type:
        model_mean, model_std = x0.mean_ann_tn, x0.std_ann_tn

    else:
        raise ValueError('check data type attribute of Datasets being averaged.')

    # compute the ensemble spatial annual tropical night variables and add attribute information
    ens_mean = model_mean.mean(dim = 'model', skipna = True).assign_attrs(attrs_info)
    ens_intra = model_std.mean(dim = 'model', skipna = True).assign_attrs(attrs_info)
    ens_std = model_mean.std(dim = 'model', skipna = True).assign_attrs(attrs_info)
    ens_qua = model_mean.quantile([0.05, 0.25, 0.75, 0.95], dim = 'model',
                                  interpolation = 'linear').assign_attrs(attrs_info)
    ens_medi = model_mean.median(dim = 'model', skipna = True).assign_attrs(attrs_info)

    # add DataArray objects to ensemble Dataset object
    ens_ds = ens_ds.assign({f'{x0.experiment}_mean_ann_tn_{tag}': ens_mean,
                            f'{x0.experiment}_intrvar_ann_tn_{tag}': ens_intra,
                            f'{x0.experiment}_std_ann_tn_{tag}': ens_std,
                            f'{x0.experiment}_quantiles_ann_tn_{tag}': ens_qua,
                            f'{x0.experiment}_medi_ann_tn_{tag}': ens_medi})

    # release the file handles of every model output of this group, including the first one
    for member in members:
        member.close()
    del(members)

# update attribute informaton to Dataset object
# NOTE: 'models' and 'models_used' are those of the last group processed in the loop above
ens_ds = ens_ds.assign_attrs(attrs_info)
del(ens_ds.attrs['data_type'], ens_ds.attrs['period'], ens_ds.attrs['experiment'],
    ens_ds.attrs['year_sampled'])

# the names quoted in the description match the variable names assigned above ("intrvar_ann_tn")
ens_ds.attrs['description'] = '"mean_ann_tn" = spatial ensemble mean annual tropical nights; ' +\
                              '"intrvar_ann_tn" = spatial ensemble mean model inter-annual variabiltiy in '+\
                              'annual number of tropical nights; "std_ann_tn" = spatial ensemble standard '+\
                              'deviation in annual tropical nights; "quantiles_ann_tn" = 5th, 25th, 75th '+\
                              'and 95th percentiles of ensemble annual tropical nights; "medi_ann_tn" = '+\
                              'spatial ensemble median annual tropical nights.'

# save Dataset object
save_pw = '/home/ucfagtj/DATA/Dissertation/Data/trop_nights/processed/ensemble_trop_nights.nc'
ens_ds.to_netcdf(save_pw, 'w')
print(f'File saved: {save_pw.split("/")[-1]}')

# close DataArray/set objects; one statement each so the cell does not display a tuple of None
x0.close()
ens_ds.close()
ens_mean.close()
ens_intra.close()
ens_std.close()
ens_qua.close()
ens_medi.close()

File saved: current_anom_all_models.nc
File saved: ssp126_anom_all_models.nc
File saved: ssp245_anom_all_models.nc
File saved: ssp370_anom_all_models.nc
File saved: ssp585_anom_all_models.nc
File saved: pre_ind_abso_all_models.nc
File saved: current_abso_all_models.nc
File saved: ssp126_abso_all_models.nc
File saved: ssp245_abso_all_models.nc
File saved: ssp370_abso_all_models.nc
File saved: ssp585_abso_all_models.nc
File saved: ensemble_trop_nights.nc


(None, None)

A multi-model ensemble for the current period only is regridded to a 2.5x3.75 degree latitude-longitude grid, so that it can be compared with an observational dataset (HadEX2) for model evaluation.

In [14]:
# create Dataset object to store the spatial ensemble mean annual tropical nights variable
ens_ds = xr.Dataset()

# define pathway to current absolute 2.5x3.75 resolution model output
# (sorted, as glob returns matches in arbitrary order and the order defines the model index)
abso_cur = sorted(glob.glob('/home/ucfagtj/DATA/Dissertation/Data/trop_nights/current/*/*/*abso*_2_5x3_75*'))

# a glob pattern that matched nothing would otherwise only surface as an opaque IndexError
if not abso_cur:
    raise FileNotFoundError('no current absolute 2.5x3.75 resolution model output files found.')

# open every model output; the order of this list defines the order of the new 'model'
# dimension and of the 'model_index' attribute
members = [xr.open_dataset(pw, use_cftime = True) for pw in abso_cur]
models = [member.model for member in members] # note models used

# concatenate all model outputs along a new dimension in a single call
# (avoids re-copying the growing Dataset once per model)
x0 = xr.concat(members, dim = 'model')

# update attribute information to concatenated Dataset
x0.attrs['model_index'] = models # add names of model concatenated in order of concatenation;
                                 # index of model dimension will match names in list
del(x0.attrs['model'], x0.attrs['modelling_group'], x0.attrs['realisation'])

# short label of the data type used in file and variable names
tag = x0.data_type[0: 4]

# save Dataset object containing the concatenated model output for specfic group
save_pw = '/home/ucfagtj/DATA/Dissertation/Data/trop_nights/processed/' + \
           f'{x0.experiment}_{tag}_all_models_2_5x3_75_res.nc'
x0.to_netcdf(save_pw, 'w')
print(f'File saved: {save_pw.split("/")[-1]}')

# create a dictionary object storing attribute information for new objects
# NOTE: the key 'regird_method' (sic) is kept as is, since it is part of the saved metadata
attrs_info = {'models': models, 'models_used': len(models), 'regird_method': x0.regrid_method,
              'data_type': x0.data_type, 'experiment': x0.experiment,
              'year_sampled': x0.years_sampled, 'period': x0.period, 'resolution': x0.resolution}

# compute the ensemble spatial annual tropical night variables and add attribute information
ens_mean = x0.mean_ann_tn.mean(dim = 'model', skipna = True).assign_attrs(attrs_info)
ens_intra = x0.std_ann_tn.mean(dim = 'model', skipna = True).assign_attrs(attrs_info)
ens_std = x0.mean_ann_tn.std(dim = 'model', skipna = True).assign_attrs(attrs_info)
ens_qua = x0.mean_ann_tn.quantile([0.05, 0.25, 0.75, 0.95], dim = 'model',
                                   interpolation = 'linear').assign_attrs(attrs_info)
ens_medi = x0.mean_ann_tn.median(dim = 'model', skipna = True).assign_attrs(attrs_info)

# add DataArray objects to ensemble Dataset object
ens_ds = ens_ds.assign({f'{x0.experiment}_mean_ann_tn_{tag}': ens_mean,
                        f'{x0.experiment}_intrvar_ann_tn_{tag}': ens_intra,
                        f'{x0.experiment}_std_ann_tn_{tag}': ens_std,
                        f'{x0.experiment}_quantiles_ann_tn_{tag}': ens_qua,
                        f'{x0.experiment}_medi_ann_tn_{tag}': ens_medi})

# release the file handles of every model output, including the first one
for member in members:
    member.close()
del(members)

# update attribute informaton to Dataset object
ens_ds = ens_ds.assign_attrs(attrs_info)
del(ens_ds.attrs['data_type'], ens_ds.attrs['period'], ens_ds.attrs['experiment'],
    ens_ds.attrs['year_sampled'])

# the names quoted in the description match the variable names assigned above ("intrvar_ann_tn")
ens_ds.attrs['description'] = '"mean_ann_tn" = spatial ensemble mean annual tropical nights; ' +\
                              '"intrvar_ann_tn" = spatial ensemble mean model inter-annual variabiltiy in '+\
                              'annual number of tropical nights; "std_ann_tn" = spatial ensemble standard '+\
                              'deviation in annual tropical nights; "quantiles_ann_tn" = 5th, 25th, 75th '+\
                              'and 95th percentiles of ensemble annual tropical nights; "medi_ann_tn" = '+\
                              'spatial ensemble median annual tropical nights.'

# save Dataset object
save_pw = '/home/ucfagtj/DATA/Dissertation/Data/trop_nights/processed/ensemble_trop_nights_2_5x3_75_res.nc'
ens_ds.to_netcdf(save_pw, 'w')
print(f'File saved: {save_pw.split("/")[-1]}')

# close DataArray/set objects; one statement each so the cell does not display a tuple of None
x0.close()
ens_ds.close()
ens_mean.close()
ens_intra.close()
ens_std.close()
ens_qua.close()
ens_medi.close()

File saved: current_abso_all_models_2_5x3_75_res.nc
File saved: ensemble_trop_nights_2_5x3_75_res.nc


(None, None)

### Weighted Regional Ensemble Means

As regional and surface means are computed over raster cells of differing areas, a weighting must be applied so that cells covering a larger area contribute more towards the mean. On a regular latitude-longitude grid the area of a cell is proportional to the cosine of its latitude, so the cosine of the latitude provides a good approximation for the weighting.

Metacode:

1. Create a weighted mean function. This function will take 3 inputs; a data array, a weights array, and dimension/s to compute the weighted mean over.
2. Define regions to calculate weighted means for.
3. Create a weights array using latitude cosine approximation. Use this weights array to assign zero weights to cells outside desired region (or ocean cells within desired region if only want land cells to contribute to mean).
4. Compute weighted means for each region for each DataArray object.
5. Populate a DataFrame object with weighted means.
6. Save DataFrame object using serialisation for efficiency ("pickling").

In [7]:
def weighted_mean(arr, weights, dim):
    '''
    Summary:
    --------
    Weighted average of a DataArray object over one or more of its dimensions; cells holding
    missing values contribute neither to the weighted sum nor to the sum of the weights.
    
    Parameters:
    -----------
    arr: xarray DataArray object
         values to be averaged
         
    weights: xarray DataArray object
             weight of each value within arr; broadcast against arr, so it does not need to
             have the same shape as arr
             
    dim: str or sequence of str
         dimension/s that are averaged over
         
    Returns:
    --------
    weighted_mean: xarray DataArray object
                   weighted mean with the averaged dimension/s removed; NA wherever the valid
                   cells carry a total weight of zero
                   
    '''
    
    # total weight of the cells holding a valid value
    # (the weight of a cell with a missing value is turned into NA, which the sum then skips)
    total_weight = weights.where(arr.notnull()).sum(dim = dim, skipna = True)
    
    # division by zero is undefined, so a total weight of zero is flagged as NA instead
    total_weight = total_weight.where(total_weight != 0)
    
    # weighted sum of the values; element-wise product with broadcasting, missing values skipped
    weighted_total = (arr * weights).sum(dim = dim, skipna = True)
    
    return weighted_total / total_weight

In [8]:
def region_average(arr, regions, land_only = True):
    '''
    Summary:
    --------
    Computes the weighted mean of an array for each of the given regions, as well as for the
    globe, the ocean, and the land with and without Antarctica.
    
    Parameters:
    -----------
    arr: xarray DataArray object
         array encompassing the regions where the weighted means are computed; must have
         'lat' and 'lon' dimensions
    
    regions: regionmask.Regions object
             regions to compute the weighted means over
        
    land_only : bool
                whether to mask out ocean points before calculating regional means
                default is True; only the regional means are affected, the four global
                means always use their own weights
        
    Returns:
    --------
    reg_ave : xarray DataArray object
              New DataArray with weighted mean over the whole globe, the ocean, the land, the 
              land without Antarctica, and all regions.
              Dimensions (n_regions + 4) x (additional dimensions not averaged over).
              The 'region' dimension carries a multi index made of 'abbrev' and 'number'.
    
    Raises:
    -------
    ValueError
        if regions is not a regionmask.Regions instance
    
    '''
    # check that the regions specified are an instance of regionmask.Regions;
    # essentially checking they are a 'regionmask.Regions' object type
    if not isinstance(regions, regionmask.Regions):
        raise ValueError('specified regions must be a regionmask.Regions instance')
        
    # define names of regions to be used to index the various weighted means to be computed;
    # the order matches the order in which the means are appended to the list further down
    abbrevs = ['global', 'ocean', 'land', 'land_wo_antarctica']
    abbrevs = abbrevs + regions.abbrevs
    
    # numbers of the regions; only their minimum and maximum are used further down
    numbers = np.array(regions.numbers)
    
    # compute the latitude weights using the cosine approximation
    weight = np.cos(np.deg2rad(arr.lat))
    
    # define a land mask where land and sea cells are True and False respectively
    # (presumably the natural earth mask numbers land cells 0 and leaves sea cells as NaN;
    # verify against regionmask documentation)
    landmask = regionmask.defined_regions.natural_earth.land_110.mask(arr)
    landmask = landmask == 0
    
    # for land only, combine latitude weighting with landmask
    # result being that only cells over land have non-zero weights following latitude cosine approximation
    if land_only:
        wgt = weight * landmask
    
    # otherwise, combine latitude weighting with same shape as input array
    # result being that all cells, both land or ocean, will have a weight corresponding to its latitude
    # this weight will be used for regions containing both ocean and land cells
    else:
        wgt = xr.full_like(landmask, 1) * weight
    
    # define a region mask; each cell is given the number of the region it falls into
    # cells that do not fall into a region, denoted as NaN
    mask = regions.mask(arr)
    
    # define a list to accumulate averages/weighted means
    ave = list()
    
    # compute global mean
    # weighting is simple cosine latitude weighting; no mask as averaging entire surface/all cells
    a = weighted_mean(arr, dim = ('lat', 'lon'), weights = weight)
    ave.append(a)
    
    # compute global ocean mean
    # weighting is a cosine latitude weighting of only ocean cells; land cells all weighted as 0
    weights = (weight * (1.0 - landmask))
    a = weighted_mean(arr, dim = ('lat', 'lon'), weights = weights)
    ave.append(a)
    
    # compute global land mean
    # weighting is a cosine latitude weighting of only land cells; ocean cells weighted as 0
    weights = (weight * landmask)
    a = weighted_mean(arr, dim = ('lat', 'lon'), weights = weights)
    ave.append(a)
    
    # compute global land mean without Antarctica
    # weighting is a cosine latitude weighting of only land cells; ocean cells weighted as 0
    # NOTE(review): only the data is restricted to latitudes from -60 upwards, the weights still
    # cover all latitudes; this presumably relies on xarray aligning both on the shared 'lat'
    # coordinate inside weighted_mean, and on 'lat' being stored in ascending order - confirm
    arr_selected = arr.sel(lat = slice(-60, None)) # remove Antarctica by dropping latitudes below -60
    weights = (weight * landmask)
    a = weighted_mean(arr_selected, dim = ('lat', 'lon'), weights = weights)
    ave.append(a)
    
    #### Regional Weighted Means ####
    # Computing the specified regional averages is quicker using Groupby objects
    # Groupby objects use multi indexing of coordinates to state the raster cells of a given group
    # Multi indexing essentially concatenates the coordinates
    # (i.e. a cell with lat = -5 and lon = 25 denoted as lat_lon = -5, 25)
    # Using these "stacked" coordinates reduces the dimensions/shape of an array
    # (i.e. stacking lat and lon will change the 2D representation of these to 1D)
    
    # compute the region weighted means
    g = arr.groupby(mask) # group array into the different regions
    
    # create a new dimension of 'stacked coordinates'; moves from raster/grid format to 1D object
    # (unstacked dimensions -> lat = 10, lon = 12, value = 50 stacked dimension -> 10_12, value = 50)
    # the name 'stacked_lat_lon' presumably has to match the dimension created by the groupby above
    wgt_stacked = wgt.stack(stacked_lat_lon = ('lat', 'lon'))
    
    # apply stacked lat_lon weights to stacked Groupby object
    a = g.apply(weighted_mean, dim = ('stacked_lat_lon'), weights = wgt_stacked)

    ave.append(a.drop('region')) # drop the region information as want to use as dimension to merge averages
    
    # merge the list of weighted means DataArray objects into a single DataArray object
    # (from here on 'arr' no longer refers to the input array)
    arr = xr.concat(ave, dim = 'region')
    
    # shift region coordinates such that the numbers correspond to the regions
    # accounting for the 4 non-regional weighted means also computed
    # NOTE(review): assumes the region numbers are contiguous and that every region holds at least
    # one cell of the grid, otherwise the lengths will not match the 'region' dimension - confirm
    numbers = np.arange(numbers.min() - 4, numbers.max() + 1)
    
    # add the abbreviations of the regions and update the numbers
    arr = arr.assign_coords(**{'abbrev': ('region', abbrevs), 'number': ('region', numbers)})
    
    # create a multi index so that a mean can be selected by either abbreviation or number
    arr = arr.set_index(region = ('abbrev', 'number'))
    
    return arr

In [9]:
# load in spatial ensemble annual tropical night variables; has both absolute and anomaly
f_pw = '/home/ucfagtj/DATA/Dissertation/Data/trop_nights/processed/ensemble_trop_nights.nc'
data = xr.open_dataset(f_pw)

# define AR6 land regions to calculate weighted means for; must be regionmask objects
regions = regionmask.defined_regions.ar6.land

# define column headers; one column per weighted mean returned by region_average
col_names = ['global', 'ocean', 'land', 'land_wo_antarctica']
col_names = col_names + regions.abbrevs

# create a list of the various DataArray objects
data_vars = data.data_vars.values() # .data_vars gives a dictionary object; .values() lists the DataArrays

# accumulate the rows of the table first and build the DataFrame object once at the end;
# DataFrame.append is deprecated (removed in pandas 2.0) and chained assignment of the form
# wm_df[col][row] = value is not guaranteed to write into the original DataFrame
rows = {}

# loop over each DataArray object
for data_arr in data_vars:

    # compute the regional weighted means; use only land cells for mean with regions with both ocean and land
    w_means = region_average(data_arr, regions, land_only = True)

    # must loop over each percentile with quantile DataArray
    if 'quantiles' in data_arr.name:
        for i in range(data_arr['quantile'].size):

            # one row per percentile; the name ends in the decimals of the quantile (0.05 -> '05')
            row_name = f'{data_arr.name}_pct_{str(data_arr["quantile"][i].values)[2:]}'

            # select the percentile by dimension name rather than by position
            pct_means = w_means.isel(quantile = i)
            rows[row_name] = [pct_means.sel(abbrev = col_name).values.item()
                              for col_name in col_names]

    else:
        # one row for the DataArray; .values.item() turns the single selected value into a float
        rows[data_arr.name] = [w_means.sel(abbrev = col_name).values.item()
                               for col_name in col_names]

# create DataFrame object storing the weighted means; rows are variables, columns are regions
wm_df = pd.DataFrame.from_dict(rows, orient = 'index', columns = col_names)

# save DataFrame object using pickle (serialisation)
save_pw = '/home/ucfagtj/DATA/Dissertation/Data/trop_nights/processed/ensemble_trop_nights_variables_AR6.pickle'
with open(save_pw, 'wb') as f:
    pickle.dump(wm_df, f) 
print(f'File saved: {save_pw.split("/")[-1]}')

# close Dataset object    
data.close()
del(data)

File saved: ensemble_trop_nights_variables_AR6.pickle


### Table View of Weighted Regional Ensemble Means

In [3]:
# load data weighted regional means data
# NOTE: pickle.load can execute arbitrary code; only load files written by this notebook
wm_pw = '/home/ucfagtj/DATA/Dissertation/Data/trop_nights/processed/ensemble_trop_nights_variables_AR6.pickle'
with open(wm_pw, 'rb') as unpickle: # context manager closes the file handle again
    wm_df = pickle.load(unpickle)

# define the columns of the table; region, absolute values, then anomaly (percentage) entries
col_names = ['region', 'pre_ind', 'current', 'ssp126', 'ssp245', 'ssp370', 'ssp585', 
             'current_pct', 'ssp126_pct', 'ssp245_pct', 'ssp370_pct', 'ssp585_pct']

# define region names; excluding the two Antartica land regions
region_names = ['global', 'ocean', 'land', 'land_wo_antarctica'] + regionmask.defined_regions.ar6.land.abbrevs[: -2] 

# periods/scenarios that have both an absolute value and a pre-industrial anomaly
scenarios = ['current', 'ssp126', 'ssp245', 'ssp370', 'ssp585']

# one dictionary per region; the DataFrame object is built once from all of them
records = []

# extract absolute and percentage increase for each region
for region in region_names:

    # restrict weighted means DataFrame object to given region
    reg_data = wm_df[f'{region}']

    # pre-industrial absolute value; baseline of the percentage changes
    pre_ind = reg_data['pre_ind_mean_ann_tn_abso']
    record = {'region': region, 'pre_ind': round(pre_ind, 1)}

    for scenario in scenarios:
        absolute = reg_data[f'{scenario}_mean_ann_tn_abso']
        anomaly = reg_data[f'{scenario}_mean_ann_tn_anom']

        # percentage change of anomaly relative to pre-industrial value;
        # undefined for a pre-industrial value of zero, shown as '-'
        if pre_ind == 0:
            pct_label = '-'
        else:
            # '\\%' gives a literal backslash and percent sign (escaped percent sign for LaTeX)
            pct_label = f'{round((anomaly / pre_ind) * 100, 1)}\\%'

        record[scenario] = round(absolute, 1)
        record[f'{scenario}_pct'] = f'{round(anomaly, 1)} ({pct_label})'

    records.append(record)

# form the table; columns ordered as defined above
df = pd.DataFrame(records, columns = col_names)
df

Unnamed: 0,region,pre_ind,current,ssp126,ssp245,ssp370,ssp585,current_pct,ssp126_pct,ssp245_pct,ssp370_pct,ssp585_pct
0,global,149.3,155.3,168.1,176.4,185.0,190.9,6.1 (4.1\%),18.9 (12.6\%),27.2 (18.2\%),35.8 (24.0\%),41.6 (27.9\%)
1,ocean,174.7,179.3,190.0,196.7,203.5,208.4,4.6 (2.6\%),15.2 (8.7\%),21.9 (12.6\%),28.8 (16.5\%),33.6 (19.2\%)
2,land,86.5,96.2,114.3,126.5,139.4,147.9,9.7 (11.2\%),27.8 (32.1\%),40.0 (46.3\%),52.9 (61.2\%),61.3 (70.9\%)
3,land_wo_antarctica,94.4,105.0,124.7,138.0,152.1,161.3,10.6 (11.2\%),30.3 (32.1\%),43.7 (46.3\%),57.7 (61.2\%),66.9 (70.9\%)
4,GIC,0.0,0.0,0.0,0.0,0.0,0.0,0.0 (-),0.0 (-),0.0 (-),0.0 (-),0.0 (-)
5,NWN,0.2,0.3,1.2,2.7,6.1,10.0,0.1 (57.1\%),1.0 (575.9\%),2.6 (1485.9\%),5.9 (3426.6\%),9.8 (5665.0\%)
6,NEN,0.7,1.0,3.3,6.0,10.2,14.8,0.3 (49.2\%),2.6 (396.6\%),5.4 (812.4\%),9.6 (1455.0\%),14.1 (2141.9\%)
7,WNA,7.0,8.6,16.6,24.5,37.5,48.7,1.6 (23.5\%),9.6 (138.1\%),17.5 (251.4\%),30.6 (437.7\%),41.7 (597.7\%)
8,CNA,57.3,63.0,84.3,98.6,114.4,124.7,5.6 (9.8\%),27.0 (47.0\%),41.2 (71.9\%),57.1 (99.5\%),67.3 (117.4\%)
9,ENA,43.7,49.6,69.4,82.5,98.3,109.2,5.9 (13.6\%),25.8 (59.0\%),38.8 (88.9\%),54.6 (125.0\%),65.5 (150.1\%)


In [21]:
# percentage of the ssp585 annual tropical nights that is avoided under ssp126, per region
good = df['ssp126']
bad = df['ssp585']
reduction = (bad - good).div(bad).mul(100)

# three largest and three smallest reductions, followed by a selection of individual rows
reduction.nlargest(3), reduction.nsmallest(3), reduction.iloc[[47, 46, 19, 12, 14, 42]]

(20    89.090909
 5     88.000000
 32    86.363636
 dtype: float64,
 12    2.401325
 14    3.918322
 42    5.510675
 dtype: float64,
 47    73.378840
 46    45.000000
 19    68.085106
 12     2.401325
 14     3.918322
 42     5.510675
 dtype: float64)

In [14]:
# table rows 12, 14 and 42 (CAR, NSA and SEA)
df.iloc[[12, 14, 42], :]

Unnamed: 0,region,pre_ind,current,ssp126,ssp245,ssp370,ssp585,current_pct,ssp126_pct,ssp245_pct,ssp370_pct,ssp585_pct
12,CAR,334.7,342.7,353.6,358.1,360.9,362.3,8.0 (2.4\%),18.9 (5.6\%),23.3 (7.0\%),26.2 (7.8\%),27.6 (8.2\%)
14,NSA,308.0,327.2,348.2,355.9,360.7,362.4,19.1 (6.2\%),40.1 (13.0\%),47.8 (15.5\%),52.6 (17.1\%),54.2 (17.6\%)
42,SEA,297.4,309.1,327.5,335.5,341.9,346.6,11.7 (3.9\%),30.1 (10.1\%),38.1 (12.8\%),44.4 (14.9\%),49.1 (16.5\%)
