# Compute percentiles on the trends for both regional and spatial processed data

### Imports

In [1]:
import glob
import os
import numpy as np
import xarray as xr

### Define functions

In [2]:
# These functions allow me to calculate a percentile across all variables for each month of the year. 
# Essentially a poorman's version of creating the numpy.percentile function within xarray. But it works!

def xr_percentile_wrapper(nparray,q,axis):
    '''
    Wrapper for a non-xarray function so that it returns a DataArray object.
    I am using nanpercentile here because it handles all cases.
    '''
    return xr.DataArray(np.nanpercentile(nparray,q=q,axis=axis))

def percentile_monthly_wrapper(data,dim,q,axis):
    
    return data.groupby(dim).map(xr_percentile_wrapper,q=q,axis=axis)

In [3]:
def percentile_vec(data,core_dim:str,q,**kwargs):
    
    data_chunk = data.chunk({core_dim:-1}) # needed!

    res_out = xr.apply_ufunc(np.nanpercentile,              # Function to apply
                             data_chunk,                      # Positional arguments for the function
                             input_core_dims=[[core_dim]],    # Dimension(s) to apply the function over
                             kwargs={**{'q':q}, **kwargs},                   # Keyword arguments for the function
                             # kwargs=kwargs,                   # Keyword arguments for the function
                             dask="parallelized",             # These may be necessary for efficient use via dask.
                             output_dtypes=['float64'],
                             vectorize = True)                # Vectorize
    
    return res_out


# You can also do this in numpy if you are ok converting back to xarray
# out_np = np.nanpercentile(data,q=95,axis=1,keepdims=False)

### Load and process regional trend data

#### Models

In [4]:
# I need to go from the CAM output variables to the CMOR/CMIP standard names.
pic_tseries_dir = '/glade/u/home/jonahshaw/w/trend_uncertainty/nathan/CMIP6_PIC/'
cesm1_dir       = 'CESM1/'
mpi_dir         = 'MPI-GE/'
canesm2_dir     = 'CanESM2/'
esm2m_dir       = 'ESM2M/'

cesm1_cmor_var_dict = {'ts':'TS','tas':'TREFHT','psl':'PSL'}

In [5]:
cesm1_trends_filepath   = glob.glob('%s/%s/*.1900trends.040001-220012.nc' % (pic_tseries_dir,cesm1_dir))
mpi_trends_filepath     = glob.glob('%s/%s/*.1900trends.185001-385012.nc' % (pic_tseries_dir,mpi_dir))
canesm2_trends_filepath = glob.glob('%s/%s/*.1900trends.201501-301012.nc' % (pic_tseries_dir,canesm2_dir))
esm2m_trends_filepath   = glob.glob('%s/%s/*.1900trends.000101-050012.nc' % (pic_tseries_dir,esm2m_dir))

Load the trend data.

In [6]:
cesm1_pic_trends_all   = xr.open_dataarray(*cesm1_trends_filepath)

mpi_pic_trends_all     = xr.open_dataarray(*mpi_trends_filepath)

canesm2_pic_trends_all = xr.open_dataarray(*canesm2_trends_filepath)

esm2m_pic_trends_all   = xr.open_dataarray(*esm2m_trends_filepath)

### Compute trend percentiles (regional)

Calculate a 95% confidence interval on the PI-Control trends.

In [7]:
# CESM1
cesm1_pic_trends_2_5perc  = percentile_vec(cesm1_pic_trends_all,q=2.5,core_dim='startyear').compute()
cesm1_pic_trends_97_5perc = percentile_vec(cesm1_pic_trends_all,q=97.5,core_dim='startyear').compute()

# MPI-GE
mpi_pic_trends_2_5perc  = percentile_vec(mpi_pic_trends_all,q=2.5,core_dim='startyear').compute()
mpi_pic_trends_97_5perc = percentile_vec(mpi_pic_trends_all,q=97.5,core_dim='startyear').compute()

# CanESM2
canesm2_pic_trends_2_5perc  = percentile_vec(canesm2_pic_trends_all,q=2.5,core_dim='startyear').compute()
canesm2_pic_trends_97_5perc = percentile_vec(canesm2_pic_trends_all,q=97.5,core_dim='startyear').compute()

# ESM2M
esm2m_pic_trends_2_5perc  = percentile_vec(esm2m_pic_trends_all,q=2.5,core_dim='startyear').compute()
esm2m_pic_trends_97_5perc = percentile_vec(esm2m_pic_trends_all,q=97.5,core_dim='startyear').compute()

In [8]:
cesm1_ipccregion_trends_percentiles = xr.merge([cesm1_pic_trends_2_5perc.assign_coords({'percentile':2.5}).expand_dims('percentile',axis=-1),
                                           cesm1_pic_trends_97_5perc.assign_coords({'percentile':97.5}).expand_dims('percentile',axis=-1)])

filename = 'b.e11.B1850C5CN.f09_g16.005.cam.h0.TREFHT.040001-220012.1900trends.Percentiles.nc'

cesm1_ipccregion_trends_percentiles.to_netcdf('%s/%s/%s' % (pic_tseries_dir,cesm1_dir,filename))

In [None]:
mpi_ipccregion_trends_percentiles = xr.merge([mpi_pic_trends_2_5perc.assign_coords({'percentile':2.5}).expand_dims('percentile',axis=-1),
                                           mpi_pic_trends_97_5perc.assign_coords({'percentile':97.5}).expand_dims('percentile',axis=-1)])

filename = 'tas_Amon_MPI-ESM_piControl_r001i1850p3.185001-385012.1900trends.Percentiles.nc'

mpi_ipccregion_trends_percentiles.to_netcdf('%s/%s/%s' % (pic_tseries_dir,mpi_dir,filename))

In [None]:
canesm2_ipccregion_trends_percentiles = xr.merge([canesm2_pic_trends_2_5perc.assign_coords({'percentile':2.5}).expand_dims('percentile',axis=-1),
                                           canesm2_pic_trends_97_5perc.assign_coords({'percentile':97.5}).expand_dims('percentile',axis=-1)])

filename = 'tas_Amon_CanESM2_piControl_r1i1p1.201501-301012.1900trends.Percentiles.nc'

canesm2_ipccregion_trends_percentiles.to_netcdf('%s/%s/%s' % (pic_tseries_dir,canesm2_dir,filename))

In [None]:
esm2m_ipccregion_trends_percentiles = xr.merge([esm2m_pic_trends_2_5perc.assign_coords({'percentile':2.5}).expand_dims('percentile',axis=-1),
                                           esm2m_pic_trends_97_5perc.assign_coords({'percentile':97.5}).expand_dims('percentile',axis=-1)])

filename = 'tas_Amon_GFDL-ESM2M_piControl_r1i1p1.000101-050012.1900trends.Percentiles.nc'

esm2m_ipccregion_trends_percentiles.to_netcdf('%s/%s/%s' % (pic_tseries_dir,esm2m_dir,filename))

#### Clean up regional data from models

In [9]:
del (cesm1_pic_trends_all,
    mpi_pic_trends_all,
    canesm2_pic_trends_all,
    esm2m_pic_trends_all)

In [10]:
del (cesm1_pic_trends_2_5perc,
    cesm1_pic_trends_97_5perc,
    mpi_pic_trends_2_5perc,
    mpi_pic_trends_97_5perc,
    canesm2_pic_trends_2_5perc,
    canesm2_pic_trends_97_5perc,
    esm2m_pic_trends_2_5perc,
    esm2m_pic_trends_97_5perc)

In [11]:
del (cesm1_ipccregion_trends_percentiles,
    mpi_ipccregion_trends_percentiles,
    canesm2_ipccregion_trends_percentiles,
    esm2m_ipccregion_trends_percentiles)

NameError: name 'mpi_ipccregion_trends_percentiles' is not defined

#### Observations

In [None]:
obs_tseries_dir = '/glade/u/home/jonahshaw/w/trend_uncertainty/nathan/OBS_LENS/'
gistemp_5x5_dir = 'GISTEMP_5x5/20240820/'
hadcrut5_dir    = 'HadCRUT5/20240820/'

In [None]:
gistemp_tas_var = 'tas'
hadcrut5_tas_var = 'tas'

In [None]:
gistemp_5x5_trends_filepath  = glob.glob('%s/%s/*.trends.190001-202012*.nc' % (obs_tseries_dir,gistemp_5x5_dir))
hadcrut5_trends_filepath     = glob.glob('%s/%s/*.trends.1900*.nc' % (obs_tseries_dir,hadcrut5_dir))

Load the trend data

In [None]:
gistemp_5x5_trends_all  = xr.open_dataarray(*gistemp_5x5_trends_filepath)
hadcrut5_trends_all     = xr.open_dataarray(*hadcrut5_trends_filepath)

In [None]:
gistemp_5x5_trends_1900 = gistemp_5x5_trends_all.sel(startyear=1900)
hadcrut5_trends_1900    = hadcrut5_trends_all.sel(startyear=1900)

In [None]:
# Compute the low and high bounds on the trends from the observational products

# GISTEMP 5x5
gistemp_5x5_trends_2_5perc  = percentile_vec(gistemp_5x5_trends_1900,q=2.5,core_dim='realization')
gistemp_5x5_trends_50perc  = percentile_vec(gistemp_5x5_trends_1900,q=50,core_dim='realization')
gistemp_5x5_trends_97_5perc = percentile_vec(gistemp_5x5_trends_1900,q=97.5,core_dim='realization')

# HadCRUT5
hadcrut5_trends_2_5perc  = percentile_vec(hadcrut5_trends_1900,q=2.5,core_dim='realization')
hadcrut5_trends_50perc  = percentile_vec(hadcrut5_trends_1900,q=50,core_dim='realization')
hadcrut5_trends_97_5perc = percentile_vec(hadcrut5_trends_1900,q=97.5,core_dim='realization')

In [None]:
gistemp_5x5_trends_percentiles = xr.merge([gistemp_5x5_trends_2_5perc.assign_coords({'percentile':2.5}).expand_dims('percentile',axis=-1),
                                           gistemp_5x5_trends_50perc.assign_coords({'percentile':50.0}).expand_dims('percentile',axis=-1),
                                           gistemp_5x5_trends_97_5perc.assign_coords({'percentile':97.5}).expand_dims('percentile',axis=-1)])

filename = 'ensembleChunks_5x5_0001_0200.190001-202012.trends.Percentiles.nc'

gistemp_5x5_trends_percentiles.to_netcdf('%s/%s/%s' % (obs_tseries_dir,gistemp_5x5_dir,filename))

In [None]:
hadcrut5_5x5_trends_percentiles = xr.merge([hadcrut5_trends_2_5perc.assign_coords({'percentile':2.5}).expand_dims('percentile',axis=-1),
                                            hadcrut5_trends_50perc.assign_coords({'percentile':50.0}).expand_dims('percentile',axis=-1),
                                            hadcrut5_trends_97_5perc.assign_coords({'percentile':97.5}).expand_dims('percentile',axis=-1)])

filename = 'HadCRUT.5.0.1.0.analysis.anomalies.1_200.190001-202012.trends.Percentiles.nc'

hadcrut5_5x5_trends_percentiles.to_netcdf('%s/%s/%s' % (obs_tseries_dir,hadcrut5_dir,filename))

#### Clean up regional data from observations

In [None]:
del gistemp_5x5_trends_all,hadcrut5_trends_all

In [None]:
del (gistemp_5x5_trends_2_5perc,
    gistemp_5x5_trends_50perc,
    gistemp_5x5_trends_97_5perc,
    hadcrut5_trends_2_5perc,
    hadcrut5_trends_50perc,
    hadcrut5_trends_97_5perc)

In [None]:
del (gistemp_5x5_trends_percentiles,
    hadcrut5_5x5_trends_percentiles)

## Compute trend percentiles (spatial 5x5 deg.)

Don't need to do this for the obs. unless for visualization, not used in ToE.

### Load processed trend data

#### Models

In [None]:
# I need to go from the CAM output variables to the CMOR/CMIP standard names.
pic_tseries_dir = '/glade/work/jonahshaw/trend_uncertainty/nathan/CMIP6_PIC/'
cesm1_dir       = 'CESM1/'
mpi_dir         = 'MPI-GE/'
canesm2_dir     = 'CanESM2/'
esm2m_dir       = 'ESM2M/'

cesm1_cmor_var_dict = {'ts':'TS','tas':'TREFHT','psl':'PSL'}

In [None]:
cesm1_trends_filepath   = glob.glob('%s/%s/*5x5degSpatialTrends.3_120_years.nc' % (pic_tseries_dir,cesm1_dir))
mpi_trends_filepath     = glob.glob('%s/%s/*5x5degSpatialTrends.3_120_years.nc' % (pic_tseries_dir,mpi_dir))
canesm2_trends_filepath = glob.glob('%s/%s/*5x5degSpatialTrends.3_120_years.nc' % (pic_tseries_dir,canesm2_dir))
esm2m_trends_filepath   = glob.glob('%s/%s/*5x5degSpatialTrends.3_120_years.nc' % (pic_tseries_dir,esm2m_dir))

Load the trend data.

In [None]:
cesm1_pic_trends_all   = xr.open_dataarray(*cesm1_trends_filepath)

mpi_pic_trends_all     = xr.open_dataarray(*mpi_trends_filepath)

canesm2_pic_trends_all = xr.open_dataarray(*canesm2_trends_filepath)

esm2m_pic_trends_all   = xr.open_dataarray(*esm2m_trends_filepath)

Calculate a 95% confidence interval on the PI-Control trends.

In [None]:
%%time

# CESM1
cesm1_pic_trends_2_5perc  = percentile_vec(cesm1_pic_trends_all,q=2.5,core_dim='startyear').compute()
cesm1_pic_trends_97_5perc = percentile_vec(cesm1_pic_trends_all,q=97.5,core_dim='startyear').compute()

# MPI-GE
mpi_pic_trends_2_5perc  = percentile_vec(mpi_pic_trends_all,q=2.5,core_dim='startyear').compute()
mpi_pic_trends_97_5perc = percentile_vec(mpi_pic_trends_all,q=97.5,core_dim='startyear').compute()

# CanESM2
canesm2_pic_trends_2_5perc  = percentile_vec(canesm2_pic_trends_all,q=2.5,core_dim='startyear').compute()
canesm2_pic_trends_97_5perc = percentile_vec(canesm2_pic_trends_all,q=97.5,core_dim='startyear').compute()

# ESM2M
esm2m_pic_trends_2_5perc  = percentile_vec(esm2m_pic_trends_all,q=2.5,core_dim='startyear').compute()
esm2m_pic_trends_97_5perc = percentile_vec(esm2m_pic_trends_all,q=97.5,core_dim='startyear').compute()

In [None]:
save_dir = '/glade/work/jonahshaw/trend_uncertainty/nathan/CMIP6_PIC/'

CESM1

In [None]:
cesm1_pic_trends_percentiles = xr.merge([cesm1_pic_trends_2_5perc.assign_coords({'percentile':2.5}).expand_dims('percentile',axis=-1),
                                         cesm1_pic_trends_97_5perc.assign_coords({'percentile':97.5}).expand_dims('percentile',axis=-1)])

In [None]:
filename = 'b.e11.B1850C5CN.f09_g16.005.cam.h0.TREFHT.040001-179912.5x5degSpatialTrends.3_120_years.Percentiles.nc'

# Don't delete
cesm1_pic_trends_percentiles.to_netcdf('%s/CESM1/%s' % (save_dir,filename))

MPI-GE

In [None]:
mpi_pic_trends_percentiles = xr.merge([mpi_pic_trends_2_5perc.assign_coords({'percentile':2.5}).expand_dims('percentile',axis=-1),
                                       mpi_pic_trends_97_5perc.assign_coords({'percentile':97.5}).expand_dims('percentile',axis=-1)])

In [None]:
filename = 'tas_Amon_MPI-ESM_piControl_r001i1850p3_185001-359912.5x5degSpatialTrends.3_120_years.Percentiles.nc'

# Don't delete
mpi_pic_trends_percentiles.to_netcdf('%s/MPI-GE/%s' % (save_dir,filename))

CanESM2

In [None]:
canesm2_pic_trends_percentiles = xr.merge([canesm2_pic_trends_2_5perc.assign_coords({'percentile':2.5}).expand_dims('percentile',axis=-1),
                                           canesm2_pic_trends_97_5perc.assign_coords({'percentile':97.5}).expand_dims('percentile',axis=-1)])

In [None]:
filename = 'tas_Amon_CanESM2_piControl_r1i1p1_201501-301012.5x5degSpatialTrends.3_120_years.Percentiles.nc'

# Don't delete
canesm2_pic_trends_percentiles.to_netcdf('%s/CanESM2/%s' % (save_dir,filename))

ESM2M

In [None]:
esm2m_pic_trends_percentiles = xr.merge([esm2m_pic_trends_2_5perc.assign_coords({'percentile':2.5}).expand_dims('percentile',axis=-1),
                                         esm2m_pic_trends_97_5perc.assign_coords({'percentile':97.5}).expand_dims('percentile',axis=-1)])

In [None]:
filename = 'tas_Amon_GFDL-ESM2M_piControl_r1i1p1_000101-050012.5x5degSpatialTrends.3_120_years.Percentiles.nc'

# Don't delete
esm2m_pic_trends_percentiles.to_netcdf('%s/ESM2M/%s' % (save_dir,filename))

#### Clean up

In [None]:
del (cesm1_pic_trends_all,
    mpi_pic_trends_all,
    canesm2_pic_trends_all,
    esm2m_pic_trends_all)

In [None]:
del (cesm1_pic_trends_2_5perc,
    cesm1_pic_trends_97_5perc,
    mpi_pic_trends_2_5perc,
    mpi_pic_trends_97_5perc,
    canesm2_pic_trends_2_5perc,
    canesm2_pic_trends_97_5perc,
    esm2m_pic_trends_2_5perc,
    esm2m_pic_trends_97_5perc)

In [None]:
del (cesm1_pic_trends_percentiles,
    mpi_pic_trends_percentiles,
    canesm2_pic_trends_percentiles,
    esm2m_pic_trends_percentiles)

#### Observations

In [12]:
obs_tseries_dir = '/glade/u/home/jonahshaw/w/trend_uncertainty/nathan/OBS_LENS/'
gistemp_5x5_dir = 'GISTEMP_5x5/20240820/'
hadcrut5_dir    = 'HadCRUT5/20240820/'

In [13]:
gistemp_tas_var = 'tas'
hadcrut5_tas_var = 'tas'

In [14]:
gistemp_5x5_trends_filepath  = glob.glob('%s/%s/*5x5degSpatialTrends.190001-202012.nc' % (obs_tseries_dir,gistemp_5x5_dir))
hadcrut5_trends_filepath     = glob.glob('%s/%s/*5x5degSpatialTrends.190001-202012.nc' % (obs_tseries_dir,hadcrut5_dir))

Load the trend data

In [15]:
gistemp_5x5_trends_all  = xr.open_mfdataset(gistemp_5x5_trends_filepath)

hadcrut5_trends_all     = xr.open_mfdataset(hadcrut5_trends_filepath).rename({'longitude':'lon','latitude':'lat'})

Observational Uncertainty

In [None]:
# gistemp_5x5_trends_all  = xr.open_dataarray(*gistemp_5x5_trends_filepath)

# hadcrut5_trends_all     = xr.open_dataarray(*hadcrut5_trends_filepath)

# gistemp_5x5_trends_1960 = gistemp_5x5_trends_all.sel(startyear=1960)
# hadcrut5_trends_1960    = hadcrut5_trends_all.sel(startyear=1960)

In [16]:
%%time
# Compute the low and high bounds on the trends from the observational products

# GISTEMP 5x5
gistemp_5x5_trends_2_5perc  = percentile_vec(gistemp_5x5_trends_all,q=2.5,core_dim='realization')
gistemp_5x5_trends_50perc  = percentile_vec(gistemp_5x5_trends_all,q=50,core_dim='realization')
gistemp_5x5_trends_97_5perc = percentile_vec(gistemp_5x5_trends_all,q=97.5,core_dim='realization')

# HadCRUT5
hadcrut5_trends_2_5perc  = percentile_vec(hadcrut5_trends_all,q=2.5,core_dim='realization')
hadcrut5_trends_50perc  = percentile_vec(hadcrut5_trends_all,q=50,core_dim='realization')
hadcrut5_trends_97_5perc = percentile_vec(hadcrut5_trends_all,q=97.5,core_dim='realization')

CPU times: user 12.3 ms, sys: 0 ns, total: 12.3 ms
Wall time: 12.3 ms


Save Obs Data

In [17]:
save_dir_obs = '/glade/work/jonahshaw/trend_uncertainty/nathan/OBS_LENS/'

GISTEMP

In [None]:
gistemp_5x5_trends_percentiles = xr.merge([gistemp_5x5_trends_2_5perc.assign_coords({'percentile':2.5}).expand_dims('percentile',axis=-1),
                                           gistemp_5x5_trends_50perc.assign_coords({'percentile':50.0}).expand_dims('percentile',axis=-1),
                                           gistemp_5x5_trends_97_5perc.assign_coords({'percentile':97.5}).expand_dims('percentile',axis=-1)])

In [None]:
filename = 'ensembleChunk_5x5_1_100.5x5degSpatialTrends.190001-202012.Percentiles.nc'

gistemp_5x5_trends_percentiles.to_netcdf('%s/%s/%s' % (save_dir_obs,gistemp_5x5_dir,filename))

HadCRUT5

In [18]:
hadcrut5_5x5_trends_percentiles = xr.merge([hadcrut5_trends_2_5perc.assign_coords({'percentile':2.5}).expand_dims('percentile',axis=-1),
                                            hadcrut5_trends_50perc.assign_coords({'percentile':50.0}).expand_dims('percentile',axis=-1),
                                            hadcrut5_trends_97_5perc.assign_coords({'percentile':97.5}).expand_dims('percentile',axis=-1)])

  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation
  overwrite_input=overwrite_input, interpolation=interpolation


In [19]:
hadcrut5_5x5_trends_percentiles

Unnamed: 0,Array,Chunk
Bytes,7.40 MB,7.40 MB
Shape,"(119, 36, 72, 3)","(119, 36, 72, 3)"
Count,633 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 7.40 MB 7.40 MB Shape (119, 36, 72, 3) (119, 36, 72, 3) Count 633 Tasks 1 Chunks Type float64 numpy.ndarray",119  1  3  72  36,

Unnamed: 0,Array,Chunk
Bytes,7.40 MB,7.40 MB
Shape,"(119, 36, 72, 3)","(119, 36, 72, 3)"
Count,633 Tasks,1 Chunks
Type,float64,numpy.ndarray


In [20]:
filename = 'HadCRUT.5.0.2.0.analysis.anomalies.1_200.5x5degSpatialTrends.190001-202012.Percentiles.nc'

hadcrut5_5x5_trends_percentiles.to_netcdf('%s/%s/%s' % (save_dir_obs,hadcrut5_dir,filename))

  overwrite_input=overwrite_input, interpolation=interpolation


#### Clean-up

In [None]:
del (gistemp_5x5_trends_all,
    hadcrut5_trends_all)

In [None]:
del (gistemp_5x5_trends_2_5perc,
    gistemp_5x5_trends_50perc,
    gistemp_5x5_trends_97_5perc,
    hadcrut5_trends_2_5perc,
    hadcrut5_trends_50perc,
    hadcrut5_trends_97_5perc)

In [None]:
del (gistemp_5x5_trends_percentiles,
    hadcrut5_5x5_trends_percentiles)