In [1]:
%matplotlib inline

%matplotlib inline
%load_ext autoreload
%autoreload 2


import datetime
import os

import cartopy.crs as ccrs
import cftime
import dask
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
import xarrayutils
import xesmf as xe
import xmip as xm
import xmip.drift_removal as xm_dr
from dateutil.relativedelta import relativedelta
from xmip.drift_removal import replace_time
from xmip.postprocessing import concat_experiments
from xmip.preprocessing import combined_preprocessing
from xmip.preprocessing import replace_x_y_nominal_lat_lon

import utils


In [2]:
# Turn on dask's `array.slicing.split_large_chunks` option for this session.
dask_options = {'array.slicing.split_large_chunks': True}
dask.config.set(dask_options)

<dask.config.set at 0x2ab1e5ae00d0>

## Functions

In [3]:
# TODO: weight the annual mean by month length.

In [4]:
#### function to find area of a grid cell from lat/lon ####
def find_area(ds, R = 6378.1):
    """Approximate the area of every grid cell from its latitude/longitude edges.

    Each cell is treated as a trapezoid: its north-south height multiplied by
    the mean of its east-west widths at the two bounding latitudes.

    Parameters
    ----------
    ds : dataset
        Must provide 1-D edge coordinates ``lat_b`` and ``lon_b`` in degrees
        and cell-centre coordinates ``lat`` and ``lon``, with one more edge
        than centre along each axis.
    R : float
        Radius of the Earth in km.

    Returns
    -------
    Array of cell areas in km^2 (for ``R`` in km), labelled with the cell
    centres ``ds.lat`` / ``ds.lon``.

    Notes
    -----
    Heights and widths are computed as "next edge minus current edge", so the
    areas are positive when ``lat_b`` and ``lon_b`` both increase along their
    dimension.
    """
    # Length of one degree of a great circle, in the units of R (km).
    km_per_degree = ((2*np.pi)*R)/360

    lat_edges = ds['lat_b']
    lon_edges = ds['lon_b']
    # Edge i+1 placed at position i; the wrapped-around last entry is dropped below.
    next_lat_edges = lat_edges.roll({'lat_b':-1}, roll_coords = False)
    next_lon_edges = lon_edges.roll({'lon_b':-1}, roll_coords = False)

    # North-south extent of each cell.
    dy = (next_lat_edges - lat_edges)[:-1]*km_per_degree

    # East-west extent along the equator, then scaled by cos(latitude) at the
    # first and second bounding latitude of each cell.
    lon_width = (next_lon_edges - lon_edges)[:-1]*km_per_degree
    # lat_edges keeps its full length here; the sum with dx2 below relies on
    # xarray aligning both operands on their shared lat_b labels.
    dx1 = lon_width*np.cos(np.deg2rad(lat_edges))
    dx2 = lon_width*np.cos(np.deg2rad(next_lat_edges[:-1]))

    A = .5*(dx1+dx2)*dy

    # Relabel from cell edges to cell centres, then reverse the dimension order.
    A = (
        A.assign_coords(lon_b = ds.lon.values, lat_b = ds.lat.values)
         .rename({'lon_b':'lon','lat_b':'lat'})
         .transpose()
    )

    return(A)

In [5]:
# TODO: re-download the source files; the data are currently incomplete.
# ds_control, ds_pulse, G = import_regrid_calc('cmip6_data/tas_Amon_CanESM5_esm-piControl_r1i1p2f1*', 
#                                              'cmip6_data/tas_Amon_CanESM5_esm-pi-CO2pulse_r1i1p2f1*', 
#                                              ds_out, pulse_size = 100)

In [6]:
# TODO: determine whether F needs to be the same in all runs for them to be comparable.

In [7]:
# TODO: fix the ACCESS data.

In [8]:
# TODO: match up the interannual variability of the runs.
# E.g. they may start in different years, which would produce artificial differences in the mean.

## Data

The data used here come from CDRMIP (https://gmd.copernicus.org/articles/11/1133/2018/): `esm-pi-CO2pulse` is the experiment with a 100 GtC pulse, and `esm-piControl` is the corresponding control run.

In [9]:
# Control run: label -> file-name glob. The glob is appended to
# 'cmip6_data/tas_Amon_' when the files are opened.
model_run_control_dict = {
    'CANESM5_p2': 'CanESM5_esm-piControl_r1i1p2f1*',
    # 'CANESM5_p1': 'CanESM5_esm-piControl_r1i1p1f1*',
}

# Pulse runs: one entry per ensemble member, in the order they are processed.
model_run_pulse_dict = {
    'CANESM5_r1': 'CanESM5_esm-pi-CO2pulse_r1i1p2f1*',
    'CANESM5_r2': 'CanESM5_esm-pi-CO2pulse_r2i1p2f1*',
    'CANESM5_r3': 'CanESM5_esm-pi-CO2pulse_r3i1p2f1*',
}



In [10]:
# Define the 1-degree global output grid: cell centres (lat, lon) and the
# matching cell edges (lat_b, lon_b), with one more edge than centre per axis.
ds_out = xr.Dataset(
    {
        "lat": (["lat"], np.arange(-89.5, 90.5, 1.0)),
        "lon": (["lon"], np.arange(0, 360, 1)),
        "lat_b": (["lat_b"], np.arange(-90.,91.,1.0)),
        # The edges have to bracket the centres 0..359, so they run from
        # -0.5 to 359.5 (previously 0.5..360.5, i.e. shifted one cell east).
        "lon_b":(["lon_b"], np.arange(-.5, 360.5, 1.0))
    }
)

In [11]:
# Cell areas of the output grid, computed from its lat_b/lon_b edges.
A = find_area(ds_out)

In [12]:
def _regrid_cont_pulse(ds_control, ds_pulse, ds_out):
    """Bilinearly regrid the control and pulse datasets onto the grid of `ds_out`.

    A separate regridder is built for each input. The attributes of each
    input are copied onto its regridded counterpart.

    Returns the tuple ``(regridded_control, regridded_pulse)``.
    """
    def _onto_output_grid(source):
        # Build the source -> ds_out bilinear regridder, apply it, and put the
        # source's attrs back on the result.
        remap = xe.Regridder(source, ds_out, "bilinear")
        source_attrs = source.attrs
        remapped = remap(source)
        remapped.attrs = source_attrs
        return remapped

    control_regridded = _onto_output_grid(ds_control)
    pulse_regridded = _onto_output_grid(ds_pulse)

    return(control_regridded, pulse_regridded)


def _calc_greens(ds_control, ds_pulse, variable, pulse_size = 100):
    
    #A = find_area(ds_control.isel(time = 0), lat_bound_nm = 'lat_bounds', lon_bound_nm = 'lon_bounds')
    G = (ds_pulse[variable] - ds_control[variable])/(pulse_size)
    G = G.groupby('time.year').mean()
    G.attrs = ds_pulse.attrs
    

In [13]:
# Per-member containers, keyed by the pulse-run label.
ds_control, ds_pulse, G = {}, {}, {}

# Anomaly containers; nothing in this cell fills them.
anom_control, anom_pulse, anom_G = {}, {}, {}

for m1, pulse_pattern in model_run_pulse_dict.items():
    # Every pulse member is paired with the same control run.
    m2 = 'CANESM5_p2'
    control_glob = f'cmip6_data/tas_Amon_{model_run_control_dict[m2]}'
    pulse_glob = f'cmip6_data/tas_Amon_{pulse_pattern}'
    ds_control[m1] = xr.open_mfdataset(control_glob, use_cftime=True)
    ds_pulse[m1] = xr.open_mfdataset(pulse_glob, use_cftime=True)

    # Give the pulse run the control run's time axis, truncated to the
    # number of time steps in the pulse run.
    n_pulse_steps = len(ds_pulse[m1]['time'])
    ds_pulse[m1]['time'] = ds_control[m1]['time'][:n_pulse_steps]

    ds_control[m1] = replace_x_y_nominal_lat_lon(ds_control[m1])
    ds_pulse[m1] = replace_x_y_nominal_lat_lon(ds_pulse[m1])

    # NOTE(review): the helpers come from the local `utils` module, not from
    # the same-named functions defined in this notebook; presumably they
    # regrid onto `ds_out` and compute the Green's function. Confirm.
    regridded_pair = utils._regrid_cont_pulse(ds_control[m1], ds_pulse[m1], ds_out)
    ds_control[m1], ds_pulse[m1] = regridded_pair
    G[m1] = utils._calc_greens(ds_control[m1], ds_pulse[m1], 'tas', pulse_size = 100)

  ds_out = xr.apply_ufunc(
  ds_out = xr.apply_ufunc(
  ds_out = xr.apply_ufunc(
  ds_out = xr.apply_ufunc(
  ds_out = xr.apply_ufunc(
  ds_out = xr.apply_ufunc(


In [21]:
G['CANESM5_r1']

Unnamed: 0,Array,Chunk
Bytes,98.88 MiB,506.25 kiB
Shape,"(200, 180, 360)","(1, 180, 360)"
Count,867 Tasks,200 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 98.88 MiB 506.25 kiB Shape (200, 180, 360) (1, 180, 360) Count 867 Tasks 200 Chunks Type float64 numpy.ndarray",360  180  200,

Unnamed: 0,Array,Chunk
Bytes,98.88 MiB,506.25 kiB
Shape,"(200, 180, 360)","(1, 180, 360)"
Count,867 Tasks,200 Chunks
Type,float64,numpy.ndarray


In [14]:
# Relabel the `year` coordinate of each Green's function with consecutive
# yearly datetimes, counted from the datetime parsed from "2000".
base = datetime.datetime.strptime("2000", "%Y")
for m, greens in G.items():
    n_years = len(greens['year'])
    greens['year'] = [base + relativedelta(years=offset) for offset in range(n_years)]
    #anom_G[m]['year'] = [base + relativedelta(years=x) for x in range(len(anom_G[m]['year']))]

In [40]:
# Stack the per-member Green's functions along a new `model` dimension whose
# labels are the member keys of `G`.
model_labels = pd.Index(list(G), name='model')
G_ds = xr.concat(list(G.values()), model_labels)
#anom_G_ds = xr.concat([anom_G[m] for m in anom_G.keys()], pd.Index([m for m in anom_G.keys()], name='model'))

In [41]:
# Average the Green's functions over the `model` dimension.
G_mean_ds = G_ds.mean(dim = 'model')
#anom_G_mean_ds = anom_G_ds.mean(dim = 'model')

## Save Green's Functions

In [42]:
# Write the per-member and member-mean Green's functions to NetCDF.
# Create the output directory first so the writes do not fail when it is
# missing (e.g. on a fresh checkout).
os.makedirs('Outputs', exist_ok=True)
G_ds.to_netcdf('Outputs/G_canesm5_ds.nc4')
G_mean_ds.to_netcdf('Outputs/G_mean_canesm5_ds.nc4')

# anom_G_ds.to_netcdf('Outputs/anom_G_ds.nc4')
# anom_G_mean_ds.to_netcdf('Outputs/anom_G_mean_ds.nc4')