In [298]:
%matplotlib inline

%matplotlib inline
%load_ext autoreload
%autoreload 2


import datetime
import os
from datetime import timedelta

import xarray as xr
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import cftime
import dask
import xarrayutils
import cartopy.crs as ccrs
from xmip.preprocessing import combined_preprocessing
from xmip.preprocessing import replace_x_y_nominal_lat_lon
from xmip.drift_removal import replace_time
from xmip.postprocessing import concat_experiments
import xmip.drift_removal as xm_dr
import xmip as xm
import xesmf as xe
from dateutil.relativedelta import relativedelta
import cf_xarray as cfxr

import utils


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [299]:
# Enable dask's 'array.slicing.split_large_chunks' option for the whole session.
dask.config.set({'array.slicing.split_large_chunks': True})

<dask.config.set at 0x2b5dc86a5370>

## Data

The data come from the CDRMIP experiments described at https://gmd.copernicus.org/articles/11/1133/2018/, where pi-CO2pulse is the 100 GtC pulse run and piControl is the control run.

### Notes on data
ACCESS: the co2pulse run begins with 10 years of piControl; take G starting 10 years in

GFDL: pi control run for 100 years before pulse, co2pulse run starts at 100 years in without any picontrol data

UKESM1: the co2pulse run begins with 10 years of piControl; take G starting 10 years in

MIROC: co2pulse starts 10 years after pi control, but doesn't include that in its data

NORESM2: picontrol starts in 1851 and co2pulse starts in 1850; according to the documentation, both should start in 1851

CanESM5 (p2): picontrol is at 5601, but was branched at 5301; co2pulse was branched at 5401; line up by moving the picontrol datetimes to start at 5301

In [300]:
# Local aliases for the run-name lookup tables defined in utils.
model_run_cdr_pulse_dict, model_run_1pct_dict, model_run_control_dict = (
    utils.model_run_cdr_pulse_dict,
    utils.model_run_1pct_dict,
    utils.model_run_esm_picontrol_dict,
)


In [301]:
# Define the common 1-degree output grid that is passed to the regridding
# helpers.
#
# Cell centres: lat -89.5..89.5 (180 points), lon 0..359 (360 points).
# Cell edges (`*_b`) have one more entry than the centres and must bracket
# them. The longitude edges therefore run from -0.5 to 359.5; the previous
# 0.5..360.5 edges were shifted one cell east of the centres they are meant
# to bound.
ds_out = xr.Dataset(
    {
        "lat": (["lat"], np.arange(-89.5, 90.5, 1.0)),
        "lon": (["lon"], np.arange(0, 360, 1)),
        "lat_b": (["lat_b"], np.arange(-90., 91., 1.0)),
        "lon_b": (["lon_b"], np.arange(-.5, 360.5, 1.0)),
    }
)


In [302]:
# Hand the output grid to utils.find_area; presumably this returns per-cell
# areas for area weighting — TODO confirm against utils.
A = utils.find_area(ds_out)

In [303]:
# Model key used to look up the run names for the single-model cells below.
m = 'ACCESS'

In [304]:
# Open the pulse and control `tas` (Amon) files for model `m` as multi-file
# datasets, decoding the time axis with cftime.
pulse_path = f'cmip6_data/tas_Amon_{model_run_cdr_pulse_dict[m]}'
ds_pulse = xr.open_mfdataset(pulse_path, use_cftime=True)

control_path = f'cmip6_data/tas_Amon_{model_run_control_dict[m]}'
ds_control = xr.open_mfdataset(control_path, use_cftime=True)

In [305]:
# Display the control dataset to inspect its variables, shapes and chunking.
ds_control

Unnamed: 0,Array,Chunk
Bytes,93.75 kiB,56.25 kiB
Shape,"(6000, 2)","(3600, 2)"
Count,6 Tasks,2 Chunks
Type,object,numpy.ndarray
"Array Chunk Bytes 93.75 kiB 56.25 kiB Shape (6000, 2) (3600, 2) Count 6 Tasks 2 Chunks Type object numpy.ndarray",2  6000,

Unnamed: 0,Array,Chunk
Bytes,93.75 kiB,56.25 kiB
Shape,"(6000, 2)","(3600, 2)"
Count,6 Tasks,2 Chunks
Type,object,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,13.28 MiB,7.97 MiB
Shape,"(6000, 145, 2)","(3600, 145, 2)"
Count,8 Tasks,2 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 13.28 MiB 7.97 MiB Shape (6000, 145, 2) (3600, 145, 2) Count 8 Tasks 2 Chunks Type float64 numpy.ndarray",2  145  6000,

Unnamed: 0,Array,Chunk
Bytes,13.28 MiB,7.97 MiB
Shape,"(6000, 145, 2)","(3600, 145, 2)"
Count,8 Tasks,2 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,17.58 MiB,10.55 MiB
Shape,"(6000, 192, 2)","(3600, 192, 2)"
Count,8 Tasks,2 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 17.58 MiB 10.55 MiB Shape (6000, 192, 2) (3600, 192, 2) Count 8 Tasks 2 Chunks Type float64 numpy.ndarray",2  192  6000,

Unnamed: 0,Array,Chunk
Bytes,17.58 MiB,10.55 MiB
Shape,"(6000, 192, 2)","(3600, 192, 2)"
Count,8 Tasks,2 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,637.21 MiB,382.32 MiB
Shape,"(6000, 145, 192)","(3600, 145, 192)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 637.21 MiB 382.32 MiB Shape (6000, 145, 192) (3600, 145, 192) Count 6 Tasks 2 Chunks Type float32 numpy.ndarray",192  145  6000,

Unnamed: 0,Array,Chunk
Bytes,637.21 MiB,382.32 MiB
Shape,"(6000, 145, 192)","(3600, 145, 192)"
Count,6 Tasks,2 Chunks
Type,float32,numpy.ndarray


# Latitude cell edges for the pulse dataset, assuming evenly spaced centres
# (the spacing is taken from the first two points only).
lat_centres = np.asarray(ds_pulse['lat'].values, dtype=float)
dlat = float(lat_centres[1] - lat_centres[0])

# N centres need N + 1 edges. np.arange excludes its stop value, so the
# previous arange(start, stop, dlat) call dropped the final edge; linspace
# includes both end points and avoids accumulated float-step error.
lat_b = np.linspace(
    lat_centres[0] - 0.5 * dlat,
    lat_centres[-1] + 0.5 * dlat,
    lat_centres.size + 1,
)

# Grids whose centres sit on the poles would otherwise get edges beyond +/-90.
lat_b = np.clip(lat_b, -90.0, 90.0)


In [307]:
# Per-model results, keyed by the model/run name from model_run_cdr_pulse_dict.
ds_control = {}
ds_pulse = {}
G = {}

# Every CanESM5 p2 member is paired with the r1p2 control run.
canesm5_p2_runs = ('CANESM5_r1p2', 'CANESM5_r2p2', 'CANESM5_r3p2')

for m1 in model_run_cdr_pulse_dict:
    # m1 selects the pulse run, m2 selects the control run it is compared to.
    m2 = 'CANESM5_r1p2' if m1 in canesm5_p2_runs else m1
    print(m2, m1)

    control_file = f'cmip6_data/tas_Amon_{model_run_control_dict[m2]}'
    pulse_file = f'cmip6_data/tas_Amon_{model_run_cdr_pulse_dict[m1]}'
    ds_control[m1], ds_pulse[m1], G[m1] = utils.import_regrid_calc(
        control_file,
        pulse_file,
        ds_out,
        variable='tas',
        m=m1,
        pulse_size=100,
        replace_xy=False,
    )

UKESM1_r1 UKESM1_r1
<xarray.Dataset>
Dimensions:    (time: 1680, bnds: 2, lat: 144, lon: 192, lon_vertices: 193,
                lat_vertices: 145)
Coordinates:
  * time       (time) object 1850-01-16 00:00:00 ... 1989-12-16 00:00:00
  * lat        (lat) float64 -89.38 -88.12 -86.88 -85.62 ... 86.88 88.12 89.38
  * lon        (lon) float64 0.9375 2.812 4.688 6.562 ... 355.3 357.2 359.1
    height     float64 1.5
Dimensions without coordinates: bnds, lon_vertices, lat_vertices
Data variables:
    time_bnds  (time, bnds) object dask.array<chunksize=(1200, 2), meta=np.ndarray>
    lat_bnds   (time, lat, bnds) float64 dask.array<chunksize=(1200, 144, 2), meta=np.ndarray>
    lon_bnds   (time, lon, bnds) float64 dask.array<chunksize=(1200, 192, 2), meta=np.ndarray>
    tas        (time, lat, lon) float32 dask.array<chunksize=(1200, 144, 192), meta=np.ndarray>
    lon_b      (lon_vertices) float64 dask.array<chunksize=(193,), meta=np.ndarray>
    lat_b      (lat_vertices) float64 dask.array<

  ds_out = xr.apply_ufunc(
  ds_out = xr.apply_ufunc(


MIROC MIROC
<xarray.Dataset>
Dimensions:    (time: 2892, bnds: 2, lat: 64, lon: 128, lon_vertices: 129,
                lat_vertices: 65)
Coordinates:
  * time       (time) object 1860-01-16 12:00:00 ... 2100-12-16 12:00:00
  * lat        (lat) float64 -87.86 -85.1 -82.31 -79.53 ... 82.31 85.1 87.86
  * lon        (lon) float64 0.0 2.812 5.625 8.438 ... 348.8 351.6 354.4 357.2
    height     float64 2.0
Dimensions without coordinates: bnds, lon_vertices, lat_vertices
Data variables:
    time_bnds  (time, bnds) object dask.array<chunksize=(2400, 2), meta=np.ndarray>
    lat_bnds   (time, lat, bnds) float64 dask.array<chunksize=(2400, 64, 2), meta=np.ndarray>
    lon_bnds   (time, lon, bnds) float64 dask.array<chunksize=(2400, 128, 2), meta=np.ndarray>
    tas        (time, lat, lon) float32 dask.array<chunksize=(2400, 64, 128), meta=np.ndarray>
    lon_b      (lon_vertices) float64 dask.array<chunksize=(129,), meta=np.ndarray>
    lat_b      (lat_vertices) float64 dask.array<chunksize=(

  ds_out = xr.apply_ufunc(
  ds_out = xr.apply_ufunc(


NORESM2 NORESM2
<xarray.Dataset>
Dimensions:    (time: 1200, bnds: 2, lat: 96, lon: 144, lon_vertices: 145,
                lat_vertices: 97)
Coordinates:
  * time       (time) object 1850-01-16 12:00:00 ... 1949-12-16 12:00:00
  * lat        (lat) float64 -90.0 -88.11 -86.21 -84.32 ... 86.21 88.11 90.0
  * lon        (lon) float64 0.0 2.5 5.0 7.5 10.0 ... 350.0 352.5 355.0 357.5
    height     float64 2.0
Dimensions without coordinates: bnds, lon_vertices, lat_vertices
Data variables:
    time_bnds  (time, bnds) object dask.array<chunksize=(120, 2), meta=np.ndarray>
    lat_bnds   (time, lat, bnds) float64 dask.array<chunksize=(120, 96, 2), meta=np.ndarray>
    lon_bnds   (time, lon, bnds) float64 dask.array<chunksize=(120, 144, 2), meta=np.ndarray>
    tas        (time, lat, lon) float32 dask.array<chunksize=(120, 96, 144), meta=np.ndarray>
    lon_b      (lon_vertices) float64 dask.array<chunksize=(145,), meta=np.ndarray>
    lat_b      (lat_vertices) float64 dask.array<chunksize=(9

  ds_out = xr.apply_ufunc(
  ds_out = xr.apply_ufunc(


ACCESS ACCESS
<xarray.Dataset>
Dimensions:    (time: 1200, bnds: 2, lat: 145, lon: 192, lon_vertices: 193,
                lat_vertices: 146)
Coordinates:
  * time       (time) object 0271-01-16 12:00:00 ... 0370-12-16 12:00:00
  * lat        (lat) float64 -90.0 -88.75 -87.5 -86.25 ... 86.25 87.5 88.75 90.0
  * lon        (lon) float64 0.0 1.875 3.75 5.625 ... 352.5 354.4 356.2 358.1
    height     float64 ...
Dimensions without coordinates: bnds, lon_vertices, lat_vertices
Data variables:
    time_bnds  (time, bnds) object dask.array<chunksize=(1200, 2), meta=np.ndarray>
    lat_bnds   (lat, bnds) float64 dask.array<chunksize=(145, 2), meta=np.ndarray>
    lon_bnds   (lon, bnds) float64 dask.array<chunksize=(192, 2), meta=np.ndarray>
    tas        (time, lat, lon) float32 dask.array<chunksize=(1200, 145, 192), meta=np.ndarray>
    lon_b      (lon_vertices) float64 dask.array<chunksize=(193,), meta=np.ndarray>
    lat_b      (lat_vertices) float64 dask.array<chunksize=(146,), meta=np.

  ds_out = xr.apply_ufunc(
  ds_out = xr.apply_ufunc(


GFDL GFDL
<xarray.Dataset>
Dimensions:    (bnds: 2, lat: 180, lon: 288, time: 1200, lon_vertices: 289,
                lat_vertices: 181)
Coordinates:
  * bnds       (bnds) float64 1.0 2.0
    height     float64 ...
  * lat        (lat) float64 -89.5 -88.5 -87.5 -86.5 ... 86.5 87.5 88.5 89.5
  * lon        (lon) float64 0.625 1.875 3.125 4.375 ... 355.6 356.9 358.1 359.4
  * time       (time) object 0101-01-16 12:00:00 ... 0200-12-16 12:00:00
Dimensions without coordinates: lon_vertices, lat_vertices
Data variables:
    lat_bnds   (lat, bnds) float64 dask.array<chunksize=(180, 2), meta=np.ndarray>
    lon_bnds   (lon, bnds) float64 dask.array<chunksize=(288, 2), meta=np.ndarray>
    tas        (time, lat, lon) float32 dask.array<chunksize=(1200, 180, 288), meta=np.ndarray>
    time_bnds  (time, bnds) object dask.array<chunksize=(1200, 2), meta=np.ndarray>
    lon_b      (lon_vertices) float64 dask.array<chunksize=(289,), meta=np.ndarray>
    lat_b      (lat_vertices) float64 dask.array

  ds_out = xr.apply_ufunc(
  ds_out = xr.apply_ufunc(


CANESM5_r1p2 CANESM5_r1p2
<xarray.Dataset>
Dimensions:    (time: 2400, bnds: 2, lat: 64, lon: 128, lon_vertices: 129,
                lat_vertices: 65)
Coordinates:
  * time       (time) object 5401-01-16 12:00:00 ... 5600-12-16 12:00:00
  * lat        (lat) float64 -87.86 -85.1 -82.31 -79.53 ... 82.31 85.1 87.86
  * lon        (lon) float64 0.0 2.812 5.625 8.438 ... 348.8 351.6 354.4 357.2
    height     float64 ...
Dimensions without coordinates: bnds, lon_vertices, lat_vertices
Data variables:
    time_bnds  (time, bnds) object dask.array<chunksize=(2400, 2), meta=np.ndarray>
    lat_bnds   (lat, bnds) float64 dask.array<chunksize=(64, 2), meta=np.ndarray>
    lon_bnds   (lon, bnds) float64 dask.array<chunksize=(128, 2), meta=np.ndarray>
    tas        (time, lat, lon) float32 dask.array<chunksize=(2400, 64, 128), meta=np.ndarray>
    lon_b      (lon_vertices) float64 dask.array<chunksize=(129,), meta=np.ndarray>
    lat_b      (lat_vertices) float64 dask.array<chunksize=(65,), meta

  ds_out = xr.apply_ufunc(
  ds_out = xr.apply_ufunc(


CANESM5_r1p2 CANESM5_r2p2
<xarray.Dataset>
Dimensions:    (time: 2400, bnds: 2, lat: 64, lon: 128, lon_vertices: 129,
                lat_vertices: 65)
Coordinates:
  * time       (time) object 5451-01-16 12:00:00 ... 5650-12-16 12:00:00
  * lat        (lat) float64 -87.86 -85.1 -82.31 -79.53 ... 82.31 85.1 87.86
  * lon        (lon) float64 0.0 2.812 5.625 8.438 ... 348.8 351.6 354.4 357.2
    height     float64 ...
Dimensions without coordinates: bnds, lon_vertices, lat_vertices
Data variables:
    time_bnds  (time, bnds) object dask.array<chunksize=(2400, 2), meta=np.ndarray>
    lat_bnds   (lat, bnds) float64 dask.array<chunksize=(64, 2), meta=np.ndarray>
    lon_bnds   (lon, bnds) float64 dask.array<chunksize=(128, 2), meta=np.ndarray>
    tas        (time, lat, lon) float32 dask.array<chunksize=(2400, 64, 128), meta=np.ndarray>
    lon_b      (lon_vertices) float64 dask.array<chunksize=(129,), meta=np.ndarray>
    lat_b      (lat_vertices) float64 dask.array<chunksize=(65,), meta

  ds_out = xr.apply_ufunc(
  ds_out = xr.apply_ufunc(


CANESM5_r1p2 CANESM5_r3p2
<xarray.Dataset>
Dimensions:    (time: 2400, bnds: 2, lat: 64, lon: 128, lon_vertices: 129,
                lat_vertices: 65)
Coordinates:
  * time       (time) object 5501-01-16 12:00:00 ... 5700-12-16 12:00:00
  * lat        (lat) float64 -87.86 -85.1 -82.31 -79.53 ... 82.31 85.1 87.86
  * lon        (lon) float64 0.0 2.812 5.625 8.438 ... 348.8 351.6 354.4 357.2
    height     float64 ...
Dimensions without coordinates: bnds, lon_vertices, lat_vertices
Data variables:
    time_bnds  (time, bnds) object dask.array<chunksize=(2400, 2), meta=np.ndarray>
    lat_bnds   (lat, bnds) float64 dask.array<chunksize=(64, 2), meta=np.ndarray>
    lon_bnds   (lon, bnds) float64 dask.array<chunksize=(128, 2), meta=np.ndarray>
    tas        (time, lat, lon) float32 dask.array<chunksize=(2400, 64, 128), meta=np.ndarray>
    lon_b      (lon_vertices) float64 dask.array<chunksize=(129,), meta=np.ndarray>
    lat_b      (lat_vertices) float64 dask.array<chunksize=(65,), meta

  ds_out = xr.apply_ufunc(
  ds_out = xr.apply_ufunc(


In [308]:
# Re-label each model's 'year' axis with an artificial calendar that starts on
# 2000-01-01 and advances one year per entry, so every model's axis begins at
# the same date (presumably so they align when combined — verify).
base = datetime.datetime(2000, 1, 1)
for m in G:
    n_years = len(G[m]['year'])
    G[m]['year'] = [base + relativedelta(years=offset) for offset in range(n_years)]

In [309]:
# Stack the per-model results along a new 'model' dimension labelled with the
# dictionary keys (values and keys iterate in the same order).
model_index = pd.Index(list(G), name='model')
G_ds = xr.concat(list(G.values()), model_index)

In [310]:
# Multi-model mean: average over the 'model' dimension.
# NOTE(review): this is an unweighted mean over every entry in G, so the three
# CANESM5_*p2 members each count as a separate model — confirm that is intended.
G_mean_ds = G_ds.mean(dim = 'model')

## Save Green's Functions

In [311]:
# Write the per-model and multi-model-mean datasets to netCDF.
# Create the output directory first so a fresh checkout does not fail at the
# final step, after all of the regridding has already been done.
os.makedirs('Outputs', exist_ok=True)
G_ds.to_netcdf('Outputs/G_cdr_ds.nc4')
G_mean_ds.to_netcdf('Outputs/G_cdr_mean_ds.nc4')