<a href="https://colab.research.google.com/github/swartn/cmip6-test/blob/master/cmip6_fgco2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Environment setup: install or upgrade the listed packages with pip.
# NOTE(review): no versions are pinned, so a re-run resolves whatever is
# current on the package index. The saved output of this cell shows the run
# picked up xarray 0.15.0, zarr 2.4.0, gcsfs 0.6.0 and cftime 1.1.0.
! pip install --upgrade xarray zarr gcsfs cftime nc-time-axis bokeh intake-esm cmip6_preprocessing 

Collecting xarray
[?25l  Downloading https://files.pythonhosted.org/packages/e3/25/cc8ccc40d21638ae8514ce2aef1f1db3036e31c2adea797c7501302726fa/xarray-0.15.0-py3-none-any.whl (650kB)
[K     |████████████████████████████████| 655kB 2.8MB/s 
[?25hCollecting zarr
[?25l  Downloading https://files.pythonhosted.org/packages/a3/87/383d77399148ef0772da3472b513ecf143252e7c365c51b0f06714800366/zarr-2.4.0.tar.gz (3.3MB)
[K     |████████████████████████████████| 3.3MB 53.9MB/s 
[?25hCollecting gcsfs
  Downloading https://files.pythonhosted.org/packages/3e/9f/864a9ff497ed4ba12502c4037db8c66fde0049d9dd0388bd55b67e5c4249/gcsfs-0.6.0-py2.py3-none-any.whl
Collecting cftime
[?25l  Downloading https://files.pythonhosted.org/packages/53/35/e2fc52247871c51590d6660e684fdc619a93a29f40e3b64894bd4f8c9041/cftime-1.1.0-cp36-cp36m-manylinux1_x86_64.whl (316kB)
[K     |████████████████████████████████| 317kB 50.4MB/s 
[?25hCollecting nc-time-axis
  Downloading https://files.pythonhosted.org/packages/47/2b

In [3]:
# Fetch the Miniconda3 4.5.4 Linux installer, mark it executable, and run it
# with /usr/local as the install prefix (-p). The saved output of this cell
# reports PREFIX=/usr/local and a Python 3.6.5 installation.
!wget https://repo.continuum.io/miniconda/Miniconda3-4.5.4-Linux-x86_64.sh
!chmod +x Miniconda3-4.5.4-Linux-x86_64.sh
!bash ./Miniconda3-4.5.4-Linux-x86_64.sh -b -f -p /usr/local

--2020-03-06 05:28:52--  https://repo.continuum.io/miniconda/Miniconda3-4.5.4-Linux-x86_64.sh
Resolving repo.continuum.io (repo.continuum.io)... 104.18.200.79, 104.18.201.79, 2606:4700::6812:c84f, ...
Connecting to repo.continuum.io (repo.continuum.io)|104.18.200.79|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 58468498 (56M) [application/x-sh]
Saving to: ‘Miniconda3-4.5.4-Linux-x86_64.sh’


2020-03-06 05:28:52 (183 MB/s) - ‘Miniconda3-4.5.4-Linux-x86_64.sh’ saved [58468498/58468498]

PREFIX=/usr/local
installing: python-3.6.5-hc3d631a_2 ...
Python 3.6.5 :: Anaconda, Inc.
installing: ca-certificates-2018.03.07-0 ...
installing: conda-env-2.6.0-h36134e3_1 ...
installing: libgcc-ng-7.2.0-hdf63c60_3 ...
installing: libstdcxx-ng-7.2.0-hdf63c60_3 ...
installing: libffi-3.2.1-hd88cf55_4 ...
installing: ncurses-6.1-hf484d3e_0 ...
installing: openssl-1.0.2o-h20670df_0 ...
installing: tk-8.6.7-hc745277_3 ...
installing: xz-5.2.4-h14c3975_4 ...
installing: yaml-0.1.7-

In [0]:
# Install esmpy (pinned to 7.1.0) and xesmf from the conda-forge channel;
# -y accepts the confirmation prompt so the cell runs unattended.
!conda install -y -c conda-forge esmpy=7.1.0 xesmf

Solving environment: - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - 

In [0]:
import sys

# Make the packages that conda installed under /usr/local importable from this
# kernel. The directory name is derived from the running interpreter instead of
# being hard-coded: the previous literal 'python3.8' did not match the
# Python 3.6 environment reported by the installer output earlier in this
# notebook, and compiled packages are only importable when the interpreter
# version matches the directory they were built for.
conda_site_packages = (
    f"/usr/local/lib/python{sys.version_info.major}.{sys.version_info.minor}/site-packages"
)
if conda_site_packages not in sys.path:  # re-running the cell must not add duplicates
    sys.path.append(conda_site_packages)

In [0]:
import numpy as np
import pandas as pd
import xarray as xr
import warnings
import matplotlib.pyplot as plt
import gcsfs
import intake
import xesmf as xe
%matplotlib inline

In [0]:
# Open the CMIP6 catalogue described by the JSON file at cat_url as an
# intake-esm datastore. `col` is searched further down the notebook.
cat_url = "https://storage.googleapis.com/cmip6/pangeo-cmip6.json"
col = intake.open_esm_datastore(cat_url)
# Bare last expression: show the datastore summary as the cell output.
col

In [0]:
from cmip6_preprocessing.preprocessing import combined_preprocessing, replace_x_y_nominal_lat_lon, rename_cmip6

def wrapper(ds):
    """Apply a reduced preprocessing chain to one dataset.

    A copy of ``ds`` is passed through ``rename_cmip6`` and then through
    ``replace_x_y_nominal_lat_lon``; the input object itself is not rebound.

    Parameters
    ----------
    ds
        Dataset to preprocess (anything exposing ``.copy()`` that the two
        cmip6_preprocessing helpers accept).

    Returns
    -------
    The result of ``replace_x_y_nominal_lat_lon(rename_cmip6(ds.copy()))``.
    """
    renamed = rename_cmip6(ds.copy())
    return replace_x_y_nominal_lat_lon(renamed)

In [0]:

# Search the catalogue for variable fgco2 in table Omon with grid label gn,
# for the ssp585 and historical experiments and the two listed member ids.
query = {
    'experiment_id': ['ssp585', 'historical'],
    'table_id': 'Omon',
    'variable_id': 'fgco2',
    'grid_label': ['gn'],
    'member_id': ['r1i1p1f1', 'r1i1p2f1'],
}
cat = col.search(**query)

# Show which models matched before opening anything.
print(cat.df['source_id'].unique())

# Open every matching store into a {catalogue key: dataset} dictionary,
# running combined_preprocessing on each dataset as it is loaded.
dset_dict = cat.to_dataset_dict(
    zarr_kwargs={'consolidated': True},
    preprocess=combined_preprocessing,
)

In [0]:
# Datasets whose catalogue key mentions ssp585, keyed by the third
# dot-separated field of that key (used throughout as the model name).
ssp585_dict = {
    key.split(".")[2]: dset
    for key, dset in dset_dict.items()
    if 'ssp585' in key
}

# Historical datasets, kept only for models that also have an ssp585 entry.
his_dict = {
    key.split(".")[2]: dset
    for key, dset in dset_dict.items()
    if 'historical' in key and key.split(".")[2] in ssp585_dict
}

In [0]:
# Sanity check: the model names available in each experiment, one per line.
print(his_dict.keys(), ssp585_dict.keys(), sep="\n")

In [0]:
# Common target grid for every model: global coverage with a spacing of
# 1 degree in longitude and 1 degree in latitude.
ds_out = xe.util.grid_global(d_lon=1, d_lat=1)
ds_out

In [0]:
# Scale factor applied to the model fields before regridding. Judging by the
# name it turns a per-second mass flux in kg into a per-year flux in mol,
# using 12 g per mol and a 365-day year.
# NOTE(review): confirm against the units attribute of fgco2.
grams_per_kg = 1000.
grams_per_mol = 12.
seconds_per_hour = 3600
hours_per_day = 24
days_per_year = 365
# Factors are multiplied left to right in the original order so the floating
# point result is bit-for-bit unchanged.
kgs_to_molyr = grams_per_kg / grams_per_mol * seconds_per_hour * hours_per_day * days_per_year

In [0]:
# Spot-check one model: display the fgco2 variable of the historical
# ACCESS-ESM1-5 dataset (raises KeyError if that model was not matched).
his_dict['ACCESS-ESM1-5']['fgco2']

In [0]:
 rm_his_dict = {}
for name, ds in his_dict.items():
    print(name)
    regridder = xe.Regridder(ds, ds_out, 'bilinear', ignore_degenerate=True)
    ds_in = ds.sel(time=slice('1980-01-01','2014-12-31')).mean(dim='time')*kgs_to_molyr
    dsrm = regridder(ds_in).compute()
    rm_his_dict[name] = dsrm

In [0]:
# One panel per model showing its regridded time-mean fgco2.
# The panel grid is sized from the number of models: with the previous fixed
# 3x3 layout, a tenth model raised IndexError on axa.flat[i]. Nine models still
# give the same 3x3, 12x12-inch figure as before.
ncols = 3
nrows = max(1, -(-len(rm_his_dict) // ncols))  # ceiling division, at least one row
# squeeze=False keeps `axa` two-dimensional even when there is a single row.
fig, axa = plt.subplots(nrows, ncols, figsize=(4 * ncols, 4 * nrows), squeeze=False)
for i, (name, ds) in enumerate(rm_his_dict.items()):
    print(name)
    ax = axa.flat[i]
    ds['fgco2'].plot(ax=ax, cmap='RdBu_r')
    ax.set_title(name)

# Hide panels left over when the model count is not a multiple of ncols.
for spare_ax in axa.flat[len(rm_his_dict):]:
    spare_ax.set_visible(False)
    

In [0]:
# Debug leftover: echo the first Axes object of the panel figure.
axa.flat[0]

**OLD attempt**

In [0]:
# Legacy approach: query the flat CSV catalogue with pandas and open the
# matching zarr stores by hand.
#
# The catalogue is kept under its own name. Previously it was stored in `df`
# and then overwritten by the concatenated dataset, so the cell could not be
# re-run, while `canesm5` remained a plain list even though the following
# cells call `canesm5.sel(...)`.
catalog_df = pd.read_csv('https://storage.googleapis.com/cmip6/cmip6-zarr-consolidated-stores.csv')

experiment_id = "historical"
variable_id = 'fgco2'
table_id = 'Omon'
# NOTE(review): the query selects a single member id, so the later mean/std
# over the member_id dimension are degenerate unless more members are loaded.
member_id = 'r1i1p1f1'

canesm5_query = catalog_df.query(
    "activity_id=='CMIP' & "
    f"table_id == '{table_id}' & "
    f"variable_id == '{variable_id}' & "
    f"experiment_id == '{experiment_id}' & "
    f"member_id == '{member_id}' & "
    f"source_id == 'CanESM5'"
)

# this only needs to be created once
gcs = gcsfs.GCSFileSystem(token='anon')

# Open each matching store and label it with field 7 of its '/'-split path
# (presumably the member id, given the name it is stored under).
member_datasets = []
for zstore in canesm5_query.zstore.values:
    mapper = gcs.get_mapper(zstore)
    member_ds = xr.open_zarr(mapper, consolidated=True)
    member_ds['member_id'] = zstore.split('/')[7]
    member_datasets.append(member_ds)

if not member_datasets:
    raise ValueError("catalogue query matched no zarr stores; nothing to concatenate")

# Stack the per-member datasets along member_id. `canesm5` is what the
# following cells operate on; `df` is kept as an alias because a later cell
# displays the dataset under that name.
canesm5 = xr.concat(member_datasets, dim='member_id')
df = canesm5

In [0]:
# Display the concatenated xarray object. Note that by this point `df` refers
# to the result of xr.concat from the previous cell, not a pandas DataFrame.
df

In [0]:
# fgco2 averaged over the 1980-1985 window. The member_id dimension is not
# reduced here; the next cells take statistics across it.
fgco2_lim = (
    canesm5
    .sel(time=slice('1980-01-01', '1985-12-31'))
    .mean(dim='time')['fgco2']
)

In [0]:
# Map of the time-mean fgco2 averaged across member_id.
fgco2_lim.mean(dim='member_id').plot()

In [0]:
# Map of the spread (standard deviation) of the time-mean fgco2 across
# member_id, drawn with a fixed 0 to 1e-9 colour range.
fgco2_lim.std(dim='member_id').plot(vmin=0, vmax=1e-9, cmap='Reds')

In [0]:
# Standard deviation of fgco2 over time within the 1980-1985 window, then
# averaged across member_id; .compute() evaluates the result immediately.
fgco2_std = (
    canesm5
    .sel(time=slice('1980-01-01', '1985-12-31'))
    .std(dim='time')['fgco2']
    .mean(dim='member_id')
    .compute()
)

In [0]:
# Map of the temporal standard deviation computed in the previous cell, drawn
# with a fixed 0 to 1e-9 colour range.
fgco2_std.plot(vmin=0, vmax=1e-9, cmap='Reds')

In [0]:
# Same window as above, but the data are first resampled with the 'A'
# frequency alias and averaged within each bin; the standard deviation is then
# taken across those bin means, averaged across member_id, and computed.
fgco2_std_ann = (
    canesm5
    .sel(time=slice('1980-01-01', '1985-12-31'))
    .resample(time='A')
    .mean()
    .std(dim='time')['fgco2']
    .mean(dim='member_id')
    .compute()
)

In [0]:
# Map of the standard deviation of the resampled means from the previous cell,
# drawn with a fixed 0 to 1e-9 colour range.
fgco2_std_ann.plot(vmin=0, vmax=1e-9, cmap='Reds')