In [1]:
# Load modules

import xarray as xr
import pandas as pd
import numpy as np
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
import zarr
import gcsfs
import xesmf as xe
from scipy.interpolate import griddata
from xmip.preprocessing import rename_cmip6
from matplotlib import gridspec
import time

In [2]:
# Load the CMIP6 zarr-store catalog hosted on Google Cloud Storage.
# Each row describes one store; the `zstore` column holds its gs:// URL.

catalog_url = 'https://storage.googleapis.com/cmip6/cmip6-zarr-consolidated-stores.csv'
df = pd.read_csv(catalog_url)

# Preview the first few catalog rows.
df.head()

Unnamed: 0,activity_id,institution_id,source_id,experiment_id,member_id,table_id,variable_id,grid_label,zstore,dcpp_init_year,version
0,HighResMIP,CMCC,CMCC-CM2-HR4,highresSST-present,r1i1p1f1,Amon,ps,gn,gs://cmip6/CMIP6/HighResMIP/CMCC/CMCC-CM2-HR4/...,,20170706
1,HighResMIP,CMCC,CMCC-CM2-HR4,highresSST-present,r1i1p1f1,Amon,rsds,gn,gs://cmip6/CMIP6/HighResMIP/CMCC/CMCC-CM2-HR4/...,,20170706
2,HighResMIP,CMCC,CMCC-CM2-HR4,highresSST-present,r1i1p1f1,Amon,rlus,gn,gs://cmip6/CMIP6/HighResMIP/CMCC/CMCC-CM2-HR4/...,,20170706
3,HighResMIP,CMCC,CMCC-CM2-HR4,highresSST-present,r1i1p1f1,Amon,rlds,gn,gs://cmip6/CMIP6/HighResMIP/CMCC/CMCC-CM2-HR4/...,,20170706
4,HighResMIP,CMCC,CMCC-CM2-HR4,highresSST-present,r1i1p1f1,Amon,psl,gn,gs://cmip6/CMIP6/HighResMIP/CMCC/CMCC-CM2-HR4/...,,20170706


In [8]:
# Land-area-fraction (sftlf) stores from the piControl run, first ensemble
# member only, keeping one catalog row per model and dropping the models
# listed below.
# NOTE(review): the reason these models are excluded is not recorded here.
excluded_models = ['GISS-E2-1-G-CC','NorESM1-F','E3SM-1-1-ECA','EC-Earth3-LR','AWI-ESM-1-1-LR']

df_land = (
    df.query("activity_id=='CMIP'  & variable_id == 'sftlf'  & experiment_id == 'piControl' & member_id=='r1i1p1f1'")
    .drop_duplicates(['source_id'])
    .loc[lambda frame: ~frame['source_id'].isin(excluded_models)]
)
df_land

Unnamed: 0,activity_id,institution_id,source_id,experiment_id,member_id,table_id,variable_id,grid_label,zstore,dcpp_init_year,version
857,CMIP,NOAA-GFDL,GFDL-CM4,piControl,r1i1p1f1,fx,sftlf,gr1,gs://cmip6/CMIP6/CMIP/NOAA-GFDL/GFDL-CM4/piCon...,,20180701
32254,CMIP,NASA-GISS,GISS-E2-1-H,piControl,r1i1p1f1,fx,sftlf,gn,gs://cmip6/CMIP6/CMIP/NASA-GISS/GISS-E2-1-H/pi...,,20180824
32308,CMIP,NASA-GISS,GISS-E2-1-G,piControl,r1i1p1f1,fx,sftlf,gn,gs://cmip6/CMIP6/CMIP/NASA-GISS/GISS-E2-1-G/pi...,,20180824
43190,CMIP,IPSL,IPSL-CM6A-LR,piControl,r1i1p1f1,fx,sftlf,gr,gs://cmip6/CMIP6/CMIP/IPSL/IPSL-CM6A-LR/piCont...,,20181123
65123,CMIP,MIROC,MIROC6,piControl,r1i1p1f1,fx,sftlf,gn,gs://cmip6/CMIP6/CMIP/MIROC/MIROC6/piControl/r...,,20190311
67816,CMIP,NCAR,CESM2,piControl,r1i1p1f1,fx,sftlf,gn,gs://cmip6/CMIP6/CMIP/NCAR/CESM2/piControl/r1i...,,20190320
67858,CMIP,NCAR,CESM2-WACCM,piControl,r1i1p1f1,fx,sftlf,gn,gs://cmip6/CMIP6/CMIP/NCAR/CESM2-WACCM/piContr...,,20190320
93385,CMIP,CCCma,CanESM5,piControl,r1i1p1f1,fx,sftlf,gn,gs://cmip6/CMIP6/CMIP/CCCma/CanESM5/piControl/...,,20190429
205553,CMIP,MRI,MRI-ESM2-0,piControl,r1i1p1f1,fx,sftlf,gn,gs://cmip6/CMIP6/CMIP/MRI/MRI-ESM2-0/piControl...,,20190603
205919,CMIP,INM,INM-CM4-8,piControl,r1i1p1f1,fx,sftlf,gr1,gs://cmip6/CMIP6/CMIP/INM/INM-CM4-8/piControl/...,,20190604


In [13]:
# Build one land-fraction (sftlf) field per model in `df_land`, all
# interpolated onto a shared lat/lon grid, and collect them in
# `dset_list_sftlf`.

# Anonymous file-system handles for the public CMIP6 bucket; these only need
# to be created once (from Ryan Abernathy's tutorial - Way 2).
# `gcs` is not used in this cell; it stays defined in case other cells
# reference it.
gcs = gcsfs.GCSFileSystem(token='anon')
fs = gcsfs.GCSFileSystem(token='anon', access='read_only')

# Target grid for the regridding: 2-degree spacing in latitude and longitude.
new_lats = np.arange(-87, 88, 2)
new_lons = np.arange(1, 359, 2)

# Common monthly time index to assign model times to (3000 months = 250 years).
# It is not applied to sftlf, which is a time-invariant (table `fx`) field;
# it stays defined here because other cells may use it.
# NOTE(review): pandas >= 2.2 deprecates the 'M' alias in favour of 'ME';
# left unchanged so the cell still runs on older pandas.
new_time_index = pd.date_range(start='1851-01-01', freq='M', periods=3000)

dset_list_sftlf = []
# Take the model name straight from the catalog's `source_id` column rather
# than re-parsing it out of the zstore URL. `df_land` holds one row per model,
# so this visits the same stores in the same order as iterating the unique
# zstore values.
for name, zstore in zip(df_land['source_id'], df_land['zstore']):
    print(name)
    ds = xr.open_zarr(fs.get_mapper(zstore), consolidated=True)
    # Interpolate only the variable we keep, onto the shared grid.
    ds_sftlf = ds['sftlf'].interp(lat=new_lats, lon=new_lons)
    # Tag the field with its model name as a scalar coordinate called 'name';
    # a later concat along 'name' turns it into the model dimension.
    ds_sftlf = ds_sftlf.assign_coords(name=name)
    dset_list_sftlf.append(ds_sftlf)

GFDL-CM4
GISS-E2-1-H
GISS-E2-1-G
IPSL-CM6A-LR
MIROC6
CESM2
CESM2-WACCM
CanESM5
MRI-ESM2-0
INM-CM4-8
MPI-ESM-1-2-HAM
HadGEM3-GC31-LL
MPI-ESM1-2-LR
MPI-ESM1-2-HR
SAM0-UNICON
NorCPM1
E3SM-1-0
ACCESS-CM2
CESM2-FV2
CESM2-WACCM-FV2
E3SM-1-1
ACCESS-ESM1-5
HadGEM3-GC31-MM
EC-Earth3-Veg-LR
EC-Earth3-Veg
TaiESM1
FGOALS-g3
EC-Earth3
AWI-CM-1-1-MR
CMCC-CM2-SR5
EC-Earth3-AerChem


In [14]:
# Stack the per-model land-fraction fields along the 'name' (model) dimension.
# coords='minimal' with compat='override' tells xarray to skip equality checks
# on the remaining coordinates and take them from the first field.
concat_options = dict(dim='name', coords='minimal', compat='override')
land_control = xr.concat(dset_list_sftlf, **concat_options)

# Load the values into memory; as the last expression this also displays the array.
land_control.load()

In [15]:
# Save the combined multi-model land mask as a NetCDF file.
landmask_path = '/data/keeling/a/mailes2/a/finalized_var_data/CMIP_landmask_31model.nc'
land_control.to_netcdf(landmask_path)

# Testing fluxes

In [26]:
# Monthly-atmosphere (Amon) piControl catalog rows for the three flux
# variables rlut, rsut and rsdt, keeping one row per (variable, model) pair.
# drop_duplicates returns a new frame, so its result has to be assigned;
# calling it without assignment leaves every duplicate row in df_t.
df_t = (
    df.query("activity_id=='CMIP' & table_id == 'Amon' & variable_id == ['rlut','rsut','rsdt'] & experiment_id == 'piControl'")
    .drop_duplicates(['variable_id', 'source_id'])
)

# Models that provide at least one of the three variables.
df_t['source_id'].unique()

array(['HadGEM3-GC31-MM', 'GFDL-CM4', 'GFDL-ESM4', 'CNRM-CM6-1',
       'GISS-E2-1-G', 'BCC-CSM2-MR', 'CNRM-ESM2-1', 'MIROC6', 'BCC-ESM1',
       'EC-Earth3-LR', 'MRI-ESM2-0', 'IPSL-CM6A-LR', 'CESM2',
       'CESM2-WACCM', 'UKESM1-0-LL', 'GISS-E2-1-H', 'CanESM5-CanOE',
       'CanESM5', 'INM-CM4-8', 'EC-Earth3-Veg', 'INM-CM5-0',
       'MPI-ESM-1-2-HAM', 'HadGEM3-GC31-LL', 'NESM3', 'MPI-ESM1-2-LR',
       'MPI-ESM1-2-HR', 'EC-Earth3', 'E3SM-1-0', 'CAMS-CSM1-0',
       'MCM-UA-1-0', 'GISS-E2-1-G-CC', 'FGOALS-g3', 'MIROC-ES2L',
       'SAM0-UNICON', 'NorCPM1', 'NorESM1-F', 'AWI-CM-1-1-MR',
       'KACE-1-0-G', 'CNRM-CM6-1-HR', 'FGOALS-f3-L', 'E3SM-1-1',
       'NorESM2-MM', 'ACCESS-CM2', 'IITM-ESM', 'GISS-E2-2-G',
       'CESM2-WACCM-FV2', 'GISS-E2-2-H', 'CESM2-FV2', 'CIESM',
       'ACCESS-ESM1-5', 'E3SM-1-1-ECA', 'TaiESM1', 'AWI-ESM-1-1-LR',
       'EC-Earth3-Veg-LR', 'CAS-ESM2-0', 'CMCC-CM2-SR5',
       'EC-Earth3-AerChem', 'FIO-ESM-2-0', 'NorESM2-LM', 'MIROC-ES2H',
       'ICON-ESM-L

In [None]:
# Iterate over every model name present in df_t.
# NOTE(review): as visible here the loop body is only the bare expression
# `df_t`, which has no effect, and `model` is never used. This looks like an
# unfinished per-model loop. TODO: complete it or remove the cell.
for model in df_t['source_id'].unique():
    df_t