In [310]:
from matplotlib import pyplot as plt
import numpy as np
import cartopy.crs as ccrs
import pandas as pd
import xarray as xr
from morpher.config import parse
import zarr
import fsspec
import xclim as xc


%matplotlib inline
%config InlineBackend.figure_format = 'retina'
plt.rcParams['figure.figsize'] = 12, 6

In [255]:
models = pd.read_csv(parse('modelsources'))
models = models[models['in_ensemble']=='Yes']['source_id'].tolist()

['ACCESS-CM2', 'BCC-CSM2-MR', 'CanESM5', 'GFDL-CM4', 'INM-CM5-0', 'TaiESM1']

In [58]:
df = pd.read_csv('https://storage.googleapis.com/cmip6/cmip6-zarr-consolidated-stores.csv')
df.sample(6)

Unnamed: 0,activity_id,institution_id,source_id,experiment_id,member_id,table_id,variable_id,grid_label,zstore,dcpp_init_year,version
123883,ScenarioMIP,CCCma,CanESM5,ssp126,r21i1p2f1,Omon,spco2abio,gn,gs://cmip6/CMIP6/ScenarioMIP/CCCma/CanESM5/ssp...,,20190429
105678,ScenarioMIP,CCCma,CanESM5,ssp585,r5i1p2f1,Amon,rlut,gn,gs://cmip6/CMIP6/ScenarioMIP/CCCma/CanESM5/ssp...,,20190429
156535,DCPP,CCCma,CanESM5,dcppA-hindcast,r6i1p2f1,Amon,hus,gn,gs://cmip6/DCPP/CCCma/CanESM5/dcppA-hindcast/s...,2005.0,20190429
185918,DCPP,CCCma,CanESM5,dcppA-hindcast,r11i1p2f1,Amon,huss,gn,gs://cmip6/DCPP/CCCma/CanESM5/dcppA-hindcast/s...,1974.0,20190429
241468,ScenarioMIP,DKRZ,MPI-ESM1-2-HR,ssp126,r1i1p1f1,Oyr,ppdiaz,gn,gs://cmip6/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-H...,,20190710
61410,ScenarioMIP,BCC,BCC-CSM2-MR,ssp585,r1i1p1f1,Lmon,tran,gn,gs://cmip6/CMIP6/ScenarioMIP/BCC/BCC-CSM2-MR/s...,,20190308


In [59]:
df['activity_id'].unique()

array(['HighResMIP', 'CMIP', 'CFMIP', 'ScenarioMIP', 'RFMIP',
       'AerChemMIP', 'LUMIP', 'DAMIP', 'FAFMIP', 'OMIP', 'GMMIP', 'C4MIP',
       'CDRMIP', 'PMIP', 'LS3MIP', 'DCPP', 'PAMIP', 'ISMIP6'],
      dtype=object)

In [None]:
df_ta = df.query(f"activity_id=='ScenarioMIP' & table_id == 'Amon' & variable_id == 'tas' & experiment_id == 'ssp126' & member_id == 'r1i1p1f1'")
scen_links = df_ta[df_ta['source_id'].isin(models)]
scen_links

In [254]:
insts = scen_links['institution_id'].tolist()
insts

['BCC', 'CCCma', 'INM', 'AS-RCEC', 'CSIRO-ARCCSS']

In [69]:
ds_dict = dict()

for inst in insts:
    print(inst)
    df_ta = scen_links.query(f"institution_id == '{inst}'")

    # get the path to a specific zarr store (the first one from the dataframe above)
    zstore = df_ta.zstore.values[-1]
    print(zstore)

    # create a mutable-mapping-style interface to the store
    mapper = fsspec.get_mapper(zstore)

    # open it using xarray and zarr
    ds = xr.open_zarr(mapper, consolidated=True)
    ds_dict[inst] = ds

    print(f"Lat min is {round(ds.lat.min().values.tolist(),2)} and \n lat max is {round(ds.lat.max().values.tolist(),2)}")
    print(f"Lon min is {ds.lon.min().values.tolist()} and \n lon max is {ds.lon.max().values.tolist()}")

BCC
gs://cmip6/CMIP6/ScenarioMIP/BCC/BCC-CSM2-MR/ssp126/r1i1p1f1/Amon/tas/gn/v20190314/




Lat min is -89.14 and 
 lat max is 89.14
Lon min is 0.0 and 
 lon max is 358.875
CCCma
gs://cmip6/CMIP6/ScenarioMIP/CCCma/CanESM5/ssp126/r1i1p1f1/Amon/tas/gn/v20190429/
Lat min is -87.86 and 
 lat max is 87.86
Lon min is 0.0 and 
 lon max is 357.1875
INM
gs://cmip6/CMIP6/ScenarioMIP/INM/INM-CM5-0/ssp126/r1i1p1f1/Amon/tas/gr1/v20190619/
Lat min is -89.25 and 
 lat max is 89.25
Lon min is 0.0 and 
 lon max is 358.0
AS-RCEC
gs://cmip6/CMIP6/ScenarioMIP/AS-RCEC/TaiESM1/ssp126/r1i1p1f1/Amon/tas/gn/v20201124/
Lat min is -90.0 and 
 lat max is 90.0
Lon min is 0.0 and 
 lon max is 358.75
CSIRO-ARCCSS
gs://cmip6/CMIP6/ScenarioMIP/CSIRO-ARCCSS/ACCESS-CM2/ssp126/r1i1p1f1/Amon/tas/gn/v20210317/
Lat min is -89.38 and 
 lat max is 89.38
Lon min is 0.9375 and 
 lon max is 359.0625


  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime)
  return array(a, dtype, copy=False, order=order)


In [108]:
a = ds_dict[insts[0]].tas
b = ds_dict[insts[1]].tas

all_models = xr.concat([a.assign_coords({'models': insts[0]}),
                        b.assign_coords({'models': insts[1]})],dim='models')

    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  value = value[(slice(None),) * axis + (subkey,)]
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  value = value[(slice(None),) * axis + (subkey,)]


In [133]:
mod = insts[5]
ds_dict[mod].tas.assign_coords({'models': mod}).time

IndexError: list index out of range

In [161]:
arrays = []

for n in insts:
    if n=='CSIRO-ARCCSS':
        pass
    else:
        print(n)
        da = ds_dict[n].tas.assign_coords({'models': n})
        da = da.sel(time=slice('2015', '2100'))
        arrays.append(da)

BCC
CCCma
INM
AS-RCEC


    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  value = value[(slice(None),) * axis + (subkey,)]
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  value = value[(slice(None),) * axis + (subkey,)]
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  value = value[(slice(None),) * axis + (subkey,)]
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': Fa

Unnamed: 0,Array,Chunk
Bytes,6.33 GiB,640.69 MiB
Shape,"(4, 1032, 536, 768)","(1, 408, 536, 768)"
Count,209 Tasks,24 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 6.33 GiB 640.69 MiB Shape (4, 1032, 536, 768) (1, 408, 536, 768) Count 209 Tasks 24 Chunks Type float32 numpy.ndarray",4  1  768  536  1032,

Unnamed: 0,Array,Chunk
Bytes,6.33 GiB,640.69 MiB
Shape,"(4, 1032, 536, 768)","(1, 408, 536, 768)"
Count,209 Tasks,24 Chunks
Type,float32,numpy.ndarray


In [218]:
startyear = parse('yearranges').split('|')[0].split(',')[0]
endyear = parse('yearranges').split('|')[0].split(',')[1]
mean_data = arrays[0].sel(time=slice(startyear, endyear)).mean(dim='time')

'BCC'

In [300]:
df_hist = df.query(f"experiment_id == 'historical' & table_id == 'Amon' & variable_id == 'tas' & member_id == 'r1i1p1f1'")
model_match = mean_data.models.values.tolist()
model_match = [model for model in models if model_match in model]
hist_link = df_hist[df_hist['source_id']==model_match[0]]


# get the path to a specific zarr store (the first one from the dataframe above)
zstore = hist_link.zstore.values[-1]
print(zstore)

# create a mutable-mapping-style interface to the store
mapper = fsspec.get_mapper(zstore)

# open it using xarray and zarr
hist_ds = xr.open_zarr(mapper, consolidated=True)

gs://cmip6/CMIP6/CMIP/BCC/BCC-CSM2-MR/historical/r1i1p1f1/Amon/tas/gn/v20181126/


In [308]:
hist_ds_short = hist_ds.sel(time=slice(parse('baselinestart'), parse('baselineend'))).tas.mean(dim='time')
difference = mean_data - hist_ds_short
difference

Unnamed: 0,Array,Chunk
Bytes,200.00 kiB,200.00 kiB
Shape,"(160, 320)","(160, 320)"
Count,20 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 200.00 kiB 200.00 kiB Shape (160, 320) (160, 320) Count 20 Tasks 1 Chunks Type float32 numpy.ndarray",320  160,

Unnamed: 0,Array,Chunk
Bytes,200.00 kiB,200.00 kiB
Shape,"(160, 320)","(160, 320)"
Count,20 Tasks,1 Chunks
Type,float32,numpy.ndarray


In [311]:
map_proj = ccrs.LambertConformal(central_longitude=-95, central_latitude=45)

difference.squeeze().plot()
plt.scatter(float(parse('longitude')),
            float(parse('latitude')),
            facecolors='none',
            edgecolors='black',
            marker='o',
            transform=ccrs.PlateCarree(),
            s=6,
            linewidths=0.5)

ValueError: Axes should be an instance of GeoAxes, got <class 'matplotlib.axes._subplots.AxesSubplot'>

ValueError: Axes should be an instance of GeoAxes, got <class 'matplotlib.axes._subplots.AxesSubplot'>

<Figure size 864x432 with 2 Axes>