In [265]:
import numpy as np
import pandas as pd
import xarray as xr
import zarr
import gcsfs
from tqdm.notebook import tqdm

xr.set_options(display_style='html')

from dask_gateway import Gateway
from dask.distributed import Client

# Start a Dask Gateway cluster and attach a distributed client to it so that
# later dask-backed computations run on the cluster.
gateway = Gateway()
cluster = gateway.new_cluster()
# Adaptive scaling, bounded by the minimum/maximum given here.
cluster.adapt(minimum=1, maximum=20)
client = Client(cluster)
# Last expression of the cell: displays the cluster widget.
cluster

VBox(children=(HTML(value='<h2>GatewayCluster</h2>'), HBox(children=(HTML(value='\n<div>\n<style scoped>\n    …

In [266]:
def load_data(df):
    """
    Open the zarr store of every model listed in a catalog subset.

    Parameters
    ----------
    df : pandas.DataFrame
        Catalog rows; the ``source_id``, ``zstore`` and ``variable_id``
        columns are read.

    Returns
    -------
    dict
        ``{source_id: variable}`` where ``variable`` is the ``tas``,
        ``tasmin``, ``tasmax`` or ``pr`` attribute of the dataset opened
        from the first ``zstore`` listed for that ``source_id``.
        An empty frame yields an empty dict.

    Notes
    -----
    The variable is chosen once for the whole frame with the precedence
    ``tas`` > ``tasmin`` > ``tasmax`` > ``pr`` (``pr`` is the fallback when
    none of the others is present). A frame that mixes several
    ``variable_id`` values therefore gets the same attribute for every row.
    """
    ds = {}
    if len(df) == 0:
        # Nothing to open: skip creating a filesystem client for no work.
        return ds

    # Loop-invariant: decide the variable name once instead of on every row.
    variable_ids = df['variable_id']
    if (variable_ids == 'tas').any():
        varname = 'tas'
    elif variable_ids.str.contains('tasmin', na=False).any():
        varname = 'tasmin'
    elif variable_ids.str.contains('tasmax', na=False).any():
        varname = 'tasmax'
    else:
        varname = 'pr'

    # One anonymous GCS client is shared by all stores.
    gcs = gcsfs.GCSFileSystem(token='anon')

    for source_id in tqdm(df['source_id']):
        if source_id in ds:
            # A repeated source_id would resolve to the same first store
            # again; do not reopen it.
            continue
        vad = df.loc[df['source_id'] == source_id, 'zstore'].values[0]
        dataset = xr.open_zarr(gcs.get_mapper(vad), consolidated=True)
        ds[source_id] = getattr(dataset, varname)
    return ds

In [267]:
# Limit the spatial domain before averaging.
# PNW domain: 124.5°W–110.5°W, 41.5°N–49.5°N
# Expanded domain: 165°W–100°W, 20°N–60°N
# Longitudes are passed to .sel() in degrees east (0–360), e.g. 235–249 for the PNW box.

def meanannualmean(df, y, lat_bounds=(41.5, 49.5), lon_bounds=(235, 249)):
    """
    Average each model over a time window and a latitude/longitude box.

    Parameters
    ----------
    df : dict
        Maps ``source_id`` to an array-like object exposing ``.dims`` and
        ``.sel(...)`` (despite the name, this is a dict, not a DataFrame).
    y : sequence
        ``y[0]`` and ``y[1]`` are the start and end of the time slice.
    lat_bounds, lon_bounds : tuple, optional
        ``(lower, upper)`` limits of the box. The defaults reproduce the
        previously hard-coded 41.5–49.5 / 235–249 selection.

    Returns
    -------
    dict
        ``{source_id: selection.mean()}``, i.e. a plain ``.mean()`` with no
        arguments and no weighting applied here. Models whose selection
        fails are left out and reported through ``warnings.warn``.
    """
    import warnings

    period = slice(y[0], y[1])
    ds = {}
    for source_id in tqdm(df.keys()):
        da = df[source_id]
        try:
            # Choose the coordinate names from the array's own dimensions
            # instead of depending on which exception type .sel() raises
            # for an unknown dimension name.
            if 'lat' in da.dims:
                lat_name, lon_name = 'lat', 'lon'
            else:
                lat_name, lon_name = 'latitude', 'longitude'
            box = {
                'time': period,
                lat_name: slice(lat_bounds[0], lat_bounds[1]),
                lon_name: slice(lon_bounds[0], lon_bounds[1]),
            }
            ds[source_id] = da.sel(**box).mean()
        except Exception as err:
            # Still best-effort (one bad model must not abort the rest), but
            # no longer silent: say which model was dropped and why.
            warnings.warn(
                f"meanannualmean: skipping {source_id!r} "
                f"({type(err).__name__}: {err})",
                stacklevel=2,
            )
    return ds

In [304]:
def meandiurnalrange(max, min, y, lat_bounds=(41.5, 49.5), lon_bounds=(235, 249)):
    """
    Seasonal mean of (maximum - minimum) for each model over a lat/lon box.

    Parameters
    ----------
    max, min : dict
        Map ``source_id`` to array-like objects exposing ``.dims``,
        ``.sel(...)`` and ``.groupby(...)``. The names shadow the builtins
        but are kept because they are the function's public parameter names.
    y : sequence
        ``y[0]`` and ``y[1]`` are the start and end of the time slice.
    lat_bounds, lon_bounds : tuple, optional
        ``(lower, upper)`` limits of the box. The defaults reproduce the
        previously hard-coded 41.5–49.5 / 235–249 selection.

    Returns
    -------
    dict
        ``{source_id: mx - mn}`` where each term is grouped by
        ``'time.season'`` and averaged over time and the two spatial
        dimensions. Models present in ``max`` but missing from ``min`` are
        skipped with a warning instead of raising ``KeyError``.
    """
    import warnings

    # Local aliases so the builtins are not shadowed in the rest of the body.
    maxima_by_model, minima_by_model = max, min
    period = slice(y[0], y[1])

    def _seasonal_box_mean(da):
        """Select the period and box, then average per season."""
        # Accept either lat/lon or latitude/longitude as dimension names.
        if 'lat' in da.dims:
            lat_name, lon_name = 'lat', 'lon'
        else:
            lat_name, lon_name = 'latitude', 'longitude'
        box = da.sel(**{
            'time': period,
            lat_name: slice(lat_bounds[0], lat_bounds[1]),
            lon_name: slice(lon_bounds[0], lon_bounds[1]),
        })
        return box.groupby('time.season').mean(['time', lat_name, lon_name])

    ds = {}
    for source_id in tqdm(maxima_by_model.keys()):
        if source_id not in minima_by_model:
            # One model without a matching minimum must not abort the run.
            warnings.warn(
                f"meandiurnalrange: skipping {source_id!r} (no matching entry in `min`)",
                stacklevel=2,
            )
            continue
        mx = _seasonal_box_mean(maxima_by_model[source_id])
        mn = _seasonal_box_mean(minima_by_model[source_id])
        ds[source_id] = mx - mn
    return ds

In [309]:
def main():
    """
    Run the whole workflow: read the catalog, open the stores, compute metrics.

    Steps
    -----
    1. Read the CMIP6 zarr catalog CSV.
    2. Select, for each of ``tas``, ``tasmin``, ``tasmax`` and ``pr``, the
       rows with activity ``CMIP``, experiment ``historical``, member
       ``r1i1p1f1``, table ``Amon`` and grid label ``gn``.
    3. Open every selected store with ``load_data``.
    4. Compute the per-model means with ``meanannualmean`` and the seasonal
       max-min differences with ``meandiurnalrange``.

    Returns
    -------
    dict
        ``{'annmean_tas': ..., 'annmean_pr': ..., 'diurnal_tas': ...}``, each
        value being the per-model dict returned by the corresponding helper.
        (Previously the results were computed and then discarded.)
    """
    # --- Gather data -------------------------------------------------
    df = pd.read_csv('https://storage.googleapis.com/cmip6/cmip6-zarr-consolidated-stores.csv')

    def _historical(variable_id):
        """Catalog rows of the historical runs for one ``variable_id``."""
        # Same filter for every variable; only variable_id changes.
        return df.query(
            "activity_id=='CMIP' & experiment_id=='historical' & member_id=='r1i1p1f1'"
            f" & table_id=='Amon' & variable_id=='{variable_id}' & grid_label=='gn'"
        )

    tas_rows = _historical('tas')
    tasmin_rows = _historical('tasmin')
    tasmax_rows = _historical('tasmax')
    pr_rows = _historical('pr')

    # --- Open the stores ---------------------------------------------
    print('Loading All Data...')

    tas = load_data(tas_rows)
    tasmax = load_data(tasmax_rows)
    tasmin = load_data(tasmin_rows)
    pr = load_data(pr_rows)

    # --- Mean temperature and precipitation --------------------------
    print('Computing mean annual temperature and precipitation...')

    annmean_tas = meanannualmean(tas, ['1960-01', '1999-12'])
    annmean_pr = meanannualmean(pr, ['1960-01', '1999-12'])

    # --- Diurnal temperature range -----------------------------------
    print ('Computing mean diurnal temperature ranges for winter and summer seasons...')

    # NOTE(review): this window starts in 1950 while the means above start in
    # 1960 — confirm that the difference is intended.
    # NOTE: CMCC-CM2-SR5 comes out as all zeros in the recorded output
    # (something seems to be up with that model) — TODO investigate.
    diurnal_tas = meandiurnalrange(tasmax, tasmin, ['1950-01', '1999-12'])

    return {
        'annmean_tas': annmean_tas,
        'annmean_pr': annmean_pr,
        'diurnal_tas': diurnal_tas,
    }

In [310]:
# Run the full workflow when this cell/script is executed directly.
if __name__ == "__main__":
    main()

Loading All Data...


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=33.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=20.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=20.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=33.0), HTML(value='')))


Computing mean annual temperature and precipitation...


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=33.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=33.0), HTML(value='')))


Computing mean diurnal temperature ranges for winter and summer seasons... 


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=20.0), HTML(value='')))

AWI-CM-1-1-MR [25.014816 22.891632 24.483307 25.769714]
AWI-ESM-1-1-LR [ 6.815674 11.822601  8.751556  9.224182]
BCC-CSM2-MR [ 6.3529053 14.75769    8.156433   8.965729 ]
BCC-ESM1 [ 5.4093018 14.092377   6.858673   8.404236 ]
CAS-ESM2-0 [ 7.8840027 13.730682   9.777191   9.279358 ]
FGOALS-g3 [ 7.115753 14.666901 10.336578  9.19928 ]
CanESM5 [ 7.7059326 11.821869   8.933441   9.598206 ]
CMCC-CM2-SR5 [0. 0. 0. 0.]
ACCESS-CM2 [ 6.024414 11.626038  7.935028  8.158447]
ACCESS-ESM1-5 [ 4.4771423 12.608459   7.3803406  8.047546 ]
MPI-ESM-1-2-HAM [ 6.694031 11.354645  8.464081  9.406494]
MIROC6 [ 4.6099854 13.906647   9.700226   8.283905 ]
MPI-ESM1-2-HR [ 7.119934 11.803253  8.964935  9.471466]
MPI-ESM1-2-LR [ 6.9398804 11.248779   8.654175   9.203857 ]
MRI-ESM2-0 [4.815735 9.255249 7.317993 6.811676]
GISS-E2-1-G-CC [ 5.950012 13.603058  8.824463  9.209473]
GISS-E2-1-G [ 6.097168 13.728912  9.185852  9.318024]
GISS-E2-1-H [ 6.0700073 12.888977   9.333801   8.794861 ]
NESM3 [1.3609314 2.5393677