In [1]:
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
import zarr
import gcsfs
from datetime import timedelta 
import metpy.calc as mpcalc
import cftime
import warnings
import climlab.utils.thermo as climlab
import glob

# Notebook-wide display configuration.
# Show xarray objects with their rich HTML representation.
xr.set_options(display_style='html')
# Render matplotlib figures inline, at high ("retina") resolution.
%matplotlib inline
%config InlineBackend.figure_format = 'retina' 
# NOTE(review): this silences *all* warnings for the whole notebook,
# including deprecation warnings from xarray/pandas.
warnings.filterwarnings("ignore")

In [2]:
# Catalogue of the CMIP6 zarr stores, read from Google Cloud Storage
df = pd.read_csv('https://storage.googleapis.com/cmip6/cmip6-zarr-consolidated-stores.csv')

df_list = []

# Every model in the ensemble (file-name spelling) ...
modelfilename_list = [
    'CCSM4', 'CanESM2', 'CanESM5', 'E3SM-1-0', 'GFDL-CM4', 'HadGEM2',
    'HadGEM3-GC31-LL', 'IPSL-CM6A-LR', 'MIROC-ES2L', 'MIROC-ESM',
    'MIROC5', 'MIROC6', 'MPI-ESM', 'MRI-CGCM3', 'MRI-ESM2-0', 'UKESM1-0-LL',
]

# ... and the matching short aliases, in the same order
modelname_list = [
    'ccsm4', 'canam4', 'canesm5', 'e3sm', 'gfdl', 'hg2', 'hg3', 'ipsl',
    'mies2l', 'miesm', 'mi5', 'mi6', 'mpi', 'mrcgcm', 'mresm', 'ukesm',
]
modelvar_list = ['sfcWind']

# Models listed explicitly as CMIP6, and the ensemble members accepted for them
cmip6modelfilename_list = [
    'CanESM5', 'E3SM-1-0', 'GFDL-CM4', 'HadGEM3-GC31-LL', 'IPSL-CM6A-LR',
    'MIROC-ES2L', 'MIROC6', 'MRI-ESM2-0', 'UKESM1-0-LL',
]
member_id = ['r1i1p1f1', 'r1i1p1f2', 'r1i1p1f3', 'r1i1p1f4']

# Whatever is not in the CMIP6 list is treated as CMIP5
cmip5modelfilename_list = [
    name for name in modelfilename_list if name not in cmip6modelfilename_list
]

# Local inputs: CMIP5 model output and the radiative-kernel file(s)
root_path = '/data/keeling/a/rytam2/a/4xCO2/'
kernel_path = '/data/keeling/a/rytam2/a/kernels/gcms/*CCSM4*.nc'

# Catalogue rows to fetch: monthly (Amon) abrupt-4xCO2 output of the wanted
# variable(s), restricted to the CMIP6 models and members listed above
wanted_rows = (
    df['source_id'].isin(cmip6modelfilename_list)
    & df['member_id'].isin(member_id)
    & df['variable_id'].isin(modelvar_list)
    & (df['experiment_id'] == 'abrupt-4xCO2')
    & (df['table_id'] == 'Amon')
)
cmip6 = df[wanted_rows]

# Cloud paths of the selected stores
zstore = cmip6['zstore'].values

# Anonymous GCS file-system handle; only needs to be created once
gcs = gcsfs.GCSFileSystem(token='anon')

In [3]:
# Regrid CMIP6 abrupt-4xCO2 `sfcWind` from every selected zarr store onto the
# kernel grid and give all models one shared time axis.
dict_ds_sfcWind = {}

# Number of monthly steps kept per model (150 years)
n_months = 150 * 12

# Loop-invariant inputs, opened once instead of once per model.
# `kernel` supplies the target grid (CCSM4 kernel file); `time_ds` is the
# third selected store and supplies the reference time axis.
# Both names are also used by the CMIP5 cell further down.
kernel = xr.open_mfdataset(kernel_path)
time_ds = xr.open_zarr(zstore[2], consolidated=True)
index = time_ds.indexes['time'][0:n_months]

for path in zstore:
    # Open each store
    ds = xr.open_zarr(path, consolidated=True)

    # NOTE(review): models are keyed by `parent_source_id`; presumably this
    # equals the store's own `source_id` -- confirm.
    modelname = ds.attrs['parent_source_id']
    print(modelname)

    # Select variable from dataset
    ds = ds.sfcWind

    # Move longitudes from [0, 360) to [-180, 180) and re-sort
    ds = ds.assign_coords(lon=(((ds.lon + 180) % 360) - 180)).sortby('lon')
    ds.lat.attrs['units'] = 'degrees'
    ds.lon.attrs['units'] = 'degrees'

    # Keep at most the first 150 years of monthly data
    ds = ds.isel(time=slice(0, n_months))

    # Template on the kernel grid (CCSM4 coordinates), carrying this model's
    # own time steps so that interp_like only changes lat/lon
    kernel_ds = kernel.dRdxi.isel(i=0).expand_dims({'time': ds.time}, axis=2)\
        .assign_coords({'latitude': kernel.lat, 'longitude': kernel.lon})\
        .rename({'latitude': 'lat', 'longitude': 'lon'})

    # Regrid, then replace the model's time coordinate with the shared one
    # *before* storing, so the stored array does not depend on a later
    # in-place mutation
    ds = ds.interp_like(kernel_ds)
    ds['time'] = index
    dict_ds_sfcWind[modelname] = ds


ws_cmip6 = xr.Dataset(dict_ds_sfcWind)

GFDL-CM4
IPSL-CM6A-LR
MRI-ESM2-0
UKESM1-0-LL
CanESM5
HadGEM3-GC31-LL
MIROC6
E3SM-1-0
MIROC-ES2L


# CMIP5

In [4]:
# Load CMIP5 abrupt4xCO2 wind fields (sfcWind, uas, vas) from local NetCDF
# files, regrid them onto the kernel grid and put them on the shared time axis.
# Relies on `kernel` and `time_ds` created in the CMIP6 cell above.
dict_ds_sfcWind_cmip5 = {}
dict_ds_uas = {}
dict_ds_vas = {}

modelvar_list = ['sfcWind','uas','vas']

# Where each variable's regridded arrays are collected
results_by_var = {
    'sfcWind': dict_ds_sfcWind_cmip5,
    'uas': dict_ds_uas,
    'vas': dict_ds_vas,
}

# If no file exists for a wind component, fall back to the file of the
# corresponding `ua`/`va` variable and take its 1e5 Pa level instead.
plev_fallback = {'uas': 'ua', 'vas': 'va'}

# Number of monthly steps kept per model (150 years) and the shared time axis
n_months = 150 * 12
index = time_ds.indexes['time'][0:n_months]

for modelname in cmip5modelfilename_list:

    for var in modelvar_list:
        # Find the files for this variable/model, falling back if needed
        file_var = var
        filepath = glob.glob(root_path+'%s*%s*.nc'%(file_var,modelname))
        if len(filepath) == 0 and var in plev_fallback:
            file_var = plev_fallback[var]
            filepath = glob.glob(root_path+'%s*%s*.nc'%(file_var,modelname))
        print(modelname,len(filepath),'\n',filepath)

        # CCSM4 wind speed is computed from ua/va in a later cell
        if var == 'sfcWind' and modelname == 'CCSM4':
            continue

        # Skip explicitly instead of silently reusing `ds` left over from
        # the previous iteration (or failing on an undefined name)
        if len(filepath) == 0:
            print('%s: no files found for %s, skipping' % (modelname, var))
            continue

        # Combine all .nc files into one dataset (also fine for one file)
        ds = xr.open_mfdataset(filepath, combine="by_coords")

        # Select the variable that matches the file actually opened
        if file_var != var:
            ds = ds[file_var].sel(plev=1e5)
        else:
            ds = ds[var]

        # Move longitudes from [0, 360) to [-180, 180) and re-sort
        ds = ds.assign_coords(lon=(((ds.lon + 180) % 360) - 180)).sortby('lon')
        ds.lat.attrs['units'] = 'degrees'
        ds.lon.attrs['units'] = 'degrees'

        # Keep at most the first 150 years of monthly data
        ds = ds.isel(time=slice(0, n_months))

        print(modelname, np.shape(ds),'\n')

        # Template on the kernel grid (CCSM4 coordinates), carrying this
        # model's own time steps
        kernel_ds = kernel.dRdxi.isel(i=0).expand_dims({'time': ds.indexes['time'][0:n_months]}, axis=2)\
            .assign_coords({'latitude': kernel.lat, 'longitude': kernel.lon})\
            .rename({'latitude': 'lat', 'longitude': 'lon'})

        # Regrid, replace the time coordinate with the shared reference axis,
        # then store under the matching variable
        ds = ds.interp_like(kernel_ds)
        ds['time'] = index
        results_by_var[var][modelname] = ds

ws_cmip5 = xr.Dataset(dict_ds_sfcWind_cmip5)

CCSM4 0 
 []
CCSM4 3 
 ['/data/keeling/a/rytam2/a/4xCO2/ua_Amon_CCSM4_abrupt4xCO2_r1i1p1_185001-189912.nc', '/data/keeling/a/rytam2/a/4xCO2/ua_Amon_CCSM4_abrupt4xCO2_r1i1p1_190001-194912.nc', '/data/keeling/a/rytam2/a/4xCO2/ua_Amon_CCSM4_abrupt4xCO2_r1i1p1_195001-200012.nc']
CCSM4 (1800, 192, 288) 

CCSM4 3 
 ['/data/keeling/a/rytam2/a/4xCO2/va_Amon_CCSM4_abrupt4xCO2_r1i1p1_185001-189912.nc', '/data/keeling/a/rytam2/a/4xCO2/va_Amon_CCSM4_abrupt4xCO2_r1i1p1_190001-194912.nc', '/data/keeling/a/rytam2/a/4xCO2/va_Amon_CCSM4_abrupt4xCO2_r1i1p1_195001-200012.nc']
CCSM4 (1800, 192, 288) 

CanESM2 1 
 ['/data/keeling/a/rytam2/a/4xCO2/sfcWind_Amon_CanESM2_abrupt4xCO2_r1i1p1_185001-199912.nc']
CanESM2 (1800, 64, 128) 

CanESM2 1 
 ['/data/keeling/a/rytam2/a/4xCO2/uas_Amon_CanESM2_abrupt4xCO2_r1i1p1_185001-199912.nc']
CanESM2 (1800, 64, 128) 

CanESM2 1 
 ['/data/keeling/a/rytam2/a/4xCO2/vas_Amon_CanESM2_abrupt4xCO2_r1i1p1_185001-199912.nc']
CanESM2 (1800, 64, 128) 

HadGEM2 8 
 ['/data/keeling/a

In [5]:
# Calculate CCSM4 wind speed from its ua/va components and add it to the
# CMIP5 dataset.

uas = xr.Dataset(dict_ds_uas)
vas = xr.Dataset(dict_ds_vas)

# Original author's note: MetPy returns Pint-quantified data and .chunk()
# removes the units -- TODO confirm.
ws_cmip5['CCSM4'] = mpcalc.wind_speed(uas.CCSM4,vas.CCSM4).rename('CCSM4').chunk()

# `Dataset.drop` is deprecated in favour of `drop_vars`; errors='ignore'
# keeps the cell re-runnable once the coordinates have already been removed.
ws_cmip5 = ws_cmip5.drop_vars(['plev','height'], errors='ignore')
ws_cmip5

Unnamed: 0,Array,Chunk
Bytes,17.80 MiB,17.80 MiB
Shape,"(1800, 36, 72)","(1800, 36, 72)"
Count,18 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 17.80 MiB 17.80 MiB Shape (1800, 36, 72) (1800, 36, 72) Count 18 Tasks 1 Chunks Type float32 numpy.ndarray",72  36  1800,

Unnamed: 0,Array,Chunk
Bytes,17.80 MiB,17.80 MiB
Shape,"(1800, 36, 72)","(1800, 36, 72)"
Count,18 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,17.80 MiB,2.97 MiB
Shape,"(1800, 36, 72)","(300, 36, 72)"
Count,80 Tasks,8 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 17.80 MiB 2.97 MiB Shape (1800, 36, 72) (300, 36, 72) Count 80 Tasks 8 Chunks Type float32 numpy.ndarray",72  36  1800,

Unnamed: 0,Array,Chunk
Bytes,17.80 MiB,2.97 MiB
Shape,"(1800, 36, 72)","(300, 36, 72)"
Count,80 Tasks,8 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,17.80 MiB,17.80 MiB
Shape,"(1800, 36, 72)","(1800, 36, 72)"
Count,18 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 17.80 MiB 17.80 MiB Shape (1800, 36, 72) (1800, 36, 72) Count 18 Tasks 1 Chunks Type float32 numpy.ndarray",72  36  1800,

Unnamed: 0,Array,Chunk
Bytes,17.80 MiB,17.80 MiB
Shape,"(1800, 36, 72)","(1800, 36, 72)"
Count,18 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,17.80 MiB,17.80 MiB
Shape,"(1800, 36, 72)","(1800, 36, 72)"
Count,19 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 17.80 MiB 17.80 MiB Shape (1800, 36, 72) (1800, 36, 72) Count 19 Tasks 1 Chunks Type float32 numpy.ndarray",72  36  1800,

Unnamed: 0,Array,Chunk
Bytes,17.80 MiB,17.80 MiB
Shape,"(1800, 36, 72)","(1800, 36, 72)"
Count,19 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,17.80 MiB,17.80 MiB
Shape,"(1800, 36, 72)","(1800, 36, 72)"
Count,24 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 17.80 MiB 17.80 MiB Shape (1800, 36, 72) (1800, 36, 72) Count 24 Tasks 1 Chunks Type float32 numpy.ndarray",72  36  1800,

Unnamed: 0,Array,Chunk
Bytes,17.80 MiB,17.80 MiB
Shape,"(1800, 36, 72)","(1800, 36, 72)"
Count,24 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,17.80 MiB,17.80 MiB
Shape,"(1800, 36, 72)","(1800, 36, 72)"
Count,24 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 17.80 MiB 17.80 MiB Shape (1800, 36, 72) (1800, 36, 72) Count 24 Tasks 1 Chunks Type float32 numpy.ndarray",72  36  1800,

Unnamed: 0,Array,Chunk
Bytes,17.80 MiB,17.80 MiB
Shape,"(1800, 36, 72)","(1800, 36, 72)"
Count,24 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,17.80 MiB,5.93 MiB
Shape,"(1800, 36, 72)","(600, 36, 72)"
Count,85 Tasks,3 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 17.80 MiB 5.93 MiB Shape (1800, 36, 72) (600, 36, 72) Count 85 Tasks 3 Chunks Type float32 numpy.ndarray",72  36  1800,

Unnamed: 0,Array,Chunk
Bytes,17.80 MiB,5.93 MiB
Shape,"(1800, 36, 72)","(600, 36, 72)"
Count,85 Tasks,3 Chunks
Type,float32,numpy.ndarray


In [6]:
# Combine the CMIP5 and CMIP6 wind-speed datasets into a single Dataset

wind_speed_sets = [ws_cmip5, ws_cmip6]
ws = xr.merge(wind_speed_sets)

In [7]:
# Write the merged wind-speed dataset to a NetCDF file
path = '/data/keeling/a/rytam2/ccf_model_spread/data/preprocessed/'
out_file = path + 'ws_4xCO2_updated_CMIP5&6_Y1-150.nc'

ws.to_netcdf(out_file)