In [4]:
import importlib
import pandas as pd
import xarray as xr
import numpy as np
import datetime as dt
from numpy import nan
from constants import *
import sys
import warnings
import math
import os
import cftime
from glob import glob
from timeit import default_timer as timer # try to measure time
from CASutils import readdata_utils as read
from CASutils import calendar_utils as cal

In [5]:
# define functions
def read_cmip6(filepath,datestart,dateend):
    """Open CMIP6 files and return them on a daily, standard-calendar time axis.

    Args:
        filepath (string) = path or glob pattern handed to xr.open_mfdataset
        datestart (string) = first date of the daily axis / time slice
        dateend (string) = last date of the daily axis / time slice

    Returns:
        The dataset after calendar conversion, nearest-neighbour interpolation
        onto the daily axis, slicing to [datestart, dateend] and CF decoding.
    """
    # open all files as one dataset, decoding times to cftime objects
    open_kwargs = dict(coords="minimal", join="override", decode_times=True, use_cftime=True)
    raw = xr.open_mfdataset(filepath, **open_kwargs)

    # move to the standard calendar; dates missing in the source calendar become NaN
    standard = raw.convert_calendar("standard", use_cftime=True, align_on="date", missing=np.nan)

    # daily target axis, filled by nearest-neighbour interpolation, then sliced
    daily_axis = xr.cftime_range(datestart, dateend, freq='D', calendar="standard")
    window = standard.interp(time=daily_axis, method="nearest").sel(time=slice(datestart, dateend))

    return xr.decode_cf(window, use_cftime=True)

def read_slice(filepath, datestart, dateend,latmin,latmax,plev):
    """Read a time/latitude/pressure subset and return its zonal (longitude) mean.

    First attempt: open the files with times decoded to cftime objects, subset,
    then average over "lon". If anything in that attempt raises, fall back to
    opening with decode_times=False, averaging over longitude, decoding the CF
    metadata afterwards, subsetting, and converting the time index to a pandas
    DatetimeIndex. The two paths therefore return different time index types.

    Args:
        filepath (string) = path to files e.g., "/path/to/files/*.nc"
        datestart (string) = start date for time slice
        dateend (string) = end date for time slice
        latmin, latmax = bounds of the latitude slice
        plev (sequence) = pressure level(s). One entry selects the nearest level
            within a tolerance of 1; otherwise plev[0]+1 and plev[1]-1 are used
            as the bounds of a level slice.

    Returns:
        The zonal-mean dataset restricted to the requested time, latitude and
        pressure range.
    """

    try:
        dat = xr.open_mfdataset(filepath, coords="minimal", join="override",
                                decode_times=True, use_cftime=True)
        dat = dat.sel(time=slice(datestart, dateend), lat=slice(latmin, latmax))

        if len(plev) == 1:
            #avoid issue for models with inaccurate plevs
            dat = dat.sel(plev=plev, method="nearest", tolerance=1)
        else:
            #manual tolerance of 1 because method="nearest" is not implemented for slices
            dat = dat.sel(plev=slice(plev[0]+1, plev[1]-1))

        try:
            datzm = dat.mean(dim="lon")

        except NotImplementedError:
            #not implemented by dask: load the data into memory first (may be slow)
            dat.load()
            datzm = dat.mean(dim="lon")

        except Exception:
            # deal with problematic coordinate names
            dat = dat.rename({"longitude":"lon", "latitude":"lat"})
            datzm = dat.mean(dim="lon")

    # `Exception` rather than a bare except, so that KeyboardInterrupt/SystemExit
    # stop the read instead of silently triggering the fallback
    except Exception:
        print("Something's wierd about the time axis, decoding manually")
        dat = xr.open_mfdataset(filepath, coords="minimal", join="override",
                                decode_times=False)

        try:
            datzm = dat.mean(dim="lon")

        except Exception:
            # deal with problematic coordinate names
            dat = dat.rename({"longitude":"lon", "latitude":"lat"})
            datzm = dat.mean(dim="lon")

        datzm = xr.decode_cf(datzm, use_cftime=True)

        datzm = datzm.sel(time=slice(datestart, dateend), lat=slice(latmin, latmax))
        if len(plev) == 1:
            #avoid issue for models with inaccurate plevs
            datzm = datzm.sel(plev=plev, method="nearest", tolerance=1)
        else:
            #manual tolerance of 1 because method="nearest" is not implemented for slices
            datzm = datzm.sel(plev=slice(plev[0]+1, plev[1]-1))

        # replace the cftime index by a pandas DatetimeIndex
        datetimeindex = datzm.indexes['time'].to_datetimeindex()
        datzm['time'] = datetimeindex

    return datzm

def read_field(filepath, datestart, dateend,latmin,latmax,lonmin,lonmax,plev,norm=False):
    """Read a time/latitude/longitude/pressure subset of a field (no averaging).

    First attempt: open the files with times decoded to cftime objects and
    subset. If anything in that attempt raises, fall back to opening with
    decode_times=False, decoding the CF metadata afterwards, subsetting, and
    converting the time index to a pandas DatetimeIndex. The two paths
    therefore return different time index types.

    Args:
        filepath (string) = path to files e.g., "/path/to/files/*.nc"
        datestart (string) = start date for time slice
        dateend (string) = end date for time slice
        latmin, latmax = bounds of the latitude slice
        lonmin, lonmax = bounds of the longitude slice (given in the
            norm_lon convention when norm=True)
        plev (sequence) = pressure level(s). One entry selects the nearest level
            within a tolerance of 1; otherwise plev[0]+1 and plev[1]-1 are used
            as the bounds of a level slice.
        norm (bool) = if True, pass the dataset through norm_lon before the
            longitude slice is taken

    Returns:
        The dataset restricted to the requested time, lat/lon box and level(s).
    """

    def _subset(ds):
        # restrict to the requested time window, lat/lon box and pressure level(s)
        ds = ds.sel(time=slice(datestart, dateend),
                    lat=slice(latmin, latmax),
                    lon=slice(lonmin, lonmax))
        if len(plev) == 1:
            #avoid issue for models with inaccurate plevs
            return ds.sel(plev=plev, method="nearest", tolerance=1)
        #manual tolerance of 1 because method="nearest" is not implemented for slices
        return ds.sel(plev=slice(plev[0]+1, plev[1]-1))

    try:
        dat = xr.open_mfdataset(filepath, coords="minimal", join="override",
                                decode_times=True, use_cftime=True)
        if norm:
            dat = norm_lon(dat)
        dat = _subset(dat)

    # `Exception` rather than a bare except, so that KeyboardInterrupt/SystemExit
    # stop the read instead of silently triggering the fallback
    except Exception:
        print("Something's wierd about the time axis, decoding manually")
        dat = xr.open_mfdataset(filepath, coords="minimal", join="override",
                                decode_times=False)
        if norm:
            dat = norm_lon(dat)
        dat = xr.decode_cf(dat, use_cftime=True)
        dat = _subset(dat)

        # replace the cftime index by a pandas DatetimeIndex
        datetimeindex = dat.indexes['time'].to_datetimeindex()
        dat['time'] = datetimeindex

    return dat



def get_lat_lon_res(ds):
    '''Return the mean grid spacing along 'lat' and along 'lon' of ds.

    The spacing is the mean of the differences between each coordinate value
    and its predecessor; both results are returned as NumPy values
    (latres, lonres).
    '''
    def _mean_step(coord, dim):
        # difference to the previous grid point, averaged over the axis
        step = coord - coord.shift(**{dim: 1})
        return step.mean().to_numpy()

    return _mean_step(ds.coords['lat'], 'lat'), _mean_step(ds.coords['lon'], 'lon')

def def_domain(ncdf,min_lat,max_lat,min_lon,max_lon):
    """Crop ncdf to a lat/lon box using label-based slices on 'lat' and 'lon'."""
    box = dict(lat=slice(min_lat, max_lat), lon=slice(min_lon, max_lon))
    return ncdf.loc[box]

def norm_lon(ncdf):
    """Return a copy of ncdf with 'lon' wrapped to [-180, 180) and sorted by it.

    The wrapped coordinate is attached with assign_coords, so the object passed
    in is left unchanged (the previous version overwrote its 'lon' coordinate
    in place).
    """
    wrapped = (ncdf.coords['lon'] + 180) % 360 - 180
    shifted = ncdf.assign_coords(lon=wrapped)
    # sort so that longitude increases monotonically again after wrapping
    return shifted.sortby(shifted.lon)

def get_ONDJFM_day(ncdf, months=[1,2,3,10,11,12],timedim="day"):
    """Keep only the time steps of ncdf whose calendar month is in `months`.

    Args:
        ncdf = object indexed along `timedim`, whose `timedim` coordinate
            exposes `.dt.month`
        months = calendar months to keep (default: October to March)
        timedim (string) = name of the time dimension
    """
    in_season = ncdf[timedim].dt.month.isin(months)
    return ncdf.isel({timedim: in_season})

def area_weighted_avg(ds):
    '''Weighted average over the latitudinal coordinate.

    Each latitude is weighted by cos(lat) as an approximation of the grid-cell
    area. The axis to average over is looked up by name ("lat") when ds
    provides get_axis_num; otherwise axis 1 is used, which was the previously
    hard-coded value.

    Args:
        ds = array-like with a `lat` attribute holding latitudes in degrees

    Returns:
        The result of np.average with the latitude axis removed.
    '''
    lat = ds.lat.values
    AreaWeight = np.cos(lat*np.pi/180)
    #AreaWeight = AreaWeight/np.average(AreaWeight) #Normalize weight

    # resolve the latitude axis by name instead of assuming it is the second dimension
    try:
        lat_axis = ds.get_axis_num("lat")
    except (AttributeError, ValueError):
        lat_axis = 1

    ds_avg = np.average(ds, axis=lat_axis, weights=AreaWeight)
    return ds_avg

In [6]:
#os.chdir('/home/lseverino/MT/scripts')
# reload the CASutils helper modules imported above
for _module in (read, cal):
    importlib.reload(_module)
# suppress all warnings for the rest of the session
warnings.filterwarnings('ignore')


In [7]:
## constants
# scenarios handled in this notebook (ssp119 has a path below but is not part of the list)
scenlist = ["historical","ssp126","ssp245","ssp370","ssp585"]

# root directory of each CMIP6 experiment
histpath = "/net/atmos/data/cmip6/historical/"
ssp119path = "/net/atmos/data/cmip6/ssp119/"
ssp126path = "/net/atmos/data/cmip6/ssp126/"
ssp245path = "/net/atmos/data/cmip6/ssp245/"
ssp370path = "/net/atmos/data/cmip6/ssp370/"
ssp585path = "/net/atmos/data/cmip6/ssp585/"

# same paths, ordered like scenlist, and as a scenario -> path lookup
pathlist = [histpath, ssp126path, ssp245path, ssp370path, ssp585path]
pathdic = dict(zip(scenlist, pathlist))

# variable of interest and output directory
var = "sfcWindmax"
pathout = "../cmip6/" + var + '/'

cmip6models = pd.read_csv('../cmip6csvinfo/cmip6csvinfo_timeseries_ssp585_luca_daily.csv')


# NOTE(review): models_df is not defined in any visible cell; presumably it comes from
# `from constants import *` or from a deleted cell -- verify before a Restart & Run All.
def _models_with_at_least(nmin):
    # keep rows where every column is >= nmin, then drop the last row
    # (presumably a summary row -- TODO confirm)
    return models_df.where(models_df >= nmin).dropna(how='any').iloc[:-1, :]

#select all models with minimum 3 (resp. 1) members for the 5 scenarios
mods_3mem_allscen = _models_with_at_least(3)
mods_1mem_allscen = _models_with_at_least(1)


In [8]:
# Build, for every scenario, a dict {model name: [number of entries, list of entries]}
# from the sub-directories of <scenario path>/day/sfcWindmax, so that the same members
# as for sfcWindmax are used later on.
varSWM = 'day/sfcWindmax'
dicscen = dict()
for ind, scen in enumerate(scenlist):
    path = pathlist[ind]
    dicscen[scen] = per_model = dict()
    for subdir in os.scandir(path + varSWM):
        member_dirs = os.listdir(subdir)
        per_model[subdir.name] = [len(member_dirs), member_dirs]

# shortcuts for the two scenarios compared below
nmems_hist = dicscen['historical']
nmems_ssp585 = dicscen['ssp585']

In [21]:
# Extended winter (ONDJFM): each period starts on 1 October and ends on 30 March,
# so that the past and the future period have the same length.
ybegp, monbegp, daybegp = 1980, 10, 1   # past period, first day
yendp, monendp, dayendp = 2010, 3, 30   # past period, last day (only up to the 30th)
#ybegf = 2070 ; monbegf = 1 ; yendf = 2099 ; monendf = 12 ; daybegf = 1 ; dayendf = 31# dates for Future period
ybegf, monbegf, daybegf = 2070, 10, 1   # future period, first day
yendf, monendf, dayendf = 2100, 3, 30   # future period, last day

# total number of calendar months between start and end (used for checking)
nmonthsp = (yendp - ybegp - 1) * 12 + (12 - monbegp + 1) + monendp
nmonthsf = (yendf - ybegf - 1) * 12 + (12 - monbegf + 1) + monendf

# set up date names (YYYY-MM-DD)
dateformat = '%Y-%m-%d'

datebegp = f"{ybegp}-{monbegp:02d}-{daybegp:02d}"
dateendp = f"{yendp}-{monendp:02d}-{dayendp:02d}"
datebegf = f"{ybegf}-{monbegf:02d}-{daybegf:02d}"
dateendf = f"{yendf}-{monendf:02d}-{dayendf:02d}"

# daily cftime indexes on the standard calendar
#daysidp = pd.date_range(datebegp,dateendp,freq='D')
#daysidf = pd.date_range(datebegf,dateendf,freq='D')
daysidp = xr.cftime_range(datebegp, dateendp, freq='D', calendar='standard')
daysidf = xr.cftime_range(datebegf, dateendf, freq='D', calendar='standard')

# number of days in each period
ndaysp = len(daysidp)
ndaysf = len(daysidf)

# 1-based day and month counters
dayrangep = np.arange(1, ndaysp + 1)
dayrangef = np.arange(1, ndaysf + 1)

monrangep = np.arange(1, nmonthsp + 1)
monrangef = np.arange(1, nmonthsf + 1)

# output grid: 73 latitudes from -90 to 90, 144 longitudes from 0 to 357.5
latout = np.linspace(-90, 90, 73)
lonout = np.linspace(0, 357.5, 144)
#lonout=np.linspace(0,360,144) # try to remove issues at border

# plevuse=[100000,92500,85000,70000,60000,50000,40000,30000,25000,20000,15000,10000,
#        7000,5000,3000,2000,1000]
# plevuse=[1000]



In [52]:
##select variables
#models
# NOTE(review): modlist_ssp585 and modlist_allscen are not defined in any visible cell;
# presumably they come from `from constants import *` or a deleted cell -- verify.
modlist = modlist_ssp585 + modlist_allscen
#modlist = modlist[:2]
modset = "modlist_allmods"
models = pd.Series(modlist)
nmods = len(modlist)

#scenarios
selscen = ["historical", 'ssp585']
nscens = len(selscen)

# Each entry of ind_dict is [Tres, var, latmin, latmax, lonmin, lonmax, plev...]

#indices from O2020
#[Tres,var,latmin, latmax,lonmin,lonmax,pmax,pmin]
#ind_dict = {"sfcT":["Amon","tas",np.nan,np.nan,np.nan,np.nan,np.nan],"pol": ["Amon","ta",60,90,np.nan,np.nan,100000,70000],
#            "trop":["Amon","ta",-20,20,np.nan,np.nan,40000,15000],"strat":["Amon","ua",70,90,np.nan,np.nan,25000,3000],
#           "Nino4":["Amon","tas",-5,5,160,210,np.nan,np.nan],"Nino3":["Amon","tas",-5,5,210,270,np.nan],
#            "NAWH":["Amon","tas",35,60,320,350,np.nan,np.nan]}

#indices from ZS2017
#ind_dict = {"sfcT":["Amon","tas",np.nan,np.nan,np.nan,np.nan,np.nan],"pol": ["Amon","ta",60,90,np.nan,np.nan,85000],
#            "trop":["Amon","ta",-30,30,np.nan,np.nan,25000],"strat":["Amon","ua",70,80,np.nan,np.nan,2000]}

#indices from Cu2021
# normalize lon + take winter avg anomalies!!
# every box uses monthly ("Amon") geopotential height ("zg") at 50000;
# the tuples are (latmin, latmax, lonmin, lonmax)
_cu21_boxes = {
    "NAO-neg1": (50, 82, -91, 30),
    "NAO-pos1": (31, 49, -63, -5),
    "EA-neg1": (59, 71, 63, 99),
    "EA-neg2": (45, 61, -49, -2),
    "EA-pos1": (9, 34, -70, 25),
    "EAWR-neg1": (37, 66, 33, 68),
    "EAWR-pos1": (34, 50, 100, 140),
    "EAWR-pos2": (44, 60, -10, 13),
    "SCA-neg1": (37, 56, 80, 118),
    "SCA-neg2": (33, 41, -21, 9),
    "SCA-pos1": (63, 75, -35, 53),
}
ind_dict = {name: ["Amon", "zg", *box, 50000] for name, box in _cu21_boxes.items()}

##custom indices
#ind_dict = {"Sbox":["Amon","ta",15,35,280,330,95000,85000],"Nbox":["Amon","ta",40,60,320,350,95000,85000],
#           "TCWV":["Amon","prw",40,60,315,345,np.nan,np.nan]}

ind_list = list(ind_dict.keys())
#ind_list = ind_list[:2]

normalize_lon = True #True when indices given in normalized lon
win_ano = True #take anomalies wrt winter averaged

#define types of indices
sfcfields = ["sfcT", "Nino4", "Nino3", "NAWH", "TCWV"]   # read with read.read_sfc
lonmeanfields = ["pol", "trop", "strat"]                 # read as zonal means with read_slice
# indices that are averaged over the "plev" dimension
pslices = ["pol", "trop", "strat", "TCWV", "Sbox", "Nbox",
           "NAO-neg1", "EA-neg1", "EA-pos1", "EAWR-pos1",
           "SCA-neg1", "SCA-pos1", "NAO-pos1", "EA-neg2",
           "EAWR-neg1", "EAWR-pos2", "SCA-neg2"]


#select maximum of members to use
nmems_max = 3
mem_idx = range(nmems_max)

#initiate df to store results: rows = (model, member index), columns = (index, scenario)
iterrows = [modlist, mem_idx]
itercols = [ind_list, selscen]
row_idx = pd.MultiIndex.from_product(iterrows, names=["model", "imem"])
col_idx = pd.MultiIndex.from_product(itercols, names=["indice", "scen"])

ind_df = pd.DataFrame(index=row_idx, columns=col_idx)
savename = "_".join(["WAN", "weather", "indices", "CU21", "sp_avg", modset] + selscen) + ".csv"


#initiate ds to save time series: one NaN-filled (model, member, scen, month) array per index
nmonths = 180 #30*6 months of data
monrange = np.arange(1, nmonths + 1)
ds_dict = {}
for indice in ind_list:
    ds_dict[indice] = xr.DataArray(
        np.full([nmods, nmems_max, nscens, nmonths], np.nan),
        coords=[modlist, mem_idx, selscen, monrange],
        dims=['model', 'member', 'scen', 'month'],
        name=indice,
    )

ind_ds = xr.Dataset(ds_dict)

#get member names
memname_df = pd.read_csv('/home/lseverino/MT/metadata/memnames_ssp585_hist_SWM.csv', header=[0,1], index_col=0)

In [53]:
#remote indices: taking the average
# For every model, scenario, member and index: read the field, reduce it to a
# time series (level mean, ONDJFM selection, longitude mean, optional monthly
# anomalies, cos(lat)-weighted latitude mean), store the series in ind_ds and
# its time mean in ind_df, then write ind_df to CSV.

nmods = len(models)
memout=np.arange(0,nmems_max)  # was `nmem_max`, which is not defined in this notebook
modout=np.arange(0,nmods)

# Series.items() replaces Series.iteritems(), which was removed in pandas 2.0
for index, modname in models.items():

    #iterate over the scenarios
    for scen in selscen:
        scenpath = pathdic[scen]

        # Count the members inside the scenario loop: the previous version read
        # `scen` before this loop had assigned it, so the count came from
        # whichever scenario an earlier cell or iteration had left behind.
        members = memname_df.loc[modname,scen]
        nmems = nmems_max - members.isna().sum()

        #select date
        if scen == 'historical':
            datebeg = datebegp
            dateend = dateendp
            nmonths = nmonthsp
        else:
            datebeg = datebegf
            dateend = dateendf
            nmonths = nmonthsf

        #iterate over the members
        for imem in range(nmems):
            #select member (same as the one used for damage computation)
            memname = memname_df.loc[modname,(scen,str(imem))]

            #iterate over the indices
            for indice in ind_list:
                #select var, lat and plev
                infos = ind_dict[indice]
                dirv = infos[0]
                var = infos[1]
                latmin = infos[2]
                latmax = infos[3]
                lonmin = infos[4]
                lonmax = infos[5]
                plev = infos[6:]

                #paths: take the first directory matching the member
                scendir = glob(scenpath+"/"+dirv+"/"+var+"/"+modname+"/"+memname+"/*/")
                scendir = scendir[0]
                print("Processing "+indice+" "+scen+" for "+modname+" "+memname+"...")

                if indice in sfcfields:
                    field = read.read_sfc(scendir+"*.nc", datebeg,dateend)

                elif indice in lonmeanfields:
                    try:
                        field = read_slice(scendir+"*.nc", datebeg,dateend,latmin,latmax,plev)

                    except ValueError:
                        #assume first file contains what we need
                        fpath = glob(scenpath+"/"+dirv+"/"+var+"/"+modname+"/"+memname+"/*/*.nc")[0]
                        print("Look into: "+fpath)
                        field = read_slice(fpath, datebeg,dateend,latmin,latmax,plev)
                else:
                    try:
                        field = read_field(scendir+"*.nc", datebeg,dateend,latmin,latmax,lonmin,lonmax,plev,norm=normalize_lon)

                    except ValueError:
                        #assume first file contains what we need
                        fpath = glob(scenpath+"/"+dirv+"/"+var+"/"+modname+"/"+memname+"/*/*.nc")[0]
                        print("Look into: "+fpath)
                        field = read_field(fpath, datebeg,dateend,latmin,latmax,lonmin,lonmax,plev,norm=normalize_lon)

                field = field[var]

                #check that everything good with time axis (only reported, not enforced)
                if (field.time.size !=  nmonths):
                    print("something's wrong, nmonths="+str(nmonths)+" but field has size "+str(field.time.size))

                ## do appropriate means and selection

                if indice in ["Nino4","Nino3","NAWH","TCWV"]:
                    field = field.sel(lon=slice(lonmin,lonmax))

                if indice in pslices:
                    field = field.mean(dim="plev")

                #select ONDJFM only if not global warming
                if indice!="sfcT":
                    field = get_ONDJFM_day(field,timedim="time")

                #lon average if not already done
                if indice not in lonmeanfields:
                    field = field.mean(dim=("lon"))

                if win_ano:
                    # anomalies relative to the mean of each calendar month over the same period
                    # NOTE(review): the time mean of these anomalies is ~0 by construction,
                    # so the value written to ind_df below carries no signal when
                    # win_ano is True -- confirm this is intended.
                    field = field.groupby('time.month') - field.groupby('time.month').mean()

                #do approximate area weighted average
                field_avg = area_weighted_avg(field)

                ind_ds[indice].loc[dict(model=modname,member=imem,scen=scen,month=monrange)] = field_avg
                #average over time dimension and write into dataframe
                ind_df.loc[(modname,imem),(indice,scen)] = field_avg.mean()

                del field

ind_df = ind_df.astype(np.float64)
ind_df.to_csv("/home/lseverino/MT/circulation/"+savename)

Processing NAO-neg1 historical for AWI-CM-1-1-MR r1i1p1f1...
Processing NAO-pos1 historical for AWI-CM-1-1-MR r1i1p1f1...
Processing EA-neg1 historical for AWI-CM-1-1-MR r1i1p1f1...
Processing EA-neg2 historical for AWI-CM-1-1-MR r1i1p1f1...
Processing EA-pos1 historical for AWI-CM-1-1-MR r1i1p1f1...
Processing EAWR-neg1 historical for AWI-CM-1-1-MR r1i1p1f1...
Processing EAWR-pos1 historical for AWI-CM-1-1-MR r1i1p1f1...
Processing EAWR-pos2 historical for AWI-CM-1-1-MR r1i1p1f1...
Processing SCA-neg1 historical for AWI-CM-1-1-MR r1i1p1f1...
Processing SCA-neg2 historical for AWI-CM-1-1-MR r1i1p1f1...
Processing SCA-pos1 historical for AWI-CM-1-1-MR r1i1p1f1...
Processing NAO-neg1 ssp585 for AWI-CM-1-1-MR r1i1p1f1...
Processing NAO-pos1 ssp585 for AWI-CM-1-1-MR r1i1p1f1...
Processing EA-neg1 ssp585 for AWI-CM-1-1-MR r1i1p1f1...
Processing EA-neg2 ssp585 for AWI-CM-1-1-MR r1i1p1f1...
Processing EA-pos1 ssp585 for AWI-CM-1-1-MR r1i1p1f1...
Processing EAWR-neg1 ssp585 for AWI-CM-1-1-MR r

In [58]:
# write the monthly index time series to NetCDF
# (note: this rebinds `savename`, which previously held the CSV file name)
_name_parts = ["mon_ts", "WAN", "weather", "indices", "CU21", "sp_avg", modset, *selscen]
savename = "_".join(_name_parts) + ".nc"
ind_ds.to_netcdf("/home/lseverino/MT/circulation/" + savename)

In [59]:
# read the NetCDF file named by the current value of `savename` back into a new dataset
ind_ds2 = xr.open_dataset("/home/lseverino/MT/circulation/"+savename)

In [54]:
# change of every index between the two scenarios, then averaged over members per model
idx = pd.IndexSlice
_future = ind_df.loc[:, idx[:, "ssp585"]]
_past = ind_df.loc[:, idx[:, "historical"]]
# subtract raw values so the differing "scen" column labels do not prevent alignment
diff = _future - _past.values
diff_mean = diff.groupby("model").mean()

In [55]:
# display the per-model mean of (ssp585 - historical) for every index
diff_mean

indice,NAO-neg1,NAO-pos1,EA-neg1,EA-neg2,EA-pos1,EAWR-neg1,EAWR-pos1,EAWR-pos2,SCA-neg1,SCA-neg2,SCA-pos1
scen,ssp585,ssp585,ssp585,ssp585,ssp585,ssp585,ssp585,ssp585,ssp585,ssp585,ssp585
model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
ACCESS-CM2,1.536563e-05,-6.1e-05,6.1e-05,2.262584e-06,-2.796411e-05,-3.157212e-05,2.9e-05,6e-06,-3.167973e-05,-9.2e-05,3e-05
ACCESS-ESM1-5,1.934709e-05,8.1e-05,4e-06,5.404699e-05,-1.016839e-05,-4.819767e-05,1.9e-05,-1.9e-05,-3.62944e-05,-3.2e-05,-3.7e-05
AWI-CM-1-1-MR,-4.749135e-05,1.4e-05,8e-06,-3.838366e-05,2.976814e-05,-1.678305e-05,8e-06,-4.1e-05,6.835006e-05,1.4e-05,-2.2e-05
BCC-CSM2-MR,1.016339e-05,4.9e-05,-2.5e-05,-1.556587e-05,-6.710615e-05,3.424309e-06,3.6e-05,0.000116,-2.445987e-08,-0.000132,0.000152
CMCC-CM2-SR5,4.306667e-05,2e-06,1e-05,-6.134683e-05,-7.052908e-08,2.867971e-08,-8e-05,1.3e-05,-3.271589e-05,5.9e-05,1.3e-05
CMCC-ESM2,4.201379e-05,-2e-05,-2.8e-05,1.07622e-07,4.040627e-05,-2.668466e-05,9e-06,4.9e-05,-2.231943e-05,2.1e-05,-5.1e-05
CNRM-CM6-1,-2.814682e-05,-9e-06,-7.2e-05,4.944561e-05,1.35416e-05,-3.891374e-05,-3.5e-05,2e-05,2.005468e-05,-4.6e-05,1.1e-05
CNRM-CM6-1-HR,5.588299e-05,-7e-05,-1.6e-05,0.0001100393,-5.857243e-05,4.172158e-05,-9.3e-05,4e-06,-6.645177e-05,2.1e-05,-2.2e-05
CNRM-ESM2-1,1.640421e-05,9.1e-05,-0.000106,-5.07778e-05,-8.86652e-06,-5.862509e-06,-3e-05,5e-06,-5.967988e-05,-6.3e-05,2.7e-05
CanESM5,4.628776e-05,0.000126,-3e-05,-3.12258e-05,4.379378e-05,4.932329e-05,5.4e-05,-5e-06,4.332668e-05,5.9e-05,-0.000109


In [56]:
# average the per-model values over all models, one number per index
diff_mean.mean()

indice     scen  
NAO-neg1   ssp585    2.074261e-06
NAO-pos1   ssp585    5.048131e-06
EA-neg1    ssp585   -6.660787e-06
EA-neg2    ssp585    4.404369e-07
EA-pos1    ssp585   -1.174099e-06
EAWR-neg1  ssp585   -9.869429e-06
EAWR-pos1  ssp585   -9.050506e-06
EAWR-pos2  ssp585    5.143737e-06
SCA-neg1   ssp585   -8.106378e-06
SCA-neg2   ssp585   -4.938508e-06
SCA-pos1   ssp585   -5.633489e-06
dtype: float64

In [78]:
# NOTE(review): field_avg is whatever the last iteration of the index loop left behind,
# and this cell's execution count (78) is out of sequence -- re-run it after the loop
# before relying on the displayed value.
field_avg.mean()

20.36402635997781