# Calculate Climatology for CESM1 S2S Runs

### CESM1 data is located in:


### Climatology is calculated following the SubX protocol. It is output to a file:


### Function for calculating climatology is located in:
`clim_utils.py`

In [1]:
import xarray as xr
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import glob

from clim_utils import daily_climo_subx

### Pre-processing function called by `open_mfdataset` to handle files with missing data or incorrect times

In [3]:
# File Path information
#model='70Lwaccm6'
#path='/glade/p/nsc/ncgd0042/ssfcst/'+model+'70Lwaccm6'/p1/'
#dstr='00z_d01_d45'

model='CESM1_30LCAM5'
path='/glade/scratch/jrichter/CPC_DATA/'+model
dstr='00z_d01_d45'

# Variable to be processed
varname='tas_2m'

# Years to be gotton
sdate='19990106' # This was set by hand as the first init date for the first year
edate='20151231'
yrs_list=np.arange(1999,2016)

# Initialization months
mnums=['01','02','12']
mstrs=['jan','feb','dec']


# Ensemble members
enss=['00','01','02','03','04'] 

### Read in each ensemble member for hindcasts for 2m Temperature
* `init` is concat dimension for `open_mfdataset`
* Each month is read in individually, then all are combined by the `init` dimension
* `ens` for each `init` is read in, then the data are combined by the `ens` dimension

In [4]:
# Create empty list to append data for each ensemble member
fcst_ds_ens=[]

# Loop over ensembles
for iens in enss:

    # Create empty list to append data for each month
    fcst_ds_months=[]
    
    # Get list of files and read in data for each month for this ensemble member
    for mnum,mstr in zip(mnums,mstrs):
        
        # Get all the filenames for this month for all years
        fnames = [f'{path}/{varname}/{year}/{mnum}/{varname}_{model}_*{mstr}{year}_{dstr}_m{iens}.nc' for year in yrs_list]       
        print(fnames)
        
        # Create list of all filenames for this ensemble member, month, and all years
        files1=[]
        for files in fnames:
            f2=glob.glob(files)
            for f in f2:
                files1.append(f)
       
        # Read in data concatentating over the init dimension
        fcst_ds_tmp=xr.open_mfdataset(files1,parallel=True,combine='nested',
                                      decode_times=False,concat_dim='init')

        # Create dates for the init dimension and assign them
        init_dates_all=pd.date_range(start=sdate,end=edate,freq='7D')
        djf_dates=init_dates_all[(init_dates_all.month==int(mnum))]
        fcst_ds_tmp['init']=djf_dates

        # Append the latest month to the list
        fcst_ds_months.append(fcst_ds_tmp)
        
    
    # Combine the months into the init dimension
    fcst_ds_months = xr.combine_nested(fcst_ds_months, concat_dim=['init'])
    
    # Append this ensenble member
    fcst_ds_ens.append(fcst_ds_months)
    
    
fcst_ds_ens   

['/glade/scratch/jrichter/CPC_DATA/CESM1_30LCAM5/pr/1999/01/pr_CESM1_30LCAM5_*jan1999_00z_d01_d45_m00.nc', '/glade/scratch/jrichter/CPC_DATA/CESM1_30LCAM5/pr/2000/01/pr_CESM1_30LCAM5_*jan2000_00z_d01_d45_m00.nc', '/glade/scratch/jrichter/CPC_DATA/CESM1_30LCAM5/pr/2001/01/pr_CESM1_30LCAM5_*jan2001_00z_d01_d45_m00.nc', '/glade/scratch/jrichter/CPC_DATA/CESM1_30LCAM5/pr/2002/01/pr_CESM1_30LCAM5_*jan2002_00z_d01_d45_m00.nc', '/glade/scratch/jrichter/CPC_DATA/CESM1_30LCAM5/pr/2003/01/pr_CESM1_30LCAM5_*jan2003_00z_d01_d45_m00.nc', '/glade/scratch/jrichter/CPC_DATA/CESM1_30LCAM5/pr/2004/01/pr_CESM1_30LCAM5_*jan2004_00z_d01_d45_m00.nc', '/glade/scratch/jrichter/CPC_DATA/CESM1_30LCAM5/pr/2005/01/pr_CESM1_30LCAM5_*jan2005_00z_d01_d45_m00.nc', '/glade/scratch/jrichter/CPC_DATA/CESM1_30LCAM5/pr/2006/01/pr_CESM1_30LCAM5_*jan2006_00z_d01_d45_m00.nc', '/glade/scratch/jrichter/CPC_DATA/CESM1_30LCAM5/pr/2007/01/pr_CESM1_30LCAM5_*jan2007_00z_d01_d45_m00.nc', '/glade/scratch/jrichter/CPC_DATA/CESM1_30LCA

OSError: no files to open

In [None]:
# Combine data over ensemble dimension
fcst_ds_ens = xr.combine_nested(fcst_ds_ens, concat_dim=['ens'])
fcst_ds_ens['ens']=np.arange(0,len(enss))

fcst_ds_ens

### Calculate the Ensemble Mean

In [None]:
fcst_ds=fcst_ds_ens.mean(dim='ens')

In [None]:
fcst_ds

### Determine leads and set them as integers for the lead dimension

In [None]:
nt=fcst_ds['TIME'].size
leads=np.arange(0,nt)
fcst_ds=fcst_ds.rename({'TIME':'lead','LAT':'lat','LON':'lon','TAS':varname})
fcst_ds['lead']=leads

### Calculate the climatology and save to file

In [None]:
climo_out_path='/glade/scratch/kpegion/ESPWG/data/'+model+'/hcst/climo/'
cfname=climo_out_path+'climo_'+varname+'.nc'
print(cfname)
climo=daily_climo_subx(fcst_ds[varname],varname,fname=cfname)