# 1.1: Process SMYLE

In [None]:
%load_ext autoreload
%autoreload 2
import xarray as xr 
import numpy as np  
import cftime
import copy
import scipy.stats
from scipy import signal
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
%matplotlib inline

from SMYLEutils import calendar_utils as cal
from SMYLEutils import stat_utils as stat
from SMYLEutils import mapplot_utils as maps
from SMYLEutils import colorbar_utils as cbars
from SMYLEutils import io_utils as io

In [None]:
import dask
from dask.distributed import wait
dask.__version__

## Create Dask cluster

In [None]:
# Shut down any Dask client/cluster left over from an earlier run of this notebook.
# On a fresh kernel neither name is defined yet, so skip the teardown instead of
# stopping the notebook with a NameError.
try:
    # Disconnect the client first so it is not left attached to a scheduler
    # that has already been shut down.
    client.close()
    cluster.close()
except NameError:
    pass

In [None]:
# NOTE(review): stray scratch cell -- a bare literal that nothing else in the notebook reads; looks safe to delete.
2

In [None]:
def get_ClusterClient():
    """Create a PBS-backed Dask cluster and a client connected to it.

    Also points Dask's dashboard link at the NCAR JupyterHub proxy URL.

    Returns
    -------
    tuple
        ``(cluster, client)`` -- the ``PBSCluster`` and the ``Client`` attached
        to it. Scaling the cluster is left to the caller.
    """
    import dask
    from dask.distributed import Client
    from dask_jobqueue import PBSCluster

    # NOTE(review): cores/memory (3 cores, 300GB) do not match what
    # resource_spec requests from PBS (ncpus=1, mem=10GB) -- confirm intended.
    job_options = dict(
        cores=3,
        memory='300GB',
        processes=1,
        queue='casper',
        resource_spec='select=1:ncpus=1:mem=10GB',
        project='p93300070',
        walltime='05:00:00',
        interface='mgt',
    )
    cluster = PBSCluster(**job_options)

    dashboard_url = 'https://jupyterhub.hpc.ucar.edu/stable/user/{USER}/proxy/{port}/status'
    dask.config.set({'distributed.dashboard.link': dashboard_url})

    return cluster, Client(cluster)

In [None]:
# Start the PBS cluster and its client, then ask the cluster to scale to 30 workers.
cluster, client = get_ClusterClient()
cluster.scale(30)

In [None]:
# Last expression in the cell, so the notebook displays the cluster object.
cluster

## Read in POP monthly data
- The data loading steps can take ~30 minutes
- Chosen field is returned as a dask array with leading dimensions of Y (initialization year), M (ensemble member), and L (lead month).
- The "time" variable, which gives the prediction verification time (centered time for a given season), is also dimensioned (Y, L).


In [None]:
def preprocessor(ds0,nlead,field):
    """Reduce one timeseries file to the requested field, indexed by lead.

    Applied to each individual timeseries file. Edit it to suit your analysis
    (for example the depth selection) to speed up processing.

    Parameters
    ----------
    ds0 : dataset for a single timeseries file; must contain ``field`` and a
        ``time`` coordinate.
    nlead : number of leading time steps to keep.
    field : name of the variable to extract.

    Returns
    -------
    Dataset holding ``field`` on dimension ``L`` (1-based lead index) and a
    ``time`` data variable carrying an added ``Y`` dimension.
    """
    centered = cal.time_set_mid(ds0, 'time')

    # Pick the level nearest z_t=0, then keep only the first nlead time steps.
    # The depth dimension name varies with the variable; for a field without
    # z_t, drop the .sel() and slice centered[field] directly.
    surface = centered[field].sel(z_t=0, method='nearest')
    leads = surface.isel(time=slice(0, nlead))

    # L numbers the retained steps 1..n and replaces "time" as the dimension.
    nsteps = leads.sizes["time"]
    lead_index = np.arange(1, nsteps + 1)
    by_lead = leads.assign_coords(L=("time", lead_index)).swap_dims({"time": "L"})

    # Demote "time" from coordinate to data variable, then add a Y dimension to it.
    out = by_lead.to_dataset(name=field).reset_coords(["time"])
    out["time"] = out.time.expand_dims("Y")
    return out

In [None]:
var = 'diazChl' # variable of interest
depth = 'surface' # depth label used in the output file names
time = 'monthly' # temporal-resolution label used in the output file names

## February Initialization

In [None]:
%%time
# SMYLE February-initialized hindcasts for the field named by `var`.
# 20 ensemble members, 24 lead months, start years bounded by firstyear/lastyear.

# Hindcast selection
startmonth = 2
firstyear, lastyear = 1970, 2019
ens = 20
nlead = 24
field = var

# Timeseries file pattern handed to io.get_monthly_data.
# NOTE(review): ????-MM and EEE look like start-date / member placeholders -- confirm.
datadir = '/glade/campaign/cesm/development/espwg/SMYLE/archive/'
casename = 'b.e21.BSMYLE.f09_g17.????-MM.EEE'
filetype = '.pop.h.'
filetemplate = f'{datadir}{casename}/ocn/proc/tseries/month_1/{casename}{filetype}{field}.*.nc'

chunk = {}
smyle02 = io.get_monthly_data(
    filetemplate, filetype, ens, nlead, field,
    firstyear, lastyear, startmonth, preprocessor, chunks=chunk,
)
smyle02.nbytes / 1e9  # size in GB

In [None]:
%%time
# Trigger computation of the February data and keep the result as Dask-backed arrays in memory.
smyle02 = smyle02.persist()

In [None]:
%%time
# Bring the February data and its verification times into local memory.
smyle02 = smyle02.load()
smyle02_time = smyle02.time.load()

# Archive both to scratch.
# NOTE(review): the time file name omits the depth label that the data file carries -- confirm intended.
smyle02.to_netcdf(f'/glade/derecho/scratch/smogen/SMYLE-Extreme/{var}.{time}.{depth}.02.nc')
smyle02_time.to_netcdf(f'/glade/derecho/scratch/smogen/SMYLE-Extreme/{var}.{time}.02.time.nc')

In [None]:
# Drop the February references so their memory can be reclaimed before the next initialization is read.
del smyle02, smyle02_time

## May Initialization

In [None]:
%%time
# SMYLE May-initialized hindcasts for the field named by `var`.
# 20 ensemble members, 24 lead months, start years bounded by firstyear/lastyear.

# Hindcast selection
startmonth = 5
firstyear, lastyear = 1970, 2019
ens = 20
nlead = 24
field = var

# Timeseries file pattern handed to io.get_monthly_data.
# NOTE(review): ????-MM and EEE look like start-date / member placeholders -- confirm.
datadir = '/glade/campaign/cesm/development/espwg/SMYLE/archive/'
casename = 'b.e21.BSMYLE.f09_g17.????-MM.EEE'
filetype = '.pop.h.'
filetemplate = f'{datadir}{casename}/ocn/proc/tseries/month_1/{casename}{filetype}{field}.*.nc'

chunk = {}
smyle05 = io.get_monthly_data(
    filetemplate, filetype, ens, nlead, field,
    firstyear, lastyear, startmonth, preprocessor, chunks=chunk,
)
smyle05.nbytes / 1e9  # size in GB

In [None]:
%%time
# Trigger computation of the May data and keep the result as Dask-backed arrays in memory.
smyle05 = smyle05.persist()

In [None]:
%%time
# Bring the May data and its verification times into local memory.
smyle05 = smyle05.load()
smyle05_time = smyle05.time.load()

# Archive both to scratch.
# NOTE(review): the time file name omits the depth label that the data file carries -- confirm intended.
smyle05.to_netcdf(f'/glade/derecho/scratch/smogen/SMYLE-Extreme/{var}.{time}.{depth}.05.nc')
smyle05_time.to_netcdf(f'/glade/derecho/scratch/smogen/SMYLE-Extreme/{var}.{time}.05.time.nc')

In [None]:
# Drop the May references so their memory can be reclaimed before the next initialization is read.
del smyle05, smyle05_time

## August Initialization

In [None]:
%%time
# SMYLE August-initialized hindcasts for the field named by `var`.
# 20 ensemble members, 24 lead months, start years bounded by firstyear/lastyear.

# Hindcast selection
startmonth = 8
firstyear, lastyear = 1970, 2019
ens = 20
nlead = 24
field = var

# Timeseries file pattern handed to io.get_monthly_data.
# NOTE(review): ????-MM and EEE look like start-date / member placeholders -- confirm.
datadir = '/glade/campaign/cesm/development/espwg/SMYLE/archive/'
casename = 'b.e21.BSMYLE.f09_g17.????-MM.EEE'
filetype = '.pop.h.'
filetemplate = f'{datadir}{casename}/ocn/proc/tseries/month_1/{casename}{filetype}{field}.*.nc'

chunk = {}
smyle08 = io.get_monthly_data(
    filetemplate, filetype, ens, nlead, field,
    firstyear, lastyear, startmonth, preprocessor, chunks=chunk,
)
smyle08.nbytes / 1e9  # size in GB

In [None]:
%%time
# Trigger computation of the August data and keep the result as Dask-backed arrays in memory.
smyle08 = smyle08.persist()

In [None]:
%%time
# Bring the August data and its verification times into local memory.
smyle08 = smyle08.load()
smyle08_time = smyle08.time.load()

# Archive both to scratch.
# NOTE(review): the time file name omits the depth label that the data file carries -- confirm intended.
smyle08.to_netcdf(f'/glade/derecho/scratch/smogen/SMYLE-Extreme/{var}.{time}.{depth}.08.nc')
smyle08_time.to_netcdf(f'/glade/derecho/scratch/smogen/SMYLE-Extreme/{var}.{time}.08.time.nc')

In [None]:
# Drop the August references so their memory can be reclaimed before the next initialization is read.
del smyle08, smyle08_time

## November Initialization

In [None]:
%%time
# SMYLE November-initialized hindcasts for the field named by `var`.
# 20 ensemble members, 24 lead months, start years bounded by firstyear/lastyear.

# Hindcast selection
startmonth = 11
firstyear, lastyear = 1970, 2019
ens = 20
nlead = 24
field = var

# Timeseries file pattern handed to io.get_monthly_data.
# NOTE(review): ????-MM and EEE look like start-date / member placeholders -- confirm.
datadir = '/glade/campaign/cesm/development/espwg/SMYLE/archive/'
casename = 'b.e21.BSMYLE.f09_g17.????-MM.EEE'
filetype = '.pop.h.'
filetemplate = f'{datadir}{casename}/ocn/proc/tseries/month_1/{casename}{filetype}{field}.*.nc'

chunk = {}
smyle11 = io.get_monthly_data(
    filetemplate, filetype, ens, nlead, field,
    firstyear, lastyear, startmonth, preprocessor, chunks=chunk,
)
smyle11.nbytes / 1e9  # size in GB

In [None]:
%%time
# Trigger computation of the November data and keep the result as Dask-backed arrays in memory.
smyle11 = smyle11.persist()

In [None]:
%%time
# Bring the November data and its verification times into local memory.
smyle11 = smyle11.load()
smyle11_time = smyle11.time.load()

# Archive both to scratch.
# NOTE(review): the time file name omits the depth label that the data file carries -- confirm intended.
smyle11.to_netcdf(f'/glade/derecho/scratch/smogen/SMYLE-Extreme/{var}.{time}.{depth}.11.nc')
smyle11_time.to_netcdf(f'/glade/derecho/scratch/smogen/SMYLE-Extreme/{var}.{time}.11.time.nc')

In [None]:
# Shut down Dask: disconnect the client first so it is not left attached to a
# scheduler that has already gone away, then close the cluster itself.
client.close()
cluster.close()