# 1.1: Process SMYLE (Nov 23 initialization)

In [None]:
%load_ext autoreload
%autoreload 2
import xarray as xr 
import numpy as np  
import cftime
import copy
import scipy.stats
from scipy import signal
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
%matplotlib inline

from SMYLEutils import calendar_utils as cal
from SMYLEutils import stat_utils as stat
from SMYLEutils import mapplot_utils as maps
from SMYLEutils import colorbar_utils as cbars
from SMYLEutils import io_utils as io

In [None]:
import dask
from dask.distributed import wait
# Show the installed dask version as this cell's output.
dask.__version__

## Create Dask cluster and client

In [None]:
# Tear down any Dask client/cluster left over from an earlier run of this notebook.
# On a fresh kernel neither name exists yet, so a NameError here only means there is
# nothing to clean up. The client is closed first so that it disconnects while the
# scheduler owned by the cluster is still running.
try:
    client.close()
except NameError:
    pass
try:
    cluster.close()
except NameError:
    pass

In [None]:
def get_ClusterClient():
    """Create a PBS-backed Dask cluster and connect a client to it.

    Returns
    -------
    tuple
        ``(cluster, client)``: the ``PBSCluster`` instance and the ``Client``
        attached to it. The caller is expected to scale the cluster afterwards
        (e.g. ``cluster.scale(n)``).
    """
    import dask
    from dask.distributed import Client
    from dask_jobqueue import PBSCluster

    # NOTE(review): cores/memory (3 cores, 300GB) do not match the PBS
    # resource_spec (ncpus=1, mem=10GB) -- confirm this mismatch is intentional.
    pbs_options = dict(
        cores=3,
        memory='300GB',
        processes=1,
        queue='casper',
        resource_spec='select=1:ncpus=1:mem=10GB',
        project='p93300070',
        walltime='05:00:00',
        interface='mgt',
    )
    pbs_cluster = PBSCluster(**pbs_options)

    # Point dashboard links at the JupyterHub proxy URL template.
    dashboard_link = 'https://jupyterhub.hpc.ucar.edu/stable/user/{USER}/proxy/{port}/status'
    dask.config.set({'distributed.dashboard.link': dashboard_link})

    return pbs_cluster, Client(pbs_cluster)

In [None]:
# Create the PBS-backed Dask cluster and the client connected to it.
cluster, client = get_ClusterClient()
# Ask the cluster to scale to 30 workers.
cluster.scale(30)

## Read in POP monthly data
- The data loading steps can take ~30 minutes
- Chosen field is returned as a dask array with leading dimensions of Y (initialization year), M (ensemble member), and L (lead month).

In [None]:
def preprocessor(ds0,nlead,field):
    """Reduce one timeseries file to the requested field on a lead-index axis.

    This preprocessor is applied to each timeseries file individually. Edit it
    to suit your analysis (for example the depth selection below) to speed up
    processing.

    Parameters
    ----------
    ds0 : dataset opened from a single timeseries file; must contain ``field``,
        a ``time`` coordinate and a ``z_t`` coordinate.
    nlead : number of leading time steps to keep.
    field : name of the variable to extract.

    Returns
    -------
    Dataset holding ``field`` on dimension ``L`` (1-based lead index), with the
    original ``time`` values kept as a data variable that carries an extra
    leading ``Y`` dimension.
    """
    # presumably re-centres the time stamps within each averaging interval --
    # see SMYLEutils.calendar_utils.time_set_mid to confirm
    centered = cal.time_set_mid(ds0, 'time')

    # Pick the level nearest z_t=0; change this selection for another depth, or
    # drop the .sel(...) step entirely to keep the field as it is in the file.
    nearest_level = centered[field].sel(z_t=0, method='nearest')
    leads = nearest_level.isel(time=slice(0, nlead))

    # Lead index runs 1..N over however many time steps survived the slice.
    lead_index = np.arange(leads.sizes["time"]) + 1
    out = (
        leads.assign_coords(L=("time", lead_index))
        .swap_dims({"time": "L"})
        .to_dataset(name=field)
        .reset_coords(["time"])
    )

    # Give the time variable a leading Y dimension.
    out["time"] = out.time.expand_dims("Y")
    return out

In [None]:
var = 'co3_sat_arag' # name of the field to read; also part of the output file names
depth = 'surface' # depth label that goes into the output data file name
time = 'monthly' # time-resolution label that goes into the output file names

# November Initialization 2020-2023

In [None]:
%%time
# SMYLE November-initialized data (startmonth = 11) for the chosen field
# process all 20 ensemble members, all start years from firstyear to lastyear (1970-2020):
field = var
datadir = '/glade/campaign/cesm/development/espwg/SMYLE/archive/'
# presumably ????-MM.EEE are year/month/member placeholders that
# io.get_monthly_data fills in -- TODO confirm against SMYLEutils.io_utils
casename = 'b.e21.BSMYLE.f09_g17.????-MM.EEE'
# casename = 'b.e21.BSMYLE.f09_g17.????-MM.EEE'
# BSMYLE-XT-beta
# b.e21.BSMYLE.f09_g17.2023-08.001
filetype = '.pop.h.'
# Glob-style template for the per-case monthly timeseries files of this field.
filetemplate = datadir+casename+'/ocn/proc/tseries/month_1/'+casename+filetype+field+'.*.nc'
ens = 20  # number of ensemble members
nlead = 24  # number of lead time steps kept per file (passed on to preprocessor)
firstyear = 1970
lastyear  = 2020
startmonth = 11  # initialization month (November)

# Empty chunk specification, handed to get_monthly_data as `chunks`.
chunk = {}
smyle11 = io.get_monthly_data(filetemplate,filetype,ens,nlead,field,firstyear,lastyear,startmonth,preprocessor,chunks=chunk)
smyle11.nbytes/1e9 # size in GB, shown as the cell output

In [None]:
%%time
# presumably smyle11 is a dask-backed xarray object; if so, persist() starts the
# computation on the cluster and keeps the results in worker memory -- TODO confirm
smyle11 = smyle11.persist()

In [None]:
%%time
# Pull the data into local memory, then write the data and its time variable to netCDF.
print('loading')
# NOTE(review): if smyle11 is an xarray object, load() works in place and returns the
# same object, so smyle11_use would be an alias of smyle11 rather than a copy -- confirm.
smyle11_use = smyle11.load()
smyle11_time = smyle11.time.load()

print('saving')
# The data file name carries var, time and depth; the time file name omits depth.
smyle11_use.to_netcdf('/glade/derecho/scratch/smogen/SMYLE-Extreme/'+ var +'.' + time + '.' + depth + '.live11.update.new_run.nc')
smyle11_time.to_netcdf('/glade/derecho/scratch/smogen/SMYLE-Extreme/'+ var +'.' + time + '.live11.time.update.new_run.nc')

In [None]:
# Drop the smyle11_use name.
# NOTE(review): smyle11 is still defined and may refer to the same loaded data, in which
# case this alone does not free the memory -- confirm.
del smyle11_use