In [1]:
%load_ext autoreload
%autoreload 2
import xarray as xr 
import numpy as np  
import cftime
import datetime
import copy
import scipy.stats
from scipy import signal
from functools import partial
import glob
import dask
import gsw
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.units as munits
from matplotlib.dates import ConciseDateConverter
# Register a concise date converter with matplotlib for each cftime datetime class
# used as an axis value (one converter instance per class).
for _cftime_cls in (cftime.DatetimeNoLeap, cftime.datetime):
    munits.registry[_cftime_cls] = ConciseDateConverter()
#from geocat.viz import util as gvutil
#import util
import cartopy
import xesmf as xe
#import xskillscore as xs
#cartopy.config['pre_existing_data_dir']='/ihesp/shared/cartopy_features'
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from cartopy.util import add_cyclic_point
import matplotlib.ticker as mticker
%matplotlib inline

import os

#from HRDPutils import calendar_utils as cal
#from HRDPutils import stat_utils as stat
#from HRDPutils import mapplot_utils as maps
#from HRDPutils import colorbar_utils as cbars
#from HRDPutils import io_utils as io
#from HRDPutils import regrid_utils as regrid

In [2]:
import dask
from dask.distributed import wait
dask.__version__

'2021.09.0'

## Create Dask Cluster

In [11]:
# Close out Dask Cluster and release workers:
# NOTE:  only run this cell to terminate Dask Cluster!
#cluster.close()
#client.close()

In [4]:
# Use this if computing annual means:
def get_ClusterClient(
    cores=1,
    memory='50GB',
    processes=1,
    queue='casper',
    project='NCGD0011',
    walltime='02:00:00',
    interface='ib0',
):
    """Create a PBS-backed Dask cluster and a client connected to it.

    All arguments are optional; the defaults reproduce the settings this
    notebook has always used, so ``get_ClusterClient()`` behaves as before.

    Parameters
    ----------
    cores : int
        Cores requested per PBS job (also used for ``ncpus`` in the
        PBS ``select`` statement).
    memory : str
        Memory requested per PBS job, e.g. ``'50GB'`` (also used for ``mem``
        in the PBS ``select`` statement).
    processes : int
        Number of worker processes per job.
    queue : str
        PBS queue to submit to.
    project : str
        Project/account code passed to ``PBSCluster``.
    walltime : str
        Walltime per job, ``'HH:MM:SS'``.
    interface : str
        Network interface name passed to ``PBSCluster``.

    Returns
    -------
    (cluster, client)
        The ``PBSCluster`` and the ``dask.distributed.Client`` attached to it.
        No workers are requested here; call ``cluster.scale(...)`` afterwards.
    """
    import dask
    from dask_jobqueue import PBSCluster
    from dask.distributed import Client

    cluster = PBSCluster(
        cores=cores,
        memory=memory,
        processes=processes,
        queue=queue,
        # Built from cores/memory so the PBS request can never disagree with
        # what the Dask worker believes it has.
        resource_spec='select=1:ncpus={}:mem={}'.format(cores, memory),
        project=project,
        walltime=walltime,
        interface=interface,
    )

    # Configuration is applied after the cluster is built, as in the original
    # cell: the dashboard link template is routed through the JupyterHub proxy
    # and large-chunk splitting on slicing is enabled.
    dask.config.set({
        'distributed.dashboard.link':
        'https://jupyterhub.hpc.ucar.edu/stable/user/{USER}/proxy/{port}/status',
        'array.slicing.split_large_chunks': True
    })
    client = Client(cluster)
    return cluster, client

# Number of workers to request from the scheduler once the cluster exists.
N_WORKERS = 72

cluster, client = get_ClusterClient()
cluster.scale(N_WORKERS)

In [5]:
cluster

0,1
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/fredc/proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.12.206.60:41207,Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/fredc/proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


# Main Processing

In [6]:
%%time
# Open the POP grid file and bind each grid field used by this notebook to a
# short lower-case name. Fields are pulled in the same order as before.
infile = '/glade/work/fredc/metric/grid/POP_tx0.1v3_grid.nc'
dsg = xr.open_dataset(infile)

# TLONG / TLAT / TAREA
tlon, tlat, tarea = (dsg[name].persist() for name in ('TLONG', 'TLAT', 'TAREA'))
# DZT / DZU
dzt, dzu = (dsg[name].persist() for name in ('DZT', 'DZU'))
# DXT / DYT / DXU / DYU
dxt, dyt, dxu, dyu = (dsg[name].persist() for name in ('DXT', 'DYT', 'DXU', 'DYU'))
# HTN / HTE
htn, hte = (dsg[name].persist() for name in ('HTN', 'HTE'))
# HT / HU
ht, hu = (dsg[name].persist() for name in ('HT', 'HU'))

CPU times: user 22.9 ms, sys: 25.9 ms, total: 48.8 ms
Wall time: 535 ms


In [7]:
# Monthly TEMP time-series file to process. The commented-out paths are the
# other runs this notebook has been pointed at; exactly one `tfile` is active.
#tfile = '/glade/campaign/collections/cmip/CMIP6/iHESP/BRCP26/HR/b.e13.BRCP26C5.ne120_t12.cesm-ihesp-hires1.0.42.003/ocn/proc/tseries/month_1/b.e13.BRCP26C5.ne120_t12.cesm-ihesp-hires1.0.42.003.pop.h.TEMP.200601-210012.nc'
#tfile = '/glade/campaign/collections/cmip/CMIP6/iHESP/BRCP45/HR/b.e13.BRCP45C5.ne120_t12.cesm-ihesp-hires1.0.42.003/ocn/proc/tseries/month_1/b.e13.BRCP45C5.ne120_t12.cesm-ihesp-hires1.0.42.003.pop.h.TEMP.200601-210012.nc'
#tfile = '/glade/campaign/collections/cmip/CMIP6/iHESP/BRCP85/HR/b.e13.BRCP85C5.ne120_t12.cesm-ihesp-hires1.0.30.002/ocn/proc/tseries/month_1/b.e13.BRCP85C5.ne120_t12.cesm-ihesp-hires1.0.30.002.pop.h.TEMP.200601-210012.nc'
tfile = '/glade/campaign/collections/cmip/CMIP6/iHESP/BRCP85/HR/b.e13.BRCP85C5.ne120_t12.cesm-ihesp-hires1.0.31.003/ocn/proc/tseries/month_1/b.e13.BRCP85C5.ne120_t12.cesm-ihesp-hires1.0.31.003.pop.h.TEMP.200601-210012.nc'

# Open lazily with dask chunks along nlat / nlon / z_t.
temp_chunks = {"nlat": 200, "nlon": 100, "z_t":62}
dst = xr.open_dataset(tfile, chunks=temp_chunks)

# Move every time stamp back by 15 days (original note: "middle of the month").
dst['time'] = dst['time'] - datetime.timedelta(days=15)

temp_all = dst['TEMP']

In [8]:
# Monthly SALT time-series file matching the active TEMP file. The
# commented-out paths are the other runs; exactly one `sfile` is active.
#sfile = '/glade/campaign/collections/cmip/CMIP6/iHESP/BRCP26/HR/b.e13.BRCP26C5.ne120_t12.cesm-ihesp-hires1.0.42.003/ocn/proc/tseries/month_1/b.e13.BRCP26C5.ne120_t12.cesm-ihesp-hires1.0.42.003.pop.h.SALT.200601-210012.nc'
#sfile = '/glade/campaign/collections/cmip/CMIP6/iHESP/BRCP45/HR/b.e13.BRCP45C5.ne120_t12.cesm-ihesp-hires1.0.42.003/ocn/proc/tseries/month_1/b.e13.BRCP45C5.ne120_t12.cesm-ihesp-hires1.0.42.003.pop.h.SALT.200601-210012.nc'
#sfile = '/glade/campaign/collections/cmip/CMIP6/iHESP/BRCP85/HR/b.e13.BRCP85C5.ne120_t12.cesm-ihesp-hires1.0.30.002/ocn/proc/tseries/month_1/b.e13.BRCP85C5.ne120_t12.cesm-ihesp-hires1.0.30.002.pop.h.SALT.200601-210012.nc'
sfile = '/glade/campaign/collections/cmip/CMIP6/iHESP/BRCP85/HR/b.e13.BRCP85C5.ne120_t12.cesm-ihesp-hires1.0.31.003/ocn/proc/tseries/month_1/b.e13.BRCP85C5.ne120_t12.cesm-ihesp-hires1.0.31.003.pop.h.SALT.200601-210012.nc'

# Open lazily with the same chunk layout used for the TEMP dataset.
salt_chunks = {"nlat": 200, "nlon": 100, "z_t":62}
dss = xr.open_dataset(sfile, chunks=salt_chunks)

# Reuse the (already shifted) time coordinate of the TEMP dataset so both
# variables select on identical time stamps.
dss['time'] = dst['time']

salt_all = dss['SALT']

In [9]:
# Sorted, de-duplicated calendar years present on the TEMP time axis.
years = np.unique(temp_all['time'].dt.year.values)

In [10]:
def _annual_mean(monthly, weights):
    """Weighted annual mean of ``monthly`` along ``time``.

    ``weights`` are per-month weights on the same ``time`` axis. Only months
    that actually hold data contribute to the weight sum, so points with no
    valid month at all come out as NaN and a genuine mean of exactly 0 is kept
    (the previous ``ones_like`` denominator was always 1 and relied on a
    ``!= 0`` mask to blank out empty points).
    """
    weighted_sum = (monthly * weights).resample(time="AS").sum(dim="time")
    weight_sum = (monthly.notnull() * weights).resample(time="AS").sum(dim="time")
    # NaN out zero weight sums so empty points divide to NaN rather than 0/0.
    return weighted_sum / weight_sum.where(weight_sum > 0)


# --- Loop-invariant setup -------------------------------------------------
# Pressure from depth and latitude; z_t is divided by 100 and negated before
# being handed to gsw (presumably cm -> m, positive-down -> height; confirm
# against the file's z_t units). It does not depend on the year.
p = gsw.p_from_z(-dst.z_t/100., tlat)

# Annual files go next to the monthly ones, under 'year_1' instead of 'month_1'.
outdir = os.path.dirname(tfile).replace('month_1','year_1')
# Fail early (before any heavy computation) if the directory cannot be made.
os.makedirs(outdir, exist_ok=True)

# File-name stem: the monthly file name without its last three dot-separated
# fields (variable, date range, extension).
fout_stem = os.path.split(tfile)[-1].split('.')[:-3]

for y in years:
    year = int(y)
    year_span = slice(cftime.DatetimeNoLeap(year, 1, 1), cftime.DatetimeNoLeap(year, 12, 31))

    # Select this year's monthly fields and blank out the sentinel values
    # (-1 for TEMP, -1000 for SALT).
    temp = temp_all.sel(time=year_span)
    temp = temp.where(temp!=-1)
    salt = salt_all.sel(time=year_span)
    salt = salt.where(salt!=-1000)

    # Weight each month by its share of the days in the year.
    month_length = temp.time.dt.days_in_month
    wgts = month_length.groupby("time.year") / month_length.groupby("time.year").sum()

    ytemp = _annual_mean(temp, wgts)
    ysalt = _annual_mean(salt, wgts)

    # Annual-mean salinity/temperature -> SA -> CT -> density via gsw.
    SA = gsw.SA_from_SP(ysalt, p, tlon, tlat)
    CT = gsw.CT_from_pt(SA, ytemp)
    rho = gsw.rho(SA,CT,p)
    # Use the grid file's z_t coordinate on the result.
    rho['z_t'] = dzt.z_t

    #Write to netcdf
    dso = rho.to_dataset(name='RHO')

    fout = '.'.join(fout_stem + ['RHO', '{:04d}'.format(year), 'nc'])
    fout = os.path.join(outdir,fout)

    dso.to_netcdf(fout, unlimited_dims='time')