In [1]:
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
import os
from glob import glob
import xarray as xr
import numpy as np
import esmlab
import pop_tools 
# import ncar_jobqueue
# from dask.distributed import Client

# NOTE: a dask cluster is needed when a variable spans the full 60 depth layers.

In [2]:
from distributed import Client
from ncar_jobqueue import NCARCluster
import dask

# Settings for the dask cluster (NCARCluster uses PBSCluster under the hood).
WALLTIME = '01:00:00'   # walltime string passed to NCARCluster
N_WORKERS = 20          # number of workers to request

cluster = NCARCluster(walltime=WALLTIME)
cluster.scale(N_WORKERS)

# All subsequent dask computations go through this client.
client = Client(cluster)

In [3]:
# Display the client summary (scheduler address, dashboard link, worker count).
client

0,1
Client  Scheduler: tcp://10.12.206.60:35305  Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/kristenk/proxy/8787/status,Cluster  Workers: 0  Cores: 0  Memory: 0 B


In [4]:
# Run configuration: which case to read, whose scratch archive it lives in,
# and which variables to process.
case = 'g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.4p2z.001'
user = 'kristenk'

# Directory holding the monthly time-series files for this case.
# Built from `user` and `case` so changing either one is enough.
path = f'/glade/scratch/{user}/archive/{case}/ocn/proc/tseries/month_1'

# Variables to process in this run.
variables = ['TEMP', 'HMXL']

# Alternative variable sets used in earlier runs (swap into `variables` as needed):
#['spC','coccoC','diatC','microzooC','mesozooC','IFRAC','photoC_TOT_zint']
#['NO3','ALK','POC_FLUX_100m']
#['TAUX','TAUY']
#['TEMP']
#['graze_cocco_zoo2','graze_sp_zoo2','graze_zoo1_zoo2','graze_diaz_zoo2']
#['graze_cocco_zoo3','graze_diaz_zoo3','graze_diat_zoo3','graze_zoo2_zoo3','graze_diat_zoo4','graze_zoo3_zoo4']
#['graze_sp_zoo1','graze_diaz_zoo1','graze_sp_zoo2','graze_diaz_zoo2','graze_cocco_zoo2','graze_zoo1_zoo2']
#['spChl','diazChl','diatChl','coccoChl']
#['photoC_diat_zint','photoC_sp_zint','photoC_diaz_zint','photoC_cocco_zint','photoC_TOT_zint']
#['zoo1C','zoo2C','zoo3C','zoo4C']
#['x_graze_zoo1_zint','x_graze_zoo2_zint','x_graze_zoo3_zint','x_graze_zoo4_zint']
#['diazC','diatC','spC','coccoC']
#['PO4','Fe','NO3','SiO3','IFRAC']
#['sp_light_lim_Cweight_avg_100m','diat_light_lim_Cweight_avg_100m','PD']

In [5]:
# Show the resolved input directory as a quick sanity check.
path

'/glade/scratch/kristenk/archive/g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.4p2z.001/ocn/proc/tseries/month_1'

In [6]:
# Month range (YYYYMM-YYYYMM) that appears in the monthly input file names.
# Uncomment exactly one of these.
# IAFm = '000101-006112'
# IAFm = '004201-005112'
IAFm = '006201-012212'
# IAFm = '012301-018312'
# IAFm = '018401-024412'

# Year range (YYYY-YYYY) used in the annual output file names. It is derived
# from IAFm so the two labels cannot drift apart when the range is changed.
_first_month, _last_month = IAFm.split('-')
IAFy = f'{_first_month[:4]}-{_last_month[:4]}'

In [7]:
%%time
ds_list = []
print(f'reading {case}')
ds = xr.Dataset()

for v in variables:
    
    print(v)
    
    files = sorted(glob(f'{path}/{case}.pop.h.{v}.{IAFm}.nc'))     
  
    dsv=xr.open_mfdataset(files, data_vars="minimal", coords='minimal', compat="override", parallel=True, concat_dim="time",
                       drop_variables=["transport_components", "transport_regions"], decode_times=True, chunks={'z_t' : 1})

    ds = xr.merge((ds, dsv))
    ds_list.append(ds)

reading g.e22.GOMIPECOIAF_JRA-1p4-2018.TL319_g17.4p2z.001
TEMP
HMXL
CPU times: user 914 ms, sys: 83 ms, total: 997 ms
Wall time: 8.43 s


In [8]:
# Inspect time_bound; the time-coordinate fix further down averages it over "d2".
ds.time_bound

Unnamed: 0,Array,Chunk
Bytes,11.44 kiB,11.44 kiB
Shape,"(732, 2)","(732, 2)"
Count,5 Tasks,1 Chunks
Type,object,numpy.ndarray
"Array Chunk Bytes 11.44 kiB 11.44 kiB Shape (732, 2) (732, 2) Count 5 Tasks 1 Chunks Type object numpy.ndarray",2  732,

Unnamed: 0,Array,Chunk
Bytes,11.44 kiB,11.44 kiB
Shape,"(732, 2)","(732, 2)"
Count,5 Tasks,1 Chunks
Type,object,numpy.ndarray


In [9]:
# Names of the longitude/latitude variables to carry along.
coords = {'x': 'TLONG', 'y': 'TLAT'}

# Everything that should survive the clean-up: bookkeeping variables, the
# requested data variables, the lon/lat variables, plus dz and KMT.
keepthese = ['z_t', 'time_bound', 'TAREA', 'time', *variables]
keep_vars = [*keepthese, *coords.values(), 'dz', 'KMT']

In [10]:
# Discard every variable that is not in keep_vars.
# drop_vars replaces Dataset.drop, which is deprecated for dropping variables.
ds = ds.drop_vars([name for name in ds.variables if name not in keep_vars])

In [11]:
# Replace the time coordinate with the mean of each time_bound pair
# (averaged over the "d2" dimension).
time_bound_loaded = ds["time_bound"].compute()
ds["time"] = time_bound_loaded.mean(dim="d2")

In [12]:
%%time
ds = ds.resample({'time':'A'}).mean(dim='time').compute()

CPU times: user 15.7 s, sys: 3.63 s, total: 19.3 s
Wall time: 5min 14s


In [13]:
# Display the annual-mean dataset for a quick visual check before writing it out.
ds

In [14]:
# Write one annual-mean netCDF file per variable.

# Output directory; created up front so to_netcdf does not fail on a fresh archive.
outdir = '/glade/scratch/kristenk/archive/' + case + '/ocn/proc/tseries/year_1'
os.makedirs(outdir, exist_ok=True)

# Variables written alongside every data variable (loop-invariant).
shared_vars = ['z_t', 'time_bound', 'TAREA', 'time', 'dz', 'KMT'] + list(coords.values())

for v in variables:

    # A distinct name is used here so the module-level keep_vars list is not
    # overwritten, and the comprehension no longer shadows the loop variable.
    wanted = set(shared_vars + [v])

    # drop_vars replaces Dataset.drop, which is deprecated for dropping variables.
    ds_out = ds.drop_vars([name for name in ds.variables if name not in wanted])

    outfile = os.path.join(outdir, case + '.annual.' + v + '.' + IAFy + '.nc')
    ds_out.to_netcdf(outfile)

In [15]:
# Shut down cleanly: disconnect the client first, then release the cluster,
# so the client is not left pointing at a scheduler that no longer exists.
client.close()
cluster.close()