In [14]:
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
import os
from glob import glob
import xarray as xr
import numpy as np
import esmlab
import pop_tools 
# import ncar_jobqueue
# from dask.distributed import Client

### Dask is needed when a variable spans all 60 depth layers

In [39]:
from distributed import Client
from ncar_jobqueue import NCARCluster
import dask

# Resources requested for this session: wall-clock limit and worker count.
walltime = '01:00:00'
n_workers = 20

# Build the NCAR cluster object and ask it to scale to the requested workers.
cluster = NCARCluster(walltime=walltime)
cluster.scale(n_workers)

# Attach a distributed client to the cluster.
client = Client(cluster)

In [40]:
# Display the client summary (scheduler address, dashboard link, worker/core/memory totals).
# NOTE(review): the captured output reports 0 workers — presumably the worker jobs had not
# started yet; confirm workers are up before launching heavy computations.
client

0,1
Client  Scheduler: tcp://10.12.206.4:37786  Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/kristenk/proxy/8787/status,Cluster  Workers: 0  Cores: 0  Memory: 0 B


In [59]:
# Run identification: the case name and the user whose scratch archive holds it.
case = 'g.e22a06.G1850ECOIAF_JRA_PHYS_DEV.TL319_g17.scope_v1'
user = 'kristenk'

# Directory holding the month_1 time-series files for this case. Built from
# `user` and `case` so that changing either one above is enough to retarget the run.
path = f'/glade/scratch/{user}/archive/{case}/ocn/proc/tseries/month_1'

# Variables to process in this run; each one is read from its own file.
variables = ['Fe']

# Alternative variable lists (swap one in for `variables` to process a different set):
#['O2','SiO3'] #['NO3','PO4','POC_FLUX_100m']
#['TAUX','TAUY']
#['TEMP']
#['graze_cocco_zoo2','graze_sp_zoo2','graze_zoo1_zoo2','graze_diaz_zoo2']
#['graze_cocco_zoo3','graze_diaz_zoo3','graze_diat_zoo3','graze_zoo2_zoo3','graze_diat_zoo4','graze_zoo3_zoo4']
#['graze_sp_zoo1','graze_diaz_zoo1','graze_sp_zoo2','graze_diaz_zoo2','graze_cocco_zoo2','graze_zoo1_zoo2']
#['spChl','diazChl','diatChl','coccoChl']
#['photoC_diat_zint','photoC_sp_zint','photoC_diaz_zint','photoC_cocco_zint','photoC_TOT_zint']
#['zoo1C','zoo2C','zoo3C','zoo4C']
#['x_graze_zoo1_zint','x_graze_zoo2_zint','x_graze_zoo3_zint','x_graze_zoo4_zint']
#['diazC','diatC','spC','coccoC']
#['PO4','Fe','NO3','SiO3','IFRAC']
#['sp_light_lim_Cweight_avg_100m','diat_light_lim_Cweight_avg_100m','PD']

In [60]:
# Model-year span to process. IAFm is the month-resolution stamp that appears in
# the input file names; IAFy is the matching year-resolution stamp used in the
# output file names. The two must describe the same span.
#
# Alternative spans (swap a pair in below to process a different period):
#   IAFm='004201-005112'   IAFy='0042-0051'
#   IAFm='006201-012212'   IAFy='0062-0122'
#   IAFm='012301-018312'   IAFy='0123-0183'
#   IAFm='018401-024412'   IAFy='0184-0244'
IAFm, IAFy = '000101-006112', '0001-0061'

In [61]:
%%time
# Lazily open the file of every requested variable and merge them into one dataset `ds`.
ds_list = []
print(f'reading {case}')
ds = xr.Dataset()

for v in variables:

    print(v)

    # One time-series file per variable, named <case>.pop.h.<variable>.<IAFm>.nc
    pattern = f'{path}/{case}.pop.h.{v}.{IAFm}.nc'
    files = sorted(glob(pattern))
    if not files:
        # Fail early with the offending pattern instead of a generic error from
        # xarray (FileNotFoundError is an OSError, as xarray would raise).
        raise FileNotFoundError(f'no files found for variable {v!r}: {pattern}')

    # concat_dim is only honoured together with combine='nested'; newer xarray
    # versions raise if it is passed with the default combine='by_coords'.
    # chunks={'z_t': 1} makes each dask chunk hold a single z_t index.
    dsv = xr.open_mfdataset(
        files,
        combine='nested',
        concat_dim='time',
        data_vars='minimal',
        coords='minimal',
        compat='override',
        parallel=True,
        drop_variables=['transport_components', 'transport_regions'],
        decode_times=True,
        chunks={'z_t': 1},
    )

    ds = xr.merge((ds, dsv))
    # NOTE(review): this stores the cumulative merged dataset, not the
    # per-variable one; nothing visible in this notebook reads ds_list — confirm
    # whether it is still needed.
    ds_list.append(ds)

reading g.e22a06.G1850ECOIAF_JRA_PHYS_DEV.TL319_g17.scope_v1
Fe
CPU times: user 20.3 ms, sys: 0 ns, total: 20.3 ms
Wall time: 245 ms


In [62]:
# Names of the longitude/latitude coordinate variables, keyed by axis.
coords = {'x': 'TLONG', 'y': 'TLAT'}

# Everything that should survive the pruning step: the requested variables plus
# the supporting grid/time variables.
keepthese = ['z_t', 'time_bound', 'TAREA', 'time', *variables]
keep_vars = [*keepthese, *coords.values(), 'dz', 'KMT']

In [63]:
# Prune the dataset down to the variables listed in keep_vars.
# drop_vars is the supported replacement for the deprecated Dataset.drop(names).
ds = ds.drop_vars([name for name in ds.variables if name not in keep_vars])

In [64]:
# Re-stamp the time axis: each time value becomes the mean of its time_bound
# entries along d2. time_bound is loaded into memory first so the mean is taken
# on concrete values rather than on a lazy array.
ds["time"] = ds["time_bound"].compute().mean("d2")

In [65]:
%%time
# Group the records into annual bins ('A') along time, average within each bin,
# and load the result into memory.
# NOTE(review): every record in a year gets equal weight here; if month-length
# weighting is wanted it has to be applied explicitly — confirm intent.
ds = ds.resample(time='A').mean('time').compute()

CPU times: user 18.2 s, sys: 3.98 s, total: 22.1 s
Wall time: 6min 16s


In [66]:
# Display the annual-mean dataset for a quick check before it is written out.
ds

In [67]:
# Write one NetCDF file per variable into the case's year_1 directory.
outdir = '/glade/scratch/kristenk/archive/' + case + '/ocn/proc/tseries/year_1'

# Make sure the destination exists so the write cannot fail on a missing
# directory after the expensive annual-mean computation has already run.
os.makedirs(outdir, exist_ok=True)

for v in variables:

    # Keep only this variable plus the supporting grid/time variables.
    keep_vars = ['z_t','time_bound','TAREA','time','dz','KMT', v]  + list(coords.values())

    # drop_vars replaces the deprecated Dataset.drop(names); the comprehension
    # uses its own name so it does not read as if it reused the loop variable.
    ds_out = ds.drop_vars([name for name in ds.variables if name not in keep_vars])

    # Output name: <case>.annual.<variable>.<IAFy>.nc
    outfile = outdir + '/' + case + '.annual.' + v + '.' + IAFy + '.nc'
    ds_out.to_netcdf(outfile)

In [68]:
# Disconnect the client first so it is not left attached to a scheduler that
# is about to go away, then shut the cluster down.
client.close()
cluster.close()