In [1]:
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
import os
from glob import glob
from collections import OrderedDict
import xarray as xr
import numpy as np
import esmlab
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import cartopy
import cartopy.crs as ccrs
import seawater as sw
from seawater.library import T90conv
from scipy import stats
import pop_tools 

from pint import UnitRegistry
import ncar_jobqueue
from dask.distributed import Client

# Shared pint unit registry, extended with the concentration unit name
# that appears in WOA datasets.
_WOA_CONCENTRATION_UNIT = 'micromoles_per_kilogram = umol / kg'
units = UnitRegistry()
units.define(_WOA_CONCENTRATION_UNIT)

Cannot write to data cache folder '/glade/p/cesmdata/cseg'. Will not be able to download remote data files. Use environment variable 'CESMDATAROOT' to specify another directory.


In [2]:
# Create an NCAR job-queue dask cluster under project 'ncgd0011'.
cluster = ncar_jobqueue.NCARCluster(project='ncgd0011')
# Let the cluster scale adaptively between 0 and 30 jobs.
cluster.adapt(minimum_jobs=0, maximum_jobs=30)
# Attach a distributed client to the cluster for the computations below.
client = Client(cluster)
# Bare expression: displays the client summary as the cell output.
client

0,1
Client  Scheduler: tcp://10.12.205.11:33721  Dashboard: https://jupyterhub.ucar.edu/dav/user/kristenk/proxy/8787/status,Cluster  Workers: 0  Cores: 0  Memory: 0 B


In [27]:
# Case name and archive owner; `path` is derived from both so that changing
# `user` actually changes where the monthly time series are read from.
case = 'g.e22b05.G1850ECOIAF_JRA.TL319_g17.001'
user = 'kristenk'
# Directory holding the monthly ocean time-series files for this case.
path = f'/glade/scratch/{user}/archive/{case}/ocn/proc/tseries/month_1'
# Variables to process. Alternative sets are kept below for reference;
# uncomment one (and comment out the active line) to switch.
#variables = ['O2','SiO3','POC_FLUX_100m','CaCO3_PROD_zint']
#variables = ['photoC_diat_zint','photoC_diaz_zint','photoC_sp_zint','photoC_TOT_zint']
#variables = ['NO3', 'PO4']
variables = ['FG_CO2']

In [28]:
# First and last model year covered by the time-series files. Both file-name
# stamps below are derived from these so they cannot drift apart.
IAF_FIRST_YEAR = 1
IAF_LAST_YEAR = 61
# Month-resolution stamp (YYYYMM-YYYYMM) used in the monthly input file names.
IAFm = f'{IAF_FIRST_YEAR:04d}01-{IAF_LAST_YEAR:04d}12'
# Year-resolution stamp (YYYY-YYYY) used in the annual output file names.
IAFy = f'{IAF_FIRST_YEAR:04d}-{IAF_LAST_YEAR:04d}'

In [29]:
%%time
ds_list = []
print(f'reading {case}')
ds = xr.Dataset()

for v in variables:
    
    print(v)
    
    files = sorted(glob(f'{path}/{case}.pop.h.{v}.{IAFm}.nc'))     
  
    dsv=xr.open_mfdataset(files, data_vars="minimal", coords='minimal', compat="override", parallel=True, concat_dim="time",
                       drop_variables=["transport_components", "transport_regions"], decode_times=True, chunks={'z_t' : 1})

    ds = xr.merge((ds, dsv))
    ds_list.append(ds)

reading g.e22b05.G1850ECOIAF_JRA.TL319_g17.001
FG_CO2
CPU times: user 138 ms, sys: 188 ms, total: 327 ms
Wall time: 8.11 s


In [30]:
# Names of the horizontal coordinate variables to retain.
coords = dict(x='TLONG', y='TLAT')
# Grid/time bookkeeping variables plus the requested data variables.
keepthese = ['z_t', 'time_bound', 'TAREA', 'time', *variables]
# Full keep-list: the above, the coordinate variables, and dz / KMT.
keep_vars = [*keepthese, *coords.values(), 'dz', 'KMT']

In [31]:
# Discard every variable that is not on the keep-list.
unwanted = [name for name in ds.variables if name not in keep_vars]
ds = ds.drop(unwanted)

In [33]:
# Replace the time coordinate with the mean of each time_bound pair, so that
# every time stamp sits at the mean of its bounds.
time_midpoints = ds.time_bound.compute().mean(dim="d2")
ds["time"] = time_midpoints

In [34]:
%%time
ds = ds.resample({'time':'A'}).mean(dim='time').compute()

CPU times: user 2.95 s, sys: 8.54 s, total: 11.5 s
Wall time: 22.5 s


In [35]:
# Bare expression: display the annual-mean dataset to inspect it before writing.
ds

In [36]:
# Write one annual-mean netCDF file per variable, each holding that variable
# together with the grid/time bookkeeping variables.
outdir = f'/glade/scratch/kristenk/archive/{case}/ocn/proc/tseries/year_1'
# to_netcdf does not create missing directories, so make sure the target exists.
os.makedirs(outdir, exist_ok=True)

for v in variables:

    keep_vars = ['z_t','time_bound','TAREA','time','dz','KMT', v]  + list(coords.values())

    # Use a distinct name in the comprehension so it cannot be confused with
    # the loop variable `v`, which is still needed for the file name below.
    ds_out = ds.drop([name for name in ds.variables if name not in keep_vars])

    outfile = f'{outdir}/{case}.annual.{v}.{IAFy}.nc'
    ds_out.to_netcdf(outfile)