In [1]:
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
import os
from glob import glob
from collections import OrderedDict
import xarray as xr
import numpy as np
import esmlab
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import cartopy
import cartopy.crs as ccrs
import seawater as sw
from seawater.library import T90conv
from scipy import stats
import pop_tools 

from pint import UnitRegistry
import ncar_jobqueue
from dask.distributed import Client

# Create a pint unit registry, bound to the notebook-wide name `units`.
units = UnitRegistry()
# Register 'micromoles_per_kilogram' as a named unit equal to umol / kg.
units.define('micromoles_per_kilogram = umol / kg') # used in WOA datasets

Cannot write to data cache folder '/glade/p/cesmdata/cseg'. Will not be able to download remote data files. Use environment variable 'CESMDATAROOT' to specify another directory.


In [2]:
# Create a cluster via ncar_jobqueue, passing project 'ncgd0011'.
cluster = ncar_jobqueue.NCARCluster(project='ncgd0011')
# Let the cluster scale adaptively between 0 and 30 jobs.
cluster.adapt(minimum_jobs=0, maximum_jobs=30)
# Attach a dask.distributed client to the cluster.
client = Client(cluster)
# Bare last expression: display the client summary as the cell output.
client

0,1
Client  Scheduler: tcp://10.12.205.20:37676  Dashboard: https://jupyterhub.ucar.edu/dav/user/kristenk/proxy/46256/status,Cluster  Workers: 0  Cores: 0  Memory: 0 B


In [33]:
# Case name; it appears in both the directory path and the file names.
case = 'g.e22b05.G1850ECOIAF_JRA.TL319_g17.005'
# User whose scratch archive holds the case output.
user = 'kristenk'
# Directory of the monthly time-series files. Built from `user` and `case`
# so that changing either one is enough to point at a different archive.
path = f'/glade/scratch/{user}/archive/{case}/ocn/proc/tseries/month_1'

# Variables to process in this run. Alternative selections are kept below
# for reference; enable exactly one `variables` assignment.
#variables = ['O2','SiO3','POC_FLUX_100m']
#variables = ['photoC_diat_zint','photoC_diaz_zint','photoC_sp_zint','photoC_TOT_zint']
#variables = ['photoC_cocco_zint']
#variables = ['NO3', 'PO4']
variables = ['FG_CO2','CaCO3_PROD_zint']

In [34]:
# Available periods, as (first year, last year); they are four consecutive
# 61-year blocks. Both file-name tags below are derived from the selected
# entry, so they can no longer get out of sync with each other.
IAF_PERIODS = {
    1: (1, 61),
    2: (62, 122),
    3: (123, 183),
    4: (184, 244),
}
# Select the period to process here.
IAF_PERIOD = 2

_first_year, _last_year = IAF_PERIODS[IAF_PERIOD]
# Month-range tag used in the monthly input file names, e.g. '006201-012212'.
IAFm = f'{_first_year:04d}01-{_last_year:04d}12'
# Year-range tag used in the annual output file names, e.g. '0062-0122'.
IAFy = f'{_first_year:04d}-{_last_year:04d}'


In [35]:
%%time
ds_list = []
print(f'reading {case}')
ds = xr.Dataset()

for v in variables:
    
    print(v)
    
    files = sorted(glob(f'{path}/{case}.pop.h.{v}.{IAFm}.nc'))     
  
    dsv=xr.open_mfdataset(files, data_vars="minimal", coords='minimal', compat="override", parallel=True, concat_dim="time",
                       drop_variables=["transport_components", "transport_regions"], decode_times=True, chunks={'z_t' : 1})

    ds = xr.merge((ds, dsv))
    ds_list.append(ds)

reading g.e22b05.G1850ECOIAF_JRA.TL319_g17.005
FG_CO2
CaCO3_PROD_zint
CPU times: user 1.99 s, sys: 386 ms, total: 2.37 s
Wall time: 30.8 s


In [36]:
# Display the time_bound variable of the merged dataset as the cell output.
ds.time_bound

Unnamed: 0,Array,Chunk
Bytes,11.71 kB,11.71 kB
Shape,"(732, 2)","(732, 2)"
Count,5 Tasks,1 Chunks
Type,object,numpy.ndarray
"Array Chunk Bytes 11.71 kB 11.71 kB Shape (732, 2) (732, 2) Count 5 Tasks 1 Chunks Type object numpy.ndarray",2  732,

Unnamed: 0,Array,Chunk
Bytes,11.71 kB,11.71 kB
Shape,"(732, 2)","(732, 2)"
Count,5 Tasks,1 Chunks
Type,object,numpy.ndarray


In [37]:
# Names of the coordinate variables for the x and y directions.
coords = dict(x='TLONG', y='TLAT')
# Variables to retain: fixed grid/time entries followed by the requested variables ...
keepthese = ['z_t', 'time_bound', 'TAREA', 'time', *variables]
# ... then the coordinate variables, and finally 'dz' and 'KMT'.
keep_vars = [*keepthese, *coords.values(), 'dz', 'KMT']

In [38]:
# Discard every variable that is not listed in keep_vars.
ds = ds.drop(
    [name for name in ds.variables if name not in keep_vars]
)

In [39]:
# Replace the time coordinate with the mean of time_bound over its 'd2'
# dimension, i.e. the mean of the bounds of each time step.
ds["time"] = ds["time_bound"].compute().mean(dim="d2")

In [40]:
%%time
ds = ds.resample({'time':'A'}).mean(dim='time').compute()

CPU times: user 3.92 s, sys: 488 ms, total: 4.41 s
Wall time: 13 s


In [41]:
# Display the annually averaged dataset as the cell output.
ds

In [42]:
# Write one NetCDF file per variable from the annually averaged dataset.
# The destination directory is the same for every variable, so it is built
# once and created if missing; to_netcdf does not create directories.
outdir = '/glade/scratch/kristenk/archive/' + case + '/ocn/proc/tseries/year_1'
os.makedirs(outdir, exist_ok=True)

for v in variables:

    # Grid/time entries plus the single variable written to this file.
    keep_vars = ['z_t','time_bound','TAREA','time','dz','KMT', v]  + list(coords.values())

    # The comprehension uses `name` so it does not shadow the loop variable `v`.
    ds_out = ds.drop([name for name in ds.variables if name not in keep_vars])

    outfile = os.path.join(outdir, case + '.annual.' + v + '.' + IAFy + '.nc')
    ds_out.to_netcdf(outfile)