In [1]:
import xarray as xr
import glob
import numpy as np
import pandas as pd
import sys
from utils.pyfunctions import *

In [10]:
# Setup your PBSCluster
import dask
from dask_jobqueue import PBSCluster
from dask.distributed import Client
ncores=1
nmem='10GB'
cluster = PBSCluster(
    cores=ncores, # The number of cores you want
    memory=nmem, # Amount of memory
    processes=1, # How many processes
    queue='casper', # The type of queue to utilize (/glade/u/apps/dav/opt/usr/bin/execcasper)
    local_directory='$TMPDIR', # Use your local directory
    resource_spec='select=1:ncpus='+str(ncores)+':mem='+nmem, # Specify resources
    project='P93300641', # Input your project ID here
    walltime='03:00:00', # Amount of wall time
    #interface='ib0', # Interface to use
)

# Scale up
cluster.scale(10)

# Setup your client
client = Client(cluster)

Perhaps you already have a cluster running?
Hosting the HTTP server on port 44937 instead
  f"Port {expected} is already in use.\n"


### Post processing 

In [6]:
yy = (1986,2005)
dir='/glade/campaign/cgd/tss/projects/PPE/ctsm530_OAAT/hist/'
key = './clm6_minioaat.csv'
exp='oaat'
tape='h0'
utils_path = '/glade/u/home/linnia/CLM6-PPE/clm6-oaat/utils/'

dvs=['GPP','AR','HR','NPP','NBP','NEP','ER',
     'EFLX_LH_TOT','FCTR','FCEV','FGEV','BTRANMN','FGR','FSH',
     'SOILWATER_10CM','TWS','QRUNOFF','SNOWDP','H2OSNO','FSNO',
     'TLAI','FSR','ALTMAX','TV','TG',
     'FAREA_BURNED','COL_FIRE_CLOSS',
     'TOTVEGC','TOTECOSYSC','TOTSOMC_1m',
     'TOTVEGN','TOTECOSYSN']

In [12]:
ds= get_exp(exp,dir,key,dvs,'h0',yy,utils_path)

la=xr.open_dataset(utils_path+'sparsegrid_landarea.nc').landarea
b=xr.open_dataset(utils_path+'whit/whitkey.nc').biome
out=xr.Dataset()
    
for v in dvs:

        x=amean(ds[v])
        amp=(ds[v].groupby('time.year').max()-ds[v].groupby('time.year').min()).mean(dim='year').compute()

        out[v+'_gridded_mean']=x.mean(dim='year')

        out[v+'_global_amp'] =gmean(amp,la)
        out[v+'_global_std'] =gmean(x.std(dim='year'),la)
        out[v+'_global_mean']=gmean(x.mean(dim='year'),la)

        out[v+'_biome_amp']  =bmean(amp,la,b)
        out[v+'_biome_std']  =bmean(x.std(dim='year'),la,b)
        out[v+'_biome_mean'] =bmean(x.mean(dim='year'),la,b)

        for dv in out.data_vars:
            if v in dv:
                out[dv].attrs=ds[v].attrs

In [None]:
key = 'clm6_minioaat.csv'
df_key = pd.read_csv(key)

out = out.assign_coords(
    id=('ens', df_key['key'].values),
    param=('ens', df_key['param'].values),
    minmax=('ens', df_key['minmax'].values)
)

out = out.set_index(ens=["param", "minmax", "id"], append=True)

In [13]:
out.to_netcdf('/glade/campaign/cgd/tss/projects/PPE/ctsm530_OAAT/postp/ctsm530_oaat_postp_1986-2005.nc')

### Default

In [15]:
yr0 = 1985; yr1 = 2005
tape = 'h0'
dir = '/glade/derecho/scratch/linnia/ctsm5.3.0_transient_test0005/run/'
fs   = np.array(sorted(glob.glob(dir+'*'+tape+'*')))
yrs  = np.array([int(f.split(tape)[1][1:5]) for f in fs])
#bump back yr0, if needed
uyrs=np.unique(yrs)
yr0=uyrs[(uyrs/yr0)<=1][-1]

#find index to subset files
ix    = (yrs>=yr0)&(yrs<=yr1)
files    = fs[ix] 
dims  = ['ens','time']

In [19]:
dvs=['HR','NPP','NBP','NEP','ER',
     'EFLX_LH_TOT','FCTR','FCEV','FGEV','BTRANMN','FGR','FSH',
     'SOILWATER_10CM','TWS','QRUNOFF','SNOWDP','H2OSNO','FSNO',
     'TLAI','FSR','ALTMAX','TV','TG',
     'FAREA_BURNED','COL_FIRE_CLOSS',
     'TOTVEGC','TOTECOSYSC','TOTSOMC_1m',
     'TOTVEGN','TOTECOSYSN']

In [20]:
def preprocess(ds):
    return ds[dvs]
ds_default = xr.open_mfdataset(files,combine='nested',concat_dim=dims,
                       parallel=True,preprocess=preprocess)

KeyError: 'HR'

In [None]:
ds_default

### PFT

In [7]:
### PFT data
ds1=get_exp(exp,dir,key,dvs,'h1',yy,utils_path)
lapft=xr.open_dataset(utils_path+'sparsegrid_landarea.nc').landarea_pft
pft=xr.DataArray(ds1.pfts1d_itype_veg.values,dims='pft',name='pft')

for v in ds1.data_vars:
    if v!='HR':
        x=amean(ds1[v])
        amp=(ds1[v].groupby('time.year').max()-ds1[v].groupby('time.year').min()).mean(dim='year').compute()

        out[v+'_pft_amp']  =pftmean(amp,lapft,pft)
        out[v+'_pft_std']  =pftmean(x.std(dim='year'),lapft,pft)
        out[v+'_pft_mean'] =pftmean(x.mean(dim='year'),lapft,pft)

        for dv in out.data_vars:
            if v in dv:
                out[dv].attrs=ds[v].attrs

In [None]:
out.to_netcdf('/glade/campaign/cgd/tss/projects/PPE/ctsm530_OAAT/postp/ctsm530_oaat_pft_postp_1986-2005.nc')