# Process TTE-related variables from the CESM-LE

In [1]:
%matplotlib inline
import os
import shutil

from glob import glob

import cftime

import numpy as np
import xarray as xr

import matplotlib.pyplot as plt
import matplotlib.colors as colors

import cartopy.crs as ccrs
from cartopy.util import add_cyclic_point

import intake
import pop_tools
import esmlab
import util

import warnings
warnings.filterwarnings('ignore')

## Spin up dask cluster

In [2]:
import dask

# dask-jobqueue launches the dask workers as PBS batch jobs
from dask_jobqueue import PBSCluster

# The client connects this notebook session to the cluster
from dask.distributed import Client

# Describe one worker job; `cores`/`memory` repeat what `resource_spec` requests
cluster = PBSCluster(
    cores=2, # Cores per job (same value as ncpus in resource_spec)
    memory='256 GB', # Memory per job (same value as mem in resource_spec)
    processes=1, # Worker processes per job
    queue='casper', # Queue to submit to (/glade/u/apps/dav/opt/usr/bin/execcasper)
    local_directory='$TMPDIR', # Worker-local directory
    resource_spec='select=1:ncpus=2:mem=256GB', # PBS resource request per job
    project='NCGD0011', # Project ID to charge -- replace with your own
    walltime='02:00:00', # Wall-clock limit per job
    interface='ib0', # Network interface to use
)
# Ask for 32 workers; the client summary shown in this notebook reports 18,
# so not all of them had started when it was displayed
cluster.scale(32)

# Point the dashboard link at the JupyterHub proxy so it can be opened from the browser
dask.config.set({'distributed.dashboard.link':'https://jupyterhub.hpc.ucar.edu/stable/user/{USER}/proxy/{port}/status'})

# Attach a client to the cluster; later cells display and use it
client = Client(cluster)

In [3]:
# Display the client summary (scheduler address, dashboard link, worker totals)
client

0,1
Client  Scheduler: tcp://10.12.206.63:36197  Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/kristenk/proxy/8787/status,Cluster  Workers: 18  Cores: 36  Memory: 4.19 TiB


In [4]:
# Fetch the 'POP_gx1v6' grid via pop_tools and display it.
# NOTE(review): `grid` is not referenced again in the visible part of this notebook.
grid = pop_tools.get_grid('POP_gx1v6')
grid

## Read the CESM-LE data 

We will use [`intake-esm`](https://intake-esm.readthedocs.io/en/latest/), which is a data catalog tool.
It enables querying a database for the files we want, then loading those directly as an `xarray.Dataset`.

First step is to set the "collection" for the CESM-LE, which depends on a json file conforming to the [ESM Catalog Specification](https://github.com/NCAR/esm-collection-spec).

In [5]:
# Catalog description (JSON) handed to intake-esm.
# NOTE(review): this is an absolute, user-specific path; the commented-out line
# below points at a shared catalog location instead.
catalog_file = '/glade/u/home/kristenk/TTE_CESM-LE/krill-cesm-le/notebooks/data/glade-cesm1-le.json'
#catalog_file = '/glade/collections/cmip/catalog/intake-esm-datastore/catalogs/glade-cesm1-le.json'
# Variables to load for this run; the trailing comments keep other variable sets
# that were swapped in here previously.
variables = ['TEMP'] ##['SHF_QSW'] #QSW_HTP','SHF_QSW','QSW_HBL'] #'diatC', 'spC', 'zooC'] #, 'TEMP','IFRAC', 
             #'graze_diat', 'graze_sp', 'graze_diaz']

# Experiments and output stream used by the data search further down
experiments = ['20C', 'RCP85']
stream = 'pop.h'
    
# Open the catalog and display its summary
col = intake.open_esm_datastore(catalog_file, sep=',')
col

Unnamed: 0,unique
experiment,7
case,108
component,6
stream,15
variable,1052
date_range,116
member_id,40
path,191066
ctrl_branch_year,6
ctrl_experiment,4


Now we will search the collection for the ensemble members (unique `member_id` values) that have a chlorophyll field (`diatChl`). This is necessary because the ocean biogeochemistry was corrupted in some members and the data were deleted.

In this cell, `member_id` is a list of the ensemble members we want to operate on.

In [6]:
# Catalog entries for `diatChl` in the 20C experiment, pop.h stream
col_sub = col.search(experiment=['20C'],                      
                     stream='pop.h', 
                     variable=['diatChl'])

# Members present in that search result; reused by the data search below.
# Note that `col_sub` is overwritten by that later search.
member_id = list(col_sub.df.member_id.unique())
print(member_id)

[1, 2, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 101, 102, 103, 104, 105]


## Now let's search for the data we want

Specify a list of variables and perform a search. Under the hood, the `search` functionality uses [`pandas`](https://pandas.pydata.org/) data frames. We can view that frame here using the `.df` syntax.

In [7]:
# Search for the requested variables, restricted to the experiments, stream and
# ensemble members selected in the earlier cells
col_sub = col.search(
    experiment=experiments, 
    stream=stream, 
    variable=variables,
    member_id=member_id,
    )

# One-line summary of the sub-catalog (number of datasets and assets)
print(col_sub)

# First rows of the underlying pandas data frame
col_sub.df.head()

<glade-cesm1-le catalog with 2 dataset(s) from 95 asset(s)>


Unnamed: 0,experiment,case,component,stream,variable,date_range,member_id,path,ctrl_branch_year,ctrl_experiment,ctrl_member_id
0,20C,b.e11.B20TRC5CNBDRD.f09_g16.001,ocn,pop.h,TEMP,185001-200512,1,/glade/campaign/cesm/collections/cesmLE/CESM-C...,402,CTRL,1
1,20C,b.e11.B20TRC5CNBDRD.f09_g16.002,ocn,pop.h,TEMP,192001-200512,2,/glade/campaign/cesm/collections/cesmLE/CESM-C...,1920,20C,1
2,20C,b.e11.B20TRC5CNBDRD.f09_g16.009,ocn,pop.h,TEMP,192001-200512,9,/glade/campaign/cesm/collections/cesmLE/CESM-C...,1920,20C,1
3,20C,b.e11.B20TRC5CNBDRD.f09_g16.010,ocn,pop.h,TEMP,192001-200512,10,/glade/campaign/cesm/collections/cesmLE/CESM-C...,1920,20C,1
4,20C,b.e11.B20TRC5CNBDRD.f09_g16.011,ocn,pop.h,TEMP,192001-200512,11,/glade/campaign/cesm/collections/cesmLE/CESM-C...,1920,20C,1


Now we can use the [`to_dataset_dict`](https://intake-esm.readthedocs.io/en/latest/api.html#intake_esm.core.esm_datastore.to_dataset_dict) method to return a dictionary of `xarray.Dataset` objects. `intake-esm` groups these according to rules in the collection spec file.

We can use the `preprocess` parameter to pass in a function that makes some corrections to the dataset. So first we define a function that does the following:
- fix the time coordinate to be the middle of the interval
- drop the singleton dimension on SST (which screws up coordinate alignment)
- subset to the time-interval 1920-2100

In [8]:
# Re-display the client summary to check the cluster before loading data
client

0,1
Client  Scheduler: tcp://10.12.206.63:36197  Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/kristenk/proxy/8787/status,Cluster  Workers: 18  Cores: 36  Memory: 4.19 TiB


In [9]:
%%time
# Build one dataset per catalog group, chunked 5 steps at a time along `time`.
# `decode_times` is False so the raw numeric time values are kept; `fix_time`
# (defined below) later converts the mean of `time_bound` to dates itself.
dsets = col_sub.to_dataset_dict(cdf_kwargs={'chunks': {'time':5}, 'decode_times': False})
dsets


--> The keys in the returned dictionary of datasets are constructed as follows:
	'component,experiment,stream'


CPU times: user 3.11 s, sys: 55.1 ms, total: 3.16 s
Wall time: 7.15 s


{'ocn,RCP85,pop.h': <xarray.Dataset>
 Dimensions:               (d2: 2, lat_aux_grid: 395, member_id: 34, moc_comp: 3, moc_z: 61, nlat: 384, nlon: 320, time: 1140, transport_comp: 5, transport_reg: 2, z_t: 60, z_t_150m: 15, z_w: 60, z_w_bot: 60, z_w_top: 60)
 Coordinates: (12/13)
     TLAT                  (nlat, nlon) float64 dask.array<chunksize=(384, 320), meta=np.ndarray>
     TLONG                 (nlat, nlon) float64 dask.array<chunksize=(384, 320), meta=np.ndarray>
     ULAT                  (nlat, nlon) float64 dask.array<chunksize=(384, 320), meta=np.ndarray>
     ULONG                 (nlat, nlon) float64 dask.array<chunksize=(384, 320), meta=np.ndarray>
   * lat_aux_grid          (lat_aux_grid) float32 -79.49 -78.95 ... 89.47 90.0
   * moc_z                 (moc_z) float32 0.0 1e+03 2e+03 ... 5.25e+05 5.5e+05
     ...                    ...
   * z_t                   (z_t) float32 500.0 1.5e+03 ... 5.125e+05 5.375e+05
   * z_t_150m              (z_t_150m) float32 500.0 1.5e+

Now, let's inspect the RCP85 dataset and define the functions that compute derived fields (for example the total surface chlorophyll), put time at the mid-point of the interval, and subset to 1920-2100.

In [10]:
# Display the RCP85 dataset (keys have the form 'component,experiment,stream')
dsets['ocn,RCP85,pop.h']

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 0.94 MiB 0.94 MiB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 0.94 MiB 0.94 MiB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 0.94 MiB 0.94 MiB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 0.94 MiB 0.94 MiB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 0.94 MiB 0.94 MiB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 0.94 MiB 0.94 MiB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 0.94 MiB 0.94 MiB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 0.94 MiB 0.94 MiB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 0.94 MiB 0.94 MiB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 0.94 MiB 0.94 MiB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 0.94 MiB 0.94 MiB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 0.94 MiB 0.94 MiB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 0.94 MiB 0.94 MiB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 0.94 MiB 0.94 MiB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 0.94 MiB 0.94 MiB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 0.94 MiB 0.94 MiB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 0.94 MiB 0.94 MiB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 0.94 MiB 0.94 MiB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 0.94 MiB 0.94 MiB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 0.94 MiB 0.94 MiB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.04 TiB,140.62 MiB
Shape,"(34, 1140, 60, 384, 320)","(1, 5, 60, 384, 320)"
Count,29473 Tasks,7752 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.04 TiB 140.62 MiB Shape (34, 1140, 60, 384, 320) (1, 5, 60, 384, 320) Count 29473 Tasks 7752 Chunks Type float32 numpy.ndarray",1140  34  320  384  60,

Unnamed: 0,Array,Chunk
Bytes,1.04 TiB,140.62 MiB
Shape,"(34, 1140, 60, 384, 320)","(1, 5, 60, 384, 320)"
Count,29473 Tasks,7752 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 0.94 MiB 0.94 MiB Shape (384, 320) (384, 320) Count 2 Tasks 1 Chunks Type float64 numpy.ndarray",320  384,

Unnamed: 0,Array,Chunk
Bytes,0.94 MiB,0.94 MiB
Shape,"(384, 320)","(384, 320)"
Count,2 Tasks,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,240 B,240 B
Shape,"(60,)","(60,)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 240 B 240 B Shape (60,) (60,) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",60  1,

Unnamed: 0,Array,Chunk
Bytes,240 B,240 B
Shape,"(60,)","(60,)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,240 B,240 B
Shape,"(60,)","(60,)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 240 B 240 B Shape (60,) (60,) Count 2 Tasks 1 Chunks Type float32 numpy.ndarray",60  1,

Unnamed: 0,Array,Chunk
Bytes,240 B,240 B
Shape,"(60,)","(60,)"
Count,2 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,768 B,768 B
Shape,"(3,)","(3,)"
Count,2 Tasks,1 Chunks
Type,|S256,numpy.ndarray
"Array Chunk Bytes 768 B 768 B Shape (3,) (3,) Count 2 Tasks 1 Chunks Type |S256 numpy.ndarray",3  1,

Unnamed: 0,Array,Chunk
Bytes,768 B,768 B
Shape,"(3,)","(3,)"
Count,2 Tasks,1 Chunks
Type,|S256,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,17.81 kiB,80 B
Shape,"(1140, 2)","(5, 2)"
Count,458 Tasks,228 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 17.81 kiB 80 B Shape (1140, 2) (5, 2) Count 458 Tasks 228 Chunks Type float64 numpy.ndarray",2  1140,

Unnamed: 0,Array,Chunk
Bytes,17.81 kiB,80 B
Shape,"(1140, 2)","(5, 2)"
Count,458 Tasks,228 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.25 kiB,1.25 kiB
Shape,"(5,)","(5,)"
Count,2 Tasks,1 Chunks
Type,|S256,numpy.ndarray
"Array Chunk Bytes 1.25 kiB 1.25 kiB Shape (5,) (5,) Count 2 Tasks 1 Chunks Type |S256 numpy.ndarray",5  1,

Unnamed: 0,Array,Chunk
Bytes,1.25 kiB,1.25 kiB
Shape,"(5,)","(5,)"
Count,2 Tasks,1 Chunks
Type,|S256,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,512 B,512 B
Shape,"(2,)","(2,)"
Count,2 Tasks,1 Chunks
Type,|S256,numpy.ndarray
"Array Chunk Bytes 512 B 512 B Shape (2,) (2,) Count 2 Tasks 1 Chunks Type |S256 numpy.ndarray",2  1,

Unnamed: 0,Array,Chunk
Bytes,512 B,512 B
Shape,"(2,)","(2,)"
Count,2 Tasks,1 Chunks
Type,|S256,numpy.ndarray


In [11]:
def compute_chl_surf(ds):
    """Add total surface chlorophyll as `Chl_surf` and drop the inputs.

    `Chl_surf` is `diatChl + spChl + diazChl` taken at the first `z_t_150m`
    level. Its attributes are copied from `diatChl`, with `long_name`
    replaced. `ds` itself receives the new variable; the returned dataset
    is `ds` without the three source variables.
    """
    source_names = ['diatChl', 'spChl', 'diazChl']

    total_chl = ds.diatChl + ds.spChl + ds.diazChl
    ds['Chl_surf'] = total_chl.isel(z_t_150m=0)

    ds['Chl_surf'].attrs = ds['diatChl'].attrs
    ds['Chl_surf'].attrs['long_name'] = 'Surface chlorophyll'

    return ds.drop(source_names)

def compute_NPP_zint(ds):
    """Add depth-integrated NPP (`NPP`, `NPP_diat`, `NPP_sp`) and drop the inputs.

    Each output is a `photoC_*` field multiplied by `dz` for the first 15
    `z_t` levels (renamed to `z_t_150m`) and summed over `z_t_150m`:

    - `NPP`: sum of `photoC_diat`, `photoC_sp` and `photoC_diaz`
    - `NPP_diat`: `photoC_diat` only
    - `NPP_sp`: `photoC_sp` only

    Attributes are copied from the listed source variable, `long_name` is
    replaced, and ' cm' is appended to `units`. Returns `ds` without the
    three `photoC_*` variables.
    """
    layer_dz = ds.dz.isel(z_t=slice(0, 15)).rename({'z_t': 'z_t_150m'})

    # output name -> (integrand, variable supplying attrs, long_name)
    recipes = {
        'NPP': (ds.photoC_diat + ds.photoC_sp + ds.photoC_diaz, 'photoC_diat', 'NPP'),
        'NPP_diat': (ds.photoC_diat, 'photoC_diat', 'NPP by diatoms'),
        'NPP_sp': (ds.photoC_sp, 'photoC_sp', 'NPP by small phytoplankton'),
    }

    for out_name, (rate, attrs_from, label) in recipes.items():
        ds[out_name] = (rate * layer_dz).sum(dim='z_t_150m')
        ds[out_name].attrs = ds[attrs_from].attrs
        ds[out_name].attrs['long_name'] = label
        ds[out_name].attrs['units'] = ds[out_name].attrs['units'] + ' cm'

    return ds.drop(['photoC_diat', 'photoC_sp', 'photoC_diaz'])



def compute_zoo_prod_zint(ds):
    """Add depth-integrated zooplankton production (`zoo_prod`) and drop the inputs.

    Total grazing (`graze_diat + graze_sp + graze_diaz`) is multiplied by
    `dz` for the first 15 `z_t` levels, summed over `z_t_150m`, and scaled
    by 0.30 (labelled GGE in the original code). Attributes come from
    `graze_diat`, with `long_name` replaced and ' cm' appended to `units`.
    Returns `ds` without the three `graze_*` variables.
    """
    gge = 0.30  # GGE=0.3

    layer_dz = ds.dz.isel(z_t=slice(0, 15)).rename({'z_t': 'z_t_150m'})
    total_grazing = ds.graze_diat + ds.graze_sp + ds.graze_diaz
    ds['zoo_prod'] = (total_grazing * layer_dz).sum(dim='z_t_150m') * gge

    ds['zoo_prod'].attrs = ds['graze_diat'].attrs
    ds['zoo_prod'].attrs['long_name'] = 'Zooplankton production'
    ds['zoo_prod'].attrs['units'] = ds['zoo_prod'].attrs['units'] + ' cm'

    return ds.drop(['graze_diat', 'graze_sp', 'graze_diaz'])

def compute_zoo_loss_zint(ds):
    """Add the depth integral of `zoo_loss` as `zoo_loss_zint` and drop `zoo_loss`.

    `zoo_loss` is multiplied by `dz` for the first 15 `z_t` levels (renamed
    to `z_t_150m`) and summed over `z_t_150m`. Attributes are copied from
    `zoo_loss`, with `long_name` replaced and ' cm' appended to `units`.
    """
    out_name = 'zoo_loss_zint'

    layer_dz = ds.dz.isel(z_t=slice(0, 15)).rename({'z_t': 'z_t_150m'})
    ds[out_name] = (ds.zoo_loss * layer_dz).sum(dim='z_t_150m')

    ds[out_name].attrs = ds.zoo_loss.attrs
    ds[out_name].attrs['long_name'] = 'Zooplankton loss depth integral'
    ds[out_name].attrs['units'] = ds[out_name].attrs['units'] + ' cm'

    return ds.drop(['zoo_loss'])

def compute_TEMP_50m(ds):
    """Add `TEMP_50m_mean` and drop `TEMP`.

    The new variable is the unweighted mean of `TEMP` across the first five
    `z_t` levels, which this notebook treats as the top 50 m. Attributes
    are copied from `TEMP` with `long_name` replaced.
    """
    upper_levels = ds.TEMP.isel(z_t=slice(0, 5))
    ds['TEMP_50m_mean'] = upper_levels.mean(dim='z_t')

    ds['TEMP_50m_mean'].attrs = ds['TEMP'].attrs
    ds['TEMP_50m_mean'].attrs['long_name'] = 'Mean temperature over top 50m'

    return ds.drop(['TEMP'])


def compute_TEMP_100m(ds):
    """Add `TEMP_100m_mean` and drop `TEMP`.

    The new variable is the unweighted mean of `TEMP` across the first ten
    `z_t` levels, which this notebook treats as the top 100 m. Attributes
    are copied from `TEMP` with `long_name` replaced.
    """
    n_levels = 10
    out_name = 'TEMP_100m_mean'

    ds[out_name] = ds.TEMP.isel(z_t=slice(0, n_levels)).mean(dim='z_t')
    ds[out_name].attrs = ds.TEMP.attrs
    ds[out_name].attrs['long_name'] = 'Mean temperature over top 100m'

    return ds.drop(['TEMP'])

def compute_spC_zint(ds):
    """Add depth-integrated small phytoplankton carbon (`spC_zint`) and drop `spC`.

    `spC` is multiplied by `dz` (first 15 `z_t` levels, scaled by 0.01 to go
    from cm to m) and summed over `z_t_150m`. Attributes are copied from
    `spC`; `units` and `long_name` are then overwritten.
    """
    # converting from cm to m
    thickness_m = ds.dz.isel(z_t=slice(0, 15)).rename({'z_t': 'z_t_150m'}) * 0.01

    integrand = ds.spC * thickness_m
    ds['spC_zint'] = integrand.sum(dim='z_t_150m')

    ds['spC_zint'].attrs = ds['spC'].attrs
    ds['spC_zint'].attrs['units'] = 'mmol m-2'
    ds['spC_zint'].attrs['long_name'] = 'depth-integrated small phytoplankton carbon'

    return ds.drop(['spC'])
    
    
def compute_spC_50m_zint(ds):
    """Add small phytoplankton carbon integrated over the first five levels and drop `spC`.

    `spC` is multiplied by `dz` (first 15 `z_t` levels, scaled by 0.01 to go
    from cm to m); the product is then cut to the first five `z_t_150m`
    levels and summed. Attributes are copied from `spC`; `units` and
    `long_name` are then overwritten.
    """
    out_name = 'spC_50m_zint'

    # converting from cm to m
    thickness_m = ds.dz.isel(z_t=slice(0, 15)).rename({'z_t': 'z_t_150m'}) * 0.01

    weighted = ds.spC * thickness_m
    top_levels = weighted.isel(z_t_150m=slice(0, 5))
    ds[out_name] = top_levels.sum(dim='z_t_150m')

    ds[out_name].attrs = ds.spC.attrs
    ds[out_name].attrs['units'] = 'mmol m-2'
    ds[out_name].attrs['long_name'] = '50m depth-integrated small phytoplankton carbon'

    return ds.drop(['spC'])

def compute_diatC_50m_zint(ds):
    """Add diatom carbon integrated over the first five levels and drop `diatC`.

    `diatC` is multiplied by `dz` (first 15 `z_t` levels, scaled by 0.01 to
    go from cm to m); the product is then cut to the first five `z_t_150m`
    levels and summed. Attributes are copied from `diatC`; `units` and
    `long_name` are then overwritten.
    """
    # converting from cm to m
    thickness_m = ds.dz.isel(z_t=slice(0, 15)).rename({'z_t': 'z_t_150m'}) * 0.01

    weighted = ds.diatC * thickness_m
    ds['diatC_50m_zint'] = weighted.isel(z_t_150m=slice(0, 5)).sum(dim='z_t_150m')

    ds['diatC_50m_zint'].attrs = ds['diatC'].attrs
    ds['diatC_50m_zint'].attrs['units'] = 'mmol m-2'
    ds['diatC_50m_zint'].attrs['long_name'] = '50m depth-integrated diatom carbon'

    return ds.drop(['diatC'])


def compute_diatC_zint(ds):
    """Add depth-integrated diatom carbon (`diatC_zint`) and drop `diatC`.

    `diatC` is multiplied by `dz` (first 15 `z_t` levels, scaled by 0.01 to
    go from cm to m) and summed over `z_t_150m`. Attributes are copied from
    `diatC`; `units` and `long_name` are then overwritten.
    """
    out_name = 'diatC_zint'

    # converting from cm to m
    thickness_m = ds.dz.isel(z_t=slice(0, 15)).rename({'z_t': 'z_t_150m'}) * 0.01
    ds[out_name] = (ds.diatC * thickness_m).sum(dim='z_t_150m')

    ds[out_name].attrs = ds.diatC.attrs
    ds[out_name].attrs['units'] = 'mmol m-2'
    ds[out_name].attrs['long_name'] = 'depth-integrated diatom carbon'

    return ds.drop(['diatC'])

def compute_zooC_zint(ds):
    """Add depth-integrated zooplankton carbon (`zooC_zint`) and drop `zooC`.

    `zooC` is multiplied by `dz` (first 15 `z_t` levels, scaled by 0.01 to
    go from cm to m) and summed over `z_t_150m`. Attributes are copied from
    `zooC`; `units` and `long_name` are then overwritten.
    """
    # converting from cm to m
    thickness_m = ds.dz.isel(z_t=slice(0, 15)).rename({'z_t': 'z_t_150m'}) * 0.01

    column_total = (ds.zooC * thickness_m).sum(dim='z_t_150m')
    ds['zooC_zint'] = column_total

    ds['zooC_zint'].attrs = ds['zooC'].attrs
    ds['zooC_zint'].attrs['units'] = 'mmol m-2'
    ds['zooC_zint'].attrs['long_name'] = 'depth-integrated zooplankton carbon'

    return ds.drop(['zooC'])

def compute_POC_FLUX_100m(ds):
    """Add POC export at 100 m as `POC_flux_100m` and drop `POC_FLUX_IN`.

    The new variable is `POC_FLUX_IN` at `z_t` index 10. Attributes are
    copied from `POC_FLUX_IN` with `long_name` replaced; the label now reads
    100m to agree with the function and variable names (it previously said
    10m). `ds` itself receives the new variable; the returned dataset is
    `ds` without `POC_FLUX_IN`.
    """
    # NOTE(review): index 10 is assumed to be the level whose upper face sits
    # at 100 m -- confirm against the model's vertical grid.
    ds['POC_flux_100m'] = ds.POC_FLUX_IN.isel(z_t=10)
    ds['POC_flux_100m'].attrs = ds.POC_FLUX_IN.attrs
    ds['POC_flux_100m'].attrs['long_name'] = 'POC flux at 100m'

    return ds.drop(['POC_FLUX_IN'])

def put_HMXL(ds):
    """Pass `HMXL` through unchanged and return `ds`.

    The variable and its attributes are re-assigned to themselves, so the
    only observable effect is an error if `HMXL` is missing.
    """
    name = 'HMXL'
    ds[name] = getattr(ds, name)
    ds[name].attrs = ds[name].attrs

    return ds

def put_XMXL(ds):
    """Pass `XMXL` through unchanged and return `ds`.

    The variable and its attributes are re-assigned to themselves, so the
    only observable effect is an error if `XMXL` is missing.
    """
    name = 'XMXL'
    ds[name] = getattr(ds, name)
    ds[name].attrs = ds[name].attrs

    return ds

def put_TMXL(ds):
    """Pass `TMXL` through unchanged and return `ds`.

    The variable and its attributes are re-assigned to themselves, so the
    only observable effect is an error if `TMXL` is missing.
    """
    name = 'TMXL'
    ds[name] = getattr(ds, name)
    ds[name].attrs = ds[name].attrs

    return ds


def put_Jint_100m_Fe(ds):
    """Pass `Jint_100m_Fe` through unchanged and return `ds`.

    The variable and its attributes are re-assigned to themselves, so the
    only observable effect is an error if `Jint_100m_Fe` is missing.
    """
    name = 'Jint_100m_Fe'
    ds[name] = getattr(ds, name)
    ds[name].attrs = ds[name].attrs

    return ds

def put_tend_zint_100m_Fe(ds):
    """Pass `tend_zint_100m_Fe` through unchanged and return `ds`.

    The variable and its attributes are re-assigned to themselves, so the
    only observable effect is an error if `tend_zint_100m_Fe` is missing.
    """
    name = 'tend_zint_100m_Fe'
    ds[name] = getattr(ds, name)
    ds[name].attrs = ds[name].attrs

    return ds

def put_QSW(ds):
    """Pass `SHF_QSW` through unchanged and return `ds`.

    The variable and its attributes are re-assigned to themselves, so the
    only observable effect is an error if `SHF_QSW` is missing.
    """
    # QSW_HTP and QSW_HBL were handled the same way in earlier versions of this
    # function but are currently disabled; only SHF_QSW is touched.
    name = 'SHF_QSW'
    ds[name] = getattr(ds, name)
    ds[name].attrs = ds[name].attrs

    return ds

def compute_Cweight_diat_lightlim(ds):
    """Add the `diatC`-weighted mean of `diat_light_lim` and drop both inputs.

    `diat_Cweight_light_lim` is sum(`diat_light_lim` * `diatC`) divided by
    sum(`diatC`), both sums taken over `z_t_150m`. Attributes are copied
    from `diat_light_lim` with `long_name` replaced.
    """
    weighted_sum = (ds.diat_light_lim * ds.diatC).sum(dim='z_t_150m')
    total_weight = ds.diatC.sum(dim='z_t_150m')
    ds['diat_Cweight_light_lim'] = weighted_sum / total_weight

    ds['diat_Cweight_light_lim'].attrs = ds['diat_light_lim'].attrs
    ds['diat_Cweight_light_lim'].attrs['long_name'] = 'Biomass weighted diatom light limitation term'

    return ds.drop(['diatC', 'diat_light_lim'])

def compute_Cweight_diat_Fe_lim(ds):
    """Add the `diatC`-weighted mean of `diat_Fe_lim` and drop both inputs.

    `diat_Cweight_Fe_lim` is sum(`diat_Fe_lim` * `diatC`) divided by
    sum(`diatC`), both sums taken over `z_t_150m`. Attributes are copied
    from `diat_Fe_lim` with `long_name` replaced.
    """
    out_name = 'diat_Cweight_Fe_lim'

    weighted_sum = (ds.diat_Fe_lim * ds.diatC).sum(dim='z_t_150m')
    total_weight = ds.diatC.sum(dim='z_t_150m')
    ds[out_name] = weighted_sum / total_weight

    ds[out_name].attrs = ds.diat_Fe_lim.attrs
    ds[out_name].attrs['long_name'] = 'Biomass weighted diatom Fe limitation term'

    return ds.drop(['diatC', 'diat_Fe_lim'])

def compute_Cweight_sp_Fe_lim(ds):
    """Add the `spC`-weighted mean of `sp_Fe_lim` and drop both inputs.

    `sp_Cweight_Fe_lim` is sum(`sp_Fe_lim` * `spC`) divided by sum(`spC`),
    both sums taken over `z_t_150m`. Attributes are copied from `sp_Fe_lim`
    with `long_name` replaced.
    """
    weighted_sum = (ds.sp_Fe_lim * ds.spC).sum(dim='z_t_150m')
    total_weight = ds.spC.sum(dim='z_t_150m')
    ds['sp_Cweight_Fe_lim'] = weighted_sum / total_weight

    ds['sp_Cweight_Fe_lim'].attrs = ds['sp_Fe_lim'].attrs
    ds['sp_Cweight_Fe_lim'].attrs['long_name'] = 'Biomass weighted small phytoplankton Fe limitation term'

    return ds.drop(['spC', 'sp_Fe_lim'])

def compute_Cweight_sp_lightlim(ds):
    """Add the `spC`-weighted mean of `sp_light_lim` and drop both inputs.

    `sp_Cweight_light_lim` is sum(`sp_light_lim` * `spC`) divided by
    sum(`spC`), both sums taken over `z_t_150m`. Attributes are copied from
    `sp_light_lim` with `long_name` replaced.
    """
    out_name = 'sp_Cweight_light_lim'

    weighted_sum = (ds.sp_light_lim * ds.spC).sum(dim='z_t_150m')
    total_weight = ds.spC.sum(dim='z_t_150m')
    ds[out_name] = weighted_sum / total_weight

    ds[out_name].attrs = ds.sp_light_lim.attrs
    ds[out_name].attrs['long_name'] = 'Biomass weighted small phytoplankton light limitation term'

    return ds.drop(['spC', 'sp_light_lim'])


def compute_PAR_surf(ds):
    """Add `PAR_surf` (first `z_t_150m` level of `PAR_avg`) and drop `PAR_avg`.

    Attributes are copied from `PAR_avg` with `long_name` replaced.
    """
    surface_level = 0

    ds['PAR_surf'] = ds.PAR_avg.isel(z_t_150m=surface_level)
    ds['PAR_surf'].attrs = ds['PAR_avg'].attrs
    ds['PAR_surf'].attrs['long_name'] = 'PAR avg in the top 10m'

    return ds.drop(['PAR_avg'])

def compute_PAR_65m(ds):
    """Add `PAR_65m` (`PAR_avg` at `z_t_150m` index 6) and drop `PAR_avg`.

    Attributes are copied from `PAR_avg` with `long_name` replaced.
    """
    level_index = 6
    out_name = 'PAR_65m'

    ds[out_name] = ds.PAR_avg.isel(z_t_150m=level_index)
    ds[out_name].attrs = ds.PAR_avg.attrs
    ds[out_name].attrs['long_name'] = 'PAR avg in 60 to 70m depth level'

    return ds.drop(['PAR_avg'])

def compute_Fe_65m(ds):
    """Add `Fe_65m` (`Fe` at `z_t` index 6) and drop `Fe`.

    Attributes are copied from `Fe` with `long_name` replaced.
    """
    level_index = 6
    out_name = 'Fe_65m'

    ds[out_name] = ds.Fe.isel(z_t=level_index)
    ds[out_name].attrs = ds.Fe.attrs
    ds[out_name].attrs['long_name'] = 'Fe in 60 to 70m depth level'

    return ds.drop(['Fe'])

def compute_PAR_25m(ds):
    """Add `PAR_25m` (`PAR_avg` at `z_t_150m` index 2) and drop `PAR_avg`.

    Attributes are copied from `PAR_avg` with `long_name` replaced.
    """
    level_index = 2

    ds['PAR_25m'] = ds.PAR_avg.isel(z_t_150m=level_index)
    ds['PAR_25m'].attrs = ds['PAR_avg'].attrs
    ds['PAR_25m'].attrs['long_name'] = 'PAR avg in 20 to 30m depth level'

    return ds.drop(['PAR_avg'])

def compute_Fe_25m(ds):
    """Add `Fe_25m` (`Fe` at `z_t` index 2) and drop `Fe`.

    Attributes are copied from `Fe` with `long_name` replaced.
    """
    level_index = 2

    ds['Fe_25m'] = ds.Fe.isel(z_t=level_index)
    ds['Fe_25m'].attrs = ds['Fe'].attrs
    ds['Fe_25m'].attrs['long_name'] = 'Fe in 20 to 30m depth level'

    return ds.drop(['Fe'])

def compute_Fe_surf(ds):
    """Add `Fe_surf` (first `z_t` level of `Fe`) and drop `Fe`.

    Attributes are copied from `Fe` with `long_name` replaced.
    """
    out_name = 'Fe_surf'

    top_level = ds.Fe.isel(z_t=0)
    ds[out_name] = top_level

    ds[out_name].attrs = ds.Fe.attrs
    ds[out_name].attrs['long_name'] = 'Fe in the top 10m'

    return ds.drop(['Fe'])

def compute_PAR_avg_150m(ds):
    """Add `PAR_avg_150m` and drop `PAR_avg`.

    The new variable is the unweighted mean of `PAR_avg` over the whole
    `z_t_150m` dimension. Attributes are copied from `PAR_avg` with
    `long_name` replaced.
    """
    column_mean = ds.PAR_avg.mean(dim='z_t_150m')
    ds['PAR_avg_150m'] = column_mean

    ds['PAR_avg_150m'].attrs = ds['PAR_avg'].attrs
    ds['PAR_avg_150m'].attrs['long_name'] = 'PAR avg in the top 150m'

    return ds.drop(['PAR_avg'])

def compute_Fe_avg_150m(ds):
    """Add `Fe_avg_150m` and drop `Fe`.

    The new variable is the unweighted mean of `Fe` across the first 15
    `z_t` levels. Attributes are copied from `Fe` with `long_name` replaced.
    """
    n_levels = 15
    out_name = 'Fe_avg_150m'

    ds[out_name] = ds.Fe.isel(z_t=slice(0, n_levels)).mean(dim='z_t')
    ds[out_name].attrs = ds.Fe.attrs
    ds[out_name].attrs['long_name'] = 'Fe avg in the top 150m'

    return ds.drop(['Fe'])


def compute_Fe_avg_50m(ds):
    """Add `Fe_avg_50m` and drop `Fe`.

    The new variable is the unweighted mean of `Fe` across the first five
    `z_t` levels. Attributes are copied from `Fe` with `long_name` replaced.
    """
    upper_levels = ds.Fe.isel(z_t=slice(0, 5))
    ds['Fe_avg_50m'] = upper_levels.mean(dim='z_t')

    ds['Fe_avg_50m'].attrs = ds['Fe'].attrs
    ds['Fe_avg_50m'].attrs['long_name'] = 'Fe avg in the top 50m'

    return ds.drop(['Fe'])

def compute_PAR_avg_50m(ds):
    """Add `PAR_avg_50m` and drop `PAR_avg`.

    The new variable is the unweighted mean of `PAR_avg` across the first
    five `z_t_150m` levels. Attributes are copied from `PAR_avg` with
    `long_name` replaced.
    """
    out_name = 'PAR_avg_50m'

    upper_levels = ds.PAR_avg.isel(z_t_150m=slice(0, 5))
    ds[out_name] = upper_levels.mean(dim='z_t_150m')

    ds[out_name].attrs = ds.PAR_avg.attrs
    ds[out_name].attrs['long_name'] = 'PAR avg in the top 50m'

    return ds.drop(['PAR_avg'])

def compute_Fe_Cweight_150m_avg(ds):
    """Add the biomass-weighted mean of `Fe` as `Fe_Cweight_150m_avg`.

    The weights are `spC + diatC` on the `z_t_150m` axis. `Fe` is cut to its
    first 15 `z_t` levels and that dimension is renamed to `z_t_150m` so it
    lines up with the weights. The result is sum(Fe * weights) divided by
    sum(weights), both sums over `z_t_150m`.

    Attributes are copied from `Fe` with `long_name` replaced. `ds` itself
    receives the new variable; the returned dataset is `ds` without `spC`,
    `diatC` and `Fe`.

    The layer thickness `dz` is not part of this calculation, so the
    dataset does not need to contain it.
    """
    biomass = ds.spC + ds.diatC
    fe_150m = ds.Fe.isel(z_t=slice(0, 15)).rename({'z_t': 'z_t_150m'})

    # weighted mean = sum(value * weight) / sum(weight)
    weighted_sum = (fe_150m * biomass).sum(dim='z_t_150m')
    ds['Fe_Cweight_150m_avg'] = weighted_sum / biomass.sum(dim='z_t_150m')

    ds['Fe_Cweight_150m_avg'].attrs = ds.Fe.attrs
    ds['Fe_Cweight_150m_avg'].attrs['long_name'] = 'Biomass weighted Fe mean over top 150m'

    return ds.drop(['spC', 'diatC', 'Fe'])


########################!!!!!!!!!!!!!
def compute_PAR_Cweight_150m_avg(ds):
    """Add the biomass-weighted mean of `PAR_avg` as `PAR_Cweight_150m_avg`.

    The weights are `spC + diatC`. The result is sum(`PAR_avg` * weights)
    divided by sum(weights), both sums over `z_t_150m`. Attributes are
    copied from `PAR_avg` with `long_name` replaced. Returns `ds` without
    `spC`, `diatC` and `PAR_avg`.
    """
    out_name = 'PAR_Cweight_150m_avg'

    # PAR times biomass at every level, then summed over depth
    ds[out_name] = ds.PAR_avg * (ds.spC + ds.diatC)
    ds[out_name] = ds[out_name].sum(dim='z_t_150m')

    # normalise by the sum of the weights
    total_biomass = (ds.spC + ds.diatC).sum(dim='z_t_150m')
    ds[out_name] = ds[out_name] / total_biomass

    ds[out_name].attrs = ds.PAR_avg.attrs
    ds[out_name].attrs['long_name'] = 'Biomass weighted PAR mean over top 150m'

    return ds.drop(['spC', 'diatC', 'PAR_avg'])

def fix_time(ds):
    """Return a deep copy of `ds` whose `time` is the mean of `time_bound`.

    The mean of `time_bound` over `d2` is converted to dates with
    `cftime.num2date`, using the `units` and `calendar` attributes of the
    existing `time` variable. On the new `time` variable, `units` and
    `calendar` are moved from the attributes into the encoding; all other
    attributes and encoding entries of the old `time` are carried over.
    The input dataset is not modified.
    """
    fixed = ds.copy(deep=True)

    # Grab these before `time` is replaced below.
    old_attrs = fixed.time.attrs
    old_encoding = fixed.time.encoding

    interval_mean = fixed.time_bound.mean(dim='d2')
    mid_dates = cftime.num2date(
        interval_mean,
        units=fixed.time.units,
        calendar=fixed.time.calendar,
    )
    fixed['time'] = xr.DataArray(mid_dates, dims=('time'))

    for key in ('units', 'calendar'):
        old_encoding[key] = old_attrs.pop(key)

    fixed.time.attrs = old_attrs
    fixed.time.encoding = old_encoding
    return fixed

In [12]:
%%time

# fix time
dsets2 = {key: fix_time(ds) for key, ds in dsets.items()}
print('fixed time')

# subset time
dsets2 = {key: ds.sel(time=slice('1920', '2100')) for key, ds in dsets2.items()}
print('subset time done')

# compute surface chl
#dsets2 = {key: compute_chl_surf(ds) for key, ds in dsets2.items()}

# compute NPP
#dsets2 = {key: compute_NPP_zint(ds) for key, ds in dsets2.items()}

# # compute PAR avg over the top 150m
# dsets2 = {key: compute_PAR_avg_50m(ds) for key, ds in dsets2.items()}

# # compute Fe avg over the top 150m
# dsets2 = {key: compute_Fe_avg_50m(ds) for key, ds in dsets2.items()}

# compute top 50m temperature
dsets2 = {key: compute_TEMP_50m(ds) for key, ds in dsets2.items()}
print('computed top 50m temp')


# # compute top 100m temperature
# dsets2 = {key: compute_TEMP_100m(ds) for key, ds in dsets2.items()}
# print('computed top 100m temp')

# # compute depth integrated zooplankton production
# dsets2 = {key: compute_zoo_prod_zint(ds) for key, ds in dsets2.items()}
# print('computed zoo prod')

# ###compute depth integrated zooplankton loss
# dsets2 = {key: compute_zoo_loss_zint(ds) for key, ds in dsets2.items()}
# print('computed zoo loss')



# # # compute depth integrated sp production
# dsets2 = {key: compute_diatC_50m_zint(ds) for key, ds in dsets2.items()}
# print('computed spC zint')

# # compute depth integrated diatC
# dsets2 = {key: compute_diatC_zint(ds) for key, ds in dsets2.items()}
# print('computed diatC zint')

# # compute depth integrated zooplankton carbon
# dsets2 = {key: compute_zooC_zint(ds) for key, ds in dsets2.items()}
# print('computed zooC zint')

# # compute POC export (POC_FLUX at 100m)
# dsets2 = {key: compute_POC_FLUX_100m(ds) for key, ds in dsets2.items()}
# print('computed POC export')

# dsets2 = {key: put_HMXL(ds) for key, ds in dsets2.items()}
# print('put HMXL')

# dsets2 = {key: put_XMXL(ds) for key, ds in dsets2.items()}
# print('put XMXL')

# dsets2 = {key: put_TMXL(ds) for key, ds in dsets2.items()}
# print('put TMXL')

# dsets2 = {key: compute_Cweight_diat_lightlim(ds) for key, ds in dsets2.items()}
# print('compute biomass weighted diatom light limitation')

# dsets2 = {key: compute_Cweight_sp_lightlim(ds) for key, ds in dsets2.items()}
# print('compute biomass weighted sp light limitation')

# dsets2 = {key: compute_Cweight_diat_Fe_lim(ds) for key, ds in dsets2.items()}
# print('compute biomass weighted diat Fe limitation')

# dsets2 = {key: compute_Cweight_sp_Fe_lim(ds) for key, ds in dsets2.items()}
# print('compute biomass weighted sp Fe limitation')

# dsets2 = {key: compute_PAR_surf(ds) for key, ds in dsets2.items()}
# print('extract PAR surf')

# dsets2 = {key: compute_Fe_surf(ds) for key, ds in dsets2.items()}
# print('extract Fe surf')

# dsets2 = {key: compute_PAR_65m(ds) for key, ds in dsets2.items()}
# print('extract PAR at 65m')

# dsets2 = {key: compute_Fe_65m(ds) for key, ds in dsets2.items()}
# print('extract Fe at 65m')

# dsets2 = {key: compute_PAR_25m(ds) for key, ds in dsets2.items()}
# print('extract PAR at 25m')

# dsets2 = {key: compute_Fe_25m(ds) for key, ds in dsets2.items()}
# print('extract Fe at 25m')


# dsets2 = {key: put_QSW(ds) for key, ds in dsets2.items()}
# print('put QSW')


# dsets2 = {key: put_Jint_100m_Fe(ds) for key, ds in dsets2.items()}
# print('put Jint 100m Fe')

# dsets2 = {key: put_tend_zint_100m_Fe(ds) for key, ds in dsets2.items()}
# print('put tend_zint 100m Fe')


# dsets2 = {key: compute_PAR_Cweight_150m_avg(ds) for key, ds in dsets2.items()}
# print('compute_PAR_Cweight_150m_avg')

# dsets2 = {key: compute_Fe_Cweight_150m_avg(ds) for key, ds in dsets2.items()}
# print('compute_Fe_Cweight_150m_avg')
# dsets2

fixed time
subset time done
computed top 50m temp
CPU times: user 1.01 s, sys: 22 ms, total: 1.03 s
Wall time: 1.66 s


In [13]:
# dsets2

Concatenate the datasets in time, i.e. 20C + RCP8.5 experiments.

In [14]:
# Keys of the experiments in the order they are joined along time: 20C, then RCP8.5
ordered_dsets_keys = ['ocn,20C,pop.h', 'ocn,RCP85,pop.h']
#ordered_dsets_keys = ['ocn.20C.pop.h', 'ocn.RCP85.pop.h']

# Time encoding taken from the first experiment in the list.
# NOTE(review): time_encoding is not referenced again in the cells that follow;
# confirm whether it is still needed.
time_encoding = dsets2[ordered_dsets_keys[0]].time.encoding

ds = xr.concat(
    [dsets2[key] for key in ordered_dsets_keys],
    dim='time',
    data_vars='minimal',
    #compat='override' ## added this
)
ds

Unnamed: 0,Array,Chunk
Bytes,33.94 kiB,80 B
Shape,"(2172, 2)","(5, 2)"
Count,1476 Tasks,435 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 33.94 kiB 80 B Shape (2172, 2) (5, 2) Count 1476 Tasks 435 Chunks Type float64 numpy.ndarray",2  2172,

Unnamed: 0,Array,Chunk
Bytes,33.94 kiB,80 B
Shape,"(2172, 2)","(5, 2)"
Count,1476 Tasks,435 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,33.80 GiB,2.34 MiB
Shape,"(34, 2172, 384, 320)","(1, 5, 384, 320)"
Count,155729 Tasks,14790 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 33.80 GiB 2.34 MiB Shape (34, 2172, 384, 320) (1, 5, 384, 320) Count 155729 Tasks 14790 Chunks Type float32 numpy.ndarray",34  1  320  384  2172,

Unnamed: 0,Array,Chunk
Bytes,33.80 GiB,2.34 MiB
Shape,"(34, 2172, 384, 320)","(1, 5, 384, 320)"
Count,155729 Tasks,14790 Chunks
Type,float32,numpy.ndarray


In [15]:
# Variables that the cells below rechunk and write to netCDF.
variables = ['TEMP_50m_mean']
# Alternative selections:
#   ['Fe_Cweight_150m_avg']
#   ['Jint_100m_Fe','tend_zint_100m_Fe']
#   ['QSW_HTP','SHF_QSW','QSW_HBL']
#   ['PAR_surf']
#   ['diatC_zint','spC_zint','zooC_zint'] #,'zoo_prod','TEMP_100m_mean']

### Compute seasonal means

In [16]:
# Season handed to util.ann_mean in the next cell; it is also embedded in the output file name.
season = 'MAM'

In [17]:
%%time
ds_djf = util.ann_mean(ds, season=season, time_bnds_varname='time_bound', time_centered=True)
ds_djf

CPU times: user 8.42 s, sys: 108 ms, total: 8.53 s
Wall time: 8.57 s


Unnamed: 0,Array,Chunk
Bytes,5.63 GiB,0.94 MiB
Shape,"(181, 34, 384, 320)","(1, 1, 384, 320)"
Count,313788 Tasks,6154 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 5.63 GiB 0.94 MiB Shape (181, 34, 384, 320) (1, 1, 384, 320) Count 313788 Tasks 6154 Chunks Type float64 numpy.ndarray",181  1  320  384  34,

Unnamed: 0,Array,Chunk
Bytes,5.63 GiB,0.94 MiB
Shape,"(181, 34, 384, 320)","(1, 1, 384, 320)"
Count,313788 Tasks,6154 Chunks
Type,float64,numpy.ndarray


In [18]:
# Rechunk each selected variable. The chunk sizes are positional: the ds_djf
# output above reports an array shape of (181, 34, 384, 320), so this asks for
# blocks of 5 along the first axis and the full extent of the other three.
for var in variables:
    ds_djf[var] = ds_djf[var].chunk((5,34,384,320))

In [19]:
%%time
# Evaluate the lazy (dask-backed) arrays of ds_djf now so the data are held in memory.
ds_djf.load()

CPU times: user 3min 19s, sys: 16.5 s, total: 3min 35s
Wall time: 10min 39s


### Write out the seasonal-mean dataset

In [20]:
%%time

#var = variables[0]

for var in variables:


    print('starting variable: ', var)

    keep_vars = ['time_bound','TAREA','time','dz','KMT', 'member_id','TLAT','TLONG', var]

    ds_out = ds_djf.drop([v for v in ds_djf.variables if v not in keep_vars])


    ds_out.compute()
    outfile='/glade/scratch/kristenk/CESM-LE-output/CESM-LE-'+var+'_'+season+'mean.nc'
    ds_out.to_netcdf(outfile)

starting variable:  TEMP_50m_mean
CPU times: user 87.4 ms, sys: 1.72 s, total: 1.81 s
Wall time: 3.58 s


In [21]:
#cluster.close()

In [None]:
#client.close()