In [1]:
# DASK client set

import os
import sys
from dask.distributed import Client

# Connect this notebook to the Dask scheduler described by the scheduler file
# (the file holds the scheduler's connection information).
client = Client(scheduler_file='/proj/kimyy/Dropbox/source/python/all/mpi/scheduler_10.json')  

def setup_module_path():
    """Add the private CESM2 module directory to ``sys.path`` of the calling process.

    The directory is appended at most once: if it is already listed in
    ``sys.path`` the function returns without touching it.
    """
    cesm2_module_dir = '/proj/kimyy/Dropbox/source/python/all/Modules/CESM2'
    if cesm2_module_dir in sys.path:
        return
    sys.path.append(cesm2_module_dir)

# Execute setup_module_path on the Dask workers so that they can import the
# private CESM2 modules as well.
client.run(setup_module_path)

# NOTE(review): a bare expression is only rendered when it is the last line of
# a cell, so this line appears to have no visible effect here — confirm whether
# it is still wanted.
client

# Express the notebook's working directory relative to the '/all/' root of the
# source tree. str.partition splits on the first '/all/'; when the marker is
# absent the tail is empty and relative_path is just '/all/'.
# The tail is taken by index rather than by unpacking into `_`, because binding
# `_` in an IPython session shadows the built-in "last output" variable.
notebook_path = os.path.abspath(".")
relative_path = '/all/' + notebook_path.partition('/all/')[2]
relative_path

'/all/Model/CESM2/Earth_System_Predictability/ASSM/Aleph'

In [2]:
# load public modules

import xarray as xr
import numpy as np
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from scipy import stats
from scipy.interpolate import griddata
import warnings
# Suppress every warning category from this point on. Note that this also hides
# deprecation notices emitted by the libraries used in this notebook.
warnings.simplefilter(action='ignore')
import pandas as pd
import cftime
import pop_tools
import time
import subprocess
import re as re_mod
import cftime
import datetime


In [3]:
# load private modules

import sys
# Make the private CESM2 helper modules importable in this kernel. The append is
# unconditional, so re-running the cell adds a duplicate sys.path entry.
sys.path.append('/proj/kimyy/Dropbox/source/python/all/Modules/CESM2')
from KYY_CESM2_preprocessing import CESM2_config


In [4]:
# Grid dataset for the POP 'POP_gx1v7' ocean grid.
ds_grid = pop_tools.get_grid('POP_gx1v7')

# Configuration object for the 'spChl' variable, years 1960-2020.
# NOTE(review): CESM2_config is project code not shown here; year_s/year_e are
# set before setvar() is called — confirm whether setvar depends on that order.
cfg_var_spChl=CESM2_config()
cfg_var_spChl.year_s=1960
cfg_var_spChl.year_e=2020
cfg_var_spChl.setvar('spChl')
# cfg_var_spChl.list()

# Same setup for the 'spChl_SURF' variable.
cfg_var_spChl_SURF=CESM2_config()
cfg_var_spChl_SURF.year_s=1960
cfg_var_spChl_SURF.year_e=2020
cfg_var_spChl_SURF.setvar('spChl_SURF')
# cfg_var_spChl_SURF.list()

# Time window on the no-leap calendar: from 1 January of year_s
# to 1 January of the year after year_e.
start_date = cftime.DatetimeNoLeap(cfg_var_spChl.year_s, 1, 1)
end_date = cftime.DatetimeNoLeap(cfg_var_spChl.year_e+1, 1, 1)


In [5]:
# define preprocessing function

# Names the preprocessing functions keep by default: every coordinate or data
# variable NOT listed here is dropped (or turned into a coordinate when drop=False).
exceptcv=['time','lon','lat', 'TLONG', 'TLAT', 'z_t', 
          cfg_var_spChl.var, cfg_var_spChl_SURF.var]

def process_coords(ds, sd, ed, drop=True, except_coord_vars=exceptcv):
    """Preprocess a single file's dataset before concatenation.

    Every coordinate and data variable whose name is not in
    ``except_coord_vars`` is removed (``drop=True``) or converted into a
    coordinate (``drop=False``). Removing the extra variables keeps them out
    of the concatenation, which they would otherwise slow down.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset to preprocess.
    sd, ed : time-like
        Bounds handed to ``ds.sel(time=slice(sd, ed))``; only used when
        ``drop`` is true.
    drop : bool, default True
        If true, drop the unwanted variables, subset in time and keep only the
        first ``z_t`` level. If false, only mark the unwanted variables as
        coordinates.
    except_coord_vars : sequence of str
        Names of the coordinates / data variables to keep.

    Returns
    -------
    xarray.Dataset
        The reduced dataset.
    """
    # Coordinates first, then data variables, in the dataset's own order.
    unwanted = [name for name in ds.coords if name not in except_coord_vars]
    unwanted += [name for name in ds.data_vars if name not in except_coord_vars]

    if not drop:
        return ds.set_coords(unwanted)

    # Dataset.drop is deprecated for removing variables; drop_vars replaces it.
    ds = ds.drop_vars(unwanted)
    ds = ds.sel(time=slice(sd, ed))
    # Keep only the first z_t level. Files without a z_t dimension are passed
    # through unchanged instead of raising.
    # NOTE(review): isel only affects variables that carry the z_t dimension;
    # variables on a different vertical dimension keep all their levels — confirm
    # this is intended.
    if 'z_t' in ds.dims:
        ds = ds.isel(z_t=0)
    return ds


def process_coords_d2y(ds, sd, ed, drop=True, except_coord_vars=exceptcv):
    """Preprocess a single file's dataset and reduce it to yearly means.

    Every coordinate and data variable whose name is not in
    ``except_coord_vars`` is removed (``drop=True``) or converted into a
    coordinate (``drop=False``). With ``drop=True`` the data are additionally
    subset in time, the time stamps are moved back by half a day, and the
    result is averaged per calendar year.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset to preprocess.
    sd, ed : time-like
        Bounds handed to ``ds.sel(time=slice(sd, ed))``; only used when
        ``drop`` is true.
    drop : bool, default True
        If true, return yearly means of the kept variables. If false, only
        mark the unwanted variables as coordinates.
    except_coord_vars : sequence of str
        Names of the coordinates / data variables to keep.

    Returns
    -------
    xarray.Dataset
        With ``drop=True``: yearly means, with a ``year`` dimension in place
        of ``time``. With ``drop=False``: the input with the unwanted
        variables set as coordinates.
    """
    # Coordinates first, then data variables, in the dataset's own order.
    unwanted = [name for name in ds.coords if name not in except_coord_vars]
    unwanted += [name for name in ds.data_vars if name not in except_coord_vars]

    if not drop:
        return ds.set_coords(unwanted)

    # Dataset.drop is deprecated for removing variables; drop_vars replaces it.
    ds = ds.drop_vars(unwanted)
    ds = ds.sel(time=slice(sd, ed))

    # Move every time stamp back by half a day before grouping by year.
    # The values must be read from, and written back to, the `time` coordinate:
    # the dataset has no `time_values` attribute, and Dataset.assign would add
    # a data variable instead of replacing the coordinate.
    # NOTE(review): presumably the records are stamped at the end of their
    # averaging day, so without the shift the last day of a year would be
    # binned into the following year — confirm.
    time_stamps = ds['time'].values
    half_day = np.full(time_stamps.shape, datetime.timedelta(days=0.5))
    ds = ds.assign_coords(time=time_stamps - half_day)

    ds = ds.groupby('time.year').mean(dim='time', skipna=True)
    return ds


def process_coords_hcst(ds, drop=True, except_coord_vars=exceptcv):
    """Preprocess a single file's dataset by discarding unneeded variables.

    Every coordinate and data variable whose name is not in
    ``except_coord_vars`` is removed (``drop=True``) or converted into a
    coordinate (``drop=False``). No time or depth subsetting is applied.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset to preprocess.
    drop : bool, default True
        If true, drop the unwanted variables; if false, mark them as
        coordinates instead.
    except_coord_vars : sequence of str
        Names of the coordinates / data variables to keep.

    Returns
    -------
    xarray.Dataset
        The reduced dataset.
    """
    # Coordinates first, then data variables, in the dataset's own order.
    unwanted = [name for name in ds.coords if name not in except_coord_vars]
    unwanted += [name for name in ds.data_vars if name not in except_coord_vars]

    if not drop:
        return ds.set_coords(unwanted)

    # Dataset.drop is deprecated for removing variables; drop_vars replaces it.
    return ds.drop_vars(unwanted)








In [6]:
# NOTE(review): LE_path_load is project code not visible here; it presumably
# fills LE_file_list for the given variable — confirm.
cfg_var_spChl_SURF.LE_path_load(cfg_var_spChl_SURF.var)
# Display the first entry of the file list as the cell output.
cfg_var_spChl_SURF.LE_file_list[0]

[['/proj/jedwards/archive/b.e21.BHISTsmbb.f09_g17.LE2-1011.001/ocn/proc/tseries/day_1/b.e21.BHISTsmbb.f09_g17.LE2-1011.001.pop.h.ecosys.nday1.spChl_SURF.19600102-19700101.nc',
  '/proj/jedwards/archive/b.e21.BHISTsmbb.f09_g17.LE2-1011.001/ocn/proc/tseries/day_1/b.e21.BHISTsmbb.f09_g17.LE2-1011.001.pop.h.ecosys.nday1.spChl_SURF.19700102-19800101.nc',
  '/proj/jedwards/archive/b.e21.BHISTsmbb.f09_g17.LE2-1011.001/ocn/proc/tseries/day_1/b.e21.BHISTsmbb.f09_g17.LE2-1011.001.pop.h.ecosys.nday1.spChl_SURF.19800102-19900101.nc',
  '/proj/jedwards/archive/b.e21.BHISTsmbb.f09_g17.LE2-1011.001/ocn/proc/tseries/day_1/b.e21.BHISTsmbb.f09_g17.LE2-1011.001.pop.h.ecosys.nday1.spChl_SURF.19900102-20000101.nc',
  '/proj/jedwards/archive/b.e21.BHISTsmbb.f09_g17.LE2-1011.001/ocn/proc/tseries/day_1/b.e21.BHISTsmbb.f09_g17.LE2-1011.001.pop.h.ecosys.nday1.spChl_SURF.20000102-20100101.nc',
  '/proj/jedwards/archive/b.e21.BHISTsmbb.f09_g17.LE2-1011.001/ocn/proc/tseries/day_1/b.e21.BHISTsmbb.f09_g17.LE2-1011.0

In [7]:
# # Read LE dataset

# start_time = time.time()


# cfg_var_spChl_SURF.LE_path_load(cfg_var_spChl_SURF.var)
# cfg_var_spChl_SURF.LE_ds = xr.open_mfdataset(cfg_var_spChl_SURF.LE_file_list[0], 
#                        chunks={'time': 12}, 
#                        combine='nested', 
#                        concat_dim=[[*cfg_var.LE_ensembles], 'time'], 
#                        parallel=True,
#                        preprocess=lambda ds: process_coords(ds, start_date, end_date),
#                        decode_cf=True, 
#                        decode_times=True)      


# cfg_var_spChl_SURF.LE_ds = cfg_var_spChl_SURF.LE_ds.rename({"concat_dim": "ens_LE"})
# # new_time = cfg_var.LE_ds.time - np.array([datetime.timedelta(days=15)] * len(cfg_var.LE_ds.time))
# # cfg_var_spChl_SURF.LE_ds = cfg_var_spChl_SURF.LE_ds.assign_coords(time=new_time)

# end_time = time.time()
# elapsed_time = end_time - start_time
# print('elapsed time for reading LE: ' + str(elapsed_time))

In [8]:
# match1=re_mod.search(r'-(\d{6})', "b.e21.BHISTsmbb.f09_g17.LE2-1231.011.pop.h.ecosys.nday1.spChl_SURF.20100102-20141231.nc")
# match1.group(1)[0:4]