In [1]:
import numpy as np
import xarray as xr
import netCDF4 as nc
import model_crommelin_seasonal
import feature_crommelin 
from importlib import reload
import sys 
import os
from os import mkdir, makedirs
from os.path import join,exists
from importlib import reload
import pickle

In [2]:
import matplotlib.pyplot as plt 
%matplotlib inline

Create directories to save results

In [3]:
# Where everything lives on disk. The top-level keys are the two data sources
# ("ra" -> reanalysis paths, "hc" -> hindcast paths) plus a single "results" entry.
# Each source has a trajectory directory (may hold many files, so no filename)
# and two feature files ("feat_all", "feat_tpt").
topic_dir = "/scratch/jf4241/crommelin"
filedict = {
    src: {
        "traj": {
            "dir": join(topic_dir, src_dir, "trajectory", "2022-07-25", "0"),
        },
        "feat_all": {
            "dir": join(topic_dir, src_dir, "features_all", "2022-07-25", "0"),
            "filename": "crom_feat_all.nc",
        },
        "feat_tpt": {
            "dir": join(topic_dir, src_dir, "features_tpt", "2022-07-25", "0"),
            "filename": "crom_feat_tpt.nc",
        },
    }
    for src, src_dir in (("ra", "reanalysis"), ("hc", "hindcast"))
}
filedict["results"] = {"dir": join(topic_dir, "results", "2022-07-25", "0")}

In [4]:
# Create every directory listed in `filedict` up front.
# `makedirs(..., exist_ok=True)` is already a no-op when the directory exists, so no
# separate `exists` check is needed. Dropping the check also means a path that exists
# as a regular file raises immediately instead of being silently skipped.
for src in ["ra", "hc"]:
    for fmt in ["traj", "feat_all", "feat_tpt"]:
        makedirs(filedict[src][fmt]["dir"], exist_ok=True)
makedirs(filedict["results"]["dir"], exist_ok=True)

Set physical parameters

In [5]:
# --- Trajectory output ---
dt_samp = 0.5            # Time step at which trajectories are saved out
burnin_time = 500.0      # End of the time grid used for the initial burn-in run

# --- Calendar ---
year_length = 400.0
szn_start = 300.0
szn_length = 250.0

# --- Seasonal discretization ---
dt_szn = 0.74            # Time resolution for the seasonal model
szn_avg_window = 5.0
# Number of whole dt_szn steps that fit in one season (fraction truncated by int()).
Nt_szn = int(szn_length / dt_szn)

In [6]:
# Re-import the model module so that edits to its source file are picked up
# without restarting the kernel.
reload(model_crommelin_seasonal)

<module 'model_crommelin_seasonal' from '/home/jf4241/ecmwf/tpt_ecmwf/demo/model_crommelin_seasonal.py'>

Initialize the model.

In [7]:
# Parameter set handed to the SeasonalCrommelinModel constructor.
# "year_length" is taken from the physical-parameter cell so the two stay in sync.
fundamental_param_dict = {
    "b": 0.5,
    "beta": 1.25,
    "gamma_limits": [0.15, 0.22],
    "C": 0.1,
    "x1star": 0.95,
    "r": -0.801,
    "year_length": year_length,
}
crom = model_crommelin_seasonal.SeasonalCrommelinModel(fundamental_param_dict)


Create "reanalysis" in the file folder reserved for contiguous data.

In [8]:
# Burn-in: integrate from an arbitrary starting point for `burnin_time` so the state
# can settle onto the attractor, and save the trajectory to burnin.nc.
# NOTE(review): 1957 + 0.2 is one fifth of the way through the year indexed 1957; an
# earlier comment described this as year 1958 — confirm the year-numbering convention.
year_len = fundamental_param_dict["year_length"]
t_init = (1957 + 0.2) * year_len
x0 = np.zeros((1, 7))
x0[0, -1] = t_init  # the last column is set to the absolute start time
t_save = np.arange(0, burnin_time, dt_samp)
traj_filename_burnin = join(filedict["ra"]["traj"]["dir"], "burnin.nc")
crom.integrate_and_save(x0, t_save, traj_filename_burnin)

In [9]:
# Use the last saved burn-in state as the initial condition of the long "reanalysis" run.
# The dataset is opened in a context manager and loaded into memory so the netCDF file
# handle is released right away instead of staying open for the rest of the session.
with xr.open_dataset(traj_filename_burnin) as ds_burnin:
    x_burnin = ds_burnin["X"].load()
x0 = x_burnin[:, -1].data  # index -1 along the second axis, i.e. the final saved step
duration = 100 * fundamental_param_dict["year_length"]

# Name the output file after the formatted start and end times of the run.
t_init_str, _ = crom.date_format(x0[0, -1])
t_fin_str, _ = crom.date_format(x0[0, -1] + duration)
traj_filename_ra = join(filedict["ra"]["traj"]["dir"], f"ra_{t_init_str}_to_{t_fin_str}.nc")
param_filename_ra = join(filedict["ra"]["traj"]["dir"], "params")

In [14]:
# Long contiguous integration: save every `dt_samp` over `duration`, writing the
# trajectory to `traj_filename_ra` and passing `param_filename_ra` as the metadata file.
t_save = np.arange(0, duration, dt_samp)
crom.integrate_and_save(
    x0,
    t_save,
    traj_filename_ra,
    metadata_filename=param_filename_ra,
)

Integrated through time 1000.0000000001588 out of 39999.5
Integrated through time 2000.0999999992764 out of 39999.5
Integrated through time 3000.099999998367 out of 39999.5
Integrated through time 4000.0999999974574 out of 39999.5
Integrated through time 5000.0000000006585 out of 39999.5
Integrated through time 6000.000000004296 out of 39999.5
Integrated through time 7000.000000007934 out of 39999.5
Integrated through time 8000.000000011572 out of 39999.5
Integrated through time 9000.00000001521 out of 39999.5
Integrated through time 10000.000000018848 out of 39999.5
Integrated through time 11000.000000022486 out of 39999.5
Integrated through time 12000.000000026124 out of 39999.5
Integrated through time 13000.000000029762 out of 39999.5
Integrated through time 14000.0000000334 out of 39999.5
Integrated through time 15000.000000037038 out of 39999.5
Integrated through time 16000.000000040676 out of 39999.5
Integrated through time 17000.00000003311 out of 39999.5
Integrated through time

Create "hindcast" data in the file folder reserved for hindcast data.

In [10]:
# Build the hindcast dataset from the reanalysis trajectory, restricted to the
# absolute-time window spanning years 1960 to 1970.
hindcast_years = np.array([1960, 1970])
t_abs_range = crom.q["year_length"] * hindcast_years
ensemble_options = dict(ens_size=30, ens_duration=47, ens_gap=13, pert_scale=0.01)
crom.generate_hindcast_dataset(
    [traj_filename_ra],
    filedict["hc"]["traj"]["dir"],
    t_abs_range,
    dt_samp,
    **ensemble_options,
)

KeyboardInterrupt: 

Extract features of interest from both the reanalysis and hindcast datasets. This will be expensive, as we have to read from a large database of files, so we should minimize the number of times we do this in development. To that end, we read in ALL possible features we MIGHT use for the downstream tasks of K-means clustering in a single pass. Some extra reduction is likely necessary afterwards.

In [11]:
# Re-import the feature module so that edits to its source file are picked up
# without restarting the kernel.
reload(feature_crommelin)

<module 'feature_crommelin' from '/home/jf4241/ecmwf/tpt_ecmwf/demo/feature_crommelin.py'>

# All possible features for DGA

In [13]:
# Extract all features of potential use from the reanalysis dataset.
# Don't waste RAM by time-delay embedding explicitly
# Encode this in a function that loops through all files in a directory and puts features into one file
crom_feat = feature_crommelin.SeasonalCrommelinModelFeatures()

# Read back the parameter file written alongside the reanalysis trajectory.
# The `with` block guarantees the file handle is closed after loading.
# NOTE: unpickling can execute arbitrary code — only load files produced by this notebook.
with open(param_filename_ra, "rb") as param_file:
    qra = pickle.load(param_file)

In [39]:
# WARNING: for the feature-extraction step to work, the 'feature' coordinate of each
# observable must be uniquely named. For example, if the identity observable and the
# energy observable both have a coordinate called "feature", xarray will assume they
# are the same coordinate and merge them, filling the gaps with NaNs.
feat_all = {}

In [14]:
# For each source, compute every observable from every trajectory file, stack the
# per-file results along a new "ensemble" dimension, and save them to one netCDF file.
for src in ["ra", "hc"]:
    traj_dir = filedict[src]["traj"]["dir"]
    # Sort the file names: os.listdir returns entries in arbitrary order, which would
    # make the ordering along the "ensemble" dimension differ from run to run.
    file_list = sorted(
        f for f in os.listdir(traj_dir) if (f.startswith(src) and f.endswith(".nc"))
    )
    if not file_list:
        # Fail with a clear message instead of an opaque error from xr.concat([]).
        raise FileNotFoundError(f"No '{src}*.nc' trajectory files found in {traj_dir}")
    ds_feature_list = []
    for filename in file_list:
        # The context manager closes the file even if one of the observables raises.
        with xr.open_dataset(join(traj_dir, filename)) as ds:
            ds_feature_list.append(
                xr.Dataset(
                    data_vars={
                        "identity": crom_feat.identity_observable(ds, qra),
                        "energy": crom_feat.energy_observable(ds, qra),
                        "energy_tendency": crom_feat.energy_tendency_observable(ds, qra),
                        "energy_exchange": crom_feat.energy_exchange_observable(ds, qra),
                        "phase": crom_feat.phase_observable(ds, qra),
                    }
                )
            )
    feat_all[src] = xr.concat(ds_feature_list, dim="ensemble")  # ensemble is a new dimension
    feat_all[src].to_netcdf(join(filedict[src]["feat_all"]["dir"], filedict[src]["feat_all"]["filename"]))

# Subset observables for use in DGA

In [None]:
# Re-read the saved "all features" files and start assembling the reduced feature set.
feat_all = dict()
feat_tpt = dict()
for src in ["ra","hc"]:
    feat_all[src] = xr.open_dataset(join(filedict[src]["feat_all"]["dir"], filedict[src]["feat_all"]["filename"]))
    da_tpt = xr.DataArray(
        coords = {
            "ensemble": feat_all[src].coords["ensemble"],
            "member": feat_all[src].coords["member"],
            "t_sim": feat_all[src].coords["t_sim"],
            "feature": ["E_01","E_02","E_11","E_12","E_tot","Edot_dissipation","Edot_forcing"],
        },
        # Name the axes explicitly: some xarray releases refuse to infer dimension
        # names from a dict of coords, and explicit dims work on all of them.
        dims = ["ensemble", "member", "t_sim", "feature"],
        data = np.zeros((feat_all[src]["ensemble"].size, feat_all[src]["member"].size, feat_all[src]["t_sim"].size, 7)),
    )
    # NOTE(review): assumes the "energy" variable has a reservoir labelled "E01" — confirm.
    da_tpt.loc[dict(feature="E_01")] = feat_all[src]["energy"].sel(reservoir="E01")
    # TODO: finish filling in the rest (da_tpt is not yet stored in feat_tpt)

In [35]:
# Event parameters passed to the feature object. The values come from the
# physical-parameter variables defined earlier in the notebook rather than being
# retyped here, so the model and the features cannot drift apart.
epd = dict(
    dt_szn = dt_szn,
    szn_start = szn_start,
    szn_length = szn_length,
    year_length = year_length,
    szn_avg_window = szn_avg_window,
)
# Number of whole dt_szn steps per season (fraction truncated by int()).
epd["Nt_szn"] = int(epd["szn_length"] / epd["dt_szn"])

In [36]:
# Hand the event/season parameters to the feature object.
# (Presumably this is what makes `crom_feat.Nt_szn` available below — verify in feature_crommelin.)
crom_feat.set_event_params(epd)

In [38]:
# Allocate one zero-filled array per hindcast observable. Each has the same shape as
# the observable, except that the "t_sim" axis is resized to crom_feat.Nt_szn.
data_vars_clim = dict()
for obs_name, obs in feat_all["hc"].data_vars.items():
    # Find the time axis on the variable itself: the Dataset-level `dims` mapping does
    # not describe the axis order of an individual variable, and the observables do
    # not all share the same set of dimensions.
    t_dim = obs.dims.index("t_sim")
    shp = list(obs.shape)
    shp[t_dim] = crom_feat.Nt_szn
    data_vars_clim[obs_name] = np.zeros(shp)

In [None]:
for obs_name in list(feat_all["hc"].data_vars.keys()):
    # TODO: loop body was never written (presumably meant to fill data_vars_clim[obs_name]).
    # `pass` keeps the cell syntactically valid so "Run All" does not stop here.
    pass

In [15]:
# List the observables (data variables) stored for the reanalysis source.
feat_all["ra"].data_vars.keys()

KeysView(Data variables:
    identity         (ensemble, member, t_sim, feature) float64 0.8252 ... 8....
    energy           (ensemble, member, t_sim, reservoir) float64 0.3405 ... nan
    energy_tendency  (ensemble, member, t_sim, reservoir) float64 nan ... 0.0
    energy_exchange  (ensemble, member, t_sim, source, sink) float64 0.0 ... 0.0
    phase            (ensemble, member, t_sim, wavenumber) float64 0.6467 ......)

In [None]:
# Open the contiguous "reanalysis" trajectory that was integrated earlier and print
# the first saved value of x1 and of x2.
X_ra_cont = xr.open_dataset(traj_filename_ra)["X"]
for feature_name in ("x1", "x2"):
    print(X_ra_cont.sel(feature=feature_name).isel(t_sim=0))

In [None]:
# Time series of x1 (left panel) and x4 (right panel) for member 0, restricted to
# simulation times earlier than four year-lengths.
fig, ax = plt.subplots(ncols=2, figsize=(12, 6))
in_first_four_years = X_ra_cont['t_sim'] < 4*crom.q["year_length"]
x1_series = X_ra_cont.sel(feature='x1', member=0).where(in_first_four_years, drop=True)
x4_series = X_ra_cont.sel(feature='x4', member=0).where(in_first_four_years, drop=True)
xr.plot.plot(x1_series, x='t_sim', ax=ax[0])
xr.plot.plot(x4_series, x='t_sim', ax=ax[1])


In [None]:
# Compute the climatology
# The features object in this notebook is `crom_feat`; `feat_crom` is never defined
# and would raise a NameError.
# NOTE(review): `in_filename` and `save_filename` are not assigned anywhere in the
# visible notebook — define them before running this cell.
crom_feat.compute_climatology(in_filename,save_filename)

In [None]:
# Plot some hindcasts on top of climatology
