# Utility to create a stable file used for reproducibility checks

## v2.1 Code

In [1]:
import pickle
import numpy as np
import os.path as osp
import os
import pandas as pd
import tensorflow as tf
import sys
sys.path.append('..')
from moisture_rnn_pkl import pkl2train
from moisture_rnn import RNNParams
from data_funcs import build_train_dict
from utils import read_yml, read_pkl, print_dict_summary

In [2]:
# Source pickle; passed to read_pkl and build_train_dict in later cells
pkl_file = "../data/test_CA_202401.pkl"
# Key of the single case in `train` that is annotated with repro_info and written out
case_name = "NV020_202401"
# Destination file: the selected case (with its repro_info) is pickled here
outfile = "../data/reproducibility_dict_v2_TEST.pkl"

## Read Data and Extract Case

### Read subdict directly

In [3]:
# Disabled: direct read of the raw sub-dictionary for `case_name`.
# NOTE(review): load_and_fix_data is not imported in this notebook's import cell,
# so it must be imported before these lines are re-enabled.
# dat = load_and_fix_data(pkl_file)
# print_dict_summary(dat[case_name])

### Extract processed case

In [4]:
# Disabled alternative: pkl2train path; the active cells below use build_train_dict instead.
# train = pkl2train([pkl_file])

In [5]:
# Load the pickle contents as-is.
# NOTE(review): `train` is reassigned from build_train_dict in a later cell before
# it is read anywhere, so this result appears to be unused — confirm whether this
# cell is still needed.
train = read_pkl(pkl_file)

loading file ../data/test_CA_202401.pkl


In [6]:
# Load data-processing parameters; passed to build_train_dict below after `hours` is overridden
params_data = read_yml("../params_data.yaml")

In [7]:
# Override the maximum time-series length (in hours) used when partitioning cases;
# 168 h = 7 days x 24 h.
params_data['hours'] = 168

In [8]:
# Build the processed training dictionary from the source pickle using the
# (overridden) data parameters, with spatial mode switched off.
train = build_train_dict(
    [pkl_file],
    params_data=params_data,
    spatial=False,
)

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Extracting data from input file ../data/test_CA_202401.pkl
loading file ../data/test_CA_202401.pkl
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Splitting Timeseries into smaller portions to aid with data filtering. Input data param for max timeseries hours: 168
Processing case: CNFC1_202401
Length of y vector: 168
Partitions of length 168 from case CNFC1_202401: 1
Processing case: CRVC1_202401
Length of y vector: 168
Partitions of length 168 from case CRVC1_202401: 1
Processing case: FCHC1_202401
Length of y vector: 168
Partitions of length 168 from case FCHC1_202401: 1
Processing case: FTNC1_202401
Length of y vector: 168
Partitions of length 168 from case FTNC1_202401: 1
Processing case: HTRC1_202401
Length of y vector: 168
Partitions of length 168 from case HTRC1_202401: 1
Processing case: KRNC1_202401
Length of y vector: 168
Partitions of length 168 from case KRNC1_202401: 1
P

In [9]:
# Print a summary of the selected case's entries to confirm it is present in `train`
print_dict_summary(train[case_name])

time: NumPy array of shape (168,), type object
X: NumPy array of shape (168, 15), min: -128.93263270355502, max: 5677.0
y: NumPy array of shape (168,), min: 8.62, max: 15.98
 id : NV020_202401
 case : NV020_202401
 filename : ../data/test_CA_202401.pkl
loc
      STID : NV020
      lat : 38.7482
      lon : -119.53656
      elev : 5677
      pixel_x : 268.6997896202013
      pixel_y : 444.2995027841032
features_list: Array of 15 items
 atm_source : HRRR
 hours : 168


## Add Reproducibility Info

In [10]:
# Load the "rnn_repro" parameter set from params.yaml; it is wrapped in RNNParams
# and stored in repro_info in the next cell.
params = read_yml('../params.yaml', subkey="rnn_repro")
params  # bare expression: display the loaded parameters as the cell output

{'batch_size': 32,
 'timesteps': 5,
 'optimizer': 'adam',
 'rnn_layers': 1,
 'rnn_units': 20,
 'dense_layers': 1,
 'dense_units': 5,
 'activation': ['linear', 'linear'],
 'dropout': [0.2, 0.2],
 'recurrent_dropout': 0.2,
 'reset_states': True,
 'epochs': 300,
 'learning_rate': 0.001,
 'clipvalue': 10.0,
 'phys_initialize': False,
 'stateful': True,
 'verbose_weights': True,
 'verbose_fit': False,
 'features_list': ['Ed', 'Ew', 'solar', 'wind', 'rain'],
 'scale': True,
 'scaler': 'minmax',
 'time_fracs': [0.5, 0.2, 0.3],
 'early_stopping_patience': 9999,
 'predict_spinup_hours': None}

In [11]:
# Reproducibility metadata stored alongside the case data.
#   phys_initialize : placeholder string; not yet implemented for v2.1
#   rand_initialize : hard-coded reference hashes for fitted weights and predictions
#                     NOTE(review): presumably generated with the environment and
#                     seed recorded in env_info — confirm before relying on them
#   env_info        : Python version, TensorFlow version, and seed
#   params          : RNNParams built from the `params` dictionary loaded above
repro_info = {
    'phys_initialize': "NOT YET IMPLEMENTED WITH v2.1",
    'rand_initialize':{
        'fitted_weights_hash': '01513ac086d842dc67d40eb94ee1110c',
        'preds_hash': '4999d10893207f2b40086e3f84c214a3'
    },
    'env_info':{
        # The first whitespace-delimited token of sys.version is the full version
        # number. A fixed-width slice (sys.version[0:6]) truncates e.g. "3.10.12"
        # to "3.10.1" and leaves a trailing space for e.g. "3.9.7".
        'py_version': sys.version.split()[0],
        'tf_version': tf.__version__,
        'seed': 123
    },
    'params': RNNParams(params)
}

# Attach the metadata to the selected case so it is written out together with the data
train[case_name]['repro_info'] = repro_info

Checking params...
Input dictionary passed all checks.
Calculating shape params based on features list, timesteps, and batch size
Input Feature List: ['Ed', 'Ew', 'solar', 'wind', 'rain']
Input Timesteps: 5
Input Batch Size: 32
Calculated params:
Number of features: 5
Batch Shape: (32, 5, 5)
{'batch_size': 32, 'timesteps': 5, 'optimizer': 'adam', 'rnn_layers': 1, 'rnn_units': 20, 'dense_layers': 1, 'dense_units': 5, 'activation': ['linear', 'linear'], 'dropout': [0.2, 0.2], 'recurrent_dropout': 0.2, 'reset_states': True, 'epochs': 300, 'learning_rate': 0.001, 'clipvalue': 10.0, 'phys_initialize': False, 'stateful': True, 'verbose_weights': True, 'verbose_fit': False, 'features_list': ['Ed', 'Ew', 'solar', 'wind', 'rain'], 'scale': True, 'scaler': 'minmax', 'time_fracs': [0.5, 0.2, 0.3], 'early_stopping_patience': 9999, 'predict_spinup_hours': None, 'n_features': 5, 'batch_shape': (32, 5, 5)}


## Write Output

In [12]:
# Serialize only the selected case (not the whole `train` dictionary) to the destination file
with open(outfile, 'wb') as out_handle:
    print(f"Writing file: {outfile}")
    pickle.dump(train[case_name], out_handle)

Writing file: ../data/reproducibility_dict_v2_TEST.pkl
