# Utility to create a stable file used for reproducibility checks

## v2.1 Code

In [1]:
import pickle
import numpy as np
import os.path as osp
import os
import pandas as pd
import tensorflow as tf
import sys
sys.path.append('..')
from moisture_rnn_pkl import pkl2train
from moisture_rnn import RNNParams
from utils import read_yml, read_pkl, print_dict_summary, load_and_fix_data

In [2]:
# Inputs: key of the case to extract and the pickle file it is read from
case_name = "NV020_202401"
pkl_file = "../data/test_CA_202401.pkl"
# Output: destination of the reproducibility dictionary
outfile = "../data/reproducibility_dict_v2_TEST.pkl"

## Read Data and Extract Case

### Read subdict directly

In [3]:
# Load the source pickle, then summarise the unprocessed sub-dictionary
# stored under the selected case key.
dat = load_and_fix_data(pkl_file)
case_raw = dat[case_name]
print_dict_summary(case_raw)

loading file ../data/test_CA_202401.pkl
loc
      STID : NV020
      lat : 38.7482
      lon : -119.53656
      elev : 5677
      pixel_x : 268.6997896202013
      pixel_y : 444.2995027841032
RAWS
     temp: NumPy array of shape (1009,), min: 263.15, max: 280.506
     fm: NumPy array of shape (1009,), min: 8.62, max: 16.2
     rh: NumPy array of shape (1009,), min: 14.52, max: 93.7
     wind: NumPy array of shape (1009,), min: nan, max: nan
     time_raws: NumPy array of shape (1009,), type object
      hours : 1009
     time: NumPy array of shape (168,), type object
     Ed: NumPy array of shape (1009,), min: 8.049790374634147, max: 30.42933179653953
     Ew: NumPy array of shape (1009,), min: 6.999615289304687, max: 28.609014502317358
HRRR
     time: NumPy array of shape (168,), type object
     f00
          temp: NumPy array of shape (168,), min: 262.1491059538274, max: 278.6557143358586
          rh: NumPy array of shape (168,), min: 19.766470197815824, max: 88.04762200131668
    

### Extract processed case

In [4]:
# Build the processed dictionary from the source pickle. pkl2train is given a
# list of file paths; the result is indexed by case name in later cells.
train = pkl2train([pkl_file])

In [5]:
# Summarise the processed entry for the selected case.
print_dict_summary(train[case_name])

 id : NV020_202401
 case : NV020_202401
 filename : ../data/test_CA_202401.pkl
loc
      STID : NV020
      lat : 38.7482
      lon : -119.53656
      elev : 5677
      pixel_x : 268.6997896202013
      pixel_y : 444.2995027841032
 hours : 168
 h2 : 168
time: NumPy array of shape (168,), type object
 scale_fm : 1
X: NumPy array of shape (168, 8), min: -119.53656, max: 5677.0
features_list: Array of 8 items
y: NumPy array of shape (168,), min: 8.62, max: 15.98


## Add Reproducibility Info

In [6]:
# Read the "rnn_repro" subkey of the project parameter file and display it.
params = read_yml('../params.yaml', subkey="rnn_repro")
params

{'batch_size': 32,
 'timesteps': 5,
 'optimizer': 'adam',
 'rnn_layers': 1,
 'rnn_units': 20,
 'dense_layers': 1,
 'dense_units': 5,
 'activation': ['linear', 'linear'],
 'centering': [0.0, 0.0],
 'dropout': [0.2, 0.2],
 'recurrent_dropout': 0.2,
 'reset_states': True,
 'epochs': 300,
 'learning_rate': 0.001,
 'clipvalue': 10.0,
 'phys_initialize': False,
 'stateful': True,
 'verbose_weights': True,
 'verbose_fit': False,
 'features_list': ['Ed', 'Ew', 'solar', 'wind', 'rain'],
 'scale': True,
 'scaler': 'minmax',
 'train_frac': 0.5,
 'val_frac': 0.2}

In [7]:
# Reproducibility metadata stored alongside the processed case data.
repro_info = {
    # Placeholder: no physics-initialized reference exists for v2.1 yet.
    'phys_initialize': "NOT YET IMPLEMENTED WITH v2.1",
    # Reference hashes for the randomly initialized model.
    # NOTE(review): presumably recorded from an earlier run using the seed and
    # params below -- confirm before relying on them.
    'rand_initialize':{
        'fitted_weights_hash': '01513ac086d842dc67d40eb94ee1110c',
        'preds_hash': '4999d10893207f2b40086e3f84c214a3'
    },
    'env_info':{
        # Full interpreter version string, e.g. "3.10.12". A fixed-width slice
        # such as sys.version[0:6] truncates two-digit patch releases
        # ("3.10.1") and keeps a trailing space for short ones ("3.9.7 ").
        # NOTE(review): any check that compares against this value should
        # compute the version with the same expression.
        'py_version': sys.version.split()[0],
        'tf_version': tf.__version__,
        'seed': 123
    },
    # RNNParams checks the raw dictionary and adds derived shape parameters
    # (n_features, batch_shape), as shown in this cell's printed output.
    'params': RNNParams(params)
}

# Attach the metadata to the processed case so it is pickled together with the data.
train[case_name]['repro_info'] = repro_info

Checking params...
Input dictionary passed all checks.
Calculating shape params based on features list, timesteps, and batch size
Input Feature List: ['Ed', 'Ew', 'solar', 'wind', 'rain']
Input Timesteps: 5
Input Batch Size: 32
Calculated params:
Number of features: 5
Batch Shape: (32, 5, 5)
{'batch_size': 32, 'timesteps': 5, 'optimizer': 'adam', 'rnn_layers': 1, 'rnn_units': 20, 'dense_layers': 1, 'dense_units': 5, 'activation': ['linear', 'linear'], 'centering': [0.0, 0.0], 'dropout': [0.2, 0.2], 'recurrent_dropout': 0.2, 'reset_states': True, 'epochs': 300, 'learning_rate': 0.001, 'clipvalue': 10.0, 'phys_initialize': False, 'stateful': True, 'verbose_weights': True, 'verbose_fit': False, 'features_list': ['Ed', 'Ew', 'solar', 'wind', 'rain'], 'scale': True, 'scaler': 'minmax', 'train_frac': 0.5, 'val_frac': 0.2, 'n_features': 5, 'batch_shape': (32, 5, 5)}


## Write Output

In [8]:
# Serialise only the selected case (with its attached repro_info) to the
# destination file.
with open(outfile, 'wb') as pkl_out:
    print(f"Writing file: {outfile}")
    case_dict = train[case_name]
    pickle.dump(case_dict, pkl_out)

Writing file: ../data/reproducibility_dict_v2_TEST.pkl
