# Utility to create a stable file used for reproducibility checks

## v2.3 Code

In [1]:
import pickle
import numpy as np
import os.path as osp
import os
import pandas as pd
import tensorflow as tf
import sys
sys.path.append('..')
from moisture_rnn_pkl import pkl2train
from moisture_rnn import RNNParams
from data_funcs import build_train_dict, combine_nested
from utils import read_yml, read_pkl, print_dict_summary, retrieve_url

In [2]:
# Name of the source pickle; the same name is used on the server and locally.
filename = "fmda_rocky_202403-05_f05.pkl"
source_url = f"https://demo.openwfm.org/web/data/fmda/dicts/{filename}"
local_path = f"../data/{filename}"
# Fetch the file into ../data (the helper reports when the target already exists).
retrieve_url(url=source_url, dest_path=local_path)

Target data already exists at ../data/fmda_rocky_202403-05_f05.pkl


In [3]:
# Destination file for the reproducibility dictionary written at the end.
outfile = "../data/reproducibility_dict_v2.3.pkl"
# Key of the single case extracted from the training dictionary.
case_name = "CHAC2_202403"
# Local path of the input pickle retrieved above.
pkl_file = f"../data/{filename}"

## Read Data and Extract Case

In [4]:
# Load the data-filtering parameters and override 'hours' with None.
params_data = read_yml("../params_data.yaml")
params_data['hours'] = None
params_data

{'max_intp_time': 10,
 'zero_lag_threshold': 10,
 'hours': None,
 'min_fm': 1,
 'max_fm': 90,
 'min_rain': 0,
 'max_rain': 100,
 'min_wind': 0,
 'max_wind': 35,
 'min_solar': 0,
 'max_solar': 1400,
 'min_soilm': 0,
 'features_all': ['Ed',
  'Ew',
  'solar',
  'wind',
  'elev',
  'lon',
  'lat',
  'soilm',
  'canopyw',
  'groundflux',
  'rain']}

In [5]:
# Build the per-case training dictionary from the input pickle
# (non-spatial layout, forecast step 3).
train = build_train_dict(
    [pkl_file],
    params_data=params_data,
    spatial=False,
    forecast_step=3,
)

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Extracting data from input file ../data/fmda_rocky_202403-05_f05.pkl
loading file ../data/fmda_rocky_202403-05_f05.pkl
Shifted time based on forecast step 3. Dropping NA at beginning of feature data and corresponding times of output data
Shifted time based on forecast step 3. Dropping NA at beginning of feature data and corresponding times of output data
Shifted time based on forecast step 3. Dropping NA at beginning of feature data and corresponding times of output data
Shifted time based on forecast step 3. Dropping NA at beginning of feature data and corresponding times of output data
Shifted time based on forecast step 3. Dropping NA at beginning of feature data and corresponding times of output data
Shifted time based on forecast step 3. Dropping NA at beginning of feature data and corresponding times of output data
Shifted time based on forecast step 3. Dropping NA at beginning of feature data and corresp

In [6]:
# List the case identifiers available in the training dictionary.
train.keys()

dict_keys(['CHAC2_202403', 'CHRC2_202403', 'DYKC2_202403', 'LKGC2_202403', 'CCEC2_202403', 'RDKC2_202403', 'RFRC2_202403', 'WLCC2_202403', 'CCRU1_202403', 'YLSU1_202403', 'SPKW4_202403', 'ESPC2_202403', 'MRFC2_202403', 'PKLC2_202403', 'CUHC2_202403', 'BAWC2_202403', 'BTAC2_202403', 'BMOC2_202403', 'TCTM8_202403', 'LEIW4_202403', 'HRSN1_202403', 'SBFN1_202403', 'DOHS2_202403', 'CRRS2_202403', 'NMOS2_202403', 'RDCS2_202403', 'TGSK1_202403', 'QNRK1_202403', 'RESN1_202403', 'VRFN1_202403', 'KSHC2_202403', 'TR563_202403', 'CGLK1_202403', 'MRLS2_202403', 'CCYC2_202403', 'JNSC2_202403', 'LSHI4_202403', 'LLKS2_202403', 'AGTN1_202403', 'WCAS2_202403', 'RHRS2_202403', 'TS485_202403', 'TS578_202403', 'HITI4_202403', 'KRNK1_202403', 'AENC2_202403', 'LPFC2_202403', 'TS872_202403', 'WPKS2_202403', 'CSPS2_202403', 'SDSS2_202403', 'SHDS2_202403', 'RWES2_202403', 'PMTW4_202403', 'BEYC2_202403', 'MTRN1_202403', 'MKVN1_202403', 'TT562_202403', 'TT563_202403', 'TT564_202403', 'TT565_202403', 'TT566_202403

In [7]:
# Pull out the selected case and work on a (shallow) copy so that keys added
# later do not appear in `train`.
selected_case = train[case_name]
single_case = selected_case.copy()
print_dict_summary(single_case)

 id : CHAC2_202403
 case : CHAC2_202403
 filename : ../data/fmda_rocky_202403-05_f05.pkl
loc
      STID : CHAC2
      lat : 37.19944
      lon : -108.48917
      elev : 7126
      pixel_x : 575.9094807938177
      pixel_y : 558.2578858477347
time: NumPy array of shape (2205,), type object
X: NumPy array of shape (2205, 15), min: -108.48917, max: 74860.23798649301
y: NumPy array of shape (2205,), min: 2.0, max: 25.7
features_list: Array of 15 items
 atm_source : HRRR
 forecast_step : 3


In [8]:
# Combine all cases into one nested dictionary; a (shallow) copy of `train`
# is handed to the helper instead of the original mapping.
train_copy = train.copy()
spatial_case = combine_nested(train_copy)
print_dict_summary(spatial_case)

id: Array of 70 items
case: Array of 70 items
filename: Array of 70 items
time: Array of 70 items
X: Array of 70 items
y: Array of 70 items
atm_source: Array of 70 items
forecast_step: Array of 70 items
loc
     STID: Array of 70 items
     lat: Array of 70 items
     lon: Array of 70 items
     elev: Array of 70 items
     pixel_x: Array of 70 items
     pixel_y: Array of 70 items
features_list: Array of 15 items


## Add Reproducibility Info

In [9]:
# Read the "rnn_repro" section of the model parameter file.
params = read_yml(
    '../params.yaml',
    subkey="rnn_repro",
)
params

{'timesteps': 12,
 'batch_size': 32,
 'hidden_layers': ['lstm', 'dense'],
 'hidden_units': [30, 30],
 'hidden_activation': ['tanh', 'relu'],
 'dropout': 0.2,
 'recurrent_dropout': 0.2,
 'output_layer': 'dense',
 'output_activation': 'linear',
 'output_dimension': 1,
 'learning_rate': 0.001,
 'early_stopping_patience': 5,
 'epochs': 30,
 'reset_states': True,
 'bmin': 10,
 'bmax': 200,
 'batch_schedule_type': 'step',
 'estep': 5,
 'scaler': 'standard',
 'time_fracs': [0.8, 0.1, 0.1],
 'space_fracs': [0.8, 0.1, 0.1],
 'stateful': True,
 'features_list_single': ['Ed', 'Ew', 'solar', 'wind', 'rain'],
 'features_list_spatial': ['Ed',
  'Ew',
  'solar',
  'wind',
  'rain',
  'lon',
  'lat',
  'elev'],
 'verbose_fit': True,
 'verbose_weights': True,
 'return_sequences': False,
 'predict_spinup_hours': 5,
 'phys_initialize': False}

In [10]:
# Single-case parameters: start from a copy of the shared params, drop the
# entries only used by the spatial case, and expose the single-case feature
# list under the generic 'features_list' key.
params_single = dict(params)
for spatial_only_key in ('features_list_spatial', 'space_fracs'):
    del params_single[spatial_only_key]
params_single['features_list'] = params_single.pop('features_list_single')

# Spatial-case parameters: same idea, dropping the single-case feature list
# and renaming the spatial one to 'features_list'.
params_spatial = dict(params)
del params_spatial['features_list_single']
params_spatial['features_list'] = params_spatial.pop('features_list_spatial')

In [11]:
# Reproducibility record for the single case: stored hashes of the fitted
# weights and predictions for each initialization mode, the environment in
# which this file is generated, and the RNN parameters.
repro_info_single = {
    'phys_initialize': {
        'fitted_weights_hash': '3383b02ea14e17f45192f91e3fccadb9',
        'preds_hash': '638509a812e26e06e8474b0e05c4396a'
    },
    'rand_initialize':{
        'fitted_weights_hash': 'cd5fffe7ede2a19dff4a6b74527364ac',
        'preds_hash': 'a84e1d81e2eb790a1b5768747a4335ef'
    },
    'env_info':{
        # Use the first whitespace-delimited token of sys.version so the whole
        # version number is stored. A fixed 6-character slice truncates
        # versions such as "3.10.12" to "3.10.1" and keeps a trailing space
        # for versions such as "3.9.7".
        'py_version': sys.version.split()[0],
        'tf_version': tf.__version__,
        'seed': 123
    },
    'params': RNNParams(params_single)
}

# Attach the record to the single-case dictionary that gets pickled.
single_case['repro_info'] = repro_info_single

Checking params...
Input dictionary passed all checks.
Input dictionary passed all checks.
Calculating shape params based on features list, timesteps, and batch size
Input Feature List: ['Ed', 'Ew', 'solar', 'wind', 'rain']
Input Timesteps: 12
Input Batch Size: 32
Calculated params:
Number of features: 5
Batch Shape: (32, 12, 5)
{'timesteps': 12, 'batch_size': 32, 'hidden_layers': ['lstm', 'dense'], 'hidden_units': [30, 30], 'hidden_activation': ['tanh', 'relu'], 'dropout': 0.2, 'recurrent_dropout': 0.2, 'output_layer': 'dense', 'output_activation': 'linear', 'output_dimension': 1, 'learning_rate': 0.001, 'early_stopping_patience': 5, 'epochs': 30, 'reset_states': True, 'bmin': 10, 'bmax': 200, 'batch_schedule_type': 'step', 'estep': 5, 'scaler': 'standard', 'time_fracs': [0.8, 0.1, 0.1], 'stateful': True, 'verbose_fit': True, 'verbose_weights': True, 'return_sequences': False, 'predict_spinup_hours': 5, 'phys_initialize': False, 'features_list': ['Ed', 'Ew', 'solar', 'wind', 'rain'], 

In [12]:
# Reproducibility record for the spatial case: stored hashes for random
# initialization (physics initialization is marked as not implemented), the
# environment in which this file is generated, and the RNN parameters.
repro_info_spatial = {
    'phys_initialize': "Not Implemented Yet",
    'rand_initialize':{
        'fitted_weights_hash': 'bd0d8a186d837eedfe2347cfde82b579',
        'preds_hash': '6a44fd3df418f59d80a7b85d8c5d8463'
    },
    'env_info':{
        # Use the first whitespace-delimited token of sys.version so the whole
        # version number is stored. A fixed 6-character slice truncates
        # versions such as "3.10.12" to "3.10.1" and keeps a trailing space
        # for versions such as "3.9.7".
        'py_version': sys.version.split()[0],
        'tf_version': tf.__version__,
        'seed': 123
    },
    'params': RNNParams(params_spatial)
}
# Attach the record to the spatial-case dictionary that gets pickled.
spatial_case['repro_info'] = repro_info_spatial

Checking params...
Input dictionary passed all checks.
Input dictionary passed all checks.
Calculating shape params based on features list, timesteps, and batch size
Input Feature List: ['Ed', 'Ew', 'solar', 'wind', 'rain', 'lon', 'lat', 'elev']
Input Timesteps: 12
Input Batch Size: 32
Calculated params:
Number of features: 8
Batch Shape: (32, 12, 8)
{'timesteps': 12, 'batch_size': 32, 'hidden_layers': ['lstm', 'dense'], 'hidden_units': [30, 30], 'hidden_activation': ['tanh', 'relu'], 'dropout': 0.2, 'recurrent_dropout': 0.2, 'output_layer': 'dense', 'output_activation': 'linear', 'output_dimension': 1, 'learning_rate': 0.001, 'early_stopping_patience': 5, 'epochs': 30, 'reset_states': True, 'bmin': 10, 'bmax': 200, 'batch_schedule_type': 'step', 'estep': 5, 'scaler': 'standard', 'time_fracs': [0.8, 0.1, 0.1], 'space_fracs': [0.8, 0.1, 0.1], 'stateful': True, 'verbose_fit': True, 'verbose_weights': True, 'return_sequences': False, 'predict_spinup_hours': 5, 'phys_initialize': False, 'f

## Write Output

In [13]:
# Store both cases in one pickle, keyed by 'single' and 'spatial'.
with open(outfile, 'wb') as out_handle:
    print(f"Writing file: {outfile}")
    pickle.dump(
        {
            'single': single_case,
            'spatial': spatial_case,
        },
        out_handle,
    )

Writing file: ../data/reproducibility_dict_v2.3.pkl
