# v2.2 run RNN with Spatial Training

This notebook is intended to set up a test in which the RNN is run serially by location and compared to the spatial training scheme. Additionally, the ODE model with the augmented KF will be run as a comparison; note, however, that the RNN models predict entirely without knowledge of the held-out locations, while the augmented KF is run directly on the test locations.


## Environment Setup

In [None]:
import numpy as np
import sys
sys.path.append('..')
import pickle
import logging
import os.path as osp
import tensorflow as tf
from moisture_rnn_pkl import pkl2train
from moisture_rnn import RNNParams, RNNData, RNN, rnn_data_wrap
from utils import hash2, read_yml, read_pkl, retrieve_url, Dict, print_dict_summary, print_first, str2time, logging_setup
from moisture_rnn import RNN
import reproducibility
from data_funcs import rmse, to_json, combine_nested, subset_by_features, build_train_dict
from moisture_models import run_augmented_kf
import copy
import pandas as pd
import matplotlib.pyplot as plt
import yaml
import time

In [None]:
# Configure logging for this notebook through the project helper imported from utils.
logging_setup()

In [None]:
# Name of the FMDA data pickle; it is interpolated into both the remote URL and the local path.
filename = "fmda_rocky_202403-05_f05.pkl"
# Hand the remote URL and the local destination under ../data to the project helper
# (presumably downloads the file if needed -- confirm in utils.retrieve_url).
retrieve_url(
    url = f"https://demo.openwfm.org/web/data/fmda/dicts/{filename}", 
    dest_path = f"../data/{filename}")

In [None]:
# Local data files passed to build_train_dict; currently only the single pickle named by `filename`.
file_paths = [f'../data/{filename}']

In [None]:
# # read/write control
# train_file='../data/train.pkl'
# train_create=True   # if false, read
# train_write=False
# train_read=False

In [None]:
# Params used for data filtering
params_data = read_yml("../params_data.yaml") 
params_data

In [None]:
# Params used for setting up RNN
params = read_yml("../params.yaml", subkey='rnn') 
params

In [None]:
# Input features used in this experiment
feats = ['Ed', 'Ew', 'solar', 'wind', 'elev', 'lon', 'lat', 'rain']
# Record the same list in the RNN params; the data subsetting step reads params['features_list']
params.update({'features_list': feats})

In [None]:
# Build the training dictionary from the data files (atm_source="RAWS", non-spatial),
# then pass it through subset_by_features and combine_nested in a single pipeline.
train = combine_nested(
    subset_by_features(
        build_train_dict(
            file_paths,
            atm_source="RAWS",
            params_data=params_data,
            features_subset=feats,
            spatial=False,
            verbose=True,
        ),
        params['features_list'],
    )
)

In [None]:
# if train_create:
#     params_data.update({'hours': 1440})
#     logging.info('creating the training cases from files %s',file_paths)
#     # osp.join works on windows too, joins paths using \ or /
#     train = process_train_dict(file_paths, atm_dict = "RAWS", params_data = params_data, verbose=True)
#     train = subset_by_features(train, feats)
#     train = combine_nested(train)
# if train_write:
#     with open(train_file, 'wb') as file:
#         logging.info('Writing the train cases into file %s',train_file)
#         pickle.dump(train, file)
# if train_read:
#     logging.info('Reading the train cases from file %s',train_file)
#     train = read_pkl(train_file)

## Spatial Data Training

This method combines the training timeseries data into a single 3-d array, with timeseries at the same location arranged in the right order for a given `batch_size` hyperparameter. The hidden state of the recurrent layers is set up to reset when the location changes.

In [None]:
# Set the seed (123) through the project's reproducibility module before the model is built.
reproducibility.set_seed(123)

In [None]:
# Convert the params read from params.yaml into an RNNParams object.
# NOTE: this rebinds `params`, so re-running the cell passes an RNNParams instance back
# into RNNParams -- whether that is supported is not visible here; confirm in moisture_rnn.
params = RNNParams(params)
# Example hyperparameter overrides, currently disabled:
# params.update({'epochs': 200, 
#                'learning_rate': 0.001,
#                'activation': ['relu', 'relu'], # Activation for RNN Layers, Dense layers respectively.
#                'recurrent_layers': 1, 'recurrent_units': 30, 
#                'dense_layers': 1, 'dense_units': 30,
#                'early_stopping_patience': 30, # how many epochs of no validation accuracy gain to wait before stopping
#                'batch_schedule_type': 'exp', # Hidden state batch reset schedule
#                'bmin': 20, # Lower bound of hidden state batch reset, 
#                'bmax': params_data['hours'], # Upper bound of hidden state batch reset, using max hours
#                'batch_size': 60
#               })

In [None]:
# Development helper: re-execute the moisture_rnn module so source edits are picked up
# without restarting the kernel.
import importlib
import moisture_rnn
importlib.reload(moisture_rnn)
# NOTE(review): only RNNData is re-imported after the reload. RNN, RNNParams and
# rnn_data_wrap, imported at the top of the notebook, are not rebound by this cell
# and keep referring to the objects from the earlier import.
from moisture_rnn import RNNData

In [None]:
# Wrap the combined training dictionary and the params into the data object used by the RNN.
rnn_dat_sp = rnn_data_wrap(train, params)
# Finalize params that depend on the data: the sequence count and the data-filtering 'hours' value.
params.update({
    'loc_batch_reset': rnn_dat_sp.n_seqs, # Used to reset hidden state when location changes for a given batch
    'bmax': params_data['hours']
})

In [None]:
# Build the RNN from the finalized params.
rnn_sp = RNN(params)
# run_model is unpacked into two values here: m_sp and errs (errs is averaged in the next cell).
m_sp, errs = rnn_sp.run_model(rnn_dat_sp)

In [None]:
# Mean of the errors returned by run_model, shown as the cell output.
errs.mean()

## Save Model

In [None]:
import os

# Save the trained prediction model so the following cells can reload and check it.
outpath = "../outputs/models"
# Make sure the target folder exists; on a fresh checkout it may be missing, and the
# save call is not expected to create parent directories for a `.keras` file.
os.makedirs(outpath, exist_ok=True)
# NOTE: this rebinds `filename`, which earlier held the name of the data pickle.
# The load cell that follows reads the model path from this same variable.
filename = osp.join(outpath, "model_predict_raws_rocky.keras")
rnn_sp.model_predict.save(filename)

## Load and Check

In [None]:
# Reload the model from the path stored in `filename` by the save cell.
mod = tf.keras.models.load_model(filename)

In [None]:
# hash_weights comes from the project utils module; presumably it reduces the reloaded
# model's weights to a hash value for comparison -- confirm in utils.
from utils import hash_weights

hash_weights(mod)

In [None]:
# Inspect the container type of the test inputs before they are stacked.
type(rnn_dat_sp.X_test)

In [None]:
# Stack the test inputs and targets along a new leading axis so each becomes a single array.
X_test = np.stack(rnn_dat_sp.X_test, axis=0)
y_array = np.stack(rnn_dat_sp.y_test, axis=0)

In [None]:
# Predict with the reloaded model on the stacked test inputs.
preds = mod.predict(X_test)
# Show the prediction array shape as the cell output
preds.shape

In [None]:
# RMSE per entry along the first axis (squared error averaged over axes 1 and 2),
# then averaged over all entries; shown as the cell output.
np.sqrt(np.square(preds - y_array).mean(axis=(1, 2))).mean()