# v2.3 run RNN with Spatial Training

This notebook is intended to set up a test where the RNN is run serially by location and compared to the spatial training scheme. Additionally, the ODE model with the augmented KF will be run as a comparison. Note, however, that the RNN models predict entirely without knowledge of the held-out locations, while the augmented KF is run directly on the test locations.


## Environment Setup

In [None]:
import numpy as np
from utils import print_dict_summary, print_first, str2time, logging_setup
import pickle
import logging
import os.path as osp
from moisture_rnn_pkl import pkl2train
from moisture_rnn import RNNParams, RNNData, RNN, rnn_data_wrap
from utils import hash2, read_yml, read_pkl, retrieve_url, Dict
from moisture_rnn import RNN
import reproducibility
from data_funcs import rmse, to_json, combine_nested, build_train_dict
from moisture_models import run_augmented_kf
import copy
import pandas as pd
import matplotlib.pyplot as plt
import yaml
import time

In [None]:
# Initialize logging through the project helper imported from utils
# (handlers, levels and format are defined there, not in this notebook).
logging_setup()

In [None]:
# Name of the pickled FMDA dictionary; reused below to build the local path.
filename = "fmda_rocky_202403-05_f05.pkl"

# Hand the remote URL and the local destination (data/<filename>) to the
# project's retrieve_url helper. Presumably it downloads the file; caching and
# overwrite behavior are defined in utils -- confirm there.
retrieve_url(url=f"https://demo.openwfm.org/web/data/fmda/dicts/{filename}",
             dest_path=f"data/{filename}")

In [None]:
# List of input pickle paths passed to build_train_dict below; currently a
# single entry pointing at the file referenced in the retrieve_url cell.
file_paths = [f"data/{filename}"]

In [None]:
# Read/write control switches for the training pickle.
# NOTE(review): none of these four names is referenced by the other cells
# visible in this notebook -- confirm they are still needed.
train_file = 'data/train.pkl'

# train_create: if False, the data is meant to be read instead of created.
train_create, train_write, train_read = True, False, False

In [None]:
# Params used for data filtering
params_data = read_yml("params_data.yaml") 
# Set 'hours' to 720, replacing any value loaded from the YAML file
# (720 h = 30 days, if the unit is hours as the key name suggests -- confirm).
params_data.update({'hours':720})

In [None]:
# Params used for setting up RNN: only the 'rnn' subkey of params.yaml is requested
params = read_yml("params.yaml", subkey='rnn') 
params  # last expression in the cell, so the loaded parameters are displayed

In [None]:
# Build the training dictionary from the input pickle(s) using the project
# helper. All options are passed through unchanged; their exact meaning is
# defined in data_funcs.build_train_dict.
train = build_train_dict(
    file_paths,
    atm_source="HRRR",
    params_data=params_data,
    spatial=False,
    verbose=True,
    forecast_step=3,
)

In [None]:
# Keep only the first 250 entries of `train` (in its iteration order), rebuilt
# as a plain dict -- presumably to keep the run time manageable.
# NOTE(review): 250 is a hard-coded cap; consider promoting it to a named
# setting, and moving this import up to the import cell.
from itertools import islice
train = {k: train[k] for k in islice(train, 250)}

## Setup Validation Runs

The following parameters will be used for both serial and spatial models.

In [None]:
# Wrap the parameters loaded from params.yaml in the project's RNNParams class.
# NOTE(review): `params` is rebound here, so re-running this cell alone passes
# an RNNParams object back into RNNParams -- confirm that is supported, or
# re-run the cell that reads params.yaml first.
params = RNNParams(params)

In [None]:
# Fix the random seed through the project's reproducibility helper
# (which random number generators it seeds is defined in that module).
reproducibility.set_seed(123)

## Spatial Data Training

This method combines the training timeseries data into a single 3-d array, with timeseries at the same location arranged in the right order for a given `batch_size` hyperparameter. The hidden states of the recurrent layers are reset when the location changes.

In [None]:
# Start wall-clock timer. The elapsed time reported by the end-timer cell
# covers every cell executed between this one and that one.
start_time = time.time()

In [None]:
# Combine Nested Dictionary into Spatial Data
# train_sp = Dict(combine_nested(train))

In [None]:
# params.update({'time_fracs': [.5, .2, .3], 'space_fracs': [.5, .2, .3]})
# Combine the nested training dictionary into spatial data and wrap it with
# the project's rnn_data_wrap helper, using the current params.
rnn_dat_sp = rnn_data_wrap(combine_nested(train), params)
# Copy two values from the wrapped data back into the model parameters.
params.update({
    'loc_batch_reset': rnn_dat_sp.n_seqs, # Used to reset hidden state when location changes for a given batch
    'bmax': rnn_dat_sp.hours  # NOTE(review): presumably an upper bound tied to the series length -- confirm in moisture_rnn
})

In [None]:
# Build the RNN from the updated params and run it on the spatial data.
# run_model returns two values, kept as `m_sp` and `errs`; the mean of `errs`
# is what the comparison section reports as the spatial-training RMSE.
rnn_sp = RNN(params)
m_sp, errs = rnn_sp.run_model(rnn_dat_sp)

In [None]:
# Mean of the errors returned by run_model (displayed as the cell output)
errs.mean()

In [None]:
# End Timer
end_time = time.time()

# Calculate Code Runtime
# Wall-clock seconds since the start-timer cell. When the cells are run in
# order this spans the data wrapping, RNN construction and run_model call.
elapsed_time_sp = end_time - start_time
print(f"Spatial Training Elapsed time: {elapsed_time_sp:.4f} seconds")

## Run ODE + KF 

In [None]:
# Re-execute the moisture_models module in the running kernel, then re-import
# run_augmented_kf so the notebook-level name is bound to the reloaded function
# (the name imported in the first cell would otherwise still reference the old one).
# NOTE(review): development convenience only; it is not needed on a fresh
# Restart & Run All.
import importlib
import moisture_models
importlib.reload(moisture_models)
from moisture_models import run_augmented_kf

In [None]:
# Get timeseries IDs from previous RNNData object: the 'test_locs' entry of
# its `loc` mapping. These IDs are used below as keys into `train`.
test_cases = rnn_dat_sp.loc['test_locs']
print(len(test_cases))  # number of test cases

In [None]:
# Used below both as the 'h2' setting for the KF and as the slice start when scoring.
test_ind = rnn_dat_sp.test_ind # Time index for test period start
print(test_ind)

In [None]:
# Run the ODE + augmented Kalman filter on every test case and record the
# test-period RMSE for each one, keyed by case ID.
outputs_kf = {}
for case in test_cases:
    print("~"*50)
    print(case)
    # Run Augmented KF
    print('Running Augmented KF')
    # Alias the case dictionary once; it is the same object stored in `train`,
    # so the settings and results written below remain visible via train[case].
    case_data = train[case]
    # Settings written into the case dictionary before calling the filter
    case_data['h2'] = test_ind  # time index for the start of the test period
    case_data['hours'] = len(case_data['y'])
    case_data['scale_fm'] = 1
    m, Ec = run_augmented_kf(case_data)  # Ec is not used further in this cell
    y = case_data['y']  # read after the KF call, as in the original ordering
    case_data['m_kf'] = m  # keep the KF output alongside the case data
    # Score once over the test period and reuse the value for both the log
    # line and the results table (previously rmse was computed twice).
    rmse_kf = rmse(m[test_ind:], y[test_ind:])
    print(f"KF RMSE: {rmse_kf}")
    outputs_kf[case] = {'case': case, 'errs': rmse_kf}

In [None]:
# One row per test case, with columns 'case' and 'errs'.
# orient='index' uses the outer dict keys as the row index directly, so each
# column keeps its inferred dtype. Building column-wise and transposing a mixed
# str/float frame leaves every column, including 'errs', as object dtype.
df_kf = pd.DataFrame.from_dict(outputs_kf, orient='index')
df_kf.head()

In [None]:
# Mean of the per-case KF test-period RMSE values (displayed as the cell output)
df_kf.errs.mean()

## Compare

In [None]:
# Size of the test set used in the comparison
print(f"Total Test Cases: {len(test_cases)}")
# NOTE(review): assumes the first axis of y_test counts hours -- confirm against RNNData
print(f"Total Test Hours: {rnn_dat_sp.y_test.shape[0]}")

In [None]:
# Side-by-side mean errors. As the introduction notes, the comparison is not
# like-for-like: the RNN predicts without knowledge of the held-out locations,
# while the augmented KF is run directly on the test locations.
print(f"Spatial Training RMSE: {errs.mean()}")
print(f"Augmented KF RMSE: {df_kf.errs.mean()}")

In [None]:
# Repeat the spatial-training wall-clock time next to the accuracy summary
print(f"Spatial Training Elapsed time: {elapsed_time_sp:.4f} seconds")