# v2.1 run RNN strategy serial by Location

This version of the RNN runs the model on each location separately, one at a time. Two main runs:
1. Run a separate model at each location - training and prediction at each location independently - training period 0:train_ind (was 0:h2), then prediction in test_ind:end. Validation data, if any, are from train_ind:test_ind
2. Run the same model with multiple fitting calls on 0:train_ind at different locations, then compare prediction accuracy in test_ind:end for all locations.


In [None]:
import numpy as np
from utils import print_dict_summary, print_first, str2time, logging_setup
import pickle
import logging
import os.path as osp
from moisture_rnn_pkl import pkl2train
from moisture_rnn import RNNParams, RNNData, RNN, create_rnn_data2 
from utils import hash2, read_yml, read_pkl, retrieve_url
from moisture_rnn import RNN
import reproducibility
from data_funcs import rmse
from moisture_models import run_augmented_kf
import copy
import pandas as pd
import matplotlib.pyplot as plt
import yaml

In [None]:
# Set up logging for this session using the helper imported from utils
logging_setup()

In [None]:
# Fetch the California January 2024 test dictionary into the local data directory
retrieve_url(
    url="https://demo.openwfm.org/web/data/fmda/dicts/test_CA_202401.pkl",
    dest_path="data/test_CA_202401.pkl",
)

In [None]:
# Pickle holding the reproducibility case
repro_file = "data/reproducibility_dict_v2_TEST.pkl"

# Input data files, given as names relative to file_dir
file_dir = 'data'
file_names = ['test_CA_202401.pkl']
# Join with osp.join so the separator matches the host OS
file_paths = [osp.join(file_dir, name) for name in file_names]

In [None]:
# Read/write control for the training cases
# train_file:   pickle file used to cache the training cases
# train_create: build the training cases from file_paths (if False, rely on reading)
# train_write:  dump the training cases into train_file
# train_read:   load the training cases back from train_file
train_file = 'train.pkl'
train_create = train_write = train_read = True

In [None]:
# Load the reproducibility case.
# NOTE(review): read_pkl presumably unpickles the file - only point it at trusted files.
repro = read_pkl(repro_file)

# Create, cache and/or reload the training cases according to the control flags.
if train_create:
    logging.info('creating the training cases from files %s',file_paths)
    # file_paths were built with osp.join, so they work on windows too
    train = pkl2train(file_paths)
if train_write:
    # NOTE: this needs `train` to exist already, i.e. train_create=True or a
    # previous run of this cell in the same session; otherwise it raises NameError.
    with open(train_file, 'wb') as file:
        logging.info('Writing the train cases into file %s',train_file)
        pickle.dump(train, file)
if train_read:
    logging.info('Reading the train cases from file %s',train_file)
    train = read_pkl(train_file)

In [None]:
# Load every parameter set from params.yaml and show the names of the available sets
params_all = read_yml("params.yaml")
print(params_all.keys())

In [None]:
# from module_param_sets import param_sets

In [None]:
# Keys of params_all to run over
param_sets_keys = ['rnn']

# Locations to process: entries 70..89 of train, in its key order.
# For a quick single-location run use instead:
#   cases = [list(train.keys())[0]]
# If train carries a 'reproducibility' entry, drop it with:
#   cases.remove('reproducibility')
cases = list(train)[70:90]
cases

In [None]:
# Record in the log which parameter sets and which cases this run covers
logging.info('Running over parameter sets %s',param_sets_keys)
logging.info('Running over cases %s',cases)

## Run Reproducibility Case

In [None]:
# Parameters stored alongside the reproducibility case
repro_info = repro['repro_info']
params = repro_info['params']
print(type(params))
print(params)

# Build the RNN input data from the reproducibility dictionary,
# split it into train/validation/test portions, then scale it
rnn_dat = RNNData(
    repro,
    scaler=params['scaler'],
    features_list=params['features_list'],
)
rnn_dat.train_test_split(train_frac=params['train_frac'], val_frac=params['val_frac'])
rnn_dat.scale_data()

In [None]:
# Fix the random seed, build the RNN from the reproducibility params and run it
# on the prepared data with reproducibility_run=True
reproducibility.set_seed(123)
rnn = RNN(params)
# m is the model output and errs the error summary returned by run_model
m, errs = rnn.run_model(rnn_dat, reproducibility_run=True)

## Separate Models by Location

In [None]:
# Set up output dictionaries
outputs_kf = {}
outputs_rnn = {}

In [None]:

# For every parameter set and every case (location): train a fresh RNN on that
# location alone, then run the augmented Kalman filter on the same case as a
# baseline. Results go to outputs_rnn / outputs_kf.
# NOTE: both output dicts are keyed by case only, so with more than one entry in
# param_sets_keys the results of earlier parameter sets are overwritten.
for k in param_sets_keys:
    params = RNNParams(params_all[k])
    print("~"*80)
    print("Running with params:")
    print(params)
    # Increase Val Frac so no errors, TODO fix validation
    params.update({
        'train_frac': .5,
        'val_frac': .2,
        'activation': ['relu', 'relu'],
        'epochs': 200
    })
    for case in cases:
        print("~"*50)
        logging.info('Processing case %s',case)
        print_dict_summary(train[case])
        # Format data & Run Model
        # rnn_dat = create_rnn_data2(train[case], params)
        rnn_dat = RNNData(train[case], scaler = params['scaler'], features_list = params['features_list'])
        rnn_dat.train_test_split(
            train_frac = params['train_frac'],
            val_frac = params['val_frac']
        )
        rnn_dat.scale_data()
        # Reseed before building each model
        reproducibility.set_seed()
        rnn = RNN(params)
        m, errs = rnn.run_model(rnn_dat)
        # Add model output to case (replaced by the KF output further down)
        train[case]['m']=m
        # Get RMSE Prediction Error
        print(f"RMSE: {errs}")
        outputs_rnn[case] = {'case':case, 'm': m.copy(), 'errs': errs.copy()}

        # Run Augmented KF
        print('Running Augmented KF')
        # Half of the hours, in line with train_frac = .5 used for the RNN above
        train[case]['h2'] = train[case]['hours'] // 2
        train[case]['scale_fm'] = 1
        m, Ec = run_augmented_kf(train[case])
        # NOTE(review): assumes rnn_dat exposes 'scale_fm' and 'y' - confirm in RNNData
        m = m*rnn_dat['scale_fm']
        y = rnn_dat['y']*rnn_dat['scale_fm']
        train[case]['m'] = m
        # Compute the KF error once and reuse it for both the printout and the record
        kf_rmse = rmse(m,y)
        print(f"KF RMSE: {kf_rmse}")
        outputs_kf[case] = {'case':case, 'm': m.copy(), 'errs': kf_rmse}

In [None]:
# Mark the end of the per-location runs in the log
logging.info('fmda_rnn_serial.ipynb done')

In [None]:
# Development helper: reload moisture_rnn so that edits to the module are picked up
# without restarting the session, then re-import RNN so the name refers to the
# reloaded class. Other names imported from moisture_rnn earlier are not rebound here.
import importlib
import moisture_rnn
importlib.reload(moisture_rnn)
from moisture_rnn import RNN

In [None]:
# Print, for each processed case, its name and the RNN prediction error
for rnn_out in outputs_rnn.values():
    print("~"*50)
    print(rnn_out['case'])
    print(rnn_out['errs']['prediction'])

In [None]:
# Print, for each processed case, its name and the augmented KF RMSE
for kf_out in outputs_kf.values():
    print("~"*50)
    print(kf_out['case'])
    print(kf_out['errs'])