# v2.1 run RNN strategy serial by Location

This version of the RNN runs the model on each location separately, one at a time. Two main runs:
1. Run a separate model at each location - training and prediction at each location independently - training mode periods 0:train_ind (was 0:h2), then prediction in test_ind:end. Validation data, if any, are from train_ind:test_ind
2. Run the same model with multiple fitting calls 0:train_ind at different locations, compare prediction accuracy in test_ind:end for all locations.


In [None]:
import numpy as np
from utils import print_dict_summary, print_first, str2time, logging_setup
import pickle
import logging
import os.path as osp
from moisture_rnn_pkl import pkl2train
from moisture_rnn import RNNParams, RNNData, RNN 
from utils import hash2, read_yml, read_pkl, retrieve_url
from moisture_rnn import RNN
import reproducibility
from data_funcs import rmse, to_json
from moisture_models import run_augmented_kf
import copy
import pandas as pd
import matplotlib.pyplot as plt
import yaml
import time

In [None]:
# Initialize logging through the project helper imported from utils
# (the format and level it configures are defined in that module).
logging_setup()

In [None]:
# Download the test pickle from demo.openwfm.org.
# NOTE(review): dest_path is a bare file name, while file_paths (built in the
# next cell) looks for the same name under 'data/' - confirm the intended
# download location.
retrieve_url(
    url="https://demo.openwfm.org/web/data/fmda/dicts/test_CA_202401.pkl",
    dest_path="fmda_nw_202401-05_f05.pkl",
)

In [None]:
# Input locations: the raw FMDA pickle(s) and the reproducibility dictionary.
file_dir = 'data'
file_names = ['fmda_nw_202401-05_f05.pkl']
repro_file = "data/reproducibility_dict_v2_TEST.pkl"
# Prefix each file name with the data directory (osp.join picks the
# separator appropriate for the platform).
file_paths = [osp.join(file_dir, fname) for fname in file_names]

In [None]:
# Switches controlling how the training dictionary is obtained and persisted.
train_file = 'train.pkl'  # pickle file the training cases are written to / read from
train_create = False      # build the training cases from the raw input files
train_write = False       # dump the training cases to train_file
train_read = True         # load the training cases from train_file

In [None]:
# Load the reproducibility dictionary.
repro = read_pkl(repro_file)

# Obtain the training cases according to the read/write switches. The three
# steps run in this order, so an enabled read replaces freshly created cases.
if train_create:
    logging.info('creating the training cases from files %s',file_paths)
    train = pkl2train(file_paths)
if train_write:
    # NOTE(review): this needs `train` to exist already, i.e. train_create=True
    # (or a value left over from an earlier execution of this cell).
    with open(train_file, 'wb') as file:
        logging.info('Writing the train cases into file %s',train_file)
        pickle.dump(train, file)
if train_read:
    logging.info('Reading the train cases from file %s',train_file)
    train = read_pkl(train_file)

In [None]:
# Read the parameter sets from params.yaml and show their top-level keys.
params_all = read_yml("params.yaml")
print(params_all.keys())

In [None]:
# from module_param_sets import param_sets

In [None]:
# Parameter-set key(s) of interest (presumably keys of params_all - verify).
param_sets_keys = ['rnn']
# Each key of the training dictionary is one case; removal of a
# 'reproducibility' entry is currently disabled.
cases = list(train.keys())
# Keep at most the first 100 cases for this run.
train_cases = cases[:100]

In [None]:
# Report how many cases (locations) were selected in train_cases.
print(f"Number of Training Locations: {len(train_cases)}")

## Separate Models by Location

In [None]:
# Result containers keyed by case, one per batch-schedule variant
# ('constant' and 'exp').
outputs_const, outputs_exp = {}, {}

In [None]:
# Base RNN hyperparameters come from the 'rnn' entry of params_all.
params = RNNParams(params_all['rnn'])
print("~"*80)
print("Running with params:")
# NOTE(review): this shows the values *before* the overrides below are applied.
print(params)
# Notebook-specific overrides. 'rnn_layers' used to be listed twice in this
# literal (same value both times); the redundant entry has been dropped.
params.update({
    'activation': ['relu', 'relu'],
    'epochs': 20,
    'rnn_layers': 2,
    'rnn_units': 30,
    'dense_units': 20,
})

# Fit and evaluate an independent model per case, once with a constant batch
# schedule and once with an exponential one, recording errors and the epoch
# count returned by run_model for each.
for case in train_cases:
    print("~"*50)
    logging.info('Processing case %s',case)
    print_dict_summary(train[case])

    # Format data: split in time, scale, then reshape into batches.
    rnn_dat = RNNData(train[case], scaler = params['scaler'], features_list = params['features_list'])
    rnn_dat.train_test_split(
        time_fracs = [.9, .05, .05]  # presumably train/validation/test fractions - confirm
    )
    rnn_dat.scale_data()
    rnn_dat.batch_reshape(timesteps = params['timesteps'], batch_size = params['batch_size'])

    # --- Run 1: constant batch schedule ---
    # NOTE(review): `params` is shared across iterations, so from the second
    # case on it still carries the 'bmax' set by the previous 'exp' run.
    params.update({'batch_schedule_type':'constant', 'bmin':20})
    reproducibility.set_seed()  # reseed before each fit
    rnn = RNN(params)
    m, errs, best_ep = rnn.run_model(rnn_dat, return_epochs=True)
    # Get RMSE Prediction Error
    print(f"RMSE: {errs}")
    outputs_const[case] = {'case':case, 'errs': errs.copy(), 'epochs': best_ep}

    # --- Run 2: exponential batch schedule, same data and seed ---
    params.update({'batch_schedule_type':'exp',
                   'bmin':20, 'bmax': rnn_dat.hours})
    reproducibility.set_seed()
    rnn = RNN(params)
    m, errs, best_ep = rnn.run_model(rnn_dat, return_epochs=True)
    # Get RMSE Prediction Error
    print(f"RMSE: {errs}")
    outputs_exp[case] = {'case':case, 'errs': errs.copy(), 'epochs': best_ep}

## Compare

In [None]:
# Flatten the constant-schedule results into parallel columns, one entry per case.
cases = [result['case'] for result in outputs_const.values()]
predictions = [result['errs']['prediction'] for result in outputs_const.values()]
epochs = [result['epochs'] for result in outputs_const.values()]

# Assemble the columns into a table.
df1 = pd.DataFrame({
    'case': cases,
    'prediction': predictions,
    'epochs': epochs,
})


In [None]:
# Flatten the exp-schedule results into parallel columns, one entry per case.
cases = [result['case'] for result in outputs_exp.values()]
predictions = [result['errs']['prediction'] for result in outputs_exp.values()]
epochs = [result['epochs'] for result in outputs_exp.values()]

# Assemble the columns into a table.
df2 = pd.DataFrame({
    'case': cases,
    'prediction': predictions,
    'epochs': epochs,
})

In [None]:
# Preview the first rows of the constant-schedule results (built from outputs_const).
df1.head()

In [None]:
# Preview the first rows of the exp-schedule results (built from outputs_exp).
df2.head()

In [None]:
# Mean of errs['prediction'] over all cases, constant batch schedule.
df1.prediction.mean()

In [None]:
# Mean of errs['prediction'] over all cases, exp batch schedule.
df2.prediction.mean()

In [None]:
# Mean of the epoch value returned by run_model, constant batch schedule.
df1.epochs.mean()

In [None]:
# Mean of the epoch value returned by run_model, exp batch schedule.
df2.epochs.mean()