# v2.1 exploration: trying to make it work better

In [None]:
# Environment
import os
import os.path as osp
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import sys
# Local modules
sys.path.append('..')
import reproducibility
import pandas as pd
from utils import print_dict_summary
from data_funcs import rmse, build_train_dict, combine_nested, subset_by_features
from moisture_rnn import RNNParams, RNNData, RNN, RNN_LSTM, rnn_data_wrap
from moisture_rnn_pkl import pkl2train
from tensorflow.keras.callbacks import Callback
from utils import hash2
import copy
import logging
import pickle
from utils import logging_setup, read_yml, read_pkl, hash_ndarray, hash_weights, str2time
import yaml
import copy
import time

In [None]:
# Initialize logging using the helper imported from utils.
logging_setup()

## Test Data

In [None]:
# Pickle file(s) consumed below by read_pkl (first entry) and build_train_dict (whole list).
file_paths = ['data/fmda_rocky_202403-05_f05.pkl']

In [None]:
# Params used for data filtering
params_data = read_yml("params_data.yaml") 
# Bare expression: displays the loaded params as the cell output.
params_data

In [None]:
# RNN hyperparameters: load the 'rnn' section of params.yaml, wrap it as
# RNNParams, then apply the overrides for this exploration run.
params = RNNParams(read_yml("params.yaml", subkey='rnn'))
params.update({
    'epochs': 200,
    'learning_rate': 0.001,
    # Activation for RNN layers and Dense layers, respectively.
    'activation': ['tanh', 'tanh'],
    'recurrent_layers': 2,
    'recurrent_units': 30,
    'dense_layers': 2,
    'dense_units': 30,
    # How many epochs of no validation gain to wait before stopping.
    'early_stopping_patience': 30,
    # Hidden state batch reset schedule.
    'batch_schedule_type': 'exp',
    # Lower bound of hidden state batch reset.
    'bmin': 20,
    # Upper bound of hidden state batch reset, using max hours.
    'bmax': params_data['hours'],
    'features_list': ['Ed', 'Ew', 'rain', 'elev', 'lon', 'lat', 'solar', 'wind'],
    'timesteps': 12,
})

In [None]:
# Load the first pickle listed in file_paths.
dat = read_pkl(file_paths[0])

In [None]:
# Re-execute data_funcs via importlib.reload and re-import build_train_dict so
# the name points at the reloaded module's function.
import importlib
import data_funcs
importlib.reload(data_funcs)
from data_funcs import build_train_dict

In [None]:
# Override 'hours' with None for the build_train_dict call below.
# Note: params['bmax'] was already set from the previous params_data['hours'] value.
# NOTE(review): presumably None disables the hours-based filter — confirm in data_funcs.
params_data.update({'hours': None})

In [None]:
# Start wall-clock timer; the matching end_time is taken after the build_train_dict cell.
start_time = time.time()

In [None]:
# Build the training dict from file_paths (non-spatial, forecast_step=3, drop_na=True).
train3 = build_train_dict(file_paths, params_data, spatial=False, forecast_step=3, drop_na=True)


In [None]:
# End timer (start_time was set in the cell preceding the build_train_dict call)
end_time = time.time()

# Elapsed wall-clock time of the build_train_dict call.
# Label corrected: the timed call used spatial=False and performs no training,
# so the previous "Spatial Training" label was misleading.
elapsed_time_sp = end_time - start_time
print(f"build_train_dict elapsed time: {elapsed_time_sp:.4f} seconds")

In [None]:
from data_funcs import build_features_single

In [None]:
# Display the field names stored for one case of dat.
# NOTE(review): the key is hard-coded ('..._202401') while dat was read from a
# '202403-05' file — confirm this key exists in the loaded pickle.
dat['PLFI1_202401'].keys()

In [None]:
# Time a serial pass of build_features_single over every case in dat.
start_time = time.time()

for key in dat:
    # Return value is intentionally ignored; only the runtime is of interest here.
    build_features_single(dat[key], atm="HRRR", fstep="f03", fprev="f02")

# End Timer
end_time = time.time()

# Calculate Code Runtime
# Label corrected: this measures the serial feature-building loop, not
# "Spatial Training" (the label was copy-pasted from elsewhere).
elapsed_time_sp = end_time - start_time
print(f"Serial build_features_single elapsed time: {elapsed_time_sp:.4f} seconds")

In [None]:
from multiprocessing import Process, Queue

In [None]:
# Materialize the case keys of dat as a list; passed to pool.map below.
keys = list(dat.keys())

In [None]:
def process_key(key):
    """Run build_features_single on the entry of the global `dat` stored under `key`.

    Uses the same HRRR / f03 / f02 arguments as the serial timing loop.
    The result of build_features_single is discarded; the function returns None.
    """
    case = dat[key]
    build_features_single(case, atm="HRRR", fstep="f03", fprev="f02")

In [None]:
from multiprocessing import Pool

In [None]:
# Fan process_key out over all case keys using a Pool with the default worker count.
# NOTE(review): process_key returns None, so nothing computed in the workers is
# collected here; this cell is also not timed, unlike the serial loop above.
if __name__ == '__main__':
    with Pool() as pool:
        pool.map(process_key, keys)

## Test Other ML

In [None]:
# Rebind params to the 'xgb' section of params.yaml (replaces the RNN params above)
# and display it.
params = read_yml("params.yaml", subkey='xgb')
params

In [None]:
# Rebind dat to the contents of data/train.pkl (replaces the data loaded from file_paths[0]).
dat = read_pkl("data/train.pkl")

In [None]:
# List of case keys in dat, indexable by position (see dat[cases[10]] below).
cases = list(dat.keys())

In [None]:
# Wrap a single case (position 10 in cases) as RNNData, using the scaler and
# feature list taken from the current params.
rnn_dat = RNNData(dat[cases[10]], params['scaler'], params['features_list'])
# Split along time with fractions [.8, .1, .1]
# NOTE(review): presumably train/val/test in that order — confirm in RNNData.train_test_split.
rnn_dat.train_test_split(
    time_fracs = [.8, .1, .1]
)
# Apply the scaler after splitting.
rnn_dat.scale_data()

In [None]:
from moisture_models import XGB, RF, LM

In [None]:
# Instantiate the XGB wrapper from moisture_models with the current params.
mod = XGB(params)

In [None]:
# Display the params attribute held by the model object.
mod.params

In [None]:
# Fit on the training split of rnn_dat.
mod.fit(rnn_dat.X_train, rnn_dat.y_train)

In [None]:
# Predict on the test split.
preds = mod.predict(rnn_dat.X_test)

In [None]:
# Test-set RMSE of the XGB predictions.
# NOTE(review): the RF and LM cells below flatten y_test before calling rmse;
# confirm preds and rnn_dat.y_test have matching shapes here.
rmse(preds, rnn_dat.y_test)

In [None]:
# Overlay the test-split targets and the model predictions on one plot.
plt.plot(rnn_dat.y_test)
plt.plot(preds)

In [None]:
# Switch params to the 'rf' section and rebuild rnn_dat for the same case with
# an explicit five-feature list.
# Unlike the XGB cell, no scaler is passed and scale_data() is not called.
params = read_yml("params.yaml", subkey='rf')
rnn_dat = RNNData(dat[cases[10]], features_list = ['Ed', 'Ew', 'solar', 'wind', 'rain'])
rnn_dat.train_test_split(
    time_fracs = [.8, .1, .1]
)

In [None]:
# Re-execute moisture_models via importlib.reload.
# Note: the XGB/RF/LM names imported earlier are not rebound by this cell.
import importlib
import moisture_models
importlib.reload(moisture_models)

In [None]:
# Display the current params (the 'rf' section when cells are run in order).
params

In [None]:
# Fit RF on the training split (targets flattened), predict on the test split,
# print the test RMSE and overlay targets vs. predictions.
mod2 = RF(params)
mod2.fit(rnn_dat.X_train, rnn_dat.y_train.flatten())
preds2 = mod2.predict(rnn_dat.X_test)
print(rmse(preds2, rnn_dat.y_test.flatten()))
plt.plot(rnn_dat.y_test)
plt.plot(preds2)

In [None]:
# Rebind RF from moisture_models and construct a fresh mod2 from the current params.
# Note: this replaces the fitted mod2 from the previous cell with an unfitted one.
from moisture_models import RF
mod2 = RF(params)

In [None]:
# Switch params to the 'lm' section, rebuild and split rnn_dat for the same
# case and features as the RF cell, and instantiate LM.
# Note: this rebinds mod (previously the XGB model).
params = read_yml("params.yaml", subkey='lm')
rnn_dat = RNNData(dat[cases[10]], features_list = ['Ed', 'Ew', 'solar', 'wind', 'rain'])
rnn_dat.train_test_split(
    time_fracs = [.8, .1, .1]
)
mod = LM(params)

In [None]:
# Fit LM on the training split and predict on the test split.
mod.fit(rnn_dat.X_train, rnn_dat.y_train)
preds = mod.predict(rnn_dat.X_test)
# Report the RMSE of the LM predictions. The original scored `preds2` (the RF
# predictions from an earlier cell), so the printed number was not the LM's.
# np.ravel makes both arguments 1-D so a column-shaped prediction cannot
# broadcast against the flattened targets.
print(rmse(np.ravel(preds), rnn_dat.y_test.flatten()))

## RNN

## Phys Initialized

In [None]:
# Overrides applied before the RNN run below.
# NOTE(review): when cells are run top to bottom, params at this point is the
# 'lm' section loaded above, not the RNNParams built at the start of the
# notebook — confirm which params object is intended here.
# NOTE(review): the heading says "Phys Initialized" but 'phys_initialize' is set to False.
params.update({
    'epochs':100,
    'dense_layers': 0,
    'activation': ['relu', 'relu'],
    'phys_initialize': False,
    'dropout': [0,0],
    'space_fracs': [.8, .1, .1],
    'scaler': None
})

In [None]:
# Re-execute moisture_rnn via importlib.reload and rebind rnn_data_wrap to the
# reloaded definition. Other names imported from moisture_rnn are not rebound here.
import importlib
import moisture_rnn
importlib.reload(moisture_rnn)
from moisture_rnn import rnn_data_wrap

In [None]:
# Sanity check: displays True if the 'scaler' override above took effect.
params['scaler'] is None

In [None]:
# Combine the nested train3 dict and wrap it for the RNN using the current params.
rnn_dat = rnn_data_wrap(combine_nested(train3), params)

In [None]:
# Seed with the helper's default, build the RNN from params, and run it on rnn_dat.
# run_model returns a pair, bound here to m and errs.
reproducibility.set_seed()
rnn = RNN(params)
m, errs = rnn.run_model(rnn_dat)

In [None]:
# Print the summary of the training model held by the RNN wrapper.
rnn.model_train.summary()

In [None]:
# Mean of the errors returned by the run_model call above.
errs.mean()

In [None]:
# Mean of index 0 along the last axis of X_train (X_train is indexed as 3-D here).
# With 'scaler': None this is presumably the raw, unscaled value — TODO confirm.
rnn_dat.X_train[:,:,0].mean()

In [None]:
# Display the feature list stored on rnn_dat.
# NOTE(review): accessed by subscript here, but as the attribute rnn_dat.features_list
# in later cells — confirm the object supports both.
rnn_dat['features_list']

## LSTM

In [None]:
# Re-execute moisture_rnn via importlib.reload and rebind RNN_LSTM to the reloaded definition.
import importlib 
import moisture_rnn
importlib.reload(moisture_rnn)
from moisture_rnn import RNN_LSTM

In [None]:
# Load the 'lstm' section of params.yaml and wrap it as RNNParams in one step.
params = RNNParams(read_yml("params.yaml", subkey="lstm"))

In [None]:
# Rebuild rnn_dat from train3 using the LSTM params.
rnn_dat = rnn_data_wrap(combine_nested(train3), params)

In [None]:
# Used to reset hidden state when location changes for a given batch
params.update({'loc_batch_reset': rnn_dat.n_seqs})

In [None]:
# Manual LSTM training: override params, build the RNN_LSTM wrapper, and call
# Keras fit directly on its model_train instead of using run_model.
from moisture_rnn import ResetStatesCallback, EarlyStoppingCallback
params.update({'epochs': 50, 'learning_rate': 0.001, 'verbose_fit': True, 'rnn_layers': 2, 'rnn_units': 20, 'dense_layers': 1, 'dense_units': 10,
              'activation': ['tanh', 'tanh'], 'features_list': rnn_dat.features_list,
              'batch_schedule_type':'exp', 'bmin': 10, 'bmax':rnn_dat.hours})
reproducibility.set_seed(123)
lstm = RNN_LSTM(params)

# Early stopping uses a fixed patience of 15 here, passed directly to the callback.
# Only `history` is assigned by this cell; `errs` is not updated.
history = lstm.model_train.fit(rnn_dat.X_train, rnn_dat.y_train, 
                    batch_size = params['batch_size'], epochs=params['epochs'], 
                    callbacks = [ResetStatesCallback(params),
                                EarlyStoppingCallback(patience = 15)],
                   validation_data = (rnn_dat.X_val, rnn_dat.y_val))
              

In [None]:
# NOTE(review): the manual fit cell above assigns `history`, not `errs`, so this
# displays the mean from the most recent run_model call, not from the LSTM fit.
errs.mean()

In [None]:
# Full LSTM run via run_model: reload the 'lstm' params, apply overrides,
# rebuild the data, then train and evaluate.
params = RNNParams(read_yml("params.yaml", subkey="lstm"))
# Note: rnn_dat.features_list and rnn_dat.hours are read from the rnn_dat built
# in an earlier cell, because rnn_dat is only rebuilt after this update.
params.update({'epochs': 50, 'learning_rate': 0.001, 'verbose_fit': True, 'rnn_layers': 2, 'rnn_units': 20, 'dense_layers': 1, 'dense_units': 10,
              'activation': ['tanh', 'tanh'], 'features_list': rnn_dat.features_list,
              'batch_schedule_type':'exp', 'bmin': 10, 'bmax':rnn_dat.hours,
              'early_stopping_patience': 25})
rnn_dat = rnn_data_wrap(combine_nested(train3), params)
params.update({
    'loc_batch_reset': rnn_dat.n_seqs # Used to reset hidden state when location changes for a given batch
})
reproducibility.set_seed(123)
lstm = RNN_LSTM(params)
m, errs = lstm.run_model(rnn_dat)

In [None]:
# Mean of the errors returned by the LSTM run_model call above.
errs.mean()

In [None]:
# Baseline RNN: the 'rnn' section of params.yaml with no overrides, and data rebuilt to match.
params = RNNParams(read_yml("params.yaml", subkey="rnn"))
rnn_dat = rnn_data_wrap(combine_nested(train3), params)

In [None]:
# Same seed (123) as the LSTM runs above; build and run the baseline RNN.
reproducibility.set_seed(123)
rnn = RNN(params)
m, errs = rnn.run_model(rnn_dat)

In [None]:
# Mean of the errors returned by the baseline RNN run_model call above.
errs.mean()