# v2.1 exploration trying to make it work better

In [None]:
# Environment
import os
import os.path as osp
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import sys
# Local modules
sys.path.append('..')
import reproducibility
import pandas as pd
from utils import print_dict_summary
from data_funcs import rmse, build_train_dict, combine_nested, subset_by_features
from moisture_rnn import RNNParams, RNNData, RNN, RNN_LSTM, rnn_data_wrap
from moisture_rnn_pkl import pkl2train
from tensorflow.keras.callbacks import Callback
from utils import hash2
import copy
import logging
import pickle
from utils import logging_setup, read_yml, read_pkl, hash_ndarray, hash_weights, str2time
import yaml
import copy
import time

In [None]:
logging_setup()

## Test Data

In [None]:
# Input pickle path(s); used by read_pkl and build_train_dict below
file_paths = ['data/fmda_rocky_202403-05_f05.pkl']

In [None]:
# Params used for data filtering
params_data = read_yml("params_data.yaml") 
params_data

In [None]:
# Load the 'rnn' section of params.yaml, wrap it in RNNParams, then override
# selected hyperparameters for this exploration.
params = read_yml("params.yaml", subkey='rnn') 
params = RNNParams(params)
params.update({'epochs': 200, 
               'learning_rate': 0.001,
               'activation': ['tanh', 'tanh'], # Activation for RNN Layers, Dense layers respectively.
               'recurrent_layers': 2, 'recurrent_units': 30, 
               'dense_layers': 2, 'dense_units': 30,
               'early_stopping_patience': 30, # how many epochs of no validation accuracy gain to wait before stopping
               'batch_schedule_type': 'exp', # Hidden state batch reset schedule
               'bmin': 20, # Lower bound of hidden state batch reset, 
               'bmax': params_data['hours'], # Upper bound of hidden state batch reset, using max hours
               'features_list': ['Ed', 'Ew', 'rain', 'elev', 'lon', 'lat', 'solar', 'wind'],
               'timesteps': 12
              })

In [None]:
# Read the first input file directly (for inspection)
dat = read_pkl(file_paths[0])

In [None]:
# Reload data_funcs and re-import build_train_dict so edits made to the module
# on disk are picked up without restarting the kernel.
import importlib
import data_funcs
importlib.reload(data_funcs)
from data_funcs import build_train_dict

In [None]:
# Set 'hours' to None before building the training dict.
# NOTE: params['bmax'] above was read from params_data['hours'] BEFORE this override.
# NOTE(review): presumably None disables the hours filter -- confirm in build_train_dict.
params_data.update({'hours': None})

In [None]:
# Build the training dictionary used by the RNN/LSTM sections further down
train3 = build_train_dict(file_paths, params_data, spatial=False, forecast_step=3, drop_na=True)


## Test Other ML

In [None]:
# NOTE: `params` is rebound here to the 'xgb' section, replacing the RNN params defined earlier
params = read_yml("params.yaml", subkey='xgb')
params

In [None]:
# NOTE: `dat` is rebound to a different pickle than the one read in the Test Data section
dat = read_pkl("data/train.pkl")

In [None]:
# Keys of the loaded dictionary, as a list so they can be indexed by position
cases = [*dat.keys()]

In [None]:
# Build an RNNData object from one hard-coded case (index 10), split it with
# 80/10/10 time fractions, and scale it with the scaler named in params.
rnn_dat = RNNData(dat[cases[10]], params['scaler'], params['features_list'])
rnn_dat.train_test_split(
    time_fracs = [.8, .1, .1]
)
rnn_dat.scale_data()

In [None]:
from moisture_models import XGB, RF, LM

In [None]:
mod = XGB(params)

In [None]:
mod.params

In [None]:
mod.fit(rnn_dat.X_train, rnn_dat.y_train)

In [None]:
preds = mod.predict(rnn_dat.X_test)

In [None]:
rmse(preds, rnn_dat.y_test)

In [None]:
# Overlay observed test targets and XGB predictions
plt.plot(rnn_dat.y_test)
plt.plot(preds)

In [None]:
# Random forest: `params` and `rnn_dat` are rebound.
# NOTE: unlike the XGB cell above, no scaler is passed and scale_data() is not called.
params = read_yml("params.yaml", subkey='rf')
rnn_dat = RNNData(dat[cases[10]], features_list = ['Ed', 'Ew', 'solar', 'wind', 'rain'])
rnn_dat.train_test_split(
    time_fracs = [.8, .1, .1]
)

In [None]:
# Reload moisture_models to pick up edits on disk.
# NOTE: the name `RF` was imported before this reload and is only re-imported two cells below.
import importlib
import moisture_models
importlib.reload(moisture_models)

In [None]:
params

In [None]:
# Fit the random forest on flattened targets, report test RMSE, and plot
mod2 = RF(params)
mod2.fit(rnn_dat.X_train, rnn_dat.y_train.flatten())
preds2 = mod2.predict(rnn_dat.X_test)
print(rmse(preds2, rnn_dat.y_test.flatten()))
plt.plot(rnn_dat.y_test)
plt.plot(preds2)

In [None]:
# Re-import RF after the reload; NOTE: this rebinds `mod2` to a new, unfitted model
from moisture_models import RF
mod2 = RF(params)

In [None]:
# Linear model: same case and feature list as the RF cell; `mod` is rebound from XGB to LM
params = read_yml("params.yaml", subkey='lm')
rnn_dat = RNNData(dat[cases[10]], features_list = ['Ed', 'Ew', 'solar', 'wind', 'rain'])
rnn_dat.train_test_split(
    time_fracs = [.8, .1, .1]
)
mod = LM(params)

In [None]:
# Fit the linear model and report its test RMSE.
mod.fit(rnn_dat.X_train, rnn_dat.y_train)
preds = mod.predict(rnn_dat.X_test)
# Score the LM predictions computed on the line above (`preds`), not `preds2`,
# which holds the random-forest predictions from an earlier cell.
# np.ravel makes both arguments 1-D so a column-vector prediction cannot
# broadcast against the flattened targets.
print(rmse(np.ravel(preds), rnn_dat.y_test.flatten()))

## Class RNN 

In [None]:
# Architecture spec consumed by build_model_from_params /
# build_prediction_model_from_params. The three `hidden_*` lists are parallel:
# entry i gives the type, unit count and activation of hidden layer i
# (None where a layer type takes no units/activation).
params_test = {
    'n_features': 3,
    'timesteps': 12,
    'batch_size': 32,
    # Layer names are lower-case because the builders compare against
    # 'dense' / 'dropout' / 'rnn' / 'lstm' / 'attention'.
    'hidden_layers': ['lstm', 'attention', 'dense'],
    'hidden_units': [32, None, 32],
    'hidden_activation': ['tanh', None, 'relu'],
    'dropout': 0.2,
    'recurrent_dropout': 0.2,
    'output_layer': 'dense',
    'output_activation': 'linear',
    'output_dimension': 1
}

In [None]:
params_test  # bare expression: notebook display of the spec dictionary

In [None]:
def build_model_from_params(params):
    """
    Build the (stateful, fixed-batch) Keras training model described by `params`.

    Parameters:
    -----------
    params : dict
        Architecture description. Keys read here:
        'batch_size', 'timesteps', 'n_features' (input batch shape),
        'hidden_layers', 'hidden_units', 'hidden_activation' (parallel lists,
        one entry per hidden layer), 'dropout', 'recurrent_dropout',
        'output_layer', 'output_activation', 'output_dimension'.
        Supported hidden layer types (case-insensitive): 'dense', 'dropout',
        'rnn', 'lstm', 'attention'. The only supported output layer is 'dense'.

    Returns:
    --------
    tf.keras.Model
        Uncompiled model whose recurrent layers are stateful and return sequences.

    Raises:
    -------
    ValueError
        If the three hidden-layer lists differ in length, if a hidden layer
        type is not supported, or if the output layer type is not 'dense'.
    """
    # Normalise names so e.g. 'LSTM' and 'lstm' select the same branch instead
    # of being silently skipped.
    hidden_layers = [str(layer_type).lower() for layer_type in params['hidden_layers']]
    hidden_units = params['hidden_units']
    hidden_activation = params['hidden_activation']

    # Validate the whole spec up front so a bad spec fails before any layer is built.
    if not (len(hidden_layers) == len(hidden_units) == len(hidden_activation)):
        raise ValueError(
            "'hidden_layers', 'hidden_units' and 'hidden_activation' must have the same length, "
            "got {}, {} and {}".format(len(hidden_layers), len(hidden_units), len(hidden_activation))
        )
    supported = ('dense', 'dropout', 'rnn', 'lstm', 'attention')
    unsupported = [layer_type for layer_type in hidden_layers if layer_type not in supported]
    if unsupported:
        raise ValueError("Unsupported hidden layer type(s): {}".format(unsupported))
    if params['output_layer'] != 'dense':
        raise ValueError("Unsupported output layer type: {}".format(params['output_layer']))

    # `layers` / `models` are not imported in this notebook; go through `tf.keras`.
    keras_layers = tf.keras.layers

    # Define the input layer with the specified batch size, timesteps, and features
    inputs = tf.keras.Input(batch_shape=(params['batch_size'], params['timesteps'], params['n_features']))
    x = inputs

    # Stack the hidden layers in the order given
    for layer_type, units, activation in zip(hidden_layers, hidden_units, hidden_activation):
        if layer_type == 'dense':
            x = keras_layers.Dense(units=units, activation=activation)(x)

        elif layer_type == 'dropout':
            x = keras_layers.Dropout(params['dropout'])(x)

        elif layer_type == 'rnn':
            x = keras_layers.SimpleRNN(units=units, activation=activation, dropout=params['dropout'],
                                       recurrent_dropout=params['recurrent_dropout'],
                                       return_sequences=True, stateful=True)(x)

        elif layer_type == 'lstm':
            x = keras_layers.LSTM(units=units, activation=activation, dropout=params['dropout'],
                                  recurrent_dropout=params['recurrent_dropout'],
                                  return_sequences=True, stateful=True)(x)

        elif layer_type == 'attention':
            # Self-attention mechanism: query and value are the same tensor
            x = keras_layers.Attention()([x, x])

    # Add the output layer (validated above to be 'dense')
    outputs = keras_layers.Dense(units=params['output_dimension'], activation=params['output_activation'])(x)

    # Create the model
    return tf.keras.Model(inputs=inputs, outputs=outputs)

In [None]:
params_test

In [None]:
# Build the fixed-batch training model from the spec and print its layer summary
model = build_model_from_params(params_test)
model.summary()

In [None]:
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)  # Example optimizer
model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=[])

In [None]:
# Short smoke-test fit (2 epochs) with validation data.
# NOTE(review): `rnn_dat` was last built with a five-feature list (LM cell) while
# params_test['n_features'] is 3, and the model input has a fixed batch size --
# confirm the array shapes are compatible before running.
model.fit(rnn_dat.X_train, rnn_dat.y_train, validation_data = (rnn_dat.X_val, rnn_dat.y_val), epochs=2)

In [None]:
def build_prediction_model_from_params(params):
    """
    Build the (stateless, flexible-shape) Keras prediction model described by `params`.

    The layer stack mirrors the training model, except that recurrent layers are
    built with `stateful=False` and no dropout arguments, and 'dropout' entries
    are skipped. Dropout layers hold no weights, so the list returned by the
    training model's `get_weights()` still lines up with this model.

    Parameters:
    -----------
    params : dict
        Architecture description. Keys read here: 'n_features',
        'hidden_layers', 'hidden_units', 'hidden_activation' (parallel lists),
        'output_layer', 'output_activation', 'output_dimension'.
        Supported hidden layer types (case-insensitive): 'dense', 'dropout'
        (skipped), 'rnn', 'lstm', 'attention'. The only supported output layer
        is 'dense'.

    Returns:
    --------
    tf.keras.Model
        Uncompiled model taking inputs of shape (batch, time, n_features) with
        unspecified batch size and sequence length.

    Raises:
    -------
    ValueError
        If the three hidden-layer lists differ in length, if a hidden layer
        type is not supported, or if the output layer type is not 'dense'.
    """
    # Normalise names so e.g. 'LSTM' and 'lstm' select the same branch instead
    # of being silently skipped.
    hidden_layers = [str(layer_type).lower() for layer_type in params['hidden_layers']]
    hidden_units = params['hidden_units']
    hidden_activation = params['hidden_activation']

    # Validate the whole spec up front so a bad spec fails before any layer is built.
    if not (len(hidden_layers) == len(hidden_units) == len(hidden_activation)):
        raise ValueError(
            "'hidden_layers', 'hidden_units' and 'hidden_activation' must have the same length, "
            "got {}, {} and {}".format(len(hidden_layers), len(hidden_units), len(hidden_activation))
        )
    supported = ('dense', 'dropout', 'rnn', 'lstm', 'attention')
    unsupported = [layer_type for layer_type in hidden_layers if layer_type not in supported]
    if unsupported:
        raise ValueError("Unsupported hidden layer type(s): {}".format(unsupported))
    if params['output_layer'] != 'dense':
        raise ValueError("Unsupported output layer type: {}".format(params['output_layer']))

    # `layers` / `models` are not imported in this notebook; go through `tf.keras`.
    keras_layers = tf.keras.layers

    # Define the input layer with flexible batch size and sequence length
    inputs = tf.keras.Input(shape=(None, params['n_features']))
    x = inputs

    # Stack the hidden layers in the order given
    for layer_type, units, activation in zip(hidden_layers, hidden_units, hidden_activation):
        if layer_type == 'dense':
            x = keras_layers.Dense(units=units, activation=activation)(x)

        elif layer_type == 'rnn':
            x = keras_layers.SimpleRNN(units=units, activation=activation, return_sequences=True, stateful=False)(x)

        elif layer_type == 'lstm':
            x = keras_layers.LSTM(units=units, activation=activation, return_sequences=True, stateful=False)(x)

        elif layer_type == 'attention':
            # Self-attention mechanism: query and value are the same tensor
            x = keras_layers.Attention()([x, x])

        # 'dropout' entries are intentionally skipped in the prediction model

    # Add the output layer (validated above to be 'dense')
    outputs = keras_layers.Dense(units=params['output_dimension'], activation=params['output_activation'])(x)

    # Create the prediction model
    return tf.keras.Model(inputs=inputs, outputs=outputs)

In [None]:
# Example usage with params_test
prediction_model = build_prediction_model_from_params(params_test)
prediction_model.summary()

In [None]:
# Copy the trained weights into the flexible-shape prediction model, then compile it
# (reuses the `optimizer` instance created for the training model above)
prediction_model.set_weights(model.get_weights())
prediction_model.compile(optimizer=optimizer, loss='mean_squared_error')

In [None]:
preds = prediction_model.predict(rnn_dat.X_test)

In [None]:
preds.shape

In [None]:
# RMSE per leading-axis entry (mean of squared error over axes 1 and 2), then averaged.
# NOTE(review): assumes preds and rnn_dat.y_test are both 3-D with matching shapes -- confirm.
squared_diff = np.square(preds - rnn_dat.y_test)
mse = np.mean(squared_diff, axis=(1, 2))
errs = np.sqrt(mse)
errs.mean()

In [None]:
class RNN2():
    """
    Experimental RNN wrapper that builds two Keras models from one params dict.

    `model_train` has a fixed batch shape and stateful recurrent layers;
    `model_predict` has the same layer stack with flexible input shape and
    stateless recurrent layers. `fit` trains `model_train` and then copies its
    weights into `model_predict`.

    Keys read from `params`: 'n_features', 'timesteps', 'batch_size',
    'hidden_layers', 'hidden_units', 'hidden_activation', 'dropout',
    'recurrent_dropout', 'output_layer', 'output_activation',
    'output_dimension', 'learning_rate', 'epochs', and (only when validation
    data is passed to `fit`) 'early_stopping_patience'.
    """

    # Hidden layer types understood by the builders (compared case-insensitively)
    _SUPPORTED_HIDDEN = ('dense', 'dropout', 'rnn', 'lstm', 'attention')

    def __init__(self, params: dict):
        """
        Initializes the RNNModel with the given parameters.

        Parameters:
        -----------
        params : dict
            A dictionary containing model parameters.

        Raises:
        -------
        ValueError
            If the architecture described by `params` is invalid (see `_layer_specs`).
        """
        self.params = params
        # Build model architectures based on input params
        self.model_train = self._build_model_train()
        self.model_predict = self._build_model_predict()
        # Compile Models. Each model gets its own optimizer instance so that no
        # optimizer state is shared between the two.
        learning_rate = self.params['learning_rate']
        self.model_train.compile(
            loss='mean_squared_error',
            optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate))
        self.model_predict.compile(
            loss='mean_squared_error',
            optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate))

    def _layer_specs(self):
        """
        Validate the architecture params and return the hidden-layer spec.

        Returns:
        --------
        list of (str, units, activation) tuples, one per hidden layer, with the
        layer type lower-cased.

        Raises:
        -------
        ValueError
            If the three hidden-layer lists differ in length, a hidden layer
            type is unsupported, or the output layer type is not 'dense'.
        """
        params = self.params
        layer_types = [str(layer_type).lower() for layer_type in params['hidden_layers']]
        units = params['hidden_units']
        activations = params['hidden_activation']

        if not (len(layer_types) == len(units) == len(activations)):
            raise ValueError(
                "'hidden_layers', 'hidden_units' and 'hidden_activation' must have the same length, "
                "got {}, {} and {}".format(len(layer_types), len(units), len(activations))
            )
        unsupported = [t for t in layer_types if t not in self._SUPPORTED_HIDDEN]
        if unsupported:
            raise ValueError("Unsupported hidden layer type(s): {}".format(unsupported))
        if params['output_layer'] != 'dense':
            raise ValueError("Unsupported output layer type: {}".format(params['output_layer']))
        return list(zip(layer_types, units, activations))

    def _assemble(self, inputs, specs, training):
        """
        Stack hidden and output layers on `inputs` and return the Keras model.

        With `training=True`, recurrent layers are stateful and use the
        dropout / recurrent_dropout params, and 'dropout' entries add a Dropout
        layer. With `training=False`, recurrent layers are stateless without
        dropout arguments and 'dropout' entries are skipped.
        """
        params = self.params
        # `layers` / `models` are not imported in this notebook; go through `tf.keras`.
        keras_layers = tf.keras.layers

        if training:
            recurrent_kwargs = dict(dropout=params['dropout'],
                                    recurrent_dropout=params['recurrent_dropout'],
                                    return_sequences=True, stateful=True)
        else:
            recurrent_kwargs = dict(return_sequences=True, stateful=False)

        x = inputs
        for layer_type, units, activation in specs:
            if layer_type == 'dense':
                x = keras_layers.Dense(units=units, activation=activation)(x)
            elif layer_type == 'dropout':
                if training:
                    x = keras_layers.Dropout(params['dropout'])(x)
            elif layer_type == 'rnn':
                x = keras_layers.SimpleRNN(units=units, activation=activation, **recurrent_kwargs)(x)
            elif layer_type == 'lstm':
                x = keras_layers.LSTM(units=units, activation=activation, **recurrent_kwargs)(x)
            elif layer_type == 'attention':
                # Self-attention mechanism: query and value are the same tensor
                x = keras_layers.Attention()([x, x])

        # Output layer type was validated to be 'dense' in _layer_specs
        outputs = keras_layers.Dense(units=params['output_dimension'],
                                     activation=params['output_activation'])(x)
        return tf.keras.Model(inputs=inputs, outputs=outputs)

    def _build_model_train(self):
        """Build the training model: fixed batch shape, stateful recurrent layers."""
        params = self.params
        # Validate before creating any Keras objects
        specs = self._layer_specs()
        # Define the input layer with the specified batch size, timesteps, and features
        inputs = tf.keras.Input(batch_shape=(params['batch_size'], params['timesteps'], params['n_features']))
        return self._assemble(inputs, specs, training=True)

    def _build_model_predict(self, return_sequences=True):
        """
        Build the prediction model: flexible batch size and sequence length,
        stateless recurrent layers.

        `return_sequences` is accepted for interface compatibility but is
        currently unused; recurrent layers always return sequences.
        """
        params = self.params
        # Validate before creating any Keras objects
        specs = self._layer_specs()
        # Define the input layer with flexible batch size and sequence length
        inputs = tf.keras.Input(shape=(None, params['n_features']))
        return self._assemble(inputs, specs, training=False)

    def is_stateful(self):
        """
        Checks whether any of the layers in the internal model (self.model_train) are stateful.

        Returns:
        bool: True if at least one layer in the model is stateful, False otherwise.

        Layers without a 'stateful' attribute are treated as not stateful.

        Example:
        --------
        model.is_stateful()
        """
        return any(bool(getattr(layer, 'stateful', False)) for layer in self.model_train.layers)

    def plot_history(self, history, plot_title, create_figure=True):
        """
        Plots the training history. Uses log scale on y axis for readability.

        Parameters:
        -----------
        history : History object
            The training history object from model fitting. Output of keras' .fit command
        plot_title : str
            The title for the plot.
        create_figure : bool, optional
            If True (default), open a new 10x6 figure before plotting.
        """
        if create_figure:
            plt.figure(figsize=(10, 6))
        plt.semilogy(history.history['loss'], label='Training loss')
        if 'val_loss' in history.history:
            plt.semilogy(history.history['val_loss'], label='Validation loss')
        plt.title(f'{plot_title} Model loss')
        plt.ylabel('Loss')
        plt.xlabel('Epoch')
        plt.legend(loc='upper left')
        plt.show()

    def fit(self, X_train, y_train, verbose_fit = False, verbose_weights=False,
            plot_history=True, plot_title = '',
            weights=None, callbacks=None, validation_data=None, return_epochs=False, *args, **kwargs):
        """
        Trains the model on the provided training data. Uses the fit method of the training model and then copies the weights over to the prediction model, which has a less restrictive input shape.

        Parameters:
        -----------
        X_train : np.ndarray
            The input matrix data for training. The last axis must have length params['n_features'].
        y_train : np.ndarray
            The target vector data for training.
        verbose_fit : bool, optional
            Passed to keras fit as `verbose` (per-epoch output). Default is False.
        verbose_weights : bool, optional
            If True, print params and hashes of the data and weights. Default is False.
        plot_history : bool, optional
            If True, plots the training history. Default is True.
        plot_title : str, optional
            The title for the training plot. Default is an empty string.
        weights : optional
            Currently unused. Default is None.
        callbacks : list, optional
            Callbacks to use during training. Default is None (no extra callbacks).
            The list passed in is not modified.
        validation_data : tuple, optional
            Validation data to use during training, expected format (X_val, y_val). Default is None.
            When given, an EarlyStoppingCallback with patience params['early_stopping_patience'] is added.
        return_epochs : bool
            If True, return the number of epochs that training took. Used to test and optimize early stopping

        Returns:
        --------
        int or None
            If `return_epochs` is True: the early-stopping best epoch + 1 when
            validation data was given, otherwise the number of epochs run.
            None when `return_epochs` is False.

        Raises:
        -------
        ValueError
            If the last dimension of X_train differs from params['n_features'].
        """
        # Check Compatibility, assume features dimension is last in X_train array
        if X_train.shape[-1] != self.params['n_features']:
            raise ValueError(f"X_train and number of features from params not equal. \n X_train shape: {X_train.shape} \n params['n_features']: {self.params['n_features']}. \n Try recreating RNNData object with features list from params: `RNNData(..., features_list = params['features_list'])`")

        if verbose_weights:
            print(f"Training simple RNN with params: {self.params}")

        # Copy so the caller's list is never mutated
        callbacks = list(callbacks) if callbacks is not None else []

        # Early stopping callback requires validation data
        early_stop = None
        X_val = y_val = None
        if validation_data is not None:
            # Imported here so the class does not depend on a later notebook cell
            # having already imported the callback.
            from moisture_rnn import EarlyStoppingCallback
            X_val, y_val = validation_data[0], validation_data[1]
            print("Using early stopping callback.")
            early_stop = EarlyStoppingCallback(patience = self.params['early_stopping_patience'])
            callbacks.append(early_stop)

        if verbose_weights:
            print(f"Formatted X_train hash: {hash_ndarray(X_train)}")
            print(f"Formatted y_train hash: {hash_ndarray(y_train)}")
            if validation_data is not None:
                print(f"Formatted X_val hash: {hash_ndarray(X_val)}")
                print(f"Formatted y_val hash: {hash_ndarray(y_val)}")
            print(f"Initial weights before training hash: {hash_weights(self.model_train)}")

        ## TODO: Hidden State Initialization

        # One fit call for both cases; validation_data is only passed when provided
        fit_kwargs = dict(
            epochs=self.params['epochs'],
            batch_size=self.params['batch_size'],
            callbacks=callbacks,
            verbose=verbose_fit,
        )
        if validation_data is not None:
            fit_kwargs['validation_data'] = (X_val, y_val)
        history = self.model_train.fit(X_train, y_train, *args, **fit_kwargs, **kwargs)

        if plot_history:
            self.plot_history(history,plot_title)

        if verbose_weights:
            print(f"Fitted Weights Hash: {hash_weights(self.model_train)}")

        # Update Weights for Prediction Model
        w_fitted = self.model_train.get_weights()
        self.model_predict.set_weights(w_fitted)

        if return_epochs:
            if early_stop is not None:
                # Epoch counting starts at 0, adding 1 for the count
                return early_stop.best_epoch + 1
            # Without early stopping, report how many epochs were actually run
            return len(history.history['loss'])

In [None]:
# Second architecture spec, this time for the RNN2 class: adds a leading dense
# layer plus the 'learning_rate' and 'epochs' keys that RNN2 reads.
# NOTE: there is no 'early_stopping_patience' key; RNN2.fit only reads it when
# validation data is passed.
params_test = {
    'n_features': 3,
    'timesteps': 12,
    'batch_size': 32,
    'hidden_layers': ['dense', 'lstm', 'attention', 'dense'],
    'hidden_units': [64, 32, None, 32],
    'hidden_activation': ['relu', 'tanh', None, 'relu'],
    'dropout': 0.2,
    'recurrent_dropout': 0.2,
    'output_layer': 'dense',
    'output_activation': 'linear',
    'output_dimension': 1,
    'learning_rate': 0.001,
    'epochs': 10
}

In [None]:
params_test

In [None]:
# NOTE: rebinds `mod` (previously the LM model) to an RNN2 instance
mod = RNN2(params_test)

In [None]:
mod.model_train.summary()

In [None]:
# Train without validation data, so no early stopping callback is added
mod.fit(rnn_dat.X_train, rnn_dat.y_train, verbose_fit = True)

In [None]:
# Hash the weights of both models; fit() copies the trained weights into
# model_predict, so the two can be compared by eye.
hash_weights(mod.model_train)

In [None]:
hash_weights(mod.model_predict)

## RNN

In [None]:
# Reload moisture_rnn and re-import RNN so edits to the module on disk are picked up
import importlib 
import moisture_rnn
importlib.reload(moisture_rnn)
from moisture_rnn import RNN

In [None]:
# NOTE: the params for this "RNN" section are read from the 'lstm' subkey of params.yaml
params = read_yml("params.yaml", subkey="lstm")
params = RNNParams(params)
params.update({
    'dense_layers': 2,
    'dense_units': 32
})

In [None]:
params.update({'batch_schedule_type': 'step'})

In [None]:
# Wrap the combined training dict for the RNN, seed, and construct the model
rnn_dat = rnn_data_wrap(combine_nested(train3), params)
reproducibility.set_seed(123)
rnn = RNN(params)

In [None]:
rnn.params

In [None]:
# Call keras fit on the training model directly (20 epochs) rather than going
# through a wrapper method of RNN
rnn.model_train.fit(rnn_dat.X_train, rnn_dat.y_train, validation_data=(rnn_dat.X_val, rnn_dat.y_val), 
                    verbose=True, epochs=20)

In [None]:
# Copy the trained weights into the prediction model by hand
rnn.model_predict.set_weights(rnn.model_train.get_weights())

In [None]:
rnn.model_predict.summary()

In [None]:
rnn_dat.X_test.shape

In [None]:
preds = rnn.model_predict.predict(rnn_dat.X_test)

In [None]:
preds.shape

In [None]:
rnn_dat.X_test.shape

In [None]:
# RMSE per leading-axis entry (mean of squared error over axes 1 and 2), then averaged.
# NOTE(review): assumes preds and rnn_dat.y_test are both 3-D with matching shapes -- confirm.
squared_diff = np.square(preds - rnn_dat.y_test)
mse = np.mean(squared_diff, axis=(1, 2))
errs = np.sqrt(mse)
errs.mean()

## Phys Initialized

In [None]:
# Override params for this experiment: no dense layers, relu activations,
# zero dropout, an 80/10/10 'space_fracs' split and no scaler.
# NOTE(review): the section title says "Phys Initialized" but 'phys_initialize'
# is set to False here -- confirm intent.
params.update({
    'epochs':100,
    'dense_layers': 0,
    'activation': ['relu', 'relu'],
    'phys_initialize': False,
    'dropout': [0,0],
    'space_fracs': [.8, .1, .1],
    'scaler': None
})

In [None]:
# Reload moisture_rnn and re-import rnn_data_wrap to pick up edits on disk.
# NOTE: the names RNN / RNNParams used below were imported before this reload.
import importlib
import moisture_rnn
importlib.reload(moisture_rnn)
from moisture_rnn import rnn_data_wrap

In [None]:
# Sanity check that the scaler override took effect
params['scaler'] is None

In [None]:
rnn_dat = rnn_data_wrap(combine_nested(train3), params)

In [None]:
# NOTE: set_seed() is called without an argument here, unlike set_seed(123) elsewhere
reproducibility.set_seed()
rnn = RNN(params)
m, errs = rnn.run_model(rnn_dat)

In [None]:
rnn.model_train.summary()

In [None]:
errs.mean()

In [None]:
# Mean of feature index 0 over the training array (a check on the unscaled data)
rnn_dat.X_train[:,:,0].mean()

In [None]:
rnn_dat['features_list']

## LSTM

In [None]:
# Reload moisture_rnn and re-import RNN_LSTM so edits to the module on disk are picked up
import importlib 
import moisture_rnn
importlib.reload(moisture_rnn)
from moisture_rnn import RNN_LSTM

In [None]:
# Fresh params from the 'lstm' section of params.yaml
params = read_yml("params.yaml", subkey="lstm")
params = RNNParams(params)

In [None]:
rnn_dat = rnn_data_wrap(combine_nested(train3), params)

In [None]:
params.update({
    'loc_batch_reset': rnn_dat.n_seqs # Used to reset hidden state when location changes for a given batch
})

In [None]:
# Manual training run: override hyperparameters, seed, build the LSTM, and call
# keras fit directly with the reset-states and early-stopping callbacks.
from moisture_rnn import ResetStatesCallback, EarlyStoppingCallback
params.update({'epochs': 50, 'learning_rate': 0.001, 'verbose_fit': True, 'rnn_layers': 2, 'rnn_units': 20, 'dense_layers': 1, 'dense_units': 10,
              'activation': ['tanh', 'tanh'], 'features_list': rnn_dat.features_list,
              'batch_schedule_type':'step', 'bmin': 10, 'bmax':rnn_dat.hours})
reproducibility.set_seed(123)
lstm = RNN_LSTM(params)

history = lstm.model_train.fit(rnn_dat.X_train, rnn_dat.y_train, 
                    batch_size = params['batch_size'], epochs=params['epochs'], 
                    callbacks = [ResetStatesCallback(params),
                                EarlyStoppingCallback(patience = 15)],
                   validation_data = (rnn_dat.X_val, rnn_dat.y_val))
              

In [None]:
# Same configuration as the manual run above, but trained through run_model.
# NOTE: 'features_list' and 'bmax' are taken from the PREVIOUS `rnn_dat`, which
# is only rebuilt on the line after the update.
params = RNNParams(read_yml("params.yaml", subkey="lstm"))
params.update({'epochs': 50, 'learning_rate': 0.001, 'verbose_fit': True, 'rnn_layers': 2, 'rnn_units': 20, 'dense_layers': 1, 'dense_units': 10,
              'activation': ['tanh', 'tanh'], 'features_list': rnn_dat.features_list,
              'batch_schedule_type':'step', 'bmin': 10, 'bmax':rnn_dat.hours})
rnn_dat = rnn_data_wrap(combine_nested(train3), params)
params.update({
    'loc_batch_reset': rnn_dat.n_seqs # Used to reset hidden state when location changes for a given batch
})
reproducibility.set_seed(123)
lstm = RNN_LSTM(params)
m, errs = lstm.run_model(rnn_dat)

In [None]:
errs.mean()

In [None]:
# Baseline comparison: the 'rnn' section of params.yaml with no overrides
params = RNNParams(read_yml("params.yaml", subkey="rnn"))
rnn_dat = rnn_data_wrap(combine_nested(train3), params)

In [None]:
reproducibility.set_seed(123)
rnn = RNN(params)
m, errs = rnn.run_model(rnn_dat)

In [None]:
errs.mean()