In [None]:
import sys
import os
# Resolve the project root: two directory levels above the current working
# directory (adjust the number of '..' components if this notebook is moved).
project_root = os.path.abspath(os.path.join('..', '..'))
# Put the project root on sys.path so the project-local imports used later in
# this notebook can be resolved. The membership check keeps repeated runs of
# this cell from appending the same entry again and again.
if project_root not in sys.path:
    sys.path.append(project_root)

### Create / Clean Model Weight Directory

In [None]:
from Training.Helper.weightFileCleaner import cleanWeightFiles
# Model weights for this notebook are stored relative to the current directory.
model_save_path = os.path.join('.')

# Run the weight-file cleaner for the 'RNN' model name against that directory.
# NOTE(review): presumably this creates the directory and/or removes stale RNN
# weight files before training — confirm in Training.Helper.weightFileCleaner.
cleanWeightFiles(
    'RNN',
    dirPath=model_save_path,
    earlyStopped=False,
    verbose=True,
)

### Load Data

In [None]:
import pandas as pd

# Location of the training CSV, expressed relative to the working directory
# (two levels up, then Data/Train).
train_file = os.path.join(
    '..', '..',
    'Data', 'Train', 'train1990s.csv',
)

# Read the whole CSV into a DataFrame; column names come from the file header.
# Tip: inspect `train_df.columns` when debugging to see the available columns.
train_df = pd.read_csv(train_file)

In [None]:
# Column names used throughout the notebook:
#   date_col   - column holding the observation dates (used to label predictions)
#   target_col - the single series that is scaled and modelled
date_col, target_col = 'observation_date', 'fred_PCEPI'

In [None]:
from Training.Helper.dataPreprocessing import sklearn_fit_transform
from sklearn.preprocessing import MinMaxScaler
 
# **Normalize Data**
# Min-max scale the target column only (univariate setup, no exogenous inputs).
# The fitted scaler is kept so it can be passed to the evaluation step later.
# NOTE(review): the scaler is fitted before the train/validation split below,
# so validation values influence the scaling range — confirm this is intended.
train_dataframes, scaler = sklearn_fit_transform(
    pd.DataFrame(train_df[target_col]),
    MinMaxScaler(),
)
# The helper returns a list of DataFrames; take the first one and flatten its
# values into a 1-D array.
train_series = train_dataframes[0].values.reshape(-1)

In [None]:
from Training.Helper.dataPreprocessing import create_sequences

# **Set Sequence Length**
# Window length handed to create_sequences; also reused later to align the
# validation dates with the predictions.
sequence_length = 12

# Build the model inputs X and targets y. The same scaled series is supplied
# for both of the first two arguments (univariate: the series predicts itself).
X, y = create_sequences(
    train_series,
    train_series,
    sequence_length,
)

In [None]:
from sklearn.model_selection import train_test_split

# **Train-Validation Split (80% Train, 20% Validation)**
# shuffle=False keeps the original order, so the validation set is the final
# 20% of the sequences rather than a random sample.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    shuffle=False,
    test_size=0.2,
    train_size=0.8,
)

In [None]:
from Training.Helper.dataPreprocessing import add_dimension

# Pass every split through add_dimension so the arrays have the layout the
# later cells expect.
# NOTE(review): this cell rebinds its own inputs, so running it twice applies
# add_dimension twice — re-run from the split cell if it must be repeated.
X_train, y_train, X_val, y_val = map(
    add_dimension,
    (X_train, y_train, X_val, y_val),
)

In [None]:
from Training.Helper.dataPreprocessing import prepare_dataloader

# **Create DataLoaders**
batch_size = 32

# Build the training and validation loaders with identical settings.
# shuffle=False keeps the samples in their original order in both loaders.
train_loader, val_loader = [
    prepare_dataloader(features, targets, batch_size=batch_size, shuffle=False)
    for features, targets in ((X_train, y_train), (X_val, y_val))
]

### Train Model

In [None]:
# Now import the RNN model
from Models.RNN import RNNModel
from Training.Helper.PyTorchModular import train_model
import torch
import torch.optim as optim
import torch.nn as nn


# **Reproducibility**
# Seed PyTorch's random number generators before the model is built so that
# weight initialisation (and therefore the training run) is repeatable under
# Restart & Run All. Some CUDA kernels can still be non-deterministic.
RANDOM_SEED = 42
torch.manual_seed(RANDOM_SEED)

# **Build the Model**
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Single input feature (the scaled target series), 64 hidden units, 2 layers.
rnn_model = RNNModel(input_size=1, hidden_size=64, num_layers=2).to(device)

# **Define Loss Function and Optimizer**
criterion = nn.MSELoss()
optimizer = optim.Adam(rnn_model.parameters(), lr=0.001)

# **Train the Model Using Modular Functions**
# train_model returns a mapping; its "trainLoss" and "validLoss" entries are
# plotted in the evaluation section below.
train_data = train_model(
    model=rnn_model,
    maxEpochs=50,
    modelSavePath=model_save_path,
    modelName="RNN",
    dataLoaderTrain=train_loader,
    dataLoaderValid=val_loader,
    lossFn=criterion,
    optimizer=optimizer,
    device=device,
    batchStatusUpdate=10,
    verbose=True
)

### Evaluate

In [None]:
from Training.Helper.PyTorchModular import train_model, loss_curve

# **Plot Training vs. Validation Loss**
# Both loss histories come from the mapping returned by train_model.
loss_curve(
    trainLoss=train_data["trainLoss"],
    validLoss=train_data["validLoss"],
    title="RNN Training vs. Validation Loss",
)

In [None]:
from Evaluation.Helper.evaluation_helpers import get_best_model_path, evaluate_model

# Look up the saved weights of the best 'RNN' model in the save directory.
best_model_path = get_best_model_path(model_save_path, 'RNN')

# **Dates that line up with the validation predictions**
# Skip the rows covered by the training samples plus one full input window.
# NOTE(review): assumes each sequence's target sits `sequence_length` rows after
# the start of its window — confirm against create_sequences.
val_dates = train_df[date_col].values[len(X_train) + sequence_length:]

# Evaluate on the validation loader using the best weights (passed via
# `savepath`); returns the plot axes together with the computed metrics.
eval_axes, metrics = evaluate_model(
    rnn_model,
    val_loader,
    scaler,
    val_dates,
    device,
    savepath=best_model_path,
    print_dates=10,
)

In [None]:
# **Display metrics for Validation Predictions**
# Print a plain-text heading, then render `metrics` (returned by evaluate_model
# in the previous cell) with the notebook's rich display.
print("Metrics for RNN model:")
display(metrics)

In [None]:
# Example usage of weight file cleaner, uncomment if needed

#from Training.Helper.weightFileCleaner import cleanWeightFiles
#cleanWeightFiles('RNN', earlyStopped=True, verbose=True)