# Imports

In [None]:
import importlib
import sys
import torch

# Put the four nearest ancestor directories at the front of the module search
# path, closest ancestor first (presumably so the project-local packages
# imported further down resolve regardless of where the notebook lives).
for position, ancestor_dir in enumerate(('..', '../..', '../../..', '../../../..')):
    sys.path.insert(position, ancestor_dir)

# Data

### Load Data Files

In [None]:
# Load the pre-encoded PCR training and validation datasets.
# Both files were written with torch.save and contain whole dataset objects
# (the training object exposes an `all_categories` attribute that is read below),
# not plain tensors or state dicts.
#
# NOTE(security): torch.load unpickles arbitrary Python objects. Only load files
# that come from a trusted source.
file_path_train = '../../../../encoded_data/PCR_5_train.pkl'
file_path_val = '../../../../encoded_data/PCR_5_val.pkl'

# weights_only=False is passed explicitly: PyTorch >= 2.6 defaults to
# weights_only=True, which refuses to unpickle full dataset objects.
PCR_train_dataset = torch.load(file_path_train, weights_only=False)
# Show the concrete class of the loaded training dataset
print(type(PCR_train_dataset))

PCR_val_dataset = torch.load(file_path_val, weights_only=False)
# Show the concrete class of the loaded validation dataset
print(type(PCR_val_dataset))

### Train Data Insights

In [None]:
# PCR dataset feature overview: print the feature metadata stored on the
# training dataset, first the raw structures and then one entry per feature.

PCR_all_categories = PCR_train_dataset.all_categories

# Element 0 is used as the list of categorical features ...
PCR_all_categories_cat = PCR_all_categories[0]
print(PCR_all_categories_cat)

# ... and element 1 as the list of numerical features.
PCR_all_categories_num = PCR_all_categories[1]
print(PCR_all_categories_num)

# For every entry, item [0] is printed as the feature name and item [1] as its
# label count (exact meaning of item [1] for numerical features: TODO confirm).
for i, cat in enumerate(PCR_all_categories_cat):
    print(f"PCR (5) Categorical feature: {cat[0]}, Index position in categorical data list: {i}")
    print(f"PCR (5) Total Amount of Category labels: {cat[1]}")
print('\n')
for i, num in enumerate(PCR_all_categories_num):
    # The index refers to the numerical list (the message previously said "categorical").
    print(f"PCR (5) Numerical feature: {num[0]}, Index position in numerical data list: {i}")
    print(f"PCR (5) Amount Category Lables: {num[1]}")

### Input Features for Encoder and Decoder

In [None]:
# Collect the feature names used by the encoder (input) and by the decoder
# (input & output). Each list pairs [categorical names, numerical names].

# Encoder: take the name (item 0) of every categorical / numerical entry.
enc_feat_cat = [entry[0] for entry in PCR_all_categories_cat]
enc_feat_num = [entry[0] for entry in PCR_all_categories_num]
enc_feat = [enc_feat_cat, enc_feat_num]
print("Input features encoder: ", enc_feat)

# Decoder: same features as the encoder. Note that these names are bound to
# the very same list objects as the encoder lists (no copy is made).
dec_feat_cat, dec_feat_num = enc_feat_cat, enc_feat_num
dec_feat = [dec_feat_cat, dec_feat_num]
print("Features decoder: ", dec_feat)

# Model

In [None]:
# Import the model module, force a reload of it, and only then pull the class
# out of it, so the class always comes from the freshly reloaded module.
# Note: the plain `import` binds the name `model` to the package; it is
# re-bound to the network instance at the end of this cell.
import model.dropout_uncertainty_enc_dec_LSTM.dropout_uncertainty_model
importlib.reload(model.dropout_uncertainty_enc_dec_LSTM.dropout_uncertainty_model)
from model.dropout_uncertainty_enc_dec_LSTM.dropout_uncertainty_model import DropoutUncertaintyEncoderDecoderLSTM

# --- Hyperparameters ---------------------------------------------------------
seq_len_pred = 4    # length of the predicted (decoder output) sequence
hidden_size = 128   # hidden layer size
num_layers = 2      # passed as num_layers (presumably stacked LSTM layers - TODO confirm)
dropout = 0.1       # fixed dropout probability

# --- Encoder-decoder model ---------------------------------------------------
model_kwargs = dict(
    data_set_categories=PCR_all_categories,
    enc_feat=enc_feat,
    dec_feat=dec_feat,
    seq_len_pred=seq_len_pred,
    hidden_size=hidden_size,
    num_layers=num_layers,
    dropout=dropout,
)
model = DropoutUncertaintyEncoderDecoderLSTM(**model_kwargs)

# Loss Object Creation

In [None]:
# Import the loss module, reload it, and take the Loss class from the
# reloaded module (importlib.reload returns the module object).
import loss.losses
Loss = importlib.reload(loss.losses).Loss

# Loss object handed to the trainer
loss_obj = Loss()

# Training Configuration

In [None]:
# Import the trainer module, reload it, then import the Trainer class from it.
import trainer.trainer
importlib.reload(trainer.trainer)
from trainer.trainer import Trainer
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.tensorboard import SummaryWriter

# TensorBoard writer; the comment string is appended to the run directory name.
writer = SummaryWriter(comment="Full_PCR_no_grad")

# Use the GPU when one is available, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

# Initial learning rate
learning_rate = 1e-4

# Adam without weight decay, plus a scheduler that multiplies the learning
# rate by 0.1 after 2 epochs without improvement of the monitored (minimized)
# metric, never going below 1e-10.
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate, weight_decay=0)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=2, min_lr=1e-10)

# Number of training epochs
num_epochs = 100

# Mini-batch size.
# NOTE(review): the original comment said batching does not currently work
# with the custom implementation - confirm whether that still applies.
batch_size = 128

# Lambda for the L2 (weight, bias, dropout) regularization, computed as
# 1/(2N) with N taken to be the batch size here.
regularization_term = 1.0 / (2.0 * batch_size)

# Shuffle the data
shuffle = True

# Teacher forcing ratio. Per the original note, values below 0.5 mean more
# target events are used for next-event prediction - TODO confirm against Trainer.
teacher_forcing_ratio = 0.8

# Optimization settings bundle passed to the trainer
optimize_values = dict(
    regularization_term=regularization_term,
    optimizer=optimizer,
    scheduler=scheduler,
    epochs=num_epochs,
    mini_batches=batch_size,
    shuffle=shuffle,
    teacher_forcing_ratio=teacher_forcing_ratio,
)

suffix_data_split_value = 4

# GradNorm settings (disabled)
gradNorm = dict(use_gradnorm=False)

trainer = Trainer(
    device=device,
    model=model,
    data_train=PCR_train_dataset,
    data_val=PCR_val_dataset,
    loss_obj=loss_obj,
    optimize_values=optimize_values,
    suffix_data_split_value=suffix_data_split_value,
    writer=writer,
    gradnorm_values=gradNorm,
    save_model_n_th_epoch=1,
    saving_path='PCR_full_no_grad_norm_new_1.pkl',
)

# Run the training; train_model returns three loss series, unpacked below.
training_history = trainer.train_model()
train_attenuated_losses, val_losses, val_attenuated_losses = training_history

# Training Visualization

In [None]:
import matplotlib.pyplot as plt

# Plot the loss curves once training has finished.
# The x-axis is derived from the length of each series rather than from
# num_epochs, so the plot still works when fewer (or more) values than
# num_epochs were recorded, e.g. after an interrupted or shortened run.
loss_curves = [
    (train_attenuated_losses, 'Training Attenuated Loss', 'blue'),
    (val_losses, 'Validation Loss', 'orange'),
    (val_attenuated_losses, 'Validation Attenuated Loss', 'green'),
]
for curve_values, curve_label, curve_color in loss_curves:
    epochs_axis = range(1, len(curve_values) + 1)
    plt.plot(epochs_axis, curve_values, label=curve_label, color=curve_color)

# Labeling x and y axes
plt.xlabel('Epochs', fontsize=12)
plt.ylabel('Loss', fontsize=12)
# Adding title
plt.title('Training and Validation Loss Curve', fontsize=14)
# Adding legend
plt.legend()
# Show the plot
plt.show()