In [1]:
import os
import sys
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import copy
import argparse
import os
import matplotlib.pyplot as plt

# Resolve the project's source directory relative to the notebook's working
# directory so the project-local modules imported below can be found.
src_path = os.path.abspath(os.path.join(os.getcwd(), "../experiments/src"))

# Guard the append so re-running this cell does not stack duplicate entries
# on sys.path.
if src_path not in sys.path:
    sys.path.append(src_path)

from dtw_measure import dtw_measure

from utils import set_seed, get_torch_device, load_config, \
    count_parameters, apply_glorot_xavier, inspect_gradient_norms

from preprocessing import get_k_fold, load_data, get_torch_data
from preprocessing import StandardScaler

from postprocessing import save_gradient_norms_plot, save_predictions_and_true_values_plot, \
    save_predictions_detail_plot, save_scatter_predictions_and_true_values, \
    get_dst_rmse, get_detail_properties, get_dtw_measures


In [2]:

def apply_smoothing(batch_x, augumentation_rate, smoothing_window=5):
    """Return a copy of ``batch_x`` with a random subset of samples smoothed.

    ``int(batch_size * augumentation_rate)`` samples (capped at the batch
    size) are drawn without replacement from NumPy's global RNG. Each chosen
    sample is replaced by its trailing rolling mean along the sequence axis.
    Because ``min_periods=1`` is used, the first steps average over the
    shorter history that is available and no NaNs are introduced.

    Args:
        batch_x: Tensor of shape ``(batch, sequence, features)``.
        augumentation_rate: Fraction of the batch to smooth; must be >= 0.
            Values above 1 smooth the whole batch.
        smoothing_window: Length of the rolling-mean window in time steps.

    Returns:
        A new tensor with the same shape, dtype and device as ``batch_x``.
        The input tensor is not modified.

    Raises:
        ValueError: If ``batch_x`` is not 3-dimensional or the rate is negative.
    """
    # Unpacking enforces the expected 3-D layout.
    batch_size, _, _ = batch_x.shape

    if augumentation_rate < 0:
        raise ValueError(
            f"augumentation_rate must be non-negative, got {augumentation_rate}"
        )

    # Sampling without replacement cannot draw more rows than the batch holds.
    num_samples_to_smooth = min(int(batch_size * augumentation_rate), batch_size)
    indices_to_smooth = np.random.choice(batch_size, num_samples_to_smooth, replace=False)

    smoothed_batch_x = batch_x.clone()
    for idx in indices_to_smooth:
        # pandas provides the rolling mean; it works on a CPU copy of the sample.
        sample_frame = pd.DataFrame(smoothed_batch_x[idx].cpu().numpy())
        rolled = sample_frame.rolling(window=smoothing_window, min_periods=1).mean()
        # Rebuild the row in the batch's own dtype so higher-precision inputs
        # are not silently rounded through float32.
        smoothed_batch_x[idx] = torch.tensor(
            rolled.values, device=batch_x.device, dtype=batch_x.dtype
        )

    return smoothed_batch_x


def train_model(model, train_loader, optimizer, criterion, device, AUGUMENTATION_RATE):
    """Run one training epoch and return the mean per-batch loss.

    For every batch the inputs and targets are moved to ``device``, the
    inputs go through ``apply_smoothing`` with ``AUGUMENTATION_RATE``, and a
    standard zero-grad / forward / backward / step cycle is executed.

    Args:
        model: Module to train; switched to train mode.
        train_loader: Iterable yielding ``(inputs, targets)`` batches.
        optimizer: Optimizer updating ``model``'s parameters.
        criterion: Loss callable taking ``(outputs, targets)``; assumed to be
            element-wise (e.g. MSE), so targets and outputs share a shape.
        device: Device the batches are moved to.
        AUGUMENTATION_RATE: Fraction of each batch that is smoothed.

    Returns:
        Sum of the per-batch ``loss.item()`` values divided by the number of
        batches in ``train_loader``.
    """
    model.train()
    total_loss = 0
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        inputs = apply_smoothing(inputs, AUGUMENTATION_RATE)
        outputs = model(inputs)
        targets = targets.squeeze(-1)
        # If squeezing left the targets one dimension short of the outputs
        # (e.g. (B,) vs (B, 1)), the loss would broadcast to (B, B) instead of
        # comparing element-wise. Align the shapes when the counts agree.
        if targets.shape != outputs.shape and targets.numel() == outputs.numel():
            targets = targets.reshape(outputs.shape)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    mean_loss = total_loss / len(train_loader)
    return mean_loss

def validate_model(model, val_test_loader, criterion, device):
    """Evaluate ``model`` on a loader without gradient tracking.

    Args:
        model: Module to evaluate; switched to eval mode.
        val_test_loader: Iterable yielding ``(inputs, targets)`` batches.
        criterion: Loss callable taking ``(outputs, targets)``; assumed to be
            element-wise (e.g. MSE), so targets and outputs share a shape.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(mean_loss, all_outputs)``. ``mean_loss`` is the sum of
        per-batch ``loss.item()`` values divided by the number of batches.
        ``all_outputs`` is every batch's output with its last axis squeezed,
        concatenated along dimension 0.
    """
    model.eval()
    total_loss = 0
    all_outputs = []

    with torch.no_grad():
        for inputs, targets in val_test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            all_outputs.append(outputs.squeeze(-1))
            targets = targets.squeeze(-1)
            # If squeezing left the targets one dimension short of the outputs
            # (e.g. (B,) vs (B, 1)), the loss would broadcast to (B, B) instead
            # of comparing element-wise. Align the shapes when the counts agree.
            if targets.shape != outputs.shape and targets.numel() == outputs.numel():
                targets = targets.reshape(outputs.shape)
            loss = criterion(outputs, targets)
            total_loss += loss.item()

    mean_loss = total_loss / len(val_test_loader)
    all_outputs = torch.cat(all_outputs, dim=0)
    return mean_loss, all_outputs


class GRUModel(nn.Module):
    """Unidirectional, batch-first GRU followed by a linear read-out.

    The linear layer is applied to the GRU output at the last time step only,
    so the model maps ``(batch, sequence, input_size)`` to
    ``(batch, output_size)``.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the GRU hidden state.
        output_size: Number of values produced per sample.
        num_gru_layers: Number of stacked GRU layers.
        dropout: Dropout probability passed to ``nn.GRU``.
    """

    def __init__(self, input_size, hidden_size, output_size, num_gru_layers, dropout):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_gru_layers = num_gru_layers

        self.gru = nn.GRU(
            input_size,
            hidden_size,
            num_gru_layers,
            batch_first=True,
            bidirectional=False,
            dropout=dropout,
        )

        self.fc1 = nn.Linear(hidden_size, output_size)

        # Not used in forward(); kept so the module's attributes and printed
        # structure stay as they were.
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the prediction computed from the last GRU time step.

        Args:
            x: Tensor of shape ``(batch, sequence, input_size)``.

        Returns:
            Tensor of shape ``(batch, output_size)``.
        """
        # Allocate the zero initial state directly on the input's device and
        # in its dtype, so it always matches ``x`` and needs no extra copy.
        h0 = torch.zeros(
            self.num_gru_layers,
            x.size(0),
            self.hidden_size,
            device=x.device,
            dtype=x.dtype,
        )
        out, _ = self.gru(x, h0)

        # Only the final time step feeds the read-out layer.
        out = self.fc1(out[:, -1, :])

        return out

In [None]:
########################################
#PART 1: EXPERIMENT CONFIGURATION SETUP
########################################

# Run selection: which YAML file to read, the RNG seed and the compute device.
config_file_name = "conf_gru_ie_1_1_1.yaml"
seed_input = 42
device_input = "cpu"

# All hyperparameters are read from a .yaml file, which was convenient while
# fine-tuning them.
config = load_config(f"configs/conf_gru_ie/{config_file_name}")

logging_cfg = config["logging"]
EXPERIMENT_NAME = logging_cfg["experiment_name"]
# Uncomment the next line to also record the seed in the experiment name.
#EXPERIMENT_NAME = f"{EXPERIMENT_NAME}__{seed_input}"
EXPERIMENT_NOTES = logging_cfg["notes"]

set_seed(seed_input)
device = get_torch_device(device_input)

# Optimisation settings.
training_cfg = config["training"]
BATCH_SIZE = training_cfg["batch_size"]
LEARNING_RATE = training_cfg["learning_rate"]
NUM_EPOCHS = training_cfg["num_epochs"]
WEIGHT_DECAY = training_cfg["weight_decay"]
AUGUMENTATION_RATE = training_cfg["augumentation_rate"]

# Network architecture.
model_cfg = config["model"]
INPUT_SIZE = model_cfg["input_size"]
HIDDEN_CHANNELS = model_cfg["hidden_channels"]
OUTPUT_SIZE = model_cfg["output_size"]
NUM_GRU_LAYERS = model_cfg["num_gru_layers"]
DROPOUT = model_cfg["dropout"]

# Windowing and cross-validation fold.
data_cfg = config["data"]
TIME_STEPS = data_cfg["time_steps"]
PREDICTION_WINDOW = data_cfg["prediction_window"]
K_FOLD = data_cfg["k_fold"]

In [None]:
###########################
#PART 2: PREPARING DATA
###########################

file_ids_train, file_ids_val, file_ids_test = get_k_fold(K_FOLD)

# Load every split with the same windowing settings (train, then val, then test).
(
    (train_X_unscaled, train_y_unscaled),
    (val_X_unscaled, val_y_unscaled),
    (test_X_unscaled, test_y_unscaled),
) = [
    load_data(split_ids, time_steps=TIME_STEPS, sliding_window=PREDICTION_WINDOW)
    for split_ids in (file_ids_train, file_ids_val, file_ids_test)
]

# Drop the Q1-4 data, i.e. the first four entries of the last axis
# (the datetime column was already dropped in load_data()).
kept_features = slice(4, None)
train_X_unscaled = train_X_unscaled[:, :, kept_features]
val_X_unscaled = val_X_unscaled[:, :, kept_features]
test_X_unscaled = test_X_unscaled[:, :, kept_features]

# The scaler is built from the training split only and then applied to all three.
standard_scaler = StandardScaler(train_X_unscaled, train_y_unscaled)

train_X, val_X, test_X = (
    standard_scaler.standardize_X(split_X)
    for split_X in (train_X_unscaled, val_X_unscaled, test_X_unscaled)
)
train_y, val_y, test_y = (
    standard_scaler.standardize_y(split_y)
    for split_y in (train_y_unscaled, val_y_unscaled, test_y_unscaled)
)

(train_X, train_y), (val_X, val_y), (test_X, test_y) = [
    get_torch_data(split_X, split_y)
    for split_X, split_y in ((train_X, train_y), (val_X, val_y), (test_X, test_y))
]

train_dataset, val_dataset, test_dataset = (
    torch.utils.data.TensorDataset(split_X, split_y)
    for split_X, split_y in ((train_X, train_y), (val_X, val_y), (test_X, test_y))
)

# NOTE(review): the training loader is not shuffled either - confirm this is intentional.
train_loader, val_loader, test_loader = (
    torch.utils.data.DataLoader(split_dataset, batch_size=BATCH_SIZE, shuffle=False)
    for split_dataset in (train_dataset, val_dataset, test_dataset)
)

In [None]:
###############################################
#PART 3: DEEP LEARNING PART
###############################################

# Build the network from the configured hyperparameters and move it to the
# selected device before the project's initialisation helper is applied.
model = GRUModel(
    input_size=INPUT_SIZE,
    hidden_size=HIDDEN_CHANNELS,
    output_size=OUTPUT_SIZE,
    num_gru_layers=NUM_GRU_LAYERS,
    dropout=DROPOUT,
).to(device)
apply_glorot_xavier(model)

# Mean-squared-error objective, optimised with AdamW.
criterion = nn.MSELoss()
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY,
)

print(model)

total_params = count_parameters(model)
print(f"Total parameters of the model: {total_params}")

In [None]:
############################
#PART 4: TRAINING LOOP
############################

print("--------------TRAINING LOOP--------------")

# Per-epoch history for later inspection and plotting.
losses = []
val_losses = []
gradient_norms = []

# Start from +inf so the first finite validation loss is always accepted.
# A fixed sentinel could leave best_model_state as None when every loss
# exceeds it.
best_val = float("inf")
best_model_state = None

for epoch in range(NUM_EPOCHS):
    train_loss = train_model(model, train_loader, optimizer, criterion, device, AUGUMENTATION_RATE)
    val_loss, _ = validate_model(model, val_loader, criterion, device)

    losses.append(train_loss)
    val_losses.append(val_loss)

    #==============grad norms============
    # Recorded once per epoch, after training and validation have finished.
    total_norm = inspect_gradient_norms(model)
    gradient_norms.append(total_norm)
    #==========================

    print(f'{epoch+1}/{NUM_EPOCHS} | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}')

    # Keep an independent copy of the weights whenever validation improves;
    # deepcopy prevents later optimizer steps from altering the snapshot.
    if val_loss < best_val:
        best_val = val_loss
        best_model_state = copy.deepcopy(model.state_dict())
        print(f"model with val loss {val_loss} saved...")

print('Training completed saving....')
# Persisting the best weights to disk is currently disabled.
#torch.save(best_model_state, f'../models/try_run_{EXPERIMENT_NAME}.pth')


In [None]:
############################
#PART 5: MODEL EVALUATION
############################

# Restore the snapshot stored in best_model_state before scoring the test split.
model.load_state_dict(best_model_state)

test_loss, test_predictions_standardized = validate_model(model, test_loader, criterion, device)
test_predictions_standardized = test_predictions_standardized.cpu()
print(test_predictions_standardized.shape)

# Rescale with the scaler's y statistics (presumably the inverse of
# standardize_y - confirm against StandardScaler) and convert to a plain list.
test_predictions = (
    test_predictions_standardized * standard_scaler.y_std + standard_scaler.y_mean
).numpy().tolist()
print(f"avg. test loss {test_loss}")

In [None]:
# Overlay the unscaled test targets and the rescaled predictions on one axis.
y_true_list = test_y_unscaled.tolist()

fig, ax = plt.subplots(figsize=(20, 8))
line_style = dict(linewidth=0.5, marker='o', markersize=3)
ax.plot(y_true_list, label="True values", color="green", **line_style)
ax.plot(test_predictions, label="Prediction", color="orange", **line_style)

ax.legend()
ax.set_ylabel('Dst Value [nT]')
ax.set_title("Detail of test prediction")
ax.grid(axis='x', alpha=0.5, linestyle="--")
plt.show()