In [1]:
import os
import copy
import json
import yaml
import torch
import argparse
import pandas as pd
import torch.nn as nn
from helper import *


In [None]:
# Change the kernel's working directory to the assignment folder; the relative
# paths used later (the YAML config, `data/...`, `saved_models/...`) are resolved from there.
# NOTE(review): re-running this cell without restarting the kernel will presumably
# fail unless a nested folder of the same name exists — confirm.
%cd MTL_Assignment1_MT23028

In [6]:

# Debugging aid (disabled): uncomment to enable autograd anomaly detection.
# torch.autograd.set_detect_anomaly(True)

# Experiment settings are kept in a YAML file next to the notebook.
config_file = 'Assignment1_config.yaml'

# Parse the YAML document into the `configs` object used by the later cells.
with open(config_file, "r") as config_stream:
    configs = yaml.safe_load(config_stream)



In [None]:

# Read the list of feature columns for the configured feature set.
# The file is opened in a `with` block so the handle is closed as soon as the
# JSON has been parsed (a bare `json.load(open(...))` never closes it explicitly).
with open(f"data/{configs['Dataset']['name']}/features.json") as features_file:
    feature_set = json.load(features_file)["feature_sets"][f"{configs['Dataset']['set']}"]

# Load Train Dataset: only the era, the target and the selected feature columns are read.
train_dataset = pd.read_parquet(
    f"data/{configs['Dataset']['name']}/train.parquet",
    columns=["era", "target"] + feature_set,
)

# Reduce Dataset size
# Alternative era selections, kept for experimentation:
# train = pd.DataFrame(train_dataset[train_dataset["era"].isin(pd.Series(train_dataset["era"].unique()[::configs['Dataset'][configs['Dataset']['name']]['reduce_dataset_size']]))])
# train = pd.DataFrame(train_dataset[train_dataset["era"].isin(pd.Series(train_dataset["era"].unique()[-300:]))])
# Active selection: every era is kept, so `train` holds all rows of `train_dataset`.
# The mask is built from the unique eras, mirroring the variants above; slice
# `selected_eras`-style (e.g. `.unique()[-300:]`) to actually shrink the data.
train = pd.DataFrame(train_dataset[train_dataset["era"].isin(train_dataset["era"].unique())])


In [None]:
# Record the last era that appears in the training frame on the in-memory
# `configs` mapping (nothing is written back to the YAML file in this cell).
configs["Train"].update(
    last_train_era=int(train["era"].unique()[-1])
)

In [None]:

class LSTMModel(nn.Module):
    """Feed-forward embedding followed by an LSTM and a sigmoid output head.

    Each input row is treated as a sequence of length one: the features are
    projected to ``hidden_dim``, passed through the LSTM, and the result is
    mapped to ``output_dim`` values squashed into (0, 1) by a sigmoid.

    Args:
        input_dim: Number of input features per sample.
        output_dim: Number of outputs per sample.
        hidden_dim: Width of the embedding and of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability applied between stacked LSTM layers.
            It is ignored when ``num_layers == 1`` because ``nn.LSTM`` only
            applies dropout between layers (and warns if asked otherwise).
    """

    def __init__(self, input_dim, output_dim, hidden_dim=64, num_layers=2, dropout=0.1):
        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.hidden_dim = hidden_dim

        # Linear layer to project input features to the LSTM model dimension (hidden_dim)
        self.feature_embedding = nn.Linear(input_dim, hidden_dim)

        # LSTM layer. Inter-layer dropout is only meaningful with more than one
        # layer; passing a non-zero value with a single layer triggers a UserWarning.
        self.lstm = nn.LSTM(
            input_size=hidden_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0.0,
        )

        # Output layer
        self.fc_out = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Run the model on a batch of feature rows.

        Args:
            x: Tensor of shape ``[batch_size, input_dim]``.

        Returns:
            Tensor of sigmoid outputs with shape ``[batch_size]`` when
            ``output_dim == 1``, otherwise ``[batch_size, output_dim]``.
            The batch dimension is always preserved, including for a batch
            of a single sample.
        """
        # Project input features to LSTM model dimension: [batch_size, hidden_dim]
        x = self.feature_embedding(x)
        # Add a sequence axis of length 1 (batch_first=True):
        # [batch_size, hidden_dim] -> [batch_size, 1, hidden_dim]
        x = x.unsqueeze(1)
        # LSTM forward pass; the final hidden/cell states are not needed here
        x, _ = self.lstm(x)  # [batch_size, seq_len, hidden_dim]
        # Keep the output of the last (here: only) time step
        x = x[:, -1, :]  # [batch_size, hidden_dim]
        # Output layer followed by a sigmoid, so every output lies in (0, 1)
        x = torch.sigmoid(self.fc_out(x))
        # Drop only the trailing output axis (when it has size 1). A bare
        # `squeeze()` would also remove the batch axis for a batch of one
        # sample and return a 0-d tensor that no longer matches the targets.
        return x.squeeze(-1)

In [None]:

# Build the network with an input width taken from the selected feature set
# instead of a hard-coded 42, so switching the configured feature set does not
# silently leave the first layer with the wrong size.
# NOTE(review): assumes the loaders built by prepare_data_loader_list(configs)
# feed exactly the `feature_set` columns — confirm against the helper module.
model = LSTMModel(input_dim=len(feature_set), output_dim=1)

In [None]:

# Meta-train the model. `train_maml` and `prepare_data_loader_list` are not
# defined in this notebook — presumably they come from the `helper` star-import.
# NOTE(review): the hyperparameters below are hard-coded here rather than read
# from `configs`; confirm that is intentional.
maml = train_maml(
    net=model,
    data_loader_list=prepare_data_loader_list(configs),
    epochs=100,
    fast_adaptation_steps=5,
    inner_lr=1e-2,
    outer_lr=5e-3,
)

In [None]:
# Make sure the per-experiment output directory exists.
# `exist_ok=True` makes the call a no-op when the directory is already there,
# replacing the separate isdir() check and avoiding the gap between check and creation.
os.makedirs(f"saved_models/{configs['Experiment_Name']}", exist_ok=True)

In [None]:
# Persist the trained weights (state_dict only) under the experiment's folder,
# using the configured model name as the file name.
torch.save(
    maml.state_dict(),
    f"saved_models/{configs['Experiment_Name']}/{configs['Model']['name']}.pth",
)