In [2]:
import pandas as pd
from sklearn.utils import shuffle

X_test = pd.read_csv('X_test.csv')
X_train = pd.read_csv('X_train.csv')
X_val = pd.read_csv('X_val.csv')
y_train = pd.read_csv('y_train.csv')
y_val = pd.read_csv('y_val.csv')

In [4]:
X_train.head(50)

Unnamed: 0,Traffic_Volume,tavg,tmin,tmax,prcp,snow,wdir,wspd,pres,year,...,month_sin,month_cos,week_number_sin,week_number_cos,quarter_sin,quarter_cos,four_month_sin,four_month_cos,half_year_sin,half_year_cos
0,-0.277074,-0.101973,-0.381232,-0.134296,1.188318,-0.137643,1.603098,0.133181,-1.822248,2022,...,0.5,0.866025,6.432491e-16,1.0,1.0,6.123234000000001e-17,0.866025,-0.5,1.224647e-16,-1.0
1,-0.499842,-1.17281,-1.231675,-1.351007,-0.306376,-0.137643,-2.00976,0.485982,0.014492,2022,...,0.5,0.866025,6.432491e-16,1.0,1.0,6.123234000000001e-17,0.866025,-0.5,1.224647e-16,-1.0
2,-0.332017,-1.617308,-1.345759,-1.712218,-0.378236,-0.137643,1.018358,-0.774021,1.760901,2022,...,0.5,0.866025,0.1205367,0.992709,1.0,6.123234000000001e-17,0.866025,-0.5,1.224647e-16,-1.0
3,-1.266043,-1.475877,-1.345759,-1.189412,-0.378236,-0.137643,-0.25554,0.737983,0.52637,2022,...,0.5,0.866025,0.1205367,0.992709,1.0,6.123234000000001e-17,0.866025,-0.5,1.224647e-16,-1.0
4,-0.734597,-1.081889,-1.750238,-1.246446,-0.378236,-0.137643,0.579803,3.15719,-0.99421,2022,...,0.5,0.866025,0.1205367,0.992709,1.0,6.123234000000001e-17,0.866025,-0.5,1.224647e-16,-1.0
5,-0.596741,-2.152726,-1.978405,-2.396618,-0.378236,-0.137643,0.893057,0.670783,0.511315,2022,...,0.5,0.866025,0.1205367,0.992709,1.0,6.123234000000001e-17,0.866025,-0.5,1.224647e-16,-1.0
6,-0.729602,-2.425486,-2.382884,-2.339584,-0.378236,-0.137643,0.684221,-0.35402,1.685625,2022,...,0.5,0.866025,0.1205367,0.992709,1.0,6.123234000000001e-17,0.866025,-0.5,1.224647e-16,-1.0
7,-0.084275,-1.890068,-1.750238,-1.293973,0.239762,-0.137643,-0.464375,0.737983,1.309243,2022,...,0.5,0.866025,0.1205367,0.992709,1.0,6.123234000000001e-17,0.866025,-0.5,1.224647e-16,-1.0
8,-1.569726,-0.980867,-1.573926,-1.132379,1.087714,-0.137643,0.91394,1.157984,1.203857,2022,...,0.5,0.866025,0.1205367,0.992709,1.0,6.123234000000001e-17,0.866025,-0.5,1.224647e-16,-1.0
9,-1.152162,-1.910272,-1.812465,-1.864307,-0.378236,-0.137643,0.934824,0.368382,2.61905,2022,...,0.5,0.866025,0.2393157,0.970942,1.0,6.123234000000001e-17,0.866025,-0.5,1.224647e-16,-1.0


In [6]:
import torch
import torch.nn as nn
import torch.optim as optim

X_train_numeric = X_train.select_dtypes(include=[float, int])
X_val_numeric = X_val.select_dtypes(include=[float, int])

X_train_tensor = torch.tensor(X_train_numeric.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)  
X_val_tensor = torch.tensor(X_val_numeric.values, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val.values, dtype=torch.float32)  

In [8]:
y_train_labels = y_train_tensor
y_val_labels = y_val_tensor

In [10]:
y_train_tensor.shape

torch.Size([705, 1])

In [12]:
train_combined = torch.cat((X_train_tensor, y_train_tensor), dim=1)
val_combined = torch.cat((X_val_tensor, y_val_tensor), dim=1)
full_dataset = torch.cat((train_combined, val_combined), dim=0)

In [14]:
from torch.utils.data import DataLoader, Dataset

class TimeSeriesDataset(Dataset):
    def __init__(self, sequences, targets):
        self.sequences = sequences
        self.targets = targets

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        return self.sequences[idx], self.targets[idx]

In [80]:
def create_subsequences(full_data, seq_length):
    sequences = []
    targets = []

    X_data = full_data[:, :-1]  
    y_data = full_data[:, -1]  

    for i in range(seq_length, len(full_data)):
        X_seq = X_data[i - seq_length:i]
        y_seq = y_data[i] 
        sequences.append(X_seq)
        targets.append(y_seq)
        
    return torch.stack(sequences), torch.tensor(targets).long()

sequence_length = 4
X_sequences, y_sequences = create_subsequences(full_dataset, sequence_length)

print(f"X_sequences shape: {X_sequences.shape}")
print(f"y_sequences shape: {y_sequences.shape}")

X_sequences shape: torch.Size([878, 4, 35])
y_sequences shape: torch.Size([878])


In [82]:
full_dataset_df = pd.DataFrame(full_dataset.numpy())
full_dataset_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,26,27,28,29,30,31,32,33,34,35
0,-0.277074,-0.101973,-0.381232,-0.134296,1.188318,-0.137643,1.603098,0.133181,-1.822248,2022.0,...,0.866025,6.432490e-16,1.000000,1.000000e+00,6.123234e-17,0.866025,-0.5,1.224647e-16,-1.0,0.0
1,-0.499842,-1.172810,-1.231675,-1.351007,-0.306376,-0.137643,-2.009760,0.485982,0.014492,2022.0,...,0.866025,6.432490e-16,1.000000,1.000000e+00,6.123234e-17,0.866025,-0.5,1.224647e-16,-1.0,0.0
2,-0.332017,-1.617308,-1.345759,-1.712218,-0.378236,-0.137643,1.018358,-0.774021,1.760901,2022.0,...,0.866025,1.205367e-01,0.992709,1.000000e+00,6.123234e-17,0.866025,-0.5,1.224647e-16,-1.0,0.0
3,-1.266043,-1.475877,-1.345759,-1.189412,-0.378236,-0.137643,-0.255540,0.737983,0.526370,2022.0,...,0.866025,1.205367e-01,0.992709,1.000000e+00,6.123234e-17,0.866025,-0.5,1.224647e-16,-1.0,0.0
4,-0.734597,-1.081890,-1.750238,-1.246446,-0.378236,-0.137643,0.579803,3.157190,-0.994210,2022.0,...,0.866025,1.205367e-01,0.992709,1.000000e+00,6.123234e-17,0.866025,-0.5,1.224647e-16,-1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
877,1.530042,0.726410,0.946289,0.550104,-0.335120,-0.137643,0.611129,1.275585,-1.566309,2024.0,...,-0.866025,4.647232e-01,-0.885456,1.224647e-16,-1.000000e+00,-0.866025,-0.5,1.224647e-16,-1.0,0.0
878,1.462113,0.675899,0.655894,0.654665,-0.306376,-0.137643,0.861731,0.133181,-0.361889,2024.0,...,-0.866025,4.647232e-01,-0.885456,1.224647e-16,-1.000000e+00,-0.866025,-0.5,1.224647e-16,-1.0,0.0
879,2.159386,0.473854,0.541810,0.502577,-0.378236,-0.137643,1.237636,0.183582,0.360763,2024.0,...,-0.866025,4.647232e-01,-0.885456,1.224647e-16,-1.000000e+00,-0.866025,-0.5,1.224647e-16,-1.0,0.0
880,2.273267,0.514263,0.313642,0.607138,-0.378236,-0.137643,-1.967993,-1.513224,0.752199,2024.0,...,-0.866025,4.647232e-01,-0.885456,1.224647e-16,-1.000000e+00,-0.866025,-0.5,1.224647e-16,-1.0,0.0


In [434]:
import torch
import random

def create_subsequences_with_duplication(full_data, seq_length, random_duplicate):
    sequences = []
    targets = []

    X_data = full_data[:, :-1]  
    y_data = full_data[:, -1]  

    for i in range(seq_length, len(full_data)):
        X_seq = X_data[i - seq_length:i]
        y_seq = y_data[i]
        
        sequences.append(X_seq)
        targets.append(y_seq)

        #had num_duplicates to see if duplicating data and bagging would help
        if y_seq == 1:       
            num_duplicates = 1
            for _ in range(num_duplicates):
                sequences.append(X_seq)
                targets.append(y_seq)
        elif y_seq == 2:
            num_duplicates = 1
            for _ in range(num_duplicates):
                sequences.append(X_seq)
                targets.append(y_seq)
        elif y_seq == 3:
            num_duplicates = 2
            for _ in range(num_duplicates):
                sequences.append(X_seq)
                targets.append(y_seq)
        elif y_seq == 4:
            num_duplicates = 2
            for _ in range(num_duplicates):
                sequences.append(X_seq)
                targets.append(y_seq)
        
        
    return torch.stack(sequences), torch.tensor(targets).long()

sequence_length = 3
random_duplicate = 4

X_sequences, y_sequences = create_subsequences_with_duplication(full_dataset, sequence_length, random_duplicate)

In [436]:
y_sequences.shape

torch.Size([913])

In [438]:
import torch
import random

def shuffle_sequences(X_sequences, y_sequences):
    combined = list(zip(X_sequences, y_sequences))
    random.shuffle(combined)
    X_shuffled, y_shuffled = zip(*combined)
    
    X_shuffled = torch.stack(X_shuffled)
    y_shuffled = torch.tensor(y_shuffled).long()
    
    return X_shuffled, y_shuffled

X_shuffled, y_shuffled = shuffle_sequences(X_sequences, y_sequences)

In [440]:
X_sequences, y_sequences = X_shuffled, y_shuffled

In [442]:
batch_size = 16
train_loader = DataLoader(train_dataset, batch_size=batch_size)
val_loader = DataLoader(val_dataset, batch_size=batch_size)

for X_batch, y_batch in train_loader:
    print(f"Train batch shapes: X: {X_batch.shape}, y: {y_batch.shape}")
    break

for X_batch, y_batch in val_loader:
    print(f"Validation batch shapes: X: {X_batch.shape}, y: {y_batch.shape}")
    break

Train batch shapes: X: torch.Size([16, 3, 35]), y: torch.Size([16])
Validation batch shapes: X: torch.Size([16, 5, 35]), y: torch.Size([16])


In [444]:
X_np = X_sequences.numpy()
y_np = y_sequences.numpy()

In [446]:
class_frequencies = np.array([0.9276, 0.0593, 0.0066, 0.0033, 0.0033])
inverse_frequencies = 1 / class_frequencies
alpha = inverse_frequencies / inverse_frequencies.sum()

print(alpha)

[0.00139011 0.02174472 0.19537303 0.39074607 0.39074607]


In [448]:
unique, counts = np.unique(y_np, return_counts=True)

percentages = counts / len(y_np) * 100

value_percentages = dict(zip(unique, percentages))

print("Percentage of each value in y_np:")
for value, percentage in value_percentages.items():
    print(f"Value {value}: {percentage:.2f}%")

print(f"\nTotal: {sum(percentages):.2f}%")

Percentage of each value in y_np:
Value 0: 92.77%
Value 1: 5.91%
Value 2: 0.66%
Value 3: 0.33%
Value 4: 0.33%

Total: 100.00%


In [450]:
X_np = X_sequences.numpy()
y_np = y_sequences.numpy()

X_train_tensor = torch.tensor(X_np, dtype=torch.float32)
y_train_tensor = torch.tensor(y_np, dtype=torch.long)

train_dataset = TimeSeriesDataset(X_train_tensor, y_train_tensor)

In [452]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class GatedLinearUnit(nn.Module):
    def __init__(self, input_size, output_size):
        super(GatedLinearUnit, self).__init__()
        self.linear = nn.Linear(input_size, output_size)
        self.gate = nn.Linear(input_size, output_size)

    def forward(self, x):
        return self.linear(x) * torch.sigmoid(self.gate(x))

class LayerNormLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, dropout, bidirectional):
        super(LayerNormLSTM, self).__init__()
        self.lstm = nn.LSTM(
            input_size, hidden_size, num_layers=num_layers,
            batch_first=True, dropout=dropout, bidirectional=bidirectional
        )
        self.layer_norm = nn.LayerNorm(hidden_size * (2 if bidirectional else 1))

    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        lstm_out = self.layer_norm(lstm_out)
        return lstm_out

class LayerDropLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, dropout, bidirectional, layer_drop_prob=0.2):
        super(LayerDropLSTM, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=num_layers,
                            batch_first=True, dropout=dropout, bidirectional=bidirectional)
        self.layer_drop_prob = layer_drop_prob
        self.projection = nn.Linear(input_size, hidden_size * (2 if bidirectional else 1))

    def forward(self, x):
        if self.training and torch.rand(1).item() < self.layer_drop_prob:
            return self.projection(x)  #projecting input to match LSTM output size
        lstm_out, _ = self.lstm(x)
        return lstm_out

class MultiHeadSelfAttention(nn.Module):
    def __init__(self, hidden_size, num_heads):
        super(MultiHeadSelfAttention, self).__init__()
        self.attention = nn.MultiheadAttention(embed_dim=hidden_size, num_heads=num_heads, batch_first=True)

    def forward(self, x):
        attn_output, _ = self.attention(x, x, x)
        return attn_output

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.pos_embedding = nn.Embedding(max_len, d_model)

    def forward(self, x):
        seq_len = x.size(1)
        positions = torch.arange(0, seq_len, dtype=torch.long, device=x.device).unsqueeze(0)
        pos_enc = self.pos_embedding(positions)
        return x + pos_enc

class Mish(nn.Module):
    def forward(self, x):
        return x * torch.tanh(F.softplus(x))


class ResidualLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, dropout, bidirectional):
        super(ResidualLSTM, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=num_layers,
                            batch_first=True, dropout=dropout, bidirectional=bidirectional)
        self.projection = nn.Linear(hidden_size * (2 if bidirectional else 1), hidden_size * (2 if bidirectional else 1))

    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        projected_out = self.projection(lstm_out)
        return projected_out



class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes, lstm_dropout=0.3, fcn_dropout=0.5, debug=False):
        super(LSTMModel, self).__init__()
        self.bidirectional = True
        self.num_directions = 2 if self.bidirectional else 1
        self.debug = debug
        
        self.num_heads = 8
        self.hidden_size = hidden_size // (self.num_directions * 3)
        
        if self.debug:
            print(f"Adjusted hidden_size: {self.hidden_size}")
        
        self.embedding_size = input_size

        self.positional_encoding = PositionalEncoding(self.embedding_size)

        self.lstm1 = LayerNormLSTM(
            self.embedding_size, self.hidden_size, num_layers=num_layers,
            dropout=lstm_dropout, bidirectional=self.bidirectional
        )
        self.lstm2 = ResidualLSTM(
            self.embedding_size, self.hidden_size, num_layers=num_layers,
            dropout=lstm_dropout, bidirectional=self.bidirectional
        )
        self.lstm3 = LayerDropLSTM(
            self.embedding_size, self.hidden_size, num_layers=num_layers,
            dropout=lstm_dropout, bidirectional=self.bidirectional
        )

        self.combined_lstm_size = self.hidden_size * self.num_directions * 3
        
        if self.debug:
            print(f"Combined LSTM size: {self.combined_lstm_size}")
            print(f"Number of heads: {self.num_heads}")
            print(f"Is combined_lstm_size divisible by num_heads? {self.combined_lstm_size % self.num_heads == 0}")

        self.transformer_layer = nn.TransformerEncoderLayer(
            d_model=self.combined_lstm_size,
            nhead=self.num_heads,
            batch_first=True
        )
        self.transformer_encoder = nn.TransformerEncoder(
            self.transformer_layer, num_layers=3
        )

        self.attention = MultiHeadSelfAttention(self.combined_lstm_size, self.num_heads)
        self.glu1 = GatedLinearUnit(self.combined_lstm_size, self.combined_lstm_size // 2)
        self.glu2 = GatedLinearUnit(self.combined_lstm_size // 2, self.combined_lstm_size // 4)

        self.fc = nn.Linear(self.combined_lstm_size // 4, num_classes)

        self.layer_norm2 = nn.LayerNorm(self.combined_lstm_size // 2)
        self.layer_norm3 = nn.LayerNorm(self.combined_lstm_size // 4)

        self.dropout = nn.Dropout(p=fcn_dropout)

    def forward(self, x):
        x = self.positional_encoding(x)
        
        if self.debug:
            print(f"Shape after positional encoding: {x.shape}")

        lstm_out1 = self.lstm1(x)
        lstm_out2 = self.lstm2(x)
        lstm_out3 = self.lstm3(x)
        
        if self.debug:
            print(f"Shape of lstm_out1: {lstm_out1.shape}")
            print(f"Shape of lstm_out2: {lstm_out2.shape}")
            print(f"Shape of lstm_out3: {lstm_out3.shape}")

        lstm_out_concat = torch.cat((lstm_out1, lstm_out2, lstm_out3), dim=-1)
        
        if self.debug:
            print(f"Shape after concatenation: {lstm_out_concat.shape}")

        transformer_out = self.transformer_encoder(lstm_out_concat)
        transformer_out = lstm_out_concat + transformer_out  
        
        if self.debug:
            print(f"Shape after transformer: {transformer_out.shape}")

        attn_out = self.attention(transformer_out)
        attn_out = transformer_out + attn_out  
        
        if self.debug:
            print(f"Shape after attention: {attn_out.shape}")

        global_avg_pool = torch.mean(attn_out, dim=1)
        
        if self.debug:
            print(f"Shape after global average pooling: {global_avg_pool.shape}")

        out = self.glu1(global_avg_pool)
        out = self.layer_norm2(out)
        out = self.dropout(out)
        
        if self.debug:
            print(f"Shape after first GLU: {out.shape}")

        out = self.glu2(out)
        out = self.layer_norm3(out)
        out = self.dropout(out)
        
        if self.debug:
            print(f"Shape after second GLU: {out.shape}")

        out = self.fc(out)
        
        if self.debug:
            print(f"Final output shape: {out.shape}")

        return out

class FocalLoss(nn.Module):
    def __init__(self, alpha, gamma=2.5, reduction='mean', label_smoothing=0.1):
        super(FocalLoss, self).__init__()
        self.alpha = torch.tensor(alpha) 
        self.gamma = gamma
        self.reduction = reduction
        self.label_smoothing = label_smoothing

    def forward(self, inputs, targets):
        num_classes = inputs.size(1)
        smoothed_labels = F.one_hot(targets, num_classes=num_classes)
        smoothed_labels = smoothed_labels * (1 - self.label_smoothing) + self.label_smoothing / num_classes

        ce_loss = F.cross_entropy(inputs, targets, reduction='none', label_smoothing=self.label_smoothing)
        pt = torch.exp(-ce_loss)
        focal_loss = self.alpha[targets] * (1 - pt) ** self.gamma * ce_loss

        if self.reduction == 'mean':
            return focal_loss.mean()
        elif self.reduction == 'sum':
            return focal_loss.sum()
        else:
            return focal_loss

In [454]:
X_sequences.shape

torch.Size([913, 3, 35])

In [456]:
input_size = X_sequences.shape[2] #should match your X_sequences.shape[2] which is 35
hidden_size = 384  #should be divisible by (num_directions * 3 * num_heads)
num_layers = 4
num_classes = 5

print(f"Input size: {input_size}")
print(f"Initial hidden size: {hidden_size}")

model = LSTMModel(input_size, hidden_size, num_layers, num_classes, debug=True)

batch_size = 16
sequence_length = 3
dummy_input = torch.randn(batch_size, sequence_length, input_size)

output = model(dummy_input)

Input size: 35
Initial hidden size: 384
Adjusted hidden_size: 64
Combined LSTM size: 384
Number of heads: 8
Is combined_lstm_size divisible by num_heads? True
Shape after positional encoding: torch.Size([16, 3, 35])
Shape of lstm_out1: torch.Size([16, 3, 128])
Shape of lstm_out2: torch.Size([16, 3, 128])
Shape of lstm_out3: torch.Size([16, 3, 128])
Shape after concatenation: torch.Size([16, 3, 384])
Shape after transformer: torch.Size([16, 3, 384])
Shape after attention: torch.Size([16, 3, 384])
Shape after global average pooling: torch.Size([16, 384])
Shape after first GLU: torch.Size([16, 192])
Shape after second GLU: torch.Size([16, 96])
Final output shape: torch.Size([16, 5])


In [460]:
input_size = X_sequences.shape[2]
hidden_size = 384 #should be divisible by (num_directions * 3 * num_heads) 
num_layers = 2
num_epochs = 55
num_classes = 5
bagging = 5
batch_size = 16
early_stop_patience = 10
subset_size = 555
alpha_np = [0.70, 0.27, 0.01, 0.01, 0.01]

In [462]:
def init_weights(m):
    if isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight)
        nn.init.zeros_(m.bias)
    elif isinstance(m, nn.LSTM):
        for name, param in m.named_parameters():
            if 'weight_ih' in name:
                nn.init.xavier_uniform_(param.data)
            elif 'weight_hh' in name:
                nn.init.orthogonal_(param.data)
            elif 'bias_ih' in name:
                nn.init.zeros_(param.data)
                #setting forget gate bias to 1
                n = param.size(0)
                start, end = n // 4, n // 2
                param.data[start:end].fill_(1.)
            elif 'bias_hh' in name:
                nn.init.zeros_(param.data)
    elif isinstance(m, nn.LayerNorm):
        nn.init.ones_(m.weight)
        nn.init.zeros_(m.bias)
    elif isinstance(m, nn.MultiheadAttention):
        if m.in_proj_weight is not None:
            nn.init.xavier_uniform_(m.in_proj_weight)
        if m.out_proj.weight is not None:
            nn.init.xavier_uniform_(m.out_proj.weight)
        if m.in_proj_bias is not None:
            nn.init.zeros_(m.in_proj_bias)
        if m.out_proj.bias is not None:
            nn.init.zeros_(m.out_proj.bias)

In [464]:
import random
from torch.utils.data import DataLoader, SubsetRandomSampler
import copy

models = []
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

for bagging_idx in range(bagging):
    print(f"Training model {bagging_idx + 1}...")

    model = LSTMModel(input_size, hidden_size, num_layers, num_classes, debug=False)
    torch.manual_seed(bagging_idx)
    model.to(device)
    
    criterion = FocalLoss(alpha=alpha_np, gamma=2.5, label_smoothing=0.1)
    optimizer = torch.optim.AdamW(model.parameters(), lr=0.0001, weight_decay=1e-7)
    
    random.seed(bagging_idx * 55)
    indices = random.sample(range(len(train_dataset)), subset_size)
    subset_labels = [train_dataset[i][1].item() for i in indices]

    class_counts = Counter(subset_labels)
    total_samples = len(subset_labels)
    percentages = {cls: (count / total_samples) * 100 for cls, count in class_counts.items()}

    print("Class counts in the subset:")
    for cls in sorted(class_counts.keys()):
        print(f"Class {cls}: {class_counts[cls]} samples ({percentages[cls]:.2f}%)")


    train_sampler = SubsetRandomSampler(indices)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=train_sampler)

    best_accuracy = 0
    best_model_state = None  

    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        correct = 0
        total = 0

        for X_batch, y_batch in train_loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)

            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)

            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            train_loss += loss.item() * X_batch.size(0)
            _, predicted = torch.max(outputs.data, 1)
            total += y_batch.size(0)
            correct += (predicted == y_batch).sum().item()

        train_loss /= len(train_loader.dataset)
        train_accuracy = 100 * correct / total

        if train_accuracy > best_accuracy:
            best_accuracy = train_accuracy
            best_model_state = copy.deepcopy(model.state_dict())  

        print(f'Epoch [{epoch+1}/{num_epochs}], '
              f'Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.2f}%, Best Acc: {best_accuracy:.5f}%')

    models.append(best_model_state)
    print(f"Model {bagging_idx + 1} training complete with best accuracy: {best_accuracy:.2f}%\n")


Training model 1...
Class counts in the subset:
Class 0: 517 samples (93.15%)
Class 1: 33 samples (5.95%)
Class 2: 3 samples (0.54%)
Class 3: 1 samples (0.18%)
Class 4: 1 samples (0.18%)
Epoch [1/55], Train Loss: 0.0864, Train Acc: 84.68%, Best Acc: 84.68468%
Epoch [2/55], Train Loss: 0.0420, Train Acc: 93.15%, Best Acc: 93.15315%
Epoch [3/55], Train Loss: 0.0380, Train Acc: 92.97%, Best Acc: 93.15315%
Epoch [4/55], Train Loss: 0.0396, Train Acc: 92.79%, Best Acc: 93.15315%
Epoch [5/55], Train Loss: 0.0369, Train Acc: 92.97%, Best Acc: 93.15315%
Epoch [6/55], Train Loss: 0.0382, Train Acc: 93.15%, Best Acc: 93.15315%
Epoch [7/55], Train Loss: 0.0386, Train Acc: 93.15%, Best Acc: 93.15315%
Epoch [8/55], Train Loss: 0.0392, Train Acc: 93.15%, Best Acc: 93.15315%
Epoch [9/55], Train Loss: 0.0372, Train Acc: 93.15%, Best Acc: 93.15315%
Epoch [10/55], Train Loss: 0.0378, Train Acc: 93.15%, Best Acc: 93.15315%
Epoch [11/55], Train Loss: 0.0366, Train Acc: 93.15%, Best Acc: 93.15315%
Epoch [1

In [466]:
for idx, model_state in enumerate(models):
    torch.save(model_state, f"bagged_model_{idx + 1}.pth")

In [424]:
merged_data_2024_test_cleaned = pd.read_csv('X_test.csv')

In [426]:
X_test_numeric = merged_data_2024_test_cleaned.select_dtypes(include=[float, int])

X_test_tensor = torch.tensor(X_test_numeric.values, dtype=torch.float32)

def create_sequences_test(X, seq_length):
    sequences = []
    for i in range(seq_length, len(X)):
        X_seq = X[i-seq_length:i]
        sequences.append(X_seq)
    return torch.stack(sequences)

sequence_length = 5
X_test_sequences = create_sequences_test(X_test_tensor, sequence_length)

test_dataset = torch.utils.data.TensorDataset(X_test_sequences)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=16, shuffle=False)

In [428]:
sequence_predictions = []

for bagging_idx in range(bagging):
    model = LSTMModel(input_size, hidden_size, num_layers, num_classes)
    
    model.load_state_dict(torch.load(f'bagged_model_{bagging_idx + 1}.pth', map_location=device), strict=False)

    model.to(device)
    model.eval()

    model_sequence_predictions = []

    with torch.no_grad():
        for X_batch in test_loader:
            X_batch = X_batch[0].to(device)  
            outputs = model(X_batch)
            _, predicted = torch.max(outputs, 1)
            model_sequence_predictions.extend(predicted.cpu().numpy())
    
    sequence_predictions.append(model_sequence_predictions)


sequence_predictions = np.array(sequence_predictions)
final_predictions = np.apply_along_axis(lambda x: np.bincount(x).argmax(), axis=0, arr=sequence_predictions)

individual_predictions = []
for i in range(len(X_test_numeric) - sequence_length):
    individual_predictions.append(final_predictions[i])

for i in range(sequence_length):
    individual_predictions.insert(0, individual_predictions[0])


  model.load_state_dict(torch.load(f'bagged_model_{bagging_idx + 1}.pth', map_location=device), strict=False)


In [429]:
label_mapping_legend = {'Good': 0, 'Moderate': 1, 'Poor': 2, 'Severe': 3, 'Unhealthy': 4}
reverse_label_mapping = {v: k for k, v in label_mapping_legend.items()}
test_predictions_labels = pd.Series([reverse_label_mapping[pred] for pred in individual_predictions], name='Predicted_AQI')

ID_column = pd.Series(range(1, len(individual_predictions) + 1), name='ID')
predictions_df = pd.concat([ID_column, test_predictions_labels], axis=1)

predictions_df.to_csv('test_predictions_with_labels_bagging.csv', index=False)
print(predictions_df.head())
print(f"Total predictions: {len(predictions_df)}")
print(f"Original data points: {len(X_test_numeric)}")


   ID Predicted_AQI
0   1      Moderate
1   2      Moderate
2   3      Moderate
3   4      Moderate
4   5      Moderate
Total predictions: 133
Original data points: 133


In [432]:
##changing the distribution made the model's acuracy lower. could change the sample size of the bagging 
unique_value_counts = predictions_df['Predicted_AQI'].value_counts()
unique_value_counts 

Predicted_AQI
Good        82
Moderate    51
Name: count, dtype: int64