In [24]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt 

In [25]:
# Pick the compute device: the GPU when PyTorch can see one, otherwise the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cpu


In [26]:
# Read the labelled table and the unlabelled test table from the working directory
train_data, test_data = map(pd.read_csv, ("datasetTV.csv", "datasetTest.csv"))


In [27]:
# Preprocess data
# Every column but the last is a feature; the last column holds the class label.
X_train = train_data.iloc[:, :-1].values     # Features
y_train = train_data.iloc[:, -1].values - 1  # Shift labels so that they start from 0
X_test = test_data.values                    # Test features (no labels provided)

# nn.CrossEntropyLoss needs class indices >= 0, so catch a wrong shift early
assert y_train.min() >= 0, "labels must be non-negative after shifting"

# Normalize features
# NOTE(review): the scaler is fitted on every labelled row, including the rows that
# are later held out for validation -- confirm that this is acceptable.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Convert to PyTorch tensors and put them on the selected device.
# y_train is already zero-based at this point, so it must NOT be shifted again:
# a second "- 1" would produce labels starting at -1.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train, dtype=torch.long).to(device)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)


In [33]:
"""Check for class imbalance"""
from collections import Counter

# Tally how many samples carry each class label
class_counts = Counter()
class_counts.update(y_train)
print("Class Counts:", class_counts)

Class Counts: Counter({np.int64(2): 1145, np.int64(0): 1126, np.int64(1): 1118, np.int64(4): 1105, np.int64(3): 1100})


The classes are roughly balanced (between 1,100 and 1,145 samples each), so we do not apply techniques that compensate for class imbalance, such as a weighted loss function.

In [38]:
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset

# Splitting the dataset into training and validation sets.
# The split is always taken from the full labelled table (re-derived from train_data
# and the already fitted scaler) instead of from X_train / y_train, because this cell
# overwrites those two names: splitting them in place would split an already split
# training set on every re-run and silently shrink it.
X_all = scaler.transform(train_data.iloc[:, :-1].values)
y_all = train_data.iloc[:, -1].values - 1  # zero-based class labels
X_train, X_val, y_train, y_val = train_test_split(X_all, y_all, test_size=0.2, random_state=42)

# Every labelled row must end up in exactly one of the two subsets
assert len(X_train) + len(X_val) == len(X_all)
print(f"Training set size: {len(X_train)}")
print(f"Validation set size: {len(X_val)}")


# Create torch datasets (kept on the CPU; batches are moved to the device when used)
train_dataset = TensorDataset(
    torch.tensor(X_train, dtype=torch.float32),
    torch.tensor(y_train, dtype=torch.long)
)
val_dataset = TensorDataset(
    torch.tensor(X_val, dtype=torch.float32),
    torch.tensor(y_val, dtype=torch.long)
)

Training set size: 3580
Validation set size: 895


In [28]:
# Fully connected classifier that inherits from nn.Module
class Model(nn.Module):
    """Feed-forward network with two hidden layers for multi-class classification.

    Each hidden layer applies Linear -> BatchNorm1d -> ReLU -> Dropout. The output
    layer returns raw, unnormalized class scores (no softmax is applied).

    Args:
        in_features: Number of input features per sample.
        h1: Width of the first hidden layer.
        h2: Width of the second hidden layer.
        num_classes: Number of output classes (size of the returned score vector).
        p_drop1: Dropout probability after the first hidden layer.
        p_drop2: Dropout probability after the second hidden layer.
    """

    def __init__(self, in_features, h1, h2, num_classes, p_drop1=0.3, p_drop2=0.2):
        super().__init__()  # Instantiate the parent class

        # First hidden layer
        self.fc1 = nn.Linear(in_features, h1)
        self.bn1 = nn.BatchNorm1d(h1)
        self.dropout1 = nn.Dropout(p_drop1)

        # Second hidden layer
        self.fc2 = nn.Linear(h1, h2)
        self.bn2 = nn.BatchNorm1d(h2)
        self.dropout2 = nn.Dropout(p_drop2)

        # Output layer
        self.out = nn.Linear(h2, num_classes)

    def forward(self, x):
        """Return the class scores for a batch ``x`` of shape (batch, in_features)."""
        x = F.relu(self.bn1(self.fc1(x)))
        x = self.dropout1(x)
        x = F.relu(self.bn2(self.fc2(x)))
        x = self.dropout2(x)
        return self.out(x)

In [30]:

# Plotting training and validation loss
import matplotlib.pyplot as plt

def plot_training_process(train_losses, val_losses):
    """Draw the training and validation loss curves on a single figure.

    Args:
        train_losses: Sequence of training-loss values, plotted against their index.
        val_losses: Sequence of validation-loss values, plotted against their index.
    """
    _, ax = plt.subplots(figsize=(10, 6))
    curves = ((train_losses, "Training Loss"), (val_losses, "Validation Loss"))
    for series, name in curves:
        ax.plot(series, label=name)
    ax.set_xlabel("Epoch")
    ax.set_ylabel("Loss")
    ax.set_title("Training and Validation Loss Over Epochs")
    ax.legend()
    plt.show()


In [39]:

# Hyperparameter tuning using a grid search
from itertools import product
from torch.utils.data import DataLoader

# Define hyperparameter grid.
# A wider grid that can be searched instead of the single configuration below:
#   "learning_rate": [0.001, 0.01, 0.1]
#   "batch_size": [64, 128, 256]
#   "hidden_layer_size": [32, 64, 128]
param_grid = {
    "learning_rate": [0.0001],
    "batch_size": [64],
    "hidden_layer_size": [128]
}

epochs = 100
criteria = nn.CrossEntropyLoss()

# The input width is read from the data instead of being hard-coded
n_features = train_dataset.tensors[0].shape[1]
n_classes = 5

# Perform grid search
best_params = None
best_val_loss = float("inf")

for lr, bs, hls in product(param_grid["learning_rate"], param_grid["batch_size"], param_grid["hidden_layer_size"]):
    # Same weight initialisation and shuffling order for every configuration, so
    # that differences in loss come from the hyperparameters and runs are repeatable
    torch.manual_seed(42)

    # Fresh model and optimizer for the current hyperparameters
    model = Model(in_features=n_features, h1=hls, h2=hls, num_classes=n_classes).to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Create DataLoaders for current batch size
    train_loader = DataLoader(train_dataset, batch_size=bs, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=bs, shuffle=False)

    train_losses, val_losses = [], []

    print(f"Hyperparameters: lr: {lr}, bs: {bs}, hls: {hls}")
    # Training loop with validation
    for epoch in range(epochs):
        model.train()
        train_loss_sum = 0.0
        for batch_X, batch_y in train_loader:
            # The loader already yields tensors; only move them to the device
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)

            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criteria(outputs, batch_y)
            loss.backward()
            optimizer.step()
            # criteria returns the batch mean; weight it by the batch size so the
            # smaller last batch does not count as much as a full one
            train_loss_sum += loss.item() * batch_X.size(0)

        train_losses.append(train_loss_sum / len(train_dataset))

        # Validation step (no dropout, running batch-norm statistics, no gradients)
        model.eval()
        val_loss_sum = 0.0
        with torch.no_grad():
            for val_X, val_y in val_loader:
                val_X, val_y = val_X.to(device), val_y.to(device)
                outputs = model(val_X)
                val_loss_sum += criteria(outputs, val_y).item() * val_X.size(0)
        # Exact per-sample mean over the validation set
        val_losses.append(val_loss_sum / len(val_dataset))

        if (epoch + 1) % 10 == 0:
            print(f"Epoch [{epoch+1}/{epochs}], Train Loss: {train_losses[-1]}, Val Loss: {val_losses[-1]}")

    # Plot the training process
    plot_training_process(train_losses, val_losses)

    # Compare configurations on their best epoch rather than on the last one, so a
    # configuration is not penalised for overfitting late in training
    config_best_val_loss = min(val_losses)
    if config_best_val_loss < best_val_loss:
        best_val_loss = config_best_val_loss
        best_params = {"learning_rate": lr, "batch_size": bs, "hidden_layer_size": hls}

print(f"Best Hyperparameters: {best_params}")



Hyperparameters: lr: 0.0001, bs: 64, hls: 128
Epoch [10/100], Train Loss: 0.6901615347181048, Val Loss: 0.6821283265865049
Epoch [20/100], Train Loss: 0.5033819068755422, Val Loss: 0.563628234010835
Epoch [30/100], Train Loss: 0.39130449135388645, Val Loss: 0.5210122944922421
Epoch [40/100], Train Loss: 0.31979852595499586, Val Loss: 0.509101799480076
Epoch [50/100], Train Loss: 0.24644878586488111, Val Loss: 0.5131844142295795
Epoch [60/100], Train Loss: 0.20100395447973693, Val Loss: 0.5286616043005575
Epoch [70/100], Train Loss: 0.1543957298355443, Val Loss: 0.5437745781584159


KeyboardInterrupt: 