In [None]:
#Required libraries

import os
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.tensorboard import SummaryWriter
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Pick the compute device: CUDA when PyTorch reports a usable GPU, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")


In [None]:
# Load the label table and carve out a stratified train/validation split.

csv_path = "./Data/trainLabels.csv"  # label table read just below
data_dir = "./Data/train"  # image folder

df = pd.read_csv(csv_path)

# Sorted unique label values; a label's position in this list is its integer class id.
class_names = sorted(df['label'].unique())
class_map = dict(zip(class_names, range(len(class_names))))

# Hold out 20% of the rows for validation. Stratifying on the label column keeps the
# class proportions the same in both parts; the fixed seed makes the split repeatable.
train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df['label'],
)
print(f"Train samples: {len(train_df)}, Validation samples: {len(val_df)}")

In [None]:
# Sanity check: print the label stored in the CSV for one hand-picked image id.
image_id = 1009

matching_labels = df.loc[df['id'] == image_id, 'label']  # rows whose id equals image_id
print(f"Label for image {image_id}: {matching_labels.values[0]}")

In [None]:
#Image pre-processing

class CIFARDataset(Dataset):
    """Dataset that serves ``(image, label_index)`` pairs described by a dataframe.

    Every row must provide an ``id`` (the image's file stem) and a ``label``
    (the class name). Images are read from ``<data_dir>/<id>.png``.
    """

    def __init__(self, dataframe, data_dir, transform=None):
        """Store the sample table, the image folder and the optional transform.

        Args:
            dataframe: Table with ``id`` and ``label`` columns, one row per image.
            data_dir: Folder that contains the ``<id>.png`` files.
            transform: Optional callable applied to each loaded PIL image.
        """
        self.transform = transform
        self.data_dir = data_dir
        # Renumber the rows 0..n-1; the incoming frame may carry a shuffled index.
        self.dataframe = dataframe.reset_index(drop=True)

    def __len__(self):
        """Return the number of rows (samples) in the table."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label_index)``.

        The label name is translated to an integer through the module-level
        ``class_map`` dictionary.
        """
        sample = self.dataframe.iloc[idx]
        image_id, label_name = sample['id'], sample['label']

        file_name = f"{image_id}.png"
        image = Image.open(os.path.join(self.data_dir, file_name)).convert("RGB")  # force 3 channels

        if self.transform:
            image = self.transform(image)

        return image, class_map[label_name]

# Preprocessing applied to every image: force a 32x32 size, then convert to a tensor.
transform = transforms.Compose([transforms.Resize((32, 32)), transforms.ToTensor()])

# Both splits read from the same image folder and share the same preprocessing.
train_dataset, val_dataset = (
    CIFARDataset(split_df, data_dir, transform=transform)
    for split_df in (train_df, val_df)
)

batch_size = 128  # tunable

# Training batches are reshuffled each epoch; validation is read in a fixed order.
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=batch_size)
val_loader = DataLoader(dataset=val_dataset, shuffle=False, batch_size=batch_size)

print("Ready!")  # marker that the cell ran to the end


In [None]:
#CNN model architecture
#Brownies: Got batch normalization here


class CNNModel(nn.Module):
    """Small CNN: three conv -> batch-norm -> ReLU -> max-pool stages, then two linear layers.

    Each pooling step halves height and width, so the ``128 * 4 * 4`` input size
    of ``fc1`` corresponds to 32x32 inputs (32 -> 16 -> 8 -> 4).
    """

    def __init__(self, num_classes):
        """Create the layers.

        Args:
            num_classes: Number of output logits produced by the last linear layer.
        """
        super().__init__()

        # Stage 1: 3 input channels -> 32 feature maps. With a 3x3 kernel,
        # padding=1 leaves height and width unchanged.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(num_features=32)
        # A single 2x2, stride-2 max-pool module reused after every stage.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 2: 32 -> 64 feature maps.
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(num_features=64)

        # Stage 3: 64 -> 128 feature maps.
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(num_features=128)

        # Classifier head: 2048 flattened features -> 256 hidden units -> num_classes logits,
        # with 30% dropout applied to the hidden activations.
        self.fc1 = nn.Linear(in_features=128 * 4 * 4, out_features=256)
        self.dropout = nn.Dropout(p=0.3)
        self.fc2 = nn.Linear(in_features=256, out_features=num_classes)

    def forward(self, x):
        """Map a batch of images to raw class logits (no softmax is applied here)."""
        stages = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
        )
        for conv, norm in stages:
            x = self.pool(F.relu(norm(conv(x))))

        flat = x.view(x.size(0), -1)  # keep the batch dimension, merge everything else
        hidden = self.dropout(F.relu(self.fc1(flat)))
        return self.fc2(hidden)

# One output logit per distinct label name.
num_classes = len(class_names)
model = CNNModel(num_classes)
model = model.to(device)
print(model)  # shows the layer layout

# Add up the element counts of all parameter tensors to report the model size.
total_params = sum(map(torch.Tensor.numel, model.parameters()))
print(f"Number of parameters: {total_params}")

In [None]:
# Loss: cross-entropy computed on the raw logits returned by the model.
criterion = nn.CrossEntropyLoss()
# Optimiser: Adam over all model parameters with a learning rate of 1e-3.
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Early stopping: training halts after `patience` consecutive epochs without a
# new best validation loss.
patience = 3
epochs_no_improve = 0
best_val_loss = float('inf')

# Upper bound on the number of epochs (original author's note: overfitting was
# observed from roughly epoch 11 onwards).
num_epochs = 15

# Per-epoch history, appended to by the training loop.
train_losses = []
val_losses = []
train_accs = []
val_accs = []

In [None]:
# Train the model and log per-epoch metrics to TensorBoard, which makes it easy
# to compare this run with other models side by side.


def _run_epoch(net, loader, loss_fn, run_device, opt=None):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy_percent)``.

    Args:
        net: Model to run.
        loader: Iterable of ``(images, labels)`` batches; must expose ``.dataset``.
        loss_fn: Callable mapping ``(logits, labels)`` to a scalar loss tensor.
        run_device: Device the batches are moved to before the forward pass.
        opt: Optimizer. When given, the model is put in training mode and its
            weights are updated after every batch. When omitted, the model is
            put in eval mode and no gradients are tracked.

    Returns:
        Tuple of the sample-weighted loss sum divided by ``len(loader.dataset)``
        and the percentage of correctly classified samples seen in the pass.
    """
    is_training = opt is not None
    net.train(is_training)  # train(False) is the same as eval()

    loss_sum = 0.0
    n_correct, n_seen = 0, 0

    with torch.set_grad_enabled(is_training):
        for batch_images, batch_labels in loader:
            batch_images = batch_images.to(run_device)
            batch_labels = batch_labels.to(run_device)

            if is_training:
                opt.zero_grad()  # drop gradients left over from the previous batch
            logits = net(batch_images)
            batch_loss = loss_fn(logits, batch_labels)
            if is_training:
                batch_loss.backward()
                opt.step()

            # The loss is a batch mean; weight it by batch size so the final
            # division yields a per-sample average even with a short last batch.
            loss_sum += batch_loss.item() * batch_images.size(0)
            n_correct += (logits.argmax(dim=1) == batch_labels).sum().item()
            n_seen += batch_labels.size(0)

    mean_loss = loss_sum / len(loader.dataset)
    accuracy = 100.0 * n_correct / n_seen
    return mean_loss, accuracy


writer = SummaryWriter(log_dir='Logs/no_brownies')
try:
    for epoch in range(num_epochs):
        train_epoch_loss, train_epoch_acc = _run_epoch(
            model, train_loader, criterion, device, opt=optimizer
        )
        val_epoch_loss, val_epoch_acc = _run_epoch(model, val_loader, criterion, device)

        train_losses.append(train_epoch_loss)
        val_losses.append(val_epoch_loss)
        train_accs.append(train_epoch_acc)
        val_accs.append(val_epoch_acc)

        # Mirror the metrics into the TensorBoard event file.
        writer.add_scalar('Loss/Train', train_epoch_loss, epoch)
        writer.add_scalar('Loss/Val', val_epoch_loss, epoch)
        writer.add_scalar('Accuracy/Train', train_epoch_acc, epoch)
        writer.add_scalar('Accuracy/Val', val_epoch_acc, epoch)

        print(f"Epoch [{epoch+1}/{num_epochs}] "
              f"Train Loss: {train_epoch_loss:.4f}, Train Acc: {train_epoch_acc:.2f}% "
              f"Val Loss: {val_epoch_loss:.4f}, Val Acc: {val_epoch_acc:.2f}%")

        # Early stopping: checkpoint on every new best validation loss, and stop
        # once `patience` epochs in a row fail to improve on it.
        if val_epoch_loss < best_val_loss:
            best_val_loss = val_epoch_loss
            epochs_no_improve = 0
            torch.save(model.state_dict(), "best_model.pth")
        else:
            epochs_no_improve += 1
            if epochs_no_improve >= patience:
                print("Early stopping triggered!")
                break
finally:
    # Close the event file even when training raises or is interrupted.
    writer.close()

In [None]:
# Score the best saved checkpoint on the validation loader.

# map_location remaps the stored tensors onto the device in use now, so a
# checkpoint written on a GPU still loads on a CPU-only runtime.
model.load_state_dict(torch.load("best_model.pth", map_location=device))
model.eval()

all_preds = []
all_labels = []

with torch.no_grad():
    for images, labels in val_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)  # index of the largest logit per sample

        # Move to CPU before handing the values to scikit-learn.
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Summary metrics; 'weighted' averages the per-class scores by class support.
acc = accuracy_score(all_labels, all_preds)
prec = precision_score(all_labels, all_preds, average='weighted')
rec = recall_score(all_labels, all_preds, average='weighted')
f1 = f1_score(all_labels, all_preds, average='weighted')

print(f"Validation Accuracy: {acc*100:.2f}%")
print(f"Precision: {prec:.3f}, Recall: {rec:.3f}, F1-score: {f1:.3f}")

print("\nClassification Report:")
print(classification_report(all_labels, all_preds, target_names=class_names))