In [2]:
import numpy as np
import torch
import matplotlib.pyplot as plt
from PIL import Image
import os
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.transforms as transforms


In [10]:

import pandas as pd


#--- Dataset class for loading your training data ---
class ImageClassificationDataset(Dataset):
    """Map-style dataset pairing PNG images in a directory with labels from a CSV.

    The CSV is read with ``pandas.read_csv`` and must contain an ``id`` column
    (the image file name without extension) and a ``class`` column (the label).
    Sample ``idx`` is loaded from ``<image_dir>/<id>.png``.

    Args:
        image_dir: Directory that holds the ``<id>.png`` files.
        labels_csv: Path to the CSV file with the ``id`` and ``class`` columns.
        transform: Optional callable applied to the RGB PIL image before it is
            returned.
    """

    def __init__(self, image_dir, labels_csv, transform=None):
        self.image_dir = image_dir
        self.labels_df = pd.read_csv(labels_csv)
        # Cache both columns as arrays so __getitem__ is a plain positional lookup.
        self.image_ids = self.labels_df['id'].values
        self.labels = self.labels_df['class'].values
        self.transform = transform

    def __len__(self):
        """Return the number of rows (samples) listed in the labels CSV."""
        return len(self.image_ids)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``.

        The image is converted to 3-channel RGB and passed through
        ``transform`` when one was supplied; the label is returned exactly as
        stored in the ``class`` column.
        """
        img_name = str(self.image_ids[idx]) + ".png"
        label = self.labels[idx]
        img_path = os.path.join(self.image_dir, img_name)
        # The context manager closes the underlying file deterministically;
        # convert() returns an independent in-memory copy that outlives it.
        with Image.open(img_path) as img:
            image = img.convert('RGB')  # colour
        if self.transform is not None:
            image = self.transform(image)
        return image, label


# --- Preprocessing ---
# NOTE: this single pipeline is attached to the dataset object that both the
# training and the validation subsets are drawn from, so re-enabling the random
# augmentations below would also augment the validation images.
transform = transforms.Compose([
    #transforms.RandomRotation(5),
    #transforms.RandomHorizontalFlip(),
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)
])

# --- Load and split the dataset ---
dataset = ImageClassificationDataset(
    image_dir='data/train',
    labels_csv='data/train/labels_train.csv',
    transform=transform
)

# 80/20 train/validation split; the validation part absorbs the rounding remainder.
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

# Seed the split so the same images land in train/val on every run; without a
# fixed generator the validation set changes per execution and the reported
# accuracies are not comparable between runs.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = random_split(
    dataset, [train_size, val_size], generator=split_generator
)

# Only the training loader shuffles; validation order does not affect the metrics.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64)



import torch.nn as nn
import torch.nn.functional as F
import torch
import matplotlib.pyplot as plt

# --- Define CNN Model ---
class SimpleCNN(nn.Module):
    """Three conv/ReLU/max-pool stages followed by a three-layer MLP head.

    The first fully connected layer is sized for ``32 * 8 * 8`` features, i.e.
    the network expects inputs of shape ``(batch, 3, 64, 64)``: each of the
    three 2x2 poolings halves the spatial size (64 -> 32 -> 16 -> 8).

    Args:
        num_classes: Number of output logits. Defaults to 26, the value that
            was previously hard-coded.
    """

    def __init__(self, num_classes=26):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 8, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(8, 16, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(16, 32, kernel_size=3, padding=1)

        # Stateless layers, shared by every stage.
        self.pool = nn.MaxPool2d(2, 2)
        self.relu = nn.ReLU()

        self.dropout1 = nn.Dropout(p=0.5)
        self.dropout2 = nn.Dropout(p=0.3)

        self.fc1 = nn.Linear(32 * 8 * 8, 128)
        self.fc2 = nn.Linear(128, 64)
        self.out = nn.Linear(64, num_classes)

    def forward(self, x):
        """Return raw class logits of shape ``(batch, num_classes)``.

        Raises:
            RuntimeError: If the spatial size of ``x`` does not yield
                ``32 * 8 * 8`` features per sample (raised by ``fc1``).
        """
        x = self.pool(self.relu(self.conv1(x)))  # 64→32
        x = self.pool(self.relu(self.conv2(x)))  # 32→16
        x = self.pool(self.relu(self.conv3(x)))  # 16→8

        # Flatten per sample and keep the batch dimension fixed. The previous
        # view(-1, 32 * 8 * 8) could silently merge several samples into one
        # row when the input size was wrong instead of raising an error.
        x = x.flatten(1)

        x = self.dropout1(self.relu(self.fc1(x)))
        x = self.dropout2(self.relu(self.fc2(x)))
        x = self.out(x)
        return x

# --- Setup ---
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SimpleCNN().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)

# --- Training ---
num_epochs = 60
best_val_acc = 0
best_model_state = None  # snapshot of the weights with the best validation accuracy

# Per-epoch history, used for the plots at the end.
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []

for epoch in range(num_epochs):
    # --- Training ---
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    # Loss is the mean of the per-batch losses; accuracy is per sample.
    avg_train_loss = running_loss / len(train_loader)
    train_acc = 100 * correct / total

    # --- Validation ---
    model.eval()
    val_loss = 0.0
    val_correct = 0
    val_total = 0

    with torch.no_grad():
        for val_images, val_labels in val_loader:
            val_images, val_labels = val_images.to(device), val_labels.to(device)
            val_outputs = model(val_images)
            loss = criterion(val_outputs, val_labels)
            val_loss += loss.item()
            val_predicted = val_outputs.argmax(dim=1)
            val_total += val_labels.size(0)
            val_correct += (val_predicted == val_labels).sum().item()

    avg_val_loss = val_loss / len(val_loader)
    val_acc = 100 * val_correct / val_total

    # Save best model
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        # state_dict() hands back references to the live parameter tensors, so
        # they must be cloned; otherwise later optimizer steps overwrite the
        # "best" snapshot and the last epoch is what ends up being saved.
        best_model_state = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }

    # Track metrics
    train_losses.append(avg_train_loss)
    val_losses.append(avg_val_loss)
    train_accuracies.append(train_acc)
    val_accuracies.append(val_acc)

    print(f"Epoch {epoch+1}/{num_epochs} | "
          f"Train Loss: {avg_train_loss:.4f} | Train Acc: {train_acc:.2f}% | "
          f"Val Loss: {avg_val_loss:.4f} | Val Acc: {val_acc:.2f}%")

# --- Save Best Model ---
# best_model_state stays None when no epoch exceeded 0% validation accuracy;
# in that case the current weights are saved as they are.
if best_model_state is not None:
    model.load_state_dict(best_model_state)
torch.save(model.state_dict(), "simple_best_model.pt")

# --- Plotting ---
plt.figure(figsize=(12, 5))

plt.subplot(1, 2, 1)
plt.plot(train_losses, label='Train Loss')
plt.plot(val_losses, label='Val Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Loss over Epochs')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(train_accuracies, label='Train Accuracy')
plt.plot(val_accuracies, label='Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy (%)')
plt.title('Accuracy over Epochs')
plt.legend()

plt.tight_layout()
plt.show()



Epoch 1/60 | Train Loss: 3.2626 | Train Acc: 3.41% | Val Loss: 3.2562 | Val Acc: 3.94%
Epoch 2/60 | Train Loss: 3.2608 | Train Acc: 4.13% | Val Loss: 3.2572 | Val Acc: 3.94%
Epoch 3/60 | Train Loss: 3.2595 | Train Acc: 3.94% | Val Loss: 3.2606 | Val Acc: 3.17%
Epoch 4/60 | Train Loss: 3.2591 | Train Acc: 3.85% | Val Loss: 3.2597 | Val Acc: 3.17%
Epoch 5/60 | Train Loss: 3.2588 | Train Acc: 3.51% | Val Loss: 3.2599 | Val Acc: 3.17%
Epoch 6/60 | Train Loss: 3.2584 | Train Acc: 4.38% | Val Loss: 3.2605 | Val Acc: 3.17%
Epoch 7/60 | Train Loss: 3.2585 | Train Acc: 4.06% | Val Loss: 3.2616 | Val Acc: 3.17%
Epoch 8/60 | Train Loss: 3.2584 | Train Acc: 3.49% | Val Loss: 3.2624 | Val Acc: 3.17%
Epoch 9/60 | Train Loss: 3.2585 | Train Acc: 4.11% | Val Loss: 3.2619 | Val Acc: 3.17%
Epoch 10/60 | Train Loss: 3.2581 | Train Acc: 4.21% | Val Loss: 3.2622 | Val Acc: 3.17%
Epoch 11/60 | Train Loss: 3.2583 | Train Acc: 3.68% | Val Loss: 3.2629 | Val Acc: 3.17%
Epoch 12/60 | Train Loss: 3.2582 | Train 

KeyboardInterrupt: 

# Using ResNet18

In [13]:
import torch
import torch.nn as nn
import torchvision.models as models
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, random_split
from PIL import Image
import pandas as pd
import os
import matplotlib.pyplot as plt

# --- Dataset Class ---
class ImageClassificationDataset(Dataset):
    """Dataset of ``<id>.png`` images whose labels come from a CSV file.

    The CSV (read with ``pandas.read_csv``) needs an ``id`` column naming each
    image file without its extension and a ``class`` column with its label.

    Args:
        image_dir: Directory that holds the ``<id>.png`` files.
        labels_csv: Path to the CSV file with the ``id`` and ``class`` columns.
        transform: Optional callable applied to the RGB PIL image before it is
            returned.
    """

    def __init__(self, image_dir, labels_csv, transform=None):
        self.image_dir = image_dir
        self.labels_df = pd.read_csv(labels_csv)
        # Keep both columns as arrays for cheap positional access per sample.
        self.image_ids = self.labels_df['id'].values
        self.labels = self.labels_df['class'].values
        self.transform = transform

    def __len__(self):
        """Return how many samples the labels CSV lists."""
        return len(self.image_ids)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return it as ``(image, label)``.

        The image is converted to RGB and run through ``transform`` if one was
        given; the label is the raw value from the ``class`` column.
        """
        img_name = str(self.image_ids[idx]) + ".png"
        label = self.labels[idx]
        img_path = os.path.join(self.image_dir, img_name)
        # Close the file handle as soon as the pixels are decoded; convert()
        # produces a separate in-memory image that stays valid afterwards.
        with Image.open(img_path) as img:
            image = img.convert('RGB')
        if self.transform is not None:
            image = self.transform(image)
        return image, label

# --- Transforms ---
# Resize to 64x64, convert to a tensor, then normalise each channel with
# mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)
])

# --- Dataset Loading ---
dataset = ImageClassificationDataset(
    image_dir='data/train',
    labels_csv='data/train/labels_train.csv',
    transform=transform
)

# 80/20 train/validation split; the validation part absorbs the rounding remainder.
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

# A fixed generator makes the split reproducible, so validation accuracy is
# measured on the same images every time this cell is executed.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = random_split(
    dataset, [train_size, val_size], generator=split_generator
)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64)

# --- Modified ResNet-18 ---
class SmallResNet(nn.Module):
    """ResNet-18 wrapper with a modified stem and a custom classification head.

    Starting from an untrained torchvision ResNet-18, the first convolution is
    replaced by a 3x3, stride-1 convolution, the stem max-pool is replaced by
    an identity, and the final linear layer is replaced by one that emits
    ``num_classes`` logits.
    """

    def __init__(self, num_classes=26):
        super().__init__()
        backbone = models.resnet18(weights=None)

        # Stem: 3x3 stride-1 convolution and no pooling.
        backbone.conv1 = nn.Conv2d(
            3, 64, kernel_size=3, stride=1, padding=1, bias=False
        )
        backbone.maxpool = nn.Identity()

        # Head: reuse the feature width of the stock classifier.
        feature_dim = backbone.fc.in_features
        backbone.fc = nn.Linear(feature_dim, num_classes)

        self.model = backbone

    def forward(self, x):
        """Run ``x`` through the wrapped network and return its output."""
        logits = self.model(x)
        return logits

# --- Setup ---
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SmallResNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)

# --- Training ---
num_epochs = 10
best_val_acc = 0
best_model_state = None  # snapshot of the weights with the best validation accuracy
train_losses, val_losses = [], []
train_accuracies, val_accuracies = [], []

for epoch in range(num_epochs):
    model.train()
    running_loss, correct, total = 0.0, 0, 0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    # Loss is the mean of the per-batch losses; accuracy is per sample.
    avg_train_loss = running_loss / len(train_loader)
    train_acc = 100 * correct / total

    model.eval()
    val_loss, val_correct, val_total = 0.0, 0, 0
    with torch.no_grad():
        for val_images, val_labels in val_loader:
            val_images, val_labels = val_images.to(device), val_labels.to(device)
            val_outputs = model(val_images)
            loss = criterion(val_outputs, val_labels)
            val_loss += loss.item()
            val_predicted = val_outputs.argmax(dim=1)
            val_total += val_labels.size(0)
            val_correct += (val_predicted == val_labels).sum().item()

    avg_val_loss = val_loss / len(val_loader)
    val_acc = 100 * val_correct / val_total

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        # Clone the tensors: state_dict() returns references to the live
        # parameters and buffers, which later epochs would otherwise overwrite,
        # turning the "best" snapshot into the final-epoch weights.
        best_model_state = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }

    train_losses.append(avg_train_loss)
    val_losses.append(avg_val_loss)
    train_accuracies.append(train_acc)
    val_accuracies.append(val_acc)

    print(f"Epoch {epoch+1}/{num_epochs} | Train Loss: {avg_train_loss:.4f} | "
          f"Train Acc: {train_acc:.2f}% | Val Loss: {avg_val_loss:.4f} | Val Acc: {val_acc:.2f}%")

# --- Save Best Model ---
# best_model_state stays None when no epoch exceeded 0% validation accuracy;
# in that case the current weights are saved as they are.
if best_model_state is not None:
    model.load_state_dict(best_model_state)
torch.save(model.state_dict(), "resnet18_best_model.pt")

# --- Plotting ---
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(train_losses, label='Train Loss')
plt.plot(val_losses, label='Val Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Loss over Epochs')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(train_accuracies, label='Train Accuracy')
plt.plot(val_accuracies, label='Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy (%)')
plt.title('Accuracy over Epochs')
plt.legend()

plt.tight_layout()
plt.savefig("resnet18_training_plot.png")


KeyboardInterrupt: 

In [16]:
import torch
import torch.nn as nn
import torchvision.models as models

# Define the model architecture
class ModifiedResNet18(nn.Module):
    """ResNet-18 variant with a 3x3 stem, no stem pooling and dropout-regularised tail.

    Changes applied to an untrained torchvision ResNet-18:
      * ``conv1`` becomes a 3x3, stride-1 convolution;
      * ``maxpool`` becomes an identity;
      * ``layer4`` is wrapped so a ``Dropout(0.3)`` runs before the original stage;
      * ``fc`` becomes ``Dropout(0.5) -> BatchNorm1d -> Linear(num_classes)``.

    The order of the sub-modules inside both ``nn.Sequential`` containers
    determines the parameter names in the state dict, so it must not change.
    """

    def __init__(self, num_classes=26):
        super().__init__()
        net = models.resnet18(weights=None)

        # Stem: 3x3 stride-1 convolution and no pooling.
        net.conv1 = nn.Conv2d(
            3, 64, kernel_size=3, stride=1, padding=1, bias=False
        )
        net.maxpool = nn.Identity()

        # Last residual stage, preceded by dropout on its input feature maps.
        original_layer4 = net.layer4
        net.layer4 = nn.Sequential(nn.Dropout(0.3), original_layer4)

        # Classification head sized from the stock classifier's input width.
        head_width = net.fc.in_features
        net.fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.BatchNorm1d(head_width),
            nn.Linear(head_width, num_classes),
        )

        self.model = net

    def forward(self, x):
        """Run ``x`` through the wrapped network and return its output."""
        logits = self.model(x)
        return logits

# Load the model
# weights_only=True limits unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot execute arbitrary code while being loaded.
# map_location keeps the load working on machines without a GPU.
model = ModifiedResNet18()
state_dict = torch.load(
    "202146996_model.pt",  # Rename if needed
    map_location=torch.device("cpu"),
    weights_only=True,
)
model.load_state_dict(state_dict)
model.eval()  # inference mode: disables dropout, uses BatchNorm running statistics

print("✅ Model loaded successfully!")
print(model)


✅ Model loaded successfully!
ModifiedResNet18(
  (model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): Identity()
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (re

  model.load_state_dict(torch.load("202146996_model.pt", map_location=torch.device("cpu")))  # Rename if needed
