In [1]:
import os
import cv2
import torch.nn as nn
import torch
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.models import resnet50, ResNet50_Weights

from torch.utils.data import random_split


In [2]:
# Download the alternative annotation workbook (with class names) from Kaggle.
excel_url = "https://storage.googleapis.com/kaggle-forum-message-attachments/3037722/21362/stanford_cars_with_class_names.xlsx"
excel_data = pd.ExcelFile(excel_url)

# Read the "train" and "test" sheets into separate DataFrames.
train_df = excel_data.parse("train")
test_df = excel_data.parse("test")
print(train_df.head())
print(test_df.head())

# Drop the columns that are not used for classification (bounding boxes and
# the leftover index column that only the train sheet carries).
train_df = train_df.drop(columns=['Unnamed: 0', 'x1', 'y1', 'x2', 'y2'])
test_df = test_df.drop(columns=['x1', 'y1', 'x2', 'y2'])
# File names in the test sheet are wrapped in single quotes ('00001.jpg');
# strip them so they can be joined onto the image folder path.
test_df['image'] = test_df['image'].str.strip("'")
print(train_df.head())
print(test_df.head())

# Labels start from 1, so shift them to start from 0. ONE offset (taken from
# the train sheet) is applied to both splits: subtracting each split's own
# minimum would silently put train and test in different label spaces if one
# split happened not to contain the lowest class id.
label_offset = train_df['class'].min()
train_df['class'] = train_df['class'] - label_offset
test_df['class'] = test_df['class'] - label_offset
assert test_df['class'].min() >= 0, "test split contains a class id below the train minimum"

print(train_df['class'].min(), train_df['class'].max())
print(test_df['class'].min(), test_df['class'].max())
print(train_df['class'].head())


   Unnamed: 0   x1   y1    x2    y2  class  \
0           0   39  116   569   375     14   
1           1   36  116   868   587      3   
2           2   85  109   601   381     91   
3           3  621  393  1484  1096    134   
4           4   14   36   133    99    106   

                       ture_class_name      image  
0                  Audi TTS Coupe 2012  00001.jpg  
1                  Acura TL Sedan 2012  00002.jpg  
2           Dodge Dakota Club Cab 2007  00003.jpg  
3     Hyundai Sonata Hybrid Sedan 2012  00004.jpg  
4  Ford F-450 Super Duty Crew Cab 2012  00005.jpg  
    x1   y1   x2   y2  class                      ture_class_name        image
0   30   52  246  147    181              Suzuki Aerio Sedan 2007  '00001.jpg'
1  100   19  576  203    103  Ferrari 458 Italia Convertible 2012  '00002.jpg'
2   51  105  968  659    145                Jeep Patriot SUV 2012  '00003.jpg'
3   67   84  581  407    187              Toyota Camry Sedan 2012  '00004.jpg'
4  140  151  593

In [3]:
# Resolve the local root directory of the Stanford Cars image dataset.
# NOTE(review): `dataset_download` is not imported in any visible cell; the
# printed cache path suggests it is kagglehub's `dataset_download` - confirm
# and add that import to the imports cell so Restart & Run All works.
path = dataset_download("jessicali9530/stanford-cars-dataset")
print(path)

# Each image folder sits inside a directory of the same name
# (<root>/cars_train/cars_train and <root>/cars_test/cars_test).
train_path, test_path = (
    os.path.join(path, split, split) for split in ("cars_train", "cars_test")
)

C:\Users\Weh\.cache\kagglehub\datasets\jessicali9530\stanford-cars-dataset\versions\2


In [4]:
class StanfordCarsDataset(Dataset):
    """Map-style dataset yielding ``(image, label)`` pairs read from disk.

    Args:
        dataframe: table with an ``'image'`` column (file names) and a
            ``'class'`` column (labels).
        image_folder: directory the file names are joined onto.
        image_size: size tuple handed to ``cv2.resize`` (OpenCV interprets it
            as ``(width, height)``).
        transform: optional callable applied to the resized RGB array.

    Attributes:
        mapping: dict of image file name -> label. If a file name occurs more
            than once in ``dataframe`` the last label wins.
        image_paths: the unique image file names, in first-seen order.
        labels: labels aligned index-for-index with ``image_paths``.
    """

    def __init__(self, dataframe, image_folder, image_size=(128, 128), transform=None):
        self.image_folder = image_folder
        self.image_size = image_size
        self.transform = transform

        # map image names to labels (column-wise zip instead of a per-row
        # iterrows() loop; .tolist() yields plain Python scalars)
        self.mapping = dict(zip(dataframe['image'].tolist(), dataframe['class'].tolist()))
        self.image_paths = list(self.mapping.keys())
        # Exposed so callers can inspect the label distribution/range without
        # decoding any image.
        self.labels = [self.mapping[name] for name in self.image_paths]

    def __len__(self):
        """Return the number of distinct images in the dataset."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load, convert and resize the image at ``idx``.

        Returns:
            tuple: ``(img, label)`` where ``img`` is the output of
            ``transform`` if one was given, otherwise the resized RGB array.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        image_name = self.image_paths[idx]
        label = self.labels[idx]
        image_path = os.path.join(self.image_folder, image_name)

        # cv2.imread signals failure by returning None rather than raising.
        img = cv2.imread(image_path)
        if img is None:
            raise FileNotFoundError(f"Error loading image: {image_path}")
        # OpenCV decodes to BGR channel order; convert to RGB before resizing.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, self.image_size)

        if self.transform is not None:
            img = self.transform(img)

        return img, label


In [5]:
# TODO: maybe augmentations on train data

# 3-channel pipeline: ndarray -> PIL image -> tensor -> per-channel
# normalisation with the RGB statistics used for ResNet50.
rgb_steps = [
    transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform_rgb = transforms.Compose(rgb_steps)

# 1-channel pipeline: same as above but collapsed to a single grayscale
# channel and normalised with mean 0.5 / std 0.5.
gray_steps = [
    transforms.ToPILImage(),
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
]
transform_gray = transforms.Compose(gray_steps)


In [6]:
# datasets: both splits share the same image size and RGB preprocessing
dataset_options = dict(image_size=(128, 128), transform=transform_rgb)
train_dataset = StanfordCarsDataset(train_df, train_path, **dataset_options)
test_dataset = StanfordCarsDataset(test_df, test_path, **dataset_options)


In [7]:
# train/val split: 80% train, 20% val
train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size
# Seed the split so the same images land in train/val after every kernel
# restart; otherwise validation numbers are not comparable between runs.
split_generator = torch.Generator().manual_seed(42)
train_dataset_split, val_dataset = random_split(
    train_dataset, [train_size, val_size], generator=split_generator
)

batch_size = 32
# dataloader: only the training loader shuffles; val/test keep a fixed order
train_loader = DataLoader(train_dataset_split, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)



In [8]:
# Training Dataset: labels of the first ten samples
print("Train Dataset:")
for i in range(10):
    _, label = train_dataset[i]
    print(f"Train Dataset Index {i}, Label: {label}")

# Test Dataset: labels of the first ten samples
print("\nTest Dataset:")
for i in range(10):
    _, label = test_dataset[i]
    print(f"Test Dataset Index {i}, Label: {label}")

# Label ranges are read from the datasets' name -> label mapping, which the
# dataset class always defines, so no image has to be decoded for this check.
train_labels = list(train_dataset.mapping.values())
test_labels = list(test_dataset.mapping.values())
print(f"Train Labels Range: {min(train_labels)} to {max(train_labels)}")
print(f"Test Labels Range: {min(test_labels)} to {max(test_labels)}")

# Training / Test DataLoader: inspect the first batch of each
for split_name, loader in (("Train", train_loader), ("Test", test_loader)):
    print(f"\n{split_name} DataLoader:")
    for X, y in loader:
        print(f"{split_name} Batch Labels: {y[:10].tolist()}")
        print(f"Shape of X [N, C, H, W]: {X.shape}")
        print(f"Shape of y: {y.shape} {y.dtype}")
        print(y.min().item(), y.max().item())
        break

# Smallest and largest label seen across every training batch.
# NOTE: this iterates the whole training loader, i.e. it loads every
# training image once just to look at the labels.
batch_min = float('inf')
batch_max = float('-inf')
for _, labels in train_loader:
    batch_min = min(batch_min, labels.min().item())
    batch_max = max(batch_max, labels.max().item())

print(f"Global Min Label: {batch_min}, Global Max Label: {batch_max}")

Train Dataset:
Train Dataset Index 0, Label: 13
Train Dataset Index 1, Label: 2
Train Dataset Index 2, Label: 90
Train Dataset Index 3, Label: 133
Train Dataset Index 4, Label: 105
Train Dataset Index 5, Label: 122
Train Dataset Index 6, Label: 88
Train Dataset Index 7, Label: 95
Train Dataset Index 8, Label: 166
Train Dataset Index 9, Label: 57

Test Dataset:
Test Dataset Index 0, Label: 180
Test Dataset Index 1, Label: 102
Test Dataset Index 2, Label: 144
Test Dataset Index 3, Label: 186
Test Dataset Index 4, Label: 184
Test Dataset Index 5, Label: 77
Test Dataset Index 6, Label: 117
Test Dataset Index 7, Label: 164
Test Dataset Index 8, Label: 31
Test Dataset Index 9, Label: 59
Train Labels Range: 0 to 195
Test Labels Range: 0 to 195

Train DataLoader:
Train Batch Labels: [13, 2, 90, 133, 105, 122, 88, 95, 166, 57]
Shape of X [N, C, H, W]: torch.Size([32, 3, 128, 128])
Shape of y: torch.Size([32]) torch.int64
2 193

Test DataLoader:
Test Batch Labels: [180, 102, 144, 186, 184, 77, 1

In [8]:
# model: ResNet50, pretrained on ImageNet
class ResNet50TransferLearning(nn.Module):
    """ResNet50 backbone whose final ``fc`` layer is swapped for a small MLP head.

    Args:
        num_classes: size of the output layer.
        dropout_rate: dropout probability used inside the new head.
    """

    def __init__(self, num_classes=196, dropout_rate=0.2):
        super().__init__()
        backbone = resnet50(weights=ResNet50_Weights.DEFAULT)

        # New classification head: Linear -> LeakyReLU -> Dropout -> Linear,
        # sized from the input width of the layer it replaces.
        head_layers = [
            nn.Linear(backbone.fc.in_features, 512),
            nn.LeakyReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(512, num_classes),
        ]
        backbone.fc = nn.Sequential(*head_layers)
        self.resnet = backbone

    def forward(self, x):
        """Run ``x`` through the backbone and the replaced head."""
        return self.resnet(x)

# Build the classifier (196 outputs, dropout 0.2), keep it on the CPU and
# print its layer structure.
model = ResNet50TransferLearning(num_classes=196, dropout_rate=0.2)
model = model.to('cpu')
print(model)


Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to C:\Users\Weh/.cache\torch\hub\checkpoints\resnet50-11ad3fa6.pth
100%|██████████| 97.8M/97.8M [00:03<00:00, 26.9MB/s]


ResNet50TransferLearning(
  (resnet): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequen

In [9]:
# Loss and Optimizer
# Cross-entropy on the model outputs; SGD with momentum and weight decay
# over all model parameters.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=1e-3,
    momentum=0.9,
    weight_decay=1e-4,
)

In [11]:
# Disabled legacy code: the whole train/test loop below is wrapped in a string
# literal, so none of it is executed. It defines separate `train` / `test`
# functions and evaluates on `test_loader` after every epoch.
# NOTE(review): looks superseded by `train_and_validate` / `evaluate_on_test`
# defined in the following cells - consider deleting this cell.
'''
# train loop
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to('cpu'), y.to('cpu')

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

# test loop
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to('cpu'), y.to('cpu')
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

epochs = 10
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_loader, model, loss_fn, optimizer)
    test(test_loader, model, loss_fn)
print("Training complete!")
'''

In [10]:
# train and val loop (0.2 validation split)
def train_and_validate(train_loader, val_loader, model, loss_fn, optimizer, epochs=10):
    """Train ``model`` and validate it after every epoch.

    Progress (training loss every 100 batches, validation accuracy and mean
    validation loss per epoch) is printed.

    Args:
        train_loader: iterable of ``(X, y)`` training batches.
        val_loader: iterable of ``(X, y)`` validation batches.
        model: the ``nn.Module`` to optimise (updated in place).
        loss_fn: callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: optimizer already bound to ``model``'s parameters.
        epochs: number of passes over ``train_loader``.

    Returns:
        float: validation accuracy of the LAST epoch as a fraction in
        ``[0, 1]`` (``0.0`` if ``epochs`` is 0), so callers can rank runs.
    """
    # Move batches to wherever the model already lives (CPU in this notebook)
    # instead of hard-coding the device.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    n_train = len(train_loader.dataset)
    val_accuracy = 0.0

    for epoch in range(epochs):
        print(f"Epoch {epoch+1}/{epochs}")
        print("-------------------------------")

        # train
        model.train()
        for batch, (X, y) in enumerate(train_loader):
            X, y = X.to(device), y.to(device)
            pred = model(X)
            loss = loss_fn(pred, y)

            # backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if batch % 100 == 0:
                loss_value, current = loss.item(), (batch + 1) * len(X)
                print(f"loss: {loss_value:>7f}  [{current:>5d}/{n_train:>5d}]")

        # validate
        model.eval()
        val_loss, correct = 0, 0
        with torch.no_grad():
            for X, y in val_loader:
                X, y = X.to(device), y.to(device)
                pred = model(X)
                val_loss += loss_fn(pred, y).item()
                correct += (pred.argmax(1) == y).type(torch.float).sum().item()

        # max(..., 1) keeps an empty validation set from dividing by zero.
        val_loss /= max(len(val_loader), 1)
        val_accuracy = correct / max(len(val_loader.dataset), 1)
        print(f"Validation Error: \n Accuracy: {(100*val_accuracy):>0.1f}%, Avg loss: {val_loss:>8f} \n")

    return val_accuracy


In [11]:
# evaluate on test data
def evaluate_on_test(test_loader, model, loss_fn):
    """Print accuracy and mean per-batch loss of ``model`` on ``test_loader``.

    The model is put in eval mode and no gradients are tracked. Nothing is
    returned; the metrics are only printed.
    """
    model.eval()
    loss_sum = 0
    hits = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs = inputs.to('cpu')
            targets = targets.to('cpu')
            logits = model(inputs)
            loss_sum += loss_fn(logits, targets).item()
            # Count predictions whose arg-max class equals the target.
            matches = logits.argmax(1) == targets
            hits += matches.type(torch.float).sum().item()

    # Loss is averaged over batches, accuracy over individual samples.
    mean_loss = loss_sum / len(test_loader)
    accuracy = hits / len(test_loader.dataset)
    print(f"Test Error: \n Accuracy: {(100*accuracy):>0.1f}%, Avg loss: {mean_loss:>8f} \n")


In [12]:
# Fit for 10 epochs (validation metrics are printed after each one) ...
train_and_validate(
    train_loader=train_loader,
    val_loader=val_loader,
    model=model,
    loss_fn=loss_fn,
    optimizer=optimizer,
    epochs=10,
)

# ... then report the metrics on the held-out test set once.
evaluate_on_test(test_loader=test_loader, model=model, loss_fn=loss_fn)

Epoch 1/10
-------------------------------
loss: 5.283438  [   32/ 6515]
loss: 5.281435  [ 3232/ 6515]
loss: 5.275332  [ 6432/ 6515]
Validation Error: 
 Accuracy: 0.7%, Avg loss: 5.274061 

Epoch 2/10
-------------------------------
loss: 5.254241  [   32/ 6515]
loss: 5.248442  [ 3232/ 6515]
loss: 5.244256  [ 6432/ 6515]
Validation Error: 
 Accuracy: 1.0%, Avg loss: 5.259780 

Epoch 3/10
-------------------------------
loss: 5.220606  [   32/ 6515]
loss: 5.226896  [ 3232/ 6515]
loss: 5.175627  [ 6432/ 6515]
Validation Error: 
 Accuracy: 2.0%, Avg loss: 5.237035 

Epoch 4/10
-------------------------------
loss: 5.147103  [   32/ 6515]
loss: 5.109557  [ 3232/ 6515]
loss: 5.094997  [ 6432/ 6515]
Validation Error: 
 Accuracy: 2.8%, Avg loss: 5.182463 

Epoch 5/10
-------------------------------
loss: 5.118543  [   32/ 6515]
loss: 5.055025  [ 3232/ 6515]
loss: 4.909935  [ 6432/ 6515]
Validation Error: 
 Accuracy: 3.5%, Avg loss: 5.047059 

Epoch 6/10
-------------------------------
loss: 4

In [13]:
# Persist only the model's state_dict (not the module object) to model_val.pth.
model_state = model.state_dict()
torch.save(model_state, "model_val.pth")

# Hyperparameter Tuning

In [ ]:
import random
from itertools import product

# parameter grid
param_grid = {
    "lr": [1e-4, 1e-3, 1e-2],
    "batch_size": [16, 32, 64],
    "dropout": [0.2, 0.3, 0.5],
    "optimizer": ["SGD", "Adam", "RMSprop"],
    "weight_decay": [1e-4, 1e-3]
}

SEARCH_SEED = 42     # fixes which combinations are drawn, run after run
N_TRIALS = 10        # number of random combinations to try
SEARCH_EPOCHS = 3    # short training per trial
FINAL_EPOCHS = 10    # full training for the winning combination


def build_optimizer(name, params, lr, weight_decay):
    """Create the optimizer called ``name`` ("SGD", "Adam" or "RMSprop").

    Raises:
        ValueError: for any other name, instead of silently reusing a stale
            ``optimizer`` variable from an earlier iteration or cell.
    """
    if name == "SGD":
        return torch.optim.SGD(params, lr=lr, momentum=0.9, weight_decay=weight_decay)
    if name == "Adam":
        return torch.optim.Adam(params, lr=lr, weight_decay=weight_decay)
    if name == "RMSprop":
        return torch.optim.RMSprop(params, lr=lr, weight_decay=weight_decay)
    raise ValueError(f"Unknown optimizer: {name}")


def measure_accuracy(loader, model):
    """Return the fraction of samples in ``loader`` that ``model`` classifies correctly."""
    model.eval()
    hits = 0
    with torch.no_grad():
        for X, y in loader:
            X, y = X.to('cpu'), y.to('cpu')
            hits += (model(X).argmax(1) == y).sum().item()
    return hits / max(len(loader.dataset), 1)


# combinations
all_combinations = list(product(*param_grid.values()))
# Draw from a private, seeded RNG so the same trials are evaluated on every
# run and the global `random` state is left untouched.
random_combinations = random.Random(SEARCH_SEED).sample(
    all_combinations, min(N_TRIALS, len(all_combinations))
)


results = []

for idx, (lr, batch_size, dropout, optimizer_name, weight_decay) in enumerate(random_combinations):
    print(f"Testing combination {idx + 1}: lr={lr}, batch_size={batch_size}, dropout={dropout}, optimizer={optimizer_name}, weight_decay={weight_decay}")

    # dataloader with new batch size
    train_loader = DataLoader(train_dataset_split, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    # fresh model and optimizer for every trial
    model = ResNet50TransferLearning(num_classes=196, dropout_rate=dropout).to('cpu')
    optimizer = build_optimizer(optimizer_name, model.parameters(), lr, weight_decay)

    # train and validate
    validation_accuracy = train_and_validate(train_loader, val_loader, model, loss_fn, optimizer, epochs=SEARCH_EPOCHS)
    # If train_and_validate only prints its metrics and returns None, measure
    # the score here; ranking the trials below needs a real number.
    if validation_accuracy is None:
        validation_accuracy = measure_accuracy(val_loader, model)

    # save results
    results.append({
        "lr": lr,
        "batch_size": batch_size,
        "dropout": dropout,
        "optimizer": optimizer_name,
        "weight_decay": weight_decay,
        "validation_accuracy": validation_accuracy
    })

# find best hyperparameter combination
best_result = max(results, key=lambda x: x['validation_accuracy'])
print(f"Best Hyperparameter Combination: {best_result}")

# test best model on test data
print("Evaluating the best model on test data...")
train_loader = DataLoader(train_dataset_split, batch_size=best_result['batch_size'], shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=best_result['batch_size'], shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=best_result['batch_size'], shuffle=False)

# The winning configuration is retrained from scratch for the full schedule.
model = ResNet50TransferLearning(num_classes=196, dropout_rate=best_result['dropout']).to('cpu')
optimizer = build_optimizer(
    best_result['optimizer'], model.parameters(), best_result['lr'], best_result['weight_decay']
)

# train and validate best model
train_and_validate(train_loader, val_loader, model, loss_fn, optimizer, epochs=FINAL_EPOCHS)

# evaluate on test data
evaluate_on_test(test_loader, model, loss_fn)

torch.save(model.state_dict(), "model.pth")
print("Saved PyTorch Model State to model.pth")
