In [140]:
import config
from dataset import ImageDataset
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from torchvision import transforms
from torch.utils.data import DataLoader
from model import TwoHeadedModel
import torch.nn as nn
import torch
import matplotlib.pyplot as plt
import numpy as np

In [116]:
print(config.data_dir)

../data/kagglehub/datasets/andrewmvd/dog-and-cat-detection/versions/1


In [117]:
print(config.device)

cuda


In [118]:
# data directory
annotations_dir = os.path.join(config.data_dir, 'annotations')
images_dir = os.path.join(config.data_dir, 'images')

# get list of image files and create a dummy dataframe to split the data
image_files = [file for file in os.listdir(images_dir) if os.path.isfile(os.path.join(images_dir, file))]
df = pd.DataFrame({'image_name': image_files})

print(df.head())

          image_name
0     Cats_Test0.png
1     Cats_Test1.png
2    Cats_Test10.png
3   Cats_Test100.png
4  Cats_Test1000.png


In [119]:
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)

In [120]:
print(train_df.shape)
print(val_df.shape)

(2948, 1)
(738, 1)


In [121]:
# transform
transform = transforms.Compose([
    transforms.Resize((224, 244)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [122]:
# datasets
train_dataset = ImageDataset(annotations_dir, images_dir, transform=transform)
val_dataset = ImageDataset(annotations_dir, images_dir, transform=transform)

In [123]:
train_dataset.image_files = [file for file in train_dataset.image_files if file in train_df['image_name'].values]
val_dataset.image_files = [file for file in val_dataset.image_files if file in val_df['image_name'].values]

In [124]:
print(train_dataset.__len__())
print(val_dataset.__len__())

2947
738


In [125]:
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [126]:
# building the model with 2 heads including classification and bounding box regression
# print()
# (classifier): Linear(in_features=512, out_features=2, bias=True)
# (regressor): Linear(in_features=512, out_features=4, bias=True)

In [127]:
device = config.device
print(device)

cuda


In [128]:
# model
model = TwoHeadedModel()
model = model.to(device)

# loss and optimizer
criterion_class = nn.CrossEntropyLoss()
criterion_bbox = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [131]:
# training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    for batch_idx, (data, targets, bboxes) in enumerate(train_loader):
        data = data.to(device)
        targets = targets.to(device)
        bboxes = bboxes.to(device)

        scores, pred_bboxes = model(data)
        loss_class = criterion_class(scores, targets)
        loss_bbox = criterion_bbox(pred_bboxes, bboxes)
        # combine losses
        loss = loss_class + loss_bbox
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Validation
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        total_loss_bbox = 0
        total_samples = 0
        for data, targets, bboxes in val_loader:
            data = data.to(device)
            targets = targets.to(device)
            bboxes = bboxes.to(device)

            scores, pred_bboxes = model(data)
            _, predictions = scores.max(1)
            correct += (predictions == targets).sum()
            total += targets.size(0)
            validation_acc = correct / total
            total_loss_bbox = criterion_bbox(pred_bboxes, bboxes).item() * data.size(0)
            total_samples += data.size(0)

        avg_loss_bbox = total_loss_bbox / total_samples
        print(f"Epoch {epoch + 1}/{num_epochs}, Validation Accuracy: {float(correct) / float(total) * 100:.2f}%, "
      f"Avg. Bbox Loss: {avg_loss_bbox:.4f}")

Epoch 1/10, Validation Accuracy: 88.75%, Avg. Bbox Loss: 0.0000
Epoch 2/10, Validation Accuracy: 89.84%, Avg. Bbox Loss: 0.0001
Epoch 3/10, Validation Accuracy: 70.60%, Avg. Bbox Loss: 0.0000
Epoch 4/10, Validation Accuracy: 91.60%, Avg. Bbox Loss: 0.0000
Epoch 5/10, Validation Accuracy: 93.77%, Avg. Bbox Loss: 0.0000
Epoch 6/10, Validation Accuracy: 94.72%, Avg. Bbox Loss: 0.0000
Epoch 7/10, Validation Accuracy: 91.19%, Avg. Bbox Loss: 0.0000
Epoch 8/10, Validation Accuracy: 90.65%, Avg. Bbox Loss: 0.0000
Epoch 9/10, Validation Accuracy: 96.48%, Avg. Bbox Loss: 0.0000
Epoch 10/10, Validation Accuracy: 95.39%, Avg. Bbox Loss: 0.0000
