In [1]:
import torch
import torchvision
import pandas as pd
import os
from torchvision import transforms
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.transforms import functional as F
from PIL import Image
from torch.utils.data import DataLoader, Dataset, random_split
import numpy as np

In [2]:
car_bounding_box = pd.read_csv('car_boundingBox_class_and_image_number.csv')
car_class_and_model = pd.read_csv('car_class_and_model.csv')
car_test_bounding_box = pd.read_csv('car_dataset_test_boundingBox_image_number.csv')

In [25]:
class CarDataset(Dataset):
    def __init__(self, annotations, img_dir, transform=None, augment=True):
        self.annotations = annotations
        self.img_dir = img_dir
        self.transform = transform
        self.augment = augment  # Flag to indicate if augmentation should be applied

    def __len__(self):
        # Multiply by 2 to account for both original and augmented images
        return len(self.annotations) * 2 if self.augment else len(self.annotations)

    def __getitem__(self, idx):
        # Get the actual index by dividing by 2 for flipping
        actual_idx = idx // 2
        img_id = str(self.annotations.iloc[actual_idx, 0]).zfill(5)
        img_path = os.path.join(self.img_dir, f"{img_id}.jpg")
        
        image = Image.open(img_path).convert("RGB")
    
        # Bounding box and class
        boxes = torch.tensor([[self.annotations.iloc[actual_idx, 1], 
                               self.annotations.iloc[actual_idx, 2], 
                               self.annotations.iloc[actual_idx, 3], 
                               self.annotations.iloc[actual_idx, 4]]], dtype=torch.float32)
        labels = torch.tensor([self.annotations.iloc[actual_idx, 5]], dtype=torch.int64)
    
        target = {"boxes": boxes, "labels": labels}

        # Determine if this instance should be augmented (flip horizontally)
        if idx % 2 == 1:  # Flip every second instance
            image = F.hflip(image)
            # Flip the bounding box horizontally
            boxes[:, [0, 2]] = image.width - boxes[:, [2, 0]]  # Adjust x-coordinates for flip
            target = {"boxes": boxes, "labels": labels}

        if self.transform:
            image = self.transform(image)

        return image, target

In [27]:
data_transforms = transforms.Compose([
    transforms.ToTensor()
])

In [29]:
img_dir = "cars_training_image/cars_train"
dataset = CarDataset(car_bounding_box, img_dir, transform=data_transforms)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=lambda x: tuple(zip(*x)))
val_loader = DataLoader(val_dataset, batch_size=4, shuffle=False, collate_fn=lambda x: tuple(zip(*x)))

In [31]:
print(car_class_and_model.columns)

Index(['Class', 'Class_name'], dtype='object')


In [33]:
print(len(train_loader))

3258


In [35]:
model = fasterrcnn_resnet50_fpn(weights=torchvision.models.detection.FasterRCNN_ResNet50_FPN_Weights.DEFAULT)
num_classes = car_class_and_model['Class'].nunique() + 1
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.005, momentum=0.9, weight_decay=0.0005)
num_epochs = 10

In [37]:
import time

for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0

    for batch_idx, (images, targets) in enumerate(train_loader):
        start_time = time.time()  # Start timing for the batch

        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # Forward pass
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        epoch_loss += losses.item()

        # Backward pass
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        # Calculate batch time
        batch_time = time.time() - start_time

        # Print batch loss and time
        print(f"Epoch [{epoch+1}/{num_epochs}], Batch [{batch_idx+1}/{len(train_loader)}], "
              f"Batch Loss: {losses.item():.4f}, Time: {batch_time:.2f} seconds")

    # Print epoch loss
    print(f"Epoch [{epoch+1}/{num_epochs}], Total Loss: {epoch_loss:.4f}")

print("Training complete.")

Epoch [1/10], Batch [1/3258], Batch Loss: 5.9579, Time: 1.31 seconds
Epoch [1/10], Batch [2/3258], Batch Loss: 33.6412, Time: 0.68 seconds
Epoch [1/10], Batch [3/3258], Batch Loss: 1.0544, Time: 0.64 seconds
Epoch [1/10], Batch [4/3258], Batch Loss: 0.4485, Time: 0.68 seconds
Epoch [1/10], Batch [5/3258], Batch Loss: 0.5577, Time: 0.64 seconds
Epoch [1/10], Batch [6/3258], Batch Loss: 0.5211, Time: 0.65 seconds
Epoch [1/10], Batch [7/3258], Batch Loss: 0.9143, Time: 0.64 seconds
Epoch [1/10], Batch [8/3258], Batch Loss: 0.4776, Time: 0.62 seconds
Epoch [1/10], Batch [9/3258], Batch Loss: 0.8187, Time: 0.37 seconds
Epoch [1/10], Batch [10/3258], Batch Loss: 0.4567, Time: 0.33 seconds
Epoch [1/10], Batch [11/3258], Batch Loss: 0.4407, Time: 0.35 seconds
Epoch [1/10], Batch [12/3258], Batch Loss: 0.4427, Time: 0.61 seconds
Epoch [1/10], Batch [13/3258], Batch Loss: 0.4070, Time: 0.38 seconds
Epoch [1/10], Batch [14/3258], Batch Loss: 0.2720, Time: 0.69 seconds
Epoch [1/10], Batch [15/3258

KeyboardInterrupt: 