# Task 1

In [1]:
import torch
import cv2 as cv
import numpy as np
import pandas as pd
import os

import torchvision
from sklearn.model_selection import train_test_split

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load data

In [3]:
car_image_names_path = './data/vehicles'
car_image_names = [f"{car_image_names_path}/{path}" for path in os.listdir(car_image_names_path)]
non_car_image_names_path = './data/non-vehicles'
non_car_image_names = [f"{non_car_image_names_path}/{path}" for path in os.listdir(non_car_image_names_path)]
car_image_names[:3], non_car_image_names[:3]

(['./data/vehicles/1.png',
  './data/vehicles/10.png',
  './data/vehicles/1000.png'],
 ['./data/non-vehicles/0new_img.png',
  './data/non-vehicles/3new_img.png',
  './data/non-vehicles/4new_img.png'])

In [4]:
images = []
labels = []

images.extend(car_image_names)
labels.extend([1 for _ in range(len(car_image_names))])

images.extend(non_car_image_names)
labels.extend([0 for _ in range(len(non_car_image_names))])
len(images), len(labels)

(17492, 17492)

In [5]:
images[0]

'./data/vehicles/1.png'

In [6]:
x_train, x_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)
len(x_train), len(x_test)

(13993, 3499)

In [7]:
from torchvision.io import read_image
from torch.utils.data import Dataset

class VehicleDataset(Dataset):
    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform
    
    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, idx):
        image = read_image(self.images[idx]).float() / 255.0
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image.to(device), torch.tensor(label, device=device, dtype=torch.float)

In [8]:
from torchvision.transforms import transforms
from torch.utils.data import DataLoader

transformer = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to match ResNet input size
])

training_dataset = VehicleDataset(x_train, y_train, transform=transformer)
training_dataloader = DataLoader(training_dataset, batch_size=64, shuffle=True)

testing_dataset = VehicleDataset(x_test, y_test, transform=transformer)
testing_dataloader = DataLoader(testing_dataset, batch_size=64, shuffle=True)

In [9]:
batch = next(iter(training_dataloader))
print(batch[0].shape, batch[1].shape)

torch.Size([64, 3, 224, 224]) torch.Size([64])


In [None]:
from torchvision import models

model = models.resnet34(pretrained=True)

In [12]:
from torch import nn

num_classes = 1
model.fc = nn.Sequential(
    nn.Linear(model.fc.in_features, num_classes),
    nn.Sigmoid()
)

In [13]:
criterion = nn.BCELoss()

In [14]:
freeze_layers = 2 # the number of last layers to be unfreezed
final_layers_params = []
rest_of_model_params = []

nr_of_layers = 0
for idx, param in enumerate(model.parameters()):
    nr_of_layers += 1
    
for idx, param in enumerate(model.parameters()):
    if idx >= nr_of_layers - freeze_layers:
        print(idx)
        final_layers_params.append(param)
    else:
        # param.requires_grad = False
        rest_of_model_params.append(param)        

108
109


In [15]:
# Separate the parameters for the final layer and the rest of the model
final_layer_params = list(model.fc.parameters())

# Define the optimizer
optimizer = torch.optim.Adam([
    {'params': rest_of_model_params, 'lr': 1e-5},
    {'params': final_layer_params, 'lr': 1e-3}
])
epochs = 3

In [16]:
model.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [17]:
from tqdm import tqdm

score_threshold = 0.8
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=25):
    
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        train_corrects = 0
        
        for inputs, labels in tqdm(train_loader):
            labels = labels.unsqueeze(1)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item() * inputs.size(0)
            preds = (outputs >= score_threshold).float()
            train_corrects += torch.sum(preds == labels.data)
            # progress_bar.update(1)

        epoch_loss = running_loss / len(train_loader.dataset)
        train_acc = train_corrects.double() / len(train_loader.dataset)
        print(f'Epoch {epoch + 1}/{num_epochs}, Train Loss: {epoch_loss:.4f}, Train Accuracy: {train_acc:.4f}')
        
        model.eval()
        val_running_loss = 0.0
        val_corrects = 0
        
        with torch.no_grad():
            for inputs, labels in val_loader:
                labels = labels.unsqueeze(1)
                
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                
                val_running_loss += loss.item() * inputs.size(0)
                preds = (outputs >= score_threshold).float()
                val_corrects += torch.sum(preds == labels.data)
        
        val_loss = val_running_loss / len(val_loader.dataset)
        val_acc = val_corrects.double() / len(val_loader.dataset)
        print(f'Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_acc:.4f}')
        print("--------------------------------------------------------------------")
    
    return model

In [18]:
model = train_model(model, training_dataloader, testing_dataloader, criterion, optimizer, num_epochs=epochs)

100%|██████████| 219/219 [00:29<00:00,  7.53it/s]


Epoch 1/3, Train Loss: 0.0556, Train Accuracy: 0.9680
Validation Loss: 0.0034, Validation Accuracy: 0.9986
--------------------------------------------------------------------


100%|██████████| 219/219 [00:28<00:00,  7.67it/s]


Epoch 2/3, Train Loss: 0.0041, Train Accuracy: 0.9989
Validation Loss: 0.0025, Validation Accuracy: 0.9980
--------------------------------------------------------------------


100%|██████████| 219/219 [00:28<00:00,  7.68it/s]


Epoch 3/3, Train Loss: 0.0025, Train Accuracy: 0.9989
Validation Loss: 0.0022, Validation Accuracy: 0.9986
--------------------------------------------------------------------


In [19]:
torch.save(model.state_dict(), 'model.pth')

In [20]:
model.load_state_dict(torch.load('model.pth', map_location=device))

<All keys matched successfully>

In [21]:
from tqdm import tqdm
wrong_predictions_images = []
def test_model(model, test_loader):
    model.eval()
    test_loss = 0.0
    test_corrects = 0
    for inputs, labels in tqdm(test_loader):
        labels = labels.unsqueeze(1)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        
        test_loss += loss.item() * inputs.size(0)
        preds = (outputs >= score_threshold).float()
        
        for img, pred, in zip(inputs, preds):
            if pred == pred:
                test_corrects += 1
            else:
                wrong_predictions_images.append(img)
        
    val_loss = test_loss / len(test_loader.dataset)
    val_acc = test_corrects / len(test_loader.dataset)
    print(f'Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_acc:.4f}')
    print("--------------------------------------------------------------------")

In [22]:
test_model(model, testing_dataloader)

100%|██████████| 55/55 [00:03<00:00, 15.44it/s]


Validation Loss: 0.0022, Validation Accuracy: 1.0000
--------------------------------------------------------------------


In [23]:
wrong_predictions_images

[]

# Test specific image

In [24]:
from PIL import Image
import torchvision

test_img_path = 'test.jpg'

img = read_image(test_img_path).float() / 255.0

img = transformer(img)
img = img.unsqueeze(0)

img = img.to(device)
model.eval()


with torch.no_grad():
    outputs = model(img)
    predicted_class = (outputs >= score_threshold).float().item()
    print(outputs)

print(f"Predicted class: {predicted_class}")

RuntimeError: [Errno 2] No such file or directory: 'test.jpg'