# Task 1

In [40]:
import torch
import cv2 as cv
import numpy as np
import pandas as pd
import os

import torchvision
from sklearn.model_selection import train_test_split

# Load data

In [41]:
car_image_names_path = './data/vehicles'
non_car_image_names_path = './data/non-vehicles'

# os.listdir returns entries in arbitrary, filesystem-dependent order. Sorting
# makes the path lists deterministic, so that the seeded train/test split
# further down yields the same partition on every machine and every re-run.
car_image_names = [
    f"{car_image_names_path}/{path}"
    for path in sorted(os.listdir(car_image_names_path))
]
non_car_image_names = [
    f"{non_car_image_names_path}/{path}"
    for path in sorted(os.listdir(non_car_image_names_path))
]

# Show the first few paths of each class as a sanity check.
car_image_names[:3], non_car_image_names[:3]

In [42]:
# Concatenate both classes into one list of paths: vehicles first, then
# non-vehicles.
images = [*car_image_names, *non_car_image_names]

# Parallel label list: 1 for every vehicle path, 0 for every non-vehicle path.
labels = [1] * len(car_image_names) + [0] * len(non_car_image_names)

len(images), len(labels)

In [43]:
# Peek at the first collected path to sanity-check the path format.
images[0]

In [44]:
# Hold out 20% of the (path, label) pairs for evaluation; the fixed
# random_state keeps the shuffle reproducible for a given input order.
x_train, x_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42)
# Number of samples in each split.
len(x_train), len(x_test)

In [45]:
from torchvision.io import read_image
from torch.utils.data import Dataset
from PIL import Image

class VehicleDataset(Dataset):
    """Map-style dataset that lazily loads images from disk by path.

    Args:
        images: Sequence of image file paths.
        labels: Sequence of labels, parallel to ``images``.
        transform: Optional callable applied to the loaded RGB PIL image
            before it is returned.

    Raises:
        ValueError: If ``images`` and ``labels`` differ in length.
    """

    def __init__(self, images, labels, transform=None):
        # Fail fast: a length mismatch would otherwise only surface as an
        # IndexError (or silently unused labels) in the middle of an epoch.
        if len(images) != len(labels):
            raise ValueError(
                f"images and labels must have the same length, "
                f"got {len(images)} and {len(labels)}"
            )
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        The image is opened, converted to RGB and, if a transform was given,
        passed through it.
        """
        # Image.open is lazy and keeps the file handle open; the context
        # manager closes it as soon as convert() has produced an in-memory
        # copy, so long runs do not accumulate open files.
        with Image.open(self.images[idx]) as raw_image:
            image = raw_image.convert("RGB")
        label = self.labels[idx]

        if self.transform is not None:
            image = self.transform(image)

        return image, label

In [46]:
from torchvision.transforms import transforms
from torch.utils.data import DataLoader

transformer = transforms.Compose([
    # Bring every image to a fixed 64x64 size. This is NOT ResNet's usual
    # 224x224 ImageNet resolution; the network still accepts it, but the
    # pretrained features were learned at a larger scale.
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    # ImageNet per-channel mean/std, matching the statistics the pretrained
    # torchvision weights expect.
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

training_dataset = VehicleDataset(x_train, y_train, transform=transformer)
training_dataloader = DataLoader(training_dataset, batch_size=64, shuffle=True)

testing_dataset = VehicleDataset(x_test, y_test, transform=transformer)
# Evaluation only aggregates loss/accuracy over the whole split, so shuffling
# adds nothing; keep a fixed order for deterministic, repeatable evaluation.
testing_dataloader = DataLoader(testing_dataset, batch_size=64, shuffle=False)

In [47]:
from torchvision import models

# Load an ImageNet-pretrained ResNet-34. `pretrained=True` is deprecated in
# newer torchvision releases in favour of the `weights=` enum
# (IMAGENET1K_V1 is the same checkpoint); fall back to the legacy argument
# on older versions that do not provide the enum.
if hasattr(models, "ResNet34_Weights"):
    model = models.resnet34(weights=models.ResNet34_Weights.IMAGENET1K_V1)
else:
    model = models.resnet34(pretrained=True)

In [48]:
# Display the module tree of the loaded network.
model

In [49]:
from torch import nn

# Single output unit: the network predicts one vehicle probability per image.
num_classes = 1

# On a fresh kernel `model.fc` is the original nn.Linear, but once this cell
# has run it is the nn.Sequential built below, which has no `in_features`.
# Look the Linear layer up in either form so the cell can be re-run safely.
fc_linear = model.fc[0] if isinstance(model.fc, nn.Sequential) else model.fc

# Replace the classification head with a freshly initialised
# Linear -> Sigmoid head producing a probability in [0, 1].
model.fc = nn.Sequential(
    nn.Linear(fc_linear.in_features, num_classes),
    nn.Sigmoid()
)

In [50]:
# Binary cross-entropy on probabilities; it pairs with the nn.Sigmoid that
# ends the model head (BCELoss expects inputs already in [0, 1]).
criterion = nn.BCELoss()

In [51]:
# Number of trailing *parameter tensors* (not layers) that form the "final"
# group. Despite the name, nothing is frozen here: every parameter keeps
# requires_grad=True, and the two groups only receive different learning
# rates from the optimizer.
# NOTE(review): with the head defined above, the last two tensors are
# presumably the weight and bias of model.fc -- confirm before changing this.
freeze_layers = 2

all_params = list(model.parameters())
nr_of_layers = len(all_params)  # total number of parameter tensors

# Clamp at 0 so that a freeze_layers larger than the model puts every tensor
# into the final group (same outcome as comparing each index individually).
split_idx = max(nr_of_layers - freeze_layers, 0)
rest_of_model_params = all_params[:split_idx]
final_layers_params = all_params[split_idx:]

In [52]:
# Use the split computed in the previous cell for BOTH groups. Re-deriving the
# final group from model.fc alone would leave tensors out of the optimizer
# (and therefore never updated) as soon as freeze_layers is changed from 2.
final_layer_params = final_layers_params

# Guard against hidden-state bugs: the two groups together must cover exactly
# the parameters of the current model (e.g. the head was rebuilt but the
# split cell was not re-run).
grouped_param_ids = {id(p) for p in rest_of_model_params + final_layer_params}
assert grouped_param_ids == {id(p) for p in model.parameters()}, (
    "optimizer parameter groups are out of sync with the model; "
    "re-run the parameter split cell"
)

# Define the optimizer: small learning rate for the pretrained backbone,
# larger one for the newly initialised final parameters.
optimizer = torch.optim.Adam([
    {'params': rest_of_model_params, 'lr': 1e-5},
    {'params': final_layer_params, 'lr': 0.001}
])
epochs = 3

In [53]:
# from tqdm import tqdm
# 
# num_training_steps = epochs * len(training_dataloader)
# progress_bar = tqdm(range(num_training_steps))

In [54]:
# Probability at or above which a sample is counted as the positive class.
score_threshold = 0.8


def _run_epoch(model, loader, criterion, device, threshold, optimizer=None):
    """Run one pass over ``loader``; train when ``optimizer`` is given, else evaluate.

    Returns:
        ``(mean_loss, accuracy)`` as Python floats, averaged over the samples
        actually seen. Both are NaN when the loader yields no batches.
    """
    training = optimizer is not None
    model.train(training)  # train(False) is equivalent to eval()

    total_loss = 0.0
    n_correct = 0
    n_seen = 0

    # Gradients are only needed (and only tracked) during training.
    with torch.set_grad_enabled(training):
        for inputs, labels in loader:
            inputs = inputs.to(device)
            # Labels become float with a trailing dim to match the
            # (batch, 1) model output expected by the criterion.
            labels = labels.to(device).float().unsqueeze(1)

            if training:
                optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()

            batch_size = inputs.size(0)
            # The criterion returns a batch mean; weight it by the batch size
            # so a smaller last batch does not skew the epoch average.
            total_loss += loss.item() * batch_size
            preds = (outputs >= threshold).float()
            n_correct += (preds == labels).sum().item()
            n_seen += batch_size

    if n_seen == 0:
        return float("nan"), float("nan")
    return total_loss / n_seen, n_correct / n_seen


def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=25, threshold=None):
    """Train ``model`` and report loss/accuracy on both loaders after every epoch.

    Args:
        model: Network whose output is a probability of shape ``(batch, 1)``.
        train_loader: Iterable of ``(inputs, labels)`` batches used for training.
        val_loader: Iterable of ``(inputs, labels)`` batches used for evaluation.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer already bound to the parameters of ``model``.
        num_epochs: Number of passes over ``train_loader``.
        threshold: Decision threshold for the accuracy figures. Defaults to the
            module-level ``score_threshold`` when omitted.

    Returns:
        The trained model, moved to CUDA if available (CPU otherwise).
    """
    if threshold is None:
        threshold = score_threshold

    # Set the device to CUDA if available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    for epoch in range(num_epochs):
        epoch_loss, train_acc = _run_epoch(
            model, train_loader, criterion, device, threshold, optimizer=optimizer
        )
        print(f'Epoch {epoch + 1}/{num_epochs}, Train Loss: {epoch_loss:.4f}, Train Accuracy: {train_acc:.4f}')

        val_loss, val_acc = _run_epoch(model, val_loader, criterion, device, threshold)
        print(f'Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_acc:.4f}')
        print("--------------------------------------------------------------------")

    return model

In [55]:
# Fine-tune the network. The held-out test loader is passed as the validation
# loader, so the per-epoch "Validation" figures are computed on the test split.
model = train_model(model, training_dataloader, testing_dataloader, criterion, optimizer, num_epochs=epochs)

In [56]:
# Persist only the learned weights (state_dict), not the module object itself.
torch.save(model.state_dict(), 'model.pth')

# Fetch the project training data

In [57]:
project_path = "../train/Task1"

# Ground-truth files live in their own sub-folder.
gt_query_paths = sorted(os.listdir(project_path + "/ground-truth"))

# Split the task folder's entries by extension: .jpg images vs .txt queries.
file_paths = os.listdir(project_path)
images_paths = sorted(name for name in file_paths if name.endswith(".jpg"))
query_paths = sorted(name for name in file_paths if name.endswith(".txt"))

# Print each sorted listing together with its size.
for listing in (images_paths, query_paths, gt_query_paths):
    print(listing, len(listing))

# Test image

In [67]:
from PIL import Image
import torchvision

test_img_path = 'test.png'

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Put the model in inference mode explicitly rather than relying on whatever
# mode an earlier cell happened to leave it in (matters for BatchNorm with a
# single-image batch).
model.eval()

# Open the file in a context manager so its handle is closed once the image
# has been converted; apply the same preprocessing as during training.
with Image.open(test_img_path) as raw_img:
    img = transformer(raw_img.convert('RGB'))  # Ensure image is in RGB mode
# Add the batch dimension the model expects and move to the model's device.
img = img.unsqueeze(0).to(device)

with torch.no_grad():
    outputs = model(img)
    # 1.0 when the predicted probability reaches score_threshold, else 0.0.
    predicted_class = (outputs >= score_threshold).float().item()
    print(outputs)

print(f"Predicted class: {predicted_class}")