In [None]:
# Mount Google Drive at /content/drive; the dataset and model paths used below live under it.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import torch
from torch import nn
from torch import optim
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import torchvision

import numpy as np

In [None]:
# Use the first CUDA GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

print(device)


In [None]:
# Permanently delete the class folders that are not used in this notebook from
# both dataset splits.  Folders that are already gone are skipped, so the cell
# can be re-run (e.g. Restart & Run All) without printing "No such file" errors.
import shutil
from pathlib import Path

DATASET_ROOT = Path("/content/drive/MyDrive/Image Project/Cars Dataset")
EXCLUDED_CLASSES = ("Hyundai Creta", "Mahindra Scorpio", "Rolls Royce")

for split in ("test", "train"):
    for class_name in EXCLUDED_CLASSES:
        class_dir = DATASET_ROOT / split / class_name
        if class_dir.is_dir():
            shutil.rmtree(class_dir)


In [None]:
# /content/drive/MyDrive/Image Project/Cars Dataset/test
# /content/drive/MyDrive/Image Project/Cars Dataset/train

In [None]:

import os
from PIL import Image, ImageFile

# With this flag off, PIL raises an error instead of silently decoding a truncated file.
ImageFile.LOAD_TRUNCATED_IMAGES = False

def is_not_truncated(file_path):
  """Return True when PIL can open and fully load ``file_path``.

  Any ``OSError`` raised while opening or loading the file makes the
  function return False instead of propagating the error.
  """
  try:
    with Image.open(file_path) as candidate:
      candidate.load()   # forces the full read of the image data
  except OSError:
    return False
  return True

In [None]:
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

n = 64  # every image is resized to n x n pixels

# Resize, convert to a tensor, then normalise each channel with mean 0.5 / std 0.5
# (the plotting cells undo this with `* 0.5 + 0.5`).
transform = transforms.Compose([
    transforms.Resize((n, n)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

# Files for which is_not_truncated returns False are left out of both datasets.
train_dataset = datasets.ImageFolder(
    '/content/drive/MyDrive/Image Project/Cars Dataset/train',
    transform=transform,
    is_valid_file=is_not_truncated,
)

val_dataset = datasets.ImageFolder(
    '/content/drive/MyDrive/Image Project/Cars Dataset/test',
    transform=transform,
    is_valid_file=is_not_truncated,
)

batch_size = 32

# drop_last=True discards a final batch smaller than batch_size in both loaders;
# only the training loader is shuffled.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, drop_last=True)


In [None]:
import matplotlib.pyplot as plt
from google.colab import files

import copy  # not needed in this cell any more, but generate_saliency_map below still uses it

# One row per class in the training set; columns: original image, then R, G and B only.
num_classes = len(train_dataset.classes)
num_cols = 4

# Walk the training loader until one example image has been collected for every class.
class_examples = {}
for example_data, example_targets in train_loader:
    for image, target in zip(example_data, example_targets):
        class_examples.setdefault(target.item(), image)   # keeps the first image seen per class
    if len(class_examples) == num_classes:
        break

# squeeze=False keeps `axes` two-dimensional even when there is a single class.
fig, axes = plt.subplots(num_classes, num_cols, figsize=(8, 6), squeeze=False)
fig_class_exemples = fig  # this name used to be bound to a separate, empty figure

for axis in axes.flat:
    axis.axis('off')

for row, label in enumerate(sorted(class_examples)):
    # channels-first, normalised tensor -> channels-last image with the normalisation undone
    img = class_examples[label].permute(1, 2, 0) * 0.5 + 0.5

    panels = [img]
    for channel in range(3):
        single_channel = torch.zeros_like(img)
        single_channel[:, :, channel] = img[:, :, channel]
        panels.append(single_channel)

    for axis, panel in zip(axes[row], panels):
        axis.imshow(panel)

    axes[row, 0].set_title("Class: {}".format(label))

# Add the title before tight_layout so the layout leaves room for it.
fig.suptitle('Class Examples', fontsize= 16 )
plt.tight_layout()
#plt.savefig('fig_class_exemples.pdf')
#files.download('fig_class_exemples.pdf')
plt.show()


## Convolutional Neural Network Model

In [None]:
class CNN(nn.Module):
    """Small convolutional classifier for square RGB images.

    Three blocks of Conv(5x5, padding 1) -> BatchNorm -> ReLU -> MaxPool(2) -> Dropout
    are followed by two fully connected layers.

    ``forward`` returns raw class scores (logits).  ``nn.CrossEntropyLoss`` applies
    log-softmax itself, so the network must not apply softmax before the loss;
    use ``torch.softmax(logits, dim=1)`` when probabilities are needed.

    Args:
        input_size: side length in pixels of the (square) input images.  When
            omitted, the notebook-level ``n`` is used.
        num_classes: number of output scores.

    Raises:
        ValueError: if ``input_size`` is too small to survive the three
            convolution/pooling stages.
    """

    def __init__(self, input_size=None, num_classes=4):
        super().__init__()

        if input_size is None:
            input_size = n  # image side length defined with the data transforms

        self.conv1 = nn.Conv2d(3, 16, kernel_size=(5,5), stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(16)
        self.conv2 = nn.Conv2d(16, 16, kernel_size=(5,5), stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(16)
        self.conv3 = nn.Conv2d(16, 16, kernel_size=(5,5), stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(16)

        self.activation = nn.ReLU()
        self.pooling = nn.MaxPool2d(kernel_size=2, stride=2)

        self.dropout = nn.Dropout(p=0.2)

        # Each stage shrinks the side by 2 (5x5 kernel, padding 1) and then halves it
        # (2x2 max pooling, rounding down).  For input_size=64 this gives 6, i.e.
        # 16 * 6 * 6 = 576 features, the same value as the previous closed-form size.
        side = input_size
        for _ in range(3):
            side = (side - 2) // 2
            if side < 1:
                raise ValueError(f"input_size={input_size} is too small for this network")

        self.fc1 = nn.Linear(16 * side * side, 16)
        self.fc2 = nn.Linear(16, num_classes)

    def reset_weights(self):
        """Re-initialise the weights of both fully connected layers (Xavier uniform)."""
        torch.nn.init.xavier_uniform_(self.fc1.weight)
        torch.nn.init.xavier_uniform_(self.fc2.weight)

    def forward(self, x):
        """Return one row of unnormalised class scores per input image."""
        x = self.activation(self.bn1(self.conv1(x)))
        x = self.pooling(x)
        x = self.dropout(x)

        x = self.activation(self.bn2(self.conv2(x)))
        x = self.pooling(x)
        x = self.dropout(x)

        x = self.activation(self.bn3(self.conv3(x)))
        x = self.pooling(x)
        x = self.dropout(x)

        x = torch.flatten(x, 1)

        x = self.activation(self.fc1(x))
        x = self.dropout(x)

        # No softmax here: the loss and the prediction helper both expect logits.
        return self.fc2(x)

model2 = CNN().to(device)

# torch.device.type is "cuda" or "cpu" (the GPU index lives in device.index), so the
# previous comparison against "cuda:0" never matched and the GPU branch was never taken.
if device.type == "cuda":
  model2 = torch.compile(model2, dynamic=True)
else:
  model2 = torch.compile(model2, mode = "max-autotune-no-cudagraphs", dynamic=True)


In [None]:
# Multi-class cross-entropy; per the PyTorch docs it expects unnormalised scores (logits) and class indices.
loss_function = nn.CrossEntropyLoss()

In [None]:
def accuracy(y_pred, y):
  """Fraction of samples whose highest-scoring class matches the label.

  Args:
    y_pred: tensor with one row of class scores per sample.
    y: tensor of class indices, one per sample.

  Returns:
    A 0-dim float tensor with the share of correct predictions.
  """
  hits = y_pred.detach().argmax(dim=1).eq(y)
  return (hits.sum() / hits.size(0)).float()

def loader_accuracy(model, loader):
  """Percentage of samples in ``loader`` that ``model`` classifies correctly.

  Correct predictions are counted per sample, so a smaller final batch is
  weighted by its size; with equally sized batches the result equals the mean
  of the per-batch accuracies.  Batches are moved to the device that holds the
  model's parameters.  The model's train/eval mode is left untouched.

  Args:
    model: module mapping a batch of inputs to one row of class scores per sample.
    loader: iterable of ``(inputs, labels)`` batches.

  Returns:
    A 0-dim float tensor on the CPU with the accuracy in percent.

  Raises:
    ValueError: if the loader yields no samples.
  """
  first_param = next(model.parameters(), None)
  # Parameter-free models fall back to the notebook-wide device.
  target_device = first_param.device if first_param is not None else device

  correct = 0
  seen = 0

  with torch.no_grad():

    for x, y in loader:

      x, y = x.to(target_device), y.to(target_device)

      # flatten(1) keeps the batch dimension even for a single-sample batch,
      # which a bare squeeze() would collapse.
      scores = model(x).flatten(1)

      correct += (scores.argmax(dim=1) == y).sum()
      seen += y.size(0)

  if seen == 0:
    raise ValueError("loader_accuracy received a loader that yields no samples")

  return (correct / seen * 100).float().cpu()

def step(model, x, y, optimizer = None):
  """Compute the loss of ``model`` on one batch and optionally update the weights.

  Args:
    model: module producing one row of class scores per sample.
    x: batch of inputs, already on the model's device.
    y: batch of class indices; cast to ``long`` before the loss is computed.
    optimizer: when given, gradients are reset, back-propagated and one
      optimisation step is taken; when None the batch is only scored.

  Returns:
    The batch loss as a detached tensor on the CPU.
  """
  out = model(x)

  # flatten(1) keeps the batch dimension; a bare squeeze() would drop it for a
  # single-sample batch and the loss would reject the resulting shapes.
  loss = loss_function(out.flatten(1), y.long())

  if optimizer is not None:

    optimizer.zero_grad()
    loss.backward()

    optimizer.step()

  return loss.detach().cpu()

def evaluate(model, loader, early_stopping = None):
  """Mean per-batch loss of ``model`` over ``loader``, computed without gradients.

  Args:
    model: module to score.
    loader: iterable of ``(inputs, targets)`` batches.
    early_stopping: optional callable invoked as ``early_stopping(loss, model)``
      with the mean loss once all batches have been scored.

  Returns:
    The mean loss as a detached tensor on the CPU.
  """
  with torch.no_grad():

    running_total = 0

    for batch_inputs, batch_targets in loader:
      running_total += step(model, batch_inputs.to(device), batch_targets.to(device))

    mean_loss = running_total / len(loader)

    # Let the early-stopping tracker see this epoch's loss
    if early_stopping is not None:
      early_stopping(mean_loss, model)

    return mean_loss.detach().cpu()


# Wrap the metric and the training step with torch.compile; off-GPU an explicit
# compile mode and dynamic shapes are requested.
if device.type != "cpu":
  compile_kwargs = {}
else:
  compile_kwargs = {"mode": "max-autotune-no-cudagraphs", "dynamic": True}

accuracy = torch.compile(accuracy, **compile_kwargs)
train = torch.compile(step, **compile_kwargs)


Early stopping

In [None]:
class EarlyStopping:
  """Track a validation metric where lower is better and flag when to stop.

  A call counts as an improvement when the metric is at most
  ``best_score - delta``; the model weights are then saved to ``path``.
  Any other call, including one with a NaN metric, increases the counter,
  and ``early_stop`` becomes True once ``patience`` such calls have happened
  in a row.  The first call always writes a checkpoint so that a file exists
  to load afterwards.
  """

  def __init__(self, patience=5, delta=0, verbose=False, path='checkpoint.pt'):
    self.patience = patience       # Number of epochs to wait for improvement
    self.delta = delta             # Minimum decrease of the metric that counts as improvement
    self.verbose = verbose         # If True, print the number of epochs without improvement
    self.path = path               # Path to save the model checkpoint
    self.counter = 0               # Counter to track epochs without improvement
    self.best_score = None         # Best (lowest) validation metric seen so far
    self.early_stop = False        # Flag to indicate whether to stop training

  def __call__(self, val_metric, model):
    """Record ``val_metric`` for ``model`` and update the stopping state."""
    is_nan = bool(val_metric != val_metric)   # NaN is the only value unequal to itself

    if self.best_score is None:
      # A NaN first score must not become the reference: every later comparison
      # against NaN is False, so use +inf and let the next finite value win.
      self.best_score = float('inf') if is_nan else val_metric
      self.save_checkpoint(model)
    elif not is_nan and val_metric <= self.best_score - self.delta:
      self.best_score = val_metric
      self.save_checkpoint(model)
      self.counter = 0
    else:
      # Worse, NaN, or better by less than delta: keep the previous best checkpoint.
      self.counter += 1
      if self.verbose:
        print(f'Epochs without improvement: {self.counter}')
      if self.counter >= self.patience:
        self.early_stop = True

  def save_checkpoint(self, model):
    """Write the model's state dict to ``self.path``."""
    torch.save(model.state_dict(), self.path)


Checkpoint directory

In [None]:
!mkdir /content/.checkpoints


## Learning rate choice and Convolutional Neural Network (CNN)

In [None]:
def train_model2(lrs = (0.01,), max_epochs = 100):
  """Train a fresh CNN for each learning rate and report its best-checkpoint metrics.

  For every learning rate a new model is trained with SGD for at most
  ``max_epochs`` epochs, with early stopping (patience 4) on the validation
  loss.  The checkpoint written by the early-stopping tracker is then loaded
  back and the model is scored on the training and validation loaders.

  Args:
    lrs: iterable of learning rates to try.
    max_epochs: upper bound on the number of epochs per learning rate.

  Returns:
    A list with one ``[train_loss, val_loss, train_acc, val_acc]`` entry per
    learning rate, in the order of ``lrs``.
  """
  results = []

  for learning_rate in lrs:

    print("Starting with lr:", learning_rate)
    model2 = CNN().to(device)

    # torch.device.type is "cuda" or "cpu"; comparing against "cuda:0" never matched.
    if device.type == "cuda":
      model2 = torch.compile(model2, dynamic=True)
    else:
      model2 = torch.compile(model2, mode = "max-autotune-no-cudagraphs", dynamic=True)

    optimizer = torch.optim.SGD(model2.parameters(), lr = learning_rate)

    checkpoint_path = f'/content/.checkpoints/model_checkpoint_{learning_rate}.pt'
    early_stopping = EarlyStopping(patience=4, verbose=False, path=checkpoint_path)

    for epoch in range(max_epochs):

      ## TRAIN STEP

      model2.train()

      for x_train, y_train in train_loader:

        x_train, y_train = x_train.to(device), y_train.to(device)
        step(model2, x_train, y_train, optimizer)

      ## TEST STEP

      model2.eval()

      evaluate(model2, val_loader, early_stopping)

      if early_stopping.early_stop:
          break

    # Score the best checkpoint rather than the weights of the last epoch.
    model2.load_state_dict(torch.load(checkpoint_path, map_location=device))
    model2.eval()

    train_acc = loader_accuracy(model2, train_loader)
    val_acc = loader_accuracy(model2, val_loader)

    train_loss = evaluate(model2, train_loader)
    val_loss = evaluate(model2, val_loader)

    results.append([train_loss, val_loss, train_acc, val_acc])
    print(f"\tTrain loss: {train_loss:.3f} and validation loss: {val_loss:.3f}\n\tTrain accuracy: {train_acc:.2f}% and validation accuracy: {val_acc:.2f}%\n\n")
    # No weight reset is needed here: the next learning rate starts from a new CNN().
  return results

In [None]:
# Learning rates to compare; each one is trained for at most 20 epochs.
lrs2 = [0.1,0.2,0.3, 0.4, 0.5,0.6]
results = train_model2(lrs2, max_epochs = 20)

In [None]:
# Alias the learning-rate sweep results under the name the following cells use.
results2 = results

In [None]:
# Build the array from `results` rather than from `results2` itself, so re-running
# this cell does not transpose the data back.  Rows: train loss, validation loss,
# train accuracy, validation accuracy; one column per learning rate.
results2 = np.array(results).T

In [None]:
fig, ax = plt.subplots(nrows=2, ncols=1, sharex = True)
cnn_cross_accuracy = fig  # this name used to be bound to a separate, empty figure

# Top panel: loss per learning rate
ax[0].plot(lrs2, results2[0], label = "Train")
ax[0].plot(lrs2, results2[1], label = "Test")

ax[0].legend()

ax[0].set_ylabel("Cross entropy")

# Bottom panel: accuracy per learning rate
ax[1].plot(lrs2, results2[2], label = "Train")
ax[1].plot(lrs2, results2[3], label = "Test")


ax[1].set_xlabel("Learning rate")
ax[1].set_ylabel("Accuracy (%)")

ax[1].set_xticks(lrs2)

ax[0].set(ylim=(0.90, 1.20))
ax[1].set(ylim=(50, 100))

fig.suptitle("CNN Network", fontsize=16)
# plt.savefig('cnn_cross_accuracy.pdf')
# files.download('cnn_cross_accuracy.pdf')

plt.show()

### Best lr CNN

In [None]:
EPOCHS = 30

# Per-epoch history used by the plots below
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []

learning_rate = 0.2
optimizer = torch.optim.SGD(model2.parameters(), lr = learning_rate)

checkpoint_path = '/content/.checkpoints/model_checkpoint.pt'
early_stopping = EarlyStopping(patience=7, verbose=True, path=checkpoint_path)


for epoch in range(EPOCHS):

  ## TRAIN STEP

  model2.train()

  train_loss = 0

  for x_train, y_train in train_loader:

    x_train, y_train = x_train.to(device), y_train.to(device)

    loss = step(model2, x_train, y_train, optimizer)

    train_loss += loss

  train_loss /= len(train_loader)
  train_losses.append(train_loss)

  ## TEST STEP

  model2.eval()

  train_acc = loader_accuracy(model2, train_loader)
  val_acc = loader_accuracy(model2, val_loader)

  train_accuracies.append(train_acc)
  val_accuracies.append(val_acc)

  # evaluate() also feeds the validation loss to the early-stopping tracker,
  # which saves a checkpoint whenever the loss improves.
  val_loss = evaluate(model2, val_loader, early_stopping)
  val_losses.append(val_loss.cpu())

  print("Epoch:", epoch, f"Train: {float(train_acc):.2f}%,  Validation: {float(val_acc):.2f}%,", "Loss_train: ",  f"{float(train_loss):.2f}", "Loss_val: ",  f"{float(val_loss):.2f}")

  if early_stopping.early_stop:
      print("Early stopping triggered.")
      break

# Continue with the best checkpoint instead of the last-epoch weights, as
# train_model2 does; without this the later cells would use a model trained
# past its best validation loss.
if val_losses:
  model2.load_state_dict(torch.load(checkpoint_path, map_location=device))
  model2.eval()

In [None]:
# Alias the training history under the names the plotting cell below uses.
train_losses2 = train_losses
val_losses2 = val_losses
train_accuracies2 = train_accuracies
val_accuracies2 = val_accuracies

In [None]:
fig, ax = plt.subplots(nrows=2, ncols=1, sharex = True)

# Epoch with the lowest validation loss; marked with a dashed line in both panels.
best_epoch = np.argmin(val_losses2)

# --- Top panel: loss curves ---
ax[0].plot(train_losses2, label = "Train")
ax[0].plot(val_losses2, label = "Test")

loss_lo, loss_hi = ax[0].get_ylim()
ax[0].set_ylim(loss_lo, loss_hi)
ax[0].vlines(best_epoch, ymin=0.95, ymax=1.4, color = 'black', linestyle = '--')
ax[0].text(x = best_epoch - 2, y = (loss_hi + loss_lo)/2 + 0.3, s = "Early stopping")
ax[0].legend()

ax[0].set_ylabel("Cross entropy")
ax[0].set(ylim=(0.95, 1.4))

# --- Bottom panel: accuracy curves ---
ax[1].plot(train_accuracies2, label = "Train")
ax[1].plot(val_accuracies2, label = "Test")
acc_lo, acc_hi = ax[1].get_ylim()
ax[1].set_ylim(acc_lo, acc_hi)
ax[1].vlines(best_epoch, ymin=30, ymax=100, color = 'black', linestyle = '--')

ax[1].set_xlabel("Epochs")
ax[1].set_ylabel("Accuracy (%)")
ax[1].set(ylim=(30,100))

fig.suptitle("CNN Network", fontsize=16)

plt.show()

### Model interpretation

In [None]:
from sklearn.metrics import confusion_matrix
model2.eval()

all_y = []
all_predictions = []

# Collect the true and the predicted class index of every validation sample.
with torch.no_grad():
    for x, y in val_loader:
        outputs = model2(x.to(device))
        predicted = torch.max(outputs, 1).indices   # index of the highest score per row

        all_y.extend(y.numpy())
        all_predictions.extend(predicted.cpu().numpy())

all_y = np.array(all_y)
all_predictions = np.array(all_predictions)

# normalize="true" scales each row (true class) to sum to 1.
conf_matrix = confusion_matrix(all_y, all_predictions, normalize = "true")

print(conf_matrix.round(decimals=2))

In [None]:
classes = val_dataset.classes
tick_marks = np.arange(len(classes))

# Heat map of the confusion matrix with class names on both axes.
cm_fig = plt.figure(figsize=(6, 4))
cm_ax = cm_fig.gca()

heatmap = cm_ax.imshow(conf_matrix)
cm_ax.set_title("Confusion matrix CNN Network")
cm_fig.colorbar(heatmap, ax=cm_ax)

cm_ax.set_xticks(tick_marks)
cm_ax.set_xticklabels(classes, rotation=45)
cm_ax.set_yticks(tick_marks)
cm_ax.set_yticklabels(classes)

cm_fig.tight_layout()
cm_ax.set_ylabel('True label')
cm_ax.set_xlabel('Predicted label')

plt.show()

In [None]:
def generate_saliency_map(model, X):
    """Gradient saliency map for every image in the batch ``X``.

    For each image the score of the predicted class is back-propagated to the
    input; the map is the largest absolute gradient across channels, rescaled
    to [0, 1] per image.  An image whose gradient map is constant (for example
    all zeros) gets an all-zero map instead of NaNs from a division by zero.

    The model is put in eval mode.  ``X`` itself is not modified: each image is
    cloned before gradients are enabled on it.

    Args:
        model: module mapping a one-image batch to a row of class scores.
        X: batch of images with the batch dimension first and channels second.

    Returns:
        A NumPy array with one 2-D map per image.
    """
    model.eval()

    # Run on the device that holds the model; parameter-free models stay on X's device.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else X.device

    saliency_maps = []

    for i in range(X.size(0)):

      # Independent copy so that enabling gradients never touches the caller's batch.
      x = X[i:i+1].detach().clone().to(target_device)
      x.requires_grad_(True)

      output = model(x).squeeze()
      target_class = output.argmax(dim=-1)   # predicted class

      model.zero_grad()
      output[target_class].backward()

      # Largest absolute gradient across the channel dimension
      pixel_map, _ = torch.max(x.grad.abs().squeeze(0), dim=0)
      pixel_map = pixel_map.cpu().numpy()

      # Rescale to [0, 1]; a constant map has no range to rescale.
      lowest = pixel_map.min()
      span = pixel_map.max() - lowest
      if span > 0:
        pixel_map = (pixel_map - lowest) / span
      else:
        pixel_map = np.zeros_like(pixel_map)

      saliency_maps.append(pixel_map)

    return np.array(saliency_maps)

In [None]:
# Take the first batch yielded by the (shuffled) training loader as the images to explain.
examples = enumerate(train_loader)
batch_idx, (example_data, example_targets) = next(examples)

In [None]:
# One saliency map per image of the example batch.
saliency_map = generate_saliency_map(model2, example_data)

In [None]:
from matplotlib import colormaps

num_rows = 4
num_cols = 4

colormap = colormaps["hot"] #select a colormap

fig, axes = plt.subplots(num_rows, num_cols, figsize=(8, 6))

# Show the colour-mapped saliency of the first num_rows * num_cols example images.
for i in range(num_rows):
  for j in range(num_cols):

    # Row-major position in the batch; uses num_cols so the grid size can be changed.
    sm = colormap(saliency_map[num_cols*i + j]) # Apply colormap to saliency map

    axes[i, j].imshow(transforms.ToPILImage()(sm))

    axes[i,j].axis('off')

plt.tight_layout()
plt.show()

In [None]:
fig, axes = plt.subplots(num_rows, num_cols, figsize=(8, 6))

# Blend each example image with its colour-mapped saliency map (40% saliency).
for i, j in np.ndindex(num_rows, num_cols):
    k = 4*i + j

    # Undo the normalisation and convert to a channels-last uint8 image.
    rescaled = (example_data[k].permute(1, 2, 0) * 0.5 + 0.5) * 255
    img = rescaled.detach().numpy().astype(np.uint8)

    # Drop the alpha channel of the colour-mapped saliency so both images are RGB.
    sm = colormap(saliency_map[k])[:, :, :3]

    photo = Image.fromarray(img)
    heat = transforms.ToPILImage()(sm)
    axes[i, j].imshow(Image.blend(photo, heat, alpha = 0.4))

    axes[i, j].axis('off')

plt.tight_layout()
plt.show()

### saving the model

In [None]:
# import torch
# import os

# # make sure checkpoint folder exists
# os.makedirs("/content/.checkpoints", exist_ok=True)

# # save model weights
# torch.save(
#     model2.state_dict(),
#     "/content/.checkpoints/cars_cnn_model.pth"
# )

# print("✅ Model saved successfully")


In [None]:
import torch
import os

# Define save directory
save_dir = "/content/drive/MyDrive/Image Project/models"
os.makedirs(save_dir, exist_ok=True)

# Full save path
model_path = os.path.join(save_dir, "cars_cnn_model.pth")

# model2 is a torch.compile wrapper that keeps the real network in `_orig_mod`.
# Saving the wrapper's state dict would prefix every key with "_orig_mod.",
# and a plain CNN() could not load such a file.  Save the wrapped network instead.
model_to_save = getattr(model2, "_orig_mod", model2)

# Save model weights
torch.save(model_to_save.state_dict(), model_path)

print(f"✅ Model saved at: {model_path}")


In [None]:
model2 = CNN().to(device)

saved_state = torch.load(
    "/content/drive/MyDrive/Image Project/models/cars_cnn_model.pth",
    map_location=device
)

# A checkpoint written from a torch.compile-wrapped model has every key prefixed
# with "_orig_mod."; strip the prefix so the plain CNN accepts the weights.
compile_prefix = "_orig_mod."
saved_state = {
    (key[len(compile_prefix):] if key.startswith(compile_prefix) else key): value
    for key, value in saved_state.items()
}

model2.load_state_dict(saved_state)

model2.eval()


Load a single test image

In [None]:
import cv2
import matplotlib.pyplot as plt

img_path = "/content/drive/MyDrive/Image Project/Cars Dataset/test/Audi/1008.jpg"

img_bgr = cv2.imread(img_path)
if img_bgr is None:
    # cv2.imread returns None instead of raising when the file is missing or unreadable.
    raise FileNotFoundError(f"Could not read image: {img_path}")

# OpenCV loads images in BGR channel order; matplotlib expects RGB.
img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

plt.imshow(img_rgb)
plt.axis("off")
plt.show()


Define the SAME transform used in training

In [None]:
from torchvision import transforms
from PIL import Image
import torch

n = 64  # same as training

# Same preprocessing as the training transform: resize, tensor, mean 0.5 / std 0.5 per channel.
test_transform = transforms.Compose([
    transforms.Resize((n, n)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])


In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model2 = CNN().to(device)

saved_state = torch.load(
    "/content/drive/MyDrive/Image Project/models/cars_cnn_model.pth",
    map_location=device
)

# A checkpoint written from a torch.compile-wrapped model has every key prefixed
# with "_orig_mod."; strip the prefix so the plain CNN accepts the weights.
compile_prefix = "_orig_mod."
saved_state = {
    (key[len(compile_prefix):] if key.startswith(compile_prefix) else key): value
    for key, value in saved_state.items()
}

model2.load_state_dict(saved_state)

model2.eval()


In [None]:
import torch.nn.functional as F

# class names must match ImageFolder order
class_names = train_dataset.classes

def predict_image_pytorch(image_path, model, class_names):
    """Classify one image file, display it with the prediction, and return the result.

    The image is converted to RGB, passed through ``test_transform`` and scored
    by ``model`` without gradients; softmax is applied to the model output.
    NOTE(review): this assumes ``model`` returns raw logits. If its forward pass
    already ends in a softmax, the second softmax here distorts the reported
    confidence - confirm against the model definition.

    Args:
        image_path: path of the image file to classify.
        model: module mapping a one-image batch to a row of class scores.
        class_names: sequence of class names indexed by class index.

    Returns:
        ``(predicted_class, confidence)`` with the confidence in percent.
    """
    rgb_image = Image.open(image_path).convert("RGB")
    batch = test_transform(rgb_image).unsqueeze(0).to(device)  # add batch dimension

    with torch.no_grad():
        probs = F.softmax(model(batch), dim=1)

    conf, pred = probs.max(dim=1)
    predicted_class = class_names[pred.item()]
    confidence = conf.item() * 100

    # show the image as stored on disk, titled with the prediction
    plt.imshow(Image.open(image_path))
    plt.axis("off")
    plt.title(f"Predicted: {predicted_class} ({confidence:.2f}%)")
    plt.show()

    return predicted_class, confidence


In [None]:
# Classify the single test image selected above and print the result.
predicted_class, confidence = predict_image_pytorch(
    img_path,
    model2,
    class_names
)

print(f"Predicted Class: {predicted_class}")
print(f"Confidence: {confidence:.2f}%")
