# **Library Imports**
Import required libraries for data handling, image manipulation, plotting, machine learning, and model evaluation.

In [None]:
!pip install seaborn

In [None]:
import torch
import h5py
import cv2
from PIL import Image
import numpy as np
import pandas as pd
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
import seaborn as sns
import torchvision.transforms as transforms
import torchvision.models as models
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import classification_report, roc_curve, confusion_matrix
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder, label_binarize

# **Load Data**
Read the dataset (vtid2_dataset.h5) using h5py and print the total number of images and labels.

In [None]:
# Read both HDF5 datasets fully into memory; the `[:]` slice materialises them
# as NumPy arrays so they remain usable after the file handle is closed.
with h5py.File('/kaggle/input/vtid2-dataset/vtid2_dataset.h5', 'r') as h5_file:
    images, labels = h5_file['images'][:], h5_file['labels'][:]

# The two counts are expected to match (one label per image).
print('Total number of images: ',len(images))
print('Total number of labels: ',len(labels))

Total number of images:  4356
Total number of labels:  4356


# **Data Exploration**
Print the number of samples for each unique class in the dataset.

In [None]:
# Tally how many samples each distinct label has in the raw dataset.
unique_classes, class_counts = np.unique(labels, return_counts=True)
for position in range(len(unique_classes)):
    print(f"Class {unique_classes[position]}: {class_counts[position]} samples")

Class b'hatchback': 606 samples
Class b'other': 600 samples
Class b'pickup': 1240 samples
Class b'sedan': 1230 samples
Class b'suv': 680 samples


# **Image Resizing and Preprocessing**
Resize images to (224, 224) and preprocess them using ResNet50's preprocess_input function.

In [None]:
# Resize every image to the 224x224 spatial size fed to the network.
#
# The previous version additionally called `preprocess_input`, a name that is
# never imported or defined in this notebook, so the cell raised NameError on a
# fresh kernel. The call is dropped: pixel values stay in their original
# 0-255 range, which is what the downstream augmentation (`adjust_brightness`
# clips to [0, 255] and casts to uint8) and `transforms.ToPILImage()` operate on.
# NOTE(review): assumes `images` holds uint8 arrays accepted by
# `Image.fromarray` - confirm against the HDF5 file.
resized_images = np.array([
    np.asarray(Image.fromarray(image).resize((224, 224)))
    for image in images
])

# **Data Augmentation Functions**
Define functions for augmenting images (brightness adjustment, flipping, rotating, zooming, and shifting).

In [None]:
def adjust_brightness(image, factor):
    """Return a brightened (or darkened) uint8 copy of `image`.

    `factor` is added to every pixel in float32 so the sum cannot wrap around,
    the result is clipped to [0, 255], and then cast to uint8. The input array
    is not modified.
    """
    brightened = np.clip(image.astype(np.float32) + factor, 0, 255)
    return brightened.astype(np.uint8)

def flip_image(image, flip_code):
    """Mirror `image` using `cv2.flip`.

    `flip_code` is forwarded unchanged to OpenCV; this notebook calls it with
    1 for a horizontal flip and 0 for a vertical flip.
    """
    mirrored = cv2.flip(image, flip_code)
    return mirrored

def rotate_image(image, angle):
    """Rotate `image` by `angle` degrees about its centre at scale 1.

    The output keeps the input's width and height, so corners that rotate
    outside the frame are cut off.
    """
    height, width = image.shape[:2]
    centre = (width / 2, height / 2)
    rotation = cv2.getRotationMatrix2D(centre, angle, 1)
    return cv2.warpAffine(image, rotation, (width, height))

def zoom_image(image, zoom_factor):
    """Scale `image` about its centre by `zoom_factor`, keeping the frame size.

    Implemented as an affine warp with a zero-degree rotation matrix whose
    scale is `zoom_factor`; the output has the same width and height as the
    input.
    """
    height, width = image.shape[:2]
    centre = (width / 2, height / 2)
    scaling = cv2.getRotationMatrix2D(centre, 0, zoom_factor)
    return cv2.warpAffine(image, scaling, (width, height))

def shift_image(image, dx, dy):
    """Translate `image` by `dx` pixels along x and `dy` pixels along y.

    The output keeps the input's width and height.
    """
    height, width = image.shape[:2]
    # 2x3 affine matrix: identity plus a translation column.
    translation = np.array([[1, 0, dx],
                            [0, 1, dy]], dtype=np.float32)
    return cv2.warpAffine(image, translation, (width, height))

# **Data Augmentation Application**
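Apply the same set of augmentations (brightness shift, horizontal and vertical flip, rotation, zoom, and shift) to every image, and collect each original image together with its augmented copies and their labels.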

In [None]:
augmented_images = []
augmented_labels = []

# For every resized image keep the original plus six augmented variants.
# The seven entries for one source image are appended consecutively and all
# share that image's label.
for source_image, source_label in zip(resized_images, labels):
    variants = [
        source_image,
        adjust_brightness(source_image, 50),   # +50 brightness
        flip_image(source_image, 1),           # horizontal flip
        flip_image(source_image, 0),           # vertical flip
        rotate_image(source_image, 30),        # 30 degree rotation
        zoom_image(source_image, 1.2),         # 1.2x zoom
        shift_image(source_image, 20, 20),     # 20 px shift in x and y
    ]
    augmented_images += variants
    augmented_labels += [source_label] * len(variants)

print('Total number of augmented images: ',len(augmented_images))
print('Total number of augmented labels: ',len(augmented_labels))

Total number of augmented images:  30492
Total number of augmented labels:  30492


# **Augmentation Summary**
Print the number of samples for each class after augmentation.

In [None]:
# Per-class sample counts over the augmented label list.
unique_classes_aug, class_counts_aug = np.unique(augmented_labels, return_counts=True)
counts_by_class = dict(zip(unique_classes_aug, class_counts_aug))
for class_label, count in counts_by_class.items():
    print(f"Class {class_label}: {count} samples")

Class b'hatchback': 4242 samples
Class b'other': 4200 samples
Class b'pickup': 8680 samples
Class b'sedan': 8610 samples
Class b'suv': 4760 samples


In [None]:
# Number of distinct labels; used later for the one-hot width and the size of
# the classifier head.
distinct_labels = np.unique(augmented_labels)
num_classes = len(distinct_labels)
print('Number of classes',num_classes)

Number of classes 5


# **Label Encoding and One-Hot Encoding**
Convert labels to numerical format and one-hot encode the labels.

In [None]:
# Map each label to an integer class id.
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(augmented_labels)

# id -> original label, for reading predictions back later.
label_mapping = dict(enumerate(label_encoder.classes_))
print(label_mapping)

# One-hot float targets. Note that `labels` now refers to this tensor and no
# longer to the raw label array loaded from the HDF5 file.
label = torch.tensor(encoded_labels)
labels = F.one_hot(label, num_classes=num_classes).float()

{0: b'hatchback', 1: b'other', 2: b'pickup', 3: b'sedan', 4: b'suv'}


# **Custom dataset class named ImageDataset for PyTorch**

In [None]:
class ImageDataset(Dataset):
    """Map-style dataset that pairs `data[i]` with `labels[i]`.

    Args:
        data: indexable collection of samples.
        labels: indexable collection of targets, aligned with `data`.
        transform: optional callable applied to each sample (not to the
            target) when it is fetched.
    """

    def __init__(self, data, labels, transform=None):
        self.data, self.labels, self.transform = data, labels, transform

    def __len__(self):
        # The dataset length is defined by the samples, not the labels.
        return len(self.data)

    def __getitem__(self, index):
        sample, target = self.data[index], self.labels[index]
        if not self.transform:
            return sample, target
        return self.transform(sample), target
# Channel statistics of ImageNet. The pretrained torchvision GoogLeNet is
# built with `transform_input=True`, which assumes its input has already been
# normalised with these values, so the pipeline must apply them.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),  # HWC image -> CHW float tensor
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

# Guard against hidden notebook state: `labels` must be the one-hot tensor
# built from `augmented_labels`, one row per augmented image.
assert len(augmented_images) == len(labels), (
    f"{len(augmented_images)} images but {len(labels)} labels - "
    "re-run the label-encoding cell before building the dataset"
)

dataset = ImageDataset(np.array(augmented_images), labels, transform=transform)

# **Model Building**
Setting Up a Pretrained GoogLeNet Model for Classification

In [None]:
# Start from an ImageNet-pretrained GoogLeNet and replace its final fully
# connected layer with a fresh `num_classes`-way classifier head.
# NOTE(review): `pretrained=True` is deprecated in torchvision >= 0.13 in
# favour of the `weights=` argument; kept here for compatibility with older
# installs.
model = models.googlenet(pretrained=True)
num_input_features = model.fc.in_features
model.fc = nn.Linear(num_input_features, num_classes)

# Move the model to the GPU when one is available. This is done once (the
# earlier `model.cuda()` + `model.to(device)` pair was redundant), and the
# result is assigned so the cell does not print the whole architecture.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = model.to(device)

# **K-Fold Cross-Validation Setup**
Initialize K-Fold cross-validation (3 folds) and define variables to collect evaluation metrics.

# **Model Training and Evaluation**
For each fold, train the model, make predictions, and evaluate the model's performance using accuracy, precision, recall, F1-score, and confusion matrix.

# **Average Performance Metrics**
Calculate and print the average accuracy, weighted precision, recall, and F1-score across all folds.

In [None]:
# ---- Hyperparameters --------------------------------------------------------
learning_rate = 0.0001
epochs = 3
batch_size = 32
k = 3
seed = 42  # makes the fold assignment and the new head's initialisation repeatable

torch.manual_seed(seed)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def _build_model():
    """Return a fresh ImageNet-pretrained GoogLeNet with a `num_classes`-way head on `device`."""
    net = models.googlenet(pretrained=True)
    net.fc = nn.Linear(net.fc.in_features, num_classes)
    return net.to(device)


# ---- Leakage-free fold construction -----------------------------------------
# The augmentation cell appends each source image followed by its augmented
# copies consecutively, so sample i of `dataset` derives from source image
# i // copies_per_image. Folds are drawn over *source images* and then expanded,
# otherwise augmented copies of a training image would end up in the test split.
num_source_images = len(images)
if num_source_images == 0 or len(dataset) % num_source_images != 0:
    raise ValueError(
        "dataset size is not a whole multiple of the number of source images; "
        "cannot group augmented copies by source image"
    )
copies_per_image = len(dataset) // num_source_images
_copy_offsets = np.arange(copies_per_image)


def _expand_to_samples(source_indices):
    """Map source-image indices to the dataset indices of all their copies."""
    return (source_indices[:, None] * copies_per_image + _copy_offsets).ravel()


kf = KFold(n_splits=k, shuffle=True, random_state=seed)

# ---- Per-fold metric collectors ---------------------------------------------
accuracy_values = []
weighted_precision_values = []
weighted_recall_values = []
weighted_f1_score_values = []

all_true_labels = []
# NOTE: despite the name, this collects per-class probability vectors (as in
# the original cell), not argmax labels.
all_pred_labels = []
confusion_matrices = []

# Targets are one-hot float vectors; CrossEntropyLoss accepts such
# class-probability targets (PyTorch >= 1.10).
criterion = nn.CrossEntropyLoss()

for fold, (train_sources, test_sources) in enumerate(kf.split(np.arange(num_source_images)), 1):
    print("Fold:", fold)
    train_index = _expand_to_samples(train_sources)
    test_index = _expand_to_samples(test_sources)

    train_sampler = torch.utils.data.SubsetRandomSampler(train_index)
    test_sampler = torch.utils.data.SubsetRandomSampler(test_index)

    train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
    test_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=test_sampler)

    # Every fold starts from freshly loaded pretrained weights and a new
    # optimizer. Reusing one model across folds would evaluate folds 2..k on
    # samples the model had already been trained on in earlier folds.
    model = _build_model()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        # `targets` (not `labels`) so the global one-hot `labels` tensor is not clobbered.
        for batch_idx, (inputs, targets) in enumerate(train_loader):
            inputs = inputs.to(device)
            targets = targets.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

            print(f"\rEpoch {epoch+1}/{epochs}, Batch {batch_idx+1}/{len(train_loader)}, Loss: {loss.item():.4f}", end='')

        # Pad the summary so it fully overwrites the longer per-batch progress line.
        epoch_summary = f"Epoch {epoch+1}/{epochs}, Loss: {running_loss / len(train_loader):.4f}"
        print(f"\r{epoch_summary:<60}")

    model.eval()
    y_true = []
    y_pred = []

    # No gradients are needed for evaluation; skipping them saves memory and time.
    with torch.no_grad():
        for inputs, targets in test_loader:
            outputs = model(inputs.to(device))
            probabilities = F.softmax(outputs, dim=1)

            y_true.extend(targets.tolist())
            y_pred.extend(probabilities.cpu().tolist())

    print(f"\rEvaluation: Batch {len(test_loader)}/{len(test_loader)}", end='\n')

    # One-hot / probability rows -> integer class ids.
    y_true_labels = np.argmax(y_true, axis=1)
    y_pred_labels = np.argmax(y_pred, axis=1)

    # Fix the label set so every fold yields a num_classes x num_classes matrix
    # even if some class is absent from a fold.
    cm = confusion_matrix(y_true_labels, y_pred_labels, labels=np.arange(num_classes))
    report = classification_report(y_true_labels, y_pred_labels, output_dict=True)

    confusion_matrices.append(cm)
    accuracy_values.append(report['accuracy'])
    weighted_precision_values.append(report['weighted avg']['precision'])
    weighted_recall_values.append(report['weighted avg']['recall'])
    weighted_f1_score_values.append(report['weighted avg']['f1-score'])

    all_true_labels.extend(y_true_labels)
    all_pred_labels.extend(y_pred)


# ---- Averages across folds --------------------------------------------------
avg_accuracy = np.mean(accuracy_values)
avg_weighted_precision = np.mean(weighted_precision_values)
avg_weighted_recall = np.mean(weighted_recall_values)
# Stored under its own name so the per-fold list is not overwritten by its mean.
avg_weighted_f1_score = np.mean(weighted_f1_score_values)

print('Average accuracy:', avg_accuracy)
print('Average weighted precision:', avg_weighted_precision)
print('Average weighted recall:', avg_weighted_recall)
print('Average weighted f1_score:', avg_weighted_f1_score)

Fold: 1
Epoch 1/3, Loss: 0.19766, Loss: 0.0173
Epoch 2/3, Loss: 0.01086, Loss: 0.2468
Epoch 3/3, Loss: 0.01266, Loss: 0.0014
Evaluation: Batch 318/318
Fold: 2
Epoch 1/3, Loss: 0.00676, Loss: 0.0031
Epoch 2/3, Loss: 0.00906, Loss: 0.0006
Epoch 3/3, Loss: 0.00616, Loss: 0.0216
Evaluation: Batch 318/318
Fold: 3
Epoch 1/3, Loss: 0.00506, Loss: 0.0039
Epoch 2/3, Loss: 0.00506, Loss: 0.0002
Epoch 3/3, Loss: 0.00696, Loss: 0.0013
Evaluation: Batch 318/318
Average accuracy: 0.9991473173291355
Average weighted precision: 0.9991486530034693
Average weighted recall: 0.9991473173291355
Average weighted f1_score: 0.999146552037773
