<a href="https://colab.research.google.com/github/kaiyiyu/MachinePerception/blob/main/ImageClassification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import necessary packages

# General
import os
import glob
import random
import cv2 as cv
import numpy as np
import time

# Metrics and Clustering (Scikit-learn)
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.cluster import KMeans
from sklearn import svm
from sklearn.model_selection import train_test_split

# Plotting and Visualization
import matplotlib.pyplot as plt

# Image and Spatial Processing (SciPy)
from scipy import ndimage
from scipy.spatial import distance
from collections import Counter

# Deep Learning (PyTorch and torchvision)
import torch
import torch.optim as optim
import torchvision
import torchvision.models as models
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

# Image Handling
from PIL import Image

# Deep Learning Utilities (d2l)
!pip install d2l==1.0.3
from d2l import torch as d2l

In [None]:
# Connect Google Drive to this Colab session.
# The drive is mounted at /content/drive/; later cells read and write the
# dataset under that mount point.
from google.colab import drive
drive.mount('/content/drive/')

# **TASK 1: Data Preparation**

The code for splitting the data into folders was adapted from https://aravinda-gn.medium.com/how-to-split-image-dataset-into-train-validation-and-test-set-5a41c48af332

In [None]:
# Folder on the mounted Drive that holds digits.png and receives the split.
path = '/content/drive/MyDrive/Machine Perception/assignment'

# Output folders for the train/test split. They are defined before the loop so
# that later cells can still use them when digits.png is not found.
base_path = '/content/drive/MyDrive/Machine Perception/assignment'
train_folder_path = os.path.join(base_path, 'Train')
test_folder_path = os.path.join(base_path, 'Test')

# glob returns an empty list when digits.png is missing, so the body is
# simply skipped in that case.
for filename in glob.glob(os.path.join(path, 'digits.png')):
    # Similar to pracs, simply for testing
    print("\n *********** IMAGE " + os.path.basename(filename) + " ************ \n")
    img = cv.imread(filename)
    if img is None:
        # cv.imread signals an unreadable file by returning None.
        print("Could not read image: ", filename)
        continue
    print("The image file name is: ", os.path.basename(filename))
    height, width, channels = img.shape
    print("The image height is: ", height)
    print("The image width is: ", width)
    tiny_images = []
    labels = []

    # Extract tiny images of 20x20 from digits.png
    for i in range(0, img.shape[0], 20):
        for j in range(0, img.shape[1], 20):
            tiny_image = img[i:i+20, j:j+20]
            tiny_images.append(tiny_image)
            # i is a pixel row; 5 cell rows of 20 px each belong to one digit.
            labels.append(i // (20 * 5))

    # Shuffle and split the images and labels together
    # 50 rows x 100 columns = 5000 tiny images with 5 rows for each digit
    combined = list(zip(tiny_images, labels))
    random.shuffle(combined)
    tiny_images[:], labels[:] = zip(*combined)
    split_idx = int(0.8 * len(tiny_images))  # 80% train / 20% test
    train_images = tiny_images[:split_idx]
    train_labels = labels[:split_idx]
    test_images = tiny_images[split_idx:]
    test_labels = labels[split_idx:]

    # Create the folders first: os.listdir raises FileNotFoundError on a
    # missing directory, which made the very first run fail.
    os.makedirs(train_folder_path, exist_ok=True)
    os.makedirs(test_folder_path, exist_ok=True)

    # Only write the split when both folders are still empty, so an existing
    # split is never mixed with a freshly shuffled one.
    if not os.listdir(train_folder_path) and not os.listdir(test_folder_path):
        # File names encode the label: <split>_<label>_<index>.png
        for idx, (image, label) in enumerate(zip(train_images, train_labels)):
            cv.imwrite(f'{train_folder_path}/train_{label}_{idx}.png', image)

        for idx, (image, label) in enumerate(zip(test_images, test_labels)):
            cv.imwrite(f'{test_folder_path}/test_{label}_{idx}.png', image)
    else:
        print("Train and Test folders already contain images. Skipping saving process.")

In [None]:
# Load digits.png again and cut it into a grid of 20x20 grayscale cells.
filename = os.path.join(path, 'digits.png')

img = cv.imread(filename)
print("Image shape: ", img.shape)
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)

# Split the image into 5000 cells, each 20x20 size
# (50 horizontal bands via vsplit, each band cut into 100 cells via hsplit)
cells = [np.hsplit(row, 100) for row in np.vsplit(gray, 50)]
# Transform into a numpy array with size (50,100,20,20)
x = np.array(cells)

Image shape:  (1000, 2000, 3)


# **TASK 2: K-Nearest Neighbour**
This code was adapted from the template in Practical 5 Exercise 3 https://drive.google.com/file/d/18qFWCetJolvJWRXm_YI4KUyEaEtc_4S-/view?usp=sharing and OpenCV KNN tutorial https://docs.opencv.org/4.x/d5/d26/tutorial_py_knn_understanding.html


**Multi-class Classification**

In [None]:
# TRAINING ACCURACY
# Fits k-NN on the training split and scores it on that same split.

# Prepare training data and test data
# The first 5 columns of every grid row are used for training, the remaining
# 95 columns for testing; each 20x20 cell is flattened to 400 values.
train = x[:, :5].reshape(-1, 400).astype(np.float32)  # Size should be (250,400)
test = x[:, 5:100].reshape(-1, 400).astype(np.float32)  # Size should be (4750,400)

# Create labels for train and test data
# Samples are ordered row by row and every digit spans 5 grid rows, so each
# label repeats 5 * 5 = 25 times (train) and 5 * 95 = 475 times (test).
c = np.arange(10)  # 10 classes, 0-9
train_labels = np.repeat(c, 25)[:, np.newaxis]  # Should create 250 labels
test_labels = np.repeat(c, 475)[:, np.newaxis]  # Should create 4750 labels

# Initialize KNN
knn = cv.ml.KNearest_create()

# Train KNN model on training data
knn.train(train, cv.ml.ROW_SAMPLE, train_labels)

# Specify the number of neighbors in k-NN
k_neighbours = 3 # Odd values for majority voting

# Query the model with the training data itself (this cell reports training accuracy)
ret, result, neighbours, dist = knn.findNearest(train, k=k_neighbours)

print("\nResults for k = ", k_neighbours)

# Evaluate model's accuracy
matches = result == train_labels
correct = np.count_nonzero(matches)
accuracy = correct * 100.0 / result.size
print(f"\nAverage Accuracy: {accuracy:.2f}%")

# Individual accuracies
# Note: `accuracy` is rebound inside the loop, so after the loop it holds the
# accuracy of digit 9 rather than the average printed above.
accuracies = []
for digit in range(10):
    digit_indices = np.where(train_labels == digit)
    true_labels_digit = train_labels[digit_indices]
    predicted_labels_digit = result[digit_indices]

    accuracy = accuracy_score(true_labels_digit, predicted_labels_digit) * 100
    accuracies.append(accuracy)

    print(f"Accuracy for digit {digit}: {accuracy:.2f}%")

# Compute the confusion matrix (true training labels vs. predictions)
matrix = confusion_matrix(train_labels.ravel(), result.ravel())
print("\nConfusion Matrix:")
print(matrix)

In [None]:
# TESTING ACCURACY
# Fits k-NN on the training split and scores it on the held-out test split.

# Prepare training data and test data
# The first 5 columns of every grid row are used for training, the remaining
# 95 columns for testing; each 20x20 cell is flattened to 400 values.
train = x[:, :5].reshape(-1, 400).astype(np.float32)  # Size should be (250,400)
test = x[:, 5:100].reshape(-1, 400).astype(np.float32)  # Size should be (4750,400)

# Create labels for train and test data
# Samples are ordered row by row and every digit spans 5 grid rows, so each
# label repeats 5 * 5 = 25 times (train) and 5 * 95 = 475 times (test).
c = np.arange(10)  # 10 classes, 0-9
train_labels = np.repeat(c, 25)[:, np.newaxis]  # Should create 250 labels
test_labels = np.repeat(c, 475)[:, np.newaxis]  # Should create 4750 labels

# Initialize KNN
knn = cv.ml.KNearest_create()

# Train KNN model on training data
knn.train(train, cv.ml.ROW_SAMPLE, train_labels)

# Specify the number of neighbors in k-NN
k_neighbours = 1 # Odd values for majority voting

# Test KNN model on test data
ret, result, neighbours, dist = knn.findNearest(test, k=k_neighbours)

print("\nResults for k = ", k_neighbours)

# Evaluate model's accuracy
matches = result == test_labels
correct = np.count_nonzero(matches)
accuracy = correct * 100.0 / result.size
print(f"\nAverage Accuracy: {accuracy:.2f}%")

# Individual accuracies
# Note: `accuracy` is rebound inside the loop, so after the loop it holds the
# accuracy of digit 9 rather than the average printed above.
accuracies = []
for digit in range(10):
    digit_indices = np.where(test_labels == digit)
    true_labels_digit = test_labels[digit_indices]
    predicted_labels_digit = result[digit_indices]

    accuracy = accuracy_score(true_labels_digit, predicted_labels_digit) * 100
    accuracies.append(accuracy)

    print(f"Accuracy for digit {digit}: {accuracy:.2f}%")

# Compute the confusion matrix (true test labels vs. predictions)
matrix = confusion_matrix(test_labels.ravel(), result.ravel())
print("\nConfusion Matrix:")
print(matrix)

**Binary Classification** (similar to practicals)

In [None]:
# Binary k-NN: only the first 10 grid rows are used (first 5 rows: digit 0,
# next 5 rows: digit 1).
# NOTE: this cell rebinds train, test, train_labels and test_labels to the
# two-class subset.

# Prepare training data and test data
train = x[0:10,:5].reshape(-1,400).astype(np.float32) # Size = (50,400): 10 rows x first 5 columns (5% of each row used for training)
test = x[0:10,5:100].reshape(-1,400).astype(np.float32) # Size = (950,400): 10 rows x remaining 95 columns

# Create labels for train and test data
c = np.arange(2)  ## 2 classes
train_labels = np.repeat(c,25)[:,np.newaxis]  # 50 labels: 25 per class
test_labels = np.repeat(c,475)[:,np.newaxis]  # 950 labels: 475 per class

# Initialize KNN
knn = cv.ml.KNearest_create()

# Train KNN model on training data
knn.train(train, cv.ml.ROW_SAMPLE, train_labels)

# Specify the number of neighbors in k-NN
k_neighbours = 1 # Odd values for majority voting

# Test KNN model on test data
ret, result, neighbours, dist = knn.findNearest(test, k=k_neighbours)

print("\nResults for k = ", k_neighbours)

# Evaluate model's accuracy
matches = result == test_labels
correct = np.count_nonzero(matches)
accuracy = correct * 100.0 / result.size
print(f"\nAverage Accuracy: {accuracy:.2f}%")

# Individual accuracies
# Note: `accuracy` is rebound inside the loop, so after the loop it holds the
# accuracy of digit 1 rather than the average printed above.
accuracies = []
for digit in range(2):
    digit_indices = np.where(test_labels == digit)
    true_labels_digit = test_labels[digit_indices]
    predicted_labels_digit = result[digit_indices]

    accuracy = accuracy_score(true_labels_digit, predicted_labels_digit) * 100
    accuracies.append(accuracy)

    print(f"Accuracy for digit {digit}: {accuracy:.2f}%")

# Compute the confusion matrix (true test labels vs. predictions)
matrix = confusion_matrix(test_labels.ravel(), result.ravel())
print("\nConfusion Matrix:")
print(matrix)

# **TASK 3: Linear SVM**
This code was adapted from the template in Practical 5 Exercise 3 https://drive.google.com/file/d/18qFWCetJolvJWRXm_YI4KUyEaEtc_4S-/view?usp=sharing and OpenCV SVM tutorial https://docs.opencv.org/4.x/d1/d73/tutorial_introduction_to_svm.html

In [None]:
# One-vs-Rest linear SVM with varying C values

# Prepare the 10-class training and test data explicitly. The preceding binary
# k-NN cell rebinds train/test (and their labels) to a two-digit subset, so
# relying on leftover notebook state would train on the wrong data.
train = x[:, :5].reshape(-1, 400).astype(np.float32)  # Size should be (250,400)
test = x[:, 5:100].reshape(-1, 400).astype(np.float32)  # Size should be (4750,400)

# Create labels for train and test data
c = np.arange(10)  # 10 classes, 0-9
train_labels = np.repeat(c, 25)[:, np.newaxis]  # Should create 250 labels
test_labels = np.repeat(c, 475)[:, np.newaxis]  # Should create 4750 labels

# Define C values for SVM hyperparameter tuning
C_values = [0.001, 0.01, 0.1, 1, 10, 100, 1000] # logarithmic

# Initialize list to store accuracy for each C value
accuracies = []

# Loop through each C value
for C in C_values:
    svms = []
    # One row of signed scores per digit; each row has one entry per test sample
    scores = []

    # Train an SVM for each digit (digit i vs. all other digits)
    for i in range(10):
        # Named digit_svm so the sklearn `svm` module import is not shadowed.
        digit_svm = cv.ml.SVM_create()
        digit_svm.setType(cv.ml.SVM_C_SVC)
        digit_svm.setKernel(cv.ml.SVM_LINEAR)
        digit_svm.setC(C)
        digit_svm.setTermCriteria((cv.TERM_CRITERIA_MAX_ITER, 100, 1e-8))

        labels = np.where(train_labels == i, 1, -1).astype(np.int32)
        digit_svm.train(train, cv.ml.ROW_SAMPLE, labels)
        svms.append(digit_svm)

        # A plain predict() only returns the class label (+1 / -1), so taking
        # argmax over it cannot rank the ten classifiers. Request the raw
        # decision value as well and attach the predicted label's sign to its
        # magnitude; this gives a signed score that does not depend on
        # OpenCV's sign convention for the raw output.
        _, predicted_sign = digit_svm.predict(test)
        _, raw = digit_svm.predict(test, flags=cv.ml.STAT_MODEL_RAW_OUTPUT)
        scores.append(predicted_sign.ravel() * np.abs(raw.ravel()))

    # Classify each sample as the digit whose SVM gives the highest score
    predictions = np.argmax(np.stack(scores), axis=0)

    # Calculate accuracy
    correct = np.count_nonzero(predictions == test_labels.ravel())
    accuracy = correct * 100.0 / len(predictions)
    accuracies.append(accuracy)
    print(f"\nFor C = {C}, Average Accuracy: {accuracy:.2f}%")

    # Compute the confusion matrix from this cell's own predictions
    # (previously it used `result`, the output of the earlier k-NN cell).
    matrix = confusion_matrix(test_labels.ravel(), predictions)
    print("\nConfusion Matrix:")
    print(matrix)

# # Plot accuracies against different C values
# plt.figure(figsize=(10,6))
# plt.semilogx(C_values, accuracies, marker='o', linestyle='-')
# plt.title('Average Accuracy vs. C values')
# plt.xlabel('C value (log scale)')
# plt.ylabel('Accuracy (%)')
# plt.grid(True, which="both", ls="--")
# plt.show()

# **TASK 4: Bag of Visual Words**
This code was adapted from the following template: https://medium.com/@aybukeyalcinerr/bag-of-visual-words-bovw-db9500331b2f

In [None]:
# Step 1: Load train and test images into dictionaries.
def load_images_from_folder(folder):
    """Load every readable image in *folder* as an equalized grayscale array.

    Args:
        folder: Directory containing files named ``<prefix>_<label>_<index>.png``
            (for example ``train_3_17.png``).

    Returns:
        A dict mapping each label string (the second ``_``-separated field of
        the file name) to the list of histogram-equalized grayscale images
        carrying that label.
    """
    images = {}
    # Sorted so the order of images per label is the same on every run.
    for filename in sorted(os.listdir(folder)):
        img = cv.imread(os.path.join(folder, filename), 0)    # grayscale

        # cv.imread returns None for files it cannot decode; skip those before
        # touching the image (equalizeHist fails on None).
        if img is None:
            continue

        # Apply histogram equalization
        img = cv.equalizeHist(img)

        # Extract label from filename, format is: [prefix]_[label]_[index].png
        label = filename.split('_')[1]
        images.setdefault(label, []).append(img)
    return images

# Load the saved split from disk; both results are dicts keyed by label string.
# Note: this rebinds train_images / test_images, which the data preparation
# cell used for plain lists.
train_images = load_images_from_folder(train_folder_path)
test_images = load_images_from_folder(test_folder_path)

In [None]:
# Step 2: Extract local features from images using SIFT.
def sift_features(images, nfeatures=150):
    """Compute SIFT descriptors for every image, grouped by label.

    Args:
        images: Dict mapping a label to a list of grayscale images.
        nfeatures: Value passed to the SIFT constructor's ``nfeatures`` argument.

    Returns:
        A two-element list ``[descriptor_list, sift_vectors]``:
        ``descriptor_list`` is a flat list of all descriptors from all images,
        and ``sift_vectors`` maps each label to a list with one descriptor
        array per image. Images for which SIFT finds no descriptors are left
        out of both.
    """
    sift_vectors = {}
    descriptor_list = []

    # Prefer the main-module factory and fall back to the contrib namespace,
    # so the function works whether or not cv.xfeatures2d is available.
    sift_factory = getattr(cv, "SIFT_create", None)
    if sift_factory is None:
        sift_factory = cv.xfeatures2d.SIFT_create
    sift = sift_factory(nfeatures=nfeatures)

    for label, img_list in images.items():
        features = []
        for img in img_list:
            _, des = sift.detectAndCompute(img, None)
            if des is not None:  # Check if descriptors are found
                descriptor_list.extend(des)
                features.append(des)
        sift_vectors[label] = features
    return [descriptor_list, sift_vectors]

# Use one SIFT setting for both splits so train and test descriptors are
# extracted in exactly the same way (the test split previously used 100).
SIFT_NFEATURES = 150

sifts = sift_features(train_images, nfeatures=SIFT_NFEATURES)
# Flat, unordered list of every training descriptor (input to k-means)
descriptor_list = sifts[0]
# SIFT descriptors of the training images, separated class by class
all_bovw_feature = sifts[1]
# SIFT descriptors of the test images, separated class by class
test_bovw_feature = sift_features(test_images, nfeatures=SIFT_NFEATURES)[1]

In [None]:
# Step 3: Cluster the pooled descriptors with k-means; the cluster centres are the visual words.
def kmeans(k, descriptor_list):
    """Fit k-means with *k* clusters on *descriptor_list* and return the cluster centres."""
    model = KMeans(n_clusters=k, n_init=10)
    return model.fit(descriptor_list).cluster_centers_

# Build the visual vocabulary: 600 cluster centres computed from the training descriptors
visual_words = kmeans(600, descriptor_list)

In [None]:
# Step 4: Create histograms for both test and train images
def find_index(image, center):
    """Return the index of the entry in *center* closest to *image*.

    Args:
        image: A single feature vector.
        center: Sequence of vectors with the same length as *image*.

    Returns:
        Index of the nearest vector by Euclidean distance. The first index
        wins on ties, and 0 is returned when *center* is empty.
    """
    centers = np.asarray(center, dtype=float)
    if centers.shape[0] == 0:
        return 0
    # One vectorised distance computation instead of a Python loop per centre.
    offsets = centers - np.asarray(image, dtype=float)
    return int(np.argmin(np.linalg.norm(offsets, axis=1)))

def image_class(all_bovw, centers):
    """Turn per-image descriptors into normalized visual-word histograms.

    Args:
        all_bovw: Dict mapping a label to a list of descriptor arrays, one
            array per image with one descriptor per row.
        centers: The visual words (cluster centres), one per row.

    Returns:
        Dict mapping each label to a list of histograms, one per image. Each
        histogram has ``len(centers)`` bins and sums to 1. An image with no
        descriptors gets an all-zero histogram instead of NaNs.
    """
    centers = np.asarray(centers, dtype=float)
    n_words = len(centers)
    dict_feature = {}
    for key, value in all_bovw.items():
        category = []
        for img in value:
            histogram = np.zeros(n_words)
            # Guard against missing/empty descriptors: normalizing an empty
            # histogram would divide by zero.
            if img is not None and len(img) > 0:
                # Assign every descriptor to its nearest visual word in one call.
                nearest = distance.cdist(np.asarray(img, dtype=float), centers).argmin(axis=1)
                histogram += np.bincount(nearest, minlength=n_words)
                # Normalize the histogram
                histogram /= histogram.sum()
            category.append(histogram)
        dict_feature[key] = category
    return dict_feature

# Histogram every train and test image against the same visual vocabulary
bovw_train = image_class(all_bovw_feature, visual_words)
bovw_test = image_class(test_bovw_feature, visual_words)

In [None]:
# Step 5: Predict classes of the test images with k-NN function.
def knn(images, tests, k=1):
    """Classify every test histogram by majority vote among its k nearest training histograms.

    Args:
        images: Dict mapping a label to its list of training histograms.
        tests: Dict mapping a label to its list of test histograms.
        k: Number of nearest neighbours that vote.

    Returns:
        ``[num_test, correct_predict, class_based]`` where ``class_based``
        maps each test label to ``[correct, all]``.
    """
    # Flatten the training set once, keeping the original iteration order.
    labelled_train = [
        (train_label, train_hist)
        for train_label, train_hists in images.items()
        for train_hist in train_hists
    ]

    total = 0
    hits = 0
    class_based = {}

    for true_label, samples in tests.items():
        tally = class_based[true_label] = [0, 0]  # [correct, all]
        for sample in samples:
            scored = [
                (train_label, distance.euclidean(sample, train_hist))
                for train_label, train_hist in labelled_train
            ]
            # Stable sort by distance, then keep the labels of the k closest.
            scored.sort(key=lambda pair: pair[1])
            nearest_labels = [train_label for train_label, _ in scored[:k]]

            # Most frequent label among the neighbours
            winner = Counter(nearest_labels).most_common(1)[0][0]

            if winner == true_label:
                hits += 1
                tally[0] += 1
            total += 1
            tally[1] += 1

    return [total, hits, class_based]

# Classify the test histograms with the k-NN function above, using 9 neighbours.
# Note: this `knn` function replaces the cv.ml KNearest object bound to the
# same name in the Task 2 cells.
results_bowl = knn(bovw_train, bovw_test, k=9)

# Step 6: Calculate the accuracy
def accuracy(results):
    """Print the overall and per-class accuracy of a k-NN run.

    Args:
        results: ``[num_test, correct_predict, class_based]`` where
            ``class_based`` maps a label string to ``[correct, all]``.
    """
    overall_pct = (results[1] / results[0]) * 100
    print(f"Average accuracy: %{overall_pct}")
    print("\nClass based accuracies: \n")

    # Labels are strings holding integers; order them numerically
    per_class = results[2]
    for label in sorted(per_class, key=int):
        counts = per_class[label]
        class_pct = (counts[0] / counts[1]) * 100
        print(label + " : %" + str(class_pct))

# Print the overall and per-class accuracies of the BoVW k-NN run.
# Note: this `accuracy` function replaces the float bound to the same name in
# earlier cells.
accuracy(results_bowl)

Average accuracy: %71.41316073354909

Class based accuracies: 

0 : %96.5909090909091
1 : %97.05882352941177
2 : %61.111111111111114
3 : %73.25581395348837
4 : %78.26086956521739
5 : %59.78260869565217
6 : %54.54545454545454
7 : %61.111111111111114
8 : %83.72093023255815
9 : %63.0


In [None]:
# Re-import: an earlier cell may have rebound the name `svm` to an OpenCV SVM object.
from sklearn import svm
# Step 5: Predict classes of the test images with SVM.

# Flatten the per-class training histograms into feature and label lists
X_train = []
y_train = []

for label, histograms in bovw_train.items():
    for histogram in histograms:
        X_train.append(histogram)
        y_train.append(label)

# Split data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.05, random_state=10)

# Train an SVM classifier
clf = svm.SVC()
clf.fit(X_train, y_train)

# Validate the classifier
y_pred = clf.predict(X_val)
print(classification_report(y_val, y_pred))

# Evaluate on the held-out test histograms as well. The cell previously only
# scored the small validation split and never used bovw_test.
X_test = [hist for hists in bovw_test.values() for hist in hists]
y_test = [test_label for test_label, hists in bovw_test.items() for _ in hists]
y_test_pred = clf.predict(X_test)
print(f"Test accuracy: {accuracy_score(y_test, y_test_pred) * 100:.2f}%")
print(classification_report(y_test, y_test_pred))

# **TASK 5: Convolutional Neural Networks**
This code was adapted from the Practical 7 template https://colab.research.google.com/github/d2l-ai/d2l-pytorch-colab/blob/master/chapter_computer-vision/fine-tuning.ipynb and the PyTorch documentation on pre-trained models https://pytorch.org/vision/stable/models.html

The custom dataset (`DigitsDataset`) code was adapted from the PyTorch data loading tutorial https://pytorch.org/tutorials/beginner/data_loading_tutorial.html


**ResNet MODELS**

In [None]:
# Create custom dataset for existing data to work with Dataloader and ResNet architecture
class DigitsDataset(Dataset):
    """Dataset over a folder of digit images named ``<prefix>_<label>_<index>.<ext>``.

    Args:
        path: Folder containing the image files.
        transform: Optional callable applied to each RGB image array.

    Each item is an ``(image, label)`` pair where ``label`` is the integer in
    the second ``_``-separated field of the file name.
    """

    # Only files with these extensions are treated as samples, so stray
    # entries in the folder (sub-directories, hidden files) are ignored.
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.bmp')

    def __init__(self, path, transform=None):
        self.path = path
        self.transform = transform
        # Sorted so that an index always maps to the same file.
        self.filenames = sorted(
            name for name in os.listdir(path)
            if name.lower().endswith(self.IMAGE_EXTENSIONS)
        )

    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, idx):
        img_name = os.path.join(self.path, self.filenames[idx])
        image = cv.imread(img_name)
        if image is None:
            # cv.imread returns None instead of raising on unreadable files.
            raise OSError(f"Could not read image file: {img_name}")
        image = cv.cvtColor(image, cv.COLOR_BGR2RGB)  # OpenCV loads BGR
        label = int(self.filenames[idx].split('_')[1])
        if self.transform:
            image = self.transform(image)
        return image, label

# Data transformations and normalization
# ToTensor converts the image array to a tensor; Normalize then standardizes
# each channel with the given mean/std (presumably the ImageNet statistics the
# pre-trained torchvision models expect -- confirm).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Datasets over the Train/Test folders written by the data preparation cell
train_dataset = DigitsDataset(train_folder_path, transform=transform)
test_dataset = DigitsDataset(test_folder_path, transform=transform)

# Number of epochs declared here to use later for time
# (also the default for num_epochs in the fine-tuning function below, bound
# when that function is defined)
number_epochs = 5

# Fine tuning method
def resnet_fine_tuning(net, learning_rate, batch_size=256, num_epochs=number_epochs, param_group=True):
    """Fine-tune a ResNet-style network (one with an ``fc`` head) on the digit datasets.

    Args:
        net: Model whose final layer is exposed as ``net.fc``.
        learning_rate: Base SGD learning rate.
        batch_size: Batch size for both data loaders.
        num_epochs: Number of training epochs.
        param_group: When True, the ``fc`` head is trained with ten times the
            base learning rate; otherwise all parameters share one rate.
    """
    # Single device: GPU when available, otherwise CPU
    devices = [torch.device('cuda' if torch.cuda.is_available() else 'cpu')]
    net = net.to(devices[0])

    # Per-sample losses (no reduction)
    loss = nn.CrossEntropyLoss(reduction="none")

    # Choose what the optimizer sees: two groups with different rates, or all parameters
    if param_group:
        head_names = ["fc.weight", "fc.bias"]
        backbone_params = [
            param for name, param in net.named_parameters() if name not in head_names
        ]
        optim_params = [
            {'params': backbone_params},
            {'params': net.fc.parameters(), 'lr': learning_rate * 10},
        ]
    else:
        optim_params = net.parameters()
    trainer = torch.optim.SGD(optim_params, lr=learning_rate, weight_decay=0.001)

    # Shuffle only the training data
    train_iter = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_iter = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Hand everything to the d2l training loop
    d2l.train_ch13(net, train_iter, test_iter, loss, trainer, num_epochs, devices)

In [None]:
# Initialize and modify the pre-trained ResNet-18
# NOTE(review): newer torchvision releases presumably prefer a `weights=`
# argument over `pretrained=True` -- confirm against the installed version.
finetune_resnet18 = models.resnet18(pretrained=True)
# Replace the final fully connected layer with a fresh 10-output layer and
# re-initialize its weights.
finetune_resnet18.fc = nn.Linear(finetune_resnet18.fc.in_features, 10)  # 10 classes
nn.init.xavier_uniform_(finetune_resnet18.fc.weight)

# Initialize start time for calculating time
start_time = time.time()

# Train the model (base learning rate 5e-5, all other arguments at their defaults)
resnet_fine_tuning(finetune_resnet18, 5e-5)

# Calculate total training time
# (covers the whole resnet_fine_tuning call, not just the training loop)
total_time = time.time() - start_time

# Calculate average training time of one epoch
# (valid because the call above used the default num_epochs, i.e. number_epochs)
avg_time_epoch = total_time / number_epochs

# Print time results
print(f"Total time: {total_time:.2f} seconds")
print(f"Average time per epoch: {avg_time_epoch:.2f} seconds")

In [None]:
# Initialize and modify pre-trained ResNet-34
# NOTE(review): newer torchvision releases presumably prefer a `weights=`
# argument over `pretrained=True` -- confirm against the installed version.
finetune_resnet34 = torchvision.models.resnet34(pretrained=True)
# Replace the final fully connected layer with a fresh 10-output layer and
# re-initialize its weights.
finetune_resnet34.fc = nn.Linear(finetune_resnet34.fc.in_features, 10) # 10 classes
nn.init.xavier_uniform_(finetune_resnet34.fc.weight);

# Initialize start time for calculating time
start_time = time.time()

# Train the model (base learning rate 5e-5, all other arguments at their defaults)
resnet_fine_tuning(finetune_resnet34, 5e-5)

# Calculate total training time
# (covers the whole resnet_fine_tuning call, not just the training loop)
total_time = time.time() - start_time

# Calculate average training time of one epoch
# (valid because the call above used the default num_epochs, i.e. number_epochs)
avg_time_epoch = total_time / number_epochs

# Print time results
print(f"Total time: {total_time:.2f} seconds")
print(f"Average time per epoch: {avg_time_epoch:.2f} seconds")

In [None]:
# Initialize and modify pre-trained ResNet-50
# NOTE(review): newer torchvision releases presumably prefer a `weights=`
# argument over `pretrained=True` -- confirm against the installed version.
finetune_resnet50 = torchvision.models.resnet50(pretrained=True)
# Replace the final fully connected layer with a fresh 10-output layer and
# re-initialize its weights.
finetune_resnet50.fc = nn.Linear(finetune_resnet50.fc.in_features, 10) # 10 classes
nn.init.xavier_uniform_(finetune_resnet50.fc.weight);

# Initialize start time for calculating time
start_time = time.time()

# Train the model (base learning rate 5e-5, all other arguments at their defaults)
resnet_fine_tuning(finetune_resnet50, 5e-5)

# Calculate total training time
# (covers the whole resnet_fine_tuning call, not just the training loop)
total_time = time.time() - start_time

# Calculate average training time of one epoch
# (valid because the call above used the default num_epochs, i.e. number_epochs)
avg_time_epoch = total_time / number_epochs

# Print time results
print(f"Total time: {total_time:.2f} seconds")
print(f"Average time per epoch: {avg_time_epoch:.2f} seconds")

**AlexNet and VGG**

In [None]:
# Create custom dataset for existing data to work with Dataloader and Alex/VGG Net architecture.
class DigitsDataset(Dataset):
    """Dataset over a folder of digit images named ``<prefix>_<label>_<index>.<ext>``.

    Args:
        path: Folder containing the image files.
        transform: Optional callable applied to each PIL image.

    Each item is an ``(image, label)`` pair where ``label`` is the integer in
    the second ``_``-separated field of the file name. Images are handed to
    the transform as PIL images so that ``transforms.Resize`` can be used.
    """

    # Only files with these extensions are treated as samples, so stray
    # entries in the folder (sub-directories, hidden files) are ignored.
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.bmp')

    def __init__(self, path, transform=None):
        self.path = path
        self.transform = transform
        # Sorted so that an index always maps to the same file.
        self.filenames = sorted(
            name for name in os.listdir(path)
            if name.lower().endswith(self.IMAGE_EXTENSIONS)
        )

    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, idx):
        img_name = os.path.join(self.path, self.filenames[idx])
        image = cv.imread(img_name)
        if image is None:
            # cv.imread returns None instead of raising on unreadable files.
            raise OSError(f"Could not read image file: {img_name}")
        image = cv.cvtColor(image, cv.COLOR_BGR2RGB)  # OpenCV loads BGR
        image = Image.fromarray(image)  # Convert to PIL Image
        label = int(self.filenames[idx].split('_')[1])
        if self.transform:
            image = self.transform(image)
        return image, label

# Data transformations and normalization
# Resize upsamples each image to 224x224 before ToTensor/Normalize; the
# mean/std are presumably the ImageNet statistics the pre-trained torchvision
# models expect -- confirm.
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to fit Alex/VGG Net input size
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Rebinds train_dataset / test_dataset, so the ResNet datasets defined earlier
# are replaced from here on.
train_dataset = DigitsDataset(train_folder_path, transform=transform)
test_dataset = DigitsDataset(test_folder_path, transform=transform)

# Number of epochs declared here to use later for time
# (also the default for num_epochs in the fine-tuning function below, bound
# when that function is defined)
number_epochs = 5

# Fine tuning method
def alexvgg_fine_tuning(net, learning_rate, batch_size=32, num_epochs=number_epochs, param_group=True):
    """Fine-tune an AlexNet/VGG-style network (head at ``classifier[6]``) on the digit datasets.

    Args:
        net: Model whose final layer is ``net.classifier[6]``.
        learning_rate: Base SGD learning rate.
        batch_size: Batch size for both data loaders.
        num_epochs: Number of training epochs.
        param_group: When True, the ``classifier[6]`` head is trained with ten
            times the base learning rate; otherwise all parameters share one rate.
    """
    # Single device: GPU when available, otherwise CPU
    devices = [torch.device('cuda' if torch.cuda.is_available() else 'cpu')]
    net = net.to(devices[0])

    # Per-sample losses (no reduction)
    loss = nn.CrossEntropyLoss(reduction="none")

    # Choose what the optimizer sees: two groups with different rates, or all parameters
    if param_group:
        backbone_params = [
            param for name, param in net.named_parameters() if "classifier.6" not in name
        ]
        optim_params = [
            {'params': backbone_params},
            {'params': net.classifier[6].parameters(), 'lr': learning_rate * 10},
        ]
    else:
        optim_params = net.parameters()
    trainer = torch.optim.SGD(optim_params, lr=learning_rate, weight_decay=0.001)

    # Shuffle only the training data
    train_iter = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_iter = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Hand everything to the d2l training loop
    d2l.train_ch13(net, train_iter, test_iter, loss, trainer, num_epochs, devices)

In [None]:
# Initialize and modify the pre-trained AlexNet
# NOTE(review): newer torchvision releases presumably prefer a `weights=`
# argument over `pretrained=True` -- confirm against the installed version.
finetune_alexnet = models.alexnet(pretrained=True)
# Replace the last classifier layer with a fresh 10-output layer and
# re-initialize its weights.
finetune_alexnet.classifier[6] = nn.Linear(finetune_alexnet.classifier[6].in_features, 10)  # 10 classes
nn.init.xavier_uniform_(finetune_alexnet.classifier[6].weight)

# Initialize start time for calculating time
start_time = time.time()

# Train the model (base learning rate 5e-5, all other arguments at their defaults)
alexvgg_fine_tuning(finetune_alexnet, 5e-5)

# Calculate total training time
# (covers the whole alexvgg_fine_tuning call, not just the training loop)
total_time = time.time() - start_time

# Calculate average training time of one epoch
# (valid because the call above used the default num_epochs, i.e. number_epochs)
avg_time_epoch = total_time / number_epochs

# Print time results
print(f"Total time: {total_time:.2f} seconds")
print(f"Average time per epoch: {avg_time_epoch:.2f} seconds")

In [None]:
# Initialize and modify pre-trained VGG Net (VGG-16)
# NOTE(review): newer torchvision releases presumably prefer a `weights=`
# argument over `pretrained=True` -- confirm against the installed version.
finetune_vggnet = models.vgg16(pretrained=True)
# Replace the last classifier layer with a fresh 10-output layer and
# re-initialize its weights.
finetune_vggnet.classifier[6] = nn.Linear(finetune_vggnet.classifier[6].in_features, 10)  # 10 classes
nn.init.xavier_uniform_(finetune_vggnet.classifier[6].weight)

# Initialize start time for calculating time
start_time = time.time()

# Train the model (base learning rate 5e-5, all other arguments at their defaults)
alexvgg_fine_tuning(finetune_vggnet, 5e-5)

# Calculate total training time
# (covers the whole alexvgg_fine_tuning call, not just the training loop)
total_time = time.time() - start_time

# Calculate average training time of one epoch
# (valid because the call above used the default num_epochs, i.e. number_epochs)
avg_time_epoch = total_time / number_epochs

# Print time results
print(f"Total time: {total_time:.2f} seconds")
print(f"Average time per epoch: {avg_time_epoch:.2f} seconds")