<a href="https://colab.research.google.com/github/mithunkumarsr/LearnComputerVisionWithMithun/blob/main/CV9_Image_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install torch torchvision matplotlib numpy scikit-learn




In [2]:
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader, random_split

# Transform to normalize and convert to tensors
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Download CIFAR-10 dataset
dataset = CIFAR10(root='./data', train=True, transform=transform, download=True)
test_set = CIFAR10(root='./data', train=False, transform=transform, download=True)

# Split dataset into training and validation
train_set, val_set = random_split(dataset, [40000, 10000])

# Data loaders
train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
val_loader = DataLoader(val_set, batch_size=64, shuffle=False)
test_loader = DataLoader(test_set, batch_size=64, shuffle=False)


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:02<00:00, 80.9MB/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


Nearest neighbour


In [3]:
import numpy as np

# Reduce dataset size for simplicity
small_train_set, _ = random_split(train_set, [1000, len(train_set) - 1000])
small_test_set, _ = random_split(test_set, [100, len(test_set) - 100])


In [4]:
from sklearn.metrics import accuracy_score

def nearest_neighbor(train_data, train_labels, test_data):
    predictions = []
    for test_img in test_data:
        distances = np.linalg.norm(train_data - test_img, axis=1)
        nearest_index = np.argmin(distances)
        predictions.append(train_labels[nearest_index])
    return np.array(predictions)

# Prepare data
train_data = np.array([img[0].numpy().flatten() for img, _ in small_train_set])
train_labels = np.array([label for _, label in small_train_set])
test_data = np.array([img[0].numpy().flatten() for img, _ in small_test_set])
test_labels = np.array([label for _, label in small_test_set])

# Predict using Nearest Neighbor
predictions = nearest_neighbor(train_data, train_labels, test_data)
print("Nearest Neighbor Accuracy:", accuracy_score(test_labels, predictions))


Nearest Neighbor Accuracy: 0.2


Linear Classifier

In [5]:
import torch
import torch.nn as nn

class LinearClassifier(nn.Module):
    def __init__(self, input_dim, num_classes):
        super(LinearClassifier, self).__init__()
        self.fc = nn.Linear(input_dim, num_classes)

    def forward(self, x):
        return self.fc(x)

model = LinearClassifier(32 * 32 * 3, 10)


In [6]:
import torch.optim as optim

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Training loop
for epoch in range(5):
    total_loss = 0
    for images, labels in train_loader:
        images = images.view(images.size(0), -1)  # Flatten the images
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")


Epoch 1, Loss: 1157.1256
Epoch 2, Loss: 1101.5522
Epoch 3, Loss: 1083.0865
Epoch 4, Loss: 1071.8369
Epoch 5, Loss: 1063.3982


In [7]:
def evaluate(model, data_loader):
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in data_loader:
            images = images.view(images.size(0), -1)  # Flatten
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct / total

print("Validation Accuracy:", evaluate(model, val_loader))
print("Test Accuracy:", evaluate(model, test_loader))


Validation Accuracy: 0.4078
Test Accuracy: 0.404


# Student Tasks


**Nearest Neighbor Classifier: **

Modify the nearest neighbor implementation to use a weighted distance metric.
Experiment with different sizes of the training dataset and observe accuracy changes.


**Linear Classifier:**

Add L2 regularization (weight decay) to the optimizer.
Increase the number of epochs and learning rate to see how accuracy improves.
Explore More:

Visualize a few misclassified images with their predicted and actual labels.
Implement a softmax-based probability score visualization for a few test images.

In [8]:
from torchvision import datasets, transforms
from torch.utils.data import random_split, DataLoader

# Load CIFAR-10 dataset
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
dataset = datasets.CIFAR10(root='./data', train=True, transform=transform, download=True)

# Split dataset
train_size = int(0.6 * len(dataset))
val_size = int(0.2 * len(dataset))
test_size = len(dataset) - train_size - val_size
train_set, val_set, test_set = random_split(dataset, [train_size, val_size, test_size])

# Create data loaders
train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
val_loader = DataLoader(val_set, batch_size=64, shuffle=False)
test_loader = DataLoader(test_set, batch_size=64, shuffle=False)


Files already downloaded and verified


In [9]:
import numpy as np
from sklearn.metrics import accuracy_score

# Prepare data
train_data = np.array([img[0].numpy().flatten() for img, _ in train_set])
train_labels = np.array([label for _, label in train_set])
test_data = np.array([img[0].numpy().flatten() for img, _ in test_set])
test_labels = np.array([label for _, label in test_set])

# Compute Euclidean distance
def knn_predict(train_data, train_labels, test_data, k=3):
    predictions = []
    for test_img in test_data:
        distances = np.linalg.norm(train_data - test_img, axis=1)
        nearest_indices = np.argsort(distances)[:k]
        nearest_labels = train_labels[nearest_indices]
        predictions.append(np.bincount(nearest_labels).argmax())
    return predictions

# Predict and evaluate
predictions = knn_predict(train_data[:1000], train_labels[:1000], test_data[:100], k=3)
print("kNN Accuracy:", accuracy_score(test_labels[:100], predictions))


kNN Accuracy: 0.19


In [10]:
import torch
import torch.nn as nn
import torch.optim as optim

# Linear classifier model
class LinearClassifier(nn.Module):
    def __init__(self, input_dim, num_classes):
        super(LinearClassifier, self).__init__()
        self.fc = nn.Linear(input_dim, num_classes)

    def forward(self, x):
        return self.fc(x)

# Initialize model
model = LinearClassifier(32 * 32 * 3, 10)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Training loop
for epoch in range(5):
    total_loss = 0
    for images, labels in train_loader:
        images = images.view(images.size(0), -1)  # Flatten
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")


Epoch 1, Loss: 879.5060
Epoch 2, Loss: 834.2870
Epoch 3, Loss: 818.8974
Epoch 4, Loss: 809.1496
Epoch 5, Loss: 802.3449


In [11]:
# Example: Tuning k for kNN
k_values = [1, 3, 5, 7]
accuracies = []
for k in k_values:
    predictions = knn_predict(train_data[:1000], train_labels[:1000], test_data[:100], k=k)
    acc = accuracy_score(test_labels[:100], predictions)
    accuracies.append(acc)

print("Accuracies for k values:", dict(zip(k_values, accuracies)))


Accuracies for k values: {1: 0.22, 3: 0.19, 5: 0.16, 7: 0.2}


In [None]:
import matplotlib.pyplot as plt

# Get misclassified images
predictions = knn_predict(train_data[:1000], train_labels[:1000], test_data[:100], k=3)
misclassified = [(img, pred, true) for img, pred, true in zip(test_data, predictions, test_labels[:100]) if pred != true]

# Display a few misclassified examples
for i, (img, pred, true) in enumerate(misclassified[:5]):
    plt.imshow(img.reshape(32, 32, 3))
    plt.title(f"Predicted: {pred}, True: {true}")
    plt.show()


In [None]:
# Compare kNN and Linear Classifier
print("kNN Accuracy:", accuracy_score(test_labels[:100], knn_predict(train_data[:1000], train_labels[:1000], test_data[:100], k=3)))
print("Linear Classifier Accuracy:", evaluate(model, test_loader))


In [None]:
from sklearn.decomposition import PCA
import seaborn as sns

# Reduce data to 2D using PCA
pca = PCA(n_components=2)
reduced_train_data = pca.fit_transform(train_data[:1000])
reduced_test_data = pca.transform(test_data[:100])

# Visualize decision boundaries for kNN
sns.scatterplot(x=reduced_train_data[:, 0], y=reduced_train_data[:, 1], hue=train_labels[:1000], palette="tab10")
plt.title("kNN Decision Boundaries")
plt.show()
