In [1]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118



Looking in indexes: https://download.pytorch.org/whl/cu118
Collecting torchaudio
  Using cached https://download.pytorch.org/whl/cu118/torchaudio-2.5.1%2Bcu118-cp312-cp312-win_amd64.whl (4.0 MB)
Collecting torch
  Downloading https://download.pytorch.org/whl/cu118/torch-2.5.1%2Bcu118-cp312-cp312-win_amd64.whl (2700.1 MB)
     ---------------------------------------- 0.0/2.7 GB ? eta -:--:--
     ---------------------------------------- 0.0/2.7 GB 4.7 MB/s eta 0:09:38
     ---------------------------------------- 0.0/2.7 GB 5.2 MB/s eta 0:08:42
     ---------------------------------------- 0.0/2.7 GB 5.0 MB/s eta 0:08:55
     ---------------------------------------- 0.0/2.7 GB 5.6 MB/s eta 0:08:00
     ---------------------------------------- 0.0/2.7 GB 5.5 MB/s eta 0:08:07
     ---------------------------------------- 0.0/2.7 GB 6.0 MB/s eta 0:07:28
     ---------------------------------------- 0.0/2.7 GB 5.8 MB/s eta 0:07:45
     ---------------------------------------- 0.0/2.7 GB 5.8

In [2]:
import torch

# Ask PyTorch once whether a CUDA device is usable and report the answer
cuda_ok = torch.cuda.is_available()
print("CUDA Available:", cuda_ok)

# Choose the compute device: the GPU when CUDA is usable, the CPU otherwise
device = torch.device("cuda") if cuda_ok else torch.device("cpu")
print("Using device:", device)


CUDA Available: False
Using device: cpu


In [3]:
# Import necessary libraries
import torch  # PyTorch library for building and training neural networks
import torch.nn as nn  # Contains basic building blocks for neural networks
import torch.optim as optim  # Optimizers for training models
import torch.nn.functional as F  # Functions like ReLU and Softmax
from torch.utils.data import DataLoader  # Data loader utility
from torchvision import datasets, transforms  # For dataset handling and transformations
import matplotlib.pyplot as plt  # For plotting results

# Preprocessing applied to every image, in order:
#   1. ToTensor  - convert the image to a tensor
#   2. Normalize - per channel, subtract 0.5 and divide by 0.5
#                  (for inputs in [0, 1] this rescales values to [-1, 1])
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# CIFAR-10: 60,000 colour images of 32x32 pixels spread over 10 classes.
# Both splits are stored under ./data and fetched on first use.
train_dataset = datasets.CIFAR10(
    root='./data', train=True, transform=transform, download=True
)
test_dataset = datasets.CIFAR10(
    root='./data', train=False, transform=transform, download=True
)

# Mini-batch iterators: the training split is reshuffled every epoch,
# the test split is read in its stored order.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

# Define the base CNN model for image classification
class BaseCNN(nn.Module):
    """Small CNN: two conv/ReLU/max-pool stages followed by two linear layers.

    The first linear layer is sized for 32 feature maps of 8x8, which is what
    two 2x2 poolings produce from a 3-channel 32x32 input (e.g. CIFAR-10).

    Args:
        num_classes: Number of output logits. Defaults to 10.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Layers are created in the same order as before so that a fixed RNG
        # seed yields the same initial weights.
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)  # 3 -> 16 channels, 3x3 kernel, size-preserving
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)  # 16 -> 32 channels, 3x3 kernel, size-preserving
        self.pool = nn.MaxPool2d(2, 2)  # Halves height and width
        self.fc1 = nn.Linear(32 * 8 * 8, 128)  # Flattened features -> hidden layer
        self.fc2 = nn.Linear(128, num_classes)  # Hidden layer -> class logits

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for a batch of images.

        Raises:
            RuntimeError: If the spatial size of ``x`` does not lead to
                32 * 8 * 8 features per sample (raised by the first linear layer).
        """
        x = self.pool(F.relu(self.conv1(x)))  # Conv1 + ReLU + Pooling
        x = self.pool(F.relu(self.conv2(x)))  # Conv2 + ReLU + Pooling
        # Flatten everything except the batch dimension. Unlike
        # view(-1, 32 * 8 * 8), this can never fold surplus features into the
        # batch dimension; a wrongly sized input fails loudly in fc1 instead.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))  # FC1 + ReLU
        x = self.fc2(x)  # Output layer (logits)
        return x

# Build the baseline network that the rest of the notebook trains
base_model = BaseCNN()

# Training objective: cross-entropy computed on the model's class logits
criterion = nn.CrossEntropyLoss()

# Optimiser: stochastic gradient descent with momentum over all model parameters
optimizer = optim.SGD(
    params=base_model.parameters(),
    momentum=0.9,
    lr=0.01,
)




Files already downloaded and verified
Files already downloaded and verified


In [None]:
# Training function
def train_model(model, train_loader, criterion, optimizer, num_epochs=10, device=None):
    """Train ``model`` for ``num_epochs`` passes over ``train_loader``.

    Args:
        model: The ``nn.Module`` to optimise; it is put into training mode.
        train_loader: Sized iterable yielding ``(inputs, labels)`` batches.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar loss tensor.
        optimizer: Optimiser already bound to ``model``'s parameters.
        num_epochs: Number of full passes over ``train_loader``.
        device: Device the batches are moved to. When ``None`` (default) the
            device of the model's first parameter is used, so the function no
            longer depends on a notebook-global ``device`` variable.

    Returns:
        A list with one float per epoch: the mean of the per-batch losses.
    """
    if device is None:
        # Follow the model: batches must live on the same device as its weights.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()  # Set model to training mode
    train_losses = []

    for epoch in range(num_epochs):
        running_loss = 0.0
        for inputs, labels in train_loader:
            # Move data to the same device as the model
            inputs, labels = inputs.to(device), labels.to(device)

            # Gradients accumulate by default, so clear them for each batch
            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        # Unweighted mean over batches (a smaller final batch counts as much as a full one)
        epoch_loss = running_loss / len(train_loader)
        train_losses.append(epoch_loss)
        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss:.4f}")

    return train_losses

# Evaluation function
def evaluate_model(model, test_loader, criterion, device=None):
    """Compute the mean loss and accuracy of ``model`` over ``test_loader``.

    Args:
        model: The ``nn.Module`` to evaluate; it is put into (and left in) eval mode.
        test_loader: Sized iterable yielding ``(inputs, labels)`` batches.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar loss tensor.
        device: Device the batches are moved to. When ``None`` (default) the
            device of the model's first parameter is used, so the function no
            longer depends on a notebook-global ``device`` variable.

    Returns:
        ``(test_loss, accuracy)``: the mean of the per-batch losses and the
        percentage (0-100) of samples whose arg-max prediction equals the label.
    """
    if device is None:
        # Follow the model: batches must live on the same device as its weights.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()  # Set model to evaluation mode
    test_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():  # Disable gradient computation for evaluation
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            test_loss += loss.item()

            # Predicted class = index of the largest logit along dim 1
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    # Unweighted mean over batches (a smaller final batch counts as much as a full one)
    test_loss /= len(test_loader)
    print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {accuracy:.2f}%")
    return test_loss, accuracy

# Plotting function
def plot_training_loss(train_losses):
    """Draw the per-epoch training loss as a line chart and show it.

    Args:
        train_losses: Sequence with one loss value per epoch, in epoch order.

    The x axis is numbered from 1 so that it matches the "Epoch k/N" lines
    printed during training (omitting x would number the points from 0).
    """
    epochs = range(1, len(train_losses) + 1)

    fig, ax = plt.subplots(figsize=(8, 5))
    ax.plot(epochs, train_losses, label="Training Loss")
    ax.set_xlabel("Epochs")
    ax.set_ylabel("Loss")
    ax.set_title("Training Loss Over Epochs")
    ax.legend()
    ax.grid()
    plt.show()

# Pick the compute device: the CUDA GPU when one is usable, the CPU otherwise
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

# Place the model's weights on that device before any batch is fed through it
base_model.to(device)

# Fit the model for a fixed number of passes over the training split
num_epochs = 10
train_losses = train_model(
    base_model,
    train_loader,
    criterion,
    optimizer,
    num_epochs=num_epochs,
)

# Measure loss and accuracy on the test split
test_loss, test_accuracy = evaluate_model(base_model, test_loader, criterion)

# Show how the training loss evolved from epoch to epoch
plot_training_loss(train_losses)









Using device: cpu
Epoch 1/10, Loss: 1.6184
Epoch 2/10, Loss: 1.1869
Epoch 3/10, Loss: 0.9814
Epoch 4/10, Loss: 0.8570
Epoch 5/10, Loss: 0.7505
Epoch 6/10, Loss: 0.6658
Epoch 7/10, Loss: 0.5836
Epoch 8/10, Loss: 0.4985
Epoch 9/10, Loss: 0.4215
Epoch 10/10, Loss: 0.3515
Test Loss: 1.1174, Test Accuracy: 68.80%
