In [9]:
# Import libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as trans
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader, random_split

In [23]:
# Training-time preprocessing pipeline: resize, random mirror, tensor conversion, normalisation.
train_transformation = trans.Compose(
    [
        # Upscale every image to the 227x227 input size used for AlexNet in this notebook.
        trans.Resize(size=(227, 227)),
        # Augmentation: mirror the image left-right with probability 0.7.
        trans.RandomHorizontalFlip(p=0.7),
        trans.ToTensor(),
        # Per-channel mean/std normalisation (the commonly used ImageNet statistics).
        trans.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [24]:
# Evaluation-time preprocessing pipeline: same resize and normalisation as training,
# but without any random augmentation so that results are deterministic.
test_transformation = trans.Compose(
    [
        trans.Resize(size=(227, 227)),
        trans.ToTensor(),
        # Per-channel mean/std normalisation (the commonly used ImageNet statistics).
        trans.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

In [12]:
# Seed PyTorch's global random number generator before the datasets are built.
torch.manual_seed(42)

# CIFAR-10 training split, stored under "data/" (downloaded on first use),
# preprocessed with the augmenting training pipeline.
full_train_dataset = CIFAR10(
    root="data/",
    train=True,
    transform=train_transformation,
    download=True,
)

# CIFAR-10 test split, preprocessed with the deterministic evaluation pipeline.
test_dataset = CIFAR10(
    root="data/",
    train=False,
    transform=test_transformation,
    download=True,
)


Files already downloaded and verified
Files already downloaded and verified


In [13]:
# Split the CIFAR-10 training set into a training subset and a held-out validation subset.
validation_size = 10000
training_size = len(full_train_dataset) - validation_size

# Use a dedicated, explicitly seeded generator for the split. Relying on the global RNG
# (seeded in an earlier cell) makes the split depend on which cells were executed in
# between; a local generator yields the same partition every time this cell runs.
split_generator = torch.Generator().manual_seed(42)
training_dataset, validation_dataset = random_split(
    full_train_dataset,
    [training_size, validation_size],
    generator=split_generator,
)

# NOTE(review): both subsets wrap full_train_dataset, so validation samples also pass
# through train_transformation (including the random horizontal flip). Confirm whether
# the validation split should use the deterministic test-time pipeline instead.

In [14]:
# Mini-batch loaders for the three splits. Only the training loader shuffles;
# validation and test data are iterated in a fixed order.
batch_size = 64

training_loader = DataLoader(
    training_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)
validation_loader = DataLoader(
    validation_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
)
testing_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
)

In [17]:
# Define the AlexNet architecture
class AlexNet(nn.Module):
    """AlexNet-style convolutional classifier.

    The network consists of five convolutional layers (with ReLU activations and
    three max-pooling stages), an adaptive average pool that fixes the feature map
    to 6x6, and a three-layer fully connected classifier with dropout.

    Args:
        num_classes: Number of output logits produced by the final linear layer.
            Defaults to 10 (the number of CIFAR-10 classes), so ``AlexNet()``
            behaves exactly as before.

    Raises:
        ValueError: If ``num_classes`` is smaller than 1.
    """

    def __init__(self, num_classes: int = 10):
        super().__init__()
        if num_classes < 1:
            raise ValueError(f"num_classes must be at least 1, got {num_classes}")

        # Feature extractor: 3 input channels -> 256 feature maps.
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        # Forces a 6x6 spatial output regardless of the incoming feature-map size,
        # so the first linear layer always receives 256 * 6 * 6 features.
        self.pool_layer = nn.AdaptiveAvgPool2d((6, 6))
        # Classifier head: dropout-regularised MLP ending in `num_classes` logits.
        self.fully_connected_layers = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Image batch with 3 channels in dimension 1 (as required by the
                first convolution).

        Returns:
            Tensor of raw (unnormalised) logits with one row per input image and
            ``num_classes`` columns.
        """
        x = self.conv_layers(x)
        x = self.pool_layer(x)
        # Keep the batch dimension, flatten channels and spatial dims together.
        x = torch.flatten(x, 1)
        x = self.fully_connected_layers(x)
        return x

In [18]:
# Pick the first CUDA GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    compute_device = torch.device('cuda:0')
else:
    compute_device = torch.device('cpu')

# Build the model and move its parameters to the selected device.
net = AlexNet()
net = net.to(compute_device)

# Cross-entropy loss on the raw logits, optimised with Adam.
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=net.parameters(), lr=1e-4)

# Multiplies the learning rate by 0.1 every 30 scheduler steps. The training loop
# steps it once per epoch, so with the 10 epochs configured in this notebook the
# decay never actually triggers.
learning_rate_scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=30,
    gamma=0.1,
)

In [19]:
epochs = 10  # Number of training epochs (full passes over training_loader); can be adjusted

In [20]:
# Train for `epochs` passes over the training loader, reporting the mean batch loss
# after each pass.
for epoch in range(epochs):
    # Training mode enables the dropout layers in the classifier head.
    net.train()
    epoch_loss = 0.0

    for images, labels in training_loader:
        # Move the batch to the same device as the model.
        images = images.to(compute_device)
        labels = labels.to(compute_device)

        # Clear gradients left over from the previous step, then run
        # forward pass -> loss -> backward pass -> parameter update.
        optimizer.zero_grad()
        predictions = net(images)
        loss = loss_function(predictions, labels)
        loss.backward()
        optimizer.step()

        # Accumulate the scalar batch loss for the per-epoch average.
        epoch_loss += loss.item()

    # The scheduler is advanced once per epoch, after all optimizer steps.
    learning_rate_scheduler.step()
    print(f'Epoch {epoch+1}, Loss: {epoch_loss/len(training_loader)}')

Epoch 1, Loss: 1.5648120071411133
Epoch 2, Loss: 1.102625904560089
Epoch 3, Loss: 0.8775941960334778
Epoch 4, Loss: 0.7367440643310547
Epoch 5, Loss: 0.6420056324481964
Epoch 6, Loss: 0.5668321094989777
Epoch 7, Loss: 0.5030257425785065
Epoch 8, Loss: 0.45459479126930236
Epoch 9, Loss: 0.40965993921756744
Epoch 10, Loss: 0.35877228055000304


In [22]:
# Measure top-1 accuracy of the trained network on the test loader.
net.eval()  # Evaluation mode: dropout layers are disabled
correct_predictions = 0
total_images = 0
with torch.no_grad():  # No gradients are needed for inference
    for images, labels in testing_loader:
        images, labels = images.to(compute_device), labels.to(compute_device)
        output = net(images)
        # Index of the largest logit per row is the predicted class. `argmax` replaces
        # the legacy `torch.max(output.data, 1)` idiom; `.data` is unnecessary under
        # `no_grad` and is discouraged in current PyTorch.
        predicted_labels = output.argmax(dim=1)
        total_images += labels.size(0)
        correct_predictions += (predicted_labels == labels).sum().item()

accuracy = 100 * correct_predictions / total_images
# Report the number of images actually evaluated rather than a hard-coded count.
print(f'Accuracy of the network on the {total_images} test images: {accuracy:.2f} %')

Accuracy of the network on the 10000 test images: 82.34 %


**Description of Dataset**

The code provided implements a variant of the AlexNet architecture for the CIFAR-10 dataset. CIFAR-10 is a widely used dataset for benchmarking image recognition algorithms. It consists of 60,000 32x32 color images in 10 classes, with 6,000 images per class. The dataset is split into 50,000 training images and 10,000 test images. The classes are airplanes, automobiles, birds, cats, deer, dogs, frogs, horses, ships, and trucks.

**Dataset and Transformations**

1. The CIFAR-10 dataset is transformed to fit the input size of the AlexNet architecture (227x227 pixels) and normalized using mean and standard deviation values common for pre-trained models on ImageNet. This normalization helps in faster convergence and improves model performance.
2. Data augmentation through random horizontal flipping is used to increase the diversity of the training set, reducing overfitting and improving the model's generalization.

**Training and Test Results**

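1. The modified AlexNet model is trained for 10 epochs, and the average training loss decreases consistently from about 1.56 to 0.36, indicating that the model is fitting the training data. Note that the held-out validation split is not evaluated during training, so the loss curve alone does not show how well the model generalizes.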
2. The final accuracy on the test dataset is reported as 82.34%, which is a strong result for CIFAR-10, considering the simplicity of the approach and the architectural limitations of AlexNet compared to more recent deep learning models.


**Observations and Improvements**

1. The performance of the model could potentially be improved by incorporating more sophisticated data augmentation techniques, using more complex architectures like ResNet or DenseNet, or applying regularization techniques like dropout more extensively.
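2. Fine-tuning of hyperparameters (such as the learning rate, batch size, or optimizer choice) could further enhance model accuracy. In particular, the `StepLR` scheduler in the code is configured with `step_size=30`, so it never lowers the learning rate during the 10-epoch run; a schedule that actually decays within the training budget may help.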