In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision
import torchvision.transforms as transforms

from torch.utils.data import DataLoader

In [8]:
# Convert PIL images to float tensors in [0, 1], then standardize each RGB
# channel with the per-channel (mean, std) pairs below.
# NOTE(review): these look like the commonly quoted CIFAR-100 training-set
# statistics -- confirm against the dataset if exact values matter.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761))
])

# Optional: a target_transform that one-hot encodes the labels. It is kept as a
# real comment (a bare string literal is an expression, so Jupyter would echo
# it as cell output) and is sized for CIFAR-100's 100 classes. It is not needed
# with nn.CrossEntropyLoss, which accepts integer class indices directly.
#
# target_transform = transforms.Lambda(
#     lambda y: torch.zeros(100, dtype = torch.float).scatter_(0, torch.tensor(y), value = 1)
# )

In [9]:
# Load the CIFAR-100 train and test splits from ./data2, applying `transform`
# to every image.
# download = True is a no-op when the files are already present and pass
# torchvision's integrity check, but it lets a fresh checkout fetch the data
# instead of failing with a RuntimeError ("Dataset not found or corrupted").
full_train_set = torchvision.datasets.CIFAR100(root = "./data2", train = True, download = True, transform = transform)

test_set = torchvision.datasets.CIFAR100(root = "./data2", train = False, download = True, transform = transform)

In [10]:
# Hold out a fixed number of training samples for validation; everything else
# in full_train_set stays in the training split.
val_size = 10_000
train_size = len(full_train_set) - val_size

In [11]:
# Split with an explicitly seeded generator so the train/validation membership
# is identical on every run. Without it, random_split draws from the global RNG
# and the two subsets change on each kernel restart.
split_generator = torch.Generator().manual_seed(42)
train_subset, val_subset = torch.utils.data.random_split(
    full_train_set, [train_size, val_size], generator = split_generator
)

print(f"Full training set size: {len(full_train_set)}")
print(f"New training set size: {len(train_subset)}")
print(f"Validation set size: {len(val_subset)}")
print(f"Test set size: {len(test_set)}")

Full training set size: 50000
New training set size: 40000
Validation set size: 10000
Test set size: 10000


In [12]:
batch_size = 64

# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = DataLoader(train_subset, batch_size = batch_size, shuffle = True)
val_loader = DataLoader(val_subset, batch_size = batch_size, shuffle = False)
test_loader = DataLoader(test_set, batch_size = batch_size, shuffle = False)

In [13]:
# Use the first CUDA device when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(f"\nUsing device: {device}")


Using device: cuda:0


In [14]:
class InceptionModule(nn.Module):
    """Four-branch Inception block whose branch outputs are concatenated channel-wise.

    Every convolution is followed by BatchNorm2d and an in-place ReLU. All
    branches use stride 1 with padding chosen so the spatial size is unchanged,
    which is what allows the results to be concatenated along dim 1.

    Args:
        in_channels: channels of the input feature map.
        n1x1: output channels of the 1x1 branch (``b1``).
        n3x3_reduce: channels of the 1x1 reduction in front of the 3x3 conv.
        n3x3: output channels of the 3x3 branch (``b2``).
        n5x5_reduce: channels of the 1x1 reduction in front of the 5x5 conv.
        n5x5: output channels of the 5x5 branch (``b3``).
        pool_proj: output channels of the 1x1 conv after the max-pool (``b4``).

    The output has ``n1x1 + n3x3 + n5x5 + pool_proj`` channels.
    """

    @staticmethod
    def _conv_bn_relu(in_channels, out_channels, kernel_size, padding = 0):
        # Conv2d -> BatchNorm2d -> ReLU, returned as a flat tuple so the layers
        # can be unpacked straight into one nn.Sequential (sub-module indices
        # stay flat: 0, 1, 2, ...).
        return (
            nn.Conv2d(in_channels, out_channels, kernel_size = kernel_size, padding = padding),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace = True),
        )

    def __init__(self, in_channels, n1x1, n3x3_reduce, n3x3, n5x5_reduce, n5x5, pool_proj):
        super().__init__()
        unit = self._conv_bn_relu

        # Branch 1: 1x1 conv
        self.b1 = nn.Sequential(*unit(in_channels, n1x1, 1))

        # Branch 2: 1x1 reduction, then 3x3 conv (padding 1 keeps the spatial size)
        self.b2 = nn.Sequential(
            *unit(in_channels, n3x3_reduce, 1),
            *unit(n3x3_reduce, n3x3, 3, padding = 1),
        )

        # Branch 3: 1x1 reduction, then 5x5 conv (padding 2 keeps the spatial size)
        self.b3 = nn.Sequential(
            *unit(in_channels, n5x5_reduce, 1),
            *unit(n5x5_reduce, n5x5, 5, padding = 2),
        )

        # Branch 4: 3x3 max-pool (stride 1, padding 1), then 1x1 projection
        self.b4 = nn.Sequential(
            nn.MaxPool2d(kernel_size = 3, stride = 1, padding = 1),
            *unit(in_channels, pool_proj, 1),
        )

    def forward(self, x):
        # Run every branch on the same input and stack the results along the channel axis
        branches = (self.b1, self.b2, self.b3, self.b4)
        return torch.cat([branch(x) for branch in branches], dim = 1)

In [None]:
class BasicInceptionNet(nn.Module):
    """Basic GoogLeNet-style Inception network adapted for CIFAR-100.

    Layout: a 3x3 convolutional stem (3 -> 192 channels, padding 1), Inception
    modules a3 and b3, a stride-2 max-pool, Inception modules a4 through e4,
    the same max-pool again, global average pooling, dropout (p = 0.4) and a
    linear classifier over the 832 pooled channels.

    Args:
        num_classes: number of output logits (default 100).
    """

    def __init__(self, num_classes = 100):
        super().__init__()

        # Stem, adapted for 32x32 input: 32x32x3 -> 32x32x192
        self.pre_layers = nn.Sequential(
            nn.Conv2d(3, 192, kernel_size = 3, padding = 1),
            nn.BatchNorm2d(192),
            nn.ReLU(inplace = True),
        )

        # InceptionModule arguments, in order:
        #   in_channels, n1x1, n3x3_reduce, n3x3, n5x5_reduce, n5x5, pool_proj
        # A module outputs n1x1 + n3x3 + n5x5 + pool_proj channels, which is
        # the in_channels of the module that follows it.
        self.a3 = InceptionModule(192, 64, 96, 128, 16, 32, 32)       # 64 + 128 + 32 + 32 = 256
        self.b3 = InceptionModule(256, 128, 128, 192, 32, 96, 64)     # 128 + 192 + 96 + 64 = 480

        # Stride-2 max-pool; forward() applies it after b3 and again after e4
        self.maxpool = nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1)

        self.a4 = InceptionModule(480, 192, 96, 208, 16, 48, 64)      # 192 + 208 + 48 + 64 = 512
        self.b4 = InceptionModule(512, 160, 112, 224, 24, 64, 64)     # 160 + 224 + 64 + 64 = 512
        self.c4 = InceptionModule(512, 128, 128, 256, 24, 64, 64)     # 128 + 256 + 64 + 64 = 512
        self.d4 = InceptionModule(512, 112, 144, 288, 32, 64, 64)     # 112 + 288 + 64 + 64 = 528
        self.e4 = InceptionModule(528, 256, 160, 320, 32, 128, 128)   # 256 + 320 + 128 + 128 = 832

        # Classification head
        self.avgpool = nn.AdaptiveAvgPool2d(output_size = (1, 1))
        self.dropout = nn.Dropout(p = 0.4)
        self.fc = nn.Linear(832, num_classes)

    def forward(self, x):
        x = self.pre_layers(x)

        for block in (self.a3, self.b3):
            x = block(x)
        x = self.maxpool(x)

        for block in (self.a4, self.b4, self.c4, self.d4, self.e4):
            x = block(x)
        x = self.maxpool(x)

        # Collapse each channel to a single value, then flatten to (batch, 832)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        return self.fc(self.dropout(x))

In [17]:
# Build the network with its default number of classes, then move it to the selected device
model_BasicInceptionNet = BasicInceptionNet()
model_BasicInceptionNet = model_BasicInceptionNet.to(device)

In [None]:
# Training setup: cross-entropy loss, Adam with a fixed learning rate, 10 epochs
n_epochs = 10
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_BasicInceptionNet.parameters(), lr = 1e-4)

In [None]:
def evaluate_model(model, data_loader, loss_function, device):
    """Compute the mean batch loss and the accuracy of ``model`` over ``data_loader``.

    The model is switched to eval mode (and left in it), and gradient tracking
    is disabled for the whole pass.

    Args:
        model: module mapping a batch of inputs to per-class scores; the
            prediction is the argmax over dim 1.
        data_loader: iterable of ``(images, labels)`` batches. It does not need
            to support ``len()``; batches are counted while iterating.
        loss_function: callable ``(outputs, labels) -> scalar tensor``.
        device: device each batch is moved to before the forward pass.

    Returns:
        ``(avg_loss, accuracy)``. ``avg_loss`` is the per-batch loss averaged
        over the number of batches (so a smaller final batch weighs as much as
        a full one); ``accuracy`` is the percentage of correctly classified
        samples. Each is ``nan`` when there is nothing to average over (empty
        loader) instead of raising ZeroDivisionError.
    """
    model.eval()
    running_loss = 0.0
    n_batches = 0
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in data_loader:
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            running_loss += loss_function(outputs, labels).item()
            n_batches += 1

            # Index of the highest score per sample is the predicted class
            predicted = outputs.argmax(dim = 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # An empty loader has no defined loss or accuracy; report NaN rather than crash
    avg_loss = running_loss / n_batches if n_batches else float("nan")
    accuracy = 100 * correct / total if total else float("nan")

    return avg_loss, accuracy

In [None]:
def training_loop(model, train_loader, val_loader, test_loader, loss_function, optimizer, n_epochs, device):
    """Train ``model`` for ``n_epochs`` epochs and record per-epoch metrics.

    Each epoch runs one optimisation pass over ``train_loader``, then scores
    the model on ``val_loader`` and ``test_loader`` with ``evaluate_model`` and
    prints a one-line summary.

    Args:
        model: network to optimise; batches are moved to ``device`` before the forward pass.
        train_loader: iterable of ``(images, labels)`` batches supporting ``len()``.
        val_loader: validation batches, passed to ``evaluate_model``.
        test_loader: test batches, passed to ``evaluate_model``.
        loss_function: callable ``(outputs, labels) -> scalar tensor``.
        optimizer: optimizer whose ``zero_grad()`` / ``step()`` are called once per batch.
        n_epochs: number of passes over ``train_loader``.
        device: device the training batches are moved to.

    Returns:
        dict with the keys ``train_losses``, ``train_accs``, ``val_losses``,
        ``val_accs``, ``test_losses`` and ``test_accs``; each maps to a list
        with one entry per epoch. The training loss is the mean of the
        per-batch losses; accuracies are percentages.
    """
    metric_names = (
        "train_losses", "train_accs",
        "val_losses", "val_accs",
        "test_losses", "test_accs",
    )
    history = {name: [] for name in metric_names}

    for epoch in range(n_epochs):
        # ---- optimisation pass over the training data ----
        model.train()
        loss_sum = 0.0
        n_correct = 0
        n_seen = 0

        for images, labels in train_loader:
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = loss_function(outputs, labels)
            loss.backward()
            optimizer.step()

            loss_sum += loss.item()

            # Predicted class = index of the largest score along dim 1
            predicted = torch.max(outputs, 1).indices
            n_seen += labels.size(0)
            n_correct += (predicted == labels).sum().item()

        avg_train_loss = loss_sum / len(train_loader)
        avg_train_acc = 100 * n_correct / n_seen

        # ---- evaluation on the validation and test loaders ----
        val_loss, val_acc = evaluate_model(model, val_loader, loss_function, device)
        test_loss, test_acc = evaluate_model(model, test_loader, loss_function, device)

        epoch_metrics = (avg_train_loss, avg_train_acc, val_loss, val_acc, test_loss, test_acc)
        for name, value in zip(metric_names, epoch_metrics):
            history[name].append(value)

        print(f"Epoch [{epoch + 1}/{n_epochs}] | "
              f"Train Loss: {avg_train_loss:.4f}, Train Acc: {avg_train_acc:.2f}% | "
              f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}% | "
              f"Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%")

    print("Finished Training!")
    return history

In [None]:
import matplotlib.pyplot as plt

def plot_metrics(history):
    """Draw loss and accuracy curves (training / validation / test) side by side.

    Args:
        history: mapping with the list-valued keys 'train_losses',
            'val_losses', 'test_losses', 'train_accs', 'val_accs' and
            'test_accs'. Every series is plotted against epochs
            1..len(history['train_losses']).
    """
    epochs = range(1, len(history['train_losses']) + 1)

    plt.style.use('seaborn-v0_8-whitegrid')
    _, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(16, 6))

    # One entry per panel: (axes, title, y-axis label, ((history key, legend label), ...))
    panels = (
        (ax_loss, 'Loss vs. Epochs', 'Loss', (
            ('train_losses', 'Training Loss'),
            ('val_losses', 'Validation Loss'),
            ('test_losses', 'Test Loss'),
        )),
        (ax_acc, 'Accuracy vs. Epochs', 'Accuracy (%)', (
            ('train_accs', 'Training Accuracy'),
            ('val_accs', 'Validation Accuracy'),
            ('test_accs', 'Test Accuracy'),
        )),
    )

    for ax, title, y_label, series in panels:
        for key, legend_label in series:
            ax.plot(epochs, history[key], 'o-', label=legend_label)
        ax.set_title(title)
        ax.set_xlabel('Epochs')
        ax.set_ylabel(y_label)
        ax.legend()
        # One tick per epoch
        ax.set_xticks(epochs)

    plt.tight_layout()
    plt.show()

In [None]:
# Run the full training schedule; the returned per-epoch metrics are kept for plotting
training_history = training_loop(
    model = model_BasicInceptionNet,
    train_loader = train_loader,
    val_loader = val_loader,
    test_loader = test_loader,
    loss_function = loss_fn,
    optimizer = optimizer,
    n_epochs = n_epochs,
    device = device,
)

In [None]:
# Loss and accuracy curves for the recorded training history
plot_metrics(history = training_history)

In [None]:
# Score the trained model on the test loader once more; only the accuracy is printed
final_test_loss, final_test_acc = evaluate_model(
    model = model_BasicInceptionNet,
    data_loader = test_loader,
    loss_function = loss_fn,
    device = device,
)

print(f'\nFinal accuracy on the 10000 test images: {final_test_acc:.2f}%')