In [1]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# CIFAR-100 dataset loading with transformations
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Load training and test sets
train_set = dsets.CIFAR100(root='./data', train=True, transform=transform, download=True)
test_set = dsets.CIFAR100(root='./data', train=False, transform=transform, download=True)

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data/cifar-100-python.tar.gz


100%|██████████| 169M/169M [00:05<00:00, 33.6MB/s]


Extracting ./data/cifar-100-python.tar.gz to ./data
Files already downloaded and verified


In [2]:
train_size = int(0.8 * len(train_set))
val_size = len(train_set) - train_size
subtrain_set, val_set = torch.utils.data.random_split(train_set, [train_size, val_size])

# Data loaders
batch_size = 64
train_loader = DataLoader(subtrain_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

In [3]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.fc1 = nn.Linear(4 * 4 * 128, 512)
        self.fc2 = nn.Linear(512, 100)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = out.view(out.size(0), -1)
        out = nn.ReLU()(self.fc1(out))
        out = self.fc2(out)
        return out

In [4]:
# Model 1: criterion, and optimizer
model = CNN().to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [5]:
epochs = 10
for epoch in range(epochs):
    avg_cost = 0
    total_batch = len(train_loader)

    for X, Y in train_loader:
        X, Y = X.to(device), Y.to(device)

        optimizer.zero_grad()
        cost = criterion(model(X), Y)
        cost.backward()
        optimizer.step()

        avg_cost += cost / total_batch

    print(f'Epoch: {epoch + 1}, cost = {avg_cost:.6f}')

    # Validation after each epoch
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for val_X, val_Y in val_loader:
            val_X, val_Y = val_X.to(device), val_Y.to(device)
            val_outputs = model(val_X)
            _, predicted = torch.max(val_outputs, 1)
            total += val_Y.size(0)
            correct += (predicted == val_Y).sum().item()

    val_acc = 100 * correct / total
    print(f'Validation Accuracy: {val_acc:.2f}%')
    model.train()  # Setting the model back to training mode

Epoch: 1, cost = 3.677025
Validation Accuracy: 22.83%
Epoch: 2, cost = 2.890987
Validation Accuracy: 31.00%
Epoch: 3, cost = 2.499290
Validation Accuracy: 34.95%
Epoch: 4, cost = 2.197762
Validation Accuracy: 38.00%
Epoch: 5, cost = 1.947838
Validation Accuracy: 38.86%
Epoch: 6, cost = 1.715972
Validation Accuracy: 40.06%
Epoch: 7, cost = 1.494463
Validation Accuracy: 40.11%
Epoch: 8, cost = 1.279967
Validation Accuracy: 39.84%
Epoch: 9, cost = 1.086881
Validation Accuracy: 38.19%
Epoch: 10, cost = 0.906014
Validation Accuracy: 38.65%


In [6]:
# Test model evaluation
with torch.no_grad():
    correct = 0
    total = 0
    for test_X, test_Y in test_loader:
        test_X, test_Y = test_X.to(device), test_Y.to(device)
        outputs = model(test_X)
        _, predicted = torch.max(outputs, 1)
        total += test_Y.size(0)
        correct += (predicted == test_Y).sum().item()

test_acc = 100 * correct / total
print(f'Test Accuracy: {test_acc:.2f}%')

Test Accuracy: 39.05%


In [11]:
class CNN2(nn.Module):
    def __init__(self):
        super(CNN2, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.fc1 = nn.Linear(8 * 8 * 128, 256)
        self.fc2 = nn.Linear(256, 100)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.view(out.size(0), -1)
        out = nn.ReLU()(self.fc1(out))
        out = self.fc2(out)
        return out

In [15]:
# Model 2: criterion, and optimizer
model = CNN2().to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.05)

In [16]:
epochs = 5
for epoch in range(epochs):
    avg_cost = 0
    total_batch = len(train_loader)

    for X, Y in train_loader:
        X, Y = X.to(device), Y.to(device)

        optimizer.zero_grad()
        cost = criterion(model(X), Y)
        cost.backward()
        optimizer.step()

        avg_cost += cost / total_batch

    print(f'Epoch: {epoch + 1}, cost = {avg_cost:.6f}')

    # Validation after each epoch
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for val_X, val_Y in val_loader:
            val_X, val_Y = val_X.to(device), val_Y.to(device)
            val_outputs = model(val_X)
            _, predicted = torch.max(val_outputs, 1)
            total += val_Y.size(0)
            correct += (predicted == val_Y).sum().item()

    val_acc = 100 * correct / total
    print(f'Validation Accuracy: {val_acc:.2f}%')
    model.train()  # Setting the model back to training mode

Epoch: 1, cost = 8.416035
Validation Accuracy: 0.93%
Epoch: 2, cost = 4.621924
Validation Accuracy: 0.97%
Epoch: 3, cost = 4.621541
Validation Accuracy: 1.03%
Epoch: 4, cost = 4.621964
Validation Accuracy: 0.99%
Epoch: 5, cost = 4.621321
Validation Accuracy: 1.03%


In [17]:
# Test model evaluation
with torch.no_grad():
    correct = 0
    total = 0
    for test_X, test_Y in test_loader:
        test_X, test_Y = test_X.to(device), test_Y.to(device)
        outputs = model(test_X)
        _, predicted = torch.max(outputs, 1)
        total += test_Y.size(0)
        correct += (predicted == test_Y).sum().item()

test_acc = 100 * correct / total
print(f'Test Accuracy: {test_acc:.2f}%')

Test Accuracy: 1.00%


In [18]:
class CNN3(nn.Module):
    def __init__(self):
        super(CNN3, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(0.1),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(0.1),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(0.1),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.fc1 = nn.Linear(4 * 4 * 128, 512)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(512, 100)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = out.view(out.size(0), -1)
        out = self.dropout(nn.LeakyReLU(0.1)(self.fc1(out)))
        out = self.fc2(out)
        return out

In [19]:
# Model 3: criterion, and optimizer
model = CNN3().to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.1)

In [20]:
epochs = 7
for epoch in range(epochs):
    avg_cost = 0
    total_batch = len(train_loader)

    for X, Y in train_loader:
        X, Y = X.to(device), Y.to(device)

        optimizer.zero_grad()
        cost = criterion(model(X), Y)
        cost.backward()
        optimizer.step()

        avg_cost += cost / total_batch

    print(f'Epoch: {epoch + 1}, cost = {avg_cost:.6f}')

    # Validation after each epoch
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for val_X, val_Y in val_loader:
            val_X, val_Y = val_X.to(device), val_Y.to(device)
            val_outputs = model(val_X)
            _, predicted = torch.max(val_outputs, 1)
            total += val_Y.size(0)
            correct += (predicted == val_Y).sum().item()

    val_acc = 100 * correct / total
    print(f'Validation Accuracy: {val_acc:.2f}%')
    model.train()  # Setting the model back to training mode

Epoch: 1, cost = 43739.777344
Validation Accuracy: 2.42%
Epoch: 2, cost = 502.512054
Validation Accuracy: 2.98%
Epoch: 3, cost = 239.197739
Validation Accuracy: 4.18%
Epoch: 4, cost = 67.072227
Validation Accuracy: 3.77%
Epoch: 5, cost = 45.390640
Validation Accuracy: 4.26%
Epoch: 6, cost = 188396240.000000
Validation Accuracy: 1.25%
Epoch: 7, cost = 15689974.000000
Validation Accuracy: 3.67%


In [21]:
# Test model evaluation
with torch.no_grad():
    correct = 0
    total = 0
    for test_X, test_Y in test_loader:
        test_X, test_Y = test_X.to(device), test_Y.to(device)
        outputs = model(test_X)
        _, predicted = torch.max(outputs, 1)
        total += test_Y.size(0)
        correct += (predicted == test_Y).sum().item()

test_acc = 100 * correct / total
print(f'Test Accuracy: {test_acc:.2f}%')

Test Accuracy: 2.01%


In [1]:
def test_model(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for test_X, test_Y in test_loader:
            test_X, test_Y = test_X.to(device), test_Y.to(device)
            outputs = model(test_X)
            _, predicted = torch.max(outputs, 1)
            total += test_Y.size(0)
            correct += (predicted == test_Y).sum().item()
    test_acc = 100 * correct / total
    print(f'Test Accuracy: {test_acc:.2f}%')

In [27]:
class CNN4(nn.Module):
    def __init__(self):
        super(CNN4, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.fc1 = nn.Linear(4 * 4 * 128, 256)
        self.dropout = nn.Dropout(0.3)
        self.fc2 = nn.Linear(256, 100)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = out.view(out.size(0), -1)
        out = self.dropout(self.fc1(out))
        out = self.fc2(out)
        return out

In [28]:
# Model 4: criterion, and optimizer
model = CNN4().to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.9)

In [29]:
epochs = 3
for epoch in range(epochs):
    avg_cost = 0
    total_batch = len(train_loader)

    for X, Y in train_loader:
        X, Y = X.to(device), Y.to(device)

        optimizer.zero_grad()
        cost = criterion(model(X), Y)
        cost.backward()
        optimizer.step()

        avg_cost += cost / total_batch

    print(f'Epoch: {epoch + 1}, cost = {avg_cost:.6f}')

    # Validation after each epoch
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for val_X, val_Y in val_loader:
            val_X, val_Y = val_X.to(device), val_Y.to(device)
            val_outputs = model(val_X)
            _, predicted = torch.max(val_outputs, 1)
            total += val_Y.size(0)
            correct += (predicted == val_Y).sum().item()

    val_acc = 100 * correct / total
    print(f'Validation Accuracy: {val_acc:.2f}%')
    model.train()  # Setting the model back to training mode

Epoch: 1, cost = 1432.541016
Validation Accuracy: 1.06%
Epoch: 2, cost = 206.046295
Validation Accuracy: 0.99%
Epoch: 3, cost = 149.994690
Validation Accuracy: 1.05%


In [30]:
class CNN5(nn.Module):
    def __init__(self):
        super(CNN5, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.ELU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.ELU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.ELU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.fc1 = nn.Linear(4 * 4 * 256, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 100)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = out.view(out.size(0), -1)
        out = nn.ELU()(self.fc1(out))
        out = nn.ELU()(self.fc2(out))
        out = self.fc3(out)
        return out

In [31]:
# Model 5: criterion, and optimizer
model = CNN4().to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=10)

In [32]:
epochs = 5
for epoch in range(epochs):
    avg_cost = 0
    total_batch = len(train_loader)

    for X, Y in train_loader:
        X, Y = X.to(device), Y.to(device)

        optimizer.zero_grad()
        cost = criterion(model(X), Y)
        cost.backward()
        optimizer.step()

        avg_cost += cost / total_batch

    print(f'Epoch: {epoch + 1}, cost = {avg_cost:.6f}')

    # Validation after each epoch
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for val_X, val_Y in val_loader:
            val_X, val_Y = val_X.to(device), val_Y.to(device)
            val_outputs = model(val_X)
            _, predicted = torch.max(val_outputs, 1)
            total += val_Y.size(0)
            correct += (predicted == val_Y).sum().item()

    val_acc = 100 * correct / total
    print(f'Validation Accuracy: {val_acc:.2f}%')
    model.train()  # Setting the model back to training mode

Epoch: 1, cost = 206849.937500
Validation Accuracy: 1.01%
Epoch: 2, cost = 12197.692383
Validation Accuracy: 0.95%
Epoch: 3, cost = 11883.354492
Validation Accuracy: 0.79%
Epoch: 4, cost = 13132.405273
Validation Accuracy: 0.93%
Epoch: 5, cost = 12136.412109
Validation Accuracy: 0.80%


In [33]:
# Use the entire training dataset (without splitting)
full_train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)

In [34]:
def train_model(model, train_loader, optimizer, criterion, epochs=10):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for X, Y in train_loader:
            X, Y = X.to(device), Y.to(device)

            optimizer.zero_grad()
            outputs = model(X)
            loss = criterion(outputs, Y)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        avg_loss = total_loss / len(train_loader)
        print(f"Epoch [{epoch + 1}/{epochs}], Loss: {avg_loss:.4f}")

In [35]:
model1 = CNN().to(device)
model2 = CNN2().to(device)
model3 = CNN3().to(device)

# Define loss function
criterion = nn.CrossEntropyLoss().to(device)

# Define optimizers
optimizer1 = optim.Adam(model1.parameters(), lr=0.001)
optimizer2 = optim.Adam(model2.parameters(), lr=0.001)
optimizer3 = optim.Adam(model3.parameters(), lr=0.001)

In [36]:
print("Model 1 Retrain")
train_model(model1, full_train_loader, optimizer1, criterion)

print("\n Model 2 Retrain")
train_model(model2, full_train_loader, optimizer2, criterion)

print("\n Model 3 Retrain")
train_model(model3, full_train_loader, optimizer3, criterion)

Model 1 Retrain
Epoch [1/10], Loss: 3.5497
Epoch [2/10], Loss: 2.7308
Epoch [3/10], Loss: 2.3128
Epoch [4/10], Loss: 2.0129
Epoch [5/10], Loss: 1.7621
Epoch [6/10], Loss: 1.5338
Epoch [7/10], Loss: 1.3212
Epoch [8/10], Loss: 1.1239
Epoch [9/10], Loss: 0.9366
Epoch [10/10], Loss: 0.7829

 Model 2 Retrain
Epoch [1/10], Loss: 3.4140
Epoch [2/10], Loss: 2.6029
Epoch [3/10], Loss: 2.2070
Epoch [4/10], Loss: 1.8949
Epoch [5/10], Loss: 1.6143
Epoch [6/10], Loss: 1.3476
Epoch [7/10], Loss: 1.1028
Epoch [8/10], Loss: 0.8768
Epoch [9/10], Loss: 0.6836
Epoch [10/10], Loss: 0.5279

 Model 3 Retrain
Epoch [1/10], Loss: 3.5587
Epoch [2/10], Loss: 2.7553
Epoch [3/10], Loss: 2.3526
Epoch [4/10], Loss: 2.0624
Epoch [5/10], Loss: 1.8451
Epoch [6/10], Loss: 1.6485
Epoch [7/10], Loss: 1.4772
Epoch [8/10], Loss: 1.3153
Epoch [9/10], Loss: 1.1793
Epoch [10/10], Loss: 1.0580


In [37]:
def evaluate_model(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for X, Y in test_loader:
            X, Y = X.to(device), Y.to(device)
            outputs = model(X)
            _, predicted = torch.max(outputs, 1)
            total += Y.size(0)
            correct += (predicted == Y).sum().item()

    accuracy = 100 * correct / total
    return accuracy

In [38]:
test1 = evaluate_model(model1, test_loader)
test2 = evaluate_model(model2, test_loader)
test3 = evaluate_model(model3, test_loader)

print(f"Test Accuracy of Model 1 (CNN): {test1:.2f}%")
print(f"Test Accuracy of Model 2 (CNN2): {test2:.2f}%")
print(f"Test Accuracy of Model 3 (CNN3): {test3:.2f}%")

Test Accuracy of Model 1 (CNN): 41.42%
Test Accuracy of Model 2 (CNN2): 38.46%
Test Accuracy of Model 3 (CNN3): 50.22%


In [40]:
# Benchmarking:
# These models achieved test accuracies of 41.42%, 38.46%, and 50.22%.
# The top models on the CIFAR-100 leaderboard often exceed 80% accuracy,
# but these use additional data and more complex architectures.