In [4]:
import torch
import torch.nn as nn

# Define AlexNet class with updated padding, stride, and input size
class AlexNet(nn.Module):
    """AlexNet-style CNN with a 7x7/stride-2 stem, plus an optional shape trace.

    The convolutional stack reduces a 3x112x112 image to a 256x6x6 feature map.
    An adaptive average pool then pins the map to 6x6 before the classifier, so
    other input resolutions work as well (for 112x112 inputs the pool is an
    identity, so results are unchanged). Inputs that are too small for the
    max-pool layers still raise inside ``features``.

    Args:
        num_classes: Number of output logits produced by the last linear layer.
        verbose: When True (the default), ``forward`` prints the tensor shape
            after every traced layer. Set to False (or assign
            ``model.verbose = False``) to silence the trace.
    """

    # Trace labels, one per entry of ``self.features`` (same order).
    _FEATURE_LABELS = (
        "Conv1", "ReLU1", "MaxPool1",
        "Conv2", "ReLU2", "MaxPool2",
        "Conv3", "ReLU3",
        "Conv4", "ReLU4",
        "Conv5", "ReLU5", "MaxPool3",
    )
    # Trace labels, one per entry of ``self.classifier``; None marks the
    # dropout layers, which are applied without printing a shape.
    _CLASSIFIER_LABELS = (
        None, "Linear1", "ReLU6",
        None, "Linear2", "ReLU7",
        "Linear3",
    )

    def __init__(self, num_classes=1000, verbose=True):
        super().__init__()
        self.verbose = verbose

        self.features = nn.Sequential(
            # Conv1 Layer: 3 input channels, 64 output channels, 7x7 kernel, stride 2, padding 3
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),

            # Conv2 Layer: 64 input channels, 192 output channels, 5x5 kernel, padding 2
            nn.Conv2d(64, 192, kernel_size=5, stride=1, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),

            # Conv3 Layer: 192 input channels, 384 output channels, 3x3 kernel, padding 1
            nn.Conv2d(192, 384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),

            # Conv4 Layer: 384 input channels, 256 output channels, 3x3 kernel, padding 1
            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),

            # Conv5 Layer: 256 input channels, 256 output channels, 3x3 kernel, padding 1
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )

        # Parameter-free, so state_dict keys and weight initialisation order are
        # unchanged. Guarantees the 6x6 map that the first Linear layer expects.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def _trace(self, label, x):
        """Print ``label`` followed by the shape of ``x`` when tracing is on."""
        if self.verbose:
            print(label, x.shape)

    def forward(self, x):
        """Run the network on ``x`` and return the logits.

        Args:
            x: Image batch of shape (batch, 3, height, width).

        Returns:
            Tensor of shape (batch, num_classes).
        """
        self._trace("Input shape:", x)

        for label, layer in zip(self._FEATURE_LABELS, self.features):
            x = layer(x)
            self._trace(f"After {label}:", x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)  # Keep the batch dimension, flatten the rest
        self._trace("After Flatten:", x)

        for label, layer in zip(self._CLASSIFIER_LABELS, self.classifier):
            x = layer(x)
            if label is not None:
                self._trace(f"After {label}:", x)

        return x

# Example of building the AlexNet model and running a single inference pass
model = AlexNet(num_classes=1000)
model.eval()  # Inference mode: disables the dropout layers in the classifier

# Dummy input: batch size of 1, 3 channels, 112x112 image
input_tensor = torch.randn(1, 3, 112, 112)  # Random tensor with size (batch, channels, height, width)

# No gradients are needed for inference, so skip building the autograd graph
with torch.no_grad():
    output = model(input_tensor)


Input shape: torch.Size([1, 3, 112, 112])
After Conv1: torch.Size([1, 64, 56, 56])
After ReLU1: torch.Size([1, 64, 56, 56])
After MaxPool1: torch.Size([1, 64, 27, 27])
After Conv2: torch.Size([1, 192, 27, 27])
After ReLU2: torch.Size([1, 192, 27, 27])
After MaxPool2: torch.Size([1, 192, 13, 13])
After Conv3: torch.Size([1, 384, 13, 13])
After ReLU3: torch.Size([1, 384, 13, 13])
After Conv4: torch.Size([1, 256, 13, 13])
After ReLU4: torch.Size([1, 256, 13, 13])
After Conv5: torch.Size([1, 256, 13, 13])
After ReLU5: torch.Size([1, 256, 13, 13])
After MaxPool3: torch.Size([1, 256, 6, 6])
After Flatten: torch.Size([1, 9216])
After Linear1: torch.Size([1, 4096])
After ReLU6: torch.Size([1, 4096])
After Linear2: torch.Size([1, 4096])
After ReLU7: torch.Size([1, 4096])
After Linear3: torch.Size([1, 1000])


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST

# Define the optimized AlexNet class
class OptimizedAlexNet(nn.Module):
    """Small two-conv-layer CNN for single-channel images.

    Each conv block halves the spatial resolution with a 2x2 max-pool, so the
    feature map is (64, height // 4, width // 4). The width of the first fully
    connected layer is derived by pushing a zero tensor of the expected input
    size through ``features``; inputs passed to ``forward`` must therefore have
    the same spatial size as ``input_size``.

    Args:
        num_classes: Number of output logits.
        input_size: Expected spatial size of the input images, either an int
            (square images) or a ``(height, width)`` pair. Defaults to 64x64.
    """

    def __init__(self, num_classes=10, input_size=(64, 64)):
        super().__init__()
        if isinstance(input_size, int):
            input_size = (input_size, input_size)
        height, width = input_size

        self.features = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),  # Fewer filters
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        # Probe tensor used only to size the classifier. It is a plain attribute
        # (not a parameter or buffer), kept so existing code that reads
        # ``dummy_input`` / ``flattened_size`` keeps working.
        self.dummy_input = torch.zeros(1, 1, height, width)
        self.flattened_size = self._get_flattened_size(self.dummy_input)

        self.classifier = nn.Sequential(
            nn.Linear(self.flattened_size, 512),  # Smaller fully connected layers
            nn.ReLU(inplace=True),
            nn.Linear(512, num_classes),
        )

    def _get_flattened_size(self, x):
        """Return the per-sample feature count ``features`` yields for ``x``."""
        # Shape probing only: there is no reason to record an autograd graph.
        with torch.no_grad():
            x = self.features(x)
        return x.view(x.size(0), -1).size(1)

    def forward(self, x):
        """Map a (batch, 1, height, width) image batch to (batch, num_classes) logits."""
        x = self.features(x)
        x = torch.flatten(x, 1)  # Keep the batch dimension, flatten the rest
        x = self.classifier(x)
        return x


# Reproducibility: seed torch's global RNG before the model is built and the
# DataLoader starts shuffling, so weight initialisation and batch order repeat
# across runs (some GPU kernels may still be non-deterministic).
SEED = 42
torch.manual_seed(SEED)

# Tunable settings for this experiment
IMAGE_SIZE = 64        # Must match the input size OptimizedAlexNet is built for (64x64 by default)
BATCH_SIZE = 32        # Smaller batch size
LEARNING_RATE = 0.001

# Data preprocessing
transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),  # Smaller size
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),  # Maps pixel values from [0, 1] to [-1, 1]
])

# Load the MNIST dataset (downloaded to ./data on first use)
trainset = MNIST(root='./data', train=True, download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True)

testset = MNIST(root='./data', train=False, download=True, transform=transform)
testloader = DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False)

# Initialize the model, loss function, and optimizer
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = OptimizedAlexNet(num_classes=10).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Training loop: one optimizer step per mini-batch; the mean loss over the
# last 50 batches is reported every 50 batches.
epochs = 2  # Fewer epochs for quicker training
for epoch in range(epochs):
    model.train()
    running_loss = 0.0  # Loss accumulated since the last report

    for i, (inputs, labels) in enumerate(trainloader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Forward pass, backward pass, parameter update
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Only report on every 50th batch (batch numbers 50, 100, ...)
        if (i + 1) % 50 != 0:
            continue
        print(f"Epoch [{epoch+1}/{epochs}], Batch [{i+1}], Loss: {running_loss/50:.4f}")
        running_loss = 0.0

print("Training Finished")

# Evaluation on the test set: count how often the highest-scoring class
# matches the label, with gradients disabled.
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for inputs, labels in testloader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        outputs = model(inputs)
        predicted = torch.max(outputs, 1)[1]  # Index of the max logit per sample

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy on the test images: {100 * correct / total:.2f}%")


Epoch [1/2], Batch [50], Loss: 1.1152
Epoch [1/2], Batch [100], Loss: 0.3624
Epoch [1/2], Batch [150], Loss: 0.2302
Epoch [1/2], Batch [200], Loss: 0.1765
Epoch [1/2], Batch [250], Loss: 0.1845
Epoch [1/2], Batch [300], Loss: 0.1417
Epoch [1/2], Batch [350], Loss: 0.1390
Epoch [1/2], Batch [400], Loss: 0.1093
Epoch [1/2], Batch [450], Loss: 0.1353
Epoch [1/2], Batch [500], Loss: 0.0922
Epoch [1/2], Batch [550], Loss: 0.1171
Epoch [1/2], Batch [600], Loss: 0.1006
Epoch [1/2], Batch [650], Loss: 0.0987
Epoch [1/2], Batch [700], Loss: 0.0706
Epoch [1/2], Batch [750], Loss: 0.0874
Epoch [1/2], Batch [800], Loss: 0.0929
Epoch [1/2], Batch [850], Loss: 0.0968
Epoch [1/2], Batch [900], Loss: 0.0709
Epoch [1/2], Batch [950], Loss: 0.0807
Epoch [1/2], Batch [1000], Loss: 0.0667
Epoch [1/2], Batch [1050], Loss: 0.0729
Epoch [1/2], Batch [1100], Loss: 0.0726
Epoch [1/2], Batch [1150], Loss: 0.0488
Epoch [1/2], Batch [1200], Loss: 0.0700
Epoch [1/2], Batch [1250], Loss: 0.0621
Epoch [1/2], Batch [