In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [2]:
'''
Question 1:
    Train your own AlexNet from scratch, 
    without referring to any weights published. 
    Report test accuracy 
'''

class AlexNet(nn.Module):
    """AlexNet-style convolutional image classifier.

    The network is made of three stages:

    * ``features``   - five 2-D convolutions, each followed by ReLU, with a
      3x3 / stride-2 max-pool after the 1st, 2nd and 5th convolution.
    * ``avgpool``    - adaptive average pooling to a fixed 6x6 spatial grid.
    * ``classifier`` - dropout-regularised fully connected head ending in a
      linear layer with ``num_classes`` outputs.

    Args:
        num_classes: Number of output units of the final linear layer.
    """

    def __init__(self, num_classes=100):
        super().__init__()

        # Layer order (and therefore the Sequential indices / state_dict keys)
        # is significant: code outside this class indexes into these stacks.
        conv_layers = [
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.features = nn.Sequential(*conv_layers)

        # Pooling to 6x6 fixes the flattened size at 256 * 6 * 6 regardless of
        # the spatial size coming out of ``features``.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

        head_layers = [
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Compute class scores for a batch of 3-channel images.

        Args:
            x: Batched image tensor with 3 channels.

        Returns:
            Tensor with one row per batch element and ``num_classes`` columns.
        """
        feature_maps = self.features(x)
        pooled = self.avgpool(feature_maps)
        flat = torch.flatten(pooled, 1)  # keep the batch dimension
        return self.classifier(flat)

# Question 1 pipeline: build a randomly initialised AlexNet, train it on the
# training folder, measure accuracy on the test folder, and save the weights.
model = AlexNet(num_classes=100)

# Every image is resized to 224x224, converted to a tensor and normalised
# channel-wise with the fixed mean / std values below.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

# ImageFolder reads one sub-directory per class under each root.
train_dataset = datasets.ImageFolder(
    root='./data/sports_image/train/',
    transform=transform,
)
test_dataset = datasets.ImageFolder(
    root='./data/sports_image/test/',
    transform=transform,
)

# Only the training batches are shuffled.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training: one optimiser step per mini-batch; the mean batch loss is printed
# at the end of every epoch.
num_epochs = 2
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for batch_images, batch_targets in train_loader:
        optimizer.zero_grad()
        batch_loss = criterion(model(batch_images), batch_targets)
        batch_loss.backward()
        optimizer.step()
        running_loss += batch_loss.item()

    print(f'Epoch {epoch + 1}, Loss: {running_loss / len(train_loader)}')

# Testing: count top-1 matches over the whole test loader with gradients off.
model.eval()
total = 0
correct = 0
with torch.no_grad():
    for batch_images, batch_targets in test_loader:
        scores = model(batch_images)
        predicted = scores.data.max(1)[1]  # index of the highest score per row
        total += batch_targets.size(0)
        correct += (predicted == batch_targets).sum().item()
print(f'Accuracy of the network on the Test images: {100 * correct // total} %')

# Persist the trained parameters for later cells.
torch.save(model.state_dict(), 'alexnet_from_scratch.pth')

Epoch 1, Loss: 4.560397692766235
Epoch 2, Loss: 4.267620034692412
Accuracy of the network on the Test images: 3 %


In [3]:
'''
Question 2:
    Reuse the weights from AlexNet 2, 
    design a transfer learning strategy, and report test accuracy.
    Compare and discuss with the result you get for question 1.
'''

# Question 2 pipeline: reload saved AlexNet weights, freeze the convolutional
# feature extractor, retrain only the classifier head, evaluate, and save.
#
# NOTE(review): the checkpoint loaded here is 'alexnet_from_scratch.pth', i.e.
# weights produced by this notebook rather than externally published ones.
# If the assignment expects published pre-trained weights, the checkpoint
# source has to change - confirm the intended strategy.
alexnet = AlexNet(num_classes=100)

alexnet.load_state_dict(torch.load('alexnet_from_scratch.pth'))

# Swap the last linear layer for a freshly initialised one with 100 outputs;
# the loaded weights of that layer are discarded.
alexnet.classifier[6] = nn.Linear(alexnet.classifier[6].in_features, 100)

# Freeze the feature extractor so no gradients are computed for it.
for param in alexnet.features.parameters():
    param.requires_grad = False

criterion = nn.CrossEntropyLoss()
# Only the classifier parameters are handed to the optimiser.
optimizer = optim.Adam(alexnet.classifier.parameters(), lr=0.001)

num_epochs = 2
for epoch in range(num_epochs):
    alexnet.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()

        outputs = alexnet(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    # Mean batch loss for the epoch.
    print(f'Epoch {epoch + 1}, Loss: {running_loss / len(train_loader)}')


# Evaluation: top-1 accuracy over the test loader with gradients disabled.
alexnet.eval()
total = 0
correct = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = alexnet(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print(f'Accuracy of the network on the Test images: {100 * correct // total} %')


# Save the network that was just fine-tuned. The previous code saved `model`
# (the Question 1 network) under this filename, so the checkpoint did not
# contain the transfer-learned weights.
torch.save(alexnet.state_dict(), 'alexnet_transfer_learned.pth')

'''
Discussion of Question 1 and Question 2:
    
    The test accuracies for Questions 1 and 2 are 3% and 7%, respectively.
    Reusing pre-trained models yields better performance due to their learned representations.
'''

Epoch 1, Loss: 4.160967478819933
Epoch 2, Loss: 4.007462016779099
Accuracy of the network on the Test images: 7 %


In [6]:
from torchvision import models

# SqueezeNet 1.1 transfer learning: load published weights, replace the
# classifier with a 100-way head, freeze the feature extractor, train only the
# new head, evaluate, and save.
# NOTE(review): `pretrained=True` is deprecated in newer torchvision releases
# in favour of the `weights=` argument; kept as-is for compatibility with the
# installed version - confirm before upgrading.
squeezenet = models.squeezenet1_1(pretrained=True)

# New classifier: dropout, a 1x1 convolution mapping 512 channels to 100 class
# maps, ReLU, then global average pooling down to 1x1 per class.
final_conv = nn.Conv2d(512, 100, kernel_size=(1,1))
squeezenet.classifier = nn.Sequential(
    nn.Dropout(p=0.5),
    final_conv,
    nn.ReLU(inplace=True),
    nn.AdaptiveAvgPool2d((1, 1))
)

# Keep the model's recorded class count in sync with the new head.
squeezenet.num_classes = 100

# Freeze the feature extractor so no gradients are computed for it.
for param in squeezenet.features.parameters():
    param.requires_grad = False

criterion = nn.CrossEntropyLoss()
# Only the new classifier parameters are handed to the optimiser.
optimizer = optim.Adam(squeezenet.classifier.parameters(), lr=0.001)


num_epochs = 2
for epoch in range(num_epochs):
    squeezenet.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()

        outputs = squeezenet(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    # Mean batch loss for the epoch.
    print(f'Epoch {epoch + 1}, Loss: {running_loss / len(train_loader)}')


# Evaluation: top-1 accuracy over the test loader with gradients disabled.
squeezenet.eval()
total = 0
correct = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = squeezenet(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print(f'Accuracy of the network on the Test images: {100 * correct // total} %')


# Save the fine-tuned SqueezeNet. The previous code saved `model` (the AlexNet
# from Question 1) under this filename, so the SqueezeNet weights were never
# persisted.
torch.save(squeezenet.state_dict(), 'squeezenet_transfer_learned.pth')

Epoch 1, Loss: 2.1544788197318523
Epoch 2, Loss: 0.7792631475281376
Accuracy of the network on the Test images: 87 %
