In [2]:
""" 
this is a pytorch implementation of alexnet, trained and validated on 
CIFAR-10.

first time implementing a deep learning paper. 

using CIFAR-10 because ImageNet is too big to do locally

"""

### TODO ### 
# - import dependencies 
# - load the data
# - set up model architecture
# - define the forward pass 
# - write a training loop
# - train and validate model
# - final accuracy on test data

### add later ###
# - visualize loss 
# - visualize data
# - figure out how to get ImageNet

' \nthis is a pytorch implementation of alexnet, trained and validated on \nImageNet.\n\nfirst time implementing a deep learning paper. \n\nusing CIFAR-10 because ImageNet is too big to do locally\n\n'

In [11]:
import torch
import torch.nn as nn

# to load the data
from torch.utils.data import DataLoader

# optimizer 
import torch.optim as optim

from torch.utils.data import sampler

import torchvision.datasets as dset
import torchvision.transforms as T 

import numpy as np

In [21]:
USE_GPU = True

dtype = torch.float32 # we will be using float throughout this tutorial

if USE_GPU and torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Constant to control how frequently we print train loss
print_every = 100

print('using device:', device)

using device: cpu


In [18]:
# set up preprocessing transformations: 
# - resize to 256x256
# - take center crops 224x224
# - center with mean

# original paper resized image to 224x224, skip that becuase CIFAR-10
# has tiny images 


transform = T.Compose([
                T.ToTensor(),
                T.Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010))
            ])

In [19]:
# load the data using a Dataset object and DataLoader wrapper
%cd ../datasets
!bash get_datasets.sh
%cd ../AlexNet
NUM_TRAIN = 49000

cifar10_train = dset.CIFAR10('./datasets', train=True, download=True,
                             transform=transform)
loader_train = DataLoader(cifar10_train, batch_size=64, 
                         sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN)))

cifar10_val = dset.CIFAR10('./datasets', train=True, download=True, 
                          transform=transform)
loader_val = DataLoader(cifar10_val, batch_size=64, 
                       sampler=sampler.SubsetRandomSampler(range(NUM_TRAIN, 50000)))

cifar10_test = dset.CIFAR10('./datasets', train=False, download=True, 
                            transform=transform)
loader_test = DataLoader(cifar10_test, batch_size=64)

/Users/Nrj/Desktop/paper implementations/datasets
/Users/Nrj/Desktop/paper implementations/AlexNet
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [31]:
def dim_helper(len_in, kernel, pad, stride): 
    out = ((len_in + 2 * pad - kernel - 2) / stride) + 1
    return out

In [59]:
dim_helper(32, 3, 2, 1)

32.0

In [60]:
# set up model architecture

"""
model details: 
- 5 conv layers
- 3 FC layers
- relu nonlinearities
- local response normalization
- dropout
- momentum
- flatten between conv and fc layers
"""

class AlexNet(nn.Module): 
    def __init__(self, num_classes=10):
        super(AlexNet, self).__init__()
        
        ### TODO ### 
        # - initialize layers
        # conv, localresponsenorm, fc, dropout
        
        # conv layers 
        # TODO: calculate the strides and paddings to fit dims
        self.conv1 = nn.Conv2d(3, 96, kernel_size=3, stride=1, padding=2)
        self.conv2 = nn.Conv2d(96, 256, kernel_size=3, stride=1, padding=2)
        self.conv3 = nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=2)
        self.conv4 = nn.Conv2d(384, 192, kernel_size=3, stride=1, padding=2)
        self.conv5 = nn.Conv2d(192, 256, kernel_size=3, stride=1, padding=2)
        
        # fully connected layers 
        # TODO: calculate in_features 
        self.fc1 = nn.Linear(in_features=256 * 62 * 62, out_features=4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(in_features=4096, out_features=num_classes)
        
        # overlapping pool where s < z 
        self.pool = nn.MaxPool2d(kernel_size=3, stride=2)
        
        # ReLU nonlinearity
        self.relu = nn.ReLU(inplace=True)
        
        # local response norm
        # TODO: calc dims
        self.localresponsenorm = nn.LocalResponseNorm(size=5, alpha=1e-4, 
                                                      beta=0.75, k=2.0)

        # dropout regularization
        self.dropout = nn.Dropout()

        
    def forward(self, x): 
        # define connectivity
        x = self.conv1(x)
        x = self.localresponsenorm(x)
        x = self.relu(x)
        x = self.pool(x)
        
        x = self.conv2(x)
        x = self.localresponsenorm(x)
        x = self.relu(x)
        x = self.pool(x)
        
        x = self.conv3(x)
        x = self.relu(x)
        x = self.conv4(x)
        x = self.relu(x)
        x = self.conv5(x)
        x = self.relu(x)
        
        x = torch.flatten(x, 1)
        
        x = self.dropout(x)
        x = self.fc1(x)
        x = self.relu(x)
        
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.relu(x)
        
        x = self.dropout(x)
        x = self.fc3(x)

In [61]:
def check_accuracy(loader, model): 
    if loader.dataset.train: 
        print('checking validation set')
    else: 
        print('checking test set')
    num_correct, num_samples = 0, 0
    model.eval()
    with torch.no_grad():
        for x, y in loader: 
            # put onto device (here using cpu)
            x = x.to(device=device, dtype=dtype)
            y = y.to(device=device, dtype=torch.long)
            
            # scores of val/test set
            scores = model(x)
            
            # preds are argmaxes of scores
            # torch.max returns tuple (values, indices)
            _, preds = scores.max(1)
            
            num_correct += (preds == y).sum()
            num_samples += preds.size(0)
        acc = float(num_correct) / num_samples
        print('%d / %d correct (%.2f)' % (num_correct, num_samples, 100*acc))
            

In [65]:
# write a training loop

model = AlexNet()

cross_entropy_loss = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.0005, momentum=0.9)

def train(model, optimizer, epochs=1):
    model = model.to(device=device)
    for e in range(epochs): 
        for t, (x, y) in enumerate(loader_train): 

            # puts model in train mode. call model.eval() for testing
            model.train() 

            # put onto device (here using cpu)
            x = x.to(device=device, dtype=dtype)
            y = y.to(device=device, dtype=torch.long)

            # forward pass 
            scores = model(x)

            # loss
            loss = cross_entropy_loss(scores, y)

            # zero the gradients
            optimizer.zero_grad()
            
            # backward pass
            loss.backward()

            # update parameters
            optimizer.step()

            # print progress 
            print(t)
            if t % 100 == 0: 
                print('Iteration %d, loss = %.4f' % (t, loss.item()))
                check_accuracy(loader_val, model)
                print()


In [None]:
# train the net
train(model, optimizer)

In [None]:
# for test set evaluation
best_model = model
check_accuracy(loader_test, best_model)