STAT 479: Deep Learning (Spring 2019)  
Instructor: Sebastian Raschka (sraschka@wisc.edu)  
Course website: http://pages.stat.wisc.edu/~sraschka/teaching/stat479-ss2019/  
GitHub repository: https://github.com/rasbt/stat479-deep-learning-ss19

---

In [1]:
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch
import time

## Settings and Dataset

In [2]:
##########################
### SETTINGS
##########################

RANDOM_SEED = 1
BATCH_SIZE = 128
NUM_EPOCHS = 10
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')


##########################
### MNIST DATASET
##########################

# Note transforms.ToTensor() scales input images
# to 0-1 range
train_dataset = datasets.MNIST(root='data', 
                               train=True, 
                               transform=transforms.ToTensor(),
                               download=True)

test_dataset = datasets.MNIST(root='data', 
                              train=False, 
                              transform=transforms.ToTensor())


train_loader = DataLoader(dataset=train_dataset, 
                          batch_size=BATCH_SIZE, 
                          shuffle=True)

test_loader = DataLoader(dataset=test_dataset, 
                         batch_size=BATCH_SIZE, 
                         shuffle=False)

# Checking the dataset
for images, labels in train_loader:  
    print('Image batch dimensions:', images.shape)
    print('Image label dimensions:', labels.shape)
    break

Image batch dimensions: torch.Size([128, 1, 28, 28])
Image label dimensions: torch.Size([128])


In [3]:
def compute_accuracy(net, data_loader):
    correct_pred, num_examples = 0, 0
    with torch.no_grad():
        for features, targets in data_loader:
            features = features.view(-1, 28*28).to(DEVICE)
            targets = targets.to(DEVICE)
            a1, a2 = net.forward(features)
            predicted_labels = torch.argmax(a2, 1)
            num_examples += targets.size(0)
            correct_pred += (predicted_labels == targets).sum()
        return correct_pred.float()/num_examples * 100


def compute_loss(net, data_loader):
    curr_loss = 0.
    with torch.no_grad():
        for cnt, (features, targets) in enumerate(data_loader):
            features = features.view(-1, 28*28).to(DEVICE)
            targets = targets.to(DEVICE)
            logits, probas = net.forward(features)
            loss = F.nll_loss(torch.log(probas), targets)
            # or better (more numerically stable):
            # loss = F.cross_entropy(logits, targets)
            # see 
            # ../../other/pytorch-lossfunc-cheatsheet.md
            curr_loss += loss
        return float(curr_loss)/cnt

## Object-Oriented Dropout

In [4]:
class MultilayerPerceptron(torch.nn.Module):

    def __init__(self, num_features, num_classes, drop_proba, 
                 num_hidden_1, num_hidden_2):
        super(MultilayerPerceptron, self).__init__()
        
        self.my_network = torch.nn.Sequential(
            torch.nn.Linear(num_features, num_hidden_1),
            torch.nn.ReLU(),
            torch.nn.Dropout(drop_proba),
            torch.nn.Linear(num_hidden_1, num_hidden_2),
            torch.nn.ReLU(),
            torch.nn.Dropout(drop_proba),
            torch.nn.Linear(num_hidden_2, num_classes)
        )
           
    def forward(self, x):
        logits = self.my_network(x)
        probas = F.softmax(logits, dim=1)
        return logits, probas

In [5]:
#################################
### Model Initialization
#################################
    
torch.manual_seed(RANDOM_SEED)
model = MultilayerPerceptron(num_features=28*28,
                             num_hidden_1=100,
                             num_hidden_2=50,
                             drop_proba=0.5,
                             num_classes=10)

model = model.to(DEVICE)

optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

#################################
### Training
#################################

start_time = time.time()
minibatch_cost = []
epoch_cost = []
for epoch in range(NUM_EPOCHS):
    model.train()
    for batch_idx, (features, targets) in enumerate(train_loader):
        
        features = features.view(-1, 28*28).to(DEVICE)
            
        ### FORWARD AND BACK PROP
        logits, probas = model(features)
        
        cost = F.cross_entropy(logits, targets)
        optimizer.zero_grad()
        
        cost.backward()
        minibatch_cost.append(cost)
        ### UPDATE MODEL PARAMETERS
        optimizer.step()
        
        ### LOGGING
        if not batch_idx % 50:
            print ('Epoch: %03d/%03d | Batch %03d/%03d | Cost: %.4f' 
                   %(epoch+1, NUM_EPOCHS, batch_idx, 
                     len(train_loader), cost))
       
    model.eval()
    with torch.no_grad():
        cost = compute_loss(model, train_loader)
        epoch_cost.append(cost)
        print('Epoch: %03d/%03d Train Cost: %.4f' % (
                epoch+1, NUM_EPOCHS, cost))
        print('Time elapsed: %.2f min' % ((time.time() - start_time)/60))

        
model.eval()
with torch.no_grad():
    print('Total Training Time: %.2f min' % ((time.time() - start_time)/60))

    print('Training Accuracy: %.2f' % compute_accuracy(model, train_loader))
    print('Test Accuracy: %.2f' % compute_accuracy(model, test_loader))

Epoch: 001/010 | Batch 000/469 | Cost: 2.3018
Epoch: 001/010 | Batch 050/469 | Cost: 2.1577
Epoch: 001/010 | Batch 100/469 | Cost: 1.5877
Epoch: 001/010 | Batch 150/469 | Cost: 1.0578
Epoch: 001/010 | Batch 200/469 | Cost: 0.9922
Epoch: 001/010 | Batch 250/469 | Cost: 0.9561
Epoch: 001/010 | Batch 300/469 | Cost: 0.7748
Epoch: 001/010 | Batch 350/469 | Cost: 0.6270
Epoch: 001/010 | Batch 400/469 | Cost: 0.6704
Epoch: 001/010 | Batch 450/469 | Cost: 0.6320
Epoch: 001/010 Train Cost: 0.3947
Time elapsed: 0.23 min
Epoch: 002/010 | Batch 000/469 | Cost: 0.5669
Epoch: 002/010 | Batch 050/469 | Cost: 0.7065
Epoch: 002/010 | Batch 100/469 | Cost: 0.3853
Epoch: 002/010 | Batch 150/469 | Cost: 0.4498
Epoch: 002/010 | Batch 200/469 | Cost: 0.6832
Epoch: 002/010 | Batch 250/469 | Cost: 0.5445
Epoch: 002/010 | Batch 300/469 | Cost: 0.4575
Epoch: 002/010 | Batch 350/469 | Cost: 0.4322
Epoch: 002/010 | Batch 400/469 | Cost: 0.3241
Epoch: 002/010 | Batch 450/469 | Cost: 0.4325
Epoch: 002/010 Train Co

## Functional API Dropout

In [6]:
import torch.nn.functional as F


class MultilayerPerceptron(torch.nn.Module):

    def __init__(self, num_features, num_classes, drop_proba, 
                 num_hidden_1, num_hidden_2):
        super(MultilayerPerceptron, self).__init__()
        
        self.drop_proba = drop_proba
        self.linear_1 = torch.nn.Linear(num_features,
                                        num_hidden_1)

        self.linear_2 = torch.nn.Linear(num_hidden_1,
                                        num_hidden_2)

        self.linear_out = torch.nn.Linear(num_hidden_2,
                                          num_classes)
        
    def forward(self, x):
        out = self.linear_1(x)
        out = F.relu(out)
        out = F.dropout(out, p=self.drop_proba, training=self.training)
        out = self.linear_2(out)
        out = F.relu(out)
        out = F.dropout(out, p=self.drop_proba, training=self.training)
        logits = self.linear_out(out)
        probas = F.log_softmax(logits, dim=1)
        return logits, probas

In [7]:
#################################
### Model Initialization
#################################
    
torch.manual_seed(RANDOM_SEED)
model = MultilayerPerceptron(num_features=28*28,
                             num_hidden_1=100,
                             num_hidden_2=50,
                             drop_proba=0.5,
                             num_classes=10)

model = model.to(DEVICE)

optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

#################################
### Training
#################################

start_time = time.time()
minibatch_cost = []
epoch_cost = []
for epoch in range(NUM_EPOCHS):
    model.train()
    for batch_idx, (features, targets) in enumerate(train_loader):
        
        features = features.view(-1, 28*28).to(DEVICE)
            
        ### FORWARD AND BACK PROP
        logits, probas = model(features)
        
        cost = F.cross_entropy(logits, targets)
        optimizer.zero_grad()
        
        cost.backward()
        minibatch_cost.append(cost)
        ### UPDATE MODEL PARAMETERS
        optimizer.step()
        
        ### LOGGING
        if not batch_idx % 50:
            print ('Epoch: %03d/%03d | Batch %03d/%03d | Cost: %.4f' 
                   %(epoch+1, NUM_EPOCHS, batch_idx, 
                     len(train_loader), cost))
       
    model.eval()
    with torch.no_grad():
        cost = compute_loss(model, train_loader)
        epoch_cost.append(cost)
        print('Epoch: %03d/%03d Train Cost: %.4f' % (
                epoch+1, NUM_EPOCHS, cost))
        print('Time elapsed: %.2f min' % ((time.time() - start_time)/60))

        
model.eval()
with torch.no_grad():
    print('Total Training Time: %.2f min' % ((time.time() - start_time)/60))

    print('Training Accuracy: %.2f' % compute_accuracy(model, train_loader))
    print('Test Accuracy: %.2f' % compute_accuracy(model, test_loader))

Epoch: 001/010 | Batch 000/469 | Cost: 2.3018
Epoch: 001/010 | Batch 050/469 | Cost: 2.1577
Epoch: 001/010 | Batch 100/469 | Cost: 1.5877
Epoch: 001/010 | Batch 150/469 | Cost: 1.0578
Epoch: 001/010 | Batch 200/469 | Cost: 0.9922
Epoch: 001/010 | Batch 250/469 | Cost: 0.9561
Epoch: 001/010 | Batch 300/469 | Cost: 0.7748
Epoch: 001/010 | Batch 350/469 | Cost: 0.6270
Epoch: 001/010 | Batch 400/469 | Cost: 0.6704
Epoch: 001/010 | Batch 450/469 | Cost: 0.6320
Epoch: 001/010 Train Cost: nan
Time elapsed: 0.20 min
Epoch: 002/010 | Batch 000/469 | Cost: 0.5669
Epoch: 002/010 | Batch 050/469 | Cost: 0.7065
Epoch: 002/010 | Batch 100/469 | Cost: 0.3853
Epoch: 002/010 | Batch 150/469 | Cost: 0.4498
Epoch: 002/010 | Batch 200/469 | Cost: 0.6832
Epoch: 002/010 | Batch 250/469 | Cost: 0.5445
Epoch: 002/010 | Batch 300/469 | Cost: 0.4575
Epoch: 002/010 | Batch 350/469 | Cost: 0.4322
Epoch: 002/010 | Batch 400/469 | Cost: 0.3241
Epoch: 002/010 | Batch 450/469 | Cost: 0.4325
Epoch: 002/010 Train Cost:

In [8]:
%load_ext watermark
%watermark -a 'Sebastian Raschka' -iv

torchvision 0.2.1
torch       1.0.1
Sebastian Raschka
