In [23]:
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as trans
import matplotlib
import matplotlib.pyplot as plt
import torch.nn.functional as F
import time

%matplotlib inline

In [24]:
# Pick the compute device once for the whole notebook: the first CUDA GPU
# when one is visible to PyTorch, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device("cpu")
print(device)

cuda:0


In [25]:
# Both MNIST splits live under the same root directory and are downloaded on
# first use; only the `train` flag differs between them.
_mnist_args = dict(root='./data', download=True)

# ToTensor is applied to every image as it is read from the dataset.
trainSet = dsets.MNIST(train=True, transform=trans.ToTensor(), **_mnist_args)
testSet = dsets.MNIST(train=False, transform=trans.ToTensor(), **_mnist_args)

In [26]:
# Sanity check of the training split: sample count, plus the Python type and
# tensor shape of the first image.
# `trainSet` is the name bound by the dataset-loading cell; `train_set` is not
# defined anywhere in the visible notebook and raises NameError on a fresh kernel.
first_image = trainSet[0][0]  # each sample is an (image, label) pair
print('Number of images {}'.format(len(trainSet)))
print('Type {}'.format(type(first_image)))
print('size of images {}'.format(first_image.size()))

Number of images 60000
Type <class 'torch.Tensor'>
size of images torch.Size([1, 28, 28])


In [64]:
class Model4_1(nn.Module):
    """Fully connected classifier with one ReLU hidden layer.

    Maps 784 input features to 100 hidden units and then to 10 output
    scores. No softmax is applied; ``forward`` returns the raw scores.
    """

    def __init__(self):
        super().__init__()
        # Attribute names are kept stable: they are the keys of state_dict().
        self.linear1 = nn.Linear(784, 100)
        self.relu1 = nn.ReLU()
        self.linear2 = nn.Linear(100, 10)

    def forward(self, x):
        """Return 10 raw scores per row of ``x`` (784 features in the last dim)."""
        hidden = self.relu1(self.linear1(x))
        return self.linear2(hidden)
    
class Model4_2(nn.Module):
    """Fully connected classifier with one tanh hidden layer.

    Maps 784 input features to 100 hidden units and then to 10 output
    scores. No softmax is applied; ``forward`` returns the raw scores.
    """

    def __init__(self):
        super().__init__()
        # Attribute names are kept stable: they are the keys of state_dict().
        self.linear1 = nn.Linear(784, 100)
        self.tanh1 = nn.Tanh()
        self.linear2 = nn.Linear(100, 10)

    def forward(self, x):
        """Return 10 raw scores per row of ``x`` (784 features in the last dim)."""
        hidden = self.tanh1(self.linear1(x))
        return self.linear2(hidden)
    
class Model4_3(nn.Module):
    """Fully connected classifier with one sigmoid hidden layer.

    Maps 784 input features to 100 hidden units and then to 10 output
    scores. No softmax is applied; ``forward`` returns the raw scores.
    """

    def __init__(self):
        super().__init__()
        # Attribute names are kept stable: they are the keys of state_dict().
        self.linear1 = nn.Linear(784, 100)
        self.sig1 = nn.Sigmoid()
        self.linear2 = nn.Linear(100, 10)

    def forward(self, x):
        """Return 10 raw scores per row of ``x`` (784 features in the last dim)."""
        hidden = self.sig1(self.linear1(x))
        return self.linear2(hidden)

In [61]:
def accuracy(testLoader, model):
    """Return the fraction of samples in ``testLoader`` that ``model`` classifies correctly.

    Args:
        testLoader: Iterable yielding ``(images, labels)`` batches. Every
            image batch is flattened to ``(batch, features)`` before it is
            passed to the model.
        model: ``nn.Module`` that returns one score per class for each
            sample; the predicted class is the index of the highest score.

    Returns:
        ``correct / total`` as a float, or ``0.0`` when the loader yields no
        samples (instead of raising ``ZeroDivisionError``).
    """
    # Score on the device the model's parameters live on, so inputs and
    # weights always match; a parameter-free model falls back to the
    # notebook-level ``device``.
    first_param = next(model.parameters(), None)
    target = first_param.device if first_param is not None else device

    correct, total = 0, 0
    was_training = model.training
    model.eval()  # dropout / batch-norm layers, if any, must run in inference mode
    try:
        with torch.no_grad():
            for images, labels in testLoader:
                images = images.to(target)
                labels = labels.to(target)
                # Call the module itself rather than ``.forward`` so that
                # registered hooks run; flatten everything but the batch dim.
                outputs = model(images.flatten(start_dim=1))
                predicted = outputs.argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        # Restore whatever train/eval mode the caller had set.
        model.train(was_training)

    return correct / total if total else 0.0


def benchmark(trainLoader, testLoader, model, epochs=1, lr=0.01):
    """Train ``model`` with plain SGD, then print test accuracy and training time.

    Args:
        trainLoader: Iterable yielding ``(images, labels)`` training batches;
            images are reshaped to rows of 28 * 28 values.
        testLoader: Iterable of ``(images, labels)`` batches handed to
            ``accuracy`` once training has finished.
        model: ``nn.Module`` to train in place; it is moved to the
            notebook-level ``device`` first.
        epochs: Number of full passes over ``trainLoader``.
        lr: SGD learning rate.

    Prints, per epoch, the sum of the batch losses (not their mean), followed
    by the test accuracy and the wall-clock time spent in the training loop.
    Returns ``None``.
    """
    model.to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    loss_function = nn.CrossEntropyLoss()

    model.train()
    start = time.perf_counter()
    for epoch in range(epochs):
        runningLoss = 0.0
        # Iterate the loader that was passed in, not a notebook global.
        for images, labels in trainLoader:
            images = images.view(-1, 28 * 28).to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            loss = loss_function(model(images), labels)
            loss.backward()
            optimizer.step()
            runningLoss += loss.item()
        print('epoch {}, loss {}'.format(epoch, runningLoss))
    # Stop the clock before evaluating so the reported time covers training only.
    training_time = time.perf_counter() - start

    print('Accuracy: {0:.4f}'.format(accuracy(testLoader, model)))
    print('Training time: {0:.2f}'.format(training_time))

In [62]:
# NOTE(review): `model` and `lr` are not read by the visible benchmark cell,
# which builds its own models and passes lr=0.1 — confirm whether they are
# still needed.
model = Model4_1()
lr = 0.05
batchSize = 100

# Shuffled mini-batches of the training split. `trainSet` is the name bound
# by the dataset-loading cell (`train_set` is not defined in the visible
# notebook and fails on a fresh kernel).
trainloader = torch.utils.data.DataLoader(
    dataset=trainSet,
    batch_size=batchSize,
    num_workers=12,
    pin_memory=True,
    shuffle=True)

# Test split in large, unshuffled batches of up to 10000 samples.
testloader = torch.utils.data.DataLoader(
    dataset=testSet,
    batch_size=10000,
    num_workers=12,
    pin_memory=True)

In [65]:
# Train a fresh instance of each architecture with identical settings so that
# the hidden activation is the only difference between runs.
# The loader is `trainloader` (lower-case "l"), as defined in the loader cell;
# `trainLoader` is not a notebook-level name.
for title, model_cls in (
    ('ReLU activation:', Model4_1),
    ('Tanh activation', Model4_2),
    ('sigmoid activation', Model4_3),
):
    print(title)
    benchmark(trainloader, testloader, model_cls(), epochs=5, lr=0.1)

ReLU activation:
epoch 0, loss 316.5952096879482
epoch 1, loss 163.8906356021762
epoch 2, loss 131.95745799690485
epoch 3, loss 110.85016337782145
epoch 4, loss 95.4335983172059
Accuracy: 0.9575
Training time: 8.86
Tanh activation
epoch 0, loss 308.6659102141857
epoch 1, loss 170.77222556620836
epoch 2, loss 142.90320280194283
epoch 3, loss 122.57304010912776
epoch 4, loss 107.83303147181869
Accuracy: 0.9520
Training time: 8.98
sigmoid activation
epoch 0, loss 688.7089750468731
epoch 1, loss 267.2554040849209
epoch 2, loss 214.40332981944084
epoch 3, loss 192.69825249910355
epoch 4, loss 179.52659180760384
Accuracy: 0.9208
Training time: 9.12
