In [2]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
from torch.autograd import Variable

In [3]:
# MNIST splits, both rooted at ./data and converted with ToTensor().
# download=True is passed only for the training split.
train_dataset = dsets.MNIST(
    root='./data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

# Held-out split (train=False), used for the periodic accuracy checks.
test_dataset = dsets.MNIST(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
)


In [4]:
# Batching / training-length configuration.
batch_size = 100
n_iters = 3000

# Number of passes over the training set needed to reach n_iters
# mini-batch updates, truncated to a whole number of epochs.
num_epochs = int(n_iters / (len(train_dataset) / batch_size))

# Training batches are reshuffled every epoch; test batches keep a fixed order.
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False
)


In [11]:
# Create model class
class FeedforwardNeuralNetModelSigmoid(nn.Module):
    """Feedforward classifier with one hidden layer and a sigmoid activation.

    Pipeline: Linear(input_size -> hidden_size) -> Sigmoid ->
    Linear(hidden_size -> num_classes). ``forward`` returns the output of the
    final linear layer unchanged (no softmax is applied inside the model).

    Args:
        input_size: Number of features in each input row.
        hidden_size: Number of units in the hidden layer.
        num_classes: Number of scores produced per input row.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super(FeedforwardNeuralNetModelSigmoid, self).__init__()
        # Layer sizes come from the constructor arguments, not from
        # notebook-level globals, so the class does not depend on which
        # cells happen to have been executed.
        # NOTE: the attribute is spelled "fcl" (lower-case L); the name is
        # kept so existing attribute access and state_dict keys stay valid.
        self.fcl = nn.Linear(input_size, hidden_size)
        # Non-linearity
        self.sigmoid = nn.Sigmoid()
        # Linear function (readout)
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Apply linear -> sigmoid -> linear to ``x`` and return the result."""
        out = self.fcl(x)
        out = self.sigmoid(out)
        out = self.fc2(out)
        return out
    

# Create model class
class FeedforwardNeuralNetModelRelu(nn.Module):
    """Feedforward classifier with one hidden layer and a ReLU activation.

    Pipeline: Linear(input_size -> hidden_size) -> ReLU ->
    Linear(hidden_size -> num_classes). ``forward`` returns the output of the
    final linear layer unchanged (no softmax is applied inside the model).

    Args:
        input_size: Number of features in each input row.
        hidden_size: Number of units in the hidden layer.
        num_classes: Number of scores produced per input row.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super(FeedforwardNeuralNetModelRelu, self).__init__()
        # Layer sizes come from the constructor arguments, not from
        # notebook-level globals, so the class does not depend on which
        # cells happen to have been executed.
        # NOTE: the attribute is spelled "fcl" (lower-case L); the name is
        # kept so existing attribute access and state_dict keys stay valid.
        self.fcl = nn.Linear(input_size, hidden_size)
        # Non-linearity
        self.relu = nn.ReLU()
        # Linear function (readout)
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Apply linear -> ReLU -> linear to ``x`` and return the result."""
        out = self.fcl(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out
    

# Create model class
class FeedforwardNeuralNetModelTanh(nn.Module):
    """Feedforward classifier with one hidden layer and a tanh activation.

    Pipeline: Linear(input_size -> hidden_size) -> Tanh ->
    Linear(hidden_size -> num_classes). ``forward`` returns the output of the
    final linear layer unchanged (no softmax is applied inside the model).

    Args:
        input_size: Number of features in each input row.
        hidden_size: Number of units in the hidden layer.
        num_classes: Number of scores produced per input row.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super(FeedforwardNeuralNetModelTanh, self).__init__()
        # Layer sizes come from the constructor arguments, not from
        # notebook-level globals, so the class does not depend on which
        # cells happen to have been executed.
        # NOTE: the attribute is spelled "fcl" (lower-case L); the name is
        # kept so existing attribute access and state_dict keys stay valid.
        self.fcl = nn.Linear(input_size, hidden_size)
        # Non-linearity
        self.tanh = nn.Tanh()
        # Linear function (readout)
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Apply linear -> tanh -> linear to ``x`` and return the result."""
        out = self.fcl(x)
        out = self.tanh(out)
        out = self.fc2(out)
        return out

In [29]:
# Shared layer sizes: flattened 28*28 input, 100 hidden units, 10 outputs.
input_dim, hidden_dim, output_dim = 28 * 28, 100, 10

# One network per activation function, all built with the same sizes.
# Construction order (sigmoid, tanh, relu) is kept because each constructor
# draws its initial weights from the global RNG.
sigmoidmodel = FeedforwardNeuralNetModelSigmoid(
    input_dim, hidden_dim, output_dim
)
tanhmodel = FeedforwardNeuralNetModelTanh(
    input_dim, hidden_dim, output_dim
)
relumodel = FeedforwardNeuralNetModelRelu(
    input_dim, hidden_dim, output_dim
)

In [30]:
# A separate cross-entropy loss object for each of the three models.
sigmoidcriterion, tanhcriterion, relucriterion = (
    nn.CrossEntropyLoss() for _ in range(3)
)

In [31]:
# Plain SGD with one shared learning rate; each optimizer owns the
# parameters of exactly one model.
learning_rate = 0.1

sigmoidoptimizer, tanhoptimizer, reluoptimizer = (
    torch.optim.SGD(net.parameters(), lr=learning_rate)
    for net in (sigmoidmodel, tanhmodel, relumodel)
)

In [34]:
# Train the sigmoid model for num_epochs passes over train_loader and report
# test-set accuracy every 500 mini-batch updates.
# NOTE: `iter` shadows the built-in iter(); the name is kept so that any other
# cell reading it keeps working.
iter = 0
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Flatten every sample in the batch to a row of 28*28 values.
        # Tensors are passed directly: the Variable wrapper is no longer
        # needed for autograd.
        images = images.view(-1, 28*28)

        # Gradients accumulate across backward() calls, so clear them first.
        sigmoidoptimizer.zero_grad()

        outputs = sigmoidmodel(images)
        loss = sigmoidcriterion(outputs, labels)
        loss.backward()
        sigmoidoptimizer.step()

        iter += 1
        if iter % 500 == 0:
            correct = 0
            total = 0
            # Evaluation needs no gradients; no_grad() avoids building the
            # autograd graph for every test batch.
            with torch.no_grad():
                # Separate names so the training batch is not overwritten.
                for test_images, test_labels in test_loader:
                    test_images = test_images.view(-1, 28*28)
                    test_outputs = sigmoidmodel(test_images)
                    # Index of the largest score in each row is the prediction.
                    _, predicted = torch.max(test_outputs, 1)

                    total += test_labels.size(0)
                    # .item() keeps `correct` a Python int so that the
                    # accuracy below is a plain float rather than a tensor.
                    correct += (predicted == test_labels).sum().item()

            accuracy = 100 * correct / total

            # loss is a 0-dim tensor: read it with .item(); indexing it with
            # [0] raises IndexError on current PyTorch.
            print(f'Model: Sigmoid. Iteration: {iter}. Loss: {loss.item()}. Accuracy {accuracy}')

Model: Sigmoid. Iteration: 500. Loss: 0.17951397597789764. Accuracy 93.66


Model: Sigmoid. Iteration: 1000. Loss: 0.13299494981765747. Accuracy 93.78


Model: Sigmoid. Iteration: 1500. Loss: 0.21890108287334442. Accuracy 93.93


Model: Sigmoid. Iteration: 2000. Loss: 0.15049107372760773. Accuracy 94.17


Model: Sigmoid. Iteration: 2500. Loss: 0.20596104860305786. Accuracy 94.18


Model: Sigmoid. Iteration: 3000. Loss: 0.32422056794166565. Accuracy 94.38


In [35]:
# Train the tanh model for num_epochs passes over train_loader and report
# test-set accuracy every 500 mini-batch updates.
# NOTE: `iter` shadows the built-in iter(); the name is kept so that any other
# cell reading it keeps working.
iter = 0
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Flatten every sample in the batch to a row of 28*28 values.
        # Tensors are passed directly: the Variable wrapper is no longer
        # needed for autograd.
        images = images.view(-1, 28*28)

        # Gradients accumulate across backward() calls, so clear them first.
        tanhoptimizer.zero_grad()

        outputs = tanhmodel(images)
        loss = tanhcriterion(outputs, labels)
        loss.backward()
        tanhoptimizer.step()

        iter += 1
        if iter % 500 == 0:
            correct = 0
            total = 0
            # Evaluation needs no gradients; no_grad() avoids building the
            # autograd graph for every test batch.
            with torch.no_grad():
                # Separate names so the training batch is not overwritten.
                for test_images, test_labels in test_loader:
                    test_images = test_images.view(-1, 28*28)
                    test_outputs = tanhmodel(test_images)
                    # Index of the largest score in each row is the prediction.
                    _, predicted = torch.max(test_outputs, 1)

                    total += test_labels.size(0)
                    # .item() keeps `correct` a Python int so that the
                    # accuracy below is a plain float rather than a tensor.
                    correct += (predicted == test_labels).sum().item()

            accuracy = 100 * correct / total

            # loss is a 0-dim tensor: read it with .item(); indexing it with
            # [0] raises IndexError on current PyTorch.
            print(f'Model: TanH. Iteration: {iter}. Loss: {loss.item()}. Accuracy {accuracy}')

Model: TanH. Iteration: 500. Loss: 0.19587664306163788. Accuracy 91.18


Model: TanH. Iteration: 1000. Loss: 0.2994256913661957. Accuracy 92.5


Model: TanH. Iteration: 1500. Loss: 0.2552839517593384. Accuracy 93.43


Model: TanH. Iteration: 2000. Loss: 0.21518149971961975. Accuracy 94.16


Model: TanH. Iteration: 2500. Loss: 0.2553020715713501. Accuracy 94.84


Model: TanH. Iteration: 3000. Loss: 0.19940626621246338. Accuracy 95.18


In [37]:
# Train the ReLU model for num_epochs passes over train_loader and report
# test-set accuracy every 500 mini-batch updates.
# NOTE: `iter` shadows the built-in iter(); the name is kept so that any other
# cell reading it keeps working.
iter = 0
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Flatten every sample in the batch to a row of 28*28 values.
        # Tensors are passed directly: the Variable wrapper is no longer
        # needed for autograd.
        images = images.view(-1, 28*28)

        # Gradients accumulate across backward() calls, so clear them first.
        reluoptimizer.zero_grad()

        outputs = relumodel(images)
        loss = relucriterion(outputs, labels)
        loss.backward()
        reluoptimizer.step()

        iter += 1
        if iter % 500 == 0:
            correct = 0
            total = 0
            # Evaluation needs no gradients; no_grad() avoids building the
            # autograd graph for every test batch.
            with torch.no_grad():
                # Separate names so the training batch is not overwritten.
                for test_images, test_labels in test_loader:
                    test_images = test_images.view(-1, 28*28)
                    test_outputs = relumodel(test_images)
                    # Index of the largest score in each row is the prediction.
                    _, predicted = torch.max(test_outputs, 1)

                    total += test_labels.size(0)
                    # .item() keeps `correct` a Python int so that the
                    # accuracy below is a plain float rather than a tensor.
                    correct += (predicted == test_labels).sum().item()

            accuracy = 100 * correct / total

            # loss is a 0-dim tensor: read it with .item(); indexing it with
            # [0] raises IndexError on current PyTorch.
            print(f'Model: ReLU. Iteration: {iter}. Loss: {loss.item()}. Accuracy {accuracy}')

Model: ReLU. Iteration: 500. Loss: 0.4034648835659027. Accuracy 92.89


Model: ReLU. Iteration: 1000. Loss: 0.24298278987407684. Accuracy 93.79


Model: ReLU. Iteration: 1500. Loss: 0.11129971593618393. Accuracy 94.71


Model: ReLU. Iteration: 2000. Loss: 0.18198589980602264. Accuracy 95.16


Model: ReLU. Iteration: 2500. Loss: 0.10730808228254318. Accuracy 95.69


Model: ReLU. Iteration: 3000. Loss: 0.1305820792913437. Accuracy 96.16


In [39]:
# 2 Hidden Layer Feedforward Neuralnetwork (ReLU)
# Create model class
class FeedforwardNeuralNetModelSigmoid2(nn.Module):
    """Feedforward classifier with two hidden layers and ReLU activations.

    Despite "Sigmoid" in the class name, both non-linearities are ReLU; the
    name is kept unchanged so existing references to the class keep working.

    Pipeline: Linear(input_size -> hidden_size) -> ReLU ->
    Linear(hidden_size -> hidden_size) -> ReLU ->
    Linear(hidden_size -> num_classes). ``forward`` returns the output of the
    final linear layer unchanged (no softmax is applied inside the model).

    Args:
        input_size: Number of features in each input row.
        hidden_size: Number of units in each of the two hidden layers.
        num_classes: Number of scores produced per input row.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super(FeedforwardNeuralNetModelSigmoid2, self).__init__()
        # Layer sizes come from the constructor arguments, not from
        # notebook-level globals, so the class does not depend on which
        # cells happen to have been executed.
        self.fc1 = nn.Linear(input_size, hidden_size)
        # Non-linearity
        self.relu1 = nn.ReLU()

        # Linear function 2
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        # Non-linearity 2
        self.relu2 = nn.ReLU()

        # Linear function (readout)
        self.fc3 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Apply linear -> ReLU -> linear -> ReLU -> linear to ``x``."""
        out = self.fc1(x)
        out = self.relu1(out)
        out = self.fc2(out)
        out = self.relu2(out)
        out = self.fc3(out)
        return out

In [40]:
# Two-hidden-layer network together with its loss and SGD optimizer.
model = FeedforwardNeuralNetModelSigmoid2(
    input_dim,
    hidden_dim,
    output_dim,
)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=learning_rate,
)

In [41]:
# Train the two-hidden-layer model for num_epochs passes over train_loader and
# report test-set accuracy every 500 mini-batch updates.
# NOTE: `iter` shadows the built-in iter(); the name is kept so that any other
# cell reading it keeps working.
iter = 0
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Flatten every sample in the batch to a row of 28*28 values.
        # Tensors are passed directly: the Variable wrapper is no longer
        # needed for autograd.
        images = images.view(-1, 28*28)

        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        iter += 1
        if iter % 500 == 0:
            correct = 0
            total = 0
            # Evaluation needs no gradients; no_grad() avoids building the
            # autograd graph for every test batch.
            with torch.no_grad():
                # Separate names so the training batch is not overwritten.
                for test_images, test_labels in test_loader:
                    test_images = test_images.view(-1, 28*28)
                    test_outputs = model(test_images)
                    # Index of the largest score in each row is the prediction.
                    _, predicted = torch.max(test_outputs, 1)

                    total += test_labels.size(0)
                    # .item() keeps `correct` a Python int so that the
                    # accuracy below is a plain float rather than a tensor.
                    correct += (predicted == test_labels).sum().item()

            accuracy = 100 * correct / total

            # loss is a 0-dim tensor: read it with .item(); indexing it with
            # [0] raises IndexError on current PyTorch.
            print(f'Model: ReLU2. Iteration: {iter}. Loss: {loss.item()}. Accuracy {accuracy}')

Model: ReLU2. Iteration: 500. Loss: 0.39275017380714417. Accuracy 90.75


Model: ReLU2. Iteration: 1000. Loss: 0.17468352615833282. Accuracy 93.55


Model: ReLU2. Iteration: 1500. Loss: 0.23512910306453705. Accuracy 94.69


Model: ReLU2. Iteration: 2000. Loss: 0.11508499830961227. Accuracy 95.71


Model: ReLU2. Iteration: 2500. Loss: 0.19197191298007965. Accuracy 96.14


Model: ReLU2. Iteration: 3000. Loss: 0.05572734400629997. Accuracy 96.49
