In [13]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.autograd import Variable
import time

In [6]:
# Build the MNIST training split first, then the test split. Both are stored
# under ./data (downloaded if missing) and use the ToTensor transform.
train_dataset, test_dataset = (
    datasets.MNIST(root='./data',
                   train=is_train_split,
                   transform=transforms.ToTensor(),
                   download=True)
    for is_train_split in (True, False)
)

In [21]:
# Mini-batch size and number of full passes over the training set.
batch_size = 100
epochs = 5

# Only the training batches are shuffled; the test set is read in a fixed order.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

# Total number of optimizer steps over all epochs. len(train_loader) is the
# number of batches per epoch, so the result is an exact int. The previous
# `epochs * len(train_dataset) / batch_size` produced a float under Python 3
# and was off whenever the dataset size is not a multiple of batch_size.
iterations = epochs * len(train_loader)

In [22]:
class FeedForwardNN(nn.Module):
    """Feed-forward network with one hidden layer and a sigmoid activation.

    Args:
        input_dim: number of features in each input vector.
        hidden_dim: number of units in the hidden layer.
        output_dim: number of values produced per input vector.
    """

    def __init__(self, input_dim: int, hidden_dim: int, output_dim: int):
        super().__init__()
        # Attribute names are also the state_dict key prefixes; keep them stable.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.sigmoid = nn.Sigmoid()
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Apply fc1 -> sigmoid -> fc2 and return fc2's output unactivated."""
        hidden = self.sigmoid(self.fc1(x))
        return self.fc2(hidden)

In [23]:
# Train the sigmoid network with plain SGD and report test accuracy periodically.
input_dim = 28*28   # each image is flattened to a vector of this length
output_dim = 10
hidden_dim = 100

# Use the GPU when one is available and fall back to the CPU otherwise, so the
# cell no longer crashes on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = FeedForwardNN(input_dim, hidden_dim, output_dim)
model.to(device)

criterion = nn.CrossEntropyLoss()
learning_rate = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

eval_every = 500   # run a test-set evaluation every this many training steps
accuracy = -999    # sentinel value until the first evaluation has run
itern = 0          # global step counter, carried across epochs
start_time = time.time()
for epoch in range(epochs):
    for images, labels in train_loader:
        images = images.view(-1, input_dim).to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        itern += 1
        if itern % eval_every == 0:
            # Read the scalar loss only when it is reported; doing it on every
            # step forced a device-to-host copy per iteration.
            loss_val = loss.item()
            correct = 0
            total = 0
            # No gradients are needed for evaluation, so skip autograd tracking.
            with torch.no_grad():
                for test_images, test_labels in test_loader:
                    test_images = test_images.view(-1, input_dim).to(device)
                    test_preds = model(test_images)
                    # Index of the largest score per row is the predicted class.
                    _, predicted = torch.max(test_preds, 1)
                    total += test_labels.size(0)
                    correct += (predicted.cpu() == test_labels).sum().item()
            accuracy = 100 * correct / total
            print('Epoch {}, Iter {}, Loss {}, Accuracy {}'.format(epoch, itern,loss_val, accuracy))
# Note: the reported time includes the periodic test-set evaluations.
print('Training time {}'.format(time.time() - start_time))

Epoch 0, Iter 500, Loss 0.5741844177246094, Accuracy 86.26
Epoch 1, Iter 1000, Loss 0.3612525165081024, Accuracy 89.44
Epoch 2, Iter 1500, Loss 0.4132201075553894, Accuracy 90.53
Epoch 3, Iter 2000, Loss 0.28452762961387634, Accuracy 91.26
Epoch 4, Iter 2500, Loss 0.237995445728302, Accuracy 91.75
Epoch 4, Iter 3000, Loss 0.3538167178630829, Accuracy 92.05
Training time 43.8708975315094


In [27]:
class FeedForwardNN_tanh(nn.Module):
    """Feed-forward network with one hidden layer and a tanh activation.

    Args:
        input_dim: number of features in each input vector.
        hidden_dim: number of units in the hidden layer.
        output_dim: number of values produced per input vector.
    """

    def __init__(self, input_dim: int, hidden_dim: int, output_dim: int):
        super().__init__()
        # Attribute names are also the state_dict key prefixes; keep them stable.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.tanh = nn.Tanh()
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Apply fc1 -> tanh -> fc2 and return fc2's output unactivated."""
        hidden = self.tanh(self.fc1(x))
        return self.fc2(hidden)

In [28]:
# Train the tanh network with plain SGD and report test accuracy periodically.
input_dim = 28*28   # each image is flattened to a vector of this length
output_dim = 10
hidden_dim = 100

# Use the GPU when one is available and fall back to the CPU otherwise, so the
# cell no longer crashes on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = FeedForwardNN_tanh(input_dim, hidden_dim, output_dim)
model.to(device)

criterion = nn.CrossEntropyLoss()
learning_rate = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

eval_every = 500   # run a test-set evaluation every this many training steps
accuracy = -999    # sentinel value until the first evaluation has run
itern = 0          # global step counter, carried across epochs
start_time = time.time()
for epoch in range(epochs):
    for images, labels in train_loader:
        images = images.view(-1, input_dim).to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        itern += 1
        if itern % eval_every == 0:
            # Read the scalar loss only when it is reported; doing it on every
            # step forced a device-to-host copy per iteration.
            loss_val = loss.item()
            correct = 0
            total = 0
            # No gradients are needed for evaluation, so skip autograd tracking.
            with torch.no_grad():
                for test_images, test_labels in test_loader:
                    test_images = test_images.view(-1, input_dim).to(device)
                    test_preds = model(test_images)
                    # Index of the largest score per row is the predicted class.
                    _, predicted = torch.max(test_preds, 1)
                    total += test_labels.size(0)
                    correct += (predicted.cpu() == test_labels).sum().item()
            accuracy = 100 * correct / total
            print('Epoch {}, Iter {}, Loss {}, Accuracy {}'.format(epoch, itern,loss_val, accuracy))
# Note: the reported time includes the periodic test-set evaluations.
print('Training time {}'.format(time.time() - start_time))

Epoch 0, Iter 500, Loss 0.5178766846656799, Accuracy 90.77
Epoch 1, Iter 1000, Loss 0.28360798954963684, Accuracy 92.51
Epoch 2, Iter 1500, Loss 0.2293909639120102, Accuracy 93.43
Epoch 3, Iter 2000, Loss 0.20383557677268982, Accuracy 93.82
Epoch 4, Iter 2500, Loss 0.16912244260311127, Accuracy 94.6
Epoch 4, Iter 3000, Loss 0.09085430204868317, Accuracy 95.05
Training time 44.58248162269592


In [29]:
class FeedForwardNN_relu(nn.Module):
    """Feed-forward network with one hidden layer and a ReLU activation.

    Args:
        input_dim: number of features in each input vector.
        hidden_dim: number of units in the hidden layer.
        output_dim: number of values produced per input vector.
    """

    def __init__(self, input_dim: int, hidden_dim: int, output_dim: int):
        super().__init__()
        # Attribute names are also the state_dict key prefixes; keep them stable.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Apply fc1 -> ReLU -> fc2 and return fc2's output unactivated."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

In [30]:
# Train the ReLU network with plain SGD and report test accuracy periodically.
input_dim = 28*28   # each image is flattened to a vector of this length
output_dim = 10
hidden_dim = 100

# Use the GPU when one is available and fall back to the CPU otherwise, so the
# cell no longer crashes on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = FeedForwardNN_relu(input_dim, hidden_dim, output_dim)
model.to(device)

criterion = nn.CrossEntropyLoss()
learning_rate = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

eval_every = 500   # run a test-set evaluation every this many training steps
accuracy = -999    # sentinel value until the first evaluation has run
itern = 0          # global step counter, carried across epochs
start_time = time.time()
for epoch in range(epochs):
    for images, labels in train_loader:
        images = images.view(-1, input_dim).to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        itern += 1
        if itern % eval_every == 0:
            # Read the scalar loss only when it is reported; doing it on every
            # step forced a device-to-host copy per iteration.
            loss_val = loss.item()
            correct = 0
            total = 0
            # No gradients are needed for evaluation, so skip autograd tracking.
            with torch.no_grad():
                for test_images, test_labels in test_loader:
                    test_images = test_images.view(-1, input_dim).to(device)
                    test_preds = model(test_images)
                    # Index of the largest score per row is the predicted class.
                    _, predicted = torch.max(test_preds, 1)
                    total += test_labels.size(0)
                    correct += (predicted.cpu() == test_labels).sum().item()
            accuracy = 100 * correct / total
            print('Epoch {}, Iter {}, Loss {}, Accuracy {}'.format(epoch, itern,loss_val, accuracy))
# Note: the reported time includes the periodic test-set evaluations.
print('Training time {}'.format(time.time() - start_time))

Epoch 0, Iter 500, Loss 0.3471400737762451, Accuracy 91.43
Epoch 1, Iter 1000, Loss 0.21722669899463654, Accuracy 92.81
Epoch 2, Iter 1500, Loss 0.14075396955013275, Accuracy 94.0
Epoch 3, Iter 2000, Loss 0.16439397633075714, Accuracy 94.5
Epoch 4, Iter 2500, Loss 0.3972584903240204, Accuracy 95.4
Epoch 4, Iter 3000, Loss 0.3499299883842468, Accuracy 95.56
Training time 44.519078731536865


In [31]:
class FeedForwardNN_multihidden(nn.Module):
    """Feed-forward network with two equally sized hidden layers and ReLU.

    Args:
        input_dim: number of features in each input vector.
        hidden_dim: number of units in each of the two hidden layers.
        output_dim: number of values produced per input vector.
    """

    def __init__(self, input_dim: int, hidden_dim: int, output_dim: int):
        super().__init__()
        # Attribute names are also the state_dict key prefixes; keep them stable.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        # One ReLU module is shared by both hidden layers.
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Apply fc1 -> ReLU -> fc2 -> ReLU -> fc3; fc3's output is unactivated."""
        first_hidden = self.relu(self.fc1(x))
        second_hidden = self.relu(self.fc2(first_hidden))
        return self.fc3(second_hidden)

In [32]:
# Train the two-hidden-layer network with plain SGD and report test accuracy
# periodically.
input_dim = 28*28   # each image is flattened to a vector of this length
output_dim = 10
hidden_dim = 100

# Use the GPU when one is available and fall back to the CPU otherwise, so the
# cell no longer crashes on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = FeedForwardNN_multihidden(input_dim, hidden_dim, output_dim)
model.to(device)

criterion = nn.CrossEntropyLoss()
learning_rate = 0.1
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

eval_every = 500   # run a test-set evaluation every this many training steps
accuracy = -999    # sentinel value until the first evaluation has run
itern = 0          # global step counter, carried across epochs
start_time = time.time()
for epoch in range(epochs):
    for images, labels in train_loader:
        images = images.view(-1, input_dim).to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        itern += 1
        if itern % eval_every == 0:
            # Read the scalar loss only when it is reported; doing it on every
            # step forced a device-to-host copy per iteration.
            loss_val = loss.item()
            correct = 0
            total = 0
            # No gradients are needed for evaluation, so skip autograd tracking.
            with torch.no_grad():
                for test_images, test_labels in test_loader:
                    test_images = test_images.view(-1, input_dim).to(device)
                    test_preds = model(test_images)
                    # Index of the largest score per row is the predicted class.
                    _, predicted = torch.max(test_preds, 1)
                    total += test_labels.size(0)
                    correct += (predicted.cpu() == test_labels).sum().item()
            accuracy = 100 * correct / total
            print('Epoch {}, Iter {}, Loss {}, Accuracy {}'.format(epoch, itern,loss_val, accuracy))
# Note: the reported time includes the periodic test-set evaluations.
print('Training time {}'.format(time.time() - start_time))

Epoch 0, Iter 500, Loss 0.22614851593971252, Accuracy 91.46
Epoch 1, Iter 1000, Loss 0.18892601132392883, Accuracy 93.7
Epoch 2, Iter 1500, Loss 0.11356744915246964, Accuracy 95.02
Epoch 3, Iter 2000, Loss 0.10446832329034805, Accuracy 95.62
Epoch 4, Iter 2500, Loss 0.15353454649448395, Accuracy 95.95
Epoch 4, Iter 3000, Loss 0.09241490066051483, Accuracy 96.55
Training time 45.56317210197449
