In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

import numpy as np
import matplotlib.pyplot as plt

# Reproducibility: seed the RNGs before any weights are initialised or any
# DataLoader shuffles, so that Restart Kernel -> Run All gives the same numbers.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

# Use the first CUDA GPU when one is visible and fall back to the CPU otherwise,
# so the same notebook runs unchanged on either kind of machine.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# device = 'cuda:0'  # manual override from the original notes (OneForAll system); leave commented to auto-detect
print(device)

cuda:0


In [None]:
# Per-channel mean / std passed to Normalize for the single grey-scale channel.
normalize = transforms.Normalize(mean=(0.1307,), std=(0.3081,))

# Build the training split first and the test split second; each one gets its own
# ToTensor -> Normalize pipeline and is downloaded into 'data/' when missing.
dataset_train, dataset_test = (
    torchvision.datasets.MNIST(
        'data/',
        train=is_train_split,
        download=True,
        transform=transforms.Compose([transforms.ToTensor(), normalize]),
    )
    for is_train_split in (True, False)
)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=9912422.0), HTML(value='')))


Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=28881.0), HTML(value='')))


Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=1648877.0), HTML(value='')))


Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=4542.0), HTML(value='')))


Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw



  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [None]:
# Mini-batch size shared by the training and the test loader.
batchSize = 200

# Training batches are reshuffled on every pass over the data.
dataloader_train = torch.utils.data.DataLoader(
    dataset=dataset_train,
    batch_size=batchSize,
    shuffle=True,
)

# The test loader keeps the dataset order (no shuffling).
dataloader_test = torch.utils.data.DataLoader(
    dataset=dataset_test,
    batch_size=batchSize,
    shuffle=False,
)

In [None]:
# Fully connected classifier: three hidden ReLU layers followed by a 10-way output layer
class Net(nn.Module):
    """Multi-layer perceptron mapping flattened 28x28 inputs to 10 log-probabilities.

    Args:
        hiddenUnits: sequence of three ints giving the width of the first,
            second and third hidden layer respectively.
    """

    def __init__(self, hiddenUnits):
        super().__init__()
        self.fc1 = nn.Linear(28 * 28, hiddenUnits[0])
        self.fc2 = nn.Linear(hiddenUnits[0], hiddenUnits[1])
        self.fc3 = nn.Linear(hiddenUnits[1], hiddenUnits[2])
        self.fc4 = nn.Linear(hiddenUnits[2], 10)

    def forward(self, x):
        """Return log-probabilities over the 10 outputs.

        Args:
            x: tensor whose last dimension has size 28 * 28 (already flattened).

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size 10, holding log-softmax values.
        """
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        # Normalise over the class axis explicitly. Omitting `dim` is deprecated,
        # emits a warning, and silently picks axis 0 for 3-D inputs.
        x = F.log_softmax(self.fc4(x), dim=-1)
        return x

In [None]:
# Instantiate the classifier with three 1024-unit hidden layers, cast its
# floating-point parameters with .float(), and move it onto `device`.
net = Net([1024] * 3).float()
net.to(device)

# Negative log-likelihood loss; it consumes the log-probabilities produced by
# the network's final log_softmax.
criterion = nn.NLLLoss()

# SGD with momentum; ExponentialLR multiplies the learning rate by `decayRate`
# each time the scheduler is stepped.
decayRate = 0.96
optimizer = optim.SGD(params=net.parameters(), lr=1e-2, momentum=0.9)
scheduler = optim.lr_scheduler.ExponentialLR(optimizer=optimizer, gamma=decayRate)

In [None]:
# learning loop
num_epochs = 20
loss_list = []  # one entry per epoch: sum of the per-batch mean losses
net.train()     # make sure the model is in training mode
for epoch in range(num_epochs):
    tot_loss = 0.0  # accumulates the loss for this epoch as a plain Python float
    for x, y in dataloader_train:
        # Flatten each image to a vector. Use the actual batch length rather than
        # the configured batch size so a shorter final batch does not break view().
        x = x.view(x.size(0), -1)
        x = x.to(device)    # send the data to device (cuda if available)
        y = y.to(device)

        optimizer.zero_grad()   # clear gradients left over from the previous step
        out = net(x.float())    # compute the outputs
        loss = criterion(out, y.long()) # compute the loss
        loss.backward() # compute the gradients
        optimizer.step()    # update the weights using the gradients just computed
        tot_loss += loss.item()  # .item() yields a float and keeps no tensor alive

    # Decay the learning rate once per epoch, AFTER the optimizer updates.
    # Stepping the scheduler before any optimizer.step() skips the first value
    # of the schedule and triggers a PyTorch warning.
    scheduler.step()

    loss_list.append(tot_loss)
    print('Epoch: {}, loss = {}'.format(epoch, tot_loss))

plt.plot(loss_list)
plt.xlabel('epoch')
plt.ylabel('summed training loss')
plt.show()

  


Epoch: 0, loss = 183.2996063232422
Epoch: 1, loss = 53.616294860839844
Epoch: 2, loss = 35.529022216796875
Epoch: 3, loss = 25.996177673339844
Epoch: 4, loss = 20.101022720336914
Epoch: 5, loss = 15.777689933776855
Epoch: 6, loss = 12.387591361999512


In [None]:
# testing loop
net.eval()          # evaluation mode for inference
tot_loss = 0.0      # sum of the per-batch mean losses
tot_correct = 0     # number of correctly classified samples
num_samples = 0     # number of samples actually seen
with torch.no_grad():   # no gradients are needed for evaluation
    for x, y in dataloader_test:
        # Flatten using the real batch length so a shorter final batch works too.
        x = x.view(x.size(0), -1)
        x = x.to(device)
        y = y.to(device)

        out = net(x)
        tot_loss += criterion(out, y).item()
        pred = out.argmax(dim=1)    # index of the highest log-probability
        tot_correct += (pred == y).sum().item()
        num_samples += y.size(0)

print('total loss at testing', tot_loss)
# Accuracy in percent, computed from the number of samples seen instead of a
# hard-coded divisor that is only right for exactly 10,000 test samples.
print('accuracy at testing', 100.0 * tot_correct / max(num_samples, 1))