In [1]:
import numpy as np
import torch
import torch.nn as nn

import torchvision.datasets as datasets
import torchvision.transforms as transforms

In [2]:
# Display whether PyTorch can see a CUDA device in this kernel.
torch.cuda.is_available()

True

In [3]:
# --- Model dimensions ---
input_size = 28 * 28    # features per sample: one per pixel of a flattened 28x28 image
hidden_size = 400       # width of each hidden layer
out_size = 10           # number of output classes (digits 0-9)

# --- Optimisation settings ---
epochs = 10             # full passes over the training set
batch_size = 100        # samples per mini-batch handed out by the data loaders
learning_rate = 1e-3    # step size given to the optimizer

In [4]:
# Training split of MNIST stored under ./data (download=True requests a download
# when the files are missing); every image is converted to a tensor.
train_dataset = datasets.MNIST(
    root='./data', train=True, transform=transforms.ToTensor(), download=True
)

# Test split, read from the same root directory with the same tensor conversion.
test_dataset = datasets.MNIST(
    root='./data', train=False, transform=transforms.ToTensor()
)

In [12]:
# Display the training dataset's summary (size, root, split, transform).
train_dataset

Dataset MNIST
    Number of datapoints: 60000
    Root location: ./data
    Split: Train
    StandardTransform
Transform: ToTensor()

In [5]:
# Training batches are reshuffled every epoch so the optimizer sees the
# samples in a different order on each pass.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)

# The test set is iterated in its fixed order: shuffling has no effect on the
# accuracy figure, but it makes anything that inspects a particular test batch
# (e.g. the last batch left over after the evaluation loop) non-reproducible.
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [6]:
class Net(nn.Module):
    """Fully connected classifier: two ReLU hidden layers and a linear output layer.

    Args:
        input_size: number of features in each input sample.
        hidden_size: width shared by both hidden layers.
        out_size: number of scores produced per sample (one per class).
    """

    def __init__(self, input_size, hidden_size, out_size):
        super().__init__()
        # input -> hidden, hidden -> hidden, hidden -> class scores
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, out_size)
        # One stateless ReLU module reused after each hidden layer.
        self.relu = nn.ReLU()
        self.init_weights()

    def init_weights(self):
        """Apply Kaiming-normal initialisation to the two hidden layers' weights.

        Biases and the output layer (fc3) keep the nn.Linear defaults.
        """
        for hidden_layer in (self.fc1, self.fc2):
            nn.init.kaiming_normal_(hidden_layer.weight)

    def forward(self, x):
        """Return the raw (un-normalised) class scores for ``x``.

        ``x`` must have ``input_size`` features in its last dimension.
        """
        first_hidden = self.relu(self.fc1(x))
        second_hidden = self.relu(self.fc2(first_hidden))
        return self.fc3(second_hidden)

In [7]:
# Instantiate the network and, when a GPU is available, move its parameters there.
CUDA = torch.cuda.is_available()
net = Net(input_size, hidden_size, out_size)
net = net.cuda() if CUDA else net

# Loss: cross-entropy computed from the network's raw class scores.
criterion = nn.CrossEntropyLoss()
# Optimizer: Adam over all network parameters (created after the optional GPU move).
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)

In [14]:
# Display the network's layer structure.
net

Net(
  (fc1): Linear(in_features=784, out_features=400, bias=True)
  (fc2): Linear(in_features=400, out_features=400, bias=True)
  (fc3): Linear(in_features=400, out_features=10, bias=True)
  (relu): ReLU()
)

In [8]:
# Display the flag that decides whether tensors and the model are moved to the GPU.
CUDA

True

In [9]:
# Train the network
net.train()   # make sure the model is in training mode, even if this cell is re-run after evaluation
for epoch in range(epochs):
    correct_train = 0     # correctly classified training samples this epoch (plain Python int)
    running_loss = 0.0    # sum of the per-batch mean losses this epoch
    for images, labels in train_loader:
        # Flatten each batch from (batch, 1, 28, 28) to (batch, 784) to match the first linear layer.
        images = images.view(-1, 28*28)
        if CUDA:
            images = images.cuda()
            labels = labels.cuda()

        outputs = net(images)                 # forward pass: raw class scores
        loss = criterion(outputs, labels)     # cross-entropy between scores and true labels

        optimizer.zero_grad()                 # clear gradients left from the previous step
        loss.backward()                       # backpropagation
        optimizer.step()                      # update the weights

        # Bookkeeping only: detach so the arg-max is not tracked by autograd, and
        # convert to Python numbers so nothing accumulates on the device.
        running_loss += loss.item()
        _, predicted = torch.max(outputs.detach(), 1)
        correct_train += (predicted == labels).sum().item()

    print('Epoch [{}/{}], Training Loss: {:.3f}, Training Accuracy: {:.3f}%'.format
          (epoch+1, epochs, running_loss/len(train_loader), 100*correct_train/len(train_dataset)))
print("DONE TRAINING!")

Epoch [1/10], Training Loss: 0.235, Training Accuracy: 93.215%
Epoch [2/10], Training Loss: 0.085, Training Accuracy: 97.450%
Epoch [3/10], Training Loss: 0.056, Training Accuracy: 98.233%
Epoch [4/10], Training Loss: 0.040, Training Accuracy: 98.735%
Epoch [5/10], Training Loss: 0.029, Training Accuracy: 99.055%
Epoch [6/10], Training Loss: 0.023, Training Accuracy: 99.242%
Epoch [7/10], Training Loss: 0.019, Training Accuracy: 99.388%
Epoch [8/10], Training Loss: 0.017, Training Accuracy: 99.408%
Epoch [9/10], Training Loss: 0.014, Training Accuracy: 99.550%
Epoch [10/10], Training Loss: 0.012, Training Accuracy: 99.582%
DONE TRAINING!


In [10]:
# Evaluate on the test set.
# Switch to evaluation mode first; this network has no dropout or batch-norm
# layers, so the numbers are unchanged, but the cell stays correct if such
# layers are added to the model later.
net.eval()
with torch.no_grad():   # no gradients are needed for inference
    correct = 0         # correctly classified test samples
    for images, labels in test_loader:
        if CUDA:
            images = images.cuda()
            labels = labels.cuda()
        # Flatten (batch, 1, 28, 28) -> (batch, 784) to match the first linear layer.
        images = images.view(-1, 28*28)
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)   # index of the highest score = predicted class
        correct += (predicted == labels).sum().item()

    # Report the actual dataset size instead of a hard-coded count.
    print('Accuracy of the network on the {} test images: {} %'.format(
        len(test_dataset), 100 * correct / len(test_dataset)))

Accuracy of the network on the 10000 test images: 97.96 %


In [15]:
# Raw class scores for the batch currently held in `images` (the last batch
# assigned by the most recently executed loop). The module is called directly
# rather than through .forward() so that nn.Module's call machinery (hooks) runs.
net(images)

tensor([[-9.0333e+00, -1.0475e+01, -1.2522e+01, -3.3830e+00, -1.1068e+01,
          1.6525e+01, -5.0967e+00, -2.7984e+00, -5.9667e+00, -1.3520e+01],
        [-1.5367e+01,  1.6661e+01, -1.6054e+01, -1.2352e+01, -4.4291e+00,
         -6.6593e+00, -9.3461e+00, -7.7103e+00,  9.4426e-02, -4.1081e+00],
        [-1.0119e+01,  1.8365e+01, -1.3075e+01, -1.3762e+01, -4.5878e+00,
         -1.2375e+01, -9.1136e+00, -3.9374e+00, -2.0544e+00, -5.1624e+00],
        [-1.4485e+01, -3.6647e+00, -1.0975e+01, -1.7756e+01,  2.2725e+01,
         -9.8484e+00, -8.2045e+00, -8.2705e+00, -9.7952e+00, -7.5865e+00],
        [-1.9635e+01, -8.6409e+00, -8.1364e+00,  2.6337e+01, -1.7182e+01,
         -1.6424e+00, -3.2481e+01, -1.3226e+01, -9.0886e+00, -3.2145e+00],
        [-7.8065e+00, -1.3303e+01, -2.1049e+01, -6.8300e+00, -1.8771e+01,
          2.7013e+01, -9.3806e+00, -8.9378e+00, -5.7336e+00, -8.9025e+00],
        [-1.6300e+01,  4.9388e+00,  1.7932e+01, -3.7568e+00, -1.1021e+01,
         -1.0162e+01, -1.2739e+0