In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch.utils.data import DataLoader

import torchvision
import torchvision.transforms as transforms

In [2]:
class Model(nn.Module):
    def __init__(self, input_dim, output_dim, hidden_dim):
        super(Model, self).__init__()
        self.linear_1 = nn.Linear(input_dim, hidden_dim)
        self.linear_2 = nn.Linear(hidden_dim, output_dim)
        self.relu = nn.ReLU()
    def forward(self, x):
        x = self.linear_1(x)
        x = self.relu(x) # Activation function
        x = self.linear_2(x)
        return x

In [None]:
nn.Sigmoid
nn.ReLU
nn.LeakyReLU
nn.Tanh;

In [None]:
# for muticlass classification we use softmax function to handle multiple class output with cross-entropy loss function.

In [3]:
# MNIST dataset
train_dataset = torchvision.datasets.MNIST(root='./', train=True, transform=transforms.ToTensor(), download=True)
test_dataset = torchvision.datasets.MNIST(root='./', train=False, transform=transforms.ToTensor())

# Data loader
# mini batch size
train_loader = DataLoader(dataset=train_dataset, batch_size=128, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=100, shuffle=False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 109677610.70it/s]


Extracting ./MNIST/raw/train-images-idx3-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 47006478.01it/s]


Extracting ./MNIST/raw/train-labels-idx1-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 40028774.15it/s]

Extracting ./MNIST/raw/t10k-images-idx3-ubyte.gz to ./MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 6724507.15it/s]


Extracting ./MNIST/raw/t10k-labels-idx1-ubyte.gz to ./MNIST/raw



In [4]:
class NeuralNet(nn.Module):
  def __init__(self, input_size, output_size, hidden_size):
    super().__init__()
    self.h1 = nn.Linear(input_size, hidden_size)
    self.h2 = nn.Linear(hidden_size, hidden_size)
    self.h3 = nn.Linear(hidden_size, output_size)
    self.sigmoid = nn.Sigmoid()

  def forward(self, x):
    x = self.h1(x)
    x = self.sigmoid(x)
    x = self.h2(x)
    x = self.sigmoid(x)
    x = self.h3(x)

    return x

In [6]:
#input dim = 28*28, hidden = 32, output = 10
model = NeuralNet(784, 32, 10)

model = model.to('cuda')

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.05, momentum=0.9)

total_step = len(train_loader)

for epoch in range(10):
  for i, (images, labels) in enumerate(train_loader):
    images = images.reshape(-1, 28*28).to('cuda')
    labels = labels.to('cuda')

    outputs = model(images)  # forwardI(images): get prediction
    loss = loss_fn(outputs, labels)  # calculate the loss (crossentropy loss) with ground truth & prediction value

    # Backward and optimize
    optimizer.zero_grad()
    loss.backward()  # automatic gradient calculation (autograd)
    optimizer.step()  # update model parameter with requires_grad=True

    if (i+1) % 100 == 0:
        print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                .format(epoch+1, 10, i+1, total_step, loss.item()))

Epoch [1/10], Step [100/469], Loss: 2.3200
Epoch [1/10], Step [200/469], Loss: 2.1254
Epoch [1/10], Step [300/469], Loss: 1.5986
Epoch [1/10], Step [400/469], Loss: 1.3250
Epoch [2/10], Step [100/469], Loss: 0.8648
Epoch [2/10], Step [200/469], Loss: 0.6574
Epoch [2/10], Step [300/469], Loss: 0.7465
Epoch [2/10], Step [400/469], Loss: 0.4570
Epoch [3/10], Step [100/469], Loss: 0.4293
Epoch [3/10], Step [200/469], Loss: 0.4575
Epoch [3/10], Step [300/469], Loss: 0.3158
Epoch [3/10], Step [400/469], Loss: 0.3487
Epoch [4/10], Step [100/469], Loss: 0.4036
Epoch [4/10], Step [200/469], Loss: 0.3436
Epoch [4/10], Step [300/469], Loss: 0.3889
Epoch [4/10], Step [400/469], Loss: 0.2489
Epoch [5/10], Step [100/469], Loss: 0.2277
Epoch [5/10], Step [200/469], Loss: 0.2589
Epoch [5/10], Step [300/469], Loss: 0.3160
Epoch [5/10], Step [400/469], Loss: 0.2727
Epoch [6/10], Step [100/469], Loss: 0.4148
Epoch [6/10], Step [200/469], Loss: 0.2361
Epoch [6/10], Step [300/469], Loss: 0.3846
Epoch [6/10

In [7]:
# In test phase, we don't need to compute gradients (for memory efficiency)
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.reshape(-1, 28*28).to('cuda')
        labels = labels.to('cuda')
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)  # classificatoin model -> get the label prediction of top 1
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))

Accuracy of the network on the 10000 test images: 93.02 %
