<a href="https://colab.research.google.com/github/mustaqimkhan317/deep_learning_in_pytorch/blob/main/two_layer_neural_network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# importing necessary packages
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

In [2]:
# setting hyperparameters
# Seed PyTorch's global RNG first so that weight initialisation and the
# shuffled batch order repeat from run to run (GPU kernels may still introduce
# small numerical differences).
seed = 42
torch.manual_seed(seed)

batch_size = 64        # samples per mini-batch
input_size = 784       # 28 * 28 pixels of one flattened image
hidden_size = 400      # units in the hidden layer
num_classes = 10       # number of output classes
num_epochs = 3         # full passes over the training set
learning_rate = 0.001  # step size handed to the optimizer

In [3]:
# download dataset
# MNIST dataset
train_dataset = torchvision.datasets.MNIST(root='./data',
                                           train=True,
                                           transform=transforms.ToTensor(),
                                           download=True)

# download=True here as well: torchvision skips the fetch when the files are
# already present under root, so this costs nothing after the call above but
# keeps the test split from depending on that call having run first.
test_dataset = torchvision.datasets.MNIST(root='./data',
                                          train=False,
                                          transform=transforms.ToTensor(),
                                          download=True)

# dataloader
# training batches are reshuffled every epoch
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

# evaluation order does not matter, so keep the test set unshuffled
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



In [4]:
# select the compute device: the GPU when CUDA is usable, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

device(type='cuda')

In [5]:
# model class
class NeuralNet(nn.Module):
  """Two-layer fully connected classifier: Linear -> ReLU -> Linear.

  Args:
    input_size: number of features in one flattened input sample.
    hidden_size: number of units in the hidden layer.
    num_classes: number of output scores produced per sample.
  """

  def __init__(self, input_size, hidden_size, num_classes):
    super().__init__()
    self.input_size = input_size
    self.l1 = nn.Linear(input_size, hidden_size)
    self.relu = nn.ReLU()
    self.l2 = nn.Linear(hidden_size, num_classes)

  def forward(self, x):
    """Return the raw class scores for ``x``.

    ``x`` may already be flattened (last dimension equal to ``input_size``)
    or be an image-shaped batch such as ``[N, 1, 28, 28]``; the latter is
    flattened to ``[N, features]`` before the first layer. No softmax is
    applied to the returned scores.
    """
    # Only reshape inputs that are not already in feature layout, so anything
    # the layers accepted before (last dim == input_size) is handled exactly
    # as it was.
    if x.dim() > 2 and x.shape[-1] != self.input_size:
      x = x.flatten(start_dim=1)

    out = self.l1(x)
    out = self.relu(out)
    out = self.l2(out)

    return out

In [6]:
# build the network from the hyperparameters, then move it to the chosen device
model = NeuralNet(input_size=input_size,
                  hidden_size=hidden_size,
                  num_classes=num_classes)
model = model.to(device)
model

NeuralNet(
  (l1): Linear(in_features=784, out_features=400, bias=True)
  (relu): ReLU()
  (l2): Linear(in_features=400, out_features=10, bias=True)
)

In [7]:
# loss function and optimizer
# cross-entropy is computed directly from the model's raw output scores
criterion = nn.CrossEntropyLoss()
# Adam updates every parameter of the model with the configured learning rate
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [8]:
# training the model
n_total_steps = len(train_loader)
log_every = 100  # print the current batch loss once every this many steps

# Explicitly select training mode so this cell behaves the same when it is
# re-run after an evaluation pass (it matters as soon as the model contains
# layers such as dropout or batch-norm).
model.train()

for epoch in range(num_epochs):
  for i, (images, labels) in enumerate(train_loader):
    # original size = [batch, 1, 28, 28]
    # reshaped size = [batch, 784]
    # keep the batch dimension and flatten the rest instead of hard-coding
    # 28*28, so a shape mismatch surfaces as an error in the first layer
    images = images.reshape(images.size(0), -1).to(device)
    labels = labels.to(device)

    # forward pass
    output = model(images)

    # calculating loss
    loss = criterion(output, labels)

    # backward and optimize
    optimizer.zero_grad()  # clear gradients accumulated by the previous step
    loss.backward()
    optimizer.step()

    if (i+1) % log_every == 0:
      print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')

Epoch [1/3], Step [100/938], Loss: 0.5088
Epoch [1/3], Step [200/938], Loss: 0.3359
Epoch [1/3], Step [300/938], Loss: 0.3581
Epoch [1/3], Step [400/938], Loss: 0.3227
Epoch [1/3], Step [500/938], Loss: 0.4277
Epoch [1/3], Step [600/938], Loss: 0.2461
Epoch [1/3], Step [700/938], Loss: 0.0948
Epoch [1/3], Step [800/938], Loss: 0.0163
Epoch [1/3], Step [900/938], Loss: 0.3323
Epoch [2/3], Step [100/938], Loss: 0.2049
Epoch [2/3], Step [200/938], Loss: 0.0661
Epoch [2/3], Step [300/938], Loss: 0.0848
Epoch [2/3], Step [400/938], Loss: 0.1179
Epoch [2/3], Step [500/938], Loss: 0.2171
Epoch [2/3], Step [600/938], Loss: 0.0421
Epoch [2/3], Step [700/938], Loss: 0.2130
Epoch [2/3], Step [800/938], Loss: 0.0759
Epoch [2/3], Step [900/938], Loss: 0.0524
Epoch [3/3], Step [100/938], Loss: 0.0622
Epoch [3/3], Step [200/938], Loss: 0.0715
Epoch [3/3], Step [300/938], Loss: 0.0922
Epoch [3/3], Step [400/938], Loss: 0.0901
Epoch [3/3], Step [500/938], Loss: 0.0487
Epoch [3/3], Step [600/938], Loss:

In [9]:
# Test the model
# Switch to evaluation mode so mode-dependent layers (dropout, batch-norm),
# if any are added to the model, use their inference behaviour.
model.eval()

# In test phase, we don't need to compute gradients (for memory efficiency)
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for images, labels in test_loader:
        # flatten each image while keeping the batch dimension
        images = images.reshape(images.size(0), -1).to(device)
        labels = labels.to(device)
        outputs = model(images)
        # the index of the largest score along dim 1 is the predicted class
        predicted = outputs.argmax(dim=1)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()

    acc = 100.0 * n_correct / n_samples
    # report the number of samples actually evaluated rather than a literal
    print(f'Accuracy of the network on the {n_samples} test images: {acc} %')

Accuracy of the network on the 10000 test images: 97.61 %
