<a href="https://colab.research.google.com/github/mustaqimkhan317/deep_learning_in_pytorch/blob/main/two_layer_neural_network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [19]:
# importing necessary packages
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

In [20]:
# setting hyperparameters
seed = 42               # fixed RNG seed so repeated runs start from the same random state
batch_size = 64         # samples per mini-batch drawn by the data loaders
input_size = 784        # length of one flattened image (28 * 28 pixels)
hidden_size = 400       # width of the hidden layer
num_classes = 10        # number of output logits produced by the network
num_epochs = 3          # full passes over the training loader
learning_rate = 0.001   # step size handed to the optimizer

# Seed PyTorch's global generator before any weights are created or batches are
# shuffled; without this every Restart & Run All produces different numbers.
# (trailing ';' keeps the returned Generator object out of the cell output)
torch.manual_seed(seed);

In [21]:
# download dataset
# MNIST train / test splits; ToTensor turns every image into a tensor
train_dataset = torchvision.datasets.MNIST(
    './data', train=True, transform=transforms.ToTensor(), download=True)
test_dataset = torchvision.datasets.MNIST(
    './data', train=False, transform=transforms.ToTensor(), download=False)

# dataloaders: only the training batches are shuffled
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False)

In [22]:
# select the compute device: the GPU when CUDA is available, otherwise the CPU
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')
device

device(type='cuda')

In [23]:
# model class
class NeuralNet(nn.Module):
  """Two-layer fully connected network: Linear -> ReLU -> Linear.

  Args:
    input_size: number of features in one flattened input sample.
    hidden_size: number of units in the hidden layer.
    num_classes: number of output values (one logit per class).
  """

  def __init__(self, input_size, hidden_size, num_classes):
    super().__init__()
    # kept so forward() can flatten inputs that are not already [N, input_size]
    self.input_size = input_size
    self.l1 = nn.Linear(input_size, hidden_size)
    self.relu = nn.ReLU()
    self.l2 = nn.Linear(hidden_size, num_classes)

  def forward(self, x):
    """Return the raw (un-normalised) class scores for ``x``.

    Args:
      x: tensor whose last dimension is ``input_size`` (passed through
        unchanged), or any tensor that can be reshaped to
        ``[-1, input_size]``, e.g. an image batch ``[N, 1, 28, 28]`` when
        ``input_size`` is 784.

    Returns:
      Tensor whose last dimension is ``num_classes``. No softmax is applied.
    """
    # Inputs that already end in input_size take the original code path
    # untouched; anything else is flattened instead of failing in the
    # first Linear layer.
    if x.shape[-1] != self.input_size:
      x = x.reshape(-1, self.input_size)

    out = self.l1(x)
    out = self.relu(out)
    out = self.l2(out)

    return out

In [24]:
# build the network, then move its parameters onto the selected device
model = NeuralNet(input_size=input_size,
                  hidden_size=hidden_size,
                  num_classes=num_classes)
model = model.to(device)
model

NeuralNet(
  (l1): Linear(in_features=784, out_features=400, bias=True)
  (relu): ReLU()
  (l2): Linear(in_features=400, out_features=10, bias=True)
)

In [25]:
# optimizer (Adam over all model parameters) and loss function
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

In [26]:
# training the model
n_total_steps = len(train_loader)  # number of mini-batches per epoch

# Explicitly select training mode. It makes no difference for the current
# Linear/ReLU layers, but is required as soon as mode-dependent layers
# (dropout, batch norm) are added to the model.
model.train()

for epoch in range(num_epochs):
  for i, (images, labels) in enumerate(train_loader):
    # original size = [batch, 1, 28, 28]
    # reshaped size = [batch, input_size]
    # (uses the shared input_size constant rather than a hard-coded 28*28)
    images = images.reshape(-1, input_size).to(device)
    labels = labels.to(device)

    # forward pass
    output = model(images)

    # calculating loss
    loss = criterion(output, labels)

    # backward and optimize: clear old gradients, backpropagate, update weights
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # report the current batch loss every 100 steps
    if (i+1) % 100 == 0:
      print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')

Epoch [1/3], Step [100/938], Loss: 0.5118
Epoch [1/3], Step [200/938], Loss: 0.2242
Epoch [1/3], Step [300/938], Loss: 0.3865
Epoch [1/3], Step [400/938], Loss: 0.1957
Epoch [1/3], Step [500/938], Loss: 0.2689
Epoch [1/3], Step [600/938], Loss: 0.1662
Epoch [1/3], Step [700/938], Loss: 0.2177
Epoch [1/3], Step [800/938], Loss: 0.1959
Epoch [1/3], Step [900/938], Loss: 0.2168
Epoch [2/3], Step [100/938], Loss: 0.1896
Epoch [2/3], Step [200/938], Loss: 0.1098
Epoch [2/3], Step [300/938], Loss: 0.2225
Epoch [2/3], Step [400/938], Loss: 0.1329
Epoch [2/3], Step [500/938], Loss: 0.1240
Epoch [2/3], Step [600/938], Loss: 0.1703
Epoch [2/3], Step [700/938], Loss: 0.0998
Epoch [2/3], Step [800/938], Loss: 0.0709
Epoch [2/3], Step [900/938], Loss: 0.0869
Epoch [3/3], Step [100/938], Loss: 0.0369
Epoch [3/3], Step [200/938], Loss: 0.0636
Epoch [3/3], Step [300/938], Loss: 0.0762
Epoch [3/3], Step [400/938], Loss: 0.0145
Epoch [3/3], Step [500/938], Loss: 0.1227
Epoch [3/3], Step [600/938], Loss:

In [27]:
# Test the model
# Select evaluation mode. It makes no difference for the current Linear/ReLU
# layers, but is required as soon as mode-dependent layers (dropout, batch
# norm) are added to the model.
model.eval()

# In test phase, we don't need to compute gradients (for memory efficiency)
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for images, labels in test_loader:
        # flatten each image with the shared input_size constant
        images = images.reshape(-1, input_size).to(device)
        labels = labels.to(device)
        outputs = model(images)
        # max returns (value, index); the index along dim 1 is the predicted class.
        # No `.data` needed: autograd is already disabled inside no_grad().
        _, predicted = torch.max(outputs, 1)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()

    # accuracy in percent; the message reports the number of samples actually seen
    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy of the network on the {n_samples} test images: {acc} %')

Accuracy of the network on the 10000 test images: 97.63 %
