In [17]:
# importing necessary packages
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

In [18]:
# setting hyperparameters
# Fix the RNG seed first so weight initialisation and the shuffled batch
# order drawn from torch's global generator repeat across
# "Restart Kernel -> Run All" (GPU kernels may still add small numeric noise).
random_seed = 42
torch.manual_seed(random_seed)

batch_size = 64        # samples per optimisation step
input_size = 784       # features per sample (28 x 28 image, flattened)
hidden_size = 400      # width of the hidden layer
num_classes = 10       # number of output classes
num_epochs = 3         # full passes over the training set
learning_rate = 0.001  # step size handed to the optimizer

In [19]:
# download dataset
# MNIST dataset
train_dataset = torchvision.datasets.MNIST(root='./data',
                                           train=True,
                                           transform=transforms.ToTensor(),
                                           download=True)

# download=True here as well: the download is skipped when the files are
# already under ./data, and the test split no longer depends on the train
# constructor above having fetched them first.
test_dataset = torchvision.datasets.MNIST(root='./data',
                                          train=False,
                                          transform=transforms.ToTensor(),
                                          download=True)

# dataloader
# training batches are reshuffled every epoch; test batches keep dataset order
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

In [20]:
# choose the compute device: CUDA when torch can see a GPU, CPU otherwise
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')
device

device(type='cuda')

In [21]:
# model class
class NeuralNet(nn.Module):
  """Fully connected classifier: Linear -> ReLU -> Linear.

  Args:
    input_size: number of features per sample expected by the first layer.
    hidden_size: number of units in the hidden layer.
    num_classes: number of scores produced per sample.
  """

  def __init__(self, input_size, hidden_size, num_classes):
    super().__init__()
    self.input_size = input_size
    self.l1 = nn.Linear(input_size, hidden_size)
    self.relu = nn.ReLU()
    self.l2 = nn.Linear(hidden_size, num_classes)

  def forward(self, x):
    """Compute the raw (un-normalised) class scores for ``x``.

    Args:
      x: tensor whose last dimension equals ``input_size``; it is passed
        through unchanged. Any other shape is first flattened to
        ``(-1, input_size)``, so an un-flattened image batch is accepted.

    Returns:
      Tensor of scores with ``num_classes`` entries in its last dimension.
      No softmax is applied.

    Raises:
      RuntimeError: if ``x`` cannot be reshaped to ``(-1, input_size)``.
    """
    # Only reshape inputs the linear layer would otherwise reject, so
    # callers that already pass (..., input_size) see identical behaviour.
    if x.dim() > 0 and x.size(-1) != self.input_size:
      x = x.reshape(-1, self.input_size)

    out = self.l1(x)
    out = self.relu(out)
    out = self.l2(out)

    return out

In [22]:
# build the network, then move its parameters onto the selected device
model = NeuralNet(input_size=input_size,
                  hidden_size=hidden_size,
                  num_classes=num_classes)
model = model.to(device)
model

NeuralNet(
  (l1): Linear(in_features=784, out_features=400, bias=True)
  (relu): ReLU()
  (l2): Linear(in_features=400, out_features=10, bias=True)
)

In [23]:
# loss function and optimizer
# cross-entropy loss is applied to the model's raw output scores
criterion = nn.CrossEntropyLoss()
# Adam updates every model parameter using the configured learning rate
optimizer = torch.optim.Adam(params=model.parameters(),
                             lr=learning_rate)

In [24]:
# training the model
n_total_steps = len(train_loader)

# Put the model in training mode explicitly, so re-running this cell after
# an evaluation cell that called model.eval() still trains correctly.
model.train()

for epoch in range(num_epochs):
  for i, (images, labels) in enumerate(train_loader):
    # original size = [batch, 1, 28, 28]
    # reshaped size = [batch, input_size]
    # use the input_size hyperparameter rather than a second hard-coded 28*28
    images = images.reshape(-1, input_size).to(device)
    labels = labels.to(device)

    # forward pass
    output = model(images)

    # calculating loss
    loss = criterion(output, labels)

    # backward and optimize: clear old gradients, backpropagate, update weights
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # report the current batch loss every 100 steps
    if (i+1) % 100 == 0:
      print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')

Epoch [1/3], Step [100/938], Loss: 0.6802
Epoch [1/3], Step [200/938], Loss: 0.1924
Epoch [1/3], Step [300/938], Loss: 0.2298
Epoch [1/3], Step [400/938], Loss: 0.1110
Epoch [1/3], Step [500/938], Loss: 0.2203
Epoch [1/3], Step [600/938], Loss: 0.1338
Epoch [1/3], Step [700/938], Loss: 0.2731
Epoch [1/3], Step [800/938], Loss: 0.1698
Epoch [1/3], Step [900/938], Loss: 0.1887
Epoch [2/3], Step [100/938], Loss: 0.1498
Epoch [2/3], Step [200/938], Loss: 0.1316
Epoch [2/3], Step [300/938], Loss: 0.0720
Epoch [2/3], Step [400/938], Loss: 0.0851
Epoch [2/3], Step [500/938], Loss: 0.0534
Epoch [2/3], Step [600/938], Loss: 0.1417
Epoch [2/3], Step [700/938], Loss: 0.0899
Epoch [2/3], Step [800/938], Loss: 0.1242
Epoch [2/3], Step [900/938], Loss: 0.0164
Epoch [3/3], Step [100/938], Loss: 0.0506
Epoch [3/3], Step [200/938], Loss: 0.0505
Epoch [3/3], Step [300/938], Loss: 0.0182
Epoch [3/3], Step [400/938], Loss: 0.0263
Epoch [3/3], Step [500/938], Loss: 0.0435
Epoch [3/3], Step [600/938], Loss: