In [1]:
# Building the network, code from 3-building-neural-network.ipynb

import torch
import torchvision
from torchvision import transforms, datasets
import torch.nn as nn
import torch.nn.functional as F

# Both splits use the same preprocessing: convert each image to a tensor.
to_tensor = transforms.Compose([transforms.ToTensor()])

# MNIST is downloaded (if not already present) into the current directory ("").
train = datasets.MNIST("", train=True, download=True, transform=to_tensor)
test = datasets.MNIST("", train=False, download=True, transform=to_tensor)

# Loaders yield shuffled mini-batches of 10 samples from each split.
trainset = torch.utils.data.DataLoader(dataset=train, batch_size=10, shuffle=True)
testset = torch.utils.data.DataLoader(dataset=test, batch_size=10, shuffle=True)

class Net(nn.Module):
    """Fully connected classifier: three ReLU hidden layers and a softmax output.

    With the default arguments the layer sizes are 784 -> 64 -> 64 -> 64 -> 10,
    i.e. a flattened 28x28 image in and one value per class out.

    Args:
        input_size: Number of features per sample (default 28*28, the image
            resolution after flattening).
        hidden_size: Width of each of the three hidden layers (default 64).
        num_classes: Number of output values per sample (default 10).
    """

    def __init__(self, input_size=28*28, hidden_size=64, num_classes=10):
        super().__init__()
        # fcN = "fully connected layer N". The attribute names and creation
        # order are kept stable so state_dict keys and seeded initialisation
        # do not change.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, hidden_size)
        self.fc4 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Run a batch through the network.

        Args:
            x: Tensor whose last dimension is ``input_size``. A leading batch
                dimension is required because softmax is taken over dim=1,
                e.g. shape (batch, input_size).

        Returns:
            For a (batch, input_size) input, a (batch, num_classes) tensor in
            which every row is non-negative and sums to 1.
        """
        # relu (rectified linear unit) is the activation function applied
        # after each hidden layer; the output layer has no relu.
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)

        # NOTE(review): the loss function is not visible in this cell. If
        # training uses F.nll_loss, it expects log-probabilities
        # (F.log_softmax) rather than softmax output - confirm against the
        # training loop.
        return F.softmax(x, dim=1)

# Build the network and pass one random "image" through it as a quick check.
net = Net()

# One fake 28x28 image, flattened into a batch of one sample: shape (1, 784).
# The leading 1 matters: x.view(28*28) and x.view(1, 28*28) are not the same,
# check 1-intro.ipynb.
x = torch.rand((28, 28)).view(1, 28*28)

output = net(x)

# Last expression of the cell, so the notebook displays it.
output

tensor([[0.1088, 0.0987, 0.1031, 0.0982, 0.0989, 0.0945, 0.1002, 0.0891, 0.1065,
         0.1020]], grad_fn=<SoftmaxBackward>)

In [2]:
# loss: Measures how wrong the network's prediction is; a lower loss means the network guessed the number better
# optimizer: Adjusts the weights and biases according to the loss

import torch.optim as optim

# The Adam algorithm takes in all the params of the network (weights and
# biases) and updates them using the given learning rate.
LEARNING_RATE = 0.001
optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE)

# lr = learning rate.
#   To make our model work better, we need to find the minimum of a very complex function
#   (a function of all the network parameters: dozens of weights, biases, etc.).
#   Finding that minimum means finding the minimum loss.
#   But the function's graph may have local minima too.
#   If the learning rate is too small, the optimizer can get stuck in one of the first local minima.
#   If the learning rate is too big, it can keep overshooting and never find the minimum.
#   We may also adjust the learning rate.
#   Check https://youtu.be/IHZwWFHWa-w for details.

# One epoch is when the ENTIRE dataset is passed forward and backward through
# the neural network exactly ONCE.
EPOCHS = 3
