In [1]:
import torch
import torchvision
from torchvision import transforms, datasets

import torch.nn as nn # oop
import torch.nn.functional as F # functional, pass parameter every time. 

import torch.optim as optim

In [2]:
train = datasets.FashionMNIST("", train=True, download=True, transform = transforms.Compose([transforms.ToTensor()]))
test = datasets.FashionMNIST("", train=False, download=True, transform = transforms.Compose([transforms.ToTensor()]))

# training dataset
train_set = torch.utils.data.DataLoader(train, batch_size = 128, shuffle = True)

# test dataset
test_set = torch.utils.data.DataLoader(test, batch_size = 128, shuffle = True)

# defined our batch size as 256. 

### In the cell below, we are going to be building a Feedforward Neural Network (FNN).

### In this type of network, the information moves in only one direction—forward—from the input nodes, through the hidden nodes and to the output nodes at last. There are no cycles or loops.

### More about FNN: https://www.deeplearningbook.org/contents/mlp.html

In [3]:
class Net(nn.Module): # inheriting from nn.Module
    def __init__(self):
        super().__init__() # running the initialization for nn.Module
        
        # We are going to building a Feedforward Neural Network (FNN).
        
        # In this type of network, the information moves in only one direction—forward—from the input nodes,
        # through the hidden nodes and to the output nodes at last. There are no cycles or loops.
        
        # More about FNN: https://www.deeplearningbook.org/contents/mlp.html
        
        self.layer1 = nn.Linear(784, 128) # first fully connected layer from fully connected (fc) network
        # Input: 784 (28*28)
        # 128 : number of neurons - in the hidden layer
        
        self.layer2 = nn.Linear(128, 64)
        # The output of layer1 has to be the input for layer2, which is 128. 
        
        self.layer3 = nn.Linear(64, 64)
        
        self.layer4 = nn.Linear(64, 10)
        # This our output layer. Output is set as 10 since we have 10 labels.
    
    def forward(self, x):
        
       # We have used ReLu as our activation function. 
        
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        x = F.relu(self.layer3(x))
        x = self.layer4(x) # We don't need activation function in the output layer.
        
        # In the output, we want a probability distribution. So, we use soft-max for that.

        return F.softmax(x, dim=1)
        

my_net = Net() # creating a neural network object. 
print(my_net) # printing a summary of our nn object. 

Net(
  (layer1): Linear(in_features=784, out_features=128, bias=True)
  (layer2): Linear(in_features=128, out_features=64, bias=True)
  (layer3): Linear(in_features=64, out_features=64, bias=True)
  (layer4): Linear(in_features=64, out_features=10, bias=True)
)


In [4]:
optimizer = optim.Adam(my_net.parameters(), lr = 2e-3)

###  Adam is an optimization algorithm that can be used instead of the classical stochastic gradient descent (SGD) to update network weights and biases based in training data.

### Read about Adam more at: https://machinelearningmastery.com/adam-optimization-algorithm-for-deep-learning/

### lr: learning rate. dictates size of the step which the optimizer will take.

In [5]:
EPOCHS = 10

for epoch in range(EPOCHS):
    for data in train_set:
                    
        x, y = data
        
        my_net.zero_grad()
        # Sets the gradients of all optimized torch.Tensor s to zero.
        
        # for every mini-batch during the training phase, we typically want to explicitly set
        # the gradients to zero before starting to do backpropragation.
        # (i.e. updating weights and biases)

        x = x.view(-1, 28*28) # reshaping.

        output = my_net(x)
        loss = nn.CrossEntropyLoss()
        loss_calc = loss(output, y)
        
        # output: The result we found; y: What it should be (target).
        
        # In this case, CrossEntropyLoss is used since this is a multi-class classification problem.
        # More about loss functions: https://analyticsindiamag.com/loss-functions-in-deep-learning-an-overview/
        
        
        loss_calc.backward()
        # gradients are being computed
        
        optimizer.step()
        # calculated gradient values are plugged in. 
        
    print("Epoch:", epoch+1, "- Loss:", loss_calc.item())

Epoch: 1 - Loss: 1.7585844993591309
Epoch: 2 - Loss: 1.7413250207901
Epoch: 3 - Loss: 1.7327237129211426
Epoch: 4 - Loss: 1.7772029638290405
Epoch: 5 - Loss: 1.7265501022338867
Epoch: 6 - Loss: 1.6875442266464233
Epoch: 7 - Loss: 1.637118935585022
Epoch: 8 - Loss: 1.6576181650161743
Epoch: 9 - Loss: 1.6405973434448242
Epoch: 10 - Loss: 1.617505669593811


In [6]:
correct_cases = 0
total_cases = 0

# Let's test the network on test data.

with torch.no_grad():
    
    for data in test_set:
        
        x, y = data
        
        output = my_net(x.view(-1,28*28)) # reshaping our batch.   
    
        # let's check whether the results we found from "output" match with the target.
        for index, values in enumerate(output):
            
            if torch.argmax(values) == y[index]:
                correct_cases += 1
                
            total_cases += 1

print("Test Accuracy: ", round(correct_cases*100/total_cases, 2)) # print the test accuracy, round it up to 2 decimals. 

Test Accuracy:  84.97


### For better accuracy adjustments can be made with hyperparameters. 