<a href="https://colab.research.google.com/github/Azadshokrollahi/Advance-machine-learning/blob/develop/0-basics_intro/2-neural_network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Build a Neural Network #

Neural networks are composed of layers/modules that perform operations on data. The torch.nn namespace provides all the building blocks you need to build your own neural network. Every module in PyTorch subclasses nn.Module. A neural network is itself a module that consists of other modules (layers). This nested structure makes it easy to build and manage complex architectures.

In [None]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [None]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f'Using {device} device')

Using cuda device


In [None]:
# We define our neural network by subclassing nn.Module, and initialize the neural network layers in __init__.
# Every nn.Module subclass implements the operations on input data in the forward method.

class NeuralNetwork(nn.Module):
    """Fully connected classifier for 2D inputs.

    Each sample is flattened and passed through two 512-unit hidden layers
    with ReLU activations, followed by a linear output layer that produces
    one raw score (logit) per class.

    Args:
        input_width: Width of each input sample.
        input_height: Height of each input sample.
        label_dim: Number of output classes.

    Attributes:
        softmax_result: ``0`` until the first forward pass; afterwards the
            softmax probabilities computed by the most recent forward pass.
    """

    def __init__(self, input_width: int, input_height: int, label_dim: int):
        super().__init__()
        self.flatten = nn.Flatten()  # Converts each 2D sample into 1D (e.g. 28x28 -> 784)
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(input_width * input_height, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            # No activation after the output layer: a trailing ReLU would clamp
            # every negative logit to 0, so all such classes would end up with
            # the same softmax probability.
            nn.Linear(512, label_dim),
        )
        # Example: y = [2.5, 0.9, 0.2] -> softmax(y) -> [0.768, 0.155, 0.077]
        self.softmax = nn.Softmax(dim=1)
        self.softmax_result = 0

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the raw logits for a batch and cache their softmax.

        Args:
            x: Batch of samples, e.g. [64, 28, 28] = [batch size, width, height].

        Returns:
            The logits, one row per sample and one column per class. The
            matching probabilities are stored in ``self.softmax_result``.
        """
        x = self.flatten(x)  # [64, 28, 28] -> [64, 784]
        logits = self.linear_relu_stack(x)
        self.softmax_result = self.softmax(logits)
        return logits

### One by one the modules ###


In [None]:
# Random stand-in for a mini-batch of three 28x28 images.
input_image = torch.rand((3, 28, 28))
print(input_image.shape)

torch.Size([3, 28, 28])


In [None]:
# nn.Flatten turns every 2D 28x28 image into one contiguous row of 784 pixel
# values; the mini-batch dimension (dim=0) is left untouched.

flatten = nn.Flatten()
flat_image = flatten(input_image)
print(flat_image.shape)

torch.Size([3, 784])


In [None]:
# Linear transformation: 784 flattened pixels -> 20 hidden features.
layer1 = nn.Linear(28 * 28, 20)
hidden1 = layer1(flat_image)
print(hidden1.shape)

torch.Size([3, 20])


Non-linear activations are what create the complex mappings between the model’s inputs and outputs. They are applied after linear transformations to introduce nonlinearity, helping neural networks learn a wide variety of phenomena.

In this model, we use nn.ReLU between our linear layers, but there are other activations that introduce non-linearity in your model, such as LeakyReLU, Sigmoid, Tanh, etc.

In [None]:
# Show the activations before and after ReLU zeroes out the negative entries.
print(f"Before ReLU: {hidden1}\n\n")
hidden1 = nn.functional.relu(hidden1)
print(f"After ReLU: {hidden1}")

Before ReLU: tensor([[-0.5521, -0.1763,  0.2181,  0.3600,  0.1885, -0.4228, -0.1503,  0.5199,
          0.0754,  0.8847,  0.4356, -0.3666, -0.7521,  0.3163,  0.0755, -0.0263,
          0.3717,  0.1715, -0.2635, -0.4260],
        [-0.3759, -0.4892,  0.0807,  0.3759,  0.3675, -0.0627, -0.2016,  0.5176,
         -0.1489,  0.7896,  0.5289,  0.1737, -0.4503, -0.1660, -0.0389, -0.2756,
          0.2657, -0.1171,  0.1742, -0.6998],
        [-0.3712, -0.4284, -0.0033,  0.4337,  0.5216, -0.0041, -0.1897,  0.4734,
          0.0084,  0.5866,  0.5368, -0.1823, -0.0847, -0.2730, -0.1109, -0.1300,
          0.1685,  0.2679, -0.2757, -0.6517]], grad_fn=<AddmmBackward>)


After ReLU: tensor([[0.0000, 0.0000, 0.2181, 0.3600, 0.1885, 0.0000, 0.0000, 0.5199, 0.0754,
         0.8847, 0.4356, 0.0000, 0.0000, 0.3163, 0.0755, 0.0000, 0.3717, 0.1715,
         0.0000, 0.0000],
        [0.0000, 0.0000, 0.0807, 0.3759, 0.3675, 0.0000, 0.0000, 0.5176, 0.0000,
         0.7896, 0.5289, 0.1737, 0.0000, 0.0000, 0.000

***
nn.Sequential is an ordered container of modules. The data is passed through all the modules in the same order as defined. You can use sequential containers to put together a quick network like seq_modules.

In [None]:
# Chain the flatten and first linear layer built earlier with a ReLU and a
# fresh 20 -> 10 output layer, then push a new random mini-batch through.
seq_modules = nn.Sequential(flatten, layer1, nn.ReLU(), nn.Linear(20, 10))
input_image = torch.rand((3, 28, 28))
logits = seq_modules(input_image)
print(logits)

tensor([[ 0.2222,  0.0818,  0.4511,  0.2300, -0.2320, -0.0505, -0.2158,  0.0603,
          0.3155,  0.2240],
        [ 0.2312,  0.0899,  0.4114,  0.3050, -0.1691,  0.0097, -0.1962,  0.1820,
          0.3438,  0.2535],
        [ 0.0462,  0.2974,  0.2842,  0.1784, -0.2367,  0.0944, -0.1067,  0.0969,
          0.2591,  0.2555]], grad_fn=<AddmmBackward>)


***
Finally, we use nn.Softmax, which scales the logits from (-∞, ∞) to values in [0, 1] that sum to 1, representing the model's predicted probabilities.

`dim` indicates the dimension along which softmax is applied (dim=1, so each sample in the mini-batch is normalized on its own and we don't mix values across the mini-batch).

In [None]:
# Apply softmax along dim=1 to the logits, then print the resulting probabilities.
softmax = nn.Softmax(dim=1)
pred_probab = softmax(logits)
print(pred_probab)

tensor([[0.1096, 0.0952, 0.1378, 0.1104, 0.0696, 0.0834, 0.0707, 0.0932, 0.1203,
         0.1098],
        [0.1068, 0.0928, 0.1279, 0.1150, 0.0716, 0.0856, 0.0697, 0.1017, 0.1196,
         0.1093],
        [0.0919, 0.1182, 0.1166, 0.1049, 0.0693, 0.0965, 0.0789, 0.0967, 0.1137,
         0.1133]], grad_fn=<SoftmaxBackward>)


In [None]:
# Build the network for 28x28 inputs and 10 classes, then move it to the chosen device.
model = NeuralNetwork(input_width=28, input_height=28, label_dim=10)
model = model.to(device)
print(model)  # Printing the module shows the network's structure

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
    (5): ReLU()
  )
  (softmax): Softmax(dim=1)
)


In [None]:
# Print the module tree first, then each parameter tensor with its shape and
# a preview of its first two entries along dim 0.
print("Model structure: ", model, "\n\n")

for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.shape} | Values : {param[:2]} \n")

Model structure:  NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
    (5): ReLU()
  )
  (softmax): Softmax(dim=1)
) 


Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values : tensor([[ 0.0354,  0.0149, -0.0158,  ...,  0.0004, -0.0142, -0.0113],
        [-0.0212, -0.0163, -0.0003,  ...,  0.0180, -0.0115,  0.0136]],
       device='cuda:0', grad_fn=<SliceBackward>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values : tensor([ 0.0305, -0.0159], device='cuda:0', grad_fn=<SliceBackward>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values : tensor([[ 0.0145,  0.0071,  0.0184,  ...,  0.0432,  0.0077, -0.0437],
        [-0.0084,  0.0142, -0.0265,  ...,  0.0267,  0.0361, -0

### Use the model ###

In [None]:
# Run one random 28x28 input (a batch of 1) through the model on the chosen device.
X = torch.rand(1, 28, 28, device=device)
logits = model(X)

# Option 1: convert the returned logits to probabilities outside the network.
pred_probab = nn.Softmax(dim=1)(logits)
print(pred_probab)
y_pred = pred_probab.argmax(1)
print(f"Predicted class: {y_pred}")

# Option 2: reuse the probabilities the network stored during the forward pass.
print(model.softmax_result)
# Assign to y_pred (not pred_probab) so the print below reports the class
# derived from model.softmax_result instead of repeating Option 1's value.
y_pred = model.softmax_result.argmax(1)
print(f"Predicted class: {y_pred}")


tensor([[0.0969, 0.1023, 0.0969, 0.1053, 0.1056, 0.1003, 0.0969, 0.1003, 0.0969,
         0.0987]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Predicted class: tensor([4], device='cuda:0')
tensor([[0.0969, 0.1023, 0.0969, 0.1053, 0.1056, 0.1003, 0.0969, 0.1003, 0.0969,
         0.0987]], device='cuda:0', grad_fn=<SoftmaxBackward>)
Predicted class: tensor([4], device='cuda:0')
