## Implementing a MultiLayer Neural Network

In [1]:
import torch

#### defining a model class

In [2]:
class NeuralNetwork(torch.nn.Module):
    def __init__(self, num_inputs: int, hidden_size: int, num_outputs: int):
        super().__init__()
        self.layers = torch.nn.Sequential(
            # 1st hidden layer
            torch.nn.Linear(num_inputs, hidden_size),
            torch.nn.ReLU(),
            # 2nd hidden layer
            torch.nn.Linear(hidden_size, hidden_size),
            torch.nn.ReLU(),
            # output layer
            torch.nn.Linear(hidden_size, num_outputs)
        )
        
    def forward(self, x):
        logits = self.layers(x)
        return logits

In [3]:
model_0 = NeuralNetwork(5, 3, 2)

#### summary of model

In [4]:
model_0

NeuralNetwork(
  (layers): Sequential(
    (0): Linear(in_features=5, out_features=3, bias=True)
    (1): ReLU()
    (2): Linear(in_features=3, out_features=3, bias=True)
    (3): ReLU()
    (4): Linear(in_features=3, out_features=2, bias=True)
  )
)

#### total trainable parameter of model

In [5]:
total_parameters_model_0 = sum(p.numel() for p in model_0.parameters())

total_parameters_model_0

38

#### accessing model each layer and its weight (in_features * no_of_neuron)

In [6]:
model_0.layers[0].weight

Parameter containing:
tensor([[-0.3269,  0.0640, -0.2838, -0.4184,  0.2833],
        [ 0.1141, -0.2852, -0.4234, -0.2140, -0.3054],
        [ 0.1560,  0.0231,  0.4014, -0.2042, -0.0227]], requires_grad=True)

#### accessing model layer bias (b_size = no_of_neurons)

In [7]:
model_0.layers[0].bias

Parameter containing:
tensor([ 0.1576,  0.0547, -0.0819], requires_grad=True)

#### forward propagation

In [8]:
X = torch.rand((3, 5)) # because 5 is our input feature 
X

tensor([[0.0446, 0.9921, 0.4155, 0.5430, 0.9817],
        [0.6187, 0.7089, 0.8644, 0.2504, 0.7849],
        [0.8849, 0.4689, 0.7500, 0.8020, 0.4507]])

In [9]:
out = model_0(X)

out # out is 2 prediction and for 3 input

tensor([[-0.5175,  0.0748],
        [-0.5160,  0.0891],
        [-0.5160,  0.0889]], grad_fn=<AddmmBackward0>)

#### no grad
If we just want to use the model for prediction after `training or backpropagation` it is expensive to make that `computational graph` which is used for `backpropagation` so we use context manager while inference.

In [10]:
with torch.no_grad():
    out = model_0(X)
    
out # here there is not grad_fn for backpropagation

tensor([[-0.5175,  0.0748],
        [-0.5160,  0.0891],
        [-0.5160,  0.0889]])

#### using softmax to get the prob from war logit output of inference

In [11]:
with torch.no_grad():
    out= torch.softmax(model_0(X), dim=1)
    
print(out)

tensor([[0.3561, 0.6439],
        [0.3532, 0.6468],
        [0.3532, 0.6468]])


## Setting up Efficient Data Loaders

#### creating a toy dataset

In [12]:
X_train = torch.tensor([
    [-1.2, 3.1],
    [-0.9, 2.9],
    [-0.5, 2.6],
    [2.3, -1.1],
    [2.7, -1.5]
])

y_train = torch.tensor([0, 0, 0, 1, 1])

In [13]:
X_test = torch.tensor([
    [-0.8, 2.8],
    [2.6, -1.6]
])
y_test = torch.tensor([0, 1])

#### class label numbering
Our task has two class 0 and 1 which is `Binary Classification` and if there were four classes `0 1 2 3` there should be four output neuron.

#### defining a custom dataset class for dataloader

In [14]:
from torch.utils.data import Dataset

class ToyDataset(Dataset):
    def __init__(self, X, y):
        self.features = X
        self.labels = y
        
    def __getitem__(self, index):
        one_x = self.features[index]
        one_y = self.labels[index]
        return one_x, one_y
    
    def __len__(self):
        return self.labels.shape[0]

In [15]:
train_dataset = ToyDataset(X_train, y_train)
test_dataset = ToyDataset(X_test, y_test)

In [16]:
len(train_dataset)

5

In [17]:
train_dataset[0]

(tensor([-1.2000,  3.1000]), tensor(0))

In [18]:
len(test_dataset)

2

In [19]:
test_dataset[0]

(tensor([-0.8000,  2.8000]), tensor(0))

#### data loader

In [20]:
from torch.utils.data import DataLoader

In [21]:
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=2,
    shuffle=True
)

In [22]:
test_dataloader = DataLoader(
    dataset=test_dataset,
    batch_size=2,
    shuffle=True
)

we can also use `num_workers` in **DataLoader** to load the next batch of data when the model is training so `CPU` and `GPU` work together to load and train data in parallel.

In [23]:
for i, (x, y) in enumerate(train_dataloader):
    print(f"Batch {i+1}************************************************** \nFeature: {x}\nLabel: {y} with")
    print(f"Batch shape: {x.shape} {y.shape}\n")

Batch 1************************************************** 
Feature: tensor([[-1.2000,  3.1000],
        [ 2.3000, -1.1000]])
Label: tensor([0, 1]) with
Batch shape: torch.Size([2, 2]) torch.Size([2])

Batch 2************************************************** 
Feature: tensor([[-0.9000,  2.9000],
        [-0.5000,  2.6000]])
Label: tensor([0, 0]) with
Batch shape: torch.Size([2, 2]) torch.Size([2])

Batch 3************************************************** 
Feature: tensor([[ 2.7000, -1.5000]])
Label: tensor([1]) with
Batch shape: torch.Size([1, 2]) torch.Size([1])



## Training Loop

In [24]:
import torch.nn.functional as F

# criterion_ = F.cross_entropy(logits, labels) # can also do by this but instantly need to provide logit and prediction.

In [25]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model_0.parameters(), lr=0.1)

In [26]:
model = NeuralNetwork(num_inputs=2, hidden_size=4, num_outputs=2)

In [27]:
EPOCHS = 10

In [28]:
F.sigmoid(torch.rand(2, 1))

tensor([[0.5517],
        [0.6691]])

In [29]:
count = 0

for epoch in range(EPOCHS):
    model.train() # training mode
    for i, (X, y) in enumerate(train_dataloader):
        logits = model(X) # (2, 2) (batch size, features no)
        
        error = criterion(logits, y)

        optimizer.zero_grad()
        
        error.backward()
        
        optimizer.step()
        
        count = count + 1
        
        print(f"Epoch {epoch + 1} Batch {i + 1}")
        print(f"Generalization Error: {error}")
        
print(f"Total time updated weight: {count}")

Epoch 1 Batch 1
Generalization Error: 0.4405747056007385
Epoch 1 Batch 2
Generalization Error: 0.8143681287765503
Epoch 1 Batch 3
Generalization Error: 1.177321434020996
Epoch 2 Batch 1
Generalization Error: 0.8143681287765503
Epoch 2 Batch 2
Generalization Error: 0.8084791898727417
Epoch 2 Batch 3
Generalization Error: 0.4415125250816345
Epoch 3 Batch 1
Generalization Error: 0.8206744194030762
Epoch 3 Batch 2
Generalization Error: 0.4342684745788574
Epoch 3 Batch 3
Generalization Error: 1.177321434020996
Epoch 4 Batch 1
Generalization Error: 0.8094170093536377
Epoch 4 Batch 2
Generalization Error: 0.4342684745788574
Epoch 4 Batch 3
Generalization Error: 1.199836254119873
Epoch 5 Batch 1
Generalization Error: 0.8206744194030762
Epoch 5 Batch 2
Generalization Error: 0.4342684745788574
Epoch 5 Batch 3
Generalization Error: 1.177321434020996
Epoch 6 Batch 1
Generalization Error: 0.4405747056007385
Epoch 6 Batch 2
Generalization Error: 0.8031107187271118
Epoch 6 Batch 3
Generalization Erro