# Building 2 hidden-layer nerual network

## Imports

In [29]:
import torch
import numpy as np
import torchvision
import torch.nn as nn
import matplotlib.pyplot as plt
import torchmetrics 
import wandb

from tqdm import tqdm
from  torch.nn import functional as F
from torchvision import transforms
from torch.utils.data import DataLoader
from torchvision.datasets import Flowers102


## CONFIGS

In [None]:
Project_name = ""


In [31]:
sweep_config = {
    'method': 'grid',  # or 'random', 'bayes'
    'metric': {
        'name': 'val_accuracy',
        'goal': 'maximize'
    },
    'parameters': {
        'epochs': {'values': [5, 10]},
        'batch_size': {'values': [8, 16]},
        'img_height': {'values': [16, 32]},
        'img_width': {'values': [16, 32]},
        'learning_rate': {'values': [0.001, 0.0001]},
        'hidden_units': {'values': [64, 128]}
    }
}



In [None]:
wandb.init(project=Project_name)
config = wandb.config


In [None]:

IMG_HEIGTH = 224
IMG_WIDTH = 224
IMG_CHANNELS = 3
BATCH_SIZE = 16
HIDDEN_UNITS = 64
EPOCH = 10


In [34]:
CLASS_NAMES = np.array(Flowers102.classes)
CLASS_LEN = len(CLASS_NAMES)


## Data

In [35]:
flower_transform = transforms.Compose([
    transforms.Resize([IMG_HEIGTH, IMG_WIDTH]),
    transforms.ToTensor()
])


In [36]:
train_dataset = Flowers102(
    root="Data/train/",
    download= True, 
    transform= flower_transform, 
    split="train"
)

val_dataset = Flowers102(
    root="Data/eval/",
    download= True, 
    transform= flower_transform, 
    split="val"
)


In [37]:
train_loader = DataLoader(
    dataset= train_dataset, 
    batch_size= BATCH_SIZE , 
    num_workers= 0, 
    shuffle= True
)

val_loader = DataLoader(
    dataset= val_dataset, 
    batch_size= BATCH_SIZE, 
    num_workers= 0, 
    shuffle= True
)


## Model

In [38]:
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


In [39]:
# We will have 2 hidden layer neural netwrok 
class Modelv2(nn.Module):
    def __init__(self, in_features, out_features, hidden_units ,bias=True, device=DEVICE):
        super(Modelv2, self).__init__()
        self.flaten = nn.Flatten()
        self.layer_1 = nn.Linear(
            in_features=in_features,
            out_features= hidden_units, 
            bias= bias,
            device= device
        )
        self.layer_2 = nn.Linear(
            in_features=hidden_units,
            out_features= out_features, 
            bias= bias,
            device= device
        )
        
        self.relu = nn.ReLU()
    
    def forward(self, x):
        """
        The below input is for only a single item from a batch
        x = self.flaten(x)      Input: (224 ,224 ,3) -> Output: (224 * 224 * 3) 
        x = self.layer_1(x)     Input: (224 * 224 * 3) -> Output: hidden_units
        x = self.relu(x)        Input: hidden_units -> Output: hidden_units
        x = self.layer_2(x)     Input: hidden_units -> Output: 102 
        """
        x = self.flaten(x)
        x = self.layer_1(x)
        x = self.relu(x)
        x = self.layer_2(x)
        return x 


In [None]:
model = Modelv2(
    in_features= IMG_CHANNELS * IMG_HEIGTH * IMG_WIDTH,
    out_features= len(CLASS_NAMES), 
    hidden_units= HIDDEN_UNITS
)
model


Modelv2(
  (flaten): Flatten(start_dim=1, end_dim=-1)
  (layer_1): Linear(in_features=150528, out_features=64, bias=True)
  (layer_2): Linear(in_features=64, out_features=102, bias=True)
  (relu): ReLU()
)

In [48]:
loss_fn = nn.CrossEntropyLoss()
optim = torch.optim.Adam(params=model.parameters(), lr=0.5)
accuracy = torchmetrics.Accuracy(task="multiclass", num_classes=CLASS_LEN)


In [75]:
for epoch in tqdm(range(EPOCH)):
    model.train()
    loss_per_batch = []
    print(f"Epoch: {epoch}/{EPOCH}")
    for img, label in train_loader:
        # load the data in DEVICE avliable
        img, label = img.to(DEVICE), label.to(DEVICE)
        
        logits = model(img)
        # print(torch.exp(logits)[2])
        # print(torch.exp(logits).sum(dim=1).unsqueeze(1))
        # print(torch.exp(logits).shape)
        # probs = torch.exp(logits) / torch.exp(logits).sum(dim=1).unsqueeze(1)
        # print(probs[0])
        # print(torch.softmax(logits, dim=1)[0])
        loss = loss_fn(logits, label)
        loss_per_batch.append(loss)
        optim.zero_grad()
        loss.backward()
        optim.step()
    avg_loss = sum(loss_per_batch)/len(loss_per_batch)
    print(f"Average loss per batch: {avg_loss}")
        


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 0/10


 10%|█         | 1/10 [00:04<00:39,  4.36s/it]

Average loss per batch: 4.95280122756958
Epoch: 1/10


 20%|██        | 2/10 [00:08<00:34,  4.36s/it]

Average loss per batch: 4.977730751037598
Epoch: 2/10


 30%|███       | 3/10 [00:13<00:31,  4.55s/it]

Average loss per batch: 4.966175556182861
Epoch: 3/10


 40%|████      | 4/10 [00:17<00:27,  4.50s/it]

Average loss per batch: 4.95829439163208
Epoch: 4/10


 50%|█████     | 5/10 [00:22<00:22,  4.43s/it]

Average loss per batch: 4.9807891845703125
Epoch: 5/10


 60%|██████    | 6/10 [00:26<00:17,  4.42s/it]

Average loss per batch: 4.968995094299316
Epoch: 6/10


 70%|███████   | 7/10 [00:30<00:13,  4.39s/it]

Average loss per batch: 4.951077461242676
Epoch: 7/10


 80%|████████  | 8/10 [00:35<00:08,  4.38s/it]

Average loss per batch: 4.943302154541016
Epoch: 8/10


 90%|█████████ | 9/10 [00:39<00:04,  4.38s/it]

Average loss per batch: 4.953536510467529
Epoch: 9/10


100%|██████████| 10/10 [00:44<00:00,  4.41s/it]

Average loss per batch: 4.977957725524902





we are using cross entropy loss. The cross entropy loss helps us in imporving the probability assigned to the correct label 
in pytorch cross entropy use softmax to convert the logits to probabilities 

Softmax : e**logits_at_i/ sum(e**logits)

In [None]:
# bottom shape = [16] -> this should broadcast to 16, 102
# uper shape = [16, 102]

# for sum operation 
# input tensor -> 16, 102
# if we do a sum along the dim = 1 then all the values which are 102 in every row will be added 
