# Weights and Bias


## Imports

In [None]:
import torch
import numpy as np
import torchvision
import torch.nn as nn
import matplotlib.pyplot as plt
import torchmetrics 
import wandb
import ipywidgets
import nbformat

from tqdm.auto import tqdm
from  torch.nn import functional as F
from torchvision import transforms
from torch.utils.data import DataLoader
from torchvision.datasets import Flowers102


## CONFIGS

In [2]:
IMG_HEIGTH = 224
IMG_WIDTH = 224
IMG_CHANNELS = 3
BATCH_SIZE = 16
HIDDEN_UNITS = 64
EPOCH = 10

CLASS_NAMES = np.array(Flowers102.classes)
CLASS_LEN = len(CLASS_NAMES)


## MODEL CODE

In [None]:
flower_transform = transforms.Compose([
    transforms.Resize([IMG_HEIGTH, IMG_WIDTH]),
    transforms.ToTensor()
])

train_dataset = Flowers102(
    root="Data/train/",
    download= True, 
    transform= flower_transform, 
    split="train"
)

val_dataset = Flowers102(
    root="Data/eval/",
    download= True, 
    transform= flower_transform, 
    split="val"
)

train_loader = DataLoader(
    dataset= train_dataset, 
    batch_size= BATCH_SIZE , 
    num_workers= 0, 
    shuffle= True
)

val_loader = DataLoader(
    dataset= val_dataset, 
    batch_size= BATCH_SIZE, 
    num_workers= 0, 
    shuffle= True
)

DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# We will have 2 hidden layer neural netwrok 
class Modelv2(nn.Module):
    def __init__(self, in_features, out_features, hidden_units ,bias=True, device=DEVICE):
        super(Modelv2, self).__init__()
        self.flaten = nn.Flatten()
        self.layer_1 = nn.Linear(
            in_features=in_features,
            out_features= hidden_units, 
            bias= bias,
            device= device
        )
        self.layer_2 = nn.Linear(
            in_features=hidden_units,
            out_features= out_features, 
            bias= bias,
            device= device
        )
        
        self.relu = nn.ReLU()
    
    def forward(self, x):
        """
        The below input is for only a single item from a batch
        x = self.flaten(x)      Input: (224 ,224 ,3) -> Output: (224 * 224 * 3) 
        x = self.layer_1(x)     Input: (224 * 224 * 3) -> Output: hidden_units
        x = self.relu(x)        Input: hidden_units -> Output: hidden_units
        x = self.layer_2(x)     Input: hidden_units -> Output: 102 
        """
        x = self.flaten(x)
        x = self.layer_1(x)
        x = self.relu(x)
        x = self.layer_2(x)
        return x 


model = Modelv2(
    in_features= IMG_CHANNELS * IMG_HEIGTH * IMG_WIDTH,
    out_features= len(CLASS_NAMES), 
    hidden_units= HIDDEN_UNITS
)

loss_fn = nn.CrossEntropyLoss()
optim = torch.optim.Adam(params=model.parameters(), lr=0.5)
accuracy = torchmetrics.Accuracy(task="multiclass", num_classes=CLASS_LEN)


In [None]:
def train(): 
    for epoch in tqdm(range(EPOCH)):
        model.train()
        loss_per_batch = []
        print(f"Epoch: {epoch}/{EPOCH}")
        for img, label in train_loader:
            # load the data in DEVICE avliable
            img, label = img.to(DEVICE), label.to(DEVICE)
            
            logits = model(img)
            # print(torch.exp(logits)[2])
            # print(torch.exp(logits).sum(dim=1).unsqueeze(1))
            # print(torch.exp(logits).shape)
            # probs = torch.exp(logits) / torch.exp(logits).sum(dim=1).unsqueeze(1)
            # print(probs[0])
            # print(torch.softmax(logits, dim=1)[0])
            loss = loss_fn(logits, label)
            loss_per_batch.append(loss)
            optim.zero_grad()
            loss.backward()
            optim.step()
        avg_loss = sum(loss_per_batch)/len(loss_per_batch)
        print(f"Average loss per batch: {avg_loss}")
        


## WANDB

### HyperParameters

* Epochs
* Learning Rate
* Batch size
* Nodes in hidden Layers
* Img h/w

Weights and Biases (W & B) is a powerfull tool for: 
* Tracking experiments
* Comparing model runs
* hyperparameter sweeps 
* logging metrics, graidents, models and more 

It integrates easily with popular frameworks like Pytorch, Tnesorflow, Keras, HuggingFace, Scikit-learn

**In pseudocode, what we'll do is:**

```python
# Import the library
import wandb

# start a new experiment 
wandb.init(project="my-project")

# capture a dictionary of hyperparameters with config
wandb.config = {"learning_rate": 0.001, "epochs": 100, "batch_size": 128}

# set up model and data
model, dataloader = get_model(), get_data()

# track gradients
wandb.watch(model)

for batch in dataloader:
    metrics = model.training_step()
    wandb.log(metrics)

# save model at the end
model.to_onnx()
wandb.save("model.onnx")
```


## Data loading and Model

In [13]:
PROJECT_NAME = "flower102"

wandb.init(
    project=PROJECT_NAME
)


In [14]:

flower_transform = transforms.Compose([
    transforms.Resize([IMG_HEIGTH, IMG_WIDTH]),
    transforms.ToTensor()
])

def get_data(train=True, subset=False, slice=5):
    
    path = "Data/train/" if train else "Data/val/"
    split = "train" if train else "val"
    
    full_dataset = Flowers102(
        root=path,
        download= True, 
        transform= flower_transform, 
        split= split
    )
    if subset:
        sub_dataset = torch.utils.data.Subset(
            dataset=full_dataset, 
            indices=range(0, len(full_dataset), slice)
        )
    return sub_dataset if subset else full_dataset
    

def make_loader(dataset, batch_size):
    loader = DataLoader(
        dataset= dataset,
        batch_size= batch_size,
        shuffle= True, 
        num_workers= 2, 
        pin_memory= True
    )
    return loader


In [15]:
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


In [16]:
# We will have 2 hidden layer neural netwrok 
class Modelv2(nn.Module):
    def __init__(self, in_features, out_features, hidden_units ,bias=True, device=DEVICE):
        super(Modelv2, self).__init__()
        self.flaten = nn.Flatten()
        self.layer_1 = nn.Linear(
            in_features=in_features,
            out_features= hidden_units, 
            bias= bias,
            device= device
        )
        self.layer_2 = nn.Linear(
            in_features=hidden_units,
            out_features= out_features, 
            bias= bias,
            device= device
        )
        
        self.relu = nn.ReLU()
    
    def forward(self, x):
        """
        The below input is for only a single item from a batch
        x = self.flaten(x)      Input: (224 ,224 ,3) -> Output: (224 * 224 * 3) 
        x = self.layer_1(x)     Input: (224 * 224 * 3) -> Output: hidden_units
        x = self.relu(x)        Input: hidden_units -> Output: hidden_units
        x = self.layer_2(x)     Input: hidden_units -> Output: 102 
        """
        x = self.flaten(x)
        x = self.layer_1(x)
        x = self.relu(x)
        x = self.layer_2(x)
        return x 


## Train

In [23]:


def train_log(loss, example_ct, epoch):
    wandb.log({"epoch": epoch, "loss": loss, "example_ct": example_ct})
    print(f"Loss after {str(example_ct).zfill(5)} examples: {loss:.3f}")


In [24]:
def train(model, loader, criterion, optimizer, config):
    wandb.watch(
        models= model,
        criterion= criterion,
        log='all',
        log_freq= 10
    )
    
    total_batches = len(loader) * config.epochs
    example_ct = 0
    batch_ct = 0
    model.train()
    for epoch in tqdm(range(config.epochs)):
        for _ , (images, labels) in enumerate(loader):
            loss = train_batch(
                x = images,
                y = labels,
                model = model,
                loss = criterion,
                optimizer = optimizer 
            )
            example_ct += len(images)
            batch_ct += 1
            
            if ((batch_ct + 1) % 25) == 0:
                train_log(loss, example_ct, epoch)
            

def train_batch(x, y, model, optimizer, loss):
    x, y = x.to(DEVICE), y.to(DEVICE)
    
    outputs = model(x)
    loss = loss(outputs, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    return loss


## Model Pipeline

In [25]:
def make(config):
    # Make the data
    train, val = get_data(train=True), get_data(train=False)
    train_loader = make_loader(train,batch_size = config.batch_size)
    val_loader = make_loader(val, batch_size = config.batch_size)
    
    # Make the model 
    model = Modelv2(
        in_features= config.in_features,
        out_features= config.out_features, 
        hidden_units= config.hidden_units,
        device= DEVICE
    )
    
    # Make the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(
        params= model.parameters(),
        lr = config.learning_rate
    )
    
    return model, train_loader, val_loader, criterion, optimizer


In [26]:
def model_pipeline(hyperparameters):
    with wandb.init(project=PROJECT_NAME, config=hyperparameters):
        config = wandb.config
        
        model, train_loader, val_loader, criterion, optimizer = make(config)
        print(model)
        
        train(model, train_loader, criterion, optimizer, config)
        
        # val(model, val_loader)
        
        return model
    


## RUN

In [None]:
CLASS_NAMES = np.array(Flowers102.classes)
config = dict(
    epochs = 20,
    in_features = 3 * 224 * 224,
    hidden_units = 64,
    out_features = len(CLASS_NAMES),
    learning_rate = 0.1,
    batch_size = 16
)


In [33]:
model = model_pipeline(config)


Modelv2(
  (flaten): Flatten(start_dim=1, end_dim=-1)
  (layer_1): Linear(in_features=150528, out_features=64, bias=True)
  (layer_2): Linear(in_features=64, out_features=102, bias=True)
  (relu): ReLU()
)


  0%|          | 0/20 [00:00<?, ?it/s]

Loss after 00384 examples: 4.743
Loss after 00784 examples: 4.817
Loss after 01180 examples: 4.708
Loss after 01580 examples: 4.691
Loss after 01980 examples: 4.805
Loss after 02376 examples: 4.561
Loss after 02776 examples: 4.678
Loss after 03172 examples: 4.631
Loss after 03572 examples: 4.772
Loss after 03972 examples: 4.758
Loss after 04368 examples: 4.700
Loss after 04768 examples: 4.770
Loss after 05164 examples: 4.699
Loss after 05564 examples: 4.668
Loss after 05964 examples: 4.789
Loss after 06360 examples: 4.590
Loss after 06760 examples: 4.730
Loss after 07156 examples: 4.575
Loss after 07556 examples: 4.684
Loss after 07956 examples: 4.659
Loss after 08352 examples: 4.698
Loss after 08752 examples: 4.657
Loss after 09152 examples: 4.736
Loss after 09548 examples: 4.659
Loss after 09948 examples: 4.728
Loss after 10344 examples: 4.637
Loss after 10744 examples: 4.675
Loss after 11144 examples: 4.724
Loss after 11540 examples: 4.759
Loss after 11940 examples: 4.774
Loss after

0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇████
example_ct,▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
loss,▅▇▄▄▇▄▂▆▆▄▄▃▆▁▁▃▄▃▅▃▃▄▆▃▆▆▅▄▄▆▆█▂▅▃▄█▄▂▄

0,1
epoch,19.0
example_ct,20308.0
loss,4.68267
