# Import packages

In [1]:
!pip install torcheval onnx wandb kaggle --quiet

In [2]:
import os
import json
import numpy as np
import pandas as pd
import torch
from torchvision import transforms
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image
from torch.utils.data import Dataset, DataLoader, random_split, WeightedRandomSampler
import torch.optim as optim
from torcheval.metrics.functional import multiclass_f1_score
import wandb

# Download data if not found
### *You should upload kaggle.json to the current working dir before running this cell*

In [3]:
if not os.path.exists("Data"):
    !mkdir -p ~/.kaggle
    !mv kaggle.json ~/.kaggle/
    !ls ~/.kaggle
    !chmod 600 ~/.kaggle/kaggle.json  # set permission
    !kaggle competitions download -c cassava-leaf-disease-classification -p ./
    !mkdir Data
    !unzip -q -n cassava-leaf-disease-classification.zip -d ./Data
    !rm cassava-leaf-disease-classification.zip

# Wandb Setup
### *Set the environment variable below to your notebook's path, then follow the prompt to enter your wandb API token*

In [4]:
# Publish the notebook's file name through the WANDB_NOTEBOOK_NAME
# environment variable before authenticating with wandb.
os.environ.update({'WANDB_NOTEBOOK_NAME': "vanilla CNN.ipynb"})
# Kept as the cell's last expression so its return value is displayed.
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mwyq[0m ([33mcassava[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

# *Define Customized Dataset*

In [5]:
class LeafDataset(Dataset):
    """Map-style dataset over ``<root_path>/train_images`` described by ``<root_path>/train.csv``.

    The CSV must contain an ``image_id`` column holding the image file names.
    When it also contains a ``label`` column, items are ``(image, label)``
    pairs; otherwise an item is just the image.

    Args:
        root_path: directory containing ``train.csv`` and ``train_images/``.
        transform: optional callable applied to the loaded PIL image.
    """

    def __init__(self, root_path, transform=None):
        self.image_path = os.path.join(root_path, 'train_images')
        self.labels = pd.read_csv(os.path.join(root_path, 'train.csv'))
        self.transform = transform

    def __len__(self):
        """Number of rows in the CSV, i.e. number of samples."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Load the sample at position ``idx`` (an int or a 0-d tensor)."""
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # Positional lookup: `idx` is an offset into the dataset, so it must
        # not depend on whatever index labels the DataFrame happens to carry.
        img_name = os.path.join(self.image_path, self.labels['image_id'].iloc[idx])

        # PIL opens files lazily; decode inside a context manager so the file
        # handle is released right away, and force 3 channels so grayscale or
        # RGBA files cannot break a 3-channel transform/model downstream.
        with Image.open(img_name) as handle:
            image = handle.convert("RGB")

        if self.transform:
            image = self.transform(image)

        if "label" in self.labels.columns:
            return image, self.labels['label'].iloc[idx]
        return image

# Define Torch Module
### *Change this to your own model*

In [6]:
class CNN(nn.Module):
    """Plain convolutional classifier: a conv feature extractor plus an MLP head.

    All convolutions use stride 1 with ``padding='same'``, so only the four
    2x2/stride-2 max-pool layers shrink the feature map. Each one halves
    height and width (rounding down), so the flattened feature size is
    ``100 * (H // 16) * (W // 16)``.

    Args:
        num_classes: number of output logits. Defaults to 5.
        input_size: ``(height, width)`` of the images fed to the network.
            Defaults to ``(128, 128)``, which gives the ``100 * 8 * 8``
            flatten size this model was originally written for.

    Raises:
        ValueError: if ``input_size`` is too small to survive four poolings.
    """

    def __init__(self, num_classes=5, input_size=(128, 128)):
        super().__init__()
        height, width = input_size
        # Four stride-2 poolings => spatial dims are divided by 2**4 = 16.
        feat_h, feat_w = height // 16, width // 16
        if feat_h < 1 or feat_w < 1:
            raise ValueError(
                f"input_size {tuple(input_size)} is too small; both sides must be >= 16"
            )

        self.conv_layers = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=10, kernel_size=5, stride=1, padding='same'),
            nn.ReLU(),
            nn.Conv2d(in_channels=10, out_channels=10, kernel_size=5, stride=1, padding='same'),
            nn.ReLU(),
            nn.Conv2d(in_channels=10, out_channels=10, kernel_size=5, stride=1, padding='same'),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(in_channels=10, out_channels=20, kernel_size=3, stride=1, padding='same'),
            nn.ReLU(),
            nn.Conv2d(in_channels=20, out_channels=20, kernel_size=3, stride=1, padding='same'),
            nn.ReLU(),
            nn.Conv2d(in_channels=20, out_channels=50, kernel_size=3, stride=1, padding='same'),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(in_channels=50, out_channels=50, kernel_size=3, stride=1, padding='same'),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(in_channels=50, out_channels=100, kernel_size=3, stride=1, padding='same'),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # Layer order (and therefore state_dict keys) is kept exactly as before.
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(100 * feat_h * feat_w, 1000),
            nn.ReLU(),
            nn.Linear(1000, 500),
            nn.ReLU(),
            nn.Linear(500, 100),
            nn.ReLU(),
            nn.Linear(100, num_classes),
        )

    def forward(self, x):
        """Map a ``(batch, 3, H, W)`` image batch to ``(batch, num_classes)`` logits."""
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x

# Define HyperParameters
### *Change this according to your run, add config if needed*

In [7]:
# Hyperparameters for one run; handed to wandb.init by model_pipeline.
config = {
    "split": [0.4, 0.2, 0.4],   # train / val / test split ratio
    "batch_size": 128,
    "num_workers": 2,           # number of workers per dataloader
    "balance": True,            # sample the train set with class-balancing weights
    "epochs": 50,
    "learning_rate": 0.001,
    "weight_decay": 0.01,       # weight-decay coefficient given to the AdamW optimizer
    "resize": (128, 128),       # size every image is resized to before the model
}

# Define Run Segments

## The Whole Pipeline

In [8]:
def model_pipeline(hyperparameters):
    """Run one full experiment (build, train, test) inside a single wandb run.

    Args:
        hyperparameters: passed to ``wandb.init`` as the run's ``config``.

    Returns:
        The model after training and testing.
    """
    with wandb.init(project="cassava-leaf", config=hyperparameters):
        # Read hyperparameters back from wandb so that what gets logged is
        # exactly what gets executed.
        run_config = wandb.config

        # Build model, data loaders, loss and optimizer.
        components = make(run_config)
        model, train_loader, eval_loader, test_loader, criterion, optimizer = components
        print(model)

        # Fit, then measure final performance on the held-out test loader.
        train(model, train_loader, eval_loader, criterion, optimizer, run_config)
        test(model, test_loader)

    return model

## 1. Make (Model, loaders...)

### Helper functions for Make

In [9]:
def get_datasets(split, transform):
    """Load the LeafDataset stored under ``./Data`` and split it reproducibly.

    Args:
        split: lengths/fractions forwarded to ``random_split``.
        transform: transform applied by the dataset to every image.

    Returns:
        The subsets produced by ``random_split``, in the order given by ``split``.
    """
    # Fixed seed => the same samples land in the same subset on every run.
    seeded_rng = torch.Generator().manual_seed(42)
    full_dataset = LeafDataset('./Data', transform=transform)
    return random_split(full_dataset, split, generator=seeded_rng)


def _subset_labels(dataset):
    """Return the class label of every sample in ``dataset`` as a list of ints.

    Fast path: when ``dataset`` looks like a ``random_split`` subset (it has
    ``.indices`` and ``.dataset``) of a dataset exposing a ``labels`` frame
    with a ``label`` column, the labels are read straight from that frame, so
    no image is opened or transformed. This assumes row ``i`` of the frame
    describes sample ``i`` of the underlying dataset.

    Fallback: iterate the dataset and keep the second element of each item.
    """
    base = getattr(dataset, "dataset", None)
    indices = getattr(dataset, "indices", None)
    frame = getattr(base, "labels", None)
    if indices is not None and "label" in getattr(frame, "columns", ()):
        column = frame["label"].to_numpy()
        return [int(column[i]) for i in indices]
    return [int(label) for _, label in dataset]


def make_loaders(datasets, batch_size, num_workers, balance=False):
    """Build the train / eval / test DataLoaders.

    Args:
        datasets: sequence ``(train, eval, test)`` of datasets.
        batch_size: batch size used by all three loaders.
        num_workers: worker processes per loader.
        balance: when True, the train loader draws samples with replacement
            using weights inversely proportional to their class frequency
            instead of plain shuffling.

    Returns:
        ``(train_dataloader, eval_dataloader, test_dataloader)``.
    """
    train_set, eval_set, test_set = datasets[0], datasets[1], datasets[2]

    if balance:
        labels = _subset_labels(train_set)

        # Count only the classes that actually occur: no hard-coded class
        # count and no division by zero when a class is missing from the split.
        class_counts = {}
        for label in labels:
            class_counts[label] = class_counts.get(label, 0) + 1

        # Weight each sample by the inverse frequency of its class.
        sample_weights = [10000 / class_counts[label] for label in labels]
        sampler = WeightedRandomSampler(sample_weights,
                                        num_samples=len(sample_weights),
                                        replacement=True)
        train_dataloader = DataLoader(train_set, batch_size=batch_size,
                                      num_workers=num_workers, sampler=sampler)
    else:
        train_dataloader = DataLoader(train_set, batch_size=batch_size,
                                      shuffle=True, num_workers=num_workers)

    # Evaluation and test never need shuffling; a fixed order keeps the
    # per-batch averaged metrics comparable from epoch to epoch.
    eval_dataloader = DataLoader(eval_set, batch_size=batch_size, shuffle=False, num_workers=num_workers)
    test_dataloader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=num_workers)
    return train_dataloader, eval_dataloader, test_dataloader

### Make main function (Adjust Transforms / Model Declaration Here)

In [10]:
def make(config):
    """Assemble everything a run needs from the hyperparameters in ``config``.

    Reads ``resize``, ``split``, ``batch_size``, ``num_workers``, ``balance``,
    ``learning_rate`` and ``weight_decay`` from ``config``.

    Returns:
        ``(model, train_loader, eval_loader, test_loader, criterion, optimizer)``.
    """
    # Preprocessing: resize, convert to tensor, then normalize each channel
    # with mean 0.5 and std 0.5.
    preprocessing = transforms.Compose([
        transforms.Resize(config.resize),
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
    ])

    # Data
    splits = get_datasets(config.split, preprocessing)
    loaders = make_loaders(splits,
                           batch_size=config.batch_size,
                           num_workers=config.num_workers,
                           balance=config.balance)
    train_loader, eval_loader, test_loader = loaders

    # Model (moved to the GPU)
    model = CNN().cuda()

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(),
                            lr=config.learning_rate,
                            weight_decay=config.weight_decay)

    return model, train_loader, eval_loader, test_loader, criterion, optimizer

## 2. Train

### Helper functions for Train

In [11]:
def train_epoch(model, train_loader, criterion, optimizer):
    """Train ``model`` for one pass over ``train_loader``.

    Batches are moved to whatever device the model's parameters live on, so
    the function works for a model on the GPU as well as on the CPU.

    Args:
        model: network to optimize (put into training mode here).
        train_loader: iterable of ``(images, labels)`` batches supporting ``len()``.
        criterion: loss called as ``criterion(output, labels)``.
        optimizer: optimizer stepping the model's parameters.

    Returns:
        ``(train_loss, train_acc)``: the mean of the per-batch loss values and
        the fraction of correctly classified samples.
    """
    model.train()
    # Follow the model instead of assuming CUDA.
    device = next(model.parameters()).device

    running_loss = 0.
    correct, total = 0, 0
    for images, labels in train_loader:
        # move data
        images = images.to(device)
        labels = labels.to(device)

        # forward prop
        output = model(images)
        loss = criterion(output, labels)

        # backward prop
        optimizer.zero_grad()
        loss.backward()

        # update weights
        optimizer.step()

        # record performance
        running_loss += loss.item()
        _, predicted = torch.max(output, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    train_loss = running_loss / len(train_loader)
    train_acc = correct / total
    return train_loss, train_acc


def eval_epoch(model, eval_loader, criterion):
    """Evaluate ``model`` on one pass over ``eval_loader`` without updating it.

    Runs under ``torch.no_grad()`` so no autograd graph is built for the
    forward passes, and moves batches to the device the model lives on.

    Args:
        model: network to evaluate (put into eval mode here).
        eval_loader: iterable of ``(images, labels)`` batches supporting ``len()``.
        criterion: loss called as ``criterion(output, labels)``.

    Returns:
        ``(eval_loss, eval_acc)``: the mean of the per-batch loss values and
        the fraction of correctly classified samples.
    """
    model.eval()
    # Follow the model instead of assuming CUDA.
    device = next(model.parameters()).device

    running_loss = 0.
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in eval_loader:
            # move data
            images = images.to(device)
            labels = labels.to(device)

            # forward prop
            output = model(images)
            loss = criterion(output, labels)

            # record performance
            running_loss += loss.item()
            _, predicted = torch.max(output, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    eval_loss = running_loss / len(eval_loader)
    eval_acc = correct / total
    return eval_loss, eval_acc

### Train main function

In [12]:
def train(model, train_loader, eval_loader, criterion, optimizer, config):
    """Run the full training loop for ``config.epochs`` epochs.

    Every epoch trains on ``train_loader``, evaluates on ``eval_loader``,
    logs the four resulting metrics to wandb at ``step=epoch`` and prints a
    one-line summary.
    """
    # Let wandb record the model's gradients and parameters during training.
    wandb.watch(model, criterion, log="all", log_freq=10)

    for epoch in range(config.epochs):
        train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer)
        eval_loss, eval_acc = eval_epoch(model, eval_loader, criterion)

        metrics = dict(train_loss=train_loss,
                       train_acc=train_acc,
                       eval_loss=eval_loss,
                       eval_acc=eval_acc)
        wandb.log(metrics, step=epoch)
        print(f"Epoch {epoch}: Train Loss: {train_loss:.3f}, Train Acc: {train_acc:.3f}, Eval Loss: {eval_loss:.3f}, Eval Acc: {eval_acc:.3f}")

## 3. Test

In [13]:
def test(model, test_loader):
    """Measure final performance on ``test_loader`` and persist the model.

    Computes accuracy plus micro, macro and per-class F1 (5 classes), logs
    ``test_accuracy`` and an F1 bar chart to wandb, prints a summary, then
    saves the model's ``state_dict`` to ``model_state.pth`` and hands that
    file to ``wandb.save``.

    Args:
        model: trained network (put into eval mode here).
        test_loader: iterable of ``(images, labels)`` batches.
    """
    model.eval()
    # Follow the model instead of assuming CUDA.
    device = next(model.parameters()).device

    # Run the model on the test examples
    with torch.no_grad():
        correct, total = 0, 0
        # Collect per-batch tensors and concatenate once at the end, keeping
        # their integer dtype instead of growing a float tensor batch by batch.
        label_batches, pred_batches = [], []
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            # compute correct / total samples
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            label_batches.append(labels.cpu())
            pred_batches.append(predicted.cpu())

        # compute accuracy
        acc = correct / total
        wandb.log({"test_accuracy": acc})

        # compute f1 scores
        pred_total = torch.cat(pred_batches, dim=0).to(torch.int64)
        label_total = torch.cat(label_batches, dim=0).to(torch.int64)
        f1_micro = multiclass_f1_score(pred_total,
                                       label_total,
                                       num_classes=5,
                                       average="micro")
        f1_macro = multiclass_f1_score(pred_total,
                                       label_total,
                                       num_classes=5,
                                       average="macro")
        f1_each = multiclass_f1_score(pred_total,
                                      label_total,
                                      num_classes=5,
                                      average=None)
        print(f"Test Acc: {acc:.3f}, F1 micro: {f1_micro:.3f}, F1 macro: {f1_macro:.3f}, F1 each: {f1_each}")

        # Store plain Python floats in the table rather than 0-d tensors.
        f1 = [[f"f1_class_{idx}", value.item()] for idx, value in enumerate(f1_each)]
        f1.append(["f1_micro", f1_micro.item()])
        f1.append(["f1_macro", f1_macro.item()])
        table = wandb.Table(data=f1, columns=["class", "f1_score"])
        wandb.log({"my_bar_chart_1": wandb.plot.bar(table, "class", "f1_score", title="F1 Score")})

        torch.save(model.state_dict(), "model_state.pth")
        wandb.save("model_state.pth")
        # ONNX export (currently disabled):
        # torch.onnx.export(model, images, "model.onnx")
        # wandb.save("model.onnx")

# Run Pipeline

In [14]:
# Run the whole pipeline with the hyperparameters defined in `config` and keep
# the returned model for later use in the notebook.
model = model_pipeline(config)

CNN(
  (conv_layers): Sequential(
    (0): Conv2d(3, 10, kernel_size=(5, 5), stride=(1, 1), padding=same)
    (1): ReLU()
    (2): Conv2d(10, 10, kernel_size=(5, 5), stride=(1, 1), padding=same)
    (3): ReLU()
    (4): Conv2d(10, 10, kernel_size=(5, 5), stride=(1, 1), padding=same)
    (5): ReLU()
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(10, 20, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (8): ReLU()
    (9): Conv2d(20, 20, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (10): ReLU()
    (11): Conv2d(20, 50, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (12): ReLU()
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(50, 50, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (15): ReLU()
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (17): Conv2d(50, 100, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (18): 

0,1
eval_acc,█▅▇▆▁▂▄▆▇▇▇▇▇▇▇██▇██▇▇▇▇██▇█▇█▇▇▇██▇█▇█▇
eval_loss,▁▁▁▁▁▂▂▂▃▄▅▅▅▅▄▅▆▆▆█▆▅▅▇█▆▅▆▆▇█▅▆▅▅▆▅▆▅▄
test_accuracy,▁
train_acc,▁▂▂▂▃▅▆▆▇███████████████████████████████
train_loss,██▇▇▆▅▄▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
eval_acc,0.57757
eval_loss,3.15157
test_accuracy,0.56532
train_acc,0.9868
train_loss,0.03992
