# Deep Learning
<table align="left">
  <td>
    <a href="https://colab.research.google.com/github/marcinsawinski/UEP_KIE_DL_CODE2024/blob/main/dl05_cnn.ipynb" target="_parent">
      <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open in Colab"/>
    </a>
  </td>
  <td>
    <a target="_blank" href="https://kaggle.com/kernels/welcome?src=https://github.com/marcinsawinski/UEP_KIE_DL_CODE2024/blob/main/dl05_cnn.ipynb">
      <img src="https://kaggle.com/static/images/open-in-kaggle.svg" alt="Open in Kaggle"/>
    </a>
  </td>
  <td>
    <a target="_blank" href="https://studiolab.sagemaker.aws/import/github/marcinsawinski/UEP_KIE_DL_CODE2024/blob/main/dl05_cnn.ipynb">
      <img src="https://studiolab.sagemaker.aws/studiolab.svg" alt="Open in SageMaker Studio Lab"/>
    </a>
  </td>
</table>

## Preparation - Setup [WandB](https://wandb.ai) and [PyTorch Lightning](https://lightning.ai)

In [None]:
# %pip install  wandb -qU

In [None]:
# %pip install lightning

# Tasks
Check tutorial:
https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html


1. Install and connect to [WandB](https://wandb.ai)
2. Build a CNN model for classification on the Fashion MNIST dataset. Log training to WandB in your own project.
3. Create hyperparameter search with WandB sweep.
4. Try to train the best CNN you can for CIFAR-10.
5. Use pretrained CNNs (like ResNet, VGG, MobileNet). They are trained on large datasets like ImageNet (millions of images).
 - They already know how to detect basic things: edges, textures, shapes, etc.
 - You can fine-tune them (adjust weights a bit) or just reuse them as feature extractors.
 - https://pytorch.org/hub/pytorch_vision_resnet/
 - https://pytorch.org/hub/pytorch_vision_vgg/



# Setup model and datasets

In [None]:
import torch
import torch.nn as nn
# import torch.nn.functional as F
import lightning.pytorch as pl
import torchmetrics
from typing import Any, Dict

class CnnClassifier(pl.LightningModule):
    """Config-driven convolutional image classifier.

    The network, loss, metrics and optimizer are all derived from ``cfg``.

    Required config keys:
        num_classes: number of target classes.
        learning_rate: learning rate passed to Adam.

    Optional config keys (read with ``cfg.get``):
        activation: name of an activation class in ``torch.nn`` (default ``'ReLU'``).
        dropout: dropout probability used in the dense head (default ``0.0``).
        batch_norm: insert batch normalization after conv/linear layers (default ``False``).
        in_channels: number of input image channels (default ``1``).
        conv_channels: output channels of each conv block (default ``(32, 64)``).
        hidden_sizes: widths of the dense layers before the output layer (default ``[]``).
    """

    # Spatial size the feature map is pooled to before the dense head. Using
    # adaptive pooling keeps the head independent of the input resolution.
    POOLED_SIZE = 4

    def __init__(self, cfg: Dict[str, Any]):
        super().__init__()
        self.cfg = cfg
        self.model = self.build_model()
        self.loss_fn = nn.CrossEntropyLoss()

        # One independent metric collection per stage so their states never mix.
        self.train_metrics = self.create_metrics(prefix="train/")
        self.valid_metrics = self.create_metrics(prefix="valid/")
        self.test_metrics = self.create_metrics(prefix="test/")

    def build_model(self) -> nn.Sequential:
        """Assemble the CNN: conv blocks -> adaptive pooling -> dense head.

        Each conv block is ``Conv2d(3x3, padding=1) [-> BatchNorm2d] -> activation
        -> MaxPool2d(2)``. Each ``MaxPool2d(2)`` halves the spatial size, so the
        input must be large enough to survive ``len(conv_channels)`` halvings.

        Returns:
            An ``nn.Sequential`` mapping ``(batch, in_channels, H, W)`` images to
            ``(batch, num_classes)`` logits.
        """
        dropout = self.cfg.get('dropout', 0.0)
        num_classes = self.cfg['num_classes']
        activation_cls = getattr(nn, self.cfg.get('activation', 'ReLU'))
        use_batch_norm = bool(self.cfg.get('batch_norm', False))
        in_channels = self.cfg.get('in_channels', 1)
        conv_channels = self.cfg.get('conv_channels', (32, 64))
        hidden_sizes = self.cfg.get('hidden_sizes', [])

        layers = []

        # Convolutional feature extractor.
        for out_channels in conv_channels:
            layers.append(nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1))
            if use_batch_norm:
                layers.append(nn.BatchNorm2d(out_channels))
            layers.append(activation_cls())
            layers.append(nn.MaxPool2d(2))
            in_channels = out_channels

        # Fixed-size feature map, flattened for the dense head.
        layers.append(nn.AdaptiveAvgPool2d((self.POOLED_SIZE, self.POOLED_SIZE)))
        layers.append(nn.Flatten())
        features = in_channels * self.POOLED_SIZE * self.POOLED_SIZE

        # Dense classification head.
        for hidden in hidden_sizes:
            layers.append(nn.Linear(features, hidden))
            if use_batch_norm:
                layers.append(nn.BatchNorm1d(hidden))
            layers.append(activation_cls())
            if dropout > 0:
                layers.append(nn.Dropout(dropout))
            features = hidden

        # Output layer emits raw logits; CrossEntropyLoss applies log-softmax itself.
        layers.append(nn.Linear(features, num_classes))
        return nn.Sequential(*layers)

    def create_metrics(self, prefix: str) -> torchmetrics.MetricCollection:
        """Create a multiclass accuracy + F1 collection whose keys start with ``prefix``."""
        num_classes = self.cfg['num_classes']
        return torchmetrics.MetricCollection({
            "accuracy": torchmetrics.classification.Accuracy(task="multiclass", num_classes=num_classes),
            "f1": torchmetrics.classification.F1Score(task="multiclass", num_classes=num_classes),
        }, prefix=prefix)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the logits produced by the underlying network for ``x``."""
        return self.model(x)

    def step(self, batch: Any, metrics: torchmetrics.MetricCollection, step_type: str) -> torch.Tensor:
        """Shared train/valid/test step: compute loss, update metrics, log loss.

        Args:
            batch: an ``(inputs, targets)`` pair.
            metrics: the metric collection belonging to this stage.
            step_type: stage name used as the logging prefix ("train", "valid" or "test").

        Returns:
            The cross-entropy loss for the batch.
        """
        x, y = batch
        logits = self(x)
        loss = self.loss_fn(logits, y)
        metrics.update(logits, y)
        # Training loss is logged per step; validation/test loss is aggregated per epoch.
        self.log(f"{step_type}/loss", loss, on_step=(step_type == "train"),
                 on_epoch=(step_type != "train"), prog_bar=True,)
        return loss

    def training_step(self, batch: Any, batch_idx: int) -> torch.Tensor:
        """Run one training batch and return its loss for backpropagation."""
        loss = self.step(batch, self.train_metrics, "train")
        return loss

    def validation_step(self, batch: Any, batch_idx: int) -> None:
        """Run one validation batch (loss and metrics are logged, nothing returned)."""
        self.step(batch, self.valid_metrics, "valid")

    def test_step(self, batch: Any, batch_idx: int) -> None:
        """Run one test batch (loss and metrics are logged, nothing returned)."""
        self.step(batch, self.test_metrics, "test")

    def on_train_epoch_end(self) -> None:
        """Log the accumulated training metrics and reset them for the next epoch."""
        self.log_dict(self.train_metrics.compute(), prog_bar=True)
        self.train_metrics.reset()

    def on_validation_epoch_end(self) -> None:
        """Log the accumulated validation metrics and reset them for the next epoch."""
        self.log_dict(self.valid_metrics.compute(), prog_bar=True)
        self.valid_metrics.reset()

    def on_test_epoch_end(self) -> None:
        """Log the accumulated test metrics and reset them."""
        self.log_dict(self.test_metrics.compute(), prog_bar=True)
        self.test_metrics.reset()

    def configure_optimizers(self) -> torch.optim.Optimizer:
        """Return an Adam optimizer over all parameters using ``cfg['learning_rate']``."""
        return torch.optim.Adam(self.parameters(), lr=self.cfg['learning_rate'])

In [None]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Preprocessing applied to every image: tensor conversion followed by
# normalization with mean 0.5 / std 0.5 (example values).
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
)

# Fetch the official FashionMNIST train and test splits into ./data.
train_dataset, test_dataset = (
    datasets.FashionMNIST(
        root="./data", train=is_train, download=True, transform=transform
    )
    for is_train in (True, False)
)

# Hold out 20% of the official training split for validation.
train_size = int(0.8 * len(train_dataset))
valid_size = len(train_dataset) - train_size

# A fixed seed makes the split reproducible across kernel restarts.
# After this call:
#   train_dataset -> 80% of the original training split
#   valid_dataset -> remaining 20% of the original training split
#   test_dataset  -> the untouched official test split
train_dataset, valid_dataset = torch.utils.data.random_split(
    train_dataset,
    [train_size, valid_size],
    generator=torch.Generator().manual_seed(42),
)

# Only the training loader shuffles; evaluation loaders keep a fixed order.
train_loader, valid_loader, test_loader = (
    DataLoader(subset, batch_size=64, shuffle=do_shuffle)
    for subset, do_shuffle in (
        (train_dataset, True),
        (valid_dataset, False),
        (test_dataset, False),
    )
)

In [None]:
import time
import wandb
from lightning.pytorch.loggers import WandbLogger
from lightning.pytorch import Trainer
from lightning.pytorch.callbacks.early_stopping import EarlyStopping


def train(config=None, project=None):
    """Run one W&B-tracked training + test run of ``CnnClassifier``.

    Works both for a direct call (pass ``config`` and ``project``) and as the
    function handed to ``wandb.agent``, in which case the sweep controller
    supplies the configuration.

    Args:
        config: hyperparameter dictionary forwarded to ``wandb.init``. Must
            provide the keys used for the run name and the model:
            ``architecture``, ``learning_rate``, ``epochs``, ``batch_size``,
            ``batch_norm``, ``activation``, ``dropout``, ``hidden_sizes``,
            ``num_classes``.
        project: W&B project name forwarded to ``wandb.init``.
    """
    with wandb.init(config=config, project=project):
        # If called by wandb.agent, this config is set by the sweep controller.
        cfg = wandb.config
        name = f"{cfg['architecture']}_lr{cfg['learning_rate']:.1e}_ep{cfg['epochs']}_bs{cfg['batch_size']}_bn{cfg['batch_norm']}_act{cfg['activation']}_do{cfg['dropout']}_hs{'-'.join([str(x) for x in cfg['hidden_sizes']])}_{time.strftime('%m%d-%H%M')}"
        wandb.run.name = name

        # Build the loaders from the configured batch size so that the swept
        # `batch_size` (also shown in the run name) actually takes effect.
        # int() guards against the sweep sampler handing back a float.
        batch_size = int(cfg['batch_size'])
        train_batches = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        valid_batches = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)
        test_batches = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

        # Create model and attach gradient/parameter logging.
        model = CnnClassifier(cfg)
        wandb_logger = WandbLogger()
        wandb_logger.watch(model, log="all")

        # Stop early once validation accuracy has not improved for 3 epochs.
        trainer = Trainer(
            logger=wandb_logger,
            max_epochs=cfg['epochs'],
            accelerator="auto",
            devices=1,
            callbacks=[EarlyStopping(monitor="valid/accuracy", mode="max", min_delta=0.00, patience=3)]
        )

        # Fit model
        trainer.fit(model, train_batches, valid_batches)

        # Evaluate on the held-out test split.
        trainer.test(model, test_batches)

# Single run

In [None]:
# Hyperparameters for one manually configured run.
cfg = dict(
    owner="kowalski_jan",
    learning_rate=1e-3,
    architecture="CNN",
    activation='ReLU',
    batch_norm=True,
    dropout=0.3,
    dataset="FMNIST",
    epochs=10,
    batch_size=32,
    num_classes=10,
    input_size=28 * 28,
    hidden_sizes=[512, 256, 128],
)

# The W&B project name is derived from the user name.
user = "kowalski_jan"  # your name here
project = f"student_{user}_FMNIST_LIGHTNING"

train(config=cfg, project=project)

# Sweep

In [None]:
from pprint import pprint as pp

# Random search over the hyperparameters below. F1 is a "higher is better"
# score, so the sweep must maximize it (minimizing would favor the worst runs).
sweep_config = {"method": "random"}
metric = {"name": "valid/f1", "goal": "maximize"}
sweep_config["metric"] = metric

# Fixed values use "value"; categorical choices use "values".
parameters_dict = {
    "owner": {"value": "kowalski_jan"},
    "epochs": {"value": 25},
    "architecture": {"value": "CNN"},
    "input_size": {"value": 28*28},
    "hidden_sizes": {"value": [512, 256, 128]},
    "dataset": {"value": 'FMNIST'},
    "num_classes": {"value": 10},
    "dropout": {"values": [0.0, 0.3, 0.5]},
    "batch_norm": {"values": [True, False]},
    "activation": {"values": ['Sigmoid', 'ReLU']},
}

sweep_config["parameters"] = parameters_dict

# Continuous / quantized search spaces.
parameters_dict.update(
    {
        "learning_rate": {
            "distribution": "uniform",
            "min": 1e-3,
            "max": 1e-2,
        },
        "batch_size": {
            # values between 16 and 64, quantized in steps of 8,
            # with evenly-distributed logarithms
            "distribution": "q_log_uniform_values",
            "q": 8,
            "min": 16,
            "max": 64,
        },
    }
)

pp(sweep_config)

In [None]:
# Register the sweep in the project named after the configured owner;
# the returned id is what the agent attaches to.
sweep_owner = sweep_config["parameters"]["owner"]["value"]
project = f"student_{sweep_owner}_FMNIST_LIGHTNING"
sweep_id = wandb.sweep(sweep_config, project=project)

In [None]:
# Start an agent for the registered sweep: it calls `train` once per sampled
# configuration, for 10 runs.
wandb.agent(sweep_id, function=train, count=10)