In [None]:
# 1. import everything you need (e.g., model, dataset, etc.)
# 2. Use Random Search to find the best hyperparams(e.g., batch_size, augmentation, optimizer, loss_fn, etc.)
# 3. Report the best train configuration
# 4. Save the best values (for hyperparams)
# 5. Use the found values in "train.ipynb" to train the model with them

# Code Cells for GoogleColab
> This way we can execute our code from GitHub without any hassle. Just:
>> 1. Add all the packages needed (that are not already in Colab) to "requirements.txt".
>> 2. Make the GitHub repo public, unless the Colab account (e.g., <colab_pro_owner>@gmail.com) has been granted access to that repo.

In [None]:
# !git clone https://github.com/tekboart/semantic-segmentaion-pytorch

In [None]:
# %pwd

In [None]:
# %cd semantic-segmentaion-pytorch/

In [None]:
# !pip install -r requirements.txt

In [None]:
# reload modules
from importlib import reload

# load pretrained segmentation models (written in pytorch)
import segmentation_models_pytorch as smp
from segmentation_models_pytorch.encoders import get_preprocessing_fn

# PyTorch
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau
from torch.utils.data import DataLoader

# torchvision
from torchviz import make_dot
import torchvision.transforms.v2 as TF

# torchmetrics
from torchmetrics.classification import Dice, BinaryJaccardIndex

# Ray Tune
import ray
from ray import tune
from ray.air import Checkpoint, session
from ray.tune.schedulers import ASHAScheduler
from ray.tune.search.optuna import OptunaSearch
from ray.air.config import ScalingConfig

# Serialize/Deserialize Json files
import json

# Data Augmentation
import albumentations as A
import albumentations.augmentations.functional as F
from albumentations.pytorch import ToTensorV2

# get data/time with desired format
from datetime import datetime

time_format = "%Y.%m.%d@%H-%M-%S"

# work with images
import cv2
from PIL import Image

# slice Iterables and turn to GEN
from itertools import islice

# keep numpy use to a min
# as we store our torch.Tensors to GPU Vram but numpy in RAM (it only supports CPU)
import pandas as pd
import numpy as np
import random

# to have a progress bar
from tqdm import tqdm

# To use pretrained segmentation models (implement in PyTorch)

# visualization
import seaborn as sns
import matplotlib.pyplot as plt
from pprint import pprint
# uncomment if you want to globally allow sns to handle the plot style
# it adds unwanted style (i.e., grid) to .imshow()
# so it is better to use it as a context manager to style only what I want
# >>> with sns.axes_style('darkgrid'):
# >>>     plt.imshow(...)
# sns.set_theme(
#     context="notebook",
#     style="darkgrid",
#     palette="deep",
#     font="sans-serif",
#     font_scale=1,
#     color_codes=True,
#     rc={'axes.grid': False},
# )

# OS/File/Path management
import sys
import os

# Misc
from functools import partial

# load my custom Classes/Functions/etc.
from utils.training import fit_fn
from utils.dataset import get_loaders, SegmentaionDataset
from utils.models.unet import UnetScratch
from utils.visualization import (
    image_mask_plot,
    ImageAntiStandardize,
    plot_metrics
)
from utils.metrics import (
    AccuracyBinarySegment,
    DiceBinarySegment,
    JaccardBinarySegment,
    PrecisionBinarySegment,
    RecallBinarySegment,
    F1BinarySegment,
    DiceBCELoss,
)

## Remove sources of non-determinism

In [None]:
# Single seed shared by every RNG below, so one edit re-seeds the whole notebook.
SEED = 0

# Seed the PyTorch RNG for all devices (both CPU and CUDA).
torch.manual_seed(SEED)
# Custom operators may draw from Python's own RNG, so seed it as well.
random.seed(SEED)
# Seed the global NumPy RNG for any library that relies on it.
np.random.seed(SEED)

# cuBLAS (CUDA >= 10.2) is only deterministic when this variable is set before the
# first CUDA call. `setdefault` keeps a value that was already exported by the user.
os.environ.setdefault("CUBLAS_WORKSPACE_CONFIG", ":4096:8")

# Ask PyTorch for deterministic algorithms instead of nondeterministic ones.
# CAVEAT: without `warn_only`, an operation that has no deterministic implementation
# raises an error; with `warn_only=True` it only emits a warning.
torch.use_deterministic_algorithms(True, warn_only=True)
# CUDA convolution determinism
torch.backends.cudnn.deterministic = True
# Keep the cuDNN auto-tuner off, as it may select a different kernel on each run.
torch.backends.cudnn.benchmark = False


# -- DataLoader
# make it deterministic but still allow it to randomize the data order
def seed_worker(worker_id):
    """
    Re-seed NumPy and Python's `random` from the current torch seed.

    Written in the shape of a DataLoader `worker_init_fn` hook.

    Parameters
    ----------
    worker_id: int
        Id handed over by the caller; it is not used here.
    """
    # Both np.random.seed and the modulo keep the value inside 32 bits.
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)


# Dedicated generator (seeded like everything else) to hand to the DataLoader.
DATA_LOADER_GEN = torch.Generator()
DATA_LOADER_GEN.manual_seed(SEED)

# Define the Search Hyperparameters Space
> that is, the hyperparameters and their corresponding values to search over.

In [None]:
# ---- Augmentation building blocks ------------------------------------------------
# Every factory returns brand-new transform objects, so the candidate pipelines
# below never share a transform instance with each other.
def _square_image_augs():
    """Rotation-style transforms (use only when img_height == img_width)."""
    return [
        A.Rotate(limit=5, p=0.5),
        A.RandomRotate90(p=0.5),
        A.Transpose(p=0.5),
    ]


def _color_jitter():
    """Brightness/contrast/saturation/hue jitter, applied with p=0.8."""
    return A.ColorJitter(brightness=0.3, contrast=0.05, saturation=0.1, hue=0.05, p=0.8)


def _rgb_shift():
    """Small per-channel RGB shift, applied with p=0.8."""
    return A.RGBShift(r_shift_limit=5, g_shift_limit=5, b_shift_limit=2, p=0.8)


# ---- Candidate augmentation pipelines ----------------------------------------------
# TODO: As my last model's val & test metrics were very different, maybe stronger
#       augmentations are needed for the model to generalize better.
# Resize/Flip/etc. (known to be harmless) are left out on purpose; the train func adds them:
#   A.Compose([*hyper_config['train_augmentation'], A.HorizontalFlip(...), A.Resize(..), etc.])
# TODO: a CenterCrop to 0.9 * image size (p=0.3) was considered for the first pipeline;
#       it needs image_height & image_width to be available in the train func.
_AUGS_GEOMETRIC_COLOR_COMPRESSION = [
    *_square_image_augs(),
    _color_jitter(),
    A.ImageCompression(quality_lower=80, quality_upper=100, p=0.3),
    _rgb_shift(),
]
_AUGS_GEOMETRIC_COLOR = [
    *_square_image_augs(),
    _color_jitter(),
    _rgb_shift(),
]
# No fancy augmentation at all: only the flips/resize appended by the train func.
_AUGS_NONE = []

# ---- The search space ------------------------------------------------------------------
config = {
    "lr": tune.loguniform(1e-4, 1e-3),
    "scheduler_factor": tune.choice([0.5, 0.3, 0.1]),
    "batch_size": tune.choice([4, 8, 16, 32]),
    # sizes must be divisible by 32
    "image_size": tune.choice([352, 256, 160]),
    "pretrained_arch": tune.choice([smp.UnetPlusPlus, smp.FPN, smp.DeepLabV3Plus]),
    # TODO: Maybe remove ResNet152 as it's both (1) computationally expensive and
    #       (2) can overfit very easily (considering its #params)
    "pretrained_encoder": tune.choice(
        [
            "mobilenet_v2",
            "timm-mobilenetv3_large_100",
            "timm-efficientnet-b8",
            "resnet152",
        ]
    ),
    "loss_fn": tune.choice([nn.BCEWithLogitsLoss(), DiceBCELoss(from_logits=True)]),
    "train_augmentation": tune.choice(
        [
            _AUGS_GEOMETRIC_COLOR_COMPRESSION,
            _AUGS_GEOMETRIC_COLOR,
            _AUGS_NONE,
        ]
    ),
}

In [None]:
# Prefer the first CUDA GPU and fall back to the CPU when CUDA is unavailable.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# The backslash is escaped on purpose: a bare "\C" is an invalid escape sequence
# (a warning today, an error in a future Python). The printed text is unchanged.
print("Device used for calculation (CPU\\Cuda):", device)

# Create The Model Func
> Everything needed for training a model needs to be within its scope (it should not use any variable from outside)

In [None]:
# Create a model_creator for the hyperparameter search: builds + compiles + trains one model
# TODO: Can I make it reusable for other models as well?
# A: I don't think so
def model_creator(config):
    """
    Instantiate, compile and train (aka .fit()) one network for a single trial.

    The data loaders, model, metrics, loss, optimizer and LR scheduler are all
    created inside this function, because Ray Tune asks for every variable the
    trainable uses to live in its own scope.

    Parameters
    ----------
    config: dict
        One sampled point of the search space. The keys read here are
        "pretrained_arch", "pretrained_encoder", "train_augmentation",
        "image_size", "batch_size", "loss_fn", "lr" and "scheduler_factor".

    Returns
    -------
    None
        Nothing is returned; the value returned by `fit_fn` is discarded.
    """
    # Use the GPU when there is one, else fall back to the CPU so that the
    # function also works on CPU-only machines.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Fixed (non-tuned) settings. Only "epochs", "num_workers", "pin_mem",
    # "input_channels", "num_classes" and the train/val directories are read below;
    # lr, batch_size, scheduler_factor and the image size come from `config` instead.
    hyper_params = {
        # "device": str(device),
        "lr": 1e-3,
        "lr_finetune": 1e-4,  # for transfer learning (phase 2) (= lr / 1000)
        "epochs": 10,  # XXX: Set the #epochs to at least 10 for the real run in Colab
        "epochs_finetune": 10,  # for transfer learning (phase 2)
        "batch_size": 16,
        "num_workers": 2,
        # Use Height == Width to use 90-rotations/transpose in data aug
        "image_height": 352,
        "image_width": 352,
        "input_channels": 3,
        "num_classes": 1,
        "data_format": "channels_first",
        # pinned host memory is only useful when batches are copied to a GPU
        "pin_mem": device.type == "cuda",
        "scheduler_step": 5,
        "scheduler_factor": 0.5,
        "scheduler_factor_finetune": 0.5,  # for transfer learning (phase 2)
        "train_img_dir": os.path.join("data", "traincrop", "img"),
        "train_mask_dir": os.path.join("data", "traincrop", "mask"),
        "val_img_dir": os.path.join("data", "valcrop", "img"),
        "val_mask_dir": os.path.join("data", "valcrop", "mask"),
        "test_img_dir": os.path.join("data", "testcrop", "img"),
        "test_mask_dir": os.path.join("data", "testcrop", "mask"),
        "pretrained_model_encoder": "timm-mobilenetv3_large_100",
    }

    def seed_worker(worker_id):
        """Re-seed NumPy and `random` in a DataLoader worker from the torch seed."""
        worker_seed = torch.initial_seed() % 2**32
        np.random.seed(worker_seed)
        random.seed(worker_seed)

    # Seeded generator handed to the loaders, so every trial starts from seed 0.
    DATA_LOADER_GEN = torch.Generator()
    DATA_LOADER_GEN.manual_seed(0)

    # ---- Model: the sampled architecture on top of the sampled encoder ----
    # TODO: Load the preprocess_fn before, so no need to create it for each run
    model_arch = config["pretrained_arch"]
    model = model_arch(
        # choose encoder
        encoder_name=config["pretrained_encoder"],
        # use `imagenet` pre-trained weights for encoder initialization
        encoder_weights="imagenet",
        # model input channels (1 for gray-scale images, 3 for RGB, etc.)
        in_channels=hyper_params["input_channels"],
        # model output channels (number of classes in your dataset)
        classes=hyper_params["num_classes"],
    )

    # Input preprocessing that matches the chosen encoder's pretrained weights.
    preprocess_input = get_preprocessing_fn(
        config["pretrained_encoder"], pretrained="imagenet"
    )

    # ---- Data augmentation ----
    # The sampled augmentations come first, then the always-on flips and the resize.
    train_transform = A.Compose(
        [
            *config["train_augmentation"],
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.1),
            A.Resize(height=config["image_size"], width=config["image_size"]),
        ],
    )

    # we don't want TTA, just a resize (normalization is done by preprocess_fn)
    val_transform = A.Compose(
        [
            A.Resize(height=config["image_size"], width=config["image_size"]),
        ],
    )

    # ---- Datasets & loaders ----
    # NOTE(review): `subset` presumably restricts each dataset to a small slice for
    # quick search runs - confirm against SegmentaionDataset before the real run.
    train_ds = SegmentaionDataset(
        image_dir=hyper_params["train_img_dir"],
        mask_dir=hyper_params["train_mask_dir"],
        transform=train_transform,
        preprocess_fn=preprocess_input,
        mask_suffix="",
        subset=[0, 10],
    )
    val_ds = SegmentaionDataset(
        image_dir=hyper_params["val_img_dir"],
        mask_dir=hyper_params["val_mask_dir"],
        transform=val_transform,
        preprocess_fn=preprocess_input,
        mask_suffix="",
        subset=[0, 5],
    )
    train_loader, val_loader = get_loaders(
        train_ds,
        val_ds,
        batch_size=config["batch_size"],
        num_workers=hyper_params["num_workers"],
        pin_memory=hyper_params["pin_mem"],
        worker_init_fn=seed_worker,
        generator=DATA_LOADER_GEN,
    )

    # ---- Metrics (all of them are fed raw logits) ----
    metrics = {
        "jaccard (IOU)": JaccardBinarySegment(from_logits=True),
        "recall": RecallBinarySegment(from_logits=True),
        "precision": PrecisionBinarySegment(from_logits=True),
        "dice (F1-Score)": DiceBinarySegment(from_logits=True),
    }

    # ---- Loss, optimizer and LR scheduler ----
    loss_fn = config["loss_fn"]
    optimizer = optim.Adam(model.parameters(), lr=config["lr"])
    # Multiply the LR by the sampled factor once the monitored value stops decreasing.
    scheduler = ReduceLROnPlateau(
        optimizer,
        mode="min",
        factor=config["scheduler_factor"],
        patience=2,
        min_lr=1e-6,
        threshold=1e-3,
    )

    # ---- Train ----
    # TODO: must change the fit_fn to use session.report(...)
    # TODO: use verbose=False to make the output of ray tune clean
    # The returned history is not needed here, so it is not kept.
    fit_fn(
        model,
        train_loader,
        optimizer,
        loss_fn,
        scheduler,
        metrics=metrics,
        val_loader=val_loader,
        epochs=hyper_params["epochs"],
        device=device,
        ray_tune=True,
    )

In [None]:
# Create a partial func of the model_creator (using functools.partial)
# as we only change the hyperparams of interest for each run of Ray Tune (i.e., hyperparam_tuner)

# Create Tuner Func

In [None]:
def hyperparam_tuner(
    config: dict,
    num_samples: int = 10,
    max_num_epochs: int = 10,
    tune_metric: str = "val_loss",
    tune_metric_mode: str = "min",
    resources_per_trial: "dict | None" = None,
):
    """
    Search the hyperparameter space with Ray Tune and return the tuning results.

    Each sampled configuration is trained by `model_creator`. Optuna proposes the
    configurations and ASHA stops unpromising trials early.

    Parameters
    ----------
    config: dict
        The search space, i.e., the hyperparameters and their candidate values.
    num_samples: int
        Define the total number of samples/combinations to use to train the model.
        In other words, the number of models to train for finding the best hyperparameters.
    max_num_epochs: int
        Upper bound (ASHA's `max_t`) on how long a single trial may run.
    tune_metric: str
        The name of the metric (e.g., val_loss) by which the performance of models is assessed.
    tune_metric_mode: str
        Either "min" (default) or "max", depending on the chosen tune_metric.
        Specifies whether "min" values are desired, or vice versa.
    resources_per_trial: dict, optional
        Resources reserved for every trial; the numbers define how many models are
        trained in parallel. Defaults to {"gpu": 1, "cpu": 2}.
        Pass e.g. {"cpu": 2} to run on the CPU only.

    Returns
    -------
    The object returned by `tune.Tuner.fit()` (Ray's ResultGrid), which holds the
    config and metrics of every trial, not only the best one.
    """
    # Resolve the default here (never use a mutable dict as a default argument).
    if resources_per_trial is None:
        resources_per_trial = {"gpu": 1, "cpu": 2}

    # use ASHAScheduler to stop training early on if the model's plight is doomed
    # metric/mode are NOT set here: they are given in tune.TuneConfig below and
    # only one of the two places may specify them.
    scheduler_ray = ASHAScheduler(
        max_t=max_num_epochs,
        grace_period=1,
        reduction_factor=2,
    )

    # The Tuner API (from the Ray Tune official documentation) is used because it
    # gives more control than the older `tune.run(...)` of the PyTorch tutorials.
    tuner = tune.Tuner(
        tune.with_resources(model_creator, resources_per_trial),
        param_space=config,
        tune_config=tune.TuneConfig(
            num_samples=num_samples,
            metric=tune_metric,
            mode=tune_metric_mode,
            # Optuna works better than Random Search (per PyTorch's official YouTube)
            search_alg=OptunaSearch(),
            scheduler=scheduler_ray,
            # if True (default) Ray Tune changes the current dir, so relative
            # paths (e.g., the path to our data) would stop working.
            chdir_to_trial_dir=False,
        ),
    )
    results = tuner.fit()

    return results

# Run the Tuner

In [None]:
if __name__ == "__main__":
    # Timestamp of this search run; reused later to name the exported JSON file.
    train_timestamp = datetime.today().strftime(time_format)

    # FIXME: change to num_samples=20 and max_num_epochs=10 for Colab Pro
    search_kwargs = dict(
        num_samples=60,
        max_num_epochs=10,
        tune_metric="val_loss",
        tune_metric_mode="min",
    )
    # Launch the search over the space defined in `config`.
    results = hyperparam_tuner(config, **search_kwargs)

# Report the best found hyperparams
> You can find the important reports of ... in:
* ~/ray_results

> Just make sure to delete them if you don't need them anymore

> The files include:
1. The result.json (of the best model)
1. The saved checkpoint (of the best model)
1. The parameters (of the best model)

In [None]:
# Pick the trial whose last reported val_loss is the lowest one.
best_trial = results.get_best_result(metric="val_loss", mode="min", scope="last")
print(f"Best trial config: {best_trial.config}")

# Base names of the tracked metrics ...
metrics = ["loss", "jaccard (IOU)", "dice (F1-Score)", "recall", "precision"]
# ... and their validation counterparts (prefixed with "val_").
metrics_val = ["val_" + name for name in metrics]

# Report the final validation value of each metric for the best trial.
for metric in metrics_val:
    print(f"Best trial final {metric}: {best_trial.metrics[metric]}")

In [None]:
# Tabular overview of the trials (the dangling `results.` was a SyntaxError).
results.get_dataframe().head()

In [None]:
# `best_trial.metrics` is a dict, not a table: pd.DataFrame(dict) raises when all
# values are scalars and broadcasts them against any nested dict otherwise.
# json_normalize gives one row and flattens nested dicts into dotted column names.
df = pd.json_normalize(best_trial.metrics)
# Transposed for display: one metric per line is easier to read than one wide row.
df.T

# Plot the Metrics (for all of the trained models)

In [None]:
# Per-trial metric history, keyed by the trial's log directory.
# `metrics_dataframe` may be None (e.g., a trial that errored before reporting),
# so such trials are left out instead of crashing the plot.
dfs = {
    result.log_dir: result.metrics_dataframe
    for result in results
    if result.metrics_dataframe is not None
}

# One shared Axes, created up front, so all trials land on the same plot
# and the cell still works when there is nothing to draw.
fig, ax = plt.subplots()
for d in dfs.values():
    # skip histories that never logged the metric
    if "val_loss" in d:
        d["val_loss"].plot(ax=ax, legend=False)
ax.set_xlabel('Epochs')
ax.set_ylabel("val_loss")
ax.set_title("val_loss of every trial");

# Save the results
> Results are the config and metrics of all the trained models (not just the best one)

> We can use the results later on to report the different performances in a table.

In [None]:
# Save the config + last reported metrics of every trial as a JSON file.
hyper_params_export_name = os.path.join(
    "outputs",
    "hyperparams_search",
    f"{train_timestamp}@ray_tuner_hyperparams.json",
)
# open(..., "w") does not create missing folders, so make sure the target exists.
os.makedirs(os.path.dirname(hyper_params_export_name), exist_ok=True)

# Iterating `results` yields one Result object per trial (not (key, value) pairs),
# so the export is built explicitly: trial log dir -> its config and metrics.
results_export = {
    str(result.log_dir): {"config": result.config, "metrics": result.metrics}
    for result in results
}
with open(hyper_params_export_name, "w") as f:
    # default=str stringifies anything JSON can't encode (model classes, loss modules, ...)
    json.dump(results_export, f, indent=2, default=str)

# save the hyperparams to a csv file
# df_hyperparam = pd.DataFrame(hyper_params, index=[0]).T
# df_hyperparam.to_csv(f'outputs/hyperparams/{start_train_time}@hyperparams.csv', index=True, header=None)
# print('The HyperParameters'.ljust(79, " "))
# display(df_hyperparam)
# del df_hyperparam