# Catalyst classification tutorial

In [None]:
import os
from typing import List, Tuple, Callable

# Expose only GPU 0 to CUDA in this process. This is set at the very top of
# the notebook so it is in place before torch is imported further down.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

---

## Reproducibility first

Catalyst provides special utilities for making research results reproducible. <br/>
For example, `set_global_seed` fixes the seed for all the main DL frameworks (`PyTorch`, `Tensorflow`, `random` and `numpy`).

In [None]:
# One seed for the whole notebook; it is also reused later as `random_state`
# for the train/valid split.
SEED = 42
from catalyst.utils import set_global_seed

set_global_seed(SEED)

## Dataset

In [None]:
from pathlib import Path

ROOT = "Images/"
# Recursively collect every *.jpg below ROOT, dropping hidden files
# (those whose file name starts with a dot).
ALL_IMAGES = [
    image_path
    for image_path in Path(ROOT).glob("**/*.jpg")
    if not image_path.name.startswith(".")
]
print("Number of images:", len(ALL_IMAGES))

Let's check out the data!

In [None]:
from catalyst.utils import imread
import numpy as np

import matplotlib.pyplot as plt

def show_examples(images: List[Tuple[str, np.ndarray]]):
    """Draw up to four ``(title, image)`` pairs on a 2x2 matplotlib grid.

    Args:
        images: pairs of plot title and image array; the pairs fill the grid
            row by row, and anything past the fourth pair is ignored.
    """
    fig, axes = plt.subplots(2, 2, figsize=(16, 16))
    # `axes.flat` walks the 2x2 grid in row-major order: (0,0), (0,1), (1,0), (1,1)
    for axis, (caption, picture) in zip(axes.flat, images):
        axis.imshow(picture)
        axis.set_title(caption)
    fig.tight_layout()

def read_random_images(paths: List[Path]) -> List[Tuple[str, np.ndarray]]:
    """Load four randomly chosen images together with a display title.

    Args:
        paths: candidate image paths to sample from.

    Returns:
        Four ``(title, image)`` pairs, where the title is
        ``"<parent folder name>: <file name>"`` and the image is whatever
        ``imread`` returns for that path.
    """
    # np.random.choice samples with replacement by default,
    # so the same file may show up more than once
    chosen = np.random.choice(paths, size=4)
    return [
        (f"{path.parent.name}: {path.name}", imread(path))
        for path in chosen
    ]

You can re-run the cell below to see more examples.

In [None]:
# Every run samples a fresh set of four images and plots them on a 2x2 grid.
images = read_random_images(ALL_IMAGES)
show_examples(images)

## Dataset preprocessing

With Catalyst we can easily create a dataset from the following folder structure:
```
dataset/
    class_1/
        *.ext
        ...
    class_2/
        *.ext
        ...
    ...
    class_N/
        *.ext
        ...
```

`create_dataset` function goes through a given directory and creates a dictionary `Dict[class_name, List[image]]`

In [None]:
from catalyst.utils.dataset import create_dataset, create_dataframe, prepare_dataset_labeling

# Scan every sub-directory of ROOT (one folder per class) for *.jpg files.
# NOTE(review): ROOT already ends with "/", so the pattern expands to "Images//*";
# presumably harmless for globbing, but confirm.
dataset = create_dataset(dirs=f"{ROOT}/*", extension="*.jpg")

and `create_dataframe` function creates typical `pandas.DataFrame` for further analysis

In [None]:
# Turn the class -> files mapping into a table with "class" and "filepath" columns.
df = create_dataframe(dataset, columns=["class", "filepath"])
df.head()  # preview the first rows

In [None]:
len(df)  # total number of rows in the dataframe

finally `prepare_dataset_labeling` creates a numerical label for each unique class name

In [None]:
# Mapping from each class name found in the "class" column to its numerical label.
tag_to_label = prepare_dataset_labeling(df, "class")
tag_to_label

Let's add a column with a numerical label value to the DataFrame. 
It can be easily done with `map_dataframe` function.

In [None]:
from catalyst.utils.pandas import map_dataframe

# Add a "label" column by translating each row's "class" value through tag_to_label.
df_with_labels = map_dataframe(df, tag_column="class", class_column="label", tag2class=tag_to_label, verbose=True)
df_with_labels.head()

Additionally, let's save the `class_names` for further usage.

In [None]:
# Class names ordered by their numerical label; later cells index this list
# with a predicted label to recover the class name.
class_names = sorted(tag_to_label, key=tag_to_label.get)
class_names

Now let's divide our dataset into the `train` and `valid` parts.

The parameters for the split_dataframe function are the same as [sklearn.train_test_split](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html#sklearn-model-selection-train-test-split). 

We also define `test_size` (it is optional) and `random_state` for reproducibility.

In [None]:
from catalyst.utils.dataset import split_dataframe

# Hold out 20% of the rows for validation; SEED keeps the split reproducible.
train_data, valid_data = split_dataframe(df_with_labels, test_size=0.2, random_state=SEED)
# Convert both parts into lists of per-row dicts (one dict per sample);
# presumably both are pandas DataFrames at this point.
train_data, valid_data = train_data.to_dict('records'), valid_data.to_dict('records')

In [None]:
len(train_data), len(valid_data)  # sample counts of the two splits

## Augmentations

In order to save your time during data preparation/reading/writing, Catalyst provides a special abstraction – [Reader](https://catalyst-team.github.io/catalyst/api/data.html#reader). <br/> 
Reader allows you to read various structures, for example, images, strings, numerical values and perform some functions on top of them.

In [None]:
from catalyst.dl import utils
from catalyst.data.reader import ImageReader, ScalarReader, ReaderCompose, LambdaReader
from functools import partial

import torch

# One output class per distinct class name.
num_classes = len(tag_to_label)

# ReaderCompose collects different Readers into one pipeline
open_fn = ReaderCompose([
    
    # Reads an image from the `datapath` folder using the key `input_key="filepath"` (the value should be the filename)
    # and writes it to the output dictionary under the `output_key="features"` key
    ImageReader(
        input_key="filepath",
        output_key="features",
        datapath=ROOT
    ),
    
    # Reads a number from our dataframe by the key `input_key="label"` as np.int64
    # and writes it to the output dictionary under the `output_key="targets"` key
    ScalarReader(
        input_key="label",
        output_key="targets",
        default_value=-1,
        dtype=np.int64
    ),
    
    # Same as above, but one-hot encoded over `num_classes` classes
    # and stored under the `output_key="targets_one_hot"` key
    ScalarReader(
        input_key="label",
        output_key="targets_one_hot",
        default_value=-1,
        dtype=np.int64, 
        one_hot_classes=num_classes
    )
])

For augmentation of our dataset, we will use the [albumentations library](https://github.com/albu/albumentations).  <br/>
You can view the list of available augmentations on the documentation [website](https://albumentations.readthedocs.io/en/latest/api/augmentations.html).

In [None]:
from albumentations import Compose, LongestMaxSize, PadIfNeeded
from albumentations import ShiftScaleRotate, IAAPerspective, RandomBrightnessContrast, RandomGamma, \
    HueSaturationValue, ToGray, CLAHE, JpegCompression

from albumentations import Normalize
from albumentations.torch import ToTensor

# Integer border-mode codes passed as `border_mode` to the albumentations transforms below.
# NOTE(review): presumably these mirror OpenCV's cv2.BORDER_CONSTANT / cv2.BORDER_REFLECT values — confirm.
BORDER_CONSTANT = 0
BORDER_REFLECT = 2

def pre_transforms(image_size=224):
    """Return the steps that turn an image into an image_size x image_size square.

    The longest side is first scaled to ``image_size`` (aspect ratio kept),
    then the image is padded up to ``image_size`` in both dimensions using
    the constant border mode.

    Args:
        image_size: side length of the resulting square, in pixels.
    """
    shrink = LongestMaxSize(max_size=image_size)
    pad = PadIfNeeded(image_size, image_size, border_mode=BORDER_CONSTANT)
    return [shrink, pad]

def hard_transforms():
    """Return the train-only augmentations: geometric, photometric and JPEG distortions."""
    # Random shifts, stretches and turns with a 50% probability
    geometric = ShiftScaleRotate(
        shift_limit=0.1,
        scale_limit=0.1,
        rotate_limit=15,
        border_mode=BORDER_REFLECT,
        p=0.5,
    )
    # Perspective distortion with a 30% probability
    perspective = IAAPerspective(scale=(0.02, 0.05), p=0.3)
    # Random brightness / contrast with a 30% probability
    brightness_contrast = RandomBrightnessContrast(
        brightness_limit=0.2,
        contrast_limit=0.2,
        p=0.3,
    )
    # Random gamma changes with a 30% probability
    gamma = RandomGamma(gamma_limit=(85, 115), p=0.3)
    # Randomly changes the hue, saturation, and color value with a 30% probability
    hue_saturation = HueSaturationValue(p=0.3)
    # JPEG re-compression with quality no lower than 80 (probability left at the library default)
    jpeg = JpegCompression(quality_lower=80)

    return [geometric, perspective, brightness_contrast, gamma, hue_saturation, jpeg]

def post_transforms():
    """Return the final steps: normalization, then conversion to a torch.Tensor."""
    # Normalize() is used with its default arguments
    # (the tutorial relies on ImageNet image normalization)
    normalize = Normalize()
    to_tensor = ToTensor()
    return [normalize, to_tensor]

def compose(_transforms):
    """Flatten a list of augmentation lists and wrap them into one ``Compose`` pipeline.

    Args:
        _transforms: iterable of lists of augmentations, applied in the given order.
    """
    pipeline = []
    for group in _transforms:
        pipeline.extend(group)
    return Compose(pipeline)

Like Reader, there is a close abstraction for handling augmentations and key-value-based dataloaders – [Augmentor](https://catalyst-team.github.io/catalyst/api/data.html#augmentor).

In [None]:
from catalyst.data.augmentor import Augmentor
from torchvision import transforms


# Train pipeline: squaring + hard augmentations + normalization/ToTensor.
train_transforms = compose([pre_transforms(), hard_transforms(), post_transforms()])
# Validation pipeline: no random augmentations.
valid_transforms = compose([pre_transforms(), post_transforms()])

# For visualisation only: the normalization/ToTensor step is left out,
# so the result is passed straight to the plotting helper.
show_transforms = compose([pre_transforms(), hard_transforms()])

# Takes an image from the input dictionary by the key `dict_key` and performs `train_transforms` on it.
# The pipeline is called with `image=...` and its result is read back from the "image" key.
train_data_transforms = transforms.Compose([
    Augmentor(
        dict_key="features",
        augment_fn=lambda x: train_transforms(image=x)["image"]
    )
])


# Similarly for the validation part of the dataset:
# we only perform squaring, normalization and ToTensor
valid_data_transforms = transforms.Compose([
    Augmentor(
        dict_key="features",
        augment_fn=lambda x: valid_transforms(image=x)["image"]
    )
])

Let's look at the augmented results. <br/>
The cell below can be re-run to see different examples.

In [None]:
# Sample four random images ...
images = read_random_images(ALL_IMAGES)

# ... and replace each one with its augmented version, keeping the title.
images = [
    (title, show_transforms(image=i)["image"])
    for (title, i) in images
]
show_examples(images)

## Pytorch dataloaders

Using `catalyst.utils.get_loader`, you can immediately get loaders only from the dataset and data-converting functions.

In [None]:
import collections

# Notebook-level batch size and number of data-loading worker processes.
bs = 64
num_workers = 4

def get_loaders(
    open_fn: Callable,
    train_transforms_fn: transforms.Compose,
    valid_transforms_fn: transforms.Compose,
    batch_size: int = 64,
    num_workers: int = 4,
    sampler = None
) -> collections.OrderedDict:
    """
    Build the train/valid loaders from the notebook-level
    `train_data` and `valid_data` record lists.

    Args:
        open_fn: Reader for reading data from a dataframe
        train_transforms_fn: Augmentor for train part
        valid_transforms_fn: Augmentor for valid part
        batch_size: batch size, used for both loaders
        num_workers: How many subprocesses to use to load data,
        sampler: An object of the torch.utils.data.Sampler class
            for the dataset data sampling strategy specification;
            applied to the train loader only

    Returns:
        OrderedDict with the "train" and "valid" loaders, in that order.
    """
    train_loader = utils.get_loader(
        train_data,
        open_fn=open_fn,
        dict_transform=train_transforms_fn,
        # use the function argument, not the notebook-level `bs` global,
        # so that a caller-supplied batch size is actually honoured
        batch_size=batch_size,
        num_workers=num_workers,
        shuffle=sampler is None, # shuffle data only if Sampler is not specified (PyTorch requirement)
        sampler=sampler
    )

    valid_loader = utils.get_loader(
        valid_data,
        open_fn=open_fn,
        dict_transform=valid_transforms_fn,
        batch_size=batch_size,
        num_workers=num_workers,
        # validation data is neither shuffled nor re-sampled
        shuffle=False,
        sampler=None
    )

    # Catalyst expects an ordered dictionary with train/valid/infer loaders.
    # The number of loaders can vary.
    # For example, it can easily handle even some complex logic like:
    # loaders["train_dataset1"] = train_loader_1
    # loaders["train_dataset2"] = train_loader_2
    # ....
    # loaders["valid_1"] = valid_loader_1
    # loaders["valid_2"] = valid_loader_2
    # ...
    # loaders["infer_1"] = infer_loader_1
    # loaders["infer_2"] = infer_loader_2
    # ...

    loaders = collections.OrderedDict()
    loaders["train"] = train_loader
    loaders["valid"] = valid_loader

    return loaders

# Default loaders: no custom sampler, so the train loader shuffles its data.
loaders = get_loaders(open_fn, train_data_transforms, valid_data_transforms)

Let's take the classification model from [Cadene pretrained models](https://github.com/Cadene/pretrained-models.pytorch). This repository contains a huge number of pre-trained PyTorch models. <br/>
But at first, let's check them out!

In [None]:
import pretrainedmodels

# Display the model names the package exposes.
pretrainedmodels.model_names

For the purposes of this tutorial, `ResNet18` is good enough, but you can try other models.

In [None]:
# Name used below both to look up the pretrained settings and to build the model.
model_name = "resnet18"

By `pretrained_settings` we can see what the given network expects as input and what would be the expected output.

In [None]:
# Display the pretrained settings registered for the chosen model.
pretrainedmodels.pretrained_settings[model_name]

The model returns logits for classification into 1000 classes from ImageNet. <br/>
Let's define a function that will replace the last fully-connected layer to match our number of classes.

In [None]:
from torch import nn
def get_model(model_name: str, num_classes: int, pretrained: str = "imagenet"):
    """Create a pretrained network and replace its head with a `num_classes`-way linear layer.

    Args:
        model_name: name of a constructor exposed by the `pretrainedmodels` package.
        num_classes: number of outputs of the new final linear layer.
        pretrained: forwarded to the constructor as its `pretrained` argument.

    Returns:
        The model with `last_linear` swapped for a fresh `nn.Linear`.
    """
    # The constructor is looked up by name; the network is first created
    # with the 1000-way head it was pretrained with.
    network = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained=pretrained)

    # Keep the head's input width, change only the number of outputs.
    head_inputs = network.last_linear.in_features
    network.last_linear = nn.Linear(head_inputs, num_classes)
    return network

## Model training

In [None]:
from catalyst.data.sampler import BalanceClassSampler

# Balance the classes seen during training by upsampling,
# based on the numerical label of every train record.
labels = [x["label"] for x in train_data]
sampler = BalanceClassSampler(labels, mode="upsampling")

# Rebind `loaders` (the name the training and inference cells read) so the
# balanced sampler is actually used; the result used to be stored under
# `loader`, which nothing else referenced.
loaders = get_loaders(
    open_fn,
    train_data_transforms,
    valid_data_transforms,
    sampler=sampler
)

In [None]:
from catalyst.contrib.schedulers import OneCycleLR

# Number of training epochs; also used as the scheduler's number of steps.
NUM_EPOCHS = 4

model = get_model(model_name, num_classes)

criterion = nn.CrossEntropyLoss()
# NOTE(review): the scheduler below is given its own lr_range, so the lr=0.0003
# passed here is presumably overridden once the scheduler is in effect — confirm.
optimizer = torch.optim.Adam(model.parameters(), lr=0.0003)
scheduler = OneCycleLR(
    optimizer, 
    num_steps=NUM_EPOCHS, 
    lr_range=(0.001, 0.0001),
    warmup_steps=1
)

To run a DL experiment, Catalyst uses a [Runner](https://catalyst-team.github.io/catalyst/api/dl.html#catalyst.dl.core.runner.Runner) abstraction. <br/>
It contains the main logic about "how" you run the experiment and get predictions.

For the supervised learning case, there is an extension of Runner – [SupervisedRunner](https://catalyst-team.github.io/catalyst/api/dl.html#module-catalyst.dl.runner.supervised), which provides additional methods like `train`, `infer` and `predict_loader`.

In [None]:
from catalyst.dl.runner import SupervisedRunner

# Runner with default settings; used below for both training and inference.
runner = SupervisedRunner()

# folder for all the experiment logs
# (checkpoints are later read back from "<logdir>/checkpoints")
logdir = "./logs/classification_tutorial"

In [None]:
# as we are working on classification task
from catalyst.dl.callbacks import AccuracyCallback, AUCCallback, F1ScoreCallback, ConfusionMatrixCallback

In [None]:
# Train for NUM_EPOCHS epochs on the loaders in `loaders`, logging into `logdir`.
runner.train(
    model=model,
    logdir=logdir,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=scheduler,
    loaders=loaders,
    # We can specify the callbacks list for the experiment;
    # For this task, we will check accuracy, AUC and F1 metrics
    callbacks=[
        AccuracyCallback(num_classes=num_classes),
        # AUC and F1 read the one-hot targets that the reader pipeline
        # stores under the "targets_one_hot" key
        AUCCallback(
            num_classes=num_classes,
            input_key="targets_one_hot",
            class_names=class_names
        ),
        F1ScoreCallback(
            input_key="targets_one_hot",
            activation="Softmax"
        ),
        ConfusionMatrixCallback(
            num_classes=num_classes,
            class_names=class_names
        )
    ],
    num_epochs=NUM_EPOCHS,
    verbose=True
)

### Training analysis and model predictions 

The `utils.plot_metrics` method reads tensorboard logs from the logdir and plots beautiful metrics with `plotly` package.

In [None]:
# it can take a while (colab issue)
utils.plot_metrics(
    logdir=logdir, 
    # specify which metrics we want to plot
    metrics=["loss", "accuracy01", "auc/_mean", "f1_score", "_base/lr"]
)

The method below will help us look at the predictions of the model for each image.

In [None]:
from torch.nn.functional import softmax

def show_prediction(
    model: torch.nn.Module,
    class_names: List[str],
    titles: List[str],
    images: List[np.ndarray],
    device: torch.device
) -> None:
    """Plot images titled with the model's predicted class and the correct one.

    Args:
        model: classification network; expected to already live on `device`.
        class_names: class names indexed by numerical label.
        titles: the correct class name of every image, in the same order as `images`.
        images: raw images; each one goes through `valid_transforms` before inference.
        device: device the input batch is moved to.
    """
    batch = torch.stack([
        valid_transforms(image=image)["image"]
        for image in images
    ]).to(device)

    # Run inference in eval mode and without autograd: in train mode, layers
    # such as dropout and batch norm behave differently (and batch norm updates
    # its running statistics), and tracking gradients only wastes memory.
    # The previous mode is restored afterwards so the caller's model is unchanged.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            # call the module itself rather than `.forward` so registered hooks run
            logits = model(batch)
    finally:
        model.train(was_training)

    probabilities = softmax(logits, dim=1)
    # plain Python ints, so that indexing `class_names` does not depend on tensors
    predicted_labels = probabilities.argmax(dim=1).tolist()

    images_predicted_classes = [
        (f"predicted: {class_names[label]} | correct: {title}", image)
        for label, title, image in zip(predicted_labels, titles, images)
    ]
    show_examples(images_predicted_classes)


In [None]:
device = utils.get_device()
# Split the sampled (title, image) pairs into two parallel sequences.
titles, images = list(zip(*read_random_images(ALL_IMAGES)))
# Titles look like "<class folder>: <file name>"; keep only the part before the first colon.
titles = [title.rsplit(":")[0] for title in titles]
show_prediction(model, class_names=class_names, titles=titles, images=images, device=device)

## Model inference

With SupervisedRunner, you can easily predict entire loader with only one method call.

In [None]:
# Predict the whole validation loader, resuming from the "best" checkpoint
# stored under the training log directory.
predictions = runner.predict_loader(
    loaders["valid"], resume=f"{logdir}/checkpoints/best.pth", verbose=True
)

The resulting object has shape = (number of elements in the loader, output shape from the model)

In [None]:
# Compare the validation dataset size with the shape of the predictions.
print("loader", len(loaders["valid"].dataset))
print("predictions", predictions.shape)

Thus, we can obtain probabilities for our classes.

In [None]:
# Raw model output for the first sample in the predictions.
print("logits: ", predictions[0])

In [None]:
from torch.nn.functional import softmax

# Convert the first sample's logits to a tensor and apply softmax over its
# only axis (presumably a 1-D vector with one entry per class).
probabilities = softmax(torch.from_numpy(predictions[0]), dim=0)
print("probabilities: ", probabilities)

In [None]:
# Index of the most probable class, as a plain Python int, mapped back to its name.
label = probabilities.argmax().item()
print(f"predicted: {class_names[label]}")
