# Tez: a simple PyTorch trainer

Inference Kernel: https://www.kaggle.com/abhishek/ranzcr-tez-inference-efficientnet5

Please note: a few things have been changed in this training kernel to make it hard for you to reproduce the inference score ;)

In [None]:
import sys

# Directories holding the tez and efficientnet-pytorch sources. Both are
# added to the end of the module search path, in this order, so that the
# `import tez` / `from efficientnet_pytorch import ...` statements in the
# following cell can be resolved.
tez_path = '../input/tez-lib/'
effnet_path = '../input/efficientnet-pytorch/'
sys.path.extend([tez_path, effnet_path])

In [None]:
import argparse
import os

import albumentations
import pandas as pd
import tez
import torch
import torch.nn as nn
from efficientnet_pytorch import EfficientNet
from sklearn import metrics, model_selection, preprocessing
from tez.callbacks import EarlyStopping
from tez.datasets import ImageDataset
from torch.nn import functional as F

In [None]:
# --- Data locations (relative paths) ---------------------------------------
# Root of the competition data; the training images live in its "train/"
# sub-directory, and the fold assignments are read from a separate directory.
INPUT_PATH = "../input/ranzcr-clip-catheter-line-classification/"
IMAGE_PATH = INPUT_PATH + "train/"
FOLDS_PATH = "../input/ranzcr-folds/"

# --- Training settings -------------------------------------------------------
# The same batch size is used for the training and validation loaders.
TRAIN_BATCH_SIZE = VALID_BATCH_SIZE = 32
# Upper bound on the number of epochs handed to `model.fit`.
EPOCHS = 200
# Side length used for both height and width in the augmentation pipelines.
IMAGE_SIZE = 512

In [None]:
class RanzcrModel(tez.Model):
    """EfficientNet-B5 backbone with a linear head producing 11 logits.

    The pretrained stem convolution is converted to accept single-channel
    input by averaging its weights over the input-channel axis. Backbone
    features are globally average-pooled, passed through dropout and then
    through a linear layer.
    """

    def __init__(self):
        super().__init__()

        self.effnet = EfficientNet.from_pretrained("efficientnet-b5")

        # Collapse the pretrained stem filters to one input channel: the mean
        # over dim 1 (the input-channel axis of a conv weight) keeps the
        # pretrained filters while matching `in_channels = 1`.
        self.effnet._conv_stem.in_channels = 1
        weight = self.effnet._conv_stem.weight.mean(1, keepdim=True)
        self.effnet._conv_stem.weight = torch.nn.Parameter(weight)

        self.dropout = nn.Dropout(0.1)
        # 2048 must equal the channel count returned by `extract_features`
        # for this backbone -- TODO confirm if the backbone is ever changed.
        self.out = nn.Linear(2048, 11)
        # Scheduler configuration attributes; presumably read by the tez
        # training loop (not visible in this file) -- verify against tez.
        self.step_scheduler_after = "epoch"
        self.step_scheduler_metric = "valid_auc"

    def monitor_metrics(self, outputs, targets):
        """Return the mean per-column ROC AUC of ``outputs`` vs ``targets``.

        Args:
            outputs: 2-D tensor of scores, one column per label.
            targets: tensor with the same layout holding the true labels.

        Returns:
            A dict ``{"auc": value}`` where ``value`` is the average of the
            per-column AUCs. A column for which ``roc_auc_score`` raises
            ``ValueError`` contributes 0.5 to the average.
        """
        outputs = outputs.cpu().detach().numpy()
        targets = targets.cpu().detach().numpy()
        # NOTE: this is not correct :)
        # `forward` calls this on a single batch, so the value is a per-batch
        # score rather than an AUC computed over a whole dataset.
        num_labels = outputs.shape[1]
        auc = 0
        for j in range(num_labels):
            try:
                auc += metrics.roc_auc_score(
                    targets[:, j], outputs[:, j]
                )
            except ValueError:
                # Raised e.g. when the column holds a single class in this
                # batch; fall back to the chance-level score.
                auc += 0.5
        # Average over the number of columns actually scored instead of a
        # hard-coded 11, so the result stays a true mean for any label count.
        return {"auc": auc / num_labels}

    def fetch_optimizer(self):
        """Return an Adam optimizer over all model parameters (lr=1e-3)."""
        opt = torch.optim.Adam(self.parameters(), lr=1e-3)
        return opt

    def fetch_scheduler(self):
        """Return a ReduceLROnPlateau scheduler bound to ``self.optimizer``.

        ``mode="max"`` because the monitored quantity configured in
        ``__init__`` is an AUC, where larger values are better.
        """
        rlr = torch.optim.lr_scheduler.ReduceLROnPlateau(
            self.optimizer,
            verbose=True,
            factor=0.7,
            mode="max",
            patience=2,
            threshold=0.01,
        )
        return rlr

    def forward(self, image, targets=None):
        """Compute logits and, when targets are given, loss and metrics.

        Args:
            image: 4-D tensor whose first dimension is the batch size.
            targets: optional label tensor; cast to the dtype of the logits
                before the loss is computed.

        Returns:
            ``(outputs, loss, metrics)``. Without targets this is
            ``(outputs, None, {})``.
        """
        # Unpacking into four names also rejects inputs that are not 4-D.
        batch_size, _, _, _ = image.shape

        x = self.effnet.extract_features(image)
        x = F.adaptive_avg_pool2d(x, 1).reshape(batch_size, -1)
        outputs = self.out(self.dropout(x))
        if targets is not None:
            loss = nn.BCEWithLogitsLoss()(
                outputs, targets.type_as(outputs)
            )
            # Named `batch_metrics` so the imported sklearn `metrics` module
            # is not shadowed inside this method.
            batch_metrics = self.monitor_metrics(outputs, targets)
            return outputs, loss, batch_metrics
        return outputs, None, {}

In [None]:
def _single_channel_normalize():
    """Build a fresh one-channel Normalize step (mean 0.485, std 0.229)."""
    return albumentations.Normalize(
        mean=[0.485],
        std=[0.229],
        max_pixel_value=255.0,
        p=1.0,
    )


# Training pipeline: random resized crop to IMAGE_SIZE x IMAGE_SIZE, then
# normalization.
train_aug = albumentations.Compose(
    [
        albumentations.RandomResizedCrop(IMAGE_SIZE, IMAGE_SIZE),
        _single_channel_normalize(),
    ],
    p=1.0,
)

# Validation pipeline: plain resize to IMAGE_SIZE x IMAGE_SIZE, then the same
# normalization as in training.
valid_aug = albumentations.Compose(
    [
        albumentations.Resize(IMAGE_SIZE, IMAGE_SIZE, p=1.0),
        _single_channel_normalize(),
    ],
    p=1.0,
)


# change folds here
current_fold = 0

# Label columns selected from the folds table as training targets.
targets = [
    "ETT - Abnormal",
    "ETT - Borderline",
    "ETT - Normal",
    "NGT - Abnormal",
    "NGT - Borderline",
    "NGT - Incompletely Imaged",
    "NGT - Normal",
    "CVC - Abnormal",
    "CVC - Borderline",
    "CVC - Normal",
    "Swan Ganz Catheter Present",
]

# Rows whose `kfold` equals `current_fold` form the validation split; every
# other row is used for training. Both frames get a fresh 0..n-1 index.
dfx = pd.read_csv(os.path.join(FOLDS_PATH, "train_folds.csv"))
df_train = dfx[dfx.kfold.ne(current_fold)].reset_index(drop=True)
df_valid = dfx[dfx.kfold.eq(current_fold)].reset_index(drop=True)

In [None]:
def _jpg_paths(frame):
    """Return the ``<IMAGE_PATH>/<StudyInstanceUID>.jpg`` path of each row."""
    return [
        os.path.join(IMAGE_PATH, uid + ".jpg")
        for uid in frame.StudyInstanceUID.values
    ]


# Image file paths and label matrices for the two splits.
train_image_paths = _jpg_paths(df_train)
valid_image_paths = _jpg_paths(df_valid)

train_targets = df_train[targets].values
valid_targets = df_valid[targets].values

# Datasets differ only in their inputs and augmentation pipeline; both are
# created with grayscale=True.
train_dataset = ImageDataset(
    image_paths=train_image_paths,
    targets=train_targets,
    augmentations=train_aug,
    grayscale=True,
)
valid_dataset = ImageDataset(
    image_paths=valid_image_paths,
    targets=valid_targets,
    augmentations=valid_aug,
    grayscale=True,
)


In [None]:
model = RanzcrModel()

# Early-stopping callback monitoring "valid_auc" (higher is better), writing
# to a per-fold file name. Presumably it stops after `patience` epochs without
# improvement and saves the model to `model_path` -- confirm against tez.
es = EarlyStopping(
    monitor="valid_auc",
    model_path=f"effnet5_fold_{current_fold}.bin",
    patience=5,
    mode="max",
)

# Keyword options for the training run, gathered in one place.
fit_kwargs = dict(
    valid_dataset=valid_dataset,
    train_bs=TRAIN_BATCH_SIZE,
    valid_bs=VALID_BATCH_SIZE,
    device="cuda",
    epochs=EPOCHS,
    callbacks=[es],
    fp16=True,
)
model.fit(train_dataset, **fit_kwargs)
