In [None]:
# Uninstall fastai to resolve dependency problems
!pip uninstall fastai -y
# Install torch and torchvision from local wheel files (no internet access required)
!pip install ../input/packages/torch-1.7.1-cp37-cp37m-manylinux1_x86_64.whl
!pip install ../input/packages/torchvision-0.8.2-cp37-cp37m-manylinux1_x86_64.whl

In [None]:
import sys
sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')
sys.path.append('../input/pytorch-optimizers/')
sys.path.append('../input/repvgg/')
sys.path.append('../input/repvggmodels/')
sys.path.append('../input/bi-tempered-loss/')

import timm
import bi_tempered_loss
from torch_optimizer.radam import RAdam
from repvgg import RepVGG, create_RepVGG_B3g4, create_RepVGG_B3, repvgg_model_convert

In [None]:
import os
import cv2
import time
import copy
import random
import joblib
import sklearn
import warnings
import multiprocessing
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from glob import glob
from tqdm import tqdm
from pathlib import Path
from datetime import datetime
from skimage import io
from sklearn import metrics
from sklearn.model_selection import GroupKFold, StratifiedKFold
from sklearn.metrics import roc_auc_score, log_loss
from IPython.display import display
from catalyst.data.sampler import BalanceClassSampler

import torch
import torch.nn.functional as F
import torchvision
from torch import nn
from torch.cuda.amp import autocast, GradScaler
from torch.utils.data import Dataset,DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
from torch.nn.modules.loss import _WeightedLoss
from torch.optim.lr_scheduler import CosineAnnealingLR
from torchvision import models
from torchvision import transforms

from albumentations.pytorch import ToTensor, ToTensorV2
from albumentations import (
    HorizontalFlip, VerticalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90,
    Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, HueSaturationValue,
    IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, IAAPiecewiseAffine, RandomResizedCrop,
    IAASharpen, IAAEmboss, RandomBrightnessContrast, Flip, OneOf, Compose, Normalize, Cutout, CoarseDropout,
    ShiftScaleRotate, CenterCrop, Resize)

In [None]:
# Move some weights to torch cache dir
cache_dir = os.path.expanduser(os.path.join('~', '.cache/torch/hub/checkpoints'))
if not os.path.exists(cache_dir):
    os.makedirs(cache_dir)
models_dir = os.path.join(cache_dir, 'models')
if not os.path.exists(models_dir):
    os.makedirs(models_dir)
    
!cp ../input/pretrained-pytorch-models/* ~/.cache/torch/hub/checkpoints/

In [None]:
def seed_everything(seed):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random``, the ``PYTHONHASHSEED`` environment variable,
    NumPy, and PyTorch (CPU and all CUDA devices), and configures cuDNN for
    deterministic behaviour.

    Args:
        seed: integer seed applied to all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Also seed any additional GPUs, not just the current device.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-select algorithms per run, which defeats
    # the deterministic flag above; it must be off for reproducible results.
    torch.backends.cudnn.benchmark = False

seed_everything(42)

In [None]:
# Global training configuration read by the cells below.
CFG = dict(
    fold_num=3,
    seed=42,
    img_size=400,
    epochs=17,
    train_bs=16,
    valid_bs=32,
    T_0=20,
    lr=0.001,
    momentum=0.9,
    min_lr=1e-6,
    weight_decay=1e-4,
    # Gradient accumulation: step the optimizer every `accum_iter` batches to
    # emulate a larger effective batch size.
    accum_iter=2,
    early_stopping=10,
    verbose_step=1,
    num_workers=multiprocessing.cpu_count(),
    device="cuda:0" if torch.cuda.is_available() else "cpu",
)

In [None]:
# Read the noisy-label training table and the sample submission, then preview both.
train = pd.read_csv('../input/cassava-noised-label-data/noised_label_data.csv')
submission = pd.read_csv('../input/cassava-leaf-disease-classification/sample_submission.csv')
display(train.head(2), submission.head())

In [None]:
# Table dimensions, followed by the number of rows per hard label.
print(train.shape)
train["label"].value_counts()

In [None]:
def get_img(path):
    """Read an image with OpenCV and return it with the channel axis reversed (BGR -> RGB).

    Args:
        path: file path handed to ``cv2.imread``.

    Returns:
        The image array with its last axis reversed (a view of the decoded array).

    Raises:
        FileNotFoundError: if ``cv2.imread`` returns ``None`` (file missing or
            not decodable), instead of failing later with an opaque TypeError.
    """
    im_bgr = cv2.imread(path)
    if im_bgr is None:
        raise FileNotFoundError(f"Could not read image: {path}")
    im_rgb = im_bgr[:, :, ::-1]
    return im_rgb

def show_images(names):
    """Plot the given training images on a grid with four columns.

    Args:
        names: sequence of image file names located in the competition's
            ``train_images`` directory.

    An empty ``names`` draws nothing. Panels without an image are hidden.
    """
    if len(names) == 0:
        return
    c = 4
    r = int(np.ceil(len(names) / c))
    # squeeze=False keeps `axes` two-dimensional even when there is a single row;
    # the height is clamped so that three or fewer names still give a valid figure size.
    fig, axes = plt.subplots(nrows=r, ncols=c, figsize=(14, max(len(names) - 3, 1)), squeeze=False)
    panels = axes.ravel()  # row-major order: left to right, top to bottom
    for ax, name in zip(panels, names):
        img = get_img(f"../input/cassava-leaf-disease-classification/train_images/{name}")
        ax.imshow(img)
    # Hide ticks and frames everywhere, including panels left without an image.
    for ax in panels:
        ax.axis("off")
    fig.show()

In [None]:
# Number of images shown per preview grid (also used by the following cells).
show_num = 8
# Rows with hard label 0, ordered by their `label_0` value.
label_0_sorted = train[train.label==0].sort_values("label_0")
# Lowest `label_0` values first ...
show_images(list(label_0_sorted.iloc[:show_num,0]))
# ... then the highest.
show_images(list(label_0_sorted.iloc[-show_num:,0]))

In [None]:
# Rows with hard label 1, ordered by their `label_1` value.
label_1_sorted = train[train.label==1].sort_values("label_1")
# Lowest `label_1` values first ...
show_images(list(label_1_sorted.iloc[:show_num,0]))
# ... then the highest.
show_images(list(label_1_sorted.iloc[-show_num:,0]))

In [None]:
# Rows with hard label 2, ordered by their `label_2` value.
label_2_sorted = train[train.label==2].sort_values("label_2")
# Lowest `label_2` values first ...
show_images(list(label_2_sorted.iloc[:show_num,0]))
# ... then the highest.
show_images(list(label_2_sorted.iloc[-show_num:,0]))

In [None]:
# Rows with hard label 3, ordered by their `label_3` value.
label_3_sorted = train[train.label==3].sort_values("label_3")
# Lowest `label_3` values first ...
show_images(list(label_3_sorted.iloc[:show_num,0]))
# ... then the highest.
show_images(list(label_3_sorted.iloc[-show_num:,0]))

In [None]:
# Rows with hard label 4, ordered by their `label_4` value.
label_4_sorted = train[train.label==4].sort_values("label_4")
# Lowest `label_4` values first ...
show_images(list(label_4_sorted.iloc[:show_num,0]))
# ... then the highest.
show_images(list(label_4_sorted.iloc[-show_num:,0]))

# Preparing smoothed labels

In [None]:
def do_label_smooth(df, label, thr):
    """Raise low soft-label values of one class up to a threshold.

    For every row whose hard ``label`` column equals ``label`` and whose
    ``label_{label}`` value is below ``thr``:

    * ``label_{label}`` is set to ``thr``;
    * the amount added is subtracted in equal shares from the other class columns;
    * while any of those other columns is negative, their absolute values are
      taken and the excess of the row sum over 1 is again subtracted in equal
      shares.

    The class columns are selected by name (``label_0`` ... ``label_4``), so the
    result does not depend on where they sit in the frame.

    Args:
        df: frame with a ``label`` column and ``label_0`` ... ``label_4`` columns.
        label: class index (0-4) to process.
        thr: minimum value enforced for ``label_{label}`` on the selected rows.

    Returns:
        A modified copy of ``df``; the input frame is not changed.
    """
    num_classes = 5
    prob_cols = [f"label_{l}" for l in range(num_classes)]
    df = df.copy()
    mask = (df.label == label) & (df[f"label_{label}"] < thr)
    # Nothing to adjust: return the copy unchanged rather than assigning an empty block.
    if not mask.any():
        return df

    not_target_label = np.delete(np.arange(num_classes), label)
    smooth_labels = []
    for row in df.loc[mask, prob_cols].to_numpy(dtype=float):
        r = row.copy()
        # Share of the added mass removed from each of the other classes.
        d = (thr - row[label]) / (num_classes - 1)
        r[not_target_label] = row[not_target_label] - d
        r[label] = thr
        # Repair negative entries: flip their sign, then remove the resulting
        # surplus (row sum minus 1) evenly, and repeat until none is negative.
        while r[not_target_label].min() < 0:
            r[not_target_label] = np.abs(r[not_target_label])
            d = (sum(r) - 1) / (num_classes - 1)
            r[not_target_label] = r[not_target_label] - d
        smooth_labels.append(r.tolist())
    df.loc[mask, prob_cols] = np.array(smooth_labels)
    return df

In [None]:
# Threshold handed to do_label_smooth for every class.
thr = 0.6
# Run do_label_smooth once per class label (0-4); `train` is rebound to the
# returned frame each time, so re-running this cell works on already-adjusted data.
for label in range(5):
    train = do_label_smooth(train, label, thr)

In [None]:
# For each class, rows whose hard label is that class and whose matching soft
# value exceeds `thr` get that soft value overwritten with 1.
# Only the matching column is written; the other class columns keep their values.
# NOTE(review): such rows may no longer sum to 1 - confirm this is intended.
for label in range(5):
    is_confident = (train["label"] == label) & (train[f"label_{label}"] > thr)
    train.loc[is_confident, f"label_{label}"] = 1

In [None]:
# Apply label smoothing to every row that has some class column above `thr`:
# new = (1 - K/(K-1) * eps) * old + eps/(K-1), with K = num_classes, eps = label_smoothing.
thr = 0.9
num_classes     = 5
label_smoothing = 0.1
target_label    = [f"label_{l}" for l in range(5)]
smooth_scale  = 1 - num_classes / (num_classes - 1) * label_smoothing
smooth_offset = label_smoothing / (num_classes - 1)
for label in range(5):
    # Row selection is evaluated once, before the rows are rewritten.
    above_thr = train[f"label_{label}"] > thr
    labels = np.array(train.loc[above_thr, target_label])
    labels = smooth_scale * labels + smooth_offset
    train.loc[above_thr, target_label] = labels

In [None]:
# 5x5 grid of histograms: row r uses the samples with hard label r,
# column c shows the distribution of their `label_c` values.
fig, axes = plt.subplots(nrows=5, ncols=5, figsize=(14, 10))
for r in range(5):
    # Filter once per row instead of once per panel.
    rows_with_label = train[train.label==r]
    for c in range(5):
        rows_with_label[f"label_{c}"].hist(ax=axes[r,c])
        axes[r,c].set_title(f"label == {r}: label_{c}", fontsize=8)
fig.tight_layout()

# Train with pre-trained weights

In [None]:
class CassavaDataset(Dataset):
    """Dataset yielding an image and, optionally, its soft and hard labels.

    Args:
        df: frame with an ``image_id`` column; when ``output_label`` is true it
            must also contain ``label`` and ``label_0`` ... ``label_4``.
        data_root: directory prefix joined with ``image_id`` to build the path.
        transforms: optional callable invoked as ``transforms(image=img)``; its
            ``'image'`` entry replaces the image.
        output_label: when true, ``__getitem__`` also returns the labels.
        one_hot_label: stored on the instance; not used by this class.
        do_fmix, fmix_params, do_cutmix, cutmix_params: stored on the instance;
            not used by this class. ``None`` for either params argument selects
            the default dict, built per instance so instances never share (or
            mutate) a common default object and ``CFG`` is read at construction
            time rather than at class-definition time.
    """

    def __init__(self, df, data_root,
                 transforms=None,
                 output_label=True,
                 one_hot_label=False,
                 do_fmix=False,
                 fmix_params=None,
                 do_cutmix=False,
                 cutmix_params=None):
        super().__init__()
        if fmix_params is None:
            fmix_params = {'alpha'      : 1.,
                           'decay_power': 3.,
                           'shape'      : (CFG['img_size'], CFG['img_size']),
                           'max_soft'   : True,
                           'reformulate': False}
        if cutmix_params is None:
            cutmix_params = {'alpha': 1}

        self.df = df.reset_index(drop=True).copy()
        self.transforms  = transforms
        self.data_root   = data_root
        self.do_fmix     = do_fmix
        self.fmix_params = fmix_params
        self.do_cutmix   = do_cutmix
        self.cutmix_params = cutmix_params
        self.output_label  = output_label
        self.one_hot_label = one_hot_label

        # Cache the file names as an array: indexing it per item is much cheaper
        # than materialising a pandas row on every __getitem__ call.
        self.image_ids = self.df['image_id'].values

        if output_label:
            # Soft targets (one value per class) and the integer hard labels.
            self.soft_labels = self.df[[f"label_{l}" for l in range(5)]].values
            self.hard_labels = self.df.label.values

    def __len__(self):
        """Return the number of rows in the frame."""
        return self.df.shape[0]

    def __getitem__(self, index: int):
        """Return ``(img, soft_target, hard_target)``, or just ``img`` when labels are disabled."""
        img = get_img(f"{self.data_root}/{self.image_ids[index]}")

        if self.transforms:
            img = self.transforms(image=img)['image']

        if self.output_label:
            return img, self.soft_labels[index], self.hard_labels[index]
        return img

In [None]:
def get_train_transforms():
    """Build the albumentations pipeline applied to training images.

    The pipeline ends with ``Normalize`` (ImageNet mean/std), two random
    dropout augmentations and ``ToTensorV2``.
    """
    side = CFG['img_size']
    normalize_kwargs = dict(mean=[0.485, 0.456, 0.406],
                            std=[0.229, 0.224, 0.225],
                            max_pixel_value=255.0)
    steps = [
        # Geometric augmentations
        RandomResizedCrop(side, side),
        Transpose(p=0.5),
        HorizontalFlip(p=0.5),
        VerticalFlip(p=0.5),
        ShiftScaleRotate(p=0.5),
        # Colour augmentations
        # NOTE(review): the 0.2 shift limits look very small for HueSaturationValue - confirm intended.
        HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5),
        RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1, 0.1), p=0.5),
        Normalize(p=1.0, **normalize_kwargs),
        # Random region dropout, applied after normalisation
        CoarseDropout(p=0.5),
        Cutout(p=0.5),
        ToTensorV2(p=1.0),
    ]
    return Compose(steps, p=1)
  
def get_valid_transforms():
    """Build the albumentations pipeline applied to validation images.

    Steps: centre crop and resize to ``CFG['img_size']``, ``Normalize``
    (ImageNet mean/std), then ``ToTensorV2``.
    """
    side = CFG['img_size']
    steps = [
        CenterCrop(side, side, p=1.),
        Resize(side, side),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
        ToTensorV2(p=1.0),
    ]
    return Compose(steps, p=1)

In [None]:
#backbone = models.resnet50(pretrained=False)
#backbone.load_state_dict(torch.load("../input/byol-model/byol.pt"))

In [None]:
# Build the RepVGG-B3g4 backbone in its training (non-deploy) form and load the
# checkpoint weights into it.
backbone = create_RepVGG_B3g4(deploy=False)
# map_location keeps the load working on CPU-only hosts; the training cell
# moves the model to the configured device afterwards.
backbone.load_state_dict(torch.load('../input/repvgg/RepVGG-B3g4-200epochs-train.pth', map_location=torch.device("cpu")))

In [None]:
class FFN(nn.Module):
    """Backbone followed by a two-layer classification head (1000 -> 256 -> 5).

    Each instance owns a private deep copy of the backbone. Wrapping the shared
    module-level ``backbone`` object directly would let weights trained through
    one instance carry over into the next one created (e.g. the next
    cross-validation fold), leaking information between folds.

    Args:
        pretrained_backbone: module producing 1000 features per sample. When
            ``None`` (the default), the module-level ``backbone`` is used as the
            template. The template itself is never modified.
    """

    def __init__(self, pretrained_backbone=None):
        super(FFN, self).__init__()
        template = backbone if pretrained_backbone is None else pretrained_backbone
        self.backbone = copy.deepcopy(template)
        self.lr1      = nn.Linear(1000, 256)
        self.relu     = nn.ReLU()
        self.dropout  = nn.Dropout(0.5)
        self.lr2      = nn.Linear(256, 5)

    def forward(self, x):
        """Return the 5 class logits for a batch of inputs."""
        x = self.backbone(x)
        x = self.relu(self.lr1(x))
        x = self.dropout(x)
        x = self.lr2(x)
        return x

In [None]:
def prepare_dataloader(df, trn_idx, val_idx, data_root='../input/cassava-leaf-disease-classification/train_images/'):
    """Create the training and validation data loaders for one fold.

    Args:
        df: full training frame.
        trn_idx: positional row indices of the training split.
        val_idx: positional row indices of the validation split.
        data_root: directory containing the image files.

    Returns:
        ``(train_loader, val_loader)``. The training loader shuffles and uses
        ``CFG['train_bs']``; the validation loader keeps order and uses
        ``CFG['valid_bs']``.
    """
    # The split indices are positions, so select with .iloc; .loc would only be
    # equivalent when the frame happens to carry a default 0..n-1 index.
    train_   = df.iloc[trn_idx].reset_index(drop=True)
    valid_   = df.iloc[val_idx].reset_index(drop=True)
    train_ds = CassavaDataset(train_, data_root, transforms=get_train_transforms(), output_label=True, one_hot_label=False, do_fmix=False, do_cutmix=False)
    valid_ds = CassavaDataset(valid_, data_root, transforms=get_valid_transforms(), output_label=True)
    train_loader = torch.utils.data.DataLoader(train_ds,
                                               batch_size=CFG['train_bs'],
                                               pin_memory=False,
                                               drop_last=False,
                                               shuffle=True,
                                               num_workers=CFG['num_workers'])
    val_loader = torch.utils.data.DataLoader(valid_ds,
                                             batch_size=CFG['valid_bs'],
                                             num_workers=CFG['num_workers'],
                                             shuffle=False,
                                             pin_memory=False)
    return train_loader, val_loader

In [None]:
class SoftLabelLoss(nn.Module):
    """Cross-entropy between logits and soft (probability) targets.

    Args:
        dim: axis holding the classes; used for both the log-softmax and the sum.
    """

    def __init__(self, dim=-1):
        super().__init__()
        self.dim = dim

    def forward(self, pred, target):
        """Return the batch mean of ``sum(-target * log_softmax(pred))`` over ``dim``."""
        log_probs = pred.log_softmax(dim=self.dim)
        per_sample = torch.sum(-target * log_probs, dim=self.dim)
        return torch.mean(per_sample)
    
class TemperedLoss(nn.Module):
    """Mean bi-tempered logistic loss.

    Args:
        t1: first temperature, forwarded to ``bi_tempered_logistic_loss``.
        t2: second temperature, forwarded to ``bi_tempered_logistic_loss``.
    """

    def __init__(self, t1=1, t2=1):
        super().__init__()
        self.t1, self.t2 = t1, t2

    def forward(self, pred, target):
        """Return the mean of ``bi_tempered_logistic_loss(pred, target)``."""
        per_sample = bi_tempered_loss.bi_tempered_logistic_loss(
            pred, target, t1=self.t1, t2=self.t2)
        return torch.mean(per_sample)
    
def sgd_optimizer(model, lr, momentum, weight_decay):
    """Create an SGD optimizer with one parameter group per trainable tensor.

    Per-group settings, decided from the parameter name:

    * name contains ``'bias'``: learning rate ``2 * lr``, no weight decay;
    * name contains ``'bn'``: no weight decay;
    * otherwise: ``lr`` and ``weight_decay`` as given.

    Parameters with ``requires_grad=False`` are left out.
    """
    def _group_for(name, tensor):
        is_bias = 'bias' in name
        return {
            'params': [tensor],
            # Doubling the bias learning rate is a Caffe-style convention.
            'lr': 2 * lr if is_bias else lr,
            'weight_decay': 0 if (is_bias or 'bn' in name) else weight_decay,
        }

    groups = [_group_for(name, tensor)
              for name, tensor in model.named_parameters()
              if tensor.requires_grad]
    return torch.optim.SGD(groups, lr, momentum=momentum)

In [None]:
def train_one_epoch(epoch, model, loss_fn, optimizer, train_loader, device, scheduler=None, schd_batch_update=False):
    """Train ``model`` for one pass over ``train_loader`` with mixed precision.

    Gradients are accumulated over ``CFG['accum_iter']`` batches before each
    optimizer step. Gradient scaling uses the module-level ``scaler``
    (a ``GradScaler``), which must exist before this function is called.

    Args:
        epoch: epoch number, used only in the progress-bar text.
        model: network to train; switched to train mode.
        loss_fn: callable ``loss_fn(predictions, soft_labels)``.
        optimizer: optimizer stepped through ``scaler``.
        train_loader: yields ``(images, soft_labels, hard_labels)`` batches.
        device: device the batches are moved to.
        scheduler: optional LR scheduler.
        schd_batch_update: step the scheduler after every optimizer step when
            true, otherwise once at the end of the epoch.
    """
    model.train()

    running_loss = None
    num_steps = len(train_loader)

    pbar = tqdm(enumerate(train_loader), total=num_steps)
    for step, (imgs, soft_labels, _) in pbar:
        imgs = imgs.to(device).float()
        image_labels = soft_labels.to(device).float()

        # Only the forward pass and the loss run under autocast; the backward
        # pass and the optimizer step belong outside the context.
        with autocast():
            image_preds = model(imgs)
            loss = loss_fn(image_preds, image_labels)

        # NOTE(review): the loss is not divided by CFG['accum_iter'], so the
        # accumulated gradient is a sum over micro-batches - confirm intended.
        scaler.scale(loss).backward()

        # Exponential moving average of the loss, for display only.
        if running_loss is None:
            running_loss = loss.item()
        else:
            running_loss = running_loss * .99 + loss.item() * .01

        is_last_step = (step + 1) == num_steps
        if ((step + 1) % CFG['accum_iter'] == 0) or is_last_step:
            # may unscale_ here if desired (e.g., to allow clipping unscaled gradients)
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            if scheduler is not None and schd_batch_update:
                scheduler.step()

        if ((step + 1) % CFG['verbose_step'] == 0) or is_last_step:
            description = f'epoch {epoch} loss: {running_loss:.4f}'
            pbar.set_description(description)

    if scheduler is not None and not schd_batch_update:
        scheduler.step()
        
def valid_one_epoch(epoch, model, loss_fn, val_loader, device, scheduler=None, schd_loss_update=False):
    """Evaluate ``model`` on ``val_loader`` and return the accuracy.

    Predictions are the arg-max of the model output and are compared with the
    hard labels; the loss is computed against the soft labels and only shown in
    the progress bar (and optionally passed to the scheduler).

    Args:
        epoch: epoch number, used only in the progress-bar text.
        model: network to evaluate; switched to eval mode.
        loss_fn: callable ``loss_fn(predictions, soft_labels)``.
        val_loader: yields ``(images, soft_labels, hard_labels)`` batches.
        device: device the batches are moved to.
        scheduler: optional LR scheduler stepped once after evaluation.
        schd_loss_update: when true, the scheduler receives the mean loss.

    Returns:
        Fraction of samples whose predicted class equals the hard label.
    """
    model.eval()

    loss_sum   = 0
    sample_num = 0
    image_preds_all   = []
    image_targets_all = []

    pbar = tqdm(enumerate(val_loader), total=len(val_loader))
    # Disable autograd here so the function is safe even when the caller does
    # not wrap it in torch.no_grad().
    with torch.no_grad():
        for step, (imgs, soft_labels, hard_labels) in pbar:
            # Cast to float like the training loop does.
            imgs = imgs.to(device).float()
            image_labels = soft_labels.to(device).float()

            image_preds = model(imgs)
            image_preds_all   += [torch.argmax(image_preds, 1).detach().cpu().numpy()]
            image_targets_all += [np.array(hard_labels)]

            loss = loss_fn(image_preds, image_labels)
            # Weight by batch size so the running value is a per-sample mean.
            loss_sum   += loss.item()*image_labels.shape[0]
            sample_num += image_labels.shape[0]

            if ((step + 1) % CFG['verbose_step'] == 0) or ((step + 1) == len(val_loader)):
                description = f'epoch {epoch} loss: {loss_sum/sample_num:.4f}'
                pbar.set_description(description)

    image_preds_all   = np.concatenate(image_preds_all)
    image_targets_all = np.concatenate(image_targets_all)
    acc = (image_preds_all==image_targets_all).mean()
    print('validation multi-class accuracy = {:.4f}'.format(acc))

    if scheduler is not None:
        if schd_loss_update:
            scheduler.step(loss_sum/sample_num)
        else:
            scheduler.step()
    return acc

In [None]:
# Stratified K-fold cross-validation: train one model per fold and save the
# best-accuracy snapshot of each, converted with repvgg_model_convert.
folds = StratifiedKFold(n_splits=CFG['fold_num'], shuffle=True, random_state=CFG['seed']).split(np.arange(train.shape[0]), train.label.values)

# Snapshot of the pretrained backbone weights, restored at the start of every
# fold so that no fold starts from weights trained on another fold's data.
pretrained_backbone_state = copy.deepcopy(backbone.state_dict())

for fold, (trn_idx, val_idx) in enumerate(folds):
    print('Training with {} started'.format(fold))
    print(len(trn_idx), len(val_idx))
    train_loader, val_loader = prepare_dataloader(train, trn_idx, val_idx, data_root='../input/cassava-leaf-disease-classification/train_images/')

    not_improved_cnt = 0
    best_acc = 0
    # Reset per fold so a snapshot from a previous fold can never be saved by mistake.
    converted_model = None
    device   = torch.device(CFG['device'])

    backbone.load_state_dict(pretrained_backbone_state)
    model    = FFN()

    backbone.to(device)
    model.to(device)
    # `scaler` is read as a module-level name by train_one_epoch.
    scaler    = GradScaler()
    optimizer = sgd_optimizer(model, CFG['lr'], CFG['momentum'], CFG['weight_decay'])
    scheduler = CosineAnnealingLR(optimizer=optimizer, T_max=CFG['T_0'])

    loss_tr = TemperedLoss(t1=0.8, t2=1.2).to(device)
    loss_fn = TemperedLoss(t1=0.8, t2=1.2).to(device)

    for epoch in range(CFG['epochs']):
        train_one_epoch(epoch, model, loss_tr, optimizer, train_loader, device, scheduler=scheduler, schd_batch_update=False)

        with torch.no_grad():
            acc = valid_one_epoch(epoch, model, loss_fn, val_loader, device, scheduler=None, schd_loss_update=False)

        if best_acc < acc:
            print('Best model will be saved to output path after completing this fold')
            converted_model = copy.deepcopy(model)
            best_acc = acc
            not_improved_cnt = 0
        else:
            # Count first, then compare: stop after exactly CFG['early_stopping']
            # consecutive epochs without improvement.
            not_improved_cnt += 1
            if not_improved_cnt >= CFG['early_stopping']:
                print("Met early stopping.")
                break

    # Accuracy never rose above 0: fall back to the final weights of this fold.
    if converted_model is None:
        converted_model = copy.deepcopy(model)
    converted_model.backbone = repvgg_model_convert(converted_model.backbone, create_RepVGG_B3g4)
    torch.save(converted_model.state_dict(), f'cnn_with_noise_label_fold_{fold}')

    # Drop every per-fold object (including the saved snapshot) before freeing GPU memory.
    del model, optimizer, scheduler, train_loader, val_loader, scaler, converted_model
    torch.cuda.empty_cache()