In [None]:
# Uninstall fastai to avoid dependency conflicts with the torch build installed below
# (NOTE(review): presumably fastai pins a different torch version - confirm).
!pip uninstall fastai -y
# Install pinned torch / torchvision wheels from a local input dataset,
# so the notebook does not need internet access.
!pip install ../input/packages/torch-1.7.1-cp37-cp37m-manylinux1_x86_64.whl
!pip install ../input/packages/torchvision-0.8.2-cp37-cp37m-manylinux1_x86_64.whl

In [None]:
import sys
# Make libraries shipped as local input datasets importable; the imports below
# (timm, torch_optimizer, repvgg) are presumably resolved from these directories.
sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')
sys.path.append('../input/pytorch-optimizers/')
sys.path.append('../input/repvgg/')
sys.path.append('../input/repvggmodels/')

import timm
from torch_optimizer.radam import RAdam
from repvgg import RepVGG, create_RepVGG_B2, create_RepVGG_B3g4, create_RepVGG_B3, repvgg_model_convert

In [None]:
import os
import cv2
import time
import copy
import random
import joblib
import sklearn
import warnings
import multiprocessing
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from glob import glob
from tqdm import tqdm
from pathlib import Path
from datetime import datetime
from skimage import io
from sklearn import metrics
from sklearn.model_selection import GroupKFold, StratifiedKFold
from sklearn.metrics import roc_auc_score, log_loss
from IPython.display import display
from catalyst.data.sampler import BalanceClassSampler

import torch
import torch.nn.functional as F
import torchvision
from torch import nn
from torch.cuda.amp import autocast, GradScaler
from torch.utils.data import Dataset,DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
from torch.nn.modules.loss import _WeightedLoss
from torch.optim.lr_scheduler import CosineAnnealingLR
from torchvision import models
from torchvision import transforms

from albumentations.pytorch import ToTensor, ToTensorV2
from albumentations import (
    HorizontalFlip, VerticalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90,
    Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, HueSaturationValue,
    IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, IAAPiecewiseAffine, RandomResizedCrop,
    IAASharpen, IAAEmboss, RandomBrightnessContrast, Flip, OneOf, Compose, Normalize, Cutout, CoarseDropout,
    ShiftScaleRotate, CenterCrop, Resize)

In [None]:
# Move some weights to torch cache dir
cache_dir = os.path.expanduser(os.path.join('~', '.cache/torch/hub/checkpoints'))
if not os.path.exists(cache_dir):
    os.makedirs(cache_dir)
models_dir = os.path.join(cache_dir, 'models')
if not os.path.exists(models_dir):
    os.makedirs(models_dir)
    
!cp ../input/pretrained-pytorch-models/* ~/.cache/torch/hub/checkpoints/

In [None]:
def seed_everything(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random``, the ``PYTHONHASHSEED`` environment variable,
    NumPy and PyTorch (CPU and all CUDA devices), and configures cuDNN for
    reproducible runs.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune and pick possibly different kernels
    # from run to run, which defeats the determinism requested above.
    torch.backends.cudnn.benchmark = False
    
seed_everything(42)

In [None]:
# Global hyper-parameters and runtime settings read throughout the notebook.
CFG = {
    'fold_num': 3,    # number of stratified CV folds
    'seed'    : 42,   # random_state of the fold split
    'img_size': 400,  # side length (pixels) of the square network input
    'epochs'  : 18,   # maximum epochs per fold
    'train_bs': 16,   # training batch size
    'valid_bs': 32,   # validation / inference batch size
    'tta'     : 3,    # number of augmented inference passes that are averaged
    'T_0'     : 10,   # passed as T_max to CosineAnnealingLR
    'lr'      : 1e-3,
    'momentum': 0.9,
    'min_lr'  : 1e-6, # NOTE(review): not referenced by the visible code - confirm it is still needed
    'weight_decay'  : 1e-4,
    'early_stopping': 5, # epochs without validation improvement tolerated before stopping
    'num_workers'   : 4, # DataLoader worker processes
    'accum_iter'    : 2, # gradient accumulation: step the optimizer every N batches for an effectively larger batch size
    'verbose_step'  : 1, # refresh the progress-bar description every N batches
    'device': 'cuda:0'}

In [None]:
# Load the training labels and the sample submission, then preview both tables.
train      = pd.read_csv('../input/cassava-leaf-disease-classification/train.csv')
submission = pd.read_csv('../input/cassava-leaf-disease-classification/sample_submission.csv')
display(train.head(2))
display(submission.head())

In [None]:
# Dataset size and number of samples per label.
print(train.shape)
train.label.value_counts()

In [None]:
def get_img(path):
    """Load an image from disk with channels in RGB order.

    Args:
        path: Filesystem path of the image to read.

    Returns:
        The decoded image as an array whose last axis is reversed with respect
        to what ``cv2.imread`` returns (BGR -> RGB). The result is a view of
        the decoded array, not a copy.

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not read the file. OpenCV
            signals this by returning ``None`` instead of raising, which would
            otherwise surface as an opaque ``TypeError`` on the slice below.
    """
    im_bgr = cv2.imread(path)
    if im_bgr is None:
        raise FileNotFoundError(f"cv2.imread could not read image: {path}")
    return im_bgr[:, :, ::-1]

# Sanity check: load one training image and display it.
img = get_img('../input/cassava-leaf-disease-classification/train_images/1000015157.jpg')
plt.imshow(img)
plt.show()

# Build the re-training dataset using public models

In [None]:
class CassavaDataset(Dataset):
    """Image dataset driven by a DataFrame with an ``image_id`` column.

    Each item is the image found at ``{data_root}/{image_id}``, optionally
    passed through ``transforms`` and optionally paired with the row's
    ``label`` value.
    """

    def __init__(self, df, data_root, transforms=None, output_label=True):
        super().__init__()
        self.data_root    = data_root
        self.transforms   = transforms
        self.output_label = output_label
        # Private copy with a fresh 0..n-1 index, so the caller's frame is never touched.
        self.df = df.reset_index(drop=True).copy()

    def __len__(self):
        return len(self.df.index)

    def __getitem__(self, index: int):
        row   = self.df.iloc[index]
        image = get_img(f"{self.data_root}/{row['image_id']}")
        if self.transforms:
            # transforms is called with image=... and must return a mapping with an 'image' entry
            image = self.transforms(image=image)['image']
        if self.output_label == True:
            return image, row['label']
        return image

In [None]:
def get_inference_transforms():
    """Build the augmentation pipeline used for test-time augmentation.

    Despite the name, the pipeline is random (crop, flips, colour jitter); the
    callers run it several times per image and average the predictions.

    Returns:
        An albumentations ``Compose`` ending in ImageNet normalisation and
        conversion to a tensor.
    """
    side = CFG['img_size']
    steps = [
        RandomResizedCrop(side, side),
        Transpose(p=0.5),
        HorizontalFlip(p=0.5),
        VerticalFlip(p=0.5),
        HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5),
        RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1, 0.1), p=0.5),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
        ToTensorV2(p=1.0),
    ]
    return Compose(steps, p=1)

In [None]:
class CassvaImgClassifier(nn.Module):
    """A timm model whose ``classifier`` layer is replaced by a new linear head.

    Args:
        model_arch: Architecture name passed to ``timm.create_model``.
        n_class: Number of output classes of the new head.
        pretrained: Forwarded to ``timm.create_model``.
    """

    def __init__(self, model_arch, n_class, pretrained=False):
        super().__init__()
        net = timm.create_model(model_arch, pretrained=pretrained)
        # Keep the feature width of the original head, change only the output size.
        net.classifier = nn.Linear(net.classifier.in_features, n_class)
        self.model = net

    def forward(self, x):
        return self.model(x)

In [None]:
def inference_one_epoch(model, data_loader, device):
    """Run one pass over ``data_loader`` and return softmax probabilities.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        data_loader: Iterable with a length that yields image batches only
            (no labels).
        device: Device the batches are moved to before the forward pass.

    Returns:
        A NumPy array with the per-batch softmax outputs (taken over dim 1)
        concatenated along axis 0, in loader order.
    """
    model.eval()
    batch_probs = []
    # Disable autograd here instead of relying on every caller to wrap the
    # call in torch.no_grad(); nesting inside a caller's no_grad is harmless.
    with torch.no_grad():
        for imgs in tqdm(data_loader, total=len(data_loader)):
            imgs   = imgs.to(device).float()
            logits = model(imgs)
            batch_probs.append(torch.softmax(logits, 1).detach().cpu().numpy())

    return np.concatenate(batch_probs, axis=0)

In [None]:
# Score every training image with a public EfficientNet-B4 checkpoint.
# The (random) inference transforms are applied CFG['tta'] times per image and
# the softmax outputs are averaged (test-time augmentation).
valid_ds = CassavaDataset(train.copy(),
                          '../input/cassava-leaf-disease-classification/train_images/',
                          transforms=get_inference_transforms(),
                          output_label=False)
# shuffle=False keeps predictions in the same row order as `train`.
val_loader = torch.utils.data.DataLoader(valid_ds, 
                                         batch_size=CFG['valid_bs'],
                                         num_workers=CFG['num_workers'],
                                         shuffle=False,
                                         pin_memory=False)
device = torch.device(CFG['device'])
model  = CassvaImgClassifier('tf_efficientnet_b4_ns', train.label.nunique()).to(device)

val_preds = []
# Only one checkpoint is used; the list holds the suffix of the checkpoint file name.
# NOTE(review): the file name suggests a fold-0 model trained on part of this same
# data, so its confidence on those images may be optimistic - confirm.
for i, epoch in enumerate([9]):
    model.load_state_dict(torch.load(f'../input/cassava-pytorch-efficientnet-baseline-models/tf_efficientnet_b4_ns_fold_0_{epoch}'))
    with torch.no_grad():
        for _ in range(CFG['tta']):
            # Pre-scale each pass by 1/tta so that the sum below is the mean over passes.
            val_preds += [1/CFG['tta']*inference_one_epoch(model, val_loader, device)]
val_preds = np.sum(val_preds, axis=0)

In [None]:
# Compare the public model's averaged prediction with the given label.
df_train_result = pd.DataFrame({"image_id": train.image_id,
                                "label"   : train.label,
                                "pred"    : np.argmax(val_preds,1),
                                "prob"    : np.max(val_preds,1)})
df_train_result["res"] = df_train_result.label == df_train_result.pred

# Split the samples into:
#   retrain - prediction agrees with the label and confidence >= thr (trusted labels)
#   noised  - agreeing but low-confidence samples, followed by all disagreeing samples
thr = 0.6
is_correct   = df_train_result.res
is_confident = df_train_result.prob >= thr
is_uncertain = df_train_result.prob <  thr

# A stable sort by label reproduces the "label by label, original order inside
# each label" row order of the former per-label append loop, without
# DataFrame.append (deprecated and later removed from pandas) and without
# growing a frame inside a loop.
retrain = df_train_result[is_correct & is_confident].sort_values("label", kind="mergesort")
noised  = pd.concat([
    df_train_result[is_correct & is_uncertain].sort_values("label", kind="mergesort"),
    df_train_result[~is_correct]])

# Keep only the identifying columns, selected by name rather than by position.
retrain = retrain.reset_index(drop=True)[["image_id", "label"]]
noised  = noised .reset_index(drop=True)[["image_id", "label"]]

In [None]:
# Class shares in the full training set, for comparison with the split below.
train.label.value_counts(normalize=True, sort=False)

In [None]:
# Per-class counts of the retrain and noised subsets, plus each class's share
# within its subset; the printed pair is the total size of each subset.
df = pd.concat([retrain.label.value_counts(sort=False), noised.label.value_counts(sort=False)], axis=1)
df.columns = ["retrain", "noised"]
df["retrain_rate"] = df.retrain / df.retrain.sum()
df["noised_rate"]  = df.noised  / df.noised.sum()
print(df.retrain.sum(), df.noised.sum())
df

# Train with pre-trained weights

In [None]:
class CassavaDataset(Dataset):
    """Training/validation dataset with labels cached as an array.

    Items are images loaded from ``{data_root}/{image_id}``, optionally
    transformed, and paired with their label when ``output_label`` is truthy.
    """

    def __init__(self, df, data_root,
                 transforms=None,
                 output_label=True):
        super().__init__()
        # Private copy indexed 0..n-1, so integer positions are valid .loc labels.
        self.df = df.reset_index(drop=True).copy()
        self.data_root    = data_root
        self.transforms   = transforms
        self.output_label = output_label

        # Labels are only extracted (and only required to exist) when requested.
        if output_label:
            self.labels = self.df['label'].values

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index: int):
        wants_label = self.output_label
        # The label is looked up before the image is read, as the file read is the costly part.
        target = self.labels[index] if wants_label else None

        image = get_img(f"{self.data_root}/{self.df.loc[index]['image_id']}")
        if self.transforms:
            image = self.transforms(image=image)['image']

        return (image, target) if wants_label else image

In [None]:
def get_train_transforms():
    """Build the random augmentation pipeline applied to training images.

    Returns:
        An albumentations ``Compose``: geometric and colour augmentations,
        ImageNet normalisation, two dropout-style occlusions, then conversion
        to a tensor.
    """
    side = CFG['img_size']
    geometric = [
        RandomResizedCrop(side, side),
        Transpose(p=0.5),
        HorizontalFlip(p=0.5),
        VerticalFlip(p=0.5),
        ShiftScaleRotate(p=0.5),
    ]
    colour = [
        HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5),
        RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1, 0.1), p=0.5),
    ]
    finishing = [
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
        # The occlusions run after normalisation, exactly in this order.
        CoarseDropout(p=0.5),
        Cutout(p=0.5),
        ToTensorV2(p=1.0),
    ]
    return Compose(geometric + colour + finishing, p=1)
  
def get_valid_transforms():
    """Build the deterministic preprocessing pipeline for validation images.

    Returns:
        An albumentations ``Compose``: centre crop and resize to
        ``CFG['img_size']``, ImageNet normalisation, conversion to a tensor.
    """
    side = CFG['img_size']
    steps = [
        CenterCrop(side, side, p=1.),
        # NOTE(review): the crop above already yields side x side, so this
        # resize looks redundant - confirm before removing.
        Resize(side, side),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
        ToTensorV2(p=1.0),
    ]
    return Compose(steps, p=1)

In [None]:
# Alternative backbone kept for reference: torchvision's pre-trained ResNet-50.
#backbone = models.resnet50(pretrained=True)
# RepVGG-B3g4 built with deploy=False and initialised from the "-train" checkpoint
# (presumably the multi-branch training-time form of the network - see repvgg.py).
backbone = create_RepVGG_B3g4(deploy=False)
# Alternative weights kept for reference: a BYOL checkpoint.
#backbone.load_state_dict(torch.load("../input/byol-model/byol.pt"))
backbone.load_state_dict(torch.load("../input/repvgg/RepVGG-B3g4-200epochs-train.pth"))

# Optional: freeze the backbone. Disabled, so every backbone parameter keeps
# requires_grad as loaded and is handed to the optimizer.
#for param in backbone.parameters():
#    param.requires_grad = False

In [None]:
class FFN(nn.Module):
    """Backbone followed by a small two-layer classification head.

    Pipeline: backbone -> Linear(1000, 256) -> ReLU -> Dropout(0.5) -> Linear(256, 5).

    Args:
        backbone_net: Optional module used as the feature extractor; it must
            produce 1000 features per sample. When omitted, a deep copy of
            the module-level ``backbone`` is used.
    """

    def __init__(self, backbone_net=None):
        super(FFN, self).__init__()
        if backbone_net is None:
            # Copy the shared template instead of wrapping it directly:
            # otherwise every FFN() would train the very same weights, and a
            # later cross-validation fold would start from (and be validated
            # with) weights already fine-tuned on another fold's data.
            backbone_net = copy.deepcopy(backbone)
        self.backbone = backbone_net
        self.lr1      = nn.Linear(1000, 256)
        self.relu     = nn.ReLU()
        self.dropout  = nn.Dropout(0.5)
        self.lr2      = nn.Linear(256, 5)

    def forward(self, x):
        x = self.backbone(x)
        x = self.relu(self.lr1(x))
        x = self.dropout(x)
        x = self.lr2(x)
        return x
    
def sgd_optimizer(model, lr, momentum, weight_decay):
    """Create an SGD optimizer with one parameter group per trainable tensor.

    Group settings are chosen from the parameter name:
      * names containing 'bias' or 'bn' get no weight decay;
      * names containing 'bias' get twice the base learning rate.
    Parameters with ``requires_grad`` set to False are left out.

    Args:
        model: Module whose named parameters are optimised.
        lr: Base learning rate.
        momentum: SGD momentum.
        weight_decay: Weight decay applied to the remaining parameters.

    Returns:
        The configured ``torch.optim.SGD`` instance.
    """
    groups = []
    for name, param in model.named_parameters():
        if param.requires_grad:
            is_bias = 'bias' in name
            groups.append({
                'params': [param],
                # Doubling the bias lr is a Caffe-style common practice; made no difference.
                'lr': 2 * lr if is_bias else lr,
                'weight_decay': 0 if (is_bias or 'bn' in name) else weight_decay,
            })
    return torch.optim.SGD(groups, lr, momentum=momentum)

In [None]:
def prepare_dataloader(df, trn_idx, val_idx, data_root='../input/cassava-leaf-disease-classification/train_images/'):
    """Build the training and validation loaders for one cross-validation fold.

    Args:
        df: DataFrame with the samples of all folds.
        trn_idx: Positional row indices of the training samples, as produced
            by a scikit-learn splitter's ``split``.
        val_idx: Positional row indices of the validation samples.
        data_root: Directory that contains the image files.

    Returns:
        ``(train_loader, val_loader)``: the training loader shuffles and uses
        the training transforms; the validation loader keeps row order and
        uses the validation transforms.
    """
    # The fold indices are positions, not index labels, so select with iloc.
    # With .loc this only worked while df happened to carry a default 0..n-1 index.
    train_   = df.iloc[trn_idx].reset_index(drop=True)
    valid_   = df.iloc[val_idx].reset_index(drop=True)
    train_ds = CassavaDataset(train_, data_root, transforms=get_train_transforms(), output_label=True)
    valid_ds = CassavaDataset(valid_, data_root, transforms=get_valid_transforms(), output_label=True)
    train_loader = torch.utils.data.DataLoader(train_ds,
                                               batch_size=CFG['train_bs'],
                                               pin_memory=False,
                                               drop_last=False,
                                               shuffle=True,
                                               num_workers=CFG['num_workers'])
    val_loader = torch.utils.data.DataLoader(valid_ds,
                                             batch_size=CFG['valid_bs'],
                                             num_workers=CFG['num_workers'],
                                             shuffle=False,
                                             pin_memory=False)
    return train_loader, val_loader

In [None]:
def train_one_epoch(epoch, model, loss_fn, optimizer, train_loader, device, scheduler=None, schd_batch_update=False):
    """Train ``model`` for one epoch with mixed precision and gradient accumulation.

    Reads two module-level names: ``scaler`` (the gradient scaler used for
    backward and optimizer steps) and ``CFG`` (keys ``accum_iter`` and
    ``verbose_step``).

    Args:
        epoch: Epoch number, used only in the progress-bar text.
        model: Network to train; it is switched to train mode.
        loss_fn: Callable ``loss_fn(predictions, labels)`` returning a scalar loss.
        optimizer: Optimizer stepped every ``CFG['accum_iter']`` batches and
            after the last batch.
        train_loader: Iterable with a length yielding ``(images, labels)`` batches.
        device: Device the batches are moved to.
        scheduler: Optional learning-rate scheduler.
        schd_batch_update: When True the scheduler is stepped after every
            optimizer step, otherwise once at the end of the epoch.
    """
    model.train()

    running_loss = None
    num_batches  = len(train_loader)
    pbar = tqdm(enumerate(train_loader), total=num_batches)
    for step, (imgs, image_labels) in pbar:
        imgs = imgs.to(device).float()
        image_labels = image_labels.to(device).long()

        # Only the forward pass and the loss belong inside autocast; backward
        # and the optimizer step run outside of it, as recommended by the
        # PyTorch AMP documentation.
        with autocast():
            image_preds = model(imgs)
            loss = loss_fn(image_preds, image_labels)
        # NOTE(review): the loss is not divided by CFG['accum_iter'], so the
        # accumulated gradient is a sum over the accumulated batches, not a mean.
        scaler.scale(loss).backward()

        # Exponential moving average of the batch loss, for display only.
        batch_loss = loss.item()
        if running_loss is None:
            running_loss = batch_loss
        else:
            running_loss = running_loss * .99 + batch_loss * .01

        is_last_batch = (step + 1) == num_batches
        if ((step + 1) % CFG['accum_iter'] == 0) or is_last_batch:
            # may unscale_ here if desired (e.g., to allow clipping unscaled gradients)
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            if scheduler is not None and schd_batch_update:
                scheduler.step()

        if ((step + 1) % CFG['verbose_step'] == 0) or is_last_batch:
            description = f'epoch {epoch} loss: {running_loss:.4f}'
            pbar.set_description(description)

    if scheduler is not None and not schd_batch_update:
        scheduler.step()
        
def valid_one_epoch(epoch, model, loss_fn, val_loader, device, scheduler=None, schd_loss_update=False):
    """Evaluate ``model`` on ``val_loader`` and return its accuracy.

    Reads the module-level ``CFG['verbose_step']`` for progress reporting and
    does not disable autograd itself.

    Args:
        epoch: Epoch number, used only in the progress-bar text.
        model: Network to evaluate; it is switched to eval mode.
        loss_fn: Callable ``loss_fn(predictions, labels)`` returning a scalar loss.
        val_loader: Iterable with a length yielding ``(images, labels)`` batches.
        device: Device the batches are moved to.
        scheduler: Optional scheduler stepped once after the evaluation.
        schd_loss_update: When True the scheduler receives the sample-weighted
            mean validation loss, otherwise it is stepped without arguments.

    Returns:
        Fraction of samples whose arg-max prediction equals the label.
    """
    model.eval()

    total_loss, n_samples = 0, 0
    pred_chunks, target_chunks = [], []
    n_batches = len(val_loader)

    pbar = tqdm(enumerate(val_loader), total=n_batches)
    for step, (imgs, image_labels) in pbar:
        imgs = imgs.to(device)
        image_labels = image_labels.to(device).long()

        logits = model(imgs)
        pred_chunks.append(torch.argmax(logits, 1).detach().cpu().numpy())
        target_chunks.append(image_labels.detach().cpu().numpy())

        # Weight each batch loss by its size so that the running value is a per-sample mean.
        batch_size  = image_labels.shape[0]
        total_loss += loss_fn(logits, image_labels).item()*batch_size
        n_samples  += batch_size

        if ((step + 1) % CFG['verbose_step'] == 0) or ((step + 1) == n_batches):
            pbar.set_description(f'epoch {epoch} loss: {total_loss/n_samples:.4f}')

    acc = (np.concatenate(pred_chunks) == np.concatenate(target_chunks)).mean()
    print('validation multi-class accuracy = {:.4f}'.format(acc))

    if scheduler is not None:
        if schd_loss_update:
            scheduler.step(total_loss/n_samples)
        else:
            scheduler.step()
    return acc

In [None]:
#sample_batch = next(iter(train_loader))[0]
#model.to("cuda:0")
#with torch.no_grad():
#    output = model(sample_batch.to("cuda:0").float())
#output

In [None]:
# Stratified K-fold cross-validation over the trusted (retrain) samples.
# `folds` yields positional (train, validation) index arrays.
folds = StratifiedKFold(n_splits=CFG['fold_num'], shuffle=True, random_state=CFG['seed']).split(np.arange(retrain.shape[0]), retrain.label.values)
    
for fold, (trn_idx, val_idx) in enumerate(folds):
    print('Training with {} started'.format(fold))
    print(len(trn_idx), len(val_idx))
    train_loader, val_loader = prepare_dataloader(retrain, trn_idx, val_idx, data_root='../input/cassava-leaf-disease-classification/train_images/')

    not_improved_cnt = 0
    best_acc = 0
    device   = torch.device(CFG['device'])
    # NOTE(review): whether the folds are independent depends on FFN - if FFN()
    # reuses the module-level `backbone` object rather than copying it, later
    # folds start from weights already fine-tuned by earlier folds.
    model    = FFN()
    
    backbone.to(device)
    model.to(device)
    # `scaler` is read as a module-level name inside train_one_epoch.
    scaler    = GradScaler()
    #optimizer = RAdam(model.parameters(), lr=CFG['lr'], weight_decay=CFG['weight_decay'])
    optimizer = sgd_optimizer(model, CFG['lr'], CFG['momentum'], CFG['weight_decay'])
    # NOTE(review): T_max is CFG['T_0'] (10) while CFG['epochs'] is 18, so the
    # cosine period ends before training does - confirm this is intended.
    scheduler = CosineAnnealingLR(optimizer=optimizer, T_max=CFG['T_0'])
    
    loss_tr = nn.CrossEntropyLoss().to(device)
    loss_fn = nn.CrossEntropyLoss().to(device)

    for epoch in range(CFG['epochs']):
        # The scheduler is stepped once per epoch inside train_one_epoch.
        train_one_epoch(epoch, model, loss_tr, optimizer, train_loader, device, scheduler=scheduler, schd_batch_update=False)

        with torch.no_grad():
            acc = valid_one_epoch(epoch, model, loss_fn, val_loader, device, scheduler=None, schd_loss_update=False)

        if best_acc < acc:
            print('Best model will be saved to output path after completing this fold')
            # Snapshot the best model so far; it is converted and saved after the fold.
            converted_model = copy.deepcopy(model)
            best_acc = acc
            not_improved_cnt = 0
        elif CFG['early_stopping'] == not_improved_cnt:
            # Stop once the counter of non-improving epochs has reached the limit.
            print("Met early stopping.")
            break
        else:
            not_improved_cnt += 1
            
    # Convert the best snapshot's backbone with repvgg_model_convert before saving;
    # the inference cell loads these files into a backbone built with deploy=True.
    converted_model.backbone = repvgg_model_convert(converted_model.backbone, create_RepVGG_B3g4)
    torch.save(converted_model.state_dict(), f'tuned_cnn_fold_{fold}')

    # Drop the per-fold objects before the next fold to release GPU memory.
    del model, converted_model, optimizer, train_loader, val_loader, scaler
    torch.cuda.empty_cache()

# Run inference with the trained RepVGG to obtain soft labels for the noisy samples

In [None]:
class CassavaDataset(Dataset):
    """Inference dataset: images addressed by the ``image_id`` column of a DataFrame.

    Yields the (optionally transformed) image, together with the row's
    ``label`` value when ``output_label`` equals True.
    """

    def __init__(self, df, data_root, transforms=None, output_label=True):
        super().__init__()
        # Re-indexed private copy: positions 0..n-1 address the rows.
        frame = df.reset_index(drop=True).copy()
        self.df, self.data_root = frame, data_root
        self.transforms, self.output_label = transforms, output_label

    def __len__(self):
        n_rows, _ = self.df.shape
        return n_rows

    def __getitem__(self, index: int):
        record  = self.df.iloc[index]
        picture = get_img(f"{self.data_root}/{record['image_id']}")
        if self.transforms:
            picture = self.transforms(image=picture)['image']
        # The label column is only read when a label is actually requested.
        return (picture, record['label']) if self.output_label == True else picture

In [None]:
# Predict class probabilities for the noisy samples with the fine-tuned
# per-fold models, averaging over all folds and CFG['tta'] augmented passes.
valid_ds = CassavaDataset(noised.copy(),
                          '../input/cassava-leaf-disease-classification/train_images/',
                          transforms=get_inference_transforms(),
                          output_label=False)
# shuffle=False keeps predictions in the same row order as `noised`.
val_loader = torch.utils.data.DataLoader(valid_ds, 
                                         batch_size=CFG['valid_bs'],
                                         num_workers=CFG['num_workers'],
                                         shuffle=False,
                                         pin_memory=False)
device   = torch.device(CFG['device'])
# Rebind the module-level backbone to a deploy=True network, matching the
# converted backbones stored in the saved state dicts.
backbone = create_RepVGG_B3g4(deploy=True)

val_preds = []
for fold in range(CFG['fold_num']):
    model = FFN()
    model.load_state_dict(torch.load(f"./tuned_cnn_fold_{fold}"))
    backbone.to(device)
    model.to(device)
    with torch.no_grad():
        for _ in range(CFG['tta']):
            # Pre-scale each pass by 1/(folds*tta) so that the sum below is the overall mean.
            val_preds += [1/(CFG['fold_num']*CFG['tta'])*inference_one_epoch(model, val_loader, device)]
val_preds = np.sum(val_preds, axis=0) 

In [None]:
# Soft-label table: one-hot rows for the trusted (retrain) samples followed by
# the averaged model probabilities for the noisy samples. The row order matches
# pd.concat([retrain, noised]) below, so joining on the fresh index lines them up.
df_labels   = pd.DataFrame(np.vstack([np.identity(5)[retrain.label], val_preds]),
                           columns=[f"label_{i}" for i in range(5)])
all_dataset = pd.concat([retrain, noised]).reset_index(drop=True)
all_dataset = all_dataset.join(df_labels)

In [None]:
# One panel per class: histogram of the soft label that the samples originally
# labelled as that class received for their own class.
fig, axes = plt.subplots(nrows=1, ncols=5, figsize=(16, 4))
for class_id, ax in enumerate(axes):
    all_dataset.loc[all_dataset.label == class_id, f"label_{class_id}"].hist(ax=ax)
    # Title each panel so the figure can be read on its own.
    ax.set_title(f"label_{class_id} (label == {class_id})")
plt.show()

In [None]:
# Persist image ids, original labels and soft labels for later use.
all_dataset.to_csv("./noised_label_data.csv", index=False)