- Thanks to the organizers and congratulations to all Kagglers. This was my first image competition, and I learned a lot from it. I could not get a high score this time; however, I want to share my solution in case it helps someone find a new idea. 

- My solution consists of three parts.  
 **Part 1** - Train a clean model on data expected to be clean.  
 https://www.kaggle.com/ttkagglett/cassava-cnn-training-with-pre-trained-weights  
 **Part 2** - Train a noise model on the clean data and the labels inferred by the clean model.  
 https://www.kaggle.com/ttkagglett/cassava-cnn-training-with-noise-labels  
 **Part 3** - Inference with the clean model and the noise model  
 This notebook. I also added a publicly shared ViT model to the ensemble.
 
- I tried self-supervised learning; however, it did not work for me in this competition.  
  The notebook is here. If there are any mistakes, please feel free to let me know in the comments.  
  https://www.kaggle.com/ttkagglett/cassava-byol-training
  
- Finally, I got some amazing ideas from the following notebooks. Thanks.  
  https://www.kaggle.com/khyeh0719/pytorch-efficientnet-baseline-train-amp-aug  
  https://www.kaggle.com/szuzhangzhi/vision-transformer-vit-cuda-as-usual

In [None]:
# Uninstall fastai to resolve dependency conflicts
!pip uninstall fastai -y
# Install packages without internet
!pip install ../input/packages/torch-1.7.1-cp37-cp37m-manylinux1_x86_64.whl
!pip install ../input/packages/torchvision-0.8.2-cp37-cp37m-manylinux1_x86_64.whl

In [None]:
import sys
sys.path.append('../input/repvggmodels/')
sys.path.append('../input/vision-transformer-pytorch/VisionTransformer-Pytorch')

from repvgg import RepVGG, create_RepVGG_B3g4, create_RepVGG_B3, repvgg_model_convert
from vision_transformer_pytorch import VisionTransformer

In [None]:
import os
import cv2
import time
import random
import joblib
import sklearn
import warnings
import multiprocessing
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from glob import glob
from tqdm import tqdm
from pathlib import Path
from datetime import datetime
from skimage import io
from sklearn import metrics
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GroupKFold, StratifiedKFold
from sklearn.metrics import roc_auc_score, log_loss
from IPython.display import display
from catalyst.data.sampler import BalanceClassSampler

import torch
import torch.nn.functional as F
import torchvision
from torch import nn
from torch.cuda.amp import autocast, GradScaler
from torch.utils.data import Dataset,DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
from torch.nn.modules.loss import _WeightedLoss
from torchvision import models
from torchvision import transforms

from albumentations.pytorch import ToTensor, ToTensorV2
from albumentations import (
    HorizontalFlip, VerticalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90,
    Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, HueSaturationValue,
    IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, IAAPiecewiseAffine, RandomResizedCrop,
    IAASharpen, IAAEmboss, RandomBrightnessContrast, Flip, OneOf, Compose, Normalize, Cutout, CoarseDropout,
    ShiftScaleRotate, CenterCrop, Resize)

pd.set_option("max_rows", 100)

In [None]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy and PyTorch (CPU and every CUDA
    device), exports ``PYTHONHASHSEED`` and configures cuDNN for
    reproducible execution.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Covers every visible GPU; silently ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode lets cuDNN auto-tune and pick different algorithms from
    # run to run, which defeats the deterministic setting above.
    torch.backends.cudnn.benchmark = False


seed_everything(42)

In [None]:
# Runtime configuration shared by the cells below.
# When the test directory holds exactly one file, 'fold' is 0 (predict() then
# stops after the first checkpoint) and a single TTA pass is run; otherwise
# 'fold' is 99 and four TTA passes are averaged.
_test_image_dir = '../input/cassava-leaf-disease-classification/test_images/'
_single_test_image = len(os.listdir(_test_image_dir)) == 1

CFG = dict(
    seed=42,
    fold=0 if _single_test_image else 99,
    tta=1 if _single_test_image else 4,
    img_size=400,
    valid_bs=32,
    num_workers=multiprocessing.cpu_count(),
    device="cuda:0" if torch.cuda.is_available() else "cpu",
)

In [None]:
def get_img(path):
    """Load an image from disk and return it in RGB channel order.

    Args:
        path: File-system path of the image to read.

    Returns:
        The decoded image as a contiguous ``numpy.ndarray`` whose channels
        have been reordered from OpenCV's BGR to RGB.

    Raises:
        FileNotFoundError: If OpenCV cannot read an image from ``path``.
    """
    im_bgr = cv2.imread(path)
    # cv2.imread reports failure by returning None rather than raising, which
    # would otherwise surface later as an opaque "NoneType" indexing error.
    if im_bgr is None:
        raise FileNotFoundError(f"Could not read image: {path}")
    # cvtColor allocates a fresh contiguous array, whereas slicing with
    # [:, :, ::-1] yields a negative-stride view.
    return cv2.cvtColor(im_bgr, cv2.COLOR_BGR2RGB)

# Build a stacking model on top of the noise and clean models

In [None]:
class CassavaDataset(Dataset):
    """Dataset serving the images named in a frame's ``image_id`` column.

    Args:
        df: DataFrame with an ``image_id`` column; a re-indexed copy is kept.
        data_root: Directory prefix prepended to each ``image_id``.
        transforms: Optional callable invoked as ``transforms(image=img)``;
            the ``'image'`` entry of its result is returned.
        output_label: Stored on the instance only. ``__getitem__`` does not
            consult it and always returns just the image.
    """

    def __init__(self, df, data_root, transforms=None, output_label=True):
        super().__init__()
        self.data_root = data_root
        self.transforms = transforms
        self.output_label = output_label
        # Fresh 0..n-1 index so the integer handed to __getitem__ is a valid label.
        self.df = df.reset_index(drop=True).copy()

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index: int):
        image_id = self.df.loc[index]['image_id']
        image = get_img(f"{self.data_root}/{image_id}")
        if not self.transforms:
            return image
        return self.transforms(image=image)['image']

In [None]:
def get_inference_transforms():
    """Build the albumentations pipeline applied to each image at inference.

    The pipeline contains random steps (crop, transpose, flips, colour
    jitter) followed by normalisation and tensor conversion. The crop size
    is taken from ``CFG['img_size']`` at the moment this function is called.
    """
    size = CFG['img_size']
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]

    steps = [
        RandomResizedCrop(size, size),
        Transpose(p=0.5),
        HorizontalFlip(p=0.5),
        VerticalFlip(p=0.5),
        HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5),
        RandomBrightnessContrast(brightness_limit=(-0.1, 0.1), contrast_limit=(-0.1, 0.1), p=0.5),
        Normalize(mean=channel_mean, std=channel_std, max_pixel_value=255.0, p=1.0),
        ToTensorV2(p=1.0),
    ]
    return Compose(steps, p=1.)

In [None]:
class FFN(nn.Module):
    """Classification head stacked on a backbone that emits 1000 features.

    The backbone output is passed through
    ``Linear(1000, 256) -> ReLU -> Dropout(0.5) -> Linear(256, 5)``.

    Args:
        backbone: Module called first in ``forward``; its output feeds ``lr1``.
    """

    def __init__(self, backbone=None):
        super().__init__()
        self.backbone = backbone
        # Attribute names double as state_dict keys, so they are kept as-is
        # to stay compatible with checkpoints loaded via load_state_dict.
        self.lr1 = nn.Linear(1000, 256)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.lr2 = nn.Linear(256, 5)

    def forward(self, x):
        features = self.backbone(x)
        hidden = self.dropout(self.relu(self.lr1(features)))
        return self.lr2(hidden)
    
class ViT(nn.Module):
    """Thin wrapper that forwards its input straight through ``backbone``.

    It gives a backbone the same ``Model(backbone)`` construction shape as
    ``FFN`` without adding any layers of its own.

    Args:
        backbone: Module whose output is returned unchanged by ``forward``.
    """

    def __init__(self, backbone=None):
        super().__init__()
        self.backbone = backbone

    def forward(self, x):
        return self.backbone(x)

In [None]:
def inference_one_epoch(model, data_loader, device):
    """Run one pass over ``data_loader`` and return softmax probabilities.

    Args:
        model: Module to evaluate; it is switched to eval mode.
        data_loader: Loader yielding batches of images only (no labels).
        device: Device each batch is moved to before the forward pass.

    Returns:
        ``numpy.ndarray`` built by concatenating, along axis 0, the softmax
        (over dimension 1) of the model output for every batch.
    """
    model.eval()

    batch_probs = []
    # Gradients are never needed here; disabling them locally keeps the
    # function memory-safe even when the caller forgets torch.no_grad().
    with torch.no_grad():
        for imgs in tqdm(data_loader, total=len(data_loader)):
            imgs = imgs.to(device).float()
            logits = model(imgs)
            batch_probs.append(torch.softmax(logits, 1).detach().cpu().numpy())

    return np.concatenate(batch_probs, axis=0)

In [None]:
def predict(df, data_path, model_name, model_path, backbone, load=True):
    """Average test-time-augmented class probabilities over saved checkpoints.

    Every file in ``model_path`` whose name does not contain ``"csv"`` is
    treated as one checkpoint. For each of them ``model_name(backbone)`` is
    built, optionally loaded from the file, and run ``CFG['tta']`` times over
    the images listed in ``df``; those passes are averaged.

    Args:
        df: DataFrame with an ``image_id`` column naming the images to score.
        data_path: Directory containing those images.
        model_name: Callable (e.g. an ``nn.Module`` subclass) that wraps
            ``backbone`` into the model to evaluate.
        model_path: Directory holding the checkpoint files.
        backbone: Backbone module handed to ``model_name``; the same instance
            is reused for every checkpoint.
        load: When true, load each checkpoint's state dict into the model;
            when false, evaluate the model with the weights it already has.

    Returns:
        Array of shape ``(len(df), 5)``. When ``CFG['fold'] == 0`` only the
        first checkpoint (in sorted filename order) is evaluated and its
        probabilities are returned; otherwise the mean over all checkpoints.

    Raises:
        FileNotFoundError: If ``model_path`` contains no checkpoint files.
    """
    # Sorted so that "the first checkpoint" is the same file on every run;
    # os.listdir gives no ordering guarantee.
    model_files = sorted(m for m in os.listdir(model_path) if m.find("csv") == -1)
    if not model_files:
        # Without this guard the final division would silently return NaNs.
        raise FileNotFoundError(f"No model files found in {model_path}")

    # The dataset and loader do not depend on the checkpoint, so build them once.
    device = torch.device(CFG['device'])
    dataset = CassavaDataset(df, data_path, transforms=get_inference_transforms(), output_label=False)
    data_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=CFG['valid_bs'],
        num_workers=CFG['num_workers'],
        shuffle=False,
        pin_memory=False)

    # One row per requested image: sized from df, not from the directory
    # listing, so scoring a subset of a folder works too.
    results = np.zeros((len(df), 5))
    for model_file in model_files:
        model = model_name(backbone)
        if load:
            # map_location handles CPU-only sessions and GPU sessions alike.
            state_dict = torch.load(os.path.join(model_path, model_file), map_location=device)
            model.load_state_dict(state_dict)
        backbone.to(device)
        model.to(device)

        tta_passes = []
        with torch.no_grad():
            for _ in range(CFG['tta']):
                tta_passes.append((1 / CFG['tta']) * inference_one_epoch(model, data_loader, device))
        results += np.sum(tta_passes, 0)

        del model
        torch.cuda.empty_cache()

        # Quick mode: return after the first checkpoint without averaging.
        if CFG['fold'] == 0:
            return results

    return results / len(model_files)

In [None]:
# Score the full training set with the checkpoints trained on noise labels.
# The resulting probabilities are later turned into features for the
# random-forest classifier.
train      = pd.read_csv('../input/cassava-leaf-disease-classification/train.csv')
train_path = '../input/cassava-leaf-disease-classification/train_images/'
model_path = '../input/cassava-models-trained-with-noise-labels/'
# Backbone built by the factory imported from the bundled repvgg module;
# predict() wraps it in FFN and loads each checkpoint into the result.
backbone   = create_RepVGG_B3g4(deploy=True)

results_from_noise_model = predict(train, train_path, FFN, model_path, backbone)

In [None]:
# Second pass over the training set, this time with the checkpoints stored in
# a different dataset and a freshly created backbone instance.
# NOTE(review): this dataset name contains "noised-label" while the output is
# kept as the clean model's result — confirm the two model_path values are
# not swapped.
model_path = '../input/cassava-noised-label-data/'
backbone   = create_RepVGG_B3g4(deploy=True)

results_from_clean_model = predict(train, train_path, FFN, model_path, backbone)

In [None]:
# Assemble the stacking features: per-class probabilities of both models,
# each model's predicted class, and how much more confident a model is in
# its own top class than the other model is in that same class.
noise_top = np.argmax(results_from_noise_model, 1)
clean_top = np.argmax(results_from_clean_model, 1)
class_one_hot = np.identity(5)

noise_columns = [f"l{l}_noise" for l in range(5)]
clean_columns = [f"l{l}_clean" for l in range(5)]
df_noise = pd.DataFrame(results_from_noise_model, columns=noise_columns)
df_clean = pd.DataFrame(results_from_clean_model, columns=clean_columns)
df_all_labels = pd.concat([df_noise, df_clean], axis=1)

# Feature engineering
df_all_labels["pred_noise"] = noise_top
df_all_labels["pred_clean"] = clean_top
# Multiplying by the one-hot row and summing selects the other model's
# probability for this model's predicted class.
clean_at_noise_top = (results_from_clean_model * class_one_hot[noise_top]).sum(1)
noise_at_clean_top = (results_from_noise_model * class_one_hot[clean_top]).sum(1)
df_all_labels["diff_noise_proba"] = results_from_noise_model.max(1) - clean_at_noise_top
df_all_labels["diff_clean_proba"] = results_from_clean_model.max(1) - noise_at_clean_top

print(df_all_labels.shape)
print(list(train.label[:5]))
df_all_labels.head(5)

In [None]:
def show_accuracy(p, l):
    """Print the fraction of predictions that match the labels.

    Args:
        p: ``numpy.ndarray`` of predictions. A 2-D array is treated as
            per-class scores and reduced with ``argmax`` over axis 1; any
            other array is compared with the labels element-wise as-is.
        l: Ground-truth labels as any array-like (Series, ndarray, list).
    """
    # Convert once so plain lists work too; the count previously relied on
    # ``l`` having a ``.shape`` attribute.
    labels = np.asarray(l)
    predicted = np.argmax(p, 1) if p.ndim == 2 else p
    print(np.sum(predicted == labels) / labels.shape[0])

# Accuracy of each model's predictions on the training images, measured
# against the labels in train.csv.
show_accuracy(results_from_noise_model, train.label)
show_accuracy(results_from_clean_model, train.label)

I tried three approaches (SVM / Random Forest / LightGBM). The best private score came from RF.

In [None]:
# Fit the stacking classifier on the engineered features, holding out 30% of
# the rows (stratified by label) to report its accuracy.
# NOTE(review): if the base models were trained on these same images, the
# hold-out accuracy printed below is likely optimistic — confirm.
X_train, X_test, y_train, y_test = train_test_split(np.array(df_all_labels), np.array(train.label),
                                                    test_size=0.3, random_state=42, stratify=np.array(train.label))
rfc = RandomForestClassifier(n_estimators=200, max_depth=10, min_samples_leaf=10, random_state=42)
rfc.fit(X_train, y_train)
# predict() returns class labels (1-D), so show_accuracy compares them directly.
preds_with_rfc = rfc.predict(X_test)
show_accuracy(preds_with_rfc, y_test)

# Inference

In [None]:
# Build the test frame from whatever files are present in the test directory,
# then score them with the noise-label checkpoints.
test_path  = '../input/cassava-leaf-disease-classification/test_images/'
test = pd.DataFrame()
test['image_id'] = list(os.listdir(test_path))
model_path = '../input/cassava-models-trained-with-noise-labels/'
backbone   = create_RepVGG_B3g4(deploy=True)

# Overwrites the training-set predictions stored under the same name.
results_from_noise_model = predict(test, test_path, FFN, model_path, backbone)

In [None]:
# Score the test images with the second set of checkpoints, using a fresh
# backbone instance.
model_path = '../input/cassava-noised-label-data/'
backbone   = create_RepVGG_B3g4(deploy=True)

# Overwrites the training-set predictions stored under the same name.
results_from_clean_model = predict(test, test_path, FFN, model_path, backbone)

In [None]:
# Score the test images with the ViT model. Its weights are loaded here, so
# predict() is called with load=False and only wraps the ready backbone.
model_path = '../input/vit-model-1/'
backbone   = VisionTransformer.from_name('ViT-B_16', num_classes=5)
# map_location keeps the load working on CPU-only sessions, matching the way
# predict() loads its own checkpoints.
backbone.load_state_dict(torch.load(os.path.join(model_path, "ViT-B_16.pt"), map_location=CFG['device']))

# predict() builds its transforms from CFG['img_size'], so this switches the
# crop size to 384 for the ViT pass. The change stays in CFG afterwards.
CFG["img_size"] = 384

results_from_vit = predict(test, test_path, ViT, model_path, backbone, load=False)

In [None]:
def expand_ndim2(ar):
    """Return a 1-D array reshaped to a single column; pass others through.

    Args:
        ar: ``numpy.ndarray`` of any rank.

    Returns:
        ``ar.reshape(-1, 1)`` when ``ar`` is 1-D, otherwise ``ar`` itself.
    """
    return ar.reshape(-1, 1) if ar.ndim == 1 else ar

# Rebuild, for the test images, the same features in the same column order
# that the random forest was fitted on: noise probabilities, clean
# probabilities, both predicted classes, then both confidence gaps.
noise_top = np.argmax(results_from_noise_model, 1)
clean_top = np.argmax(results_from_clean_model, 1)
class_one_hot = np.identity(5)

# One-hot masking picks the other model's probability for this model's top class.
clean_at_noise_top = (results_from_clean_model * class_one_hot[noise_top]).sum(1)
noise_at_clean_top = (results_from_noise_model * class_one_hot[clean_top]).sum(1)

# Each 1-D feature becomes a column so it can be stacked next to the
# probability matrices.
ar_pred_noise = expand_ndim2(noise_top)
ar_pred_clean = expand_ndim2(clean_top)
ar_diff_noise_proba = expand_ndim2(results_from_noise_model.max(1) - clean_at_noise_top)
ar_diff_clean_proba = expand_ndim2(results_from_clean_model.max(1) - noise_at_clean_top)

feature_blocks = [
    results_from_noise_model,
    results_from_clean_model,
    ar_pred_noise,
    ar_pred_clean,
    ar_diff_noise_proba,
    ar_diff_clean_proba,
]
ar_all_labels = np.hstack(feature_blocks)

# Make final result: weighted blend of the random forest and the ViT output
preds_final   = 0.6*rfc.predict_proba(ar_all_labels) + 0.4*results_from_vit

In [None]:
# The submitted label is the class with the highest blended probability.
test['label'] = np.argmax(preds_final, 1)
test.head()

In [None]:
# Write the image_id / label columns to the submission file, without the index.
test.to_csv('submission.csv', index=False)