In [None]:
#!pip install tez
#!pip install efficientnet-pytorch

**Setting up the tez library and EfficientNet-PyTorch for offline use**

In [None]:
import sys

# Locations of the tez and efficientnet-pytorch sources used for offline import.
tez_path = '../input/tez-lib/'
effnet_path = '../input/efficientnet-pytorch/'

# Appended (not prepended), so any copy already importable keeps precedence.
sys.path.extend([tez_path, effnet_path])

In [None]:
import os
import albumentations
import pandas as pd

import tez
from tez.datasets import ImageDataset
from tez.callbacks import EarlyStopping

import torch
import torch.nn as nn
from torch.nn import functional as F

from efficientnet_pytorch import EfficientNet
from sklearn import metrics, model_selection, preprocessing

In [None]:
class LeafModel(tez.Model):
    """EfficientNet-B4 feature extractor followed by dropout and a linear classifier.

    Args:
        num_classes: Size of the output layer (number of target classes).
        pretrained: When True (the default, and the original behaviour) the
            backbone is built with ``EfficientNet.from_pretrained``. When False
            it is built with ``EfficientNet.from_name``, which only constructs
            the architecture; use this when a saved checkpoint is loaded right
            afterwards, e.g. for offline inference.
    """

    def __init__(self, num_classes, pretrained=True):
        super().__init__()

        if pretrained:
            self.effnet = EfficientNet.from_pretrained("efficientnet-b4")
        else:
            self.effnet = EfficientNet.from_name("efficientnet-b4")
        self.dropout = nn.Dropout(0.1)
        # The input width must match the channel count of the pooled backbone
        # features computed in `forward` (1792 for this backbone).
        self.out = nn.Linear(1792, num_classes)
        self.step_scheduler_after = "epoch"

    def monitor_metrics(self, outputs, targets):
        """Compute batch accuracy.

        Returns:
            ``{"accuracy": value}``, or an empty dict when ``targets`` is None.
        """
        if targets is None:
            return {}
        predicted = torch.argmax(outputs, dim=1).cpu().detach().numpy()
        expected = targets.cpu().detach().numpy()
        return {"accuracy": metrics.accuracy_score(expected, predicted)}

    def fetch_optimizer(self):
        """Return an Adam optimizer over all model parameters (lr=3e-4)."""
        return torch.optim.Adam(self.parameters(), lr=3e-4)

    def fetch_scheduler(self):
        """Return a cosine-annealing-with-warm-restarts scheduler for ``self.optimizer``."""
        return torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
            self.optimizer, T_0=10, T_mult=1, eta_min=1e-6, last_epoch=-1
        )

    def forward(self, image, targets=None):
        """Run the network on a batch of images.

        Args:
            image: 4-D image batch; the first dimension is the batch size.
            targets: Optional class indices. When given, loss and metrics are
                computed as well.

        Returns:
            ``(outputs, loss, metrics)``; ``loss`` and ``metrics`` are None
            when ``targets`` is None.
        """
        batch_size, _, _, _ = image.shape

        features = self.effnet.extract_features(image)
        # Global average pool to one value per channel, then flatten per sample.
        pooled = F.adaptive_avg_pool2d(features, 1).reshape(batch_size, -1)
        outputs = self.out(self.dropout(pooled))

        if targets is None:
            return outputs, None, None

        loss = F.cross_entropy(outputs, targets)
        # Named `batch_metrics` so the sklearn `metrics` module is not shadowed.
        batch_metrics = self.monitor_metrics(outputs, targets)
        return outputs, loss, batch_metrics

In [None]:
# Training-time augmentation pipeline. Transforms run in the order listed:
# random resized crop to 256x256, random transpose/flips/shift-scale-rotate,
# colour jitter, per-channel normalisation, then two random occlusion transforms.
train_aug = albumentations.Compose([
            albumentations.RandomResizedCrop(256, 256),
            albumentations.Transpose(p=0.5),
            albumentations.HorizontalFlip(p=0.5),
            albumentations.VerticalFlip(p=0.5),
            albumentations.ShiftScaleRotate(p=0.5),
            # NOTE(review): albumentations' documented defaults for these limits are
            # on an integer scale (20/30/20); 0.2 may make this step close to a
            # no-op on uint8 images — confirm the intended magnitude.
            albumentations.HueSaturationValue(
                hue_shift_limit=0.2, 
                sat_shift_limit=0.2, 
                val_shift_limit=0.2, 
                p=0.5
            ),
            albumentations.RandomBrightnessContrast(
                brightness_limit=(-0.1,0.1), 
                contrast_limit=(-0.1, 0.1), 
                p=0.5
            ),
            # Same mean/std constants as valid_aug; these are the widely used
            # ImageNet channel statistics.
            albumentations.Normalize(
                mean=[0.485, 0.456, 0.406], 
                std=[0.229, 0.224, 0.225], 
                max_pixel_value=255.0, 
                p=1.0
            ),
            # Both occlusion transforms run on the already-normalised image.
            # NOTE(review): Cutout is deprecated in newer albumentations releases
            # in favour of CoarseDropout — confirm against the pinned version.
            albumentations.CoarseDropout(p=0.5),
            albumentations.Cutout(p=0.5)], p=1.)
  
        
# Validation-time pipeline: no random transforms (every step has p=1), using the
# same normalisation constants as train_aug.
valid_aug = albumentations.Compose([
            # NOTE(review): a 256x256 centre crop already has the target size, so
            # the Resize(256, 256) that follows looks redundant. It also means
            # validation only sees the image centre, whereas training uses
            # RandomResizedCrop — confirm this mismatch is intended.
            albumentations.CenterCrop(256, 256, p=1.),
            albumentations.Resize(256, 256),
            albumentations.Normalize(
                mean=[0.485, 0.456, 0.406], 
                std=[0.229, 0.224, 0.225], 
                max_pixel_value=255.0, 
                p=1.0
            )], p=1.)

In [None]:
# Fixed seed so the shuffle below, and therefore each row's fold assignment,
# is identical on every Restart & Run All.
SEED = 42

dfx = pd.read_csv('../input/cassava-leaf-disease-classification/train.csv')
dfx = dfx.dropna().reset_index(drop=True)
# -1 marks "not yet assigned to a validation fold".
dfx["kfold"] = -1

# Shuffle once up front; StratifiedKFold is then used without its own shuffle.
dfx = dfx.sample(frac=1, random_state=SEED).reset_index(drop=True)

kf = model_selection.StratifiedKFold(n_splits=5)

# Stratify on the label so each fold keeps the overall class proportions;
# each row's `kfold` is the index of the fold in which it is validation data.
for fold, (trn_, val_) in enumerate(kf.split(X=dfx, y=dfx.label.values)):
    print(len(trn_), len(val_))
    dfx.loc[val_, 'kfold'] = fold

# Every row must have landed in exactly one validation fold.
assert (dfx["kfold"] >= 0).all(), "some rows were not assigned to a fold"

In [None]:
def run(fold):
    """Train a LeafModel with `fold` held out for validation and checkpoint it.

    Rows of the global `dfx` whose `kfold` differs from `fold` form the training
    set; rows whose `kfold` equals `fold` form the validation set. The weights
    are written to ``model_fold{fold}.bin`` in the working directory.

    Args:
        fold: Index of the fold to use as validation data.
    """
    model_p = f"model_fold{fold}.bin"
    # Drop any checkpoint left by a previous run so the file that exists after
    # training is guaranteed to come from this run.
    if os.path.exists(model_p):
        os.remove(model_p)

    df_train = dfx[dfx.kfold != fold].reset_index(drop=True)
    df_valid = dfx[dfx.kfold == fold].reset_index(drop=True)

    image_path = "../input/cassava-leaf-disease-classification/train_images/"
    train_image_paths = [os.path.join(image_path, x) for x in df_train.image_id.values]
    valid_image_paths = [os.path.join(image_path, x) for x in df_valid.image_id.values]
    train_targets = df_train.label.values
    valid_targets = df_valid.label.values

    train_dataset = ImageDataset(
        image_paths=train_image_paths,
        targets=train_targets,
        augmentations=train_aug,
    )

    valid_dataset = ImageDataset(
        image_paths=valid_image_paths,
        targets=valid_targets,
        augmentations=valid_aug,
    )

    model = LeafModel(num_classes=dfx.label.nunique())
    # Early stopping monitors validation loss and checkpoints to `model_p`.
    es = EarlyStopping(monitor="valid_loss", model_path=model_p, patience=3, mode="min")
    model.fit(
        train_dataset,
        valid_dataset=valid_dataset,
        train_bs=64,
        valid_bs=64,
        device="cuda",
        epochs=10,
        callbacks=[es],
        fp16=True,
    )
    # EarlyStopping writes the best-valid_loss weights to `model_p`. Saving
    # unconditionally here would overwrite them with the last epoch's weights,
    # which can be up to `patience` epochs past the best one. Only fall back to
    # the final weights when the callback produced no checkpoint at all.
    if not os.path.exists(model_p):
        model.save(model_p)

In [None]:
# One training run per fold index produced by the 5-way split, in fold order.
for fold in [0, 1, 2, 3, 4]:
    run(fold=fold)

## Inference code: test-time augmentation with the five fold models, combined by majority vote

In [None]:
import os
import albumentations
import pandas as pd
import numpy as np

import tez
from tez.datasets import ImageDataset

import torch
import torch.nn as nn
from torch.nn import functional as F

from efficientnet_pytorch import EfficientNet

In [None]:
# Test-time augmentation pipeline. Unlike valid_aug, the crop, transpose/flips
# and colour jitter are random, so repeated passes over the same image produce
# different inputs; run_inference averages several such passes.
test_aug = albumentations.Compose([
    albumentations.RandomResizedCrop(256, 256),
    albumentations.Transpose(p=0.5),
    albumentations.HorizontalFlip(p=0.5),
    albumentations.VerticalFlip(p=0.5),
    # NOTE(review): albumentations' documented defaults for these limits are on
    # an integer scale (20/30/20); 0.2 may make this step close to a no-op on
    # uint8 images — confirm the intended magnitude.
    albumentations.HueSaturationValue(
        hue_shift_limit=0.2, 
        sat_shift_limit=0.2,
        val_shift_limit=0.2, 
        p=0.5
    ),
    albumentations.RandomBrightnessContrast(
        brightness_limit=(-0.1,0.1), 
        contrast_limit=(-0.1, 0.1), 
        p=0.5
    ),
    # Same normalisation constants as the training and validation pipelines.
    albumentations.Normalize(
        mean=[0.485, 0.456, 0.406], 
        std=[0.229, 0.224, 0.225], 
        max_pixel_value=255.0, 
        p=1.0
    )
], p=1.)

In [None]:
# The sample submission supplies the test image ids. Its label column is passed
# to ImageDataset purely as placeholder targets.
# NOTE: this rebinds `dfx`, which held the training frame in the training cells.
image_path = "../input/cassava-leaf-disease-classification/test_images/"
dfx = pd.read_csv("../input/cassava-leaf-disease-classification/sample_submission.csv")

test_image_paths = [
    os.path.join(image_path, file_name) for file_name in dfx["image_id"].values
]
test_targets = dfx["label"].values  # fake targets

test_dataset = ImageDataset(
    image_paths=test_image_paths,
    targets=test_targets,
    augmentations=test_aug,
)

In [None]:
def _load_fold_model(fold, num_classes):
    """Build a LeafModel and restore the weights saved as ``model_fold{fold}.bin``."""
    fold_model = LeafModel(num_classes=num_classes)
    fold_model.load(f"model_fold{fold}.bin")
    return fold_model


train_dfx = pd.read_csv("../input/cassava-leaf-disease-classification/train.csv")
# The class count depends only on the training labels, so compute it once.
num_classes = train_dfx.label.nunique()

# The individual names are kept because later cells refer to model0 ... model4.
model0, model1, model2, model3, model4 = (
    _load_fold_model(fold, num_classes) for fold in range(5)
)

In [None]:
from statistics import mode
def run_inference(model, dataset=None, n_tta=5):
    """Predict class indices by averaging several prediction passes.

    Each pass calls ``model.predict`` over the whole dataset and stacks the
    per-batch score arrays it yields. The scores of all passes are averaged
    and the arg-max over axis 1 is returned.

    Args:
        model: Object exposing ``predict(dataset, batch_size=..., n_jobs=...,
            device=...)`` that yields one 2-D score array per batch.
        dataset: Dataset to predict on. Defaults to the module-level
            ``test_dataset`` (the original behaviour).
        n_tta: Number of prediction passes to average. Defaults to 5.

    Returns:
        1-D array with one predicted class index per sample.

    Raises:
        ValueError: If ``n_tta`` is less than 1 or a pass yields no batches.
    """
    if dataset is None:
        dataset = test_dataset
    if n_tta < 1:
        raise ValueError("n_tta must be at least 1")

    summed_scores = None
    for _ in range(n_tta):
        batches = list(
            model.predict(dataset, batch_size=32, n_jobs=-1, device="cuda")
        )
        if not batches:
            raise ValueError("model.predict yielded no batches; is the dataset empty?")
        # Stack once per pass instead of re-stacking after every batch.
        pass_scores = np.vstack(batches)
        if summed_scores is None:
            summed_scores = pass_scores
        else:
            summed_scores = summed_scores + pass_scores

    mean_scores = summed_scores / n_tta
    return mean_scores.argmax(axis=1)
    

In [None]:
# One column of predicted class indices per fold model, in model order.
new_df = pd.DataFrame(
    {
        "model0": run_inference(model0),
        "model1": run_inference(model1),
        "model2": run_inference(model2),
        "model3": run_inference(model3),
        "model4": run_inference(model4),
    }
)

In [None]:
from collections import Counter

# Vote order matters: ties are resolved in favour of the earliest column.
MODEL_COLUMNS = ["model0", "model1", "model2", "model3", "model4"]


def _majority_vote(votes):
    """Return the most frequent value in `votes`; ties go to the value seen first.

    `statistics.mode` raises StatisticsError on ties before Python 3.8.
    Counter gives the same result as `mode` on 3.8+ and works on every version.
    """
    return Counter(votes).most_common(1)[0][0]


# Majority vote across the fold models for every test image. The result is
# always one of the models' own predictions, so no range check is needed.
final_output = [
    _majority_vote(row_votes)
    for row_votes in new_df[MODEL_COLUMNS].to_numpy().tolist()
]

# Last expression of the cell, so the notebook actually renders the preview.
new_df.head()

In [None]:
# Replace the placeholder labels with the ensemble vote and write the submission.
dfx["label"] = final_output
dfx.to_csv("submission.csv", index=False)
