<a href="https://colab.research.google.com/github/quang-vo-ds/banana_leaf_disease_detection/blob/main/banana_leaf_disease_inference.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Initial Setup

In [1]:
!pip -q install pydicom
!pip -q install timm
!pip -q install catalyst

In [2]:
from glob import glob
from sklearn.model_selection import GroupKFold, StratifiedKFold
import cv2
from skimage import io
import torch
from torch import nn
import os
from datetime import datetime
import time
import random
import cv2
import torchvision
from torchvision import transforms
import pandas as pd
import numpy as np
from tqdm import tqdm

import matplotlib.pyplot as plt
from torch.utils.data import Dataset,DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
from torch.cuda.amp import autocast, GradScaler
from torch.nn.modules.loss import _WeightedLoss
import torch.nn.functional as F

import sklearn
import warnings
import joblib
from sklearn.metrics import roc_auc_score, log_loss
from sklearn import metrics
import warnings
import cv2
import pydicom
import timm
#from efficientnet_pytorch import EfficientNet
from scipy.ndimage import zoom
import pickle

In [3]:
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive/Vin_ML_Course/Final_Project
root_dir = os.getcwd()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/Vin_ML_Course/Final_Project


## Global Config

In [4]:
CFG = {
    'fold_num': 5,
    'seed': 719,
    'model_arch': 'tf_efficientnet_b4_ns',
    'img_size': 512,
    'epochs': 10,
    'train_bs': 32,
    'valid_bs': 32,
    'lr': 1e-4,
    'num_workers': 4,
    'accum_iter': 1, # suppoprt to do batch accumulation for backprop with effectively larger batch size
    'verbose_step': 1,
    'device': 'cuda:0',
    'tta': 3,
    'used_epochs': [6,7,8,9],
    'weights': [1,1,1,1]
}

## Input

In [5]:
test_dir = os.path.join(root_dir, "test")
save_data_dir = os.path.join(root_dir, "output/processed_data")
save_model_dir = os.path.join(root_dir, "output/checkpoints")
with open(os.path.join(save_data_dir, "class_int_to_name.dict"), 'rb') as f:
    class_int_to_name = pickle.load(f)

In [6]:
test = {'id': [], 'path': []}
for f in glob(test_dir + '/*'):
    f_name = f[f.rfind("/")+1:]

    # id and class
    test['id'].append(f_name)
    test['path'].append(f)

test = pd.DataFrame(test)
test = test.dropna()
test.head()

Unnamed: 0,id,path
0,9.jpg,/content/drive/MyDrive/Vin_ML_Course/Final_Pro...
1,6.jpg,/content/drive/MyDrive/Vin_ML_Course/Final_Pro...
2,10.jpg,/content/drive/MyDrive/Vin_ML_Course/Final_Pro...
3,4.jpg,/content/drive/MyDrive/Vin_ML_Course/Final_Pro...
4,8.jpg,/content/drive/MyDrive/Vin_ML_Course/Final_Pro...


## Global Config

## Utils

In [7]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

def get_img(path):
    im_bgr = cv2.imread(path)
    im_rgb = im_bgr[:, :, ::-1]
    #print(im_rgb)
    return im_rgb

## Dataset

In [8]:
class BananaDataset(Dataset):
    def __init__(self, df,
                 transforms=None,
                 output_label=True,
                 one_hot_label=False,
                ):

        super().__init__()
        self.df = df.copy()
        self.transforms = transforms
        self.output_label = output_label
        self.one_hot_label = one_hot_label

        if output_label == True:
            self.labels = self.df['label'].values
            if one_hot_label is True:
                self.labels = np.eye(self.df['label'].max()+1)[self.labels]

    def __len__(self):
        return self.df.shape[0]

    def __getitem__(self, index: int):

        # get labels
        if self.output_label:
            target = self.labels[index]

        img_dir = self.df.iloc[index].path
        img  = get_img(img_dir)

        if self.transforms:
            img = self.transforms(image=img)['image']

        if self.output_label == True:
            return img, target
        else:
            return img

## Image Augmentation

In [9]:
from albumentations import Normalize, Resize, Compose
from albumentations.pytorch import ToTensorV2

def get_inference_transforms():
    return Compose([
        Resize(CFG['img_size'], CFG['img_size']),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
        ToTensorV2(p=1.0),
        ], p=1.)

## Model

In [10]:
class MyImgClassifier(nn.Module):
    def __init__(self, model_arch, n_class, pretrained=False):
        super().__init__()
        self.model = timm.create_model(model_arch, pretrained=pretrained)
        n_features = self.model.classifier.in_features
        self.model.classifier = nn.Linear(n_features, n_class)
        '''
        self.model.classifier = nn.Sequential(
            nn.Dropout(0.3),
            #nn.Linear(n_features, hidden_size,bias=True), nn.ELU(),
            nn.Linear(n_features, n_class, bias=True)
        )
        '''
    def forward(self, x):
        x = self.model(x)
        return x

## Main

In [11]:
def inference_one_epoch(model, data_loader, device):
    model.eval()

    image_preds_all = []

    pbar = tqdm(enumerate(data_loader), total=len(data_loader))
    for step, (imgs) in pbar:
        imgs = imgs.to(device).float()

        image_preds = model(imgs)
        image_preds_all += [torch.softmax(image_preds, 1).detach().cpu().numpy()]


    image_preds_all = np.concatenate(image_preds_all, axis=0)
    return image_preds_all

In [12]:
if __name__ == '__main__':

    seed_everything(CFG['seed'])

    for fold in range(CFG['fold_num']):
        # we'll train fold 0 first
        if fold > 0:
            break

        test_ds = BananaDataset(test, transforms=get_inference_transforms(), output_label=False)
        tst_loader = torch.utils.data.DataLoader(
            test_ds,
            batch_size=CFG['valid_bs'],
            num_workers=CFG['num_workers'],
            shuffle=False,
            pin_memory=False,
        )

        device = torch.device(CFG['device'])
        model = MyImgClassifier(CFG['model_arch'], len(class_int_to_name.keys())).to(device)

        tst_preds = []

        for i, epoch in enumerate(CFG['used_epochs']):
            model.load_state_dict(torch.load(os.path.join(save_model_dir,'{}_fold_{}_{}'.format(CFG['model_arch'], fold, epoch))))

            with torch.no_grad():
                for _ in range(CFG['tta']):
                    tst_preds += [CFG['weights'][i]/sum(CFG['weights'])/CFG['tta']*inference_one_epoch(model, tst_loader, device)]

        tst_preds = np.mean(tst_preds, axis=0)

        del model
        torch.cuda.empty_cache()

  model = create_fn(
100%|██████████| 1/1 [00:01<00:00,  1.40s/it]
100%|██████████| 1/1 [00:00<00:00,  2.58it/s]
100%|██████████| 1/1 [00:00<00:00,  2.50it/s]
100%|██████████| 1/1 [00:00<00:00,  2.08it/s]
100%|██████████| 1/1 [00:00<00:00,  2.00it/s]
100%|██████████| 1/1 [00:00<00:00,  1.99it/s]
100%|██████████| 1/1 [00:00<00:00,  1.85it/s]
100%|██████████| 1/1 [00:00<00:00,  1.96it/s]
100%|██████████| 1/1 [00:00<00:00,  2.00it/s]
100%|██████████| 1/1 [00:00<00:00,  2.49it/s]
100%|██████████| 1/1 [00:00<00:00,  2.55it/s]
100%|██████████| 1/1 [00:00<00:00,  2.55it/s]


## Submission

In [13]:
test['label'] = np.argmax(tst_preds, axis=1)
test['label_name'] = test['label'].replace(class_int_to_name)
test.head()

Unnamed: 0,id,path,label,label_name
0,9.jpg,/content/drive/MyDrive/Vin_ML_Course/Final_Pro...,4,xanthomonas_wilt
1,6.jpg,/content/drive/MyDrive/Vin_ML_Course/Final_Pro...,4,xanthomonas_wilt
2,10.jpg,/content/drive/MyDrive/Vin_ML_Course/Final_Pro...,0,healthy
3,4.jpg,/content/drive/MyDrive/Vin_ML_Course/Final_Pro...,0,healthy
4,8.jpg,/content/drive/MyDrive/Vin_ML_Course/Final_Pro...,3,black_segatoka
