In [None]:
# Directories holding offline copies of third-party packages (no internet
# access at inference time), made importable by adding them to sys.path.
package_paths = [
    '../input/pytorch-library/pytorch_library/pytorch-image-models-master',
]
import sys
sys.path.extend(package_paths)
# load the external python package
import timm

In [None]:
import os
import numpy as np
import torch
import pandas as pd
import pytorch_lightning as pl
import torch.nn as nn
import torch.nn.functional as F

from PIL import Image
from torchvision import transforms as tsfm
from torch.utils.data import Dataset, DataLoader
from pytorch_lightning.metrics import Metric

In [None]:
class CFG:
    """Static settings for the inference run: input paths, label vocabulary
    and the checkpoints / image size used by the ensemble."""

    # ---- input locations ----
    test_imgs_dir = "../input/plant-pathology-2021-fgvc8/test_images"
    submit_csv_path = "../input/plant-pathology-2021-fgvc8/sample_submission.csv"

    # ---- label vocabulary ----
    # class index -> label name (position in the list is the class index)
    label_num2str = dict(enumerate(['powdery_mildew',
                                    'scab',
                                    'complex',
                                    'frog_eye_leaf_spot',
                                    'rust']))
    # label name -> class index, derived so both maps always agree
    label_str2num = {name: idx for idx, name in label_num2str.items()}

    # ---- model / checkpoints ----
    model_name = 'tf_efficientnet_b4_ns'
    pretrained_dir = '../input/plant512b4'
    which_to_load = 'best_perform'  # last or best_perform
    needed_fold = [0, 1, 2, 3, 4, 5]
    #needed_fold = [0, 1, 2] @B5
    seed = 77
    num_classes = 5
    # size handed to the Resize transform
    #img_size = [427, 640] @B5
    img_size = [360, 512]

In [None]:
"""
Define dataset class
"""

class PlantDataset(Dataset):
    """Dataset that reads images from a directory and pairs each with a label.

    Args:
        img_dir: directory containing the image files.
        img_names: file names (relative to ``img_dir``), one per sample.
        labels: per-sample labels, indexable in the same order as ``img_names``.
        transform: optional callable applied to the RGB PIL image. When it is
            ``None`` the PIL image is returned unchanged.
    """

    def __init__(self, img_dir, img_names: list, labels: list, transform=None):
        self.img_dir = img_dir
        self.img_names = img_names
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Number of samples, i.e. number of image names."""
        return len(self.img_names)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``."""
        img_path = os.path.join(self.img_dir, self.img_names[idx])
        # convert() returns a fully loaded copy, so the underlying file can be
        # closed right away instead of waiting for garbage collection.
        with Image.open(img_path) as img_file:
            img = img_file.convert('RGB')
        # transform is optional (its default is None); only call it when given.
        if self.transform is not None:
            img = self.transform(img)
        return img, self.labels[idx]

In [None]:
"""
Define test image transformation
"""

# Test-time preprocessing: resize to the configured size, convert to a tensor,
# then normalise each channel with the given mean / std.
test_transform_normal = tsfm.Compose([
    tsfm.Resize(size=CFG.img_size),
    tsfm.ToTensor(),
    tsfm.Normalize(mean=(0.485, 0.456, 0.406),
                   std=(0.229, 0.224, 0.225)),
])

In [None]:
"""
Init dataset instance and dataloader
"""
# Every entry of the test directory is treated as one test image.
test_img_names = os.listdir(CFG.test_imgs_dir)

# The "labels" are just the running sample indices; there is no ground truth here.
test_dataset = PlantDataset(
    img_dir=CFG.test_imgs_dir,
    img_names=test_img_names,
    labels=range(len(test_img_names)),
    transform=test_transform_normal,
)

# shuffle=False keeps loader order identical to the order of test_img_names.
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=1,
    shuffle=False,
    num_workers=0,
)

In [None]:
"""
Define Focal-Loss
"""

class FocalLoss(nn.Module):
    """
    Binary (multi-label) focal loss for fighting against class-imbalance.

    Args:
        alpha: constant factor applied to every element of the loss.
        gamma: focusing exponent applied to ``(1 - pt)``.
    """
    def __init__(self, alpha=1, gamma=2):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        # Kept only so existing code reading this attribute keeps working; the
        # log-probabilities below are computed stably and need no epsilon.
        self.epsilon = 1e-12

    def forward(self, logits, target):
        """
        logits & target should be tensors with shape [batch_size, num_classes]

        Returns the mean focal loss over all elements (a scalar tensor).
        """
        # log(sigmoid(x)) and log(1 - sigmoid(x)) = log(sigmoid(-x)), computed
        # directly from the logits. The previous sigmoid -> log(p + eps) form
        # capped the loss and produced a zero gradient once the sigmoid
        # saturated, and its epsilon is not representable in half precision.
        log_probs = F.logsigmoid(logits)
        log_one_subtract_probs = F.logsigmoid(-logits)
        # log-probability assigned to the target value of each element
        log_pt = target * log_probs + (1.0 - target) * log_one_subtract_probs
        pt = torch.exp(log_pt)
        # down-weight elements that are already predicted well (pt close to 1)
        focal_loss = -1.0 * (self.alpha * (1 - pt) ** self.gamma) * log_pt
        return torch.mean(focal_loss)
        

In [None]:
"""
Define F1 score metric
"""
class MyF1Score(Metric):
    """F1 score accumulated over batches from label-name matches.

    Predictions and targets are thresholded, converted to lists of label names
    (``['healthy']`` when nothing exceeds the threshold) and compared by name.
    """

    def __init__(self, cfg, threshold: float=0.5, dist_sync_on_step=False):
        super().__init__(dist_sync_on_step=dist_sync_on_step)
        self.cfg = cfg
        self.threshold = threshold
        # running counts of true positives, false positives, false negatives
        for state_name in ("tp", "fp", "fn"):
            self.add_state(state_name, default=torch.tensor(0), dist_reduce_fx="sum")

    def update(self, preds: torch.Tensor, target: torch.Tensor):
        """Add the tp / fp / fn counts of one batch to the running totals."""
        assert preds.shape == target.shape
        predicted_batch = self.num_to_str(preds)
        expected_batch = self.num_to_str(target)
        batch_tp = batch_fp = batch_fn = 0
        for predicted, expected in zip(predicted_batch, expected_batch):
            hits = sum(1 for name in predicted if name in expected)
            batch_tp += hits
            # every predicted name that is not a hit is a false positive
            batch_fp += len(predicted) - hits
            batch_fn += sum(1 for name in expected if name not in predicted)
        self.tp += batch_tp
        self.fp += batch_fp
        self.fn += batch_fn

    def compute(self):
        """Return 2*tp / (2*tp + fn + fp) from the accumulated counts."""
        doubled_tp = 2.0 * self.tp
        return doubled_tp / (doubled_tp + self.fn + self.fp)

    def num_to_str(self, ts: torch.Tensor) -> list:
        """Turn each row of ``ts`` into the list of label names whose value
        is greater than the threshold; an empty row becomes ``['healthy']``."""
        above_threshold = (ts > self.threshold).detach().cpu().numpy().tolist()
        names_per_sample = []
        for row_flags in above_threshold:
            names = [self.cfg.label_num2str[col]
                     for col, is_on in enumerate(row_flags) if is_on]
            names_per_sample.append(names if len(names) > 0 else ['healthy'])
        return names_per_sample

In [None]:
"""
Define neural network model
"""

class MyNetwork(pl.LightningModule):
    """Lightning wrapper around a timm backbone for multi-label classification.

    The backbone outputs one raw logit per class; the focal loss consumes the
    logits directly, while the F1 metric is fed sigmoid probabilities.

    Args:
        cfg: configuration object. ``model_name`` and ``num_classes`` are read
            on construction; ``lr``, ``t_max`` and ``min_lr`` are read only by
            ``configure_optimizers`` (i.e. when training).
    """

    def __init__(self, cfg):
        super(MyNetwork, self).__init__()
        self.cfg = cfg
        # pretrained=False: weights are expected to come from a checkpoint
        self.model = timm.create_model(cfg.model_name, pretrained=False, num_classes=cfg.num_classes)
        self.criterion = FocalLoss()
        self.metric = MyF1Score(cfg)

    def forward(self, x):
        """Return the raw (pre-sigmoid) class logits for a batch of images."""
        return self.model(x)

    def configure_optimizers(self):
        """Adam with a cosine-annealed learning rate, configured from ``cfg``."""
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.cfg.lr)
        self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(self.optimizer,
                                                                    T_max=self.cfg.t_max,
                                                                    eta_min=self.cfg.min_lr,
                                                                    verbose=True)
        return {'optimizer': self.optimizer, 'lr_scheduler': self.scheduler}

    def _loss_and_f1(self, batch):
        """Shared train/valid step: forward pass, focal loss and batch F1."""
        img_ts, lb_ts = batch
        pred_ts = self.model(img_ts)
        loss = self.criterion(pred_ts, lb_ts)
        # The metric thresholds its input, so it needs probabilities; feeding
        # raw logits would shift the effective probability cut-off.
        score = self.metric(torch.sigmoid(pred_ts), lb_ts)
        return loss, score

    def training_step(self, batch, batch_idx):
        """Compute the training loss and log loss / F1 / current learning rate."""
        loss, score = self._loss_and_f1(batch)
        logs = {'train_loss': loss, 'train_f1': score, 'lr': self.optimizer.param_groups[0]['lr']}
        self.log_dict(logs, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute the validation loss and log loss / F1."""
        loss, score = self._loss_and_f1(batch)
        logs = {'valid_loss': loss, 'valid_f1': score}
        self.log_dict(logs, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        return loss

In [None]:
"""
Init models
"""
# One network per requested fold, restored from its checkpoint, moved to the
# GPU and switched to eval mode.
models_list = []
for fold_idx in CFG.needed_fold:
    ckpt_path = os.path.join(CFG.pretrained_dir,
                             f"fold{fold_idx}_logs/{CFG.which_to_load}.ckpt")
    # .cuda() and .eval() both return the module itself, so they can be chained
    model = MyNetwork.load_from_checkpoint(ckpt_path, cfg=CFG).cuda().eval()
    models_list.append(model)
    
    

In [None]:
# Per-class probability cut-offs applied to the fold-averaged predictions.
threshold = np.array([0.4333, 0.4333, 0.4333, 0.4333, 0.4333])
submit_df = pd.read_csv(CFG.submit_csv_path)

def convert_num_to_str(pred: "np.ndarray | list") -> str:
    """Convert per-class boolean flags into the submission label string.

    Args:
        pred: one truthy/falsy flag per class, indexed like ``CFG.label_num2str``.

    Returns:
        The names of the active classes joined by single spaces, or
        ``'healthy'`` when no class is active.
    """
    lb_str_list = [CFG.label_num2str[lb_idx]
                   for lb_idx, bool_val in enumerate(pred) if bool_val]
    return ' '.join(lb_str_list) if lb_str_list else 'healthy'

# image name -> predicted label string
predicted_labels = {}
with torch.no_grad():
    # Position of the current batch inside test_img_names; valid because the
    # loader iterates the dataset in order (shuffle=False).
    test_img_idx = 0
    for img_ts, _ in test_loader:
        img_ts = img_ts.cuda()
        # Stack to [n_folds, batch, n_classes] and average over folds only, so
        # the result stays per-image for any batch size.
        n_fold_pred = torch.stack([torch.sigmoid(model(img_ts)).detach().cpu()
                                   for model in models_list])
        pred_np = n_fold_pred.mean(dim=0).numpy()
        batch_img_names = test_img_names[test_img_idx:test_img_idx + len(pred_np)]
        for img_name, sample_pred in zip(batch_img_names, pred_np):
            predicted_labels[img_name] = convert_num_to_str((sample_pred > threshold).tolist())
        test_img_idx += len(pred_np)

# Write all predictions at once. The label column is the second column of the
# submission file; rows without a prediction keep their existing value.
label_col = submit_df.columns[1]
submit_df[label_col] = submit_df['image'].map(predicted_labels).fillna(submit_df[label_col])

In [None]:
# Write the filled-in submission table to the working directory,
# without the DataFrame index column.
submit_df.to_csv(path_or_buf="./submission.csv", index=False)

In [None]:
# Preview the first rows only rather than rendering the whole table.
submit_df.head(10)