In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
from collections import OrderedDict
import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))


# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session
# test = pd.DataFrame()
# test['image_id'] = list(os.listdir('../input/cassava-leaf-disease-classification/test_images/'))
# print(test.iloc[0,0])

In [None]:
! pip install ../input/timm-package/timm-0.1.26-py3-none-any.whl

In [None]:
import os
import cv2
import copy
import time
import random
import pickle

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#import seaborn as sns

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
import torchvision
from torchvision import models
from torch.utils.data import DataLoader, Dataset
from torch.cuda import amp
#from apex import amp
#from torch.cuda.amp import autocast, GradScaler
from tqdm import tqdm

from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn.utils import class_weight

from collections import defaultdict
import albumentations as A

# from albumentations import (
#     HorizontalFlip, VerticalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90,
#     Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, HueSaturationValue,
#     IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, IAAPiecewiseAffine, RandomResizedCrop,
#     IAASharpen, IAAEmboss, RandomBrightnessContrast, Flip, OneOf, Compose, Normalize, Cutout, CoarseDropout,
#     ShiftScaleRotate, CenterCrop, Resize
# )
from albumentations.pytorch import ToTensorV2

import timm
#import pretrainedmodels

class CFG:
    """Static settings read by the model, scheduler, and data pipeline code."""

    # --- reproducibility -------------------------------------------------
    seed = 42

    # --- model / input ---------------------------------------------------
    model_name = 'tf_efficientnet_b4_ns'  # architecture name passed to timm.create_model
    img_size = 512                        # side length used by the crop/resize transforms
    num_classes = 5                       # width of the replacement classifier head

    # --- optimisation ----------------------------------------------------
    batch_size = 16
    num_epochs = 10
    lr = 1e-4
    min_lr = 1e-6                         # eta_min handed to the cosine schedulers
    weight_decay = 1e-6
    smoothing = 0.2                       # presumably the label-smoothing factor; not read in the visible code

    # --- learning-rate schedule -------------------------------------------
    scheduler = 'CosineAnnealingWarmRestarts'
    T_max = 10                            # used when scheduler == 'CosineAnnealingLR'
    T_0 = 10                              # used when scheduler == 'CosineAnnealingWarmRestarts'

    # --- cross-validation --------------------------------------------------
    n_fold = 5
    NUM_FOLDS_TO_RUN = [2]

    # --- hardware -----------------------------------------------------------
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def set_seed(seed=42):
    '''Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy, and PyTorch (CPU and all CUDA devices),
    and configures cuDNN for deterministic behaviour.

    Args:
        seed: Integer seed applied to every generator.
    '''
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one; this call is safe
    # when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    # cuDNN needs both flags: deterministic kernels, and no auto-tuner that
    # may pick a different algorithm from run to run.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Exported for child processes; the hash seed of the already-running
    # interpreter is fixed at start-up and is not affected by this.
    os.environ['PYTHONHASHSEED'] = str(seed)

class CassavaLeafDataset(Dataset):
    """Map-style dataset that loads images listed in a DataFrame.

    Args:
        root_dir: Directory that contains the image files.
        df: DataFrame whose first column holds the image file names. When
            ``output_label`` is true, the second column is returned as the label.
        transforms: Optional callable invoked as ``transforms(image=img)`` whose
            result is indexed with ``["image"]`` (albumentations-style).
        output_label: If true, ``__getitem__`` returns ``(image, label)``;
            otherwise only the image is returned (the original behaviour).
    """

    def __init__(self, root_dir, df, transforms=None, output_label=False):
        self.root_dir = root_dir
        self.df = df
        self.transforms = transforms
        self.output_label = output_label

    def __len__(self):
        """Return the number of rows (images) in the DataFrame."""
        return len(self.df)

    def __getitem__(self, index):
        """Load, colour-convert, and optionally transform the image at ``index``.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image file.
        """
        img_path = os.path.join(self.root_dir, self.df.iloc[index, 0])
        img = cv2.imread(img_path)
        # cv2.imread signals failure by returning None instead of raising.
        if img is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        # OpenCV decodes to BGR; convert to RGB before applying transforms.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if self.transforms is not None:
            img = self.transforms(image=img)["image"]

        if self.output_label:
            return img, self.df.iloc[index, 1]
        return img

# Albumentations pipelines keyed by phase. Every transform is referenced through
# the `A` namespace because the bare-name import of these classes is commented
# out at the top of the file.
data_transforms = {
    # Training: random crop to the target size plus geometric/colour augmentation.
    "train": A.Compose([
        A.RandomResizedCrop(CFG.img_size, CFG.img_size),
        A.Transpose(p=0.5),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.ShiftScaleRotate(p=0.5),
        A.HueSaturationValue(
            hue_shift_limit=0.2,
            sat_shift_limit=0.2,
            val_shift_limit=0.2,
            p=0.5
        ),
        A.RandomBrightnessContrast(
            brightness_limit=(-0.1, 0.1),
            contrast_limit=(-0.1, 0.1),
            p=0.5
        ),
        A.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            max_pixel_value=255.0,
            p=1.0
        ),
        A.CoarseDropout(p=0.5),
        # NOTE(review): Cutout is deprecated in newer albumentations releases in
        # favour of CoarseDropout — confirm against the installed version.
        A.Cutout(p=0.5),
        ToTensorV2()], p=1.),

    # Validation: deterministic crop/resize followed by normalisation only.
    "valid": A.Compose([
        A.CenterCrop(CFG.img_size, CFG.img_size, p=1.),
        A.Resize(CFG.img_size, CFG.img_size),
        A.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            max_pixel_value=255.0,
            p=1.0
        ),
        ToTensorV2()], p=1.),

    # Test: still applies random augmentation (crop, flips, colour jitter),
    # i.e. a test-time-augmentation style pipeline rather than a fixed one.
    "test": A.Compose([
        A.RandomResizedCrop(CFG.img_size, CFG.img_size),
        A.Transpose(p=0.5),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5),
        A.RandomBrightnessContrast(brightness_limit=(-0.1, 0.1), contrast_limit=(-0.1, 0.1), p=0.5),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
        ToTensorV2(p=1.0),
    ], p=1.)

}
def get_inference_transforms():
    """Build a fresh augmentation pipeline for inference.

    The pipeline applies a random resized crop, random transpose/flips and
    colour jitter, then ImageNet-style normalisation and tensor conversion.
    Transforms are referenced through the ``A`` namespace because the
    bare-name albumentations import is commented out in this file.

    Returns:
        An ``albumentations.Compose`` instance.
    """
    return A.Compose([
        A.RandomResizedCrop(CFG.img_size, CFG.img_size),
        A.Transpose(p=0.5),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5),
        A.RandomBrightnessContrast(brightness_limit=(-0.1, 0.1), contrast_limit=(-0.1, 0.1), p=0.5),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
        ToTensorV2(p=1.0),
    ], p=1.)

class TaylorSoftmax(nn.Module):
    """Softmax variant that replaces ``exp(x)`` with its Taylor polynomial.

    Each element is mapped to ``1 + x + x^2/2! + ... + x^n/n!`` and the result
    is normalised so that it sums to one along ``dim``.

    Args:
        dim: Dimension along which to normalise.
        n: Order of the Taylor polynomial; must be even.
    """

    def __init__(self, dim=1, n=2):
        super().__init__()
        assert n % 2 == 0
        self.dim = dim
        self.n = n

    def forward(self, x):
        """Return the normalised Taylor expansion of ``x`` along ``self.dim``."""
        factorial = 1.
        numerator = torch.ones_like(x)  # zeroth-order term
        for order in range(1, self.n + 1):
            factorial *= order
            numerator = numerator + x.pow(order) / factorial
        normaliser = numerator.sum(dim=self.dim, keepdim=True)
        return numerator / normaliser

class LabelSmoothingLoss(nn.Module):
    """Cross-entropy against a label-smoothed target distribution.

    The true class receives probability ``1 - smoothing``; the remaining
    ``smoothing`` mass is split evenly over the other ``classes - 1`` classes.

    Args:
        classes: Number of classes.
        smoothing: Probability mass moved away from the true class.
        dim: Dimension over which the per-sample loss is summed.
    """

    def __init__(self, classes, smoothing=0.0, dim=-1):
        super().__init__()
        self.smoothing = smoothing
        self.confidence = 1.0 - smoothing
        self.cls = classes
        self.dim = dim

    def forward(self, pred, target):
        """Return the mean smoothed loss.

        ``pred`` must already contain log-probabilities (no log-softmax is
        applied here); ``target`` holds the integer class index per sample.
        """
        off_target_mass = self.smoothing / (self.cls - 1)
        # The soft targets are constants; keep them out of the autograd graph.
        with torch.no_grad():
            soft_targets = torch.full_like(pred, off_target_mass)
            soft_targets.scatter_(1, target.unsqueeze(1), self.confidence)
        per_sample = (-soft_targets * pred).sum(dim=self.dim)
        return per_sample.mean()

class TaylorCrossEntropyLoss(nn.Module):
    """Label-smoothed cross-entropy computed on Taylor-softmax probabilities.

    Args:
        n: Even order of the Taylor polynomial used by the softmax.
        ignore_index: Stored on the instance; not used by ``forward``.
        reduction: Stored on the instance; not used by ``forward``.
        smoothing: Label-smoothing factor passed to ``LabelSmoothingLoss``.

    The number of classes is taken from ``CFG.num_classes``.
    """

    def __init__(self, n=2, ignore_index=-1, reduction='mean', smoothing=0.2):
        super().__init__()
        assert n % 2 == 0
        self.taylor_softmax = TaylorSoftmax(dim=1, n=n)
        self.reduction = reduction
        self.ignore_index = ignore_index
        self.lab_smooth = LabelSmoothingLoss(CFG.num_classes, smoothing=smoothing)

    def forward(self, logits, labels):
        """Return the smoothed loss for raw ``logits`` and integer ``labels``."""
        # LabelSmoothingLoss expects log-probabilities, so take the log here.
        taylor_log_probs = self.taylor_softmax(logits).log()
        return self.lab_smooth(taylor_log_probs, labels)

def train_model(model, criterion, optimizer, scheduler, num_epochs, dataloaders, dataset_sizes, fold):
    """Train and validate ``model`` with mixed precision, keeping the best weights.

    Args:
        model: Network to train; batches are moved to the device of its parameters.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        scheduler: LR scheduler stepped once per epoch after the training
            phase, or ``None`` to disable scheduling.
        num_epochs: Number of epochs to run.
        dataloaders: Dict with ``'train'`` and ``'valid'`` loaders yielding
            ``(inputs, labels)`` batches.
        dataset_sizes: Dict with the number of samples per phase, used to
            average the running loss and accuracy.
        fold: Fold identifier embedded in the checkpoint file name.

    Returns:
        ``(model, history, best_acc)``: the model loaded with its best
        validation weights, a dict of per-epoch loss/accuracy lists, and the
        best validation accuracy.

    Side effects:
        Writes ``Fold{fold}_{best_acc}_epoch{epoch}.bin`` to the working
        directory whenever validation accuracy matches or beats the best so far.
    """
    start = time.time()
    # Follow the model's own device instead of hard-coding .cuda().
    device = next(model.parameters()).device
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    history = defaultdict(list)
    scaler = amp.GradScaler()

    for epoch in range(1, num_epochs + 1):
        print('Epoch {}/{}'.format(epoch, num_epochs))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'valid']:
            is_train = phase == 'train'
            model.train(is_train)  # train(False) is the same as eval()

            running_loss = 0.0
            running_corrects = 0.0

            # Iterate over data
            for inputs, labels in tqdm(dataloaders[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Gradients only accumulate in the training phase, so only
                # reset them there.
                if is_train:
                    optimizer.zero_grad()

                # forward; track history only in train
                with torch.set_grad_enabled(is_train):
                    with amp.autocast():
                        outputs = model(inputs)
                        _, preds = torch.max(outputs, 1)
                        loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if is_train:
                        scaler.scale(loss).backward()
                        scaler.step(optimizer)
                        scaler.update()

                # loss is a per-batch mean; weight by batch size so the epoch
                # average is per-sample.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data).double().item()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            history[phase + ' loss'].append(epoch_loss)
            history[phase + ' acc'].append(epoch_acc)

            if is_train and scheduler is not None:
                scheduler.step()

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # Snapshot the weights whenever validation accuracy ties or improves.
            if phase == 'valid' and epoch_acc >= best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                PATH = f"Fold{fold}_{best_acc}_epoch{epoch}.bin"
                torch.save(model.state_dict(), PATH)

        print()

    end = time.time()
    time_elapsed = end - start
    print('Training complete in {:.0f}h {:.0f}m {:.0f}s'.format(
        time_elapsed // 3600, (time_elapsed % 3600) // 60, (time_elapsed % 3600) % 60))
    print("Best Accuracy ", best_acc)

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, history, best_acc
#cbf device
def run_fold(model, criterion, optimizer, scheduler, fold, num_epochs=10):
    """Train and validate on one cross-validation fold.

    Rows of the module-level ``df`` whose ``kfold`` equals ``fold`` form the
    validation split; all other rows form the training split. Images are read
    from the module-level ``TRAIN_DIR``.

    NOTE(review): ``df`` and ``TRAIN_DIR`` are not defined in the visible part
    of this file — confirm they exist before calling.

    Returns:
        The ``(model, history, best_acc)`` triple produced by ``train_model``.
    """
    frames = {
        'train': df[df.kfold != fold],
        'valid': df[df.kfold == fold],
    }

    datasets = {
        phase: CassavaLeafDataset(TRAIN_DIR, frame, transforms=data_transforms[phase])
        for phase, frame in frames.items()
    }
    dataset_sizes = {phase: len(dataset) for phase, dataset in datasets.items()}

    # Only the training loader shuffles; validation order stays fixed.
    dataloaders = {
        phase: DataLoader(dataset=dataset, batch_size=CFG.batch_size, num_workers=4,
                          pin_memory=True, shuffle=(phase == 'train'))
        for phase, dataset in datasets.items()
    }

    model, history, best_acc = train_model(
        model, criterion, optimizer, scheduler, num_epochs, dataloaders, dataset_sizes, fold
    )
    return model, history, best_acc

def fetch_scheduler(optimizer):
    """Build the learning-rate scheduler selected by ``CFG.scheduler``.

    Args:
        optimizer: Optimizer the scheduler will drive.

    Returns:
        A ``CosineAnnealingLR`` or ``CosineAnnealingWarmRestarts`` instance, or
        ``None`` when ``CFG.scheduler`` is ``None``.

    Raises:
        ValueError: If ``CFG.scheduler`` names an unsupported scheduler.
    """
    name = CFG.scheduler
    if name is None:
        return None
    if name == 'CosineAnnealingLR':
        return lr_scheduler.CosineAnnealingLR(optimizer, T_max=CFG.T_max, eta_min=CFG.min_lr)
    if name == 'CosineAnnealingWarmRestarts':
        return lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=CFG.T_0, T_mult=1, eta_min=CFG.min_lr)
    # Fail with a clear message instead of an unbound local variable.
    raise ValueError(f"Unsupported CFG.scheduler: {name!r}")

def inference_one_epoch(model, data_loader, device):
    """Run one pass over ``data_loader`` and return softmax probabilities.

    Args:
        model: Network to evaluate; switched to eval mode here.
        data_loader: Loader yielding image batches only (no labels).
        device: Device the batches are moved to before the forward pass.

    Returns:
        A NumPy array of the per-batch softmax outputs (softmax over
        dimension 1) concatenated along axis 0.
    """
    model.eval()

    batch_probs = []

    # Disable autograd here so the function is safe to call on its own.
    with torch.no_grad():
        for imgs in tqdm(data_loader, total=len(data_loader)):
            imgs = imgs.to(device).float()
            logits = model(imgs)
            batch_probs.append(torch.softmax(logits, 1).cpu().numpy())

    return np.concatenate(batch_probs, axis=0)

    
#     TEST_DIR = "../input/cassava-leaf-disease-classification/test_images"

#     set_seed(CFG.seed)
#     df = pd.read_csv('/home/cvpr/pycharmproject/leaf_disease_classification/cassava-leaf-disease-classification/csv/train_20_21397.csv')

#     skf = StratifiedKFold(n_splits=CFG.n_fold)
#     for fold, (_, val_) in enumerate(skf.split(X=df, y=df.label)):
#         df.loc[val_, "kfold"] = int(fold)

#     df['kfold'] = df['kfold'].astype(int)

#     model = timm.create_model(CFG.model_name, pretrained=True)
#     num_features = model.classifier.in_features
#     model.classifier = nn.Linear(num_features, CFG.num_classes)
#     model.cuda()#cbf to(CFG.device)
#     #cbf
#     if torch.cuda.device_count() > 1:
#        model=nn.DataParallel(model)

#     optimizer = optim.Adam(model.parameters(), lr=CFG.lr, weight_decay=CFG.weight_decay, amsgrad=False)
#     criterion = TaylorCrossEntropyLoss(n=2, smoothing=0.2)

#     scheduler = fetch_scheduler(optimizer)

#     accs = []
#     for fold in CFG.NUM_FOLDS_TO_RUN:
#         print(f"\n\nFOLD: {fold}\n\n")
#         model, history, ba = run_fold(model, criterion, optimizer, scheduler, fold=fold,
#                                       num_epochs=CFG.num_epochs) #cbf device=CFG.device
#         accs.append(ba)


#     print(f"MEAN_ACC - {sum(accs) / len(accs)}")
if __name__ == '__main__':
    # Inference-only entry point: predict a label for every test image and
    # write the result to submission.csv.
    # Original author note: training (not inference) needed a nightly PyTorch build.

    set_seed(CFG.seed)

    print('Inference started')

    test_image_dir = '../input/cassava-leaf-disease-classification/test_images/'
    test = pd.DataFrame()
    test['image_id'] = os.listdir(test_image_dir)

    # The "test" pipeline applies random augmentation; one pass is made per checkpoint.
    test_ds = CassavaLeafDataset(test_image_dir, test, transforms=data_transforms['test'])

    tst_loader = torch.utils.data.DataLoader(
        test_ds,
        batch_size=16,
        num_workers=4,
        shuffle=False,
        pin_memory=True,
    )

    device = torch.device(CFG.device)
    model = timm.create_model(CFG.model_name, pretrained=False)
    num_features = model.classifier.in_features
    model.classifier = nn.Linear(num_features, CFG.num_classes)
    model.to(device)

    # Checkpoints to ensemble; predictions are averaged across all entries.
    checkpoint_paths = [
        '../input/model-2-4-new/Fold2_0.8995092311287683_epoch7.bin',
    ]
    # Keys start with "module." when the weights were saved from nn.DataParallel.
    dp_prefix = 'module.'

    tst_preds = []
    for ckpt_path in checkpoint_paths:
        # Map straight onto the target device so loading also works when the
        # checkpoint was saved on a GPU this host does not have.
        ckpt = torch.load(ckpt_path, map_location=device)
        # Strip the prefix only where it is a true prefix of the key.
        ckpt_model_dict = OrderedDict(
            (k[len(dp_prefix):] if k.startswith(dp_prefix) else k, v)
            for k, v in ckpt.items()
        )
        model.load_state_dict(ckpt_model_dict)

        with torch.no_grad():
            tst_preds.append(inference_one_epoch(model, tst_loader, device))

    # Average only after every checkpoint has contributed its predictions.
    tst_preds = np.mean(tst_preds, axis=0)

    del model
    torch.cuda.empty_cache()

    test['label'] = np.argmax(tst_preds, axis=1)
    test.to_csv('submission.csv', index=False)
    print(test)