In [None]:
import sys
sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader,Dataset
import torch.nn.functional as F
import torch.optim as optim
import torchvision.models as models
from torch.optim.lr_scheduler import ReduceLROnPlateau
import copy
import timm
from torch.optim import Adam, SGD
# Run on the GPU when PyTorch reports CUDA as available, otherwise fall back to the CPU.
# `device` is read as a global by the training/validation helpers further down.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)


## Python
import pandas as pd
import numpy as np
import os
import random
from sklearn.model_selection import KFold,StratifiedKFold
import seaborn as sns
from sklearn.metrics import accuracy_score
from matplotlib import pyplot as plt
import cv2
import time,datetime 
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau
### albumentations
import albumentations as A
from albumentations.pytorch import ToTensorV2
from albumentations import (
    Compose, OneOf, Normalize, Resize, RandomResizedCrop, RandomCrop, HorizontalFlip, VerticalFlip, 
    RandomBrightness, RandomContrast, RandomBrightnessContrast, Rotate, ShiftScaleRotate, Cutout, 
    IAAAdditiveGaussianNoise, Transpose
    )

In [None]:
class cfg:
    """Static hyper-parameters for the run; read as plain class attributes (never instantiated)."""

    # --- reproducibility ---
    seed = 42

    # --- data ---
    size = 384          # side length used by the crop / resize transforms
    split = 5           # number of stratified folds
    target_size = 5     # number of classes handed to the loss
    batch_size = 16
    num_workers = 4     # worker-process count intended for the DataLoaders

    # --- model ---
    model_name = 'resnet50'

    # --- optimisation ---
    num_epochs = 12
    lr = 1e-4
    min_lr = 1e-6       # eta_min of the cosine schedule
    T_0 = 10            # CosineAnnealingWarmRestarts
    weight_decay = 1e-6
    smoothing = 0.05    # label-smoothing mass
    patience = 3        # early-stopping tolerance (in epochs)
 

In [None]:
import logging
import random

from contextlib import contextmanager


@contextmanager
def timer(name):
    """Context manager that logs the start and the wall-clock duration of a block.

    Usage::

        with timer('load data'):
            ...

    Without the ``@contextmanager`` decorator this function is only a plain
    generator and cannot be used in a ``with`` statement.

    Args:
        name: Label inserted into both log lines.

    Note:
        Messages go to the module-level ``LOGGER``. If the wrapped block raises,
        the "done" line is not emitted and the exception propagates.
    """
    t0 = time.time()
    LOGGER.info(f'[{name}] start')
    yield
    LOGGER.info(f'[{name}] done in {time.time() - t0:.0f} s.')


def init_logger(log_file = 'F_384_resnet50_multihead_LB.log'):
    """Return this module's logger, writing bare messages to the console and to ``log_file``.

    ``getLogger(__name__)`` always returns the same logger object, so calling
    this function again (for example by re-running the notebook cell) used to
    stack a second pair of handlers and every message was then emitted twice.
    Existing handlers are therefore removed and closed before the fresh pair is
    attached, which also lets a later call redirect output to a different file.

    Args:
        log_file: Path of the log file; opened by ``FileHandler`` in its default
            (append) mode.

    Returns:
        The configured ``logging.Logger`` at INFO level with exactly two handlers.
    """
    from logging import getLogger, INFO, FileHandler,  Formatter,  StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)

    # Drop handlers left over from a previous call so output is not duplicated.
    for stale in list(logger.handlers):
        logger.removeHandler(stale)
        stale.close()

    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=log_file)
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

# Notebook-wide logger (console + default log file); used by every later cell.
LOGGER = init_logger()
def seed_everything(seed):
    """Seed Python, NumPy and PyTorch RNGs and put cuDNN into deterministic mode.

    Args:
        seed: Integer seed applied to ``random``, ``numpy.random``,
            ``torch`` (CPU) and ``torch.cuda``; also exported as
            ``PYTHONHASHSEED``.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Every RNG entry point takes the same integer seed.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(seed)

    # Disable the cuDNN autotuner and request deterministic kernels.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False
# Seed all RNGs once, up front, with the configured seed.
seed_everything(cfg.seed)

In [None]:
# Record the key hyper-parameters at the top of the log, one "(name, value)" tuple per line.
for cfg_field in ('batch_size', 'model_name', 'size', 'num_epochs', 'lr'):
    LOGGER.info(f"{(cfg_field, getattr(cfg, cfg_field))}")

In [None]:
# Both training inputs live under the same competition dataset directory.
_DATA_ROOT = '/kaggle/input/cassava-leaf-disease-classification'
train_img_path = _DATA_ROOT + '/train_images'
train_file_path = _DATA_ROOT + '/train.csv'

In [None]:
# Load the training table; the columns `image_id` and `label` are used by later cells.
df_train = pd.read_csv(train_file_path)
print('length of dataset', df_train.shape[0])

In [None]:
## Stratified KFold
# Assign every row to exactly one validation fold, stratified on `label`.
# `random_state` is pinned so the assignment does not depend on how much of the
# global NumPy RNG stream was consumed before this cell ran (re-running the cell
# would otherwise silently reshuffle the folds).
skf  = StratifiedKFold(n_splits = cfg.split, shuffle = True, random_state = cfg.seed)

# Pre-create the column with an integer sentinel so fold ids stay integers
# instead of being up-cast to float by a NaN-initialised column.
df_train['stf_Kfold'] = -1
fold_col = df_train.columns.get_loc('stf_Kfold')
for n,(train_idx,val_idx) in enumerate(skf.split(df_train.image_id ,df_train.label)):
    # split() yields positional indices, so write through iloc rather than loc.
    df_train.iloc[val_idx, fold_col] = n

# Every row must have received a fold.
assert (df_train['stf_Kfold'] >= 0).all(), 'some rows were not assigned to a fold'
print('Number of Unique folds in dataset',df_train['stf_Kfold'].unique())

In [None]:
## Train dataset
class Train_dataset(Dataset):
    """Dataset yielding ``(image, label)`` pairs read from an image directory.

    Args:
        df: DataFrame with an ``image_id`` column (file name inside the image
            directory) and a ``label`` column. Rows are addressed by position.
        transform: Optional callable invoked as ``transform(image=img)`` whose
            result is indexed with ``['image']``; when ``None`` the RGB array
            is returned unchanged.
        img_dir: Optional image directory. Defaults to the module-level
            ``train_img_path`` so existing two-argument callers are unaffected.
    """

    def __init__(self,df ,transform = None, img_dir = None):
        self.df = df
        # Fall back to the notebook-level path only when no directory is given.
        self.train_img_path = train_img_path if img_dir is None else img_dir
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        """Load, colour-convert and (optionally) transform the sample at position ``index``.

        Raises:
            IndexError: if ``index`` is outside ``range(len(self))``.
            FileNotFoundError: if the image file does not exist.
            ValueError: if the file exists but OpenCV cannot decode it.
        """
        # Positional access: works for any DataFrame index and raises IndexError
        # (not KeyError) past the end, which Python's sequence protocol expects.
        lab = self.df['label'].iloc[index]
        img_file = os.path.join(self.train_img_path, self.df['image_id'].iloc[index])

        # cv2.imread signals failure by returning None; surface a clear error
        # here instead of an opaque cvtColor assertion.
        if not os.path.isfile(img_file):
            raise FileNotFoundError(f'image file not found: {img_file}')
        img = cv2.imread(img_file)
        if img is None:
            raise ValueError(f'could not decode image: {img_file}')
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if self.transform is not None:
            img = self.transform(image = img)['image']

        return img,lab

In [None]:

def get_train_transform():
    """Build the training augmentation pipeline (albumentations ``Compose``).

    Order: random resized crop to ``cfg.size`` -> flips / transpose -> colour
    jitter -> normalisation with the mean/std below -> coarse dropout ->
    tensor conversion.
    """
    spatial_ops = [
        A.RandomResizedCrop(cfg.size,cfg.size),
        A.VerticalFlip(p=0.5),
        A.HorizontalFlip(p=0.5),
        A.Transpose(p=0.5),
    ]

    # NOTE(review): the 0.2 shift limits look very small if the input is uint8
    # (albumentations' defaults are whole numbers) — confirm these are intended.
    colour_ops = [
        A.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5),
        A.RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1, 0.1), p=0.5),
    ]

    normalise = A.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
        max_pixel_value=255.0,
        p=1.0,
    )

    # Dropout is applied after normalisation, i.e. on the already-scaled image.
    tail_ops = [normalise, A.CoarseDropout(p=0.5), ToTensorV2(p=1.0)]

    return A.Compose(spatial_ops + colour_ops + tail_ops, p=1)

def get_valid_transform():
    """Build the deterministic validation pipeline: resize -> normalise -> tensor."""
    resize = A.Resize(cfg.size,cfg.size )
    # Same mean/std as the training pipeline so both see identically scaled inputs.
    normalise = A.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
        max_pixel_value=255.0,
        p=1.0,
    )
    to_tensor = ToTensorV2(p=1.0)
    return A.Compose([resize, normalise, to_tensor], p=1.)


In [None]:
class AdaptiveConcatPool2d(torch.nn.Module):
    """Global max-pool and global average-pool, concatenated along the channel axis.

    For a 4-D input with ``C`` channels the output has two dimensions and
    ``2 * C`` features per sample: max-pooled features first, then the
    average-pooled ones.
    """

    def __init__(self):
        super().__init__()
        self.average_pool = torch.nn.AdaptiveAvgPool2d((1,1))
        self.max_pool = torch.nn.AdaptiveMaxPool2d((1,1))

    def forward(self, x):
        peak = self.max_pool(x)
        mean = self.average_pool(x)
        stacked = torch.cat([peak, mean], 1)
        # Drop the two trailing singleton spatial axes (width, then height).
        return torch.squeeze(torch.squeeze(stacked, 3), 2)


class cnn_resnet50(nn.Module):
    """timm backbone with concat pooling and a two-layer MLP classification head.

    The backbone's ``global_pool`` is replaced by ``AdaptiveConcatPool2d``
    (max + average pooling), which doubles the feature width, and its ``fc``
    is replaced by BatchNorm -> Dropout -> Linear(512) -> ReLU -> BatchNorm ->
    Dropout -> Linear(num_class).

    Args:
        model_name: Name passed to ``timm.create_model``. The model must expose
            ``fc`` and ``global_pool`` attributes, since both are replaced here.
        pretrained: Forwarded to ``timm.create_model``.
        num_class: Number of output logits. Defaults to 5, the previously
            hard-coded value.
    """

    def __init__(self, model_name='resnet50', pretrained=False, num_class=5):
        super().__init__()

        self.num_class = num_class

        self.model = timm.create_model(model_name, pretrained=pretrained)

        # Read the pooled feature width from the backbone's own classifier
        # instead of hard-coding 4096, so other widths also produce a matching
        # head. Concat pooling yields twice that many features.
        head_width = 2 * self.model.fc.in_features

        self.model.global_pool = nn.Sequential(AdaptiveConcatPool2d())

        self.model.fc = nn.Sequential(
            nn.BatchNorm1d(head_width),
            nn.Dropout(.35),
            nn.Linear(head_width,512),
            nn.ReLU(),
            nn.BatchNorm1d(512),
            nn.Dropout(.35),
            nn.Linear(512, self.num_class)
            )

    def forward(self, x):
        """Return the logits produced by the wrapped backbone for batch ``x``."""
        x = self.model(x)
        return x

In [None]:
def run_train(model , loader , optim , loss_func):
    """Train ``model`` for one pass over ``loader``.

    Args:
        model: Network to optimise; switched to train mode.
        loader: Iterable of ``(inputs, targets)`` batches exposing ``__len__``
            and a ``.dataset`` attribute.
        optim: Optimiser stepped once per batch.
        loss_func: Callable ``loss_func(logits, targets)`` returning a scalar tensor.

    Returns:
        Tuple ``(mean loss per batch, fraction of samples predicted correctly)``.
    """
    model.train()

    running_loss = 0
    hits = 0
    for inputs, targets in loader:
        # Batches are moved to the module-level `device`.
        inputs = inputs.to(device)
        targets = targets.to(device)

        optim.zero_grad()
        logits = model(inputs)
        batch_loss = loss_func(logits, targets)
        batch_loss.backward()
        optim.step()

        running_loss += batch_loss.item()
        hits += (logits.argmax(dim=1) == targets).sum().item()

    return running_loss / len(loader), hits / len(loader.dataset)


def run_valid( loader, model ):
    """Evaluate ``model`` on ``loader`` without tracking gradients.

    Args:
        loader: Iterable of ``(inputs, targets)`` batches exposing a
            ``.dataset`` attribute.
        model: Network to evaluate; switched to eval mode.

    Returns:
        Tuple ``(predictions, accuracy)``: a 1-D NumPy array of argmax class
        indices in loader order, and correct predictions divided by
        ``len(loader.dataset)``.
    """
    model.eval()

    hits = 0
    per_batch = []
    with torch.no_grad():
        for inputs, targets in loader:
            # Batches are moved to the module-level `device`.
            inputs = inputs.to(device)
            targets = targets.to(device)

            top1 = torch.argmax(model(inputs), 1)
            hits += (top1 == targets).sum().item()
            per_batch.append(top1.detach().cpu().numpy())

        all_preds = np.concatenate(per_batch)

    return all_preds, hits / len(loader.dataset)

In [None]:
class LabelSmoothingLoss(nn.Module):
    """Cross-entropy against a label-smoothed target distribution.

    The true class receives probability ``1 - smoothing``; the remaining
    ``smoothing`` mass is split evenly over the other ``classes - 1`` classes.

    Args:
        classes: Number of classes.
        smoothing: Probability mass moved away from the true class.
        dim: Axis used for the log-softmax and for the per-sample sum.
    """

    def __init__(self, classes, smoothing=0.0, dim=-1):
        super().__init__()
        self.cls = classes
        self.dim = dim
        self.smoothing = smoothing
        self.confidence = 1.0 - smoothing

    def forward(self, pred, target):
        """Return the batch-mean smoothed loss for logits ``pred`` and class indices ``target``."""
        log_probs = pred.log_softmax(dim=self.dim)

        # The target distribution is a constant; build it outside autograd.
        with torch.no_grad():
            off_value = self.smoothing / (self.cls - 1)
            soft_target = torch.full_like(log_probs, off_value)
            # Overwrite the true-class entry (axis 1) with the confidence value.
            soft_target.scatter_(1, target.data.unsqueeze(1), self.confidence)

        per_sample = torch.sum(-soft_target * log_probs, dim=self.dim)
        return per_sample.mean()

In [None]:
LOGGER.info(f'  ****Train Augmentaion**** \n\n {get_train_transform()} \n\n')


def _seed_worker(worker_id):
    """Seed `random` and NumPy inside a DataLoader worker from the worker's torch seed.

    Keeps worker processes from replaying the same `random`/NumPy stream when
    the augmentations draw from those generators.
    """
    worker_seed = torch.initial_seed() % 2**32
    random.seed(worker_seed)
    np.random.seed(worker_seed)


# Out-of-fold predictions, indexed by row position in df_train.
oof_labels = np.zeros((len(df_train)))
for fold_num in range(cfg.split):

    LOGGER.info(f"==================== fold: {fold_num+1} training ====================")
    # NOTE(review): checkpoint prefix says "F_264" while the log/CSV names use "F_384";
    # left unchanged because other code may load these files by name — confirm.
    model_path = f"F_264_resnet50_multihead{fold_num+1}.pth"

    # Rows of the current fold are held out for validation; all others train.
    trn_idx = df_train[df_train['stf_Kfold'] != fold_num].index
    val_idx = df_train[df_train['stf_Kfold'] == fold_num].index

    df_trn = df_train.loc[trn_idx,['image_id','label']].reset_index(drop=True)
    df_val = df_train.loc[val_idx,['image_id','label']].reset_index(drop=True)

    ## train dataset
    # cfg.num_workers was defined but never passed on, so images were decoded
    # and augmented in the main process only.
    trainset = Train_dataset(df_trn ,transform = get_train_transform() )
    train_loader = torch.utils.data.DataLoader(
        trainset, shuffle=True, batch_size=cfg.batch_size,
        num_workers=cfg.num_workers, worker_init_fn=_seed_worker,
    )

    ## valid dataset
    validset = Train_dataset(df_val ,transform = get_valid_transform() )
    valid_loader = torch.utils.data.DataLoader(
        validset, shuffle=False, batch_size=cfg.batch_size,
        num_workers=cfg.num_workers, worker_init_fn=_seed_worker,
    )

    # A fresh pretrained model per fold.
    model = cnn_resnet50(cfg.model_name, pretrained=True).to(device)
    pytorch_total_params = sum(p.numel() for p in model.parameters())
    print('Number of params used',pytorch_total_params)

    ## Defining optimizer and loss function
    optimizer = Adam(model.parameters(), lr=cfg.lr, weight_decay=cfg.weight_decay, amsgrad=False)
    criterion = LabelSmoothingLoss(classes=cfg.target_size, smoothing=cfg.smoothing).to(device)
    # The scheduler is stepped once per epoch (see the loop below).
    scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=cfg.T_0, T_mult=1, eta_min=cfg.min_lr, last_epoch=-1)

    best_acc = 0
    counter = 0   # consecutive epochs without a new best validation accuracy

    for epoch in range(cfg.num_epochs):

        start_time = time.time()

        trn_loss,trn_acc    =  run_train(model , train_loader , optimizer , criterion)
        predictions,val_acc = run_valid( valid_loader, model )

        train_time = str(datetime.timedelta(seconds=time.time() - start_time))[:7]

        scheduler.step()
        # best_acc in this line is the best value seen before the current epoch.
        LOGGER.info(f'Epoch {epoch+1} | Train Loss: {trn_loss:.4f} | Train acc: {trn_acc:.4f} | Val Acc: {val_acc:.4f} | best_acc: {best_acc:.4f} | time: {train_time}')
        if val_acc > best_acc:
            print('saving best validation acc')
            best_acc = val_acc
            # Reset patience on improvement so early stopping counts consecutive
            # bad epochs rather than the total over the whole run.
            counter = 0
            # Keep the OOF predictions that belong to the checkpoint being saved.
            oof_labels[val_idx] = predictions.reshape(-1,)
            # torch.save serialises the tensors directly; no deep copy is needed.
            torch.save(model.state_dict(), model_path)

        else:
            print('patience starts .........')
            counter+=1
            if (counter > cfg.patience):
                LOGGER.info(f'Early stopping. Best correct accuracy: {best_acc:.4f}')
                break

print('Training Completed')

In [None]:
# Accuracy of the out-of-fold predictions against the true labels of the full training table.
true_labels = df_train['label'].values
oof = accuracy_score(true_labels, oof_labels)
LOGGER.info(f'\noof score : {oof:.4f}')

In [None]:
## Dataframe
# Save true labels next to the out-of-fold predictions, one row per training image.
oof_columns = {
    'original': df_train['label'].values,
    'pred': oof_labels,
}
df_oof = pd.DataFrame(oof_columns)
df_oof.to_csv('oof_F_384_Resnet50_multihead.csv', index=False)