# About the Notebook

- This is my baseline PyTorch implementation for the Plant Pathology 2021 (FGVC8) competition.
- For now it trains only a single fold, with smaller images and a lighter model, to keep the run time manageable.


If you find this notebook useful, please don't forget to upvote :)

In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
    print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
    print('and then re-execute this cell.')
else:
    print(gpu_info)

# Loading Libraries

In [None]:
import sys
sys.path.append('../input/nfnets/pytorch-image-models-master')
import timm

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)


from sklearn.model_selection import StratifiedKFold

import albumentations as A
from albumentations.pytorch import ToTensorV2

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pylab import rcParams
import math

import os
import time
import cv2
import PIL.Image
import random
import torch
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
from torch.optim.optimizer import Optimizer
from torch.optim.lr_scheduler import CosineAnnealingLR 
import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm import tqdm
import gc

from sklearn.metrics import f1_score
from sklearn.preprocessing import LabelEncoder

from warnings import filterwarnings
# Silence every Python warning for the rest of the notebook.
filterwarnings("ignore")

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Use the 'ggplot' matplotlib style for all figures.
plt.style.use('ggplot')


# Configuration

In [None]:
def seed_everything(seed):
    """Seed every random number generator used by the notebook.

    Seeds Python's `random`, NumPy and PyTorch (CPU and all CUDA devices),
    exports PYTHONHASHSEED, and puts cuDNN into deterministic mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # also covers multi-GPU runs
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune and pick kernels at run time, which can
    # differ from run to run; it has to be off for the results to be repeatable.
    torch.backends.cudnn.benchmark = False


seed_everything(42)

In [None]:
class CFG:
    """Static configuration shared by the cells of this notebook."""

    # --- reproducibility / cross-validation ---
    seed = 42
    n_splits = 5
    fold_id = 0  # Fold to train

    # --- data ---
    train_dir = '../input/plant-pathology-2021-fgvc8/train.csv'  # CSV file read with pd.read_csv
    data_dir = '../input/plant-path-2021-256'  # folder the dataset loads image files from
    image_size = 224  # side length produced by the train/valid transforms
    target_size = 12  # width of the model's final linear layer
    num_workers = 8

    # --- model ---
    model_name = 'vit_base_patch16_224'

    # --- optimisation ---
    init_lr = 1e-4
    batch_size = 64
    valid_batch_size = 64
    n_epochs = 15
    use_amp = True
    early_stop = 5  # epochs without an f1 improvement before training stops
    
    
# Directory the training loop writes model checkpoints into.
model_dir = 'weights/'
# exist_ok=True keeps the cell idempotent: re-running it no longer errors the way
# a shell `mkdir` does when the directory already exists, and it works on any OS.
os.makedirs(model_dir, exist_ok=True)

# Basic EDA

In [None]:
# Load the training table from the CSV path configured in CFG.train_dir.
# Later cells read its `image` and `labels` columns.
train = pd.read_csv(CFG.train_dir)

In [None]:
# Class balance: number of training rows per label value, most frequent first.
targ_cts = train.labels.value_counts()
sorted_cts = targ_cts.sort_values(ascending=False)

fig = plt.figure(figsize=(12, 6))
sns.barplot(x=sorted_cts.values, y=sorted_cts.index, palette='summer')
plt.title('Target Distribution')
plt.show()

In [None]:
# Replace each distinct value of the `labels` column by an integer class id.
# The fitted encoder is kept in `le` so the ids can be mapped back later.
le = LabelEncoder()
train['labels'] = le.fit_transform(train.labels)

In [None]:
# Original label value -> encoded integer id, printed for reference.
le_mapping = {
    class_name: class_id
    for class_name, class_id in zip(le.classes_, le.transform(le.classes_))
}
print(le_mapping)

# Setting Folds

In [None]:
# Assign every training row to one of CFG.n_splits stratified validation folds.
folds = train.copy()
folds['fold'] = -1  # integer placeholder; avoids a float column full of NaN

# Use the values from CFG so the split stays in sync with the configuration.
Fold = StratifiedKFold(n_splits=CFG.n_splits, shuffle=True, random_state=CFG.seed)
fold_col = folds.columns.get_loc('fold')
for n, (train_index, val_index) in enumerate(Fold.split(folds, folds['labels'])):
    # split() yields positional indices, so write through iloc rather than loc;
    # this stays correct even if the frame's index is not 0..N-1.
    folds.iloc[val_index, fold_col] = n

assert (folds['fold'] >= 0).all(), 'every row must belong to a validation fold'
train['fold'] = folds['fold'].astype(int)

# Augmentations

In [None]:
# Augmentation pipelines (defined once; this cell previously appeared twice verbatim).

# Training: a random crop covering 85-100% of the image resized to
# CFG.image_size, random flips / transpose / shift-scale-rotate for a
# regularizing effect, then normalization and conversion to a tensor.
transforms_train = A.Compose([
    A.RandomResizedCrop(CFG.image_size, CFG.image_size, scale=(0.85, 1), p=1),
    A.HorizontalFlip(p=0.5),
    A.Transpose(p=0.5),
    A.VerticalFlip(p=0.5),
    A.ShiftScaleRotate(p=0.5),
    A.Normalize(
        mean=[0.5, 0.5, 0.5],
        std=[0.5, 0.5, 0.5], max_pixel_value=255.0, p=1.0),
    ToTensorV2(p=1.0),
])

# Validation: deterministic resize plus the same normalization, no augmentation.
transforms_valid = A.Compose([
    A.Resize(CFG.image_size, CFG.image_size),
    A.Normalize(
        mean=[0.5, 0.5, 0.5],
        std=[0.5, 0.5, 0.5], max_pixel_value=255.0, p=1.0),
    ToTensorV2(p=1.0),
])

# Data Loader

In [None]:
class TrainDataset(Dataset):
    """Dataset yielding `(image, label)` pairs described by a DataFrame.

    Args:
        df: DataFrame with an `image` column (file names) and a `labels`
            column (integer class ids).
        transform: Optional callable invoked as `transform(image=ndarray)` that
            returns a dict with an `'image'` entry (albumentations style).
        data_dir: Optional folder containing the image files. When None, the
            folder is read from `CFG.data_dir` at access time.
    """

    def __init__(self, df, transform=None, data_dir=None):
        self.df = df.reset_index(drop=True)
        self.file_names = self.df['image'].values
        self.labels = self.df['labels'].values
        self.transform = transform
        self.data_dir = data_dir

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Load, colour-convert and transform the image at position `idx`.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        data_dir = CFG.data_dir if self.data_dir is None else self.data_dir
        file_path = f'{data_dir}/{self.file_names[idx]}'
        image = cv2.imread(file_path)
        # cv2.imread does not raise on a missing/corrupt file, it returns None;
        # fail here with the offending path instead of deep inside cvtColor.
        if image is None:
            raise FileNotFoundError(f'Could not read image: {file_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            image = self.transform(image=image)['image']
        label = torch.tensor(self.labels[idx]).long()
        return image, label

In [None]:
# Preview the first five training samples after augmentation.

train_dataset = TrainDataset(train, transform=transforms_train)

fig, axs = plt.subplots(1, 5, figsize=(40,12))

for i in range(5):
    image, label = train_dataset[i]
    # The transform returns a channels-first tensor normalized with mean=0.5 and
    # std=0.5. Move channels last for matplotlib (permute keeps the image
    # upright, whereas `.T` would also swap height and width) and undo the
    # normalization so pixel values fall back into the displayable [0, 1] range.
    preview = (image.permute(1, 2, 0) * 0.5 + 0.5).clamp(0, 1).numpy()
    axs[i].imshow(preview)
    axs[i].title.set_text(f'Target Labels: {label}')

plt.show()

# Train/Valid Function

In [None]:
def train_func(train_loader):
    """Train the global `model` for one pass over `train_loader`.

    For every batch: forward pass, loss via `trn_criterion`, backward pass and
    an `optimizer` step. When `CFG.use_amp` is set and the device is CUDA, the
    forward pass runs under autocast and the loss is scaled by a GradScaler.

    Args:
        train_loader: Iterable yielding `(images, targets)` batches.

    Returns:
        Mean of the per-batch training losses.
    """
    model.train()
    bar = tqdm(train_loader)

    # With enabled=False both autocast and GradScaler are pass-throughs, so one
    # code path serves the AMP and the full-precision case.
    amp_enabled = CFG.use_amp and device.type == 'cuda'
    scaler = torch.cuda.amp.GradScaler(enabled=amp_enabled)

    losses = []
    for images, targets in bar:
        images, targets = images.to(device), targets.to(device)

        optimizer.zero_grad()

        # Only the forward pass and the loss belong inside autocast; the
        # backward pass and the optimizer step must run outside of it.
        with torch.cuda.amp.autocast(enabled=amp_enabled):
            preds = model(images)
            loss = trn_criterion(preds, targets)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        losses.append(loss.item())
        bar.set_description(f'Mean Loss: {np.mean(losses):.5f}')

    loss_train = np.mean(losses)

    return loss_train


def valid_func(valid_loader):
    """Evaluate the global `model` on `valid_loader` without gradient tracking.

    Args:
        valid_loader: Iterable yielding `(images, targets)` batches.

    Returns:
        Tuple `(loss_valid, f1_val)`: the mean of the per-batch `val_criterion`
        losses and the macro-averaged F1 score over all validation samples.
    """
    model.eval()
    bar = tqdm(valid_loader)

    batch_targets = []
    batch_preds = []
    losses = []

    with torch.no_grad():
        for images, targets in bar:
            images, targets = images.to(device), targets.to(device)
            output = model(images)
            loss = val_criterion(output, targets)

            # argmax over the raw logits; softmax is monotonic, so applying it
            # first would not change the predicted class.
            batch_preds.append(output.argmax(1).to('cpu').numpy())
            batch_targets.append(targets.to('cpu').numpy())
            losses.append(loss.item())
            bar.set_description(f'Loss: {loss.item():.5f}')

    TARGETS = np.concatenate(batch_targets)
    PREDS = np.concatenate(batch_preds)
    f1_val = f1_score(TARGETS, PREDS, average='macro')
    loss_valid = np.mean(losses)
    return loss_valid, f1_val

# Custom Model Class

In [None]:
class ViTModel(nn.Module):
    """timm backbone followed by a small MLP classification head.

    The backbone is created with its stock classifier, and its output is fed
    through `custom_layers`, which ends in a `target_size`-wide linear layer.

    Args:
        model_name: Name passed to `timm.create_model`.
        pretrained: Whether timm should load pretrained weights.
        target_size: Number of output units of the final linear layer.
    """

    def __init__(self, model_name=CFG.model_name, pretrained=False, target_size=CFG.target_size):
        super().__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained)

        # NOTE(review): the first layer assumes the backbone emits 1000 values
        # per sample (presumably the ImageNet classifier) - confirm when
        # switching to a different `model_name`.
        self.custom_layers = nn.Sequential(nn.Linear(1000, 1000),
                                           nn.ReLU(),
                                           nn.BatchNorm1d(1000),
                                           nn.Dropout(0.5),
                                           nn.Linear(1000, 500),
                                           nn.ReLU(),
                                           nn.BatchNorm1d(500),
                                           nn.Dropout(0.5),
                                           # honour the constructor argument instead of
                                           # silently reading CFG.target_size
                                           nn.Linear(500, target_size))

    def forward(self, x):
        """Return the head's output for a batch of images `x`."""
        x = self.model(x)
        x = self.custom_layers(x)
        return x

In [None]:
# Build the network with pretrained backbone weights and move it to the
# selected device in a single step.
model = ViTModel(pretrained=True).to(device)

# Training Settings

In [None]:
# Losses, optimizer, LR schedule and data loaders for the fold being trained.

val_criterion = nn.CrossEntropyLoss()
trn_criterion = nn.CrossEntropyLoss()

# Plain Adam over all model parameters.
optimizer = torch.optim.Adam(model.parameters(), lr=CFG.init_lr)

# here you can experiment with other schedulers too, they have decent impact on this competition
# Cosine decay from CFG.init_lr down to eta_min over CFG.n_epochs scheduler steps.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=CFG.n_epochs, eta_min=1e-7)

# Rows of the selected fold are held out for validation; the rest are trained on.
train_df_this = train[train['fold'] != CFG.fold_id]
df_valid_this = train[train['fold'] == CFG.fold_id]

dataset_train = TrainDataset(train_df_this, transform=transforms_train)
dataset_valid = TrainDataset(df_valid_this, transform=transforms_valid)

train_loader = torch.utils.data.DataLoader(
    dataset_train,
    batch_size=CFG.batch_size,
    shuffle=True,
    num_workers=CFG.num_workers,
    drop_last=True,
    pin_memory=True,
)
# The validation loader uses the dedicated CFG.valid_batch_size setting, which
# was previously defined but never read.
valid_loader = torch.utils.data.DataLoader(
    dataset_valid,
    batch_size=CFG.valid_batch_size,
    shuffle=False,
    num_workers=CFG.num_workers,
    pin_memory=True,
)

# Training

In [None]:
# Single-fold training with checkpointing and early stopping on validation F1.

log = {'loss_train': [], 'loss_valid': [], 'lr': [], 'f1': []}
f1_max = 0.
loss_min = float('inf')
not_improving = 0  # consecutive epochs without a new best F1

for epoch in range(1, CFG.n_epochs + 1):

    loss_train = train_func(train_loader)
    loss_valid, f1 = valid_func(valid_loader)

    # Learning rate that was in effect during this epoch (read before stepping).
    current_lr = optimizer.param_groups[0]["lr"]

    log['loss_train'].append(loss_train)
    log['loss_valid'].append(loss_valid)
    log['lr'].append(current_lr)
    log['f1'].append(f1)

    # Report the fold actually being trained rather than a hard-coded 0.
    content = time.ctime() + ' ' + f'Fold: {CFG.fold_id}, Epoch: {epoch}/{CFG.n_epochs}, lr: {current_lr:.7f}, loss_train: {loss_train:.5f}, loss_valid: {loss_valid:.5f}, f1: {f1:.6f}.'
    print(content)

    scheduler.step()

    if f1 > f1_max:
        print(f'f1_max ({f1_max:.6f} --> {f1:.6f}). Saving model ...')
        torch.save(model.state_dict(), f'{model_dir}{CFG.model_name}_fold{CFG.fold_id}_best_f1.pth')
        f1_max = f1
        not_improving = 0
    else:
        not_improving += 1

    # A separate checkpoint tracks the lowest validation loss.
    if loss_valid < loss_min:
        loss_min = loss_valid
        torch.save(model.state_dict(), f'{model_dir}{CFG.model_name}_fold{CFG.fold_id}_best_loss.pth')

    # `>=` rather than `==` so the stop cannot be skipped if the counter ever
    # overshoots the threshold.
    if not_improving >= CFG.early_stop:
        print('Early Stopping...')
        break


# Always keep the weights of the last completed epoch as well.
torch.save(model.state_dict(), f'{model_dir}{CFG.model_name}_fold{CFG.fold_id}_final.pth')

# Final Notes

### I created this notebook as a baseline; you can easily modify and improve the code to get better results. I might update some parts of it when I have more GPU time available on Kaggle. Happy coding :)

