In [None]:
import numpy as np
import pandas as pd
import cv2
import torch
import torch.nn as nn
import albumentations as A
import pytorch_lightning as pl
import matplotlib.pyplot as plt



from torch.utils.data import Dataset,DataLoader
import torchvision
from torchvision import models,datasets,transforms
import torch.optim as optim
from torch.optim import lr_scheduler

from albumentations.core.composition import Compose,OneOf
from albumentations.augmentations.transforms import CLAHE , GaussNoise ,ISONoise

from albumentations.pytorch import ToTensorV2

from pytorch_lightning import Trainer,seed_everything
from pytorch_lightning import callbacks
from pytorch_lightning.loggers import CSVLogger
from pytorch_lightning.callbacks import model_checkpoint, EarlyStopping


from sklearn.model_selection import StratifiedKFold


import time
import os
import copy

In [None]:
!pip install timm

In [None]:
class CFG:
    """Notebook-wide configuration constants, read as class attributes."""

    # Reproducibility
    seed = 42

    # Model
    model_name = 'resnet50'
    pretrained = False
    num_classes = 12

    # Data
    img_size = 640
    batch_size = 32

    # Compute device: CUDA when torch reports it as available, CPU otherwise.
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [None]:
# Root of the competition data and the two image sub-directories beneath it.
PATH = "../input/plant-pathology-2021-fgvc8/"

train_path = f"{PATH}train_images/"
test_path = f"{PATH}test_images/"

In [None]:
# Read the training table (train.csv under PATH) into a DataFrame.
df_train = pd.read_csv(f"{PATH}train.csv")

In [None]:
# Preview the first rows only instead of rendering the whole frame.
df_train.head()

In [None]:
# Count how many rows carry each distinct value of the `labels` column.
df_train['labels'].value_counts()

In [None]:
# Distinct label values, in the order value_counts() reports them.
df_train['labels'].value_counts().index.tolist()

In [None]:
# Map every distinct label value to an integer class id, numbered in the order
# value_counts() reports them. enumerate() covers all labels present, whereas
# zipping against a hard-coded range(12) silently drops any label past the twelfth.
lbl_dict = {label: idx for idx, label in enumerate(df_train['labels'].value_counts().index)}

In [None]:
# Display the label -> integer id mapping.
lbl_dict

In [None]:
# Work on an independent copy so df_train keeps its original label values.
df_train1 = df_train.copy(deep=True)

In [None]:
# Overwrite the copy's `labels` column with the integer ids from lbl_dict.
# The source values are read from df_train (not df_train1), so re-running this
# cell produces the same result.
df_train1['labels'] = df_train['labels'].map(lbl_dict)

## Data Class

In [None]:
def get_transform(phase: str):
    """Build the albumentations pipeline for ``phase``.

    ``'train'`` yields the augmenting pipeline (random resized crop, horizontal
    flip, shift/scale/rotate, brightness/contrast); every other value yields the
    resize-only pipeline. Both end with ``A.Normalize()`` and ``ToTensorV2()``
    and use ``CFG.img_size`` for height and width.
    """
    side = CFG.img_size

    if phase != 'train':
        steps = [A.Resize(height=side, width=side)]
    else:
        steps = [
            A.RandomResizedCrop(height=side, width=side),
            A.HorizontalFlip(p=0.5),
            A.ShiftScaleRotate(p=0.5),
            A.RandomBrightnessContrast(p=0.5),
        ]

    # Shared tail of both pipelines.
    steps += [A.Normalize(), ToTensorV2()]
    return Compose(steps)

In [None]:
class PlantPatho(Dataset):
    """Dataset yielding one image and its label per row of ``df``.

    Parameters
    ----------
    df : DataFrame with an ``image`` column (file names) and a ``labels`` column.
    transform : optional callable invoked as ``transform(image=array)`` that
        returns a mapping with an ``'image'`` entry. When ``None`` the decoded
        RGB array is returned unchanged.
    image_dir : optional directory that holds the image files. When ``None``
        the notebook-level ``train_path`` is used.

    Each item is a dict ``{'image': ..., 'label': ...}``.
    """

    def __init__(self, df, transform=None, image_dir=None):
        self.image_id = df['image'].values
        self.labels = df['labels'].values
        self.transform = transform
        self.image_dir = image_dir

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        # Indexing first keeps the IndexError that plain `for x in dataset`
        # iteration relies on to stop.
        image_id = self.image_id[idx]
        label = self.labels[idx]

        base_dir = train_path if self.image_dir is None else self.image_dir
        image_path = os.path.join(base_dir, image_id)

        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable/missing file by returning None
            # rather than raising; fail here with the offending path.
            raise FileNotFoundError('Could not read image: {}'.format(image_path))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transform is not None:
            image = self.transform(image=image)['image']

        return {'image': image, 'label': label}

In [None]:
# Training dataset: label-encoded frame plus the 'train' augmentation pipeline.
train_dataset = PlantPatho(df=df_train1, transform=get_transform('train'))

In [None]:
# Interactive help lookup removed from the finished notebook; run
# `torch.utils.data.DataLoader?` ad hoc when the signature is needed.

In [None]:
# Reshuffle the training samples every epoch so SGD does not see the rows in
# the same fixed CSV order on every pass.
train_loader = DataLoader(train_dataset, batch_size=CFG.batch_size, shuffle=True, num_workers=4)

### Let's see what the image tensor looks like

In [None]:
# Fetch only the first sample and print the shape of its image at index 0
# along the first axis.
x = train_dataset[0]
print(x['image'][0].shape)
    

In [None]:
# Pull a single batch from the loader and print it.
data = next(iter(train_loader))
print(data)

## Fine-tuning and converting the pretrained model - ResNet18

In [None]:
# Build torchvision's ResNet-18, requesting pretrained weights via `pretrained=True`.
# NOTE(review): CFG.model_name ('resnet50') and CFG.pretrained (False) are not
# consulted here — confirm which setting is intended.
model_ft = models.resnet18(pretrained=True)

In [None]:
# Input width of the model's current `fc` layer.
num_ftrs =  model_ft.fc.in_features

In [None]:
# Replace the final layer with one sized for this task; the class count comes
# from CFG so it is defined in a single place.
model_ft.fc = nn.Linear(num_ftrs, CFG.num_classes)

In [None]:
# Display the model's current `fc` layer.
model_ft.fc 

In [None]:
# Move the model to the device selected in CFG.
model_ft = model_ft.to(CFG.device)

In [None]:
# Loss function handed to train_model as `criterion`.
criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized

In [None]:
# SGD with momentum over every parameter of model_ft.
optimizer = optim.SGD(params=model_ft.parameters(), lr=0.001, momentum=0.9)

In [None]:
# Display the optimizer's configuration.
optimizer

### Decay LR by a factor of 0.1 every 7 epochs

In [None]:
# Step schedule for the optimizer: step_size 7, gamma 0.1.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=7, gamma=0.1)

### Reference -- Plant Pathology 2020 - PyTorch

https://www.kaggle.com/pestipeti/plant-pathology-2020-pytorch
https://www.kaggle.com/akasharidas/plant-pathology-2020-in-pytorch


In [None]:
def train_model(data_loader,model,criterion,optimizer,sheduler,device,num_epochs=25):
    """Train ``model`` and return it with its best-epoch weights loaded.

    Parameters
    ----------
    data_loader : iterable of batches; each batch is a mapping with an
        ``'image'`` tensor and a ``'label'`` tensor that share the same first
        (batch) dimension.
    model : module to optimise; its parameters are expected to be on ``device``.
    criterion : callable ``criterion(output, labels)`` returning a scalar loss
        (treated as the mean over the batch when accumulating statistics).
    optimizer : optimizer that updates ``model``'s parameters.
    sheduler : learning-rate scheduler, stepped once at the end of each epoch.
    device : device every batch is moved to before the forward pass.
    num_epochs : number of passes over ``data_loader``.

    Returns
    -------
    The same ``model`` object, loaded with the weights of the epoch that
    reached the highest training accuracy (ties go to the later epoch).
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):

        print('Epoch {}/{}'.format(epoch,num_epochs-1))
        print('='*15)

        # Only a training phase exists; no validation loader is passed in.
        model.train()

        running_loss = 0.0
        running_corrects = 0
        samples_seen = 0

        for data in data_loader:
            # Use the whole batch; indexing a single element would hand the
            # model an unbatched tensor and overrun the batch after a few steps.
            images = data['image'].to(device)
            labels = data['label'].to(device)

            optimizer.zero_grad()

            output = model(images)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()

            preds = output.argmax(dim=1)
            batch_len = images.size(0)

            # loss is a batch mean, so weight it by the batch length.
            running_loss += loss.item() * batch_len
            running_corrects += (preds == labels).sum().item()
            samples_seen += batch_len

        sheduler.step()

        # Normalise by the number of samples actually processed (guarded so an
        # empty loader does not divide by zero).
        denom = max(samples_seen, 1)
        epoch_loss = running_loss / denom
        epoch_acc = running_corrects / denom

        print('{} Loss: {:.4f} Acc: {:.4f}'.format('train', epoch_loss, epoch_acc))

        # Keep the best weights seen so far; without this the final
        # load_state_dict would restore the untrained starting weights.
        if epoch_acc >= best_acc:
            best_acc = epoch_acc
            best_model_wts = copy.deepcopy(model.state_dict())

    time_elapsed = time.time() - since

    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best train Acc: {:.4f}'.format(best_acc))

    model.load_state_dict(best_model_wts)

    return model
    

In [None]:
# Run the training loop for 25 epochs and keep the returned model.
model_ft = train_model(
    data_loader=train_loader,
    model=model_ft,
    criterion=criterion,
    optimizer=optimizer,
    sheduler=exp_lr_scheduler,
    device=CFG.device,
    num_epochs=25,
)

# Model Training

In [None]:
# Print the first image tensor of the first batch. The manual counter was
# dropped: the loop stopped after one batch, so it only ever indexed element 0.
data = next(iter(train_loader))
print(data['image'][0])