In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
import cv2
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
r = plt.imread  # short alias: read an image file from disk into an array


def p(x):
    """Draw a single image with matplotlib and display it right away."""
    plt.imshow(x)
    plt.show()
    
import math
def subplotter(img_list,ncols=6,figsize=14,names=None):
    """Show every image in ``img_list`` on one square figure arranged as a grid.

    Parameters
    ----------
    img_list : sequence of images accepted by ``imshow``.
    ncols : number of grid columns; the number of rows is derived from it.
    figsize : value used for both the width and the height of the figure.
    names : optional sequence of titles, indexed in step with ``img_list``.
    """
    grid_rows = math.ceil(len(img_list) / ncols)

    fig = plt.figure(figsize=(figsize, figsize))
    for position, picture in enumerate(img_list, start=1):
        axis = fig.add_subplot(grid_rows, ncols, position)
        axis.imshow(picture)
        if names:
            # ``position`` is 1-based (subplot numbering); titles are 0-based.
            axis.set_title(names[position - 1])
    plt.show()
    
import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read train.csv into a DataFrame (later cells use its `image_id` and `label`
# columns) and preview the first rows.
df = pd.read_csv('/kaggle/input/cassava-leaf-disease-classification/train.csv')
df.head()

In [None]:
# Number of rows per label value (shows how balanced the classes are).
df['label'].value_counts()

In [None]:
# Distinct label values present in the table.
df['label'].unique()

In [None]:
img_dir='/kaggle/input/cassava-leaf-disease-classification/train_images/'
num_imgs=42  # images shown per class (7 rows in the default 6-column grid)

# For every label value, print the label and show a grid of randomly chosen
# images of that class, titled with the image id (file name without extension).
for e in df['label'].unique():
    print(e)
    class_df = df[df.label == e]
    # Draw only the rows that will be displayed, capped at the class size so a
    # class with fewer than `num_imgs` images no longer raises IndexError.
    # The fixed seed makes the preview reproducible across runs.
    small_df = class_df.sample(n=min(num_imgs, len(class_df)), random_state=42)
    img_list = [r(img_dir + img_name) for img_name in small_df['image_id']]
    name_list = [img_name.split('.')[0] for img_name in small_df['image_id']]
    subplotter(img_list, names=name_list, figsize=28)
        
        

The images were taken under a wide variety of lighting conditions and angles, and they vary in the number and position of the leaves in frame. The differences between classes are quite noticeable.

# Trainer

In [None]:
!pip install timm
!pip install pytorch-lightning

from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import torch, timm
import pytorch_lightning as pl
from pytorch_lightning.metrics.functional import accuracy
from torch.nn import functional as F
from torch.utils.data import DataLoader, random_split
from pytorch_lightning.loggers import TensorBoardLogger


In [None]:
class PlantLoader(Dataset):
    """Map-style dataset that yields ``(image, label)`` pairs read from disk.

    Parameters
    ----------
    img_names : sequence of image file names, relative to ``img_dir``.
    targets : sequence of labels, indexed in step with ``img_names``; each
        value is converted with ``int()`` when an item is fetched.
    transform : optional callable applied to the loaded image.
    img_dir : directory containing the images. Defaults to the competition's
        ``train_images`` folder, so existing three-argument calls are unchanged.
    """

    DEFAULT_IMG_DIR = '/kaggle/input/cassava-leaf-disease-classification/train_images/'

    def __init__(self, img_names,targets, transform=None, img_dir=None):
        self.img_names=img_names
        self.targets=targets
        self.transform=transform
        self.img_dir=self.DEFAULT_IMG_DIR if img_dir is None else img_dir

    def __len__(self):
        """Number of samples, i.e. the number of image names."""
        return len(self.img_names)

    def __getitem__(self, idx):
        """Load the ``idx``-th image and return it (transformed, if set) with its label."""
        img_name=self.img_names[idx]
        label=int(self.targets[idx])

        # Use the instance's own directory. The previous code read the
        # notebook-level global `img_dir`, which ignored `self.img_dir` and
        # made the dataset depend on an unrelated cell having been run.
        # `r` is the notebook's alias for `plt.imread`.
        image=r(os.path.join(self.img_dir, img_name))

        if self.transform:
            image = self.transform(image)

        return image,label


In [None]:
# Quick check: shape of the slice holding rows 1..199 (by position) of the label-2 subset.
np.shape(df[df.label==2].iloc[1:200] )

In [None]:
from sklearn.utils import resample
from sklearn.model_selection import train_test_split
num_samples=2600  # rows per class in the balanced training frame

# Hold out the validation rows BEFORE any resampling. Resampling with
# replacement first and splitting afterwards puts copies of the same image in
# both the training and the validation set, which inflates validation scores.
# The split is stratified so every class is represented in the held-out rows.
train_df, val_df = train_test_split(
    df, test_size=0.2, random_state=42, stratify=df.label)

balanced_parts = []
for e in df['label'].unique():
    df_class = train_df[train_df.label==e]

    # Classes smaller than `num_samples` are upsampled with replacement;
    # larger classes are downsampled without replacement, so no duplicates are
    # introduced where enough distinct images exist.
    df_resampled = resample(df_class,
                            replace=len(df_class) < num_samples,
                            n_samples=num_samples,
                            random_state=123)  # reproducible results
    balanced_parts.append(df_resampled)

# Concatenate once (instead of growing a frame inside the loop) and shuffle so
# the rows are not grouped by class.
df_balanced = pd.concat(balanced_parts).sample(frac=1, random_state=123)

print(df_balanced['label'].value_counts() )

# Training data is class-balanced; validation data keeps the original class
# distribution and shares no rows with the training data.
X_train, y_train = df_balanced.image_id, df_balanced.label
X_test, y_test = val_df.image_id, val_df.label

print(y_train.value_counts() )
print(y_test.value_counts() )


In [None]:
batch_size=32

# Channel-wise normalisation with the standard ImageNet statistics, matching
# what ImageNet-pretrained backbones were trained with.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

# Training pipeline: random 224x224 crop + horizontal flip as augmentation.
train_augs=transforms.Compose([
            transforms.ToPILImage(),
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize ])

# Validation pipeline: deterministic resize + centre crop to the same 224x224
# input size used for training. Without it the model is evaluated on
# full-resolution images it never saw during training, and batching only
# works if every image happens to have identical dimensions.
val_augs=transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize ])


train_dataset = PlantLoader(list(X_train),list(y_train),train_augs)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=8)


val_dataset = PlantLoader(list(X_test),list(y_test),val_augs)
# Validation metrics do not depend on sample order, so do not shuffle.
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=8)


In [None]:
# Sanity check: fetch only the first training batch and print the smallest and
# largest tensor value in it together with the batch's labels, then stop.
for i, (images, target) in enumerate(train_dataloader):
    print(torch.min(images),torch.max(images),target)
    break

In [None]:
class PlantModel(pl.LightningModule):
    """LightningModule wrapping a pretrained timm ``resnest26d`` classifier.

    Parameters
    ----------
    num_classes : size of the classification head. Defaults to 5, the number
        of labels in the cassava leaf disease competition; adjust it if
        ``df['label'].nunique()`` differs.
    lr : learning rate passed to the Adam optimizer.
    """

    def __init__(self, num_classes=5, lr=1e-3):
        super().__init__()
        self.lr = lr
        # Ask timm for a head with `num_classes` outputs rather than keeping
        # the 1000-way ImageNet head.
        self.model = timm.create_model('resnest26d', pretrained=True,
                                       num_classes=num_classes)

    def forward(self, x):
        """Return the raw (unnormalised) class logits for a batch of images."""
        return self.model(x)

    def training_step(self, batch, batch_nb):
        """Compute and log the cross-entropy loss for one training batch."""
        x, y = batch
        loss = F.cross_entropy(self(x), y)
        self.log('train_loss', loss, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute and log loss and accuracy for one validation batch."""
        x, y = batch
        logits = self(x)
        # The model emits raw logits, so use cross_entropy (log-softmax + NLL),
        # the same loss as in training. nll_loss alone expects log-probabilities
        # and gives a meaningless value on logits.
        loss = F.cross_entropy(logits, y)

        # validation metrics
        preds = torch.argmax(logits, dim=1)
        acc = (preds == y).float().mean()  # fraction of correct predictions
        self.log('val_loss', loss, prog_bar=True)
        self.log('val_acc', acc, prog_bar=True)
        return loss

    def configure_optimizers(self):
        """Adam over all parameters, using the learning rate given at construction."""
        return torch.optim.Adam(self.parameters(), lr=self.lr)

In [None]:
# lr finder
plant_model = PlantModel()
trainer = pl.Trainer(gpus=1)

lr_finder = trainer.tuner.lr_find(plant_model,train_dataloader=train_dataloader)

fig = lr_finder.plot(suggest=True)
fig.show()


print(lr_finder.suggestion())



In [None]:
%load_ext tensorboard
%tensorboard --logdir logs

In [None]:
# Start from a fresh model rather than the instance used by the lr finder.
plant_model = PlantModel()
# Log training/validation metrics for TensorBoard under 'tb_logs', run name 'my_model'.
logger = TensorBoardLogger('tb_logs', name='my_model')


# Trainer with gpus=1, limited to 3 epochs; progress bar refreshes every 20 batches.
trainer = pl.Trainer(gpus=1, max_epochs=3, progress_bar_refresh_rate=20,logger=logger)

# Fit on the training loader, running validation on the validation loader.
trainer.fit(plant_model, train_dataloader,val_dataloader)