In [None]:
import pytorch_lightning as pl
import pandas as pd
import cv2
import os 
from torch import nn
from torch.utils.data import Dataset ,DataLoader
import numpy as np
import torch
from sklearn.model_selection import train_test_split 

# Side length, in pixels, that every image is resized to before being fed to the model.
IMG_SIZE = 64
# Directory holding the training images; image ids from train.csv are appended to it.
PATH = "../input/cassava-leaf-disease-classification/train_images/"
# Number of target classes, i.e. the width of the model's final linear layer.
CLASSES = 5

CNN model implemented as a Lightning module

In [None]:
class CassavaModel(pl.LightningModule):
    """Small CNN image classifier wrapped as a LightningModule.

    Layer stack: Conv2d(3 -> 128, kernel 5, stride 4) -> ReLU -> BatchNorm2d
    -> MaxPool2d(4) -> Flatten -> Linear(1152, 64) -> ReLU -> Linear(64, 64)
    -> ReLU -> Linear(64, CLASSES).  ``forward`` returns raw class scores
    (no softmax); the loss is cross-entropy on those scores.

    Batches are dicts with an image tensor under ``"x"`` and integer labels
    under ``"y"``.
    """

    def __init__(self):
        super().__init__()
        # Attribute names are part of the checkpoint's state-dict keys, so
        # they are kept exactly as they were.
        self.cnv = nn.Conv2d(3, 128, 5, 4)
        self.rel = nn.ReLU()
        self.bn = nn.BatchNorm2d(128)
        self.mxpool = nn.MaxPool2d(4)
        self.flat = nn.Flatten()
        # For 64x64 inputs: conv -> floor((64 - 5) / 4) + 1 = 15, pool -> 15 // 4 = 3,
        # hence 128 * 3 * 3 = 1152 flattened features.  Changing IMG_SIZE
        # requires updating this number.
        self.fc1 = nn.Linear(1152, 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, CLASSES)
        # Explicit dim: a Softmax built without one warns when it is called.
        self.softmax = nn.Softmax(dim=-1)
        # NOTE(review): pl.metrics appears to be gone from newer Lightning
        # releases (superseded by torchmetrics) -- confirm the pinned version.
        self.accuracy = pl.metrics.Accuracy()

    def forward(self, x):
        """Return unnormalised class scores of shape (N, CLASSES) for a (N, 3, H, W) batch."""
        out = self.bn(self.rel(self.cnv(x)))
        out = self.flat(self.mxpool(out))
        out = self.rel(self.fc1(out))
        out = self.rel(self.fc2(out))
        return self.fc3(out)

    def loss_fn(self, out, target):
        """Mean cross-entropy between class scores ``out`` and integer labels ``target``."""
        return nn.functional.cross_entropy(out.view(-1, CLASSES), target)

    def configure_optimizers(self):
        """Optimise all parameters with AdamW at a fixed learning rate of 1e-3."""
        LR = 1e-3
        return torch.optim.AdamW(self.parameters(), lr=LR)

    def _shared_step(self, batch):
        """Reshape a batch, run the network and return ``(loss, scores, labels)``."""
        # NOTE(review): the dataset in this notebook yields images in OpenCV's
        # (H, W, C) layout; .view() reinterprets the buffer instead of moving
        # the channel axis (permute(0, 3, 1, 2) would).  Left unchanged because
        # the inference cell and the saved checkpoint rely on the same reshape.
        img = batch["x"].view(-1, 3, IMG_SIZE, IMG_SIZE)
        label = batch["y"].view(-1)
        out = self(img)
        return self.loss_fn(out, label), out, label

    def training_step(self, batch, batch_idx):
        """Compute and log the training loss for one batch."""
        loss, _, _ = self._shared_step(batch)
        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute and log validation loss and accuracy for one batch.

        Returns:
            Tuple ``(loss, accuracy)`` for the batch.
        """
        loss, out, label = self._shared_step(batch)
        # argmax over raw scores equals argmax over softmax(scores), so no
        # softmax is needed to obtain the predicted class.
        preds = torch.argmax(out, dim=1)
        accu = self.accuracy(preds, label)
        self.log('valid_loss', loss)
        # This is a validation metric; it was previously logged under the
        # misleading key 'train_acc_step'.
        self.log('valid_acc', accu)
        return loss, accu


Dataset: loading the images and their labels

In [None]:
class CassavaDataset(Dataset):
    """Map-style dataset that loads labelled images from disk with OpenCV.

    Args:
        path: Directory containing the image files.
        image_ids: Indexable collection of file names relative to ``path``.
        labels: Indexable collection of integer labels, aligned with ``image_ids``.
        image_size: Side length (pixels) each image is resized to.
    """

    def __init__(self,path,image_ids,labels,image_size):
        self.image_ids = image_ids
        self.labels = labels
        self.path = path
        self.image_size = image_size

    def __len__(self):
        """Number of samples, i.e. the number of image ids."""
        return len(self.image_ids)

    def __getitem__(self,item):
        """Load, resize and tensorise sample ``item``.

        Returns:
            Dict with ``"x"``: float tensor holding the resized image exactly
            as OpenCV returns it (no layout or colour conversion is applied
            here), and ``"y"``: the label as a long tensor.

        Raises:
            FileNotFoundError: if the image file is missing or cannot be decoded.
        """
        image_id = str(self.image_ids[item])
        # join() copes with a directory given with or without a trailing slash.
        img_path = os.path.join(self.path, image_id)
        img = cv2.imread(img_path)
        # imread signals failure by returning None instead of raising; fail
        # here with the offending path rather than deep inside cv2.resize.
        if img is None:
            raise FileNotFoundError(f"Could not read image file: {img_path}")
        img = cv2.resize(img, (self.image_size, self.image_size))

        return {
            "x": torch.tensor(img, dtype=torch.float),
            "y": torch.tensor(self.labels[item], dtype=torch.long),
        }

Data Module - loading the data and batches

In [None]:

class CassavaLightDataset(pl.LightningDataModule):
    """LightningDataModule that splits train.csv 90/10 and serves both parts.

    Args:
        batch_size: Batch size used by both dataloaders.
        seed: ``random_state`` for the train/validation split.  The split is
            recomputed on every ``setup`` call, so a fixed seed is what keeps
            the two subsets identical (and disjoint) across calls and processes.
        num_workers: Worker processes per dataloader (0 loads in the main process).
    """

    def __init__(self, batch_size=64, seed=42, num_workers=0):
        super().__init__()
        self.batch_size = batch_size
        self.seed = seed
        self.num_workers = num_workers

    def setup(self, stage=None):
        """Read train.csv and build the training and validation datasets."""
        dfx = pd.read_csv("../input/cassava-leaf-disease-classification/train.csv")
        xtrain, xval, ytrain, yval = train_test_split(
            dfx["image_id"].values,
            dfx.label.values,
            test_size=0.1,
            # Without a fixed seed every call draws a different split, which
            # lets validation images leak into training.
            random_state=self.seed,
        )
        self.train_dataset = CassavaDataset(PATH, xtrain, ytrain, IMG_SIZE)
        self.validation_dataset = CassavaDataset(PATH, xval, yval, IMG_SIZE)

    def train_dataloader(self):
        """Shuffled loader over the training split."""
        return DataLoader(
            self.train_dataset,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=self.num_workers,
        )

    def val_dataloader(self):
        """Sequential (unshuffled) loader over the validation split."""
        return DataLoader(
            self.validation_dataset,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=self.num_workers,
        )


Training the model

In [None]:
# Keep the three checkpoints with the lowest validation loss in the working directory.
checkpoint_callback = pl.callbacks.ModelCheckpoint(
    monitor='valid_loss',
    dirpath='./',
    filename='models-{epoch:02d}-{valid_loss:.2f}',
    save_top_k=3,
    mode='min')

mod = CassavaModel()
dx = CassavaLightDataset()
# gpus=-1 asks for every visible GPU, which Lightning rejects on a machine
# without CUDA devices; fall back to CPU training there instead of crashing.
trainer = pl.Trainer(
    gpus=-1 if torch.cuda.is_available() else None,
    max_epochs=6,
    callbacks=[checkpoint_callback],
)
trainer.fit(model=mod, datamodule=dx)

Reloading the model's weights for evaluation

In [None]:
# Checkpoint produced by an earlier training run; swap in
# checkpoint_callback.best_model_path to use the model trained in this session.
BEST_MODEL_PATH = "../input/cassava-leaf-disease-clf-model/models-epoch03-valid_loss1.05.ckpt" #checkpoint_callback.best_model_path
# load_from_checkpoint is a classmethod that builds the model itself, so it is
# called on the class rather than on a throwaway instance.
pretrained_model = CassavaModel.load_from_checkpoint(BEST_MODEL_PATH)
# Inference mode: eval() switches BatchNorm to its running statistics and
# freeze() disables gradients for all parameters.
pretrained_model.eval()
pretrained_model.freeze()

Prediction On Testing Data

In [None]:
# Directory holding the images to predict on.
TEST_FILE_PATH = "../input/cassava-leaf-disease-classification/test_images/"
class CassavaTestDataset(Dataset):
    """Map-style dataset of unlabelled images, loaded from disk with OpenCV.

    Args:
        path: Directory containing the image files.
        image_ids: Indexable collection of file names relative to ``path``.
        image_size: Side length (pixels) each image is resized to.
    """

    def __init__(self,path,image_ids,image_size):
        self.image_ids = image_ids
        self.path = path
        self.image_size = image_size

    def __len__(self):
        """Number of samples, i.e. the number of image ids."""
        return len(self.image_ids)

    def __getitem__(self,item):
        """Load, resize and tensorise sample ``item``.

        Returns:
            Dict with ``"x"``: float tensor holding the resized image exactly
            as OpenCV returns it (no layout or colour conversion is applied here).

        Raises:
            FileNotFoundError: if the image file is missing or cannot be decoded.
        """
        image_id = str(self.image_ids[item])
        # join() copes with a directory given with or without a trailing slash.
        img_path = os.path.join(self.path, image_id)
        img = cv2.imread(img_path)
        # imread signals failure by returning None instead of raising; fail
        # here with the offending path rather than deep inside cv2.resize.
        if img is None:
            raise FileNotFoundError(f"Could not read image file: {img_path}")
        img = cv2.resize(img, (self.image_size, self.image_size))

        return {
            "x": torch.tensor(img, dtype=torch.float),
        }
# Predict a class for every image listed in the sample submission and write submission.csv.
sample = pd.read_csv("../input/cassava-leaf-disease-classification/sample_submission.csv")
test_dataset = CassavaTestDataset(TEST_FILE_PATH,sample.image_id,IMG_SIZE)
# Batched inference; order is preserved because shuffle is off.
TEST_BATCH_SIZE = 64
test_loader = DataLoader(test_dataset,
                      batch_size=TEST_BATCH_SIZE,
                      shuffle=False)
fin_y = []
with torch.no_grad():
    for data in test_loader:
        # Same reshape as in the model's training/validation steps, so the
        # network sees inputs prepared exactly as during training.
        y_hat = pretrained_model(data["x"].view(-1,3,IMG_SIZE,IMG_SIZE))
        # argmax over raw scores equals argmax over softmax(scores).
        y_hat = torch.argmax(y_hat,dim=1)
        fin_y.append(y_hat.cpu().numpy())
# concatenate() handles a smaller final batch; guard the empty case, which it rejects.
sample["label"] = np.concatenate(fin_y) if fin_y else np.array([], dtype=np.int64)
sample[["image_id","label"]].to_csv("submission.csv",index=False)
sample.head()