# Importing necessary libraries

In [None]:
!pip install efficientnet_pytorch #download pretrained effieceint model

In [None]:
import numpy as np
import pandas as pd #
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from skimage import io
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn  
import torch.optim as optim 
import torch.nn.functional as F 
import torchvision
import torchvision.transforms as transforms 
from tqdm import tqdm
from efficientnet_pytorch import EfficientNet
from torch.utils.data import (
    Dataset,
    DataLoader,
)
import plotly.express as px
import seaborn as sns
import time
import json
import os
import sys
import copy
import math
%matplotlib inline


In [None]:
# Select the compute device: the first CUDA GPU when torch can see one, else the CPU.
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
device

In [None]:
# Root folder of the competition data; every other path below is built from it.
base_dir = '../input/cassava-leaf-disease-classification'

# Exploring dataset

In [None]:
# Load the label-id -> disease-name mapping shipped with the dataset.
labels_path = os.path.join(base_dir, 'label_num_to_disease_map.json')
with open(labels_path) as file:
    raw_map = json.load(file)

# JSON object keys are always strings; convert them to ints so they can be
# looked up with the integer labels stored in train.csv.
map_classes = {int(key): name for key, name in raw_map.items()}

print(json.dumps(map_classes, indent=4))

In [None]:
# Folders holding the training and test images, both located under base_dir.
train_img_dir, test_img_dir = (
    os.path.join(base_dir, folder) for folder in ('train_images', 'test_images')
)
train_img_dir, test_img_dir

In [None]:
# Read the training annotations and print a preview followed by the frame's shape.
train_csv_path = os.path.join(base_dir, 'train.csv')
df_train = pd.read_csv(train_csv_path)
print(df_train.head(), df_train.shape, sep='\n')

In [None]:
# Bar chart with the number of training images per label.
plt.figure(figsize=(8, 6))
ax = sns.countplot(data=df_train, x="label", palette="Set3")
ax.set_title("Images distribution in Dataset")

In [None]:
# Count images per label and shape the result as a two-column frame for plotly.
pie_df = (
    df_train['label']
    .value_counts()
    .rename_axis('label')
    .reset_index(name='count')
)

# Pie chart showing the share of each label in the training set.
fig = px.pie(
    pie_df,
    names='label',
    values='count',
    color_discrete_sequence=px.colors.qualitative.Pastel,
)
fig.show()


<p>Here, we have unbalanced training data. This is one of the biggest problems that we face when applying machine learning. Three main ways of solving this problem are:</p>
<ul>
    <li>Under sampling</li>
    <li>Over sampling</li>
    <li>Synthetic sampling (SMOTE)</li>
</ul>

[more_details](https://towardsdatascience.com/deep-learning-unbalanced-training-data-solve-it-like-this-6c528e9efea6)

## Visualize images :

In [None]:
# Show a grid of training images, each titled with its disease class.
start_index = 25
end_index = 37
ncols = 4
nrows = math.ceil((end_index - start_index) / ncols)

# os.listdir returns files in arbitrary order, so sort for a reproducible
# selection and keep the file names so each label can be looked up by image id.
sample_fnames = sorted(os.listdir(train_img_dir))[start_index:end_index]
sample_imgs = [os.path.join(train_img_dir, fname) for fname in sample_fnames]

# The directory order is unrelated to the row order of train.csv, so labels
# must be resolved through `image_id` rather than by row position.
label_by_image = df_train.set_index('image_id')['label']

fig = plt.figure(figsize=(ncols * 6, nrows * 6))

for i, (fname, img_path) in enumerate(zip(sample_fnames, sample_imgs)):
    # Set up subplot; subplot indices start at 1
    sp = plt.subplot(nrows, ncols, i + 1)
    sp.axis('Off')  # Don't show axes (or gridlines)

    img = mpimg.imread(img_path)
    plt.imshow(img)
    label = label_by_image[fname]
    plt.title(f"Class: {map_classes[label]}")

plt.show()

# Initialize hyperparameters

In [None]:
# Model / data settings
input_size = 300       # images are resized to input_size x input_size
num_classes = 5

# Optimisation settings
learning_rate = 1e-3
num_epochs = 20

# Data-loading settings
train_bs = 32
valid_bs = 32
num_workers = 4

# Image Data Generator

In [None]:
class CassavaDataset(Dataset):
    """Dataset that reads images listed in a dataframe from a folder.

    Args:
        df: dataframe with an ``image_id`` column (file name) and, when
            ``output_label`` is set, a ``label`` column.
        data_root: folder that contains the image files.
        transforms: optional callable applied to the image after loading.
        output_label: when True, ``__getitem__`` returns ``(img, target)``;
            otherwise it returns only the image.
    """

    def __init__(self, df, data_root, transforms=None, output_label=True):
        super().__init__()
        # Work on a private copy with a 0..n-1 index so positional access
        # is independent of the caller's frame.
        self.df = df.reset_index(drop=True).copy()
        self.data_root = data_root
        self.transforms = transforms
        self.output_label = output_label

    def __len__(self):
        """Number of rows (images) in the dataframe."""
        return len(self.df)

    def __getitem__(self, index: int):
        """Load the image at row ``index`` and, if requested, its label."""
        record = self.df.iloc[index]

        # The label is read before the image, and only when labels are enabled.
        if self.output_label:
            target = record['label']

        img = io.imread("{}/{}".format(self.data_root, record['image_id']))

        if self.transforms:
            img = self.transforms(img)

        if self.output_label == True:
            return img, target
        return img

## Applying image data augmentation

In [None]:
# Training pipeline: resize, then random geometric augmentations, then convert
# to a tensor and normalize with the mean/std values given below.
train_transforms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((input_size, input_size)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomPerspective(distortion_scale=0.5, p=0.5, interpolation=2, fill=0),
    transforms.RandomRotation(degrees=45),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Evaluation pipeline: deterministic on purpose. Random flips, perspective
# warps and rotations would make predictions and metrics change from run to
# run, so only the resize and normalization steps are kept.
test_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((input_size, input_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [None]:
# Load Data
dataset = CassavaDataset(
    df = df_train,
    data_root = train_img_dir,
    transforms = train_transforms,
    output_label=True
)

## Splitting data into train set and validation set
<p>Here, 20% of the training images are reserved for validation.</p>

In [None]:
# Reserve a fixed fraction of the samples for validation. The sizes are derived
# from the dataset length instead of being hard-coded, so the split keeps
# working if the number of images changes.
valid_fraction = 0.2
split_seed = 42
num_valid = int(len(dataset) * valid_fraction)
num_train = len(dataset) - num_valid

# A seeded generator makes the split reproducible across kernel restarts.
# TODO: both subsets share `dataset`'s transforms, so validation samples are
# augmented as well; use a separate dataset with deterministic transforms.
train_set, valid_set = torch.utils.data.random_split(
    dataset,
    [num_train, num_valid],
    generator=torch.Generator().manual_seed(split_seed),
)

In [None]:
# Training batches are reshuffled every epoch.
train_loader = DataLoader(dataset=train_set, batch_size=train_bs, shuffle=True, num_workers=num_workers)
# Validation uses its own batch size (valid_bs) and needs no shuffling, since
# the order of samples does not affect the averaged loss/accuracy.
valid_loader = DataLoader(dataset=valid_set, batch_size=valid_bs, shuffle=False, num_workers=num_workers)
# Keys match the phase names used by the training loop.
dataloaders_dict = {'train': train_loader, 'val': valid_loader}

<h1 style="text-align:center;"> Visualizing augmented Images</h1>

## Image before augmentation

In [None]:
# Draw a single random sample from the dataset. The remainder goes into a
# throwaway name so the validation subset used for training is not overwritten.
train_set2, rest_set = torch.utils.data.random_split(dataset, [1, len(dataset) - 1])
train_loader2 = DataLoader(dataset=train_set2, batch_size=4, shuffle=True, num_workers=num_workers)

# `indices` holds the positions of the subset's samples in the underlying
# dataset. Iterating the subset yields (image, label) pairs, so the second
# element is the class label and must not be used as a row index.
current_img = train_set2.indices[0]

# Show the untouched image file for that sample.
img = mpimg.imread(os.path.join(train_img_dir, df_train['image_id'].iloc[current_img]))
plt.imshow(img)

## Images after applying augmentation

In [None]:
# Show several augmented versions of the single sample held in train_set2.
num_views = 8
view_cols = 4
view_rows = math.ceil(num_views / view_cols)

# Statistics used by the Normalize step of the transforms; needed to map the
# tensors back to the displayable [0, 1] range.
norm_mean = np.array([0.485, 0.456, 0.406])
norm_std = np.array([0.229, 0.224, 0.225])

fig = plt.figure(figsize=(view_cols * 5, view_rows * 5))
for i in range(num_views):
    # Every access runs the dataset's random transforms again, so each view
    # is a different augmentation of the same image.
    inputs, _ = train_set2[0]

    # Channels-first tensor -> channels-last array, then undo the normalization.
    image = inputs.permute(1, 2, 0).numpy() * norm_std + norm_mean

    sp = plt.subplot(view_rows, view_cols, i + 1)
    sp.axis('Off')
    plt.imshow(np.clip(image, 0, 1))

plt.show()

## Defining Model 

In [None]:
class CassavaClassifier(nn.Module):
    """Pretrained EfficientNet-B3 backbone followed by a small MLP head.

    Args:
        classes_to_predict: size of the final linear layer's output.
    """

    def __init__(self, classes_to_predict=5):
        super().__init__()
        self.model = EfficientNet.from_pretrained('efficientnet-b3')

        # Head: 1536 -> 512 -> 256 -> classes_to_predict, with batch norm and
        # ReLU after each hidden linear layer.
        head_layers = [
            nn.Linear(1536, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Linear(256, classes_to_predict),
        ]
        self.classifier_layer = nn.Sequential(*head_layers)

    def forward(self, inputs):
        """Run the backbone's feature extractor, pooling and dropout, then the head."""
        features = self.model.extract_features(inputs)
        pooled = self.model._avg_pooling(features).flatten(start_dim=1)
        return self.classifier_layer(self.model._dropout(pooled))


model = CassavaClassifier().to(device)

# Defining the training loop

In [None]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs=25):
    """Train `model`, validating after every epoch, and keep the best weights.

    Args:
        model: network to train; inputs and labels are moved to the
            module-level `device` before the forward pass.
        dataloaders: dict with 'train' and 'val' keys mapping to data loaders
            that yield (inputs, labels) batches.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: optimizer that updates the model's parameters.
        num_epochs: number of passes over both loaders.

    Returns:
        (model, history): the model loaded with the weights that reached the
        highest validation accuracy, and a dict with the per-epoch lists
        'train_acc', 'train_loss', 'val_acc' and 'val_loss'. All entries are
        plain Python floats, so they can be plotted or serialized directly
        regardless of the device used for training.
    """
    since = time.time()
    history = {
        'train_acc': [],
        'train_loss': [],
        'val_acc': [],
        'val_loss': []
    }
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            loader = dataloaders[phase]
            loop = tqdm(enumerate(loader), total=len(loader))

            for batch_idx, (inputs, labels) in loop:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if is_train:
                        loss.backward()
                        optimizer.step()

                # statistics
                batch_loss = loss.item()
                # The criterion value is weighted by the batch size so the
                # division by the dataset size below gives a per-sample mean.
                running_loss += batch_loss * inputs.size(0)
                # .item() keeps the counter a Python int instead of a tensor
                # living on the training device.
                running_corrects += torch.sum(preds == labels).item()
                loop.set_description(f"Epoch [{epoch}/{num_epochs-1}]")
                loop.set_postfix({
                    "phase": phase,
                    "loss": "{:.4f} ".format(batch_loss)
                })

            num_phase_samples = len(loader.dataset)
            epoch_loss = running_loss / num_phase_samples
            epoch_acc = running_corrects / num_phase_samples
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            if is_train:
                history['train_acc'].append(epoch_acc)
                history['train_loss'].append(epoch_loss)
            else:
                history['val_acc'].append(epoch_acc)
                history['val_loss'].append(epoch_loss)
                # deep copy the model whenever validation accuracy improves
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, history

# Initializing loss function and optimizer 

In [None]:
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training the model 

In [None]:
# Train and evaluate
model_ft, history = train_model(model, dataloaders_dict, criterion, optimizer, num_epochs=num_epochs)

# Visualizing model performance

In [None]:
def performance_plot(hist, epochs=20):
    """Plot training/validation accuracy and loss curves.

    Args:
        hist: dict with the per-epoch lists 'train_acc', 'val_acc',
            'train_loss' and 'val_loss'. Entries may be numbers or
            single-element tensors.
        epochs: maximum number of epochs to draw; shorter histories are
            plotted in full instead of raising a length-mismatch error.
    """

    def _curve(key):
        # float() also accepts single-element tensors (on any device), which
        # matplotlib cannot convert by itself when they live on the GPU.
        return [float(value) for value in hist[key][:epochs]]

    def _plot_pair(train_values, val_values, train_label, val_label, title):
        # One figure with the training curve in blue and validation in red.
        plt.figure(figsize=(8, 6))
        plt.plot(range(len(train_values)), train_values, 'b-', label=train_label)
        plt.plot(range(len(val_values)), val_values, 'r-', label=val_label)
        plt.title(title)
        plt.legend()

    _plot_pair(
        _curve('train_acc'), _curve('val_acc'),
        'Training accuracy', 'Validation accuracy',
        'Training and validation accuracy',
    )
    plt.ylim(0, 1)  # accuracy is a fraction of the samples

    _plot_pair(
        _curve('train_loss'), _curve('val_loss'),
        'Training Loss', 'Validation Loss',
        'Training and validation loss',
    )
    # Loss is not bounded by 1, so only the lower limit is pinned; a fixed
    # upper limit of 1 would cut off larger loss values.
    plt.ylim(bottom=0)

    plt.show()

performance_plot(history, epochs=num_epochs)

Our model has lots of room for improvement. Things that you can do to improve the performance are:
<ul>
    <li>Hyperparameter tuning</li>
    <li>Cross-validation</li>
    <li>Try different data augmentation techniques</li>
    <li>Handle the unbalanced training dataset</li>
    <li>Ensembling</li>
    <li>Try different architectures</li>
</ul>

**Work in Progress!**