In [None]:
!pip install neptune-client

import json
import os
import random
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
import neptune.new as neptune
import numpy as np
import pandas as pd
import seaborn as sns
import torch


In [None]:
# Start the Neptune run used for experiment tracking.
# The API token is read from the NEPTUNE_API_TOKEN environment variable so the
# secret never lives in the notebook source. Any token that was previously
# committed with this notebook must be treated as leaked and rotated.
run = neptune.init(
    project="petr.square/Cassava-Leaf-Disease-Classification",
    api_token=os.environ.get("NEPTUNE_API_TOKEN"),
)

# Hyperparameters and run configuration shared by the rest of the notebook.
parameters = {
    'criterion': 'CrossEntropyLoss',
    'optimizer': 'Adam',
    'learning_rate': 0.0012,
    'batch_size': 64,
    'n_epochs': 20,
    'auto_break_n': 3,  # epochs without validation improvement before stopping
    'seed': 11,
    'image_size': (256, 256),
    'path_save_model': 'mobnet.torch',
}
# Attach the configuration dict to the Neptune run under 'model/parameters'.
run['model/parameters'] = parameters

In [None]:
def seed_everything(seed: int) -> None:
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU and, when available, all CUDA devices), and exports
    ``PYTHONHASHSEED``. When CUDA is available cuDNN is switched to
    deterministic mode with benchmarking disabled.

    Parameters
    ----------
    seed : int
        The seed value
    """
    random.seed(seed)
    # NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
    # this likely only affects processes spawned after this call.
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        # Seed every visible GPU, not only the current device.
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

In [None]:
# Apply the seed configured in `parameters` to all random number generators.
seed_everything(parameters['seed'])

# Data Load

In [None]:
# Root of the competition data and the two image folders below it.
base_path = Path('../input/cassava-leaf-disease-classification')
train_img_dir = base_path.joinpath('train_images')
test_img_dir = base_path.joinpath('test_images')

In [None]:
# Display the resolved training-image directory as the cell output.
train_img_dir

In [None]:
# Load the labelled training table, report its shape and peek at two random rows.
train_csv_path = base_path.joinpath('train.csv')
train = pd.read_csv(train_csv_path)
print(f"train.shape = {train.shape}")
train.sample(n=2)

# Data distributions

In [None]:
# Number of distinct class labels present in the training table.
train.label.nunique()

In [None]:
# Per-label row counts; the count lands in the `image_id` column.
label_groups = train.groupby('label', as_index=False)
distribution_info = label_groups.count()
distribution_info

In [None]:
# Rename the count column and add each label's share of the training set.
# Written without `inplace=True` so the cell can be re-run safely: on a second
# run the rename is a no-op and `Percent` is recomputed from `Count`.
distribution_info = distribution_info.rename(columns={'image_id': 'Count'})
distribution_info['Percent'] = distribution_info['Count'] / train.shape[0] * 100
distribution_info

In [None]:
# Bar chart of the number of images per label.
plt.title('Countplot')
# Pass the vector by keyword: recent seaborn releases no longer accept the
# data vector as a positional argument.
sns.countplot(x=train.label);

In [None]:
# JSON file mapping label numbers to disease names; derived from `base_path`
# so the dataset location is defined in exactly one place.
path_labels_info = base_path / 'label_num_to_disease_map.json'

# Load labels info

In [None]:
# Read the label-number -> disease-name mapping. The `with` block closes the
# file on exit, so no explicit close() is needed.
with open(path_labels_info) as labels_file:
    labels_info = json.load(labels_file)
labels_info

In [None]:
# Sanity check: the image folder holds exactly one file per row of `train`.
training_files_name = os.listdir(train_img_dir)
assert train.shape[0] == len(training_files_name)
len(training_files_name)

# Show image samples

In [None]:
# Draw `columns` random image ids (with replacement) for every label, keeping
# the matching label string alongside each drawn id.
columns = 4
list_images_path = []
list_labels = []
for el in train.label.unique():
    # Select the candidate ids once per label instead of once per draw.
    class_image_ids = train.loc[train.label == el, 'image_id'].values
    for _ in range(columns):
        list_images_path.append(np.random.choice(class_image_ids))
        list_labels.append(str(el))
list_images_path

In [None]:
# Show the sampled images in a grid with one row per label and `columns` images per row.
fig = plt.figure(figsize=(16, 16))
# Derive the row count from the sample list instead of hard-coding the number
# of classes, so the grid always matches what was actually sampled.
rows = len(list_images_path) // columns
for position, (image_name, label) in enumerate(zip(list_images_path, list_labels), start=1):
    image = cv2.imread(str(train_img_dir / image_name))
    # OpenCV loads images as BGR; matplotlib expects RGB.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    fig.add_subplot(rows, columns, position)
    plt.title(labels_info.get(label))
    plt.imshow(image)
plt.show()

In [None]:
def plot_random_image(class_label_num, df, labels_dict, img_dir, random_seed=None, n_rows=3, n_cols=3):
    """Plot a grid of randomly chosen images belonging to one class.

    Parameters
    ----------
    class_label_num
        Label to plot. It is used both as the key into ``labels_dict`` and
        interpolated into the ``label == ...`` query on ``df``.
    df : pandas.DataFrame
        Table with ``label`` and ``image_id`` columns.
    labels_dict : dict
        Maps ``class_label_num`` to the title shown above the grid.
    img_dir
        Directory containing the image files named in ``image_id``.
    random_seed : int, optional
        When not None, NumPy's global generator is seeded with it first
        (a seed of 0 is honoured).
    n_rows, n_cols : int
        Grid dimensions; ``n_rows * n_cols`` images are drawn with replacement.

    Raises
    ------
    ValueError
        If ``df`` contains no rows for ``class_label_num``.
    """
    class_label = labels_dict[class_label_num]

    label_df = df.query(f"label == {class_label_num}")
    if len(label_df) == 0:
        raise ValueError(f"No images found for label {class_label_num}")
    image_ids = label_df.image_id.values

    # `is not None` so that an explicit seed of 0 is not silently ignored.
    if random_seed is not None:
        np.random.seed(random_seed)
    rand_idx = np.random.randint(0, len(label_df), n_rows * n_cols)

    # squeeze=False keeps `axs` two-dimensional even for a single row/column.
    fig, axs = plt.subplots(n_rows, n_cols, figsize=(10, 10), squeeze=False)

    for row in range(n_rows):
        for col in range(n_cols):
            image_id = image_ids[rand_idx[row * n_cols + col]]
            img = cv2.imread(os.path.join(img_dir, image_id))
            # OpenCV loads images as BGR; matplotlib expects RGB.
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            axs[row, col].imshow(img)
            axs[row, col].axis('off')
            axs[row, col].set_title(image_id)

    plt.suptitle(class_label)
    plt.show()

In [None]:
# One random-sample grid per label found in the training table.
for label_id in train.label.unique():
    plot_random_image(
        class_label_num=str(label_id),
        df=train,
        labels_dict=labels_info,
        img_dir=train_img_dir,
    )

Create the notebook with the first model for the Cassava Leaf Disease Classification challenge:
- You can use pre-trained models such as MobileNetV2 or EfficientNetB0. You can add functionality to try different models.
- Try to make your own pipeline

- Try to make it clean (Try using more OOP)

# Split data

In [None]:
# train = train.head(200)

In [None]:
from sklearn.model_selection import train_test_split

# Stratified 80/20 split of image ids and labels. The held-out part is what
# the notebook later uses as its validation set.
all_image_ids = train.image_id.values
all_labels = train.label.values
X_train, X_test, y_train, y_test = train_test_split(
    all_image_ids,
    all_labels,
    stratify=all_labels,
    test_size=0.2,
    random_state=1,
)

# X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=1) # 0.25 x 0.8 = 0.2

In [None]:
# assert (X_train.shape[0] + X_val.shape[0] + X_test.shape[0]) == train.shape[0]
# The two parts of the split must together cover every row of `train`.
assert train.shape[0] == X_train.shape[0] + X_test.shape[0]

# Dataset

In [None]:
import torch
import torch.nn as nn
from tqdm import tqdm
import albumentations as A
import torchvision.transforms as tfs
from torch.utils.data import Dataset, DataLoader

In [None]:
class Data_gen(Dataset):
    """Dataset that reads images from disk with OpenCV and pairs them with labels.

    Parameters
    ----------
    path : str
        Directory containing the image files.
    image_names
        Indexable collection of file names relative to ``path``.
    target
        Indexable collection of labels, aligned with ``image_names``.
    transforms
        Callable invoked as ``transforms(image=array)['image']``; a falsy
        value skips the transformation step.
    """

    def __init__(self, path: str, image_names, target, transforms):
        self.path = path
        self.image_names = image_names
        self.target = target
        self.transforms = transforms

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.image_names)

    def __getitem__(self, index):
        """Load one sample.

        Returns
        -------
        dict
            ``{'X': image, 'y': label}`` where ``y`` is a ``torch.long`` tensor.

        Raises
        ------
        FileNotFoundError
            If OpenCV cannot read the image file.
        """
        image_path = os.path.join(self.path, self.image_names[index])
        x = cv2.imread(image_path)
        # cv2.imread signals failure by returning None instead of raising.
        if x is None:
            raise FileNotFoundError(f"Could not read image: {image_path}")
        # OpenCV loads images as BGR; convert to RGB.
        x = cv2.cvtColor(x, cv2.COLOR_BGR2RGB)

        if self.transforms:
            x = self.transforms(image=x)['image']

        y = torch.tensor(self.target[index], dtype=torch.long)

        return {'X': x, 'y': y}

In [None]:
from albumentations.pytorch import ToTensorV2

# Transform

In [None]:
# Training-time augmentation: random resized crop to the configured image
# size, random transpose/flips/shift-scale-rotate, normalisation, then
# conversion to a tensor.
transforms_train = A.Compose(
    [
        A.RandomResizedCrop(*parameters['image_size']),
        A.Transpose(p=0.5),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.ShiftScaleRotate(p=0.5),
        A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ToTensorV2(),
    ],
    p=1.0,
)


# Validation-time preprocessing: deterministic resize to the configured image
# size, the same normalisation as for training, then conversion to a tensor.
transforms_valid = A.Compose(
    [
        # `image_size` only exists as a key of `parameters`, not as a variable.
        A.Resize(*parameters['image_size']),
        A.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
        ToTensorV2(),
    ],
    p=1,
)

# DataLoader

In [None]:
# Both datasets read from the training-image folder; they differ in the split
# they cover and in the transform pipeline applied.
images_root = str(train_img_dir)
train_set = Data_gen(images_root, X_train, y_train, transforms_train)
valid_set = Data_gen(images_root, X_test, y_test, transforms_valid)
# test_set = Data_gen(str(train_img_dir), X_test, y_test, transforms, preprocess, image_size)

In [None]:
# Batch loaders. The batch size comes from `parameters` so that the value
# logged to the experiment tracker is the one actually used.
train_loader = DataLoader(train_set, batch_size=parameters['batch_size'], shuffle=True, num_workers=4)
valid_loader = DataLoader(valid_set, batch_size=parameters['batch_size'], num_workers=4)

In [None]:
# train_loader = DataLoader(train_set, batch_size=64, shuffle=True, num_workers=4)
# valid_loader = DataLoader(valid_set, batch_size=64, num_workers=4)
# test_loader = DataLoader(test_set, batch_size=64, num_workers=4)

# Model

In [None]:
# Pre-trained MobileNetV2 with its classification head replaced.
num_classes = 5  # output size of the new head
mobilenet = torch.hub.load('pytorch/vision:v0.10.0', 'mobilenet_v2', pretrained=True)
# torchvision's MobileNetV2 exposes its head as `classifier` (Dropout, Linear)
# and has no `fc` attribute. Assigning `mobilenet.fc` would only attach an
# unused layer and leave the original 1000-way output in place.
mobilenet.classifier[1] = nn.Linear(mobilenet.classifier[1].in_features, num_classes, bias=True)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(mobilenet.parameters(), lr=parameters['learning_rate'])

In [None]:
# efficientnet = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_efficientnet_b0', pretrained=True)
# efficientnet.fc = nn.Linear(1280, 5, bias=True)
# criterion = nn.CrossEntropyLoss()
# optimizer_efficientnet = torch.optim.Adam(efficientnet.parameters(), lr=parameters['learning_rate'])

# Change device

In [None]:
# Show whether PyTorch can see a CUDA device.
torch.cuda.is_available()

In [None]:
# Use the first GPU when CUDA is available, otherwise fall back to the CPU.
device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
device

# Class Trainer

In [None]:
class Trainer:
    """Training loop with validation, early stopping and checkpointing.

    Parameters
    ----------
    model : torch.nn.Module
        Network to train; moved to ``device`` on construction.
    criterion : callable
        Loss, called as ``criterion(logits, targets)``.
    optimizer : torch.optim.Optimizer
        Optimizer over the model's parameters.
    epochs : int
        Maximum number of epochs to run.
    device : torch.device
        Device on which batches and the model are placed.
    path_save_model : str
        Checkpoint path, written whenever validation accuracy improves.
    auto_break_n : int
        Training stops once this many consecutive epochs bring no
        improvement in validation accuracy.
    run : optional
        Experiment-tracking run. It is indexed with string keys and offers
        ``.log(value)``, ``.upload(path)`` and ``.stop()``. A falsy value
        disables all tracking calls.
    """

    def __init__(self, model, criterion, optimizer, epochs, device, path_save_model='model.torch', auto_break_n=1000, run=None):
        self.model = model
        self.criterion = criterion
        self.optimizer = optimizer
        self.epochs = epochs
        self.history_loss = []
        self.history_acc = []
        self.history_loss_val = []
        self.history_acc_val = []
        self.best_valid_score = 0
        self.device = device
        self.path_save_model = path_save_model
        self.auto_break_n = auto_break_n
        self.run = run

        self.no_improvement_epoch = 0

        self.model.to(self.device)

    def _log(self, key, value):
        """Append ``value`` to the tracked series ``key`` when a run is attached."""
        if self.run:
            self.run[key].log(value)

    def _train_one_epoch(self, train_loader):
        """Run one optimisation pass; return (mean batch loss, accuracy)."""
        self.model.train()
        loss_score, correct, total = 0, 0, 0
        for batch in tqdm(train_loader):
            x_batch, y_batch = batch["X"].to(self.device), batch["y"].to(self.device)

            logits = self.model(x_batch)
            loss = self.criterion(logits, y_batch)

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            loss_score += loss.item()
            _, predicted = logits.max(1)
            total += y_batch.shape[0]
            correct += predicted.eq(y_batch).sum().item()

        return loss_score / len(train_loader), correct / total

    def fit(self, train_loader, valid_loader):
        """Train for up to ``self.epochs`` epochs.

        Each batch yielded by the loaders must be a mapping with an ``"X"``
        input tensor and a ``"y"`` target tensor. After every epoch the
        metrics are appended to the ``history_*`` lists and, when a run is
        attached, logged to it. If ``valid_loader`` is truthy the model is
        evaluated on it, a checkpoint is saved on every improvement of the
        validation accuracy, and training stops early after
        ``self.auto_break_n`` epochs without improvement (the attached run,
        if any, is stopped at that point).
        """
        for epoch in range(self.epochs):
            loss_score, acc_score = self._train_one_epoch(train_loader)
            self.history_loss.append(loss_score)
            self.history_acc.append(acc_score)
            print('=' * 100)
            print(f"\033[1;34m=> {epoch + 1} <= epoch")
            # Accuracy is a fraction in [0, 1]; format it as a real percentage.
            print(f'\033[1;31m Train Loss: {loss_score} | Accuracy: {acc_score:.2%}')
            print('- ' * 50)

            self._log('train/epoch/loss', loss_score)
            self._log('train/epoch/accuracy', acc_score)

            if not valid_loader:
                continue

            loss_score_val, acc_score_val = self.test_score(valid_loader, "\033[1;36m Validation")
            self.history_loss_val.append(loss_score_val)
            self.history_acc_val.append(acc_score_val)

            self._log('valid/epoch/loss', loss_score_val)
            self._log('valid/epoch/accuracy', acc_score_val)

            if acc_score_val > self.best_valid_score:
                self.no_improvement_epoch = 0
                self.best_valid_score = acc_score_val
                self.save_model(epoch + 1, self.path_save_model)
            else:
                self.no_improvement_epoch += 1

            print(f"no improvement_iter = {self.no_improvement_epoch}")
            if self.no_improvement_epoch == self.auto_break_n:
                print('Auto_break !!!')
                # Guarded: the trainer must also work without a tracking run.
                if self.run:
                    self.run.stop()
                break

    def test_score(self, valid_loader, info=""):
        """Evaluate the model on ``valid_loader`` without updating weights.

        Parameters
        ----------
        valid_loader
            Iterable of batches with ``"X"`` and ``"y"`` tensors.
        info : str
            Prefix for the printed summary line.

        Returns
        -------
        tuple
            ``(mean batch loss, accuracy)`` with accuracy as a fraction.
        """
        self.model.eval()  # switch for some specific layers/parts
        loss_score, correct, total = 0, 0, 0

        with torch.no_grad():
            for batch in tqdm(valid_loader):
                x_batch, y_batch = batch["X"].to(self.device), batch["y"].to(self.device)

                logits = self.model(x_batch)
                # Use the trainer's own criterion, not a notebook-level global.
                loss = self.criterion(logits, y_batch)

                loss_score += loss.item()
                _, predicted = logits.max(1)
                total += y_batch.shape[0]
                correct += predicted.eq(y_batch).sum().item()

        loss_score = loss_score / len(valid_loader)
        acc_score = correct / total
        print(f'{info} Loss : {loss_score} | Accuracy: {acc_score:.2%}')

        return loss_score, acc_score

    def save_model(self, n_epoch, save_path):
        """Write a checkpoint to ``save_path`` and upload it to the attached run.

        The checkpoint holds the model and optimizer state dicts, the best
        validation score so far and the epoch number ``n_epoch``.
        """
        torch.save(
            {
                "model_state_dict": self.model.state_dict(),
                "optimizer_state_dict": self.optimizer.state_dict(),
                "best_valid_score": self.best_valid_score,
                "n_epoch": n_epoch,
            },
            save_path,
        )
        # Use the run given to this trainer, not a notebook-level global.
        if self.run:
            self.run['model/saved_model'].upload(save_path)

In [None]:
# List the available configuration keys.
parameters.keys()

# Fit

In [None]:
# Build the trainer for MobileNet from the shared configuration.
mod = Trainer(
    model=mobilenet,
    criterion=criterion,
    optimizer=optimizer,
    epochs=parameters['n_epochs'],
    device=device,
    path_save_model=parameters['path_save_model'],
    auto_break_n=parameters['auto_break_n'],
    run=run,
)

In [None]:
# Train on the training loader, validating on the held-out loader after each epoch.
mod.fit(train_loader, valid_loader)

In [None]:
# mod.test_score(test_loader, "\033[0;1m Test")

In [None]:
# Validation accuracy recorded after each completed epoch.
np.array(mod.history_acc_val)

In [None]:
# Training vs. validation accuracy per epoch.
axes = plt.gca()
for accuracy_history in (mod.history_acc, mod.history_acc_val):
    axes.plot(np.array(accuracy_history), '-o')
axes.set_xlabel('epoch')
axes.set_ylabel('accuracy')
axes.set_ylim(0, 1)
axes.set_xticks(range(len(mod.history_acc)))
axes.legend(['Train', 'Valid'])
axes.set_title('Train vs Valid Accuracy')

plt.show()

In [None]:
# Training vs. validation loss per epoch.
axes = plt.gca()
for loss_history in (mod.history_loss, mod.history_loss_val):
    axes.plot(loss_history, '-o')
axes.set_xlabel('epoch')
axes.set_xticks(range(len(mod.history_acc)))
axes.set_ylabel('loss')
axes.legend(['Train', 'Valid'])
axes.set_title('Train vs Valid loss')

plt.show()

# Save full model

In [None]:
# Restore the best checkpoint written during training and save the whole
# model object. The trained network is `mobilenet`; `efficientnet` is only
# defined in commented-out cells, so referencing it raises a NameError.
states = torch.load(mod.path_save_model, map_location=mod.device)
mobilenet.load_state_dict(states['model_state_dict'])
torch.save(mobilenet, "full_model.pt")

In [None]:
# efficientnet = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_efficientnet_b0', pretrained=True)
# efficientnet.fc = nn.Linear(1280, 5, bias=True)
# optimizer_efficientnet = torch.optim.Adam(efficientnet.parameters(), lr=0.001)
# mod_eff = Trainer(efficientnet, criterion, optimizer_efficientnet, 3, device, path_save_model='mod_eff.torch')
# mod_eff.fit(train_loader, valid_loader)
# mod_eff.test_score(test_loader, "\033[0;1m Test")
# mod_eff = Trainer(efficientnet, criterion, optimizer_efficientnet, 3, device, path_save_model='mod_eff.torch')
# mod_eff.fit(train_loader, valid_loader)

In [None]:
# mod_eff = Trainer(efficientnet, criterion, optimizer_efficientnet, 3, device, path_save_model='mod_eff.torch')
# mod_eff.fit(train_loader, valid_loader)

In [None]:
# mod_eff.test_score(test_loader, "\033[0;1m Test")