In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)


# Input data files are available in the read-only "../input/" directory
# `os` is imported to build file paths under that directory (this cell no longer lists the input files itself)
import os

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip3 install --quiet albumentations pytorch-ignite

In [None]:
import cv2
from tqdm import tqdm, trange
import torch
from torch import nn
from torch.utils.data import Dataset,DataLoader
from torch.utils.data.sampler import WeightedRandomSampler
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from ignite.metrics import Accuracy
import albumentations as A
from albumentations.pytorch import ToTensorV2

In [None]:
# Number of target classes; sizes the class histogram and the classifier head.
NUM_CLASSES = 5

# Read the training CSV and flatten it into a list of plain tuples, one per row
# (later cells treat element 0 as the image file name and element 1 as the label).
df = list(
    pd.read_csv("/kaggle/input/cassava-leaf-disease-classification/train.csv")
    .itertuples(index=False, name=None)
)

# Hold out 25% of the rows for validation; the fixed seed keeps the split repeatable.
train_df, val_df = train_test_split(df, test_size=0.25, random_state=42)

def getClassDistribution(arr, num_classes=None):
    """Count how many samples of each class appear in ``arr``.

    Args:
        arr: Iterable of ``(fname, label)`` pairs. ``label`` is used as an
            integer index into the result list.
        num_classes: Length of the returned histogram. When omitted, the
            module-level ``NUM_CLASSES`` is used, which matches the previous
            behaviour.

    Returns:
        A list of ``num_classes`` ints where position ``k`` holds the number
        of pairs whose label equals ``k``.

    Raises:
        IndexError: If a label lies outside ``[0, num_classes)``. Negative
            labels are rejected explicitly because plain list indexing would
            silently count them from the end of the list.
    """
    if num_classes is None:
        num_classes = NUM_CLASSES

    counts = [0] * num_classes
    for _, label in arr:
        if not 0 <= label < num_classes:
            raise IndexError(
                'label {} is outside the range [0, {})'.format(label, num_classes)
            )
        counts[label] += 1
    return counts

# Histogram of the training labels, reused for the weights and the printout below.
train_dist = getClassDistribution(train_df)

# One weight per training sample: total sample count divided by the size of the
# sample's class, so samples from rarer classes receive larger weights.
label_weights = [sum(train_dist) / train_dist[label] for _, label in train_df]

print('Train class distribution', train_dist)
print('Valid class distribution', getClassDistribution(val_df))

In [None]:
class CassavaLeafDataset(Dataset):
    """Map-style dataset that loads images from disk by file name.

    Args:
        df: Indexable sequence of rows. ``row[0]`` is the image file name and,
            when ``output_label`` is true, ``row[1]`` is the target label.
        root_dir: Directory that contains the image files.
        transforms: Optional callable invoked as ``transforms(image=img)`` that
            returns a mapping with the transformed image under ``'image'``.
        output_label: When true, ``__getitem__`` returns ``(img, target)``;
            otherwise it returns only ``img``.
    """

    def __init__(
        self, df, root_dir, transforms=None, output_label=True
    ):
        super().__init__()
        self.df = df
        # Cached once; assumes ``df`` is not resized after construction.
        self.length = len(df)
        self.transforms = transforms
        self.root_dir = root_dir
        self.output_label = output_label

    def __len__(self):
        """Return the number of rows captured at construction time."""
        return self.length

    def read_img(self, image_id):
        """Load ``root_dir/image_id`` and return it with channels in RGB order.

        Raises:
            FileNotFoundError: If OpenCV cannot read the file. ``cv2.imread``
                signals failure by returning ``None`` instead of raising, which
                would otherwise surface as an opaque ``TypeError``.
        """
        path_img = os.path.join(self.root_dir, str(image_id))
        img = cv2.imread(path_img)
        if img is None:
            raise FileNotFoundError(
                'Could not read image file: {}'.format(path_img)
            )
        # cvtColor returns a new contiguous array. The previous ``[:, :, ::-1]``
        # slice produced a negative-stride view, which torch.from_numpy rejects
        # when no copying transform runs first.
        return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    def __getitem__(self, index: int):
        """Return the (optionally transformed) image at ``index``.

        Returns:
            ``(img, target)`` when ``output_label`` is true, else ``img`` alone.
        """
        row = self.df[index]

        # Read the label first so a malformed row fails before any image I/O.
        if self.output_label:
            target = row[1]

        img = self.read_img(row[0])

        if self.transforms:
            img = self.transforms(image=img)['image']

        if self.output_label:
            return img, target
        return img

In [None]:
# Side length (pixels) of the square crop fed to the network.
IMG_SIZE = 224

# Training pipeline: resize to 256x256, take the central IMG_SIZE crop, normalise
# with the standard ImageNet channel statistics and convert to a tensor.
# Every random augmentation is currently switched off (kept below as comments),
# so this pipeline is deterministic.
train_transform = A.Compose([
    A.Resize(height=256, width=256),
    A.CenterCrop(height=IMG_SIZE, width=IMG_SIZE),
    # Disabled augmentations:
    #     A.RandomCrop(IMG_SIZE, IMG_SIZE),
    #     A.Transpose(p=0.5),
    #     A.HorizontalFlip(p=0.5),
    #     A.VerticalFlip(p=0.5),
    #     A.ShiftScaleRotate(p=0.5),
    #     A.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5),
    #     A.RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1, 0.1), p=0.5),
    A.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
        p=1.0,
    ),
    #     A.CoarseDropout(p=0.5),
    #     A.Cutout(p=0.5),
    ToTensorV2(p=1.0),
])

# Evaluation pipeline: same resize / centre crop / normalise / to-tensor steps.
test_transform = A.Compose([
    A.Resize(height=256, width=256),
    A.CenterCrop(height=IMG_SIZE, width=IMG_SIZE),
    # Disabled augmentations:
    #     A.Transpose(p=0.5),
    #     A.HorizontalFlip(p=0.5),
    #     A.VerticalFlip(p=0.5),
    #     A.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5),
    #     A.RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1, 0.1), p=0.5),
    A.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
        p=1.0,
    ),
    ToTensorV2(p=1.0),
])

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

# Run configuration.
EPOCHES = 20        # number of passes over the training set
BATCH_SIZE = 64
NUM_WORKERS = 2     # DataLoader worker processes
IMG_PATH = '/kaggle/input/cassava-leaf-disease-classification/train_images'

# Both splits read from the same image directory; only the rows and transform differ.
train_dataset = CassavaLeafDataset(train_df, IMG_PATH, train_transform, output_label=True)
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
)
val_dataset = CassavaLeafDataset(val_df, IMG_PATH, test_transform, output_label=True)
val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
)

# Pretrained HarDNet-68 from torch.hub. The module at base[16][3] is replaced by a
# fresh linear layer mapping 1024 features to NUM_CLASSES outputs
# (presumably the original classification head -- confirm against the hub model).
model = torch.hub.load('PingoLH/Pytorch-HarDNet', 'hardnet68', pretrained=True)
model.base[16][3] = nn.Linear(in_features=1024, out_features=NUM_CLASSES, bias=True)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=0.001,
    momentum=0.9,
    nesterov=True,
    weight_decay=1e-4,
)
# Multiply the learning rate by 0.1 after every 10 scheduler steps.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

def train_one_epoch():
    """Run one optimisation pass over ``train_loader`` and step the LR scheduler.

    Uses the module-level ``model``, ``train_loader``, ``device``, ``criterion``,
    ``optimizer`` and ``scheduler``.

    Returns:
        The mean of the per-batch loss values for this epoch.

    Raises:
        ZeroDivisionError: If ``train_loader`` yields no batches.
    """
    model.train()
    losses = []
    for imgs, labels in train_loader:
        imgs, labels = imgs.to(device), labels.to(device)

        # Clear gradients left over from the previous batch. backward() adds to
        # the existing .grad buffers, so without this every step would use the
        # running sum of all gradients seen so far.
        optimizer.zero_grad()

        outputs = model(imgs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        losses.append(loss.item())

    # The scheduler is stepped once per epoch, after the optimizer updates.
    scheduler.step()
    return sum(losses) / len(losses)

def evaluate():
    """Compute the accuracy metric of ``model`` over ``val_loader``.

    Uses the module-level ``model``, ``val_loader`` and ``device``. The model is
    switched to eval mode and left in that mode; gradients are not tracked.

    Returns:
        Whatever ``Accuracy().compute()`` yields after all batches were fed in.
    """
    model.eval()
    with torch.no_grad():
        accuracy = Accuracy()
        for batch in val_loader:
            # Each batch is an (images, labels) pair; move both to the target device.
            inputs, targets = (tensor.to(device) for tensor in batch)
            accuracy.update((model(inputs), targets))
        result = accuracy.compute()
    return result

# Per-epoch record of the mean training loss and the validation accuracy.
history = {'loss': [], 'acc': []}
tbar = trange(EPOCHES)
best_acc = 0

for epoch in tbar:
    loss = train_one_epoch()
    acc = evaluate()

    history['loss'].append(loss)
    history['acc'].append(acc)

    # Checkpoint only when validation accuracy improves on the best seen so far.
    # NOTE(review): this saves the whole model object rather than a state_dict.
    if acc > best_acc:
        best_acc = acc
        torch.save(model, '/kaggle/working/model.pth')

    # Show the latest metrics next to the progress bar.
    tbar.set_postfix({'loss': loss, 'acc': acc})

In [None]:
# Print one line per recorded epoch. Iterating over `history` itself, rather than
# range(EPOCHES), avoids an IndexError when training stopped early or EPOCHES was
# changed after the training cell ran.
for epoch_loss, epoch_acc in zip(history['loss'], history['acc']):
    print('loss: {}, acc: {}'.format(epoch_loss, epoch_acc))