In [None]:
# [1] - Import Modules
"""import modules"""

import sys
sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import pandas as pd

import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.autograd import Variable
import torch.optim as optim
from torch.optim.lr_scheduler import ExponentialLR, CosineAnnealingWarmRestarts


import PIL
from PIL import Image
import matplotlib.pyplot as plt
import cv2
import itertools

from torchvision.transforms import ToTensor
import torchvision.transforms as transforms
import torchvision.models as models

from sklearn import metrics, model_selection, preprocessing
from imblearn.over_sampling import SMOTE

from tqdm.notebook import tqdm
import time
import datetime
import timm

# Load Data

In [None]:
# [2] - Dataset Loader
"""Dataset loader class"""

class CassavaDataset(Dataset):                    # Override torch.utils.data.Dataset
    """Dataset that loads image files from disk and yields ``(image, label)`` pairs.

    Args:
      data       (sequence of str): paths of the image files
      targets    (sequence): label of each entry in ``data`` (same order)
      dataset    (string): 'train' selects the augmenting default pipeline,
                           'test' the resize-only default pipeline; any other
                           value means no default pipeline is applied
      transform  (callable, optional): when given, it is applied to the PIL
                           image instead of the default pipeline
    """

    # Side length (pixels) the default pipelines resize/crop to.
    INPUT_SIZE = 384
    # Per-channel (mean, std) used by the default pipelines' Normalize step.
    IMAGENET_STATS = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

    def __init__(self, data, targets, dataset, transform=None):
        self.files = data
        self.targets = targets
        self.classes = list(set(targets))
        self.transform = transform
        self.dataset = dataset
        # Built lazily on first access so the pipeline is not re-created per item.
        self._default_transform = None

    def __len__(self):
        return len(self.files)

    def _get_default_transform(self):
        """Return the built-in pipeline for ``self.dataset`` (``None`` if there is none)."""
        if self._default_transform is None:
            size = (self.INPUT_SIZE, self.INPUT_SIZE)
            if self.dataset == 'train':
                self._default_transform = transforms.Compose([
                    transforms.RandomResizedCrop(size),
                    transforms.RandomHorizontalFlip(p=0.5),
                    transforms.RandomVerticalFlip(p=0.5),
                    transforms.ToTensor(),
                    transforms.Normalize(*self.IMAGENET_STATS),
                ])
            elif self.dataset == 'test':
                self._default_transform = transforms.Compose([
                    transforms.Resize(size),
                    transforms.ToTensor(),
                    transforms.Normalize(*self.IMAGENET_STATS),
                ])
        return self._default_transform

    def __getitem__(self, idx):
        """Load the image at position ``idx`` and return ``(image, label)``."""
        if torch.is_tensor(idx):
            idx = idx.tolist()

        # Force three channels so Normalize always receives an RGB tensor; the
        # context manager closes the file once the pixels have been decoded.
        with Image.open(self.files[idx]) as img_file:
            image = img_file.convert('RGB')

        # A user-supplied transform replaces the default pipeline. (The previous
        # code called it on an undefined name and discarded the result.)
        if self.transform is not None:
            pipeline = self.transform
        else:
            pipeline = self._get_default_transform()
        if pipeline is not None:
            image = pipeline(image)

        return image, self.targets[idx]

In [None]:
# [2] - load csv file
"""load from csv"""

# Read the training table; the columns used below are `image_id` and `label`.
dfx = pd.read_csv('../input/cassava-leaf-disease-classification/train.csv')

# Hold out 10% for validation. Stratifying on the label keeps the class
# proportions equal in both splits; the fixed seed makes the split repeatable.
df_train, df_valid = model_selection.train_test_split(
    dfx, test_size=0.1, random_state=42, stratify=dfx.label.values
)

# Re-index both frames from 0. (The train frame was previously assigned to a
# stray `_train` variable, so `df_train` kept its shuffled index.)
df_train = df_train.reset_index(drop=True)
df_valid = df_valid.reset_index(drop=True)

image_path = "../input/cassava-leaf-disease-classification/train_images/"

# Full file path of every image, in the same order as the targets below.
train_image_paths = [os.path.join(image_path, x) for x in df_train.image_id.values]
valid_image_paths = [os.path.join(image_path, x) for x in df_valid.image_id.values]

train_targets = df_train.label.values
valid_targets = df_valid.label.values

In [None]:
# [3] - load data
"""load datasets"""

# 'train' enables the random-crop/flip pipeline, 'test' the resize-only one.
cassava_train = CassavaDataset(train_image_paths, train_targets, 'train')
cassava_test = CassavaDataset(valid_image_paths, valid_targets, 'test')

batch_size = 16

# Shuffle the training set every epoch so SGD does not see the samples in the
# same fixed order each time; the validation loader keeps a stable order.
train_loader = DataLoader(cassava_train, batch_size=batch_size, shuffle=True, num_workers=2)
test_loader = DataLoader(cassava_test, batch_size=batch_size, shuffle=False, num_workers=2)

# Training

Helpers to summarize loss and accuracy, adapted from timm (reference: https://github.com/rwightman/pytorch-image-models/blob/master/timm/utils/metrics.py).

In [None]:
# [4] - Module to print loss and accuracy
"""Metric helpers: running-average meter and top-k accuracy"""

class AverageMeter:
    """Running weighted average that also remembers the most recent value.

    Attributes:
        val:   value passed to the latest `update` call
        sum:   accumulated `val * n` over all updates
        count: accumulated `n` over all updates
        avg:   `sum / count` after the latest update
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Set every statistic back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record `val` as the latest value, counted with weight `n`."""
        self.val = val
        self.sum += val * n
        self.count += n
        # Refresh the mean from the new totals.
        self.avg = self.sum / self.count


def accuracy(output, target, topk=(1,)):
    """Return the top-k accuracy (in percent) for every k listed in `topk`.

    `output` holds one row of scores per sample and `target` the matching
    class indices. The result is a list with one scalar tensor per k.
    """
    largest_k = max(topk)
    n_samples = target.size(0)

    # Indices of the `largest_k` best-scoring classes per sample, best first,
    # transposed so that row r holds every sample's rank-r prediction.
    _, top_idx = output.topk(largest_k, dim=1, largest=True, sorted=True)
    ranked = top_idx.t()
    hits = ranked.eq(target.reshape(1, -1).expand_as(ranked))

    results = []
    for k in topk:
        # A sample counts as correct if its target is among its first k ranks.
        n_correct = hits[:k].reshape(-1).float().sum(0)
        results.append(n_correct * 100. / n_samples)
    return results

# Load Models


In [None]:
# [5] - Train one epoch
"""Function to train one epoch"""

def train_epoch(model, loader, device, loss_func, optimizer, scheduler):
    """Train `model` for one pass over `loader` and report loss/accuracy.

    Args:
        model: network to optimise; switched to training mode here.
        loader: iterable yielding `(images, labels)` batches.
        device: device the batches are moved to before the forward pass.
        loss_func: callable `loss_func(predictions, labels)` returning the loss.
        optimizer: optimizer stepped once per batch.
        scheduler: kept for interface compatibility; it is not stepped inside
            this function.

    Returns:
        `(summary_loss, summary_acc)`: two `AverageMeter` objects holding the
        sample-weighted running loss and top-1 accuracy (percent).
    """
    model.train()
    summary_loss = AverageMeter()  # track running loss
    summary_acc = AverageMeter()   # track running accuracy
    start = time.time()            # track time

    for images, labels in tqdm(loader):
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        out = model(images)            # Generate predictions
        loss = loss_func(out, labels)  # Calculate loss
        loss.backward()
        optimizer.step()

        with torch.no_grad():
            acc = accuracy(out, labels)[0]

        # Weight by the real size of this batch: the last batch can be smaller
        # than the loader's batch size, and relying on a notebook-global
        # `batch_size` would skew the averages.
        n_samples = labels.size(0)
        summary_loss.update(loss.detach().item(), n_samples)
        summary_acc.update(acc.detach().item(), n_samples)

    train_time = str(datetime.timedelta(seconds=time.time() - start))
    print('Train loss: {:.5f} - Train acc: {:.2f}% - time: {}'.format(summary_loss.avg,
                                                                      summary_acc.avg,
                                                                      train_time))
    return summary_loss, summary_acc

In [None]:
# [6] - Validate one epoch
"""Function to test one epoch"""

def validate_epoch(model, loader, device, loss_func):
    """Evaluate `model` on one pass over `loader` without updating weights.

    Args:
        model: network to evaluate; switched to evaluation mode here.
        loader: iterable yielding `(images, labels)` batches.
        device: device the batches are moved to before the forward pass.
        loss_func: callable `loss_func(predictions, labels)` returning the loss.

    Returns:
        `(summary_loss, summary_acc)`: two `AverageMeter` objects holding the
        sample-weighted running loss and top-1 accuracy (percent).
    """
    model.eval()
    summary_loss = AverageMeter()  # track running loss
    summary_acc = AverageMeter()   # track running accuracy
    start = time.time()            # track time

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for images, labels in tqdm(loader):
            images = images.to(device)
            labels = labels.to(device)

            out = model(images)            # Generate predictions
            loss = loss_func(out, labels)  # Calculate loss
            acc = accuracy(out, labels)[0]

            # Weight by the real size of this batch (the last one can be
            # smaller) instead of a notebook-global `batch_size`.
            n_samples = labels.size(0)
            summary_loss.update(loss.detach().item(), n_samples)
            summary_acc.update(acc.detach().item(), n_samples)

    eval_time = str(datetime.timedelta(seconds=time.time() - start))
    print('Val loss: {:.5f} - Val acc: {:.2f}% - time: {}'.format(summary_loss.avg,
                                                                  summary_acc.avg,
                                                                  eval_time))
    return summary_loss, summary_acc

In [None]:
# [7] - Load the ResNext Model
"""Load the ResNext model"""

# Use the first GPU when one is available, otherwise stay on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Pretrained ResNeXt-50 backbone from timm.
resnet = timm.create_model('resnext50_32x4d', pretrained=True)

# Replace the classifier head with a fresh 5-output linear layer that keeps
# the backbone's feature width.
num_ftrs = resnet.fc.in_features
resnet.fc = nn.Linear(in_features=num_ftrs, out_features=5)

resnet.to(device)

In [None]:
# [8] - Train ResNext Model
"""Train the ResNext model"""

# Ten epochs, so the checkpoint written after the last one is named
# `..._epoch10_384.pth`, which is the file the inference section loads.
# (This was 1, while the save was hard-wired to epoch index 9, so no
# checkpoint was ever written.)
num_epochs = 10
best_acc = 0
best_epoch = 0

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(resnet.parameters(), lr=0.01, momentum=0.9)
# Multiply the learning rate by 0.2 once the validation loss has stopped
# improving for more than `patience` epochs.
scheduler =  ReduceLROnPlateau(optimizer, mode='min', factor=0.2, patience=2, verbose=True, eps=1e-6)

for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch + 1, num_epochs))

    train_loss, train_acc = train_epoch(resnet, train_loader, device, criterion, optimizer, scheduler)
    val_loss, val_acc = validate_epoch(resnet, test_loader, device, criterion)
    scheduler.step(val_loss.avg)

    # Remember which epoch gave the best validation accuracy.
    if val_acc.avg > best_acc:
        best_acc = val_acc.avg
        best_epoch = epoch

    # Save the weights after the final epoch, whatever `num_epochs` is.
    if epoch == num_epochs - 1:
        print('Saving model...')
        PATH = './timm_resnext_epoch{}_384.pth'.format(epoch + 1)
        torch.save(resnet.state_dict(),PATH)

In [None]:
# [9] - Load the Xception Model
"""Load the Xception model"""

# Use the first GPU when one is available, otherwise stay on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Pretrained Xception backbone from timm.
xception = timm.create_model('xception', pretrained=True)

# Replace the classifier head with a fresh 5-output linear layer that keeps
# the backbone's feature width.
num_ftrs = xception.fc.in_features
xception.fc = nn.Linear(in_features=num_ftrs, out_features=5)

xception.to(device)

In [None]:
# [10] - Train Xception Model
"""Train the Xception model"""

# Ten epochs, so the checkpoint written after the last one is named
# `..._epoch10_384.pth`, which is the file the inference section loads.
num_epochs = 10
best_acc = 0
best_epoch = 0

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(xception.parameters(), lr=0.01, momentum=0.9)
# Multiply the learning rate by 0.2 once the validation loss has stopped
# improving for more than `patience` epochs.
scheduler =  ReduceLROnPlateau(optimizer, mode='min', factor=0.2, patience=2, verbose=True, eps=1e-6)

for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch + 1, num_epochs))

    train_loss, train_acc = train_epoch(xception, train_loader, device, criterion, optimizer, scheduler)
    val_loss, val_acc = validate_epoch(xception, test_loader, device, criterion)
    scheduler.step(val_loss.avg)

    # Remember which epoch gave the best validation accuracy.
    if val_acc.avg > best_acc:
        best_acc = val_acc.avg
        best_epoch = epoch

    # Save after the final epoch. Tying this to `num_epochs` instead of the
    # literal 9 keeps the save from being skipped if `num_epochs` changes.
    if epoch == num_epochs - 1:
        print('Saving model...')
        PATH = './timm_xception_epoch{}_384.pth'.format(epoch + 1)
        torch.save(xception.state_dict(),PATH)

# Testing - Inference

In [None]:
# [11] - Load ResNext Model
"""Load the ResNext model"""

PATH = './timm_resnext_epoch10_384.pth'

# Rebuild the architecture without downloading pretrained weights; the
# fine-tuned weights come from the checkpoint below.
resnet = timm.create_model('resnext50_32x4d', pretrained=False)

# Same 5-output head that was used for training, so the state dict matches.
num_ftrs = resnet.fc.in_features
resnet.fc = nn.Linear(num_ftrs, 5)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
resnet.to(device)

# map_location remaps the stored tensors onto the current device, so a
# checkpoint saved on a GPU can also be loaded on a CPU-only machine.
resnet.load_state_dict(torch.load(PATH, map_location=device))

# Evaluation mode for inference.
resnet.eval()

In [None]:
# [12] - Load Xception Model
"""Load the Xception model"""

PATH = './timm_xception_epoch10_384.pth'

# Rebuild the architecture without downloading pretrained weights; the
# fine-tuned weights come from the checkpoint below.
xception = timm.create_model('xception', pretrained=False)

# Read the feature width from this model's own head. (It was previously read
# from `resnet`, which made this cell depend on another model being loaded.)
num_ftrs = xception.fc.in_features
xception.fc = nn.Linear(num_ftrs, 5)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
xception.to(device)

# map_location remaps the stored tensors onto the current device, so a
# checkpoint saved on a GPU can also be loaded on a CPU-only machine.
xception.load_state_dict(torch.load(PATH, map_location=device))

# Evaluation mode for inference.
xception.eval()

In [None]:
# [13] - For submission
"""Create submission csv"""

# Read the sample submission CSV and preview its first rows.
submission_df = pd.read_csv('../input/cassava-leaf-disease-classification/sample_submission.csv')
submission_df.head()

In [None]:
# [14] - TTA
"""Test Time Augmentation"""

input_size = 384
# Normalise with the same per-channel mean/std as the training pipeline in
# CassavaDataset. (The previous values were a different set of statistics, so
# the models saw inputs scaled differently at inference than during training.)
stats = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

# View 1: plain resize with an 8-pixel reflected border on every side.
trans1 = transforms.Compose([transforms.Resize((input_size, input_size)),
                             transforms.Pad(8, padding_mode='reflect'),
                             transforms.ToTensor(),
                             transforms.Normalize(*stats)])

# View 2: occasional horizontal flip followed by a random resized crop.
trans2 = transforms.Compose([transforms.Resize((input_size, input_size)),
                             transforms.RandomHorizontalFlip(p=0.3),
                             transforms.RandomResizedCrop(input_size),
                             transforms.ToTensor(),
                             transforms.Normalize(*stats)])

# View 3: occasional vertical flip followed by a random resized crop.
trans3 = transforms.Compose([transforms.Resize((input_size, input_size)),
                             transforms.RandomVerticalFlip(p=0.3),
                             transforms.RandomResizedCrop(input_size),
                             transforms.ToTensor(),
                             transforms.Normalize(*stats)])

# View 4: both flips at p=0.5 followed by a random resized crop.
trans4 = transforms.Compose([transforms.Resize((input_size, input_size)),
                             transforms.RandomHorizontalFlip(p=0.5),
                             transforms.RandomVerticalFlip(p=0.5),
                             transforms.RandomResizedCrop(input_size),
                             transforms.ToTensor(),
                             transforms.Normalize(*stats)])

# Every test image is passed through each of these views at inference time.
transs = [trans1, trans2, trans3, trans4]

In [None]:
# [15] - Inference
"""Inference"""

# Weights applied to each model's logits before the two models are combined.
RESNEXT_WEIGHT = 4
XCEPTION_WEIGHT = 5

test_path = '/kaggle/input/cassava-leaf-disease-classification/test_images/'
# Sorted so that the order of rows in the submission is reproducible.
test_images = sorted(os.listdir(test_path))
# Kept under its own name: this list used to overwrite `train_image_paths`,
# which silently pointed the training variable at the test images.
test_image_paths = [os.path.join(test_path, x) for x in test_images]

y_preds = []

# Inference only: skip building the autograd graph.
with torch.no_grad():
    for img_path in test_image_paths:
        # Force three channels; the context manager closes the file handle.
        with Image.open(img_path) as img_file:
            image = img_file.convert('RGB')

        resnext_logits = []
        xception_logits = []
        # The views are run one at a time because they do not all produce the
        # same spatial size (the padded view is larger than the cropped ones).
        for trans in transs:
            img = trans(image).unsqueeze(0).to(device)
            resnext_logits.append(RESNEXT_WEIGHT * resnet(img).squeeze(0))
            xception_logits.append(XCEPTION_WEIGHT * xception(img).squeeze(0))

        # Average over the TTA views for each model ...
        out = torch.stack(resnext_logits).mean(dim=0)
        out2 = torch.stack(xception_logits).mean(dim=0)
        # ... then average the two weighted models. Previously only the
        # Xception output reached the prediction: the ResNeXt logits and the
        # computed mean were discarded.
        ensemble = torch.stack([out, out2]).mean(dim=0)

        y_preds.append(int(ensemble.argmax().item()))

In [None]:
# Build the submission table: one row per test image file name with the label
# predicted for it, then show it in the notebook.
df_sub = pd.DataFrame({'image_id': test_images, 'label': y_preds})
display(df_sub)

In [None]:
# Write the submission file; index=False leaves out the DataFrame index column.
df_sub.to_csv('submission.csv', index=False)