In [None]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
import math
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
from PIL import Image
import random
from sklearn.model_selection import train_test_split
import time
from tqdm.notebook import tqdm

import torch
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
import torchvision.models as models
from torchvision.utils import make_grid

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

# Seed

In [None]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and CUDA), exports
    ``PYTHONHASHSEED``, and puts cuDNN into deterministic mode with
    benchmarking switched off.

    Args:
        seed: Integer seed applied to all generators.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)

    # Python and NumPy generators.
    for seeder in (random.seed, np.random.seed):
        seeder(seed)

    # PyTorch generators.
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    # Prefer reproducible cuDNN kernels over auto-tuned ones.
    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True

In [None]:
# Single seed for the whole notebook; also reused as random_state for the train/validation split.
SEED = 17
seed_everything(SEED)

# Data Folder

In [None]:
# Root of the competition data as mounted in the Kaggle kernel.
data_dir = '../input/cassava-leaf-disease-classification'

# Image folders.
train_dir = f'{data_dir}/train_images'
test_dir = f'{data_dir}/test_images'

# Metadata files.
train_csv = f'{data_dir}/train.csv'
sample_csv = f'{data_dir}/sample_submission.csv'
name_json = f'{data_dir}/label_num_to_disease_map.json'

# Read CSV

In [None]:
# Training metadata; the dataset class below unpacks each row as (image file name, label).
train_df = pd.read_csv(train_csv)
train_df.head()

In [None]:
train_df.label.value_counts()

The dataset seems heavily imbalanced towards label 3.

In [None]:
# Sample submission file that ships with the competition data.
sub_df = pd.read_csv(sample_csv)
sub_df.head()

# Dataset

In [None]:
class CassavaDS(Dataset):
    """Map-style dataset yielding ``(image, label)`` pairs read from disk.

    Args:
        df: DataFrame whose rows unpack as ``(image file name, label)``.
        data_dir: Directory that contains the image files named in ``df``.
        transforms: Optional callable applied to the RGB PIL image. When it is
            ``None`` the untransformed PIL image is returned.
    """

    def __init__(self, df, data_dir, transforms=None):
        super().__init__()
        # Keep the raw row array so __getitem__ can unpack rows by position.
        self.df_data = df.values
        self.transforms = transforms
        self.data_dir = data_dir

    def __len__(self):
        """Return the number of rows (samples)."""
        return len(self.df_data)

    def __getitem__(self, index):
        """Load the image of row ``index`` and return ``(image, label)``."""
        img_name, label = self.df_data[index]
        img_path = os.path.join(self.data_dir, img_name)
        # The context manager releases the file handle once the pixels are
        # converted into an independent RGB image.
        with Image.open(img_path) as img:
            image = img.convert("RGB")
        # `image` is always bound, so the dataset also works without transforms
        # (previously this path raised UnboundLocalError).
        if self.transforms is not None:
            image = self.transforms(image)
        return image, label

In [None]:
# Hold out 10% of the rows for validation, stratified on the label column.
X_train, X_valid = train_test_split(
    train_df,
    test_size=0.1,
    random_state=SEED,
    stratify=train_df['label'].values,
)

In [None]:
X_train.shape, X_valid.shape

In [None]:
# Per-channel mean/std normalization (the usual ImageNet statistics); the same values feed UnNormalize below.
normalize = transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))

In [None]:
# Training pipeline: random augmentations, then tensor conversion and normalization.
train_tf = transforms.Compose([
    transforms.Pad(4, padding_mode='reflect'),  # 4-pixel reflected border
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomResizedCrop(224),  # random crop, resized to 224x224
    transforms.ToTensor(),
    normalize
])

# Validation pipeline: deterministic resize and center crop to the same 224x224 input size.
valid_tf = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize
])

In [None]:
# Both splits read images from train_dir; only the transform pipeline differs.
train_ds = CassavaDS(X_train, train_dir, train_tf)
valid_ds = CassavaDS(X_valid, train_dir, valid_tf)

In [None]:
# Mini-batch size shared by the training and validation loaders.
bs = 64

In [None]:
# Only the training data is shuffled. Evaluation gains nothing from shuffling,
# and a fixed order keeps the batch-averaged validation loss comparable
# from one epoch to the next.
train_loader = DataLoader(train_ds, batch_size=bs, shuffle=True)
valid_loader = DataLoader(valid_ds, batch_size=bs, shuffle=False)

# Labels

In [None]:
import json

# Label-number -> disease-name mapping; it is looked up with str(label) when plotting.
with open(name_json, 'r') as f:
    cat_to_name = json.load(f)

In [None]:
cat_to_name

# Plot Images

In [None]:
class UnNormalize(object):
    """Invert a per-channel ``(x - mean) / std`` normalization.

    Args:
        mean: Sequence with one mean per channel.
        std: Sequence with one standard deviation per channel.
    """

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        """Return a de-normalized copy of ``tensor``.

        Args:
            tensor: Tensor whose first dimension indexes the channels.

        Returns:
            A new tensor with ``x * std + mean`` applied per channel; the
            input is left untouched.
        """
        # Clone first: the in-place ops below would otherwise silently modify
        # the caller's tensor (e.g. a batch that is still needed afterwards).
        restored = tensor.clone()
        for channel, m, s in zip(restored, self.mean, self.std):
            # `channel` is a view into `restored`, so this updates it in place.
            channel.mul_(s).add_(m)
        return restored

In [None]:
# Inverse of `normalize`: built from the same mean/std values, used for plotting.
unnorm = UnNormalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))

In [None]:
def display_img(img, label=None, unnorm_obj=None, invert=True, return_label=True):
    """Show a single image tensor with matplotlib.

    Relies on the module-level ``cat_to_name`` mapping for the title.

    Args:
        img: Channel-first image tensor; it is permuted to channel-last
            before being handed to ``plt.imshow``.
        label: Optional class index (Python int, NumPy integer or 0-dim
            tensor). When given, the matching disease name becomes the title.
        unnorm_obj: Optional callable applied to ``img`` before plotting.
        invert: Unused; kept so existing callers keep working.
        return_label: Unused; kept so existing callers keep working.
    """
    # Identity checks instead of `!= None`, which dispatches to the argument's
    # own (possibly element-wise) comparison operator.
    if unnorm_obj is not None:
        img = unnorm_obj(img)

    plt.imshow(img.permute(1, 2, 0))

    if label is not None:
        # int() normalizes tensor / NumPy labels so the key is always like "3"
        # (str() of a tensor would be "tensor(3)" and miss the mapping).
        plt.title(cat_to_name[str(int(label))])

In [None]:
def display_batch(batch, unnorm_obj=None):
    """Plot all images of one batch as a single grid.

    Args:
        batch: ``(images, labels)`` pair as produced by a data loader; the
            labels are not used.
        unnorm_obj: Optional callable applied to every image before the grid
            is assembled.
    """
    images, _ = batch

    if unnorm_obj:
        images = [unnorm_obj(single) for single in images]

    _, axis = plt.subplots(figsize=(16, 8))
    # Hide the tick marks; they carry no meaning for an image grid.
    axis.set_xticks([])
    axis.set_yticks([])

    grid = make_grid(images, nrow=16)
    axis.imshow(grid.permute(1, 2, 0))

In [None]:
# Undo the normalization before plotting; normalized values fall outside the
# range matplotlib can display and would be clipped.
img, label = train_ds[0]
display_img(img, label, unnorm_obj=unnorm)

In [None]:
# Preview one training batch, de-normalized so the colours render correctly.
display_batch(next(iter(train_loader)), unnorm_obj=unnorm)

# Helper Functions

In [None]:
class AvgStats(object):
    """Running history of loss, accuracy and timing values.

    Attributes are three parallel lists: ``losses``, ``precs`` and ``its``.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Drop all recorded values."""
        self.losses, self.precs, self.its = [], [], []

    def append(self, loss, prec, it):
        """Record one ``(loss, prec, it)`` entry."""
        for history, value in (
            (self.losses, loss),
            (self.precs, prec),
            (self.its, it),
        ):
            history.append(value)

In [None]:
def save_checkpoint(model, is_best, filename='./checkpoint.pth'):
    """Write the model's state dict to ``filename`` when ``is_best`` is true.

    When ``is_best`` is false nothing is written and a notice is printed.
    """
    if not is_best:
        print ("=> Validation Accuracy did not improve")
        return
    torch.save(model.state_dict(), filename)

In [None]:
def load_checkpoint(model, filename = './checkpoint.pth'):
    """Load a saved state dict from ``filename`` into ``model``.

    Tensors are mapped to CPU storage while loading. A saved key that the
    model does not have, but for which the model has a ``<key>_raw`` entry,
    is moved to that ``_raw`` name before the state dict is applied.
    """
    state = torch.load(filename, map_location=lambda storage, loc: storage)
    expected = set(model.state_dict().keys())
    # Iterate over a snapshot of the keys because entries are renamed in the loop.
    for key in list(state.keys()):
        raw_key = key + '_raw'
        if key in expected or raw_key not in expected:
            continue
        if raw_key not in state:
            state[raw_key] = state[key]
        del state[key]
    model.load_state_dict(state)

# Train and Test

In [None]:
def train(loader, model, optimizer, device):
    """Run one training epoch over ``loader``.

    Uses the module-level ``criterion`` as the loss function.

    Args:
        loader: Data loader yielding ``(inputs, targets)`` batches; its
            ``dataset`` length is the accuracy denominator.
        model: Network to optimize; switched to training mode.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to.

    Returns:
        ``(accuracy_percent, mean_batch_loss, elapsed_seconds)`` as plain
        Python floats.
    """
    model.train()
    correct, trn_loss = 0, 0.
    t = tqdm(loader, leave=False, total=len(loader))
    bt_start = time.time()
    for ip, target in t:
        ip, target = ip.to(device), target.to(device)
        output = model(ip)
        loss = criterion(output, target)
        trn_loss += loss.item()

        # Count hits as a Python int so the returned accuracy is a float and
        # not a tensor that lives on (and pins memory of) the training device.
        pred = output.argmax(dim=1)
        correct += (pred == target).sum().item()

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    trn_time = time.time() - bt_start
    trn_acc = correct * 100 / len(loader.dataset)
    # Mean of the per-batch losses.
    trn_loss /= len(loader)
    return trn_acc, trn_loss, trn_time

In [None]:
def valid(loader, model, optimizer, device):
    """Evaluate ``model`` on ``loader`` without tracking gradients.

    Uses the module-level ``criterion`` as the loss function.

    Args:
        loader: Data loader yielding ``(inputs, targets)`` batches; its
            ``dataset`` length is the accuracy denominator.
        model: Network to evaluate; switched to evaluation mode.
        optimizer: Unused; kept so the signature mirrors ``train``.
        device: Device the batches are moved to.

    Returns:
        ``(accuracy_percent, mean_batch_loss, elapsed_seconds)`` as plain
        Python floats.
    """
    model.eval()
    with torch.no_grad():
        correct, val_loss = 0, 0.
        t = tqdm(loader, leave=False, total=len(loader))
        bt_start = time.time()
        for ip, target in t:
            ip, target = ip.to(device), target.to(device)
            output = model(ip)
            loss = criterion(output, target)
            val_loss += loss.item()

            # Count hits as a Python int so the returned accuracy is a float
            # rather than a tensor on the evaluation device.
            pred = output.argmax(dim=1)
            correct += (pred == target).sum().item()

        val_time = time.time() - bt_start
        val_acc = correct * 100 / len(loader.dataset)
        # Mean of the per-batch losses.
        val_loss /= len(loader)
        return val_acc, val_loss, val_time

In [None]:
def fit(model, sched, optimizer, device, epoch):
    """Train for ``epoch`` epochs, validating and checkpointing after each one.

    Reads the module-level ``train_loader`` / ``valid_loader`` and records the
    per-epoch results in the module-level ``trn_stat`` / ``val_stat``. The
    weights are written to ``./best_model.pth`` whenever the validation
    accuracy exceeds the best value seen so far in this call.

    Args:
        model: Network to train.
        sched: Learning-rate scheduler stepped once per epoch, or a falsy
            value to train without one.
        optimizer: Optimizer handed to ``train`` / ``valid``.
        device: Device handed to ``train`` / ``valid``.
        epoch: Number of epochs to run.
    """
    print("Epoch\tTrn_loss\tVal_loss\tTrn_acc\t\tVal_acc")
    best_acc = 0.
    for j in range(epoch):
        trn_acc, trn_loss, trn_time = train(train_loader, model, optimizer, device)
        trn_stat.append(trn_loss, trn_acc, trn_time)
        val_acc, val_loss, val_time = valid(valid_loader, model, optimizer, device)
        # AvgStats.append takes (loss, prec, it): loss first, as for trn_stat.
        val_stat.append(val_loss, val_acc, val_time)
        if sched:
            sched.step()
        if val_acc > best_acc:
            best_acc = val_acc
            save_checkpoint(model, True, './best_model.pth')
        print("{}\t{:06.8f}\t{:06.8f}\t{:06.8f}\t{:06.8f}"
              .format(j+1, trn_loss, val_loss, trn_acc, val_acc))

# Vision Transformer

Using the implementation from https://github.com/nachiket273/Vision_transformer_pytorch.git

Using weights from a model pretrained on ImageNet-1k. <br>
The file is available at https://www.kaggle.com/nachiket273/visiontransformerpretrainedimagenet1kweights

A PyTorch TPU kernel is available at https://www.kaggle.com/nachiket273/pytorch-tpu-vision-transformer

In [None]:
# Copy the ViT implementation and its pretrained weights into the working directory
# so that `vit` is importable and the weights can be loaded from './'.
!cp ../input/visiontransformerpretrainedimagenet1kweights/vit.py .
!cp ../input/visiontransformerpretrainedimagenet1kweights/vit_16_224_imagenet1000.pth .

In [None]:
from vit import ViT

In [None]:
def get_model(out_features=5):
    """Build a ViT initialized from the pretrained weights with a fresh head.

    Args:
        out_features: Number of classes produced by the new output layer.

    Returns:
        The model with pretrained weights loaded, ``model.out`` replaced by a
        new ``nn.Linear`` and every parameter marked trainable.
    """
    # NOTE(review): 224 / 16 are presumably image size and patch size (matching
    # the 'vit_16_224' weight file name) -- confirm against vit.py.
    model = ViT(224, 16, drop_rate=0.1)
    load_checkpoint(model, './vit_16_224_imagenet1000.pth')
    # Swap the pretrained output layer for one sized by `out_features`
    # (the argument used to be ignored in favour of a hard-coded 5).
    model.out = nn.Linear(in_features=model.out.in_features, out_features=out_features)
    for param in model.parameters():
        # The attribute is `requires_grad`; the former `require_grad` only
        # created an unused attribute and never affected autograd.
        param.requires_grad = True
    return model

In [None]:
model = get_model()

In [None]:
model = model.to(device)

In [None]:
# Per-epoch histories that fit() appends to (training and validation respectively).
trn_stat = AvgStats()
val_stat = AvgStats()

In [None]:
# Loss used by train() and valid(); unweighted, so the label imbalance noted earlier is not compensated here.
criterion = nn.CrossEntropyLoss()

In [None]:
# SGD with momentum and weight decay over every parameter of the model.
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9, weight_decay=1e-4)

In [None]:
# Number of training epochs; also the length of the cosine learning-rate schedule below.
epochs = 20

In [None]:
# Cosine decay of the learning rate over the whole run, down to a floor of 1e-3.
sched = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer,
    T_max=epochs,
    eta_min=1e-3,
)

In [None]:
fit(model, sched, optimizer, device, epochs)

# Predict

In [None]:
# Inference preprocessing: the same deterministic steps as valid_tf.
test_tf = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize
])

In [None]:
# Restore the weights fit() saved for the epoch with the best validation accuracy.
load_checkpoint(model, './best_model.pth')

In [None]:
def predict(test_dir, model, device):
    """Predict a class index for every file in ``test_dir``.

    Each file is opened as an RGB image and preprocessed with the
    module-level ``test_tf`` pipeline before being passed to the model.

    Args:
        test_dir: Directory whose entries are all treated as image files.
        model: Trained network; switched to evaluation mode here.
        device: Device the inputs are moved to.

    Returns:
        ``(img_names, preds)``: file names in sorted order and the matching
        predicted class indices as Python ints.
    """
    img_names = []
    preds = []
    # Do not rely on whatever mode the model was left in by earlier cells:
    # dropout must be disabled for inference.
    model.eval()
    # Inference needs no autograd graph.
    with torch.no_grad():
        # os.listdir order is arbitrary; sort for a reproducible output.
        for name in sorted(os.listdir(test_dir)):
            img_path = os.path.join(test_dir, name)
            img = Image.open(img_path).convert("RGB")
            # Add the batch dimension the model expects.
            img = test_tf(img).unsqueeze(0).to(device)
            op = model(img)
            pred = op.argmax(dim=1)
            img_names.append(name)
            preds.append(pred.item())
    return img_names, preds

In [None]:
img_names, preds = predict(test_dir, model, device)

In [None]:
# Preview only the first few predictions instead of dumping the full lists.
img_names[:5], preds[:5]

In [None]:
sub_df.head()

In [None]:
# Build the submission directly from the predictions. Assigning the lists into
# the sample frame raises a length-mismatch error whenever the number of test
# images differs from the number of rows in the sample file.
sub_df = pd.DataFrame({'image_id': img_names, 'label': preds})

In [None]:
sub_df.head()

In [None]:
# Write the submission file; index=False keeps the row index out of the CSV.
sub_df.to_csv('submission.csv', index=False)