In [None]:
import torch
import torchvision
import torch.nn.functional as F
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold
import pandas as pd
import numpy as np
import gc
import os
import sys
import cv2
import time
import datetime
import warnings
import random
import matplotlib.pyplot as plt
import seaborn as sns
sys.path.append('../input/efficientnet-pytorch/EfficientNet-PyTorch/EfficientNet-PyTorch-master')
from efficientnet_pytorch import EfficientNet
%matplotlib inline

In [None]:
warnings.simplefilter('ignore')


def seed_everything(seed):
    """
    Seed every random number generator this notebook relies on.

    Args:
        seed (int): value used for Python's `random`, NumPy and PyTorch
            (CPU and every CUDA device).
    """
    random.seed(seed)
    # Exported for any subprocess started later; it cannot change hashing of the running interpreter
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seeds all GPUs, not just the current one; silently ignored when CUDA is unavailable
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode lets cuDNN pick convolution algorithms by timing them, which can
    # differ between runs, so it has to stay off for reproducible results
    torch.backends.cudnn.benchmark = False


seed_everything(47)

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
class LeafDataset(Dataset):
    """
    Dataset that serves images listed in a DataFrame from a folder on disk.

    Each item is the RGB image whose file name is stored in the 'image_id'
    column; in training mode the value of the 'label' column is returned with it.
    """

    def __init__(self, df: pd.DataFrame, imfolder: str, train: bool = True, transforms = None):
        """
        Class initialization
        Args:
            df (pd.DataFrame): DataFrame with data description; needs an 'image_id'
                column and, when `train` is True, a 'label' column
            imfolder (str): folder with images
            train (bool): flag of whether a training dataset is being initialized or testing one
            transforms: image transformation method to be applied (None to skip)

        """
        self.df = df
        self.imfolder = imfolder
        self.transforms = transforms
        self.train = train

    def __getitem__(self, index):
        """
        Load one sample by position.

        Args:
            index (int): positional row index into the DataFrame
        Returns:
            (image, label) when `train` is True, otherwise just the image
        Raises:
            FileNotFoundError: if the image file is missing or cannot be decoded
        """
        row = self.df.iloc[index]
        im_path = os.path.join(self.imfolder, row['image_id'])
        x = cv2.imread(im_path)
        if x is None:
            # cv2.imread reports failure by returning None instead of raising
            raise FileNotFoundError(f'Image is missing or unreadable: {im_path}')
        x = cv2.cvtColor(x, cv2.COLOR_BGR2RGB)  # BGR -> RGB channel order

        if self.transforms is not None:
            x = self.transforms(x)

        if self.train:
            return x, row['label']
        return x

    def __len__(self):
        """Number of rows (samples) in the underlying DataFrame."""
        return len(self.df)
    
    
class Net(nn.Module):
    """
    Wrap a backbone network and give it a fresh linear classification head.

    Args:
        arch: backbone module. When its class string contains 'ResNet' the `fc`
            attribute is replaced; when it contains 'EfficientNet' the `_fc`
            attribute is replaced. Any other backbone is used unchanged.
        n_classes (int): number of outputs of the new head (defaults to 5).
    """

    def __init__(self, arch, n_classes: int = 5):
        super().__init__()
        self.arch = arch
        arch_class = str(arch.__class__)
        # The head's input width is read from the layer being replaced so that
        # backbones with a different feature size get a matching head.
        if 'ResNet' in arch_class:
            self.arch.fc = nn.Linear(in_features=self.arch.fc.in_features,
                                     out_features=n_classes, bias=True)
        if 'EfficientNet' in arch_class:
            self.arch._fc = nn.Linear(in_features=self.arch._fc.in_features,
                                      out_features=n_classes, bias=True)

    def forward(self, inputs):
        """Run the inputs through the wrapped backbone and return its output."""
        return self.arch(inputs)

In [None]:
# Training labels and the submission template
train_df = pd.read_csv('../input/cassava-leaf-disease-classification/train.csv')
sub = pd.read_csv('../input/cassava-leaf-disease-classification/sample_submission.csv')

# Workaround kept from the original notebook: a one-row template is doubled so
# that inference never sees a single-example input (described there as a
# PyTorch limitation).
if sub.shape[0] == 1:
    sub = pd.concat([sub] * 2, ignore_index=True)

In [None]:
_tv = torchvision.transforms
# Per-channel normalisation constants (presumably the usual ImageNet statistics — confirm)
_norm_mean = [0.485, 0.456, 0.406]
_norm_std = [0.229, 0.224, 0.225]

# Training pipeline: resize to 256x256 plus random horizontal/vertical flips
train_transform = _tv.Compose([
    _tv.ToPILImage(),
    _tv.Resize((256, 256)),
    _tv.RandomHorizontalFlip(),
    _tv.RandomVerticalFlip(),
    _tv.ToTensor(),
    _tv.Normalize(mean=_norm_mean, std=_norm_std),
])

# Evaluation pipeline: same resize and normalisation, no random augmentation
test_transform = _tv.Compose([
    _tv.ToPILImage(),
    _tv.Resize((256, 256)),
    _tv.ToTensor(),
    _tv.Normalize(mean=_norm_mean, std=_norm_std),
])

In [None]:
# ---- Model ----
architecture = 'efficientnet-b1'  # Model architecture
# NOTE(review): from_name presumably only builds the network (no pretrained
# weights are loaded) — confirm against efficientnet_pytorch.
arch = EfficientNet.from_name(architecture)

# ---- Training schedule ----
epochs = 12      # Number of epochs to run
es_patience = 3  # Early stopping: epochs without improvement to wait before stopping
splits = 4       # Number of cross-validation folds
TTA = 3          # Test Time Augmentation rounds

# ---- Data location ----
data_folder = '/kaggle/input/cassava-leaf-disease-classification'

# ---- Prediction buffers ----
oof = np.zeros(shape=(len(train_df), 1))  # Out Of Fold predictions, one row per training sample
preds = torch.zeros(len(sub), train_df['label'].nunique(),
                    dtype=torch.float32, device=device)  # Test predictions, one column per class

# ---- Per-fold training history, keyed by fold number starting at 1 ----
train_losses = {fold_id: [] for fold_id in range(1, splits + 1)}
train_accs = {fold_id: [] for fold_id in range(1, splits + 1)}
val_accs = {fold_id: [] for fold_id in range(1, splits + 1)}

# The test set deliberately reuses train_transform: its random flips are what
# make repeated TTA passes differ (presumably intentional — confirm).
test = LeafDataset(
    df=sub,
    imfolder=os.path.join(data_folder, 'test_images'),
    train=False,
    transforms=train_transform,
)

In [None]:
def _predict_labels(model, loader, n_samples):
    """
    Predict a class index for every sample of a labelled loader.

    Args:
        model: network already switched to evaluation mode by the caller
        loader: DataLoader yielding (images, labels) batches in a fixed order
        n_samples (int): total number of samples the loader serves
    Returns:
        torch.Tensor: long tensor of shape (n_samples, 1) on `device`, in loader order
    """
    labels = torch.zeros((n_samples, 1), dtype=torch.long, device=device)
    offset = 0
    with torch.no_grad():  # inference only, no gradients needed
        for x_batch, _ in loader:
            x_batch = x_batch.to(device=device, dtype=torch.float32)
            batch_pred = torch.argmax(model(x_batch), dim=1)
            labels[offset:offset + x_batch.shape[0]] = batch_pred.reshape(-1, 1)
            offset += x_batch.shape[0]
    return labels


skf = StratifiedKFold(n_splits=splits)
n_classes = train_df['label'].nunique()
# The test set is identical for every fold, so its loader is built once
test_loader = DataLoader(dataset=test, batch_size=16, shuffle=False, num_workers=2)

for fold, (train_idx, val_idx) in enumerate(skf.split(X=np.zeros(len(train_df)), y=train_df['label']), 1):
    print('=' * 20, 'Fold', fold, '=' * 20)
    model_path = f'effnet_b1_fold_{fold}.pth'  # Path and filename to save model to
    best_val = 0  # Best validation score within this fold
    patience = es_patience  # Current patience counter
    arch = EfficientNet.from_name(architecture)
    model = Net(arch=arch)
    model = model.to(device)
    optim = torch.optim.Adam(model.parameters(), lr=0.003)
    # mode='max' because the scheduler is stepped with validation accuracy
    scheduler = ReduceLROnPlateau(optimizer=optim, mode='max', patience=1, verbose=True, factor=0.2)
    criterion = nn.CrossEntropyLoss()

    train = LeafDataset(df=train_df.iloc[train_idx].reset_index(drop=True),
                        imfolder=os.path.join(data_folder, 'train_images'),
                        train=True,
                        transforms=train_transform)
    val = LeafDataset(df=train_df.iloc[val_idx].reset_index(drop=True),
                      imfolder=os.path.join(data_folder, 'train_images'),
                      train=True,
                      transforms=test_transform)

    train_loader = DataLoader(dataset=train, batch_size=64, shuffle=True, num_workers=2)
    val_loader = DataLoader(dataset=val, batch_size=16, shuffle=False, num_workers=2)

    for epoch in range(epochs):
        start_time = time.time()
        correct = 0
        epoch_loss = 0  # sum of per-batch losses, not an average
        model.train()

        for x, y in train_loader:
            # Batches are already tensors: move them with .to() rather than
            # copy-constructing new tensors via torch.tensor()
            x = x.to(device=device, dtype=torch.float32)
            y = y.to(device=device, dtype=torch.long)
            optim.zero_grad()
            z = model(x)
            loss = criterion(z, y)
            loss.backward()
            optim.step()
            pred = torch.argmax(z, dim=1)
            correct += (pred == y).sum().item()
            epoch_loss += loss.item()
        train_acc = correct / len(train_idx)  # Train accuracy

        model.eval()  # switch model to the evaluation mode
        val_preds = _predict_labels(model, val_loader, len(val_idx))
        val_acc = accuracy_score(train_df.iloc[val_idx]['label'].values,
                                 val_preds.cpu().numpy().ravel())
        train_time = str(datetime.timedelta(seconds=time.time() - start_time))[:7]
        train_losses[fold].append(epoch_loss)
        train_accs[fold].append(train_acc)
        val_accs[fold].append(val_acc)
        print(f'[{train_time}] | Epoch: {epoch+1:03} | Loss: {epoch_loss:.4f} | Train acc: {train_acc:.4f} | Val acc: {val_acc:.4f}')
        scheduler.step(val_acc)

        if val_acc >= best_val:
            best_val = val_acc
            patience = es_patience
            # NOTE(review): this pickles the whole module, so loading needs the same
            # class definitions available. Saving model.state_dict() is the usual
            # recommendation, but it would change the checkpoint format, so it is left as is.
            torch.save(model, model_path)
        else:
            patience -= 1
            if patience == 0:
                print(f'Early stopping. Best Val accuracy: {best_val:.4f}')
                break

    model = torch.load(model_path)  # Loading best model of this fold
    model.eval()  # switch model to the evaluation mode

    # Predicting on validation set once again to obtain data for OOF
    val_preds = _predict_labels(model, val_loader, len(val_idx))
    oof[val_idx] = val_preds.cpu().numpy()

    # Predicting on test set: class probabilities are summed over the TTA rounds
    tta_preds = torch.zeros((len(test), n_classes), dtype=torch.float32, device=device)
    with torch.no_grad():
        for _ in range(TTA):
            offset = 0
            for x_test in test_loader:
                x_test = x_test.to(device=device, dtype=torch.float32)
                z_test = torch.softmax(model(x_test), dim=1)  # we need probabilities to average them later
                tta_preds[offset:offset + x_test.shape[0]] += z_test
                offset += x_test.shape[0]
    preds += tta_preds / TTA

# Average the fold probabilities, then take the most likely class per test row
preds /= skf.n_splits
preds = torch.argmax(preds, dim=1)

In [None]:
# Accuracy of the out-of-fold predictions against the labels of the full training set
oof_score = accuracy_score(train_df['label'], oof)
print('OOF score: {:.4f}'.format(oof_score))

In [None]:
# One panel per fold: training loss on the left axis, accuracies on a twin right axis.
n_folds = skf.n_splits
# squeeze=False keeps `axes` an array even when there is a single fold
fig, axes = plt.subplots(n_folds, 1, figsize=(10, 4 * n_folds), squeeze=False)
axes = axes.ravel()
for i, ax in enumerate(axes):
    fold_id = i + 1  # history dictionaries are keyed by fold number starting at 1
    ax.plot(train_losses[fold_id], label='Train loss', color='tab:blue')
    ax.grid()
    ax.legend(fontsize=12, facecolor='white')
    ax.set_ylabel('Loss', fontsize=14)
    ax.set_xlabel('Epoch', fontsize=14)
    max_ticks = max(len(train_losses[fold_id]), len(train_accs[fold_id]), len(val_accs[fold_id]))
    ax.set_xticks(range(max_ticks))
    ax.set_xticklabels(range(1, max_ticks + 1))  # show epochs as 1-based

    axes2 = ax.twinx()
    axes2.plot(train_accs[fold_id], label='Train accuracy', color='tab:orange')
    axes2.plot(val_accs[fold_id], label='Validation accuracy', color='tab:red')
    if val_accs[fold_id]:
        # Put the marker on the epoch that reached the best validation accuracy;
        # passing only the y value would draw it at x=0.
        best_epoch = int(np.argmax(val_accs[fold_id]))
        axes2.plot(best_epoch, val_accs[fold_id][best_epoch], color='tab:red', marker='o', markersize=10)
    axes2.legend(fontsize=12, facecolor='white')
    axes2.set_ylabel('Accuracy', fontsize=14)

In [None]:
# preds holds one predicted class index per row of `sub`; bring it to host memory for pandas
predicted_labels = preds.cpu().numpy()
sub['label'] = predicted_labels
sub.to_csv(path_or_buf='submission.csv', index=False)