In [None]:
import os
import numpy as np
import pandas as pd

import albumentations as A
import cv2

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torch.optim as optim

from tqdm.notebook import tqdm
from torch.utils.data import Dataset, DataLoader
from albumentations.pytorch import ToTensorV2

from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold, StratifiedKFold

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import warnings  
warnings.filterwarnings('ignore')

In [None]:
# --- Kaggle input locations -------------------------------------------------
# Competition data: image folders and sample_submission.csv are read from here.
DIR_INPUT = "/kaggle/input/cassava-leaf-disease-classification"
# Per-fold model checkpoints (model_state_fold_<k>.pth) are read from here.
DIR_WEIGHTS = "/kaggle/input/cassava-pytorch-starter-train"

# --- Inference settings -----------------------------------------------------
SIZE = 256        # images are resized to SIZE x SIZE before being fed to the model
BATCH_SIZE = 64   # batch size of the test DataLoader
N_FOLDS = 5       # number of fold checkpoints whose predictions are averaged
SEED = 42         # NOTE(review): not referenced by any cell visible here

In [None]:
class CassavaDataset(Dataset):
    """Image-only dataset that loads the pictures listed in a DataFrame.

    Each item is the RGB image belonging to one row of ``df``; no label is
    returned.

    Args:
        df: DataFrame with an ``image_id`` column holding image file names.
        dataset: Prefix of the image folder; files are read from
            ``{DIR_INPUT}/{dataset}_images/``.
        transforms: Optional callable invoked as ``transforms(image=image)``
            that returns a mapping with an ``'image'`` entry.
    """

    def __init__(self, df, dataset='train', transforms=None):
        self.df = df
        self.transforms = transforms
        self.dataset = dataset

    def __len__(self):
        """Return the number of rows in the underlying DataFrame."""
        return self.df.shape[0]

    def __getitem__(self, idx):
        """Load, colour-convert and (optionally) transform the image at position ``idx``.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        # Positional lookup: a sampler hands out 0..len-1, which equals the
        # index *labels* only when the frame has a default RangeIndex. Using
        # .iloc keeps filtered or re-indexed frames working.
        image_id = self.df['image_id'].iloc[idx]
        image_src = f'{DIR_INPUT}/{self.dataset}_images/{image_id}'

        image = cv2.imread(image_src, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f'Could not read image: {image_src}')
        # OpenCV loads images as BGR; convert to RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.transforms:
            transformed = self.transforms(image=image)
            image = transformed['image']

        return image

In [None]:
class CassavaModel(nn.Module):
    """ResNet-18 feature extractor followed by a separate linear classifier.

    The backbone is created without pretrained weights. Its own ``fc`` layer
    is only used to read the feature width; ``forward`` never calls it,
    although it stays registered as part of ``backbone``.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes=5):
        super().__init__()

        self.backbone = torchvision.models.resnet18(pretrained=False)
        # Classification head sized from the backbone's final feature width.
        self.logit = nn.Linear(self.backbone.fc.in_features, num_classes)

    def forward(self, x):
        """Return raw class logits for a 4-D image batch ``x``."""
        # Unpacking four values also rejects inputs that are not 4-D.
        n_samples, _, _, _ = x.shape

        net = self.backbone
        feature_stages = (
            net.conv1, net.bn1, net.relu, net.maxpool,
            net.layer1, net.layer2, net.layer3, net.layer4,
        )
        for stage in feature_stages:
            x = stage(x)

        # Global average pool to 1x1, then flatten to one vector per sample.
        pooled = F.adaptive_avg_pool2d(x, 1)
        features = pooled.reshape(n_samples, -1)
        # Dropout is only active while the module is in training mode.
        features = F.dropout(features, p=0.25, training=self.training)

        return self.logit(features)

In [None]:
# Deterministic test-time preprocessing: resize to SIZE x SIZE, normalise with
# the library's default statistics, then convert to a tensor.
transforms_test = A.Compose(
    transforms=[
        A.Resize(SIZE, SIZE, p=1.0),
        A.Normalize(p=1.0),
        ToTensorV2(p=1.0),
    ]
)

In [None]:
# The sample submission provides the list of test images to predict.
submission_df = pd.read_csv(os.path.join(DIR_INPUT, 'sample_submission.csv'))
# Reset the second column to 0; it is overwritten with predictions later.
submission_df.iloc[:, 1] = 0

submission_df.head()

In [None]:
# When the test frame holds a single row, pad it to two rows by repeating
# that row. The resulting submission then lists the same image_id twice.
# NOTE(review): the reason for the padding is not visible here (presumably a
# workaround for a single-sample batch) - confirm before removing.
if submission_df.shape[0] == 1:
    # Duplicate the row that is actually present instead of a hard-coded
    # image id, so the workaround keeps working whatever the single image is.
    submission_df = pd.concat([submission_df, submission_df], ignore_index=True)

submission_df.head()

In [None]:
# Test images in submission order; shuffling stays off so predictions line up
# with the rows of submission_df.
dataset_test = CassavaDataset(submission_df, 'test', transforms_test)
dataloader_test = DataLoader(
    dataset=dataset_test,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
)

In [None]:
# Running sum of per-fold class probabilities, each pre-divided by N_FOLDS, so
# after the loop it holds the fold average with shape (num_test_images, 5).
submissions = None
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device('cpu')

for i_fold in range(N_FOLDS):

    # Rebuild the network and restore the weights trained for this fold.
    model = CassavaModel(num_classes=5)
    model.to(device)

    checkpoint = torch.load(f"{DIR_WEIGHTS}/model_state_fold_{i_fold}.pth", map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'], strict=True)
    model.eval()  # switches off dropout for inference

    # Collect per-batch probabilities and concatenate once at the end rather
    # than re-allocating a growing tensor on every batch.
    fold_preds = []
    with torch.no_grad():
        for images in dataloader_test:
            images = images.to(device, dtype=torch.float)
            outputs = model(images)
            fold_preds.append(torch.softmax(outputs, dim=1).cpu())

    test_preds = torch.cat(fold_preds, dim=0)

    # Average the softmax probabilities (not the raw logits) across folds.
    if submissions is None:
        submissions = test_preds / N_FOLDS
    else:
        submissions += test_preds / N_FOLDS
    

In [None]:
# Sanity check: display the first ten rows of the accumulated prediction tensor.
submissions[:10]

In [None]:
# Predicted class = index of the highest fold-averaged probability per image.
# Convert to a NumPy array explicitly so the column gets a plain integer dtype
# instead of relying on pandas' implicit handling of torch tensors.
submission_df['label'] = torch.argmax(submissions, dim=1).cpu().numpy()
submission_df.to_csv('submission.csv', index=False)
submission_df