In [None]:
import sys
# Prepend the attached EfficientNet-PyTorch source tree to the import path
# (presumably so that `efficientnet_pytorch` can be imported without pip).
sys.path = [
    '../input/efficientnet-pytorch/EfficientNet-PyTorch/EfficientNet-PyTorch-master',
] + sys.path
# Prepend the attached ttach source tree as well; being added last, this entry
# ends up first on sys.path.
sys.path = [
    '../input/ttach-kaggle/ttach/',
] + sys.path
import ttach as tta

In [None]:
import os
import numpy as np
import pandas as pd

import albumentations as A
import cv2

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torch.optim as optim

from tqdm.notebook import tqdm
from torch.utils.data import Dataset, DataLoader
from albumentations.pytorch import ToTensorV2

from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold, StratifiedKFold

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import warnings  
# Suppress all warnings for the rest of the session (this also hides deprecation notices).
warnings.filterwarnings('ignore')

In [None]:
# Competition data root: the CSV files and the train/test image folders are read from here.
DIR_INPUT = '/kaggle/input/cassava-leaf-disease-classification'
# Not referenced by any other visible cell of this notebook.
DIR_WEIGHTS = '/kaggle/input/cassava-pytorch-starter-train'

# Not referenced by any other visible cell (no seeding call uses it).
SEED = 42
# Number of iterations of the fold loops in the two inference cells.
N_FOLDS = 1
# Batch size shared by the test and the training DataLoader.
BATCH_SIZE = 16
# Side length (pixels) that images are resized to, for both test and train transforms.
SIZE = 512
# Not referenced by any other visible cell.
CROP = 512
# Learning rate passed to Adam for the retraining stage.
init_lr = 5e-5
# Number of retraining epochs; also passed as T_max to CosineAnnealingLR.
n_epochs = 5

In [None]:
class CassavaDataset(Dataset):
    """Image-only dataset reading files from ``{DIR_INPUT}/{dataset}_images``.

    Args:
        df: DataFrame with an ``image_id`` column holding image file names.
        dataset: Folder prefix; images are loaded from ``{dataset}_images``.
        transforms: Optional callable invoked as ``transforms(image=array)``
            that returns a mapping with an ``'image'`` entry
            (albumentations-style). When omitted the raw image is used.
    """

    def __init__(self, df, dataset='train', transforms=None):
        self.df = df
        self.transforms = transforms
        self.dataset = dataset

    def __len__(self):
        return self.df.shape[0]

    def __getitem__(self, idx):
        """Return the image at position ``idx`` as a float32 CHW tensor divided by 255.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        # Positional lookup: DataLoader samplers hand out positions 0..len-1,
        # which only coincide with index labels for a default RangeIndex.
        image_id = self.df["image_id"].iloc[idx]
        image_src = f'{DIR_INPUT}/{self.dataset}_images/{image_id}'

        # cv2.imread returns None instead of raising when the file is unreadable.
        image = cv2.imread(image_src)
        if image is None:
            raise FileNotFoundError(f'Could not read image: {image_src}')
        # Channel order is left exactly as returned by cv2.imread (no colour conversion).

        # Previously `images` was left unbound when no transforms were supplied.
        if self.transforms:
            image = self.transforms(image=image)['image']

        image = image.astype(np.float32)
        image /= 255
        image = image.transpose(2, 0, 1)  # HWC -> CHW

        return torch.tensor(image)

In [None]:
from efficientnet_pytorch import EfficientNet
# EfficientNet architecture names; only `modelname2` is passed to enetv2 in the visible cells.
modelname="efficientnet-b0"
modelname2="efficientnet-b2"
class enetv2(nn.Module):
    """EfficientNet backbone followed by a separate linear head.

    The backbone's own ``_fc`` layer is replaced by an identity so that the
    backbone returns features, which ``myfc`` then maps to ``out_dim`` outputs.

    Args:
        out_dim: Number of outputs produced by the linear head.
        ModelName: Architecture name handed to ``EfficientNet.from_name``.
    """

    def __init__(self, out_dim=1, ModelName="efficientnet-b0"):
        super().__init__()
        backbone = EfficientNet.from_name(ModelName)
        feature_dim = backbone._fc.in_features
        self.basemodel = backbone
        self.myfc = nn.Linear(feature_dim, out_dim)
        # Swap out the stock classifier once its input width has been read.
        self.basemodel._fc = nn.Identity()

    def extract(self, x):
        """Return the backbone output without applying the linear head."""
        return self.basemodel(x)

    def forward(self, x):
        """Run the backbone, then the linear head."""
        return self.myfc(self.basemodel(x))

In [None]:
# Test-time preprocessing: resize only. Conversion to a tensor is done by the
# dataset class itself, so no ToTensorV2 step is included here.
_test_resize = A.Resize(height=SIZE, width=SIZE, p=1.0)
transforms_test = A.Compose([_test_resize])

In [None]:
# Load the submission template and zero its second column (the label).
submission_df = pd.read_csv(f'{DIR_INPUT}/sample_submission.csv')
submission_df.iloc[:, 1] = 0

submission_df.head()

In [None]:
# A one-row template switches the notebook into "commit" mode: the frame is
# replaced by a two-row placeholder and train_epoch later stops after 10 batches.
# NOTE(review): presumably the single-row case is the public/interactive run — confirm.
commit = submission_df.shape[0] == 1
if commit:
    placeholder_rows = [
        {'image_id': '2216849948.jpg', 'label': 0},
        {'image_id': '2216849948.jpg', 'label': 0},
    ]
    submission_df = pd.DataFrame(placeholder_rows)
    submission_df.reset_index(drop=True, inplace=True)
submission_df.head()

In [None]:
# Test images are served in file order (no shuffling) so that prediction rows
# line up with the rows of submission_df.
dataset_test = CassavaDataset(
    df=submission_df,
    dataset='test',
    transforms=transforms_test,
)
dataloader_test = DataLoader(
    dataset_test,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
)

# Pseudo-Labeling

In [None]:
# Accumulator for class probabilities, summed over folds: one row per test image.
submissions = None
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device('cpu')

# Horizontal-flip test-time augmentation; identical for every fold, so built once.
tta_transforms = tta.Compose([tta.HorizontalFlip()])

for i_fold in range(N_FOLDS):
    model = enetv2(5, modelname2).to(device)

    # NOTE(review): the checkpoint path is hard-coded to fold 0, so N_FOLDS > 1
    # would load the same weights on every iteration.
    checkpoint2 = torch.load("../input/cassavadata/efficientnet-b2_512_final_epoch10_fold0.pth", map_location=device)
    model.load_state_dict(checkpoint2)
    model.eval()

    # Wrap without rebinding `model`: later cells retrain the bare model.
    net = tta.ClassificationTTAWrapper(model, tta_transforms)

    # Collect per-batch probabilities and concatenate once instead of
    # re-concatenating the growing tensor on every batch.
    batch_probs = []
    with torch.no_grad():
        for images in dataloader_test:
            images = images.to(device, dtype=torch.float)
            outputs = net(images)
            batch_probs.append(torch.softmax(outputs, dim=1).cpu())
    test_preds = torch.cat(batch_probs, dim=0)

    # Sum of softmax probabilities across folds (not logits); the argmax taken
    # downstream is unaffected by the missing division by N_FOLDS.
    submissions = test_preds if submissions is None else submissions + test_preds

In [None]:
# Preview the first ten rows of the accumulated class probabilities.
submissions[:10]

In [None]:
# Build the pseudo-label frame: test image ids, predicted labels, and pl == 1.
pl_df = pd.read_csv(DIR_INPUT + '/sample_submission.csv')
if pl_df.shape[0] == 1:
    # Same two-row placeholder as used for submission_df in the one-row case.
    pl_df = pd.DataFrame([{'image_id': '2216849948.jpg', 'label': 0},{'image_id': '2216849948.jpg', 'label': 0}])
    pl_df.reset_index(drop=True, inplace=True)

# Convert the argmax tensor to a NumPy array explicitly so the column gets a
# plain integer dtype instead of relying on pandas' handling of torch tensors.
pl_df['label'] = torch.argmax(submissions, dim=1).cpu().numpy()
# Integer flag marking pseudo-labelled rows; CassavaDataset2 uses it to pick
# the test image folder.
pl_df["pl"] = 1
pl_df.head()


In [None]:
# Labelled training rows get pl == 0; pseudo-labelled test rows (pl == 1) are appended.
df_train = pd.read_csv(os.path.join(DIR_INPUT, "train.csv"))
# Plain integer flag. np.zeros_like on the string `image_id` column does not
# produce an integer column.
df_train["pl"] = 0

# ignore_index gives a clean 0..n-1 index without leaving the old index behind
# as a stray "index" column.
df_train = pd.concat([df_train, pl_df], ignore_index=True)

In [None]:
class CassavaDataset2(Dataset):
    """Labelled dataset mixing original training images and pseudo-labelled test images.

    Args:
        df: DataFrame with ``image_id``, ``label`` and ``pl`` columns. A truthy
            ``pl`` means the image is read from ``test_images``, otherwise
            from ``train_images``.
        dataset: Stored on the instance but not used to locate images; the
            ``pl`` column decides the folder.
        transforms: Optional callable invoked as ``transforms(image=array)``
            that returns a mapping with an ``'image'`` entry
            (albumentations-style). When omitted the raw image is used.
    """

    def __init__(self, df, dataset='train', transforms=None):
        self.df = df
        self.transforms = transforms
        self.dataset = dataset

    def __len__(self):
        return self.df.shape[0]

    def __getitem__(self, idx):
        """Return ``(image, label)``; the image is a float32 CHW tensor divided by 255.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        # Positional lookups: DataLoader samplers hand out positions 0..len-1.
        imageid = self.df["image_id"].iloc[idx]
        label = self.df["label"].iloc[idx]
        is_pseudo_labelled = self.df["pl"].iloc[idx]

        folder = 'test_images' if is_pseudo_labelled else 'train_images'
        image_src = f'{DIR_INPUT}/{folder}/{imageid}'

        # cv2.imread returns None instead of raising when the file is unreadable.
        image = cv2.imread(image_src)
        if image is None:
            raise FileNotFoundError(f'Could not read image: {image_src}')
        # Channel order is left exactly as returned by cv2.imread (no colour conversion).

        # Previously `images` was left unbound when no transforms were supplied.
        if self.transforms:
            image = self.transforms(image=image)['image']

        image = image.astype(np.float32)
        image /= 255
        image = image.transpose(2, 0, 1)  # HWC -> CHW

        return torch.tensor(image), torch.tensor(label)
    
import albumentations
import albumentations as A

# Training augmentation pipeline, applied in the listed order; the final Resize
# always runs so every sample leaves at SIZE x SIZE.
# NOTE(review): albumentations documents the HueSaturationValue shift limits in
# integer pixel units, so 0.2 looks very small - confirm it is intended.
# NOTE(review): A.Cutout has been deprecated in newer albumentations releases -
# confirm the installed version still provides it.
transforms_train = A.Compose([
    A.ShiftScaleRotate(scale_limit=0.3, rotate_limit=180, p=0.5),
    A.OneOf(
        [
            A.HueSaturationValue(
                hue_shift_limit=0.2,
                sat_shift_limit=0.2,
                val_shift_limit=0.2,
                p=0.5,
            ),
            A.RandomBrightnessContrast(
                brightness_limit=0.2,
                contrast_limit=0.2,
                p=0.5,
            ),
        ],
        p=0.9,
    ),
    A.Cutout(num_holes=12, max_h_size=64, max_w_size=64, fill_value=0, p=0.5),
    A.Rotate(p=0.5),
    A.Transpose(p=0.5),
    A.VerticalFlip(p=0.5),
    A.HorizontalFlip(p=0.5),
    A.Resize(SIZE, SIZE, p=1.0),
])

# The `dataset` argument is not used by CassavaDataset2 to locate files; the
# per-row `pl` flag selects the image folder.
dataset_train = CassavaDataset2(
    df=df_train,
    dataset='test',
    transforms=transforms_train,
)
dataloader_train = DataLoader(
    dataset_train,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
)

# Loss, optimiser and cosine schedule for retraining. `model` is the network
# left behind by the inference cell above.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=init_lr)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=n_epochs)

# Retrain model

In [None]:
# Gradient scaler created in disabled mode, so scale/step/update pass straight through.
scaler = torch.cuda.amp.GradScaler(enabled=False)


def train_epoch(loader, optimizer):
    """Train the global ``model`` for one pass over ``loader``.

    Relies on the module-level ``model``, ``criterion``, ``scaler``, ``device``
    and ``commit``. When ``commit`` is truthy the pass stops after 10 batches.

    Args:
        loader: Iterable yielding ``(data, target)`` batches.
        optimizer: Optimiser that updates the model parameters.

    Returns:
        Mean of the per-batch losses seen during this pass.
    """
    model.train()
    batch_losses = []
    progress = tqdm(loader)
    for batch_no, (data, target) in enumerate(progress, start=1):
        data = data.to(device)
        target = target.to(device).long()

        optimizer.zero_grad()
        with torch.cuda.amp.autocast(enabled=False):
            logits = model(data).squeeze(1)
            loss = criterion(logits, target)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        optimizer.zero_grad()

        loss_np = loss.detach().cpu().numpy()
        batch_losses.append(loss_np)
        # Running mean over at most the last 100 batches.
        recent = batch_losses[-100:]
        smooth_loss = sum(recent) / len(recent)
        progress.set_description('loss: %.5f, smth: %.5f' % (loss_np, smooth_loss))

        if commit and batch_no == 10:
            break
    return np.mean(batch_losses)

In [None]:
# Retrain on the combined labelled + pseudo-labelled data.
for epoch in range(1, n_epochs+1):
    torch.cuda.empty_cache()

    train_loss = train_epoch(dataloader_train, optimizer)
    print(f'epoch {epoch}/{n_epochs}: mean train loss {train_loss:.5f}')

    # Step the scheduler after the epoch's optimizer updates and without the
    # deprecated explicit-epoch argument. Epoch k still trains at the cosine
    # value for step k-1, as before.
    scheduler.step()

In [None]:
# Reset the accumulator: this pass scores the test set with the retrained model.
submissions = None
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device('cpu')

model.eval()

# Horizontal-flip test-time augmentation; wrap without rebinding `model`.
tta_transforms = tta.Compose([tta.HorizontalFlip()])
net = tta.ClassificationTTAWrapper(model, tta_transforms)

# NOTE(review): every iteration scores the same in-memory `model`, so with
# N_FOLDS > 1 this only rescales the summed probabilities - confirm intent.
for i_fold in range(N_FOLDS):
    # Collect per-batch probabilities and concatenate once instead of
    # re-concatenating the growing tensor on every batch.
    batch_probs = []
    with torch.no_grad():
        for images in dataloader_test:
            images = images.to(device, dtype=torch.float)
            outputs = net(images)
            batch_probs.append(torch.softmax(outputs, dim=1).cpu())
    test_preds = torch.cat(batch_probs, dim=0)

    # Sum of softmax probabilities (not logits); the final argmax is unaffected
    # by the missing division by N_FOLDS.
    submissions = test_preds if submissions is None else submissions + test_preds

# Submission

In [None]:
# Display the accumulated class probabilities from the retrained model.
submissions

In [None]:
# Convert the argmax tensor to a NumPy integer array before assigning, so the
# CSV holds plain integer labels regardless of how pandas handles torch tensors.
submission_df['label'] = torch.argmax(submissions, dim=1).cpu().numpy()
submission_df.to_csv('submission.csv', index=False)
submission_df.head()