In [None]:
# Uninstall fastai to avoid dependency conflicts with the packages installed below
!pip uninstall fastai -y
# Install packages from local files (no internet access required)
!pip install ../input/packages/torch-1.7.1-cp37-cp37m-manylinux1_x86_64.whl
!pip install ../input/packages/torchvision-0.8.2-cp37-cp37m-manylinux1_x86_64.whl
!pip install ../input/byol-pytorch/byol-pytorch-master

In [None]:
import sys
sys.path.append('../input/repvgg/')
sys.path.append('../input/repvggmodels/')
sys.path.append('../input/pytorch-optimizers/')

from torch_optimizer.radam import RAdam
from repvgg import RepVGG, create_RepVGG_B2, create_RepVGG_B3g4, create_RepVGG_B3, repvgg_model_convert

In [None]:
import os
import cv2
import time
import copy
import random
import joblib
import sklearn
import warnings
import multiprocessing
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from glob import glob
from tqdm import tqdm
from pathlib import Path
from datetime import datetime
from skimage import io
from sklearn import metrics
from sklearn.model_selection import GroupKFold, StratifiedKFold
from sklearn.metrics import roc_auc_score, log_loss
from IPython.display import display

import torch
import torch.nn.functional as F
import torchvision
from torch import nn
from torch.cuda.amp import autocast, GradScaler
from torch.utils.data import Dataset,DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
from torch.optim.lr_scheduler import StepLR
from torch.nn.modules.loss import _WeightedLoss
from torch.optim.lr_scheduler import CosineAnnealingLR
from torchvision import models
from torchvision import transforms
from byol_pytorch import BYOL

from albumentations.pytorch import ToTensor, ToTensorV2
from albumentations import (
    HorizontalFlip, VerticalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90,
    Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, HueSaturationValue,
    IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, IAAPiecewiseAffine, RandomResizedCrop,
    IAASharpen, IAAEmboss, RandomBrightnessContrast, Flip, OneOf, Compose, Normalize, Cutout, CoarseDropout,
    ShiftScaleRotate, CenterCrop, Resize, GaussianBlur)

In [None]:
def seed_everything(seed):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random``, NumPy's legacy global RNG and PyTorch (CPU and
    all CUDA devices), and configures cuDNN for reproducible runs.

    Args:
        seed: Integer seed applied to every generator.
    """
    random.seed(seed)
    # Only affects subprocesses started afterwards; the hash seed of the
    # current interpreter is fixed at start-up.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN auto-tuner may pick different kernels from run to run, which
    # defeats the deterministic flag above, so it must stay disabled.
    torch.backends.cudnn.benchmark = False

seed_everything(42)

In [None]:
# Load the competition's training table and the sample submission file.
train      = pd.read_csv('../input/cassava-leaf-disease-classification/train.csv')
submission = pd.read_csv('../input/cassava-leaf-disease-classification/sample_submission.csv')
# Preview the first rows of both frames.
display(train.head(2))
display(submission.head())

In [None]:
# Number of training rows per label value (quick class-balance check).
train.label.value_counts()

In [None]:
def get_img(path):
    """Read an image file and return it as an RGB array.

    OpenCV decodes images in BGR channel order; the result is converted to
    RGB and returned as a fresh contiguous array (not a negatively-strided
    view), so it can be handed to libraries that reject negative strides.

    Args:
        path: Location of the image file (``str`` or path-like).

    Returns:
        The decoded image as a NumPy array in RGB channel order.

    Raises:
        FileNotFoundError: If the file is missing or cannot be decoded
            (``cv2.imread`` signals both cases by returning ``None``).
    """
    im_bgr = cv2.imread(str(path))
    if im_bgr is None:
        raise FileNotFoundError(f"Could not read image: {path}")
    return cv2.cvtColor(im_bgr, cv2.COLOR_BGR2RGB)

# Sanity check: load one training image through get_img and display it.
img = get_img('../input/cassava-leaf-disease-classification/train_images/1000015157.jpg')
plt.imshow(img)
plt.show()

In [None]:
# Training configuration shared by the cells below.
# Number of images per DataLoader batch.
BATCH_SIZE  = 64
# Maximum number of training epochs run by do_train.
EPOCHS      = 40
# Learning rate passed to the optimizer.
LR          = 0.001
# Image size passed to ImagesDataset and to the BYOL learner.
IMAGE_SIZE  = 224
# Early-stopping threshold: compared against the count of epochs without improvement in do_train.
EARLY_STOP  = 10
# One DataLoader worker process per available CPU core.
NUM_WORKERS = multiprocessing.cpu_count()

In [None]:
class SelfSupervisedLearner(nn.Module):
    """``nn.Module`` wrapper that owns a ``BYOL`` learner built around ``net``.

    Calling the module forwards the image batch to the wrapped learner and
    returns whatever the learner returns (the training loop treats it as the
    loss to back-propagate).
    """

    def __init__(self, net, **kwargs):
        """Build the BYOL learner.

        Args:
            net: Backbone network handed to ``BYOL``.
            **kwargs: Forwarded unchanged to the ``BYOL`` constructor.
        """
        super().__init__()
        byol = BYOL(net, **kwargs)
        self.learner = byol

    def forward(self, images):
        """Run the wrapped learner on ``images`` and return its output."""
        output = self.learner(images)
        return output

class ImagesDataset(Dataset):
    """Unlabelled image dataset driven by a dataframe with an ``image_id`` column.

    Each item is the image named by the row's ``image_id``, loaded from the
    training-image directory with ``get_img`` and passed through the
    ``ToTensor`` transform. No label is returned.
    """

    def __init__(self, df, image_size):
        """Store a re-indexed copy of ``df`` and build the transform.

        Args:
            df: Dataframe with one row per image and an ``image_id`` column.
            image_size: NOTE(review): accepted but never used in this class;
                images are returned at their on-disk resolution.
        """
        super().__init__()
        self.data_root = '../input/cassava-leaf-disease-classification/train_images/'
        self.transform = Compose([ToTensor()], p=1.)
        # Private copy with a clean 0..n-1 index so positional lookups are safe.
        self.df = df.reset_index(drop=True).copy()

    def __len__(self):
        """Return the number of rows (images) in the dataset."""
        return len(self.df)

    def __getitem__(self, index : int):
        """Load and transform the image at positional ``index``."""
        image_id = self.df.iloc[index]['image_id']
        path = f"{self.data_root}/{image_id}"
        img = get_img(path)
        transformed = self.transform(image=img)
        return transformed['image']

In [None]:
# Wrap the full training table in the unlabelled image dataset.
ds = ImagesDataset(train, IMAGE_SIZE)
# Shuffled loader; pin_memory=True requests page-locked host buffers for the batches.
train_loader = DataLoader(ds, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, shuffle=True, pin_memory=True)

In [None]:
# Backbone: RepVGG-B2 in training (non-deploy) form, initialised from a local checkpoint.
#backbone = models.resnet50(pretrained=True)
backbone = create_RepVGG_B2(deploy=False)
# map_location='cpu' lets the checkpoint load on machines without CUDA as well;
# the whole model is moved to the training device later.
backbone.load_state_dict(torch.load('../input/repvgg/RepVGG-B2-train.pth', map_location='cpu'))

# Wrap the backbone in the BYOL learner. All keyword arguments are forwarded to BYOL.
# NOTE(review): 'gap' is presumably the name of RepVGG's global-average-pooling
# layer, whose output BYOL uses as the representation; confirm against repvgg.py.
# NOTE(review): use_momentum=False presumably disables the moving-average target
# network, which would make moving_average_decay unused; confirm against byol-pytorch.
model    = SelfSupervisedLearner(
    backbone,
    image_size = IMAGE_SIZE,
    #hidden_layer = 'avgpool',  # ResNet
    hidden_layer = 'gap',
    projection_size = 256,
    projection_hidden_size = 2056,
    moving_average_decay = 0.99,
    use_momentum = False)

In [None]:
def sgd_optimizer(model, lr, momentum, weight_decay):
    """Build an SGD optimizer with one parameter group per trainable tensor.

    Per-parameter rules, decided by the parameter's name:
      * names containing ``'bias'`` or ``'bn'`` get no weight decay;
      * names containing ``'bias'`` get twice the base learning rate
        (Caffe-style common practice; the original author notes it made no
        difference).
    Parameters with ``requires_grad=False`` are left out.

    Args:
        model: Module whose ``named_parameters()`` are optimised.
        lr: Base learning rate.
        momentum: SGD momentum shared by all groups.
        weight_decay: Weight decay for parameters that are not bias/bn.

    Returns:
        A ``torch.optim.SGD`` instance over the collected groups.
    """
    param_groups = []
    for name, param in model.named_parameters():
        if param.requires_grad:
            is_bias = 'bias' in name
            group_decay = 0 if (is_bias or 'bn' in name) else weight_decay
            group_lr = 2 * lr if is_bias else lr
            param_groups.append(
                {'params': [param], 'lr': group_lr, 'weight_decay': group_decay}
            )
    return torch.optim.SGD(param_groups, lr, momentum=momentum)

def train_epoch(model, dataloader, optim, device="cpu", scheduler=None):
    """Train ``model`` for one pass over ``dataloader``.

    Uses automatic mixed precision (autocast + gradient scaling) when
    ``device`` is a CUDA device and plain full-precision training otherwise.

    Args:
        model: Module whose forward pass returns the loss for a batch
            (it is back-propagated directly).
        dataloader: Iterable yielding image batches as tensors.
        optim: Optimizer updating ``model``'s parameters.
        device: Device (string or ``torch.device``) batches are moved to.
        scheduler: Optional LR scheduler, stepped once at the end of the epoch.

    Returns:
        List with the loss value (Python float) of every batch, in order.
    """
    model.train()

    # AMP only has an effect on CUDA; disable it explicitly elsewhere so the
    # default device="cpu" runs without AMP warnings.
    use_amp = torch.device(device).type == "cuda"
    scaler = torch.cuda.amp.GradScaler(enabled=use_amp)

    running_loss = None
    losses = []

    tbar   = tqdm(dataloader)
    for item in tbar:
        x = item.to(device).float()
        optim.zero_grad()
        # Runs the forward pass with autocasting.
        with autocast(enabled=use_amp):
            loss = model(x)
        scaler.scale(loss).backward()
        scaler.step(optim)
        scaler.update()

        # Read the loss back once per step; every .item() call forces a
        # device-to-host synchronisation.
        loss_value = loss.item()
        if running_loss is None:
            running_loss = loss_value
        else:
            # Exponentially smoothed loss, shown only in the progress bar.
            running_loss = running_loss * .99 + loss_value * .01
        tbar.set_description('loss - {:.4f}'.format(running_loss))
        losses.append(loss_value)

    if scheduler is not None:
        scheduler.step()

    return losses
        
def do_train():
    """Run BYOL pre-training with checkpointing and early stopping.

    Relies on the notebook globals ``model``, ``backbone``, ``train_loader``,
    ``device``, ``LR``, ``EPOCHS`` and ``EARLY_STOP``.

    After every epoch the mean batch loss is compared with the best value so
    far. On improvement the backbone weights are written to ``./byol.pt``;
    otherwise a counter is incremented, and training stops once
    ``EARLY_STOP`` consecutive epochs have passed without improvement.
    """
    not_improved_cnt = 0
    best_loss = float("inf")

    # Move the model before constructing the optimizer, as PyTorch recommends.
    model.to(device)
    # RAdam without an LR schedule; sgd_optimizer(model, LR, 0.9, 1e-4) with a
    # scheduler passed to train_epoch is an alternative set-up.
    optimizer = RAdam(model.parameters(), lr=LR, weight_decay=1e-4)

    for epoch in range(EPOCHS):
        losses = train_epoch(model, train_loader, optimizer, device, None)
        mean_loss = np.mean(losses)
        print(f"epoch - {epoch + 1} mean loss - {mean_loss:.5f}")

        if mean_loss < best_loss:
            print('Best model will be saved to output path')
            best_loss = mean_loss
            # Only the backbone is saved; it is the network wrapped by the learner.
            torch.save(backbone.state_dict(), "./byol.pt")
            not_improved_cnt = 0
        else:
            # Count this epoch first, then test, so training stops after
            # exactly EARLY_STOP non-improving epochs (not EARLY_STOP + 1).
            not_improved_cnt += 1
            if not_improved_cnt >= EARLY_STOP:
                print("Met early stopping.")
                break

In [None]:
# Train on the GPU when one is available, otherwise fall back to the CPU.
# `device` is read as a global by do_train, so it must be defined before the call.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
do_train()