In [None]:
%%capture
%%bash
pip install efficientnet_pytorch torchtoolbox

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tqdm.notebook import tqdm
import multiprocessing as mp

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim.lr_scheduler import LambdaLR
from torch.utils.data import Dataset, DataLoader

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

import albumentations as A
from albumentations.pytorch import ToTensorV2

from efficientnet_pytorch import EfficientNet

import warnings
warnings.simplefilter('ignore')
%matplotlib inline

## Data

In [None]:
# Root folder of the preprocessed dataset; the CSV splits live directly under it.
base = "/kaggle/input/data-saving/im_resized/"
train_csv, valid_csv = base + "train.csv", base + "val.csv"

train_df = pd.read_csv(train_csv)
valid_df = pd.read_csv(valid_csv)

# Quick look at the first rows of the training split.
train_df.head()

In [None]:
p = 0.5  # probability with which each top-level augmentation stage is applied


def _make_normalize():
    """Build a fresh Normalize step with the mean/std used by both pipelines."""
    return A.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )


# Each OneOf stage applies at most one of its children when it fires.
_color_stage = A.OneOf(
    [
        A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2),
        A.HueSaturationValue(hue_shift_limit=20, sat_shift_limit=50, val_shift_limit=50),
    ],
    p=p,
)
_noise_stage = A.OneOf(
    [
        A.IAAAdditiveGaussianNoise(),
        A.GaussNoise(),
    ],
    p=p,
)
_blur_stage = A.OneOf(
    [
        A.MotionBlur(p=0.2),
        A.MedianBlur(blur_limit=3, p=0.1),
        A.Blur(blur_limit=3, p=0.1),
    ],
    p=p,
)
_distortion_stage = A.OneOf(
    [
        A.OpticalDistortion(p=0.3),
        A.GridDistortion(p=0.1),
        A.IAAPiecewiseAffine(p=0.3),
    ],
    p=p,
)

# Training pipeline: random augmentations, then normalisation and tensor conversion.
train_tfms = A.Compose([
    A.Cutout(p=p),
    A.RandomRotate90(p=p),
    A.Flip(p=p),
    _color_stage,
    _noise_stage,
    _blur_stage,
    A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.2, rotate_limit=45, p=p),
    _distortion_stage,
    _make_normalize(),
    ToTensorV2(),
])

# Evaluation pipeline: no random augmentation, only normalisation and tensor conversion.
test_tfms = A.Compose([
    _make_normalize(),
    ToTensorV2(),
])

class Data(Dataset):
    """Dataset over images stored on disk as fixed-size ``.npy`` chunks.

    Sample ``i`` is read from the file ``base + str(i // size) + ".npy"`` at
    position ``i % size``. Only the most recently used chunk is kept in
    memory, so sequential access loads every file once.

    Args:
        df: frame with one row per sample; its ``target`` column provides the
            labels (the column may be absent when ``is_test`` is True).
        size: number of samples stored in each chunk file.
        base: path prefix of the chunk files.
        transform: optional callable invoked as ``transform(image=image)``;
            the ``'image'`` entry of its result replaces the raw image.
        is_test: when True, ``__getitem__`` returns only the image.
    """

    def __init__(self, df, size, base, transform=None, is_test=False):
        if is_test and "target" not in df:
            # Unlabelled inference data: nothing to return besides the image.
            self.labels = None
        else:
            # Column vector of float32 labels, one row per sample.
            self.labels = df["target"].values.astype(np.float32)[:, None]
        self.length = len(df)
        self.size = size
        self.base = base
        self.current = None  # path of the chunk currently held in memory
        self.transform = transform
        self.is_test = is_test

    def __len__(self):
        return self.length

    def __getitem__(self, i):
        chunk_idx, offset = divmod(i, self.size)
        path = self.base + str(chunk_idx) + ".npy"
        if self.current != path:
            # Record the path only after a successful load so a failed read
            # cannot leave a stale chunk associated with the new path.
            self.current_batch = np.load(path)
            self.current = path

        image = self.current_batch[offset]
        if self.transform:
            image = self.transform(image=image)['image']

        if self.is_test:
            return image
        # Labels are indexed by the global sample index, not the in-chunk offset.
        return image, self.labels[i]

In [None]:
BS = 128  # mini-batch size
n_workers = mp.cpu_count()

# `size` is the number of samples stored in each .npy chunk on disk.
train_ds = Data(df=train_df, size=128, base=base + "train_batch_", transform=train_tfms)
valid_ds = Data(df=valid_df, size=128, base=base + "valid_batch_", transform=test_tfms)

# The training loader drops the final incomplete batch; the validation loader keeps it.
train_dl = DataLoader(train_ds, batch_size=BS, drop_last=True, num_workers=n_workers)
valid_dl = DataLoader(valid_ds, batch_size=BS, num_workers=n_workers)

In [None]:
class MyEfficientNet(nn.Module):
    """Pretrained EfficientNet backbone followed by a small MLP head that emits one logit.

    Args:
        base: name of the pretrained EfficientNet variant to load.
    """

    def __init__(self, base="efficientnet-b0"):
        super().__init__()

        # Pretrained backbone.
        self.base = EfficientNet.from_pretrained(base)

        # forward() never calls the backbone's own `_fc` layer, so its
        # gradients are switched off.
        for weight in self.base._fc.parameters():
            weight.requires_grad = False

        # Replacement head: in_features -> 512 -> 128 -> 1 with ReLU and dropout.
        n_features = self.base._fc.in_features
        head_layers = [
            nn.Linear(n_features, 512),
            nn.ReLU(),
            nn.Dropout(0.25),
            nn.Linear(512, 128),
            nn.ReLU(),
            nn.Dropout(0.25),
            nn.Linear(128, 1),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return the head's output for the globally average-pooled backbone features."""
        features = self.base.extract_features(x)
        pooled = F.adaptive_avg_pool2d(features, 1)
        flat = pooled.view(x.shape[0], -1)
        return self.fc(flat)

# Instantiate the network with its default backbone ("efficientnet-b0").
model = MyEfficientNet()

## Loss function

In [None]:
# WeightedFocalLoss looks its weight up by target value: the first weight (a1)
# is applied to target == 0 and the second (a2) to target == 1. Each class is
# weighted by the inverse of its frequency in the training set, so the rarer
# class receives the larger weight.
pos_frac = train_df["target"].mean()  # fraction of positive samples
a1 = 1 / (1 - pos_frac)  # weight for target == 0
a2 = 1 / pos_frac        # weight for target == 1

class WeightedFocalLoss(nn.Module):
    """Class-weighted binary focal loss computed on raw logits.

    Per element the loss is ``alpha[t] * (1 - p_t) ** gamma * BCE`` where
    ``BCE`` is the binary cross-entropy with logits, ``p_t = exp(-BCE)`` and
    ``t`` is the 0/1 target. The result is averaged over all elements.

    Args:
        a1: weight applied to elements whose target is 0.
        a2: weight applied to elements whose target is 1.
        gamma: focusing exponent applied to ``(1 - p_t)``.
    """
    def __init__(self, a1, a2, gamma=2):
        super().__init__()
        # A buffer needs no GPU at construction time and follows the module
        # through .to()/.cuda().
        self.register_buffer("alpha", torch.tensor([float(a1), float(a2)]))
        self.gamma = gamma

    def forward(self, inputs, targets):
        """Return the mean focal loss for logits ``inputs`` and 0/1 ``targets`` of the same shape."""
        BCE_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction='none')
        alpha = self.alpha.to(device=inputs.device, dtype=BCE_loss.dtype)
        # Index with the full target tensor so the weights keep the same shape
        # as BCE_loss; a flattened (B,) weight against a (B, 1) loss would
        # broadcast to (B, B) and mix weights across samples.
        at = alpha[targets.long()]
        pt = torch.exp(-BCE_loss)
        F_loss = at * (1 - pt) ** self.gamma * BCE_loss
        return F_loss.mean()
    
# Training criterion, built from the class weights a1 and a2 derived from train_df.
loss_fn = WeightedFocalLoss(a1, a2)

## Optimizer

In [None]:
# Collect, in definition order, the name prefixes that will each receive their
# own learning rate: one per "_blocks.<k>" entry of the backbone and one per
# other top-level backbone submodule that still has trainable parameters.
blocks = []

for name, param in model.base.named_parameters():
    if not param.requires_grad:
        continue
    # "_blocks.<k>.…" is grouped by its first two components, anything else by the first.
    depth = 2 if name.startswith("_blocks.") else 1
    group = ".".join(name.split(".")[:depth])
    if group not in blocks:
        blocks.append(group)

# Qualify backbone groups with "base.", add the head, and end every prefix
# with "." so that e.g. "base._blocks.1." cannot match "base._blocks.10.…".
blocks = ["base." + group + "." for group in blocks] + ["fc."]

In [None]:
# Learning rates are spaced geometrically across the prefixes in `blocks`:
# the first prefix gets lr_range[0], the last one gets lr_range[1].
lr_range = [1e-9, 1e-5]
lr_lo, lr_hi = lr_range
mul = (lr_hi / lr_lo) ** (1 / (len(blocks) - 1))
lrs = [lr_lo * mul ** step for step in range(len(blocks))]

# One optimizer group per parameter, carrying the learning rate of its prefix.
param_list = []
for lr, block in zip(lrs, blocks):
    for name, param in model.named_parameters():
        if name.startswith(block):
            param_list.append({'params': param, 'lr': lr})
optimizer = torch.optim.Adam(param_list)


def lr_sched(batch):
    """Multiplicative LR factor after `batch` scheduler steps (10% growth per step)."""
    return 1.1 ** batch


# LambdaLR takes one lambda per parameter group.
scheduler = LambdaLR(optimizer, lr_lambda=[lr_sched] * len(param_list))

## Train

Save the initial weights before running the learning-rate range test, so the model can be reset afterwards.

In [None]:
# Snapshot every parameter, in model.parameters() order, before the LR search.
# detach() keeps the copies out of the autograd graph, so they are plain data
# tensors rather than differentiable clones of the live parameters.
# Only parameters are captured; module buffers are not part of this snapshot.
weights = [param.detach().clone() for param in model.parameters()]

In [None]:
# LR range test: train one batch per step while the scheduler grows every
# learning rate, recording the loss against the learning rate of the last
# parameter group. Stops once the loss diverges.
lrs = []
losses = []
min_loss = 1e9

model.train()
model = model.to(device)
for x, y in tqdm(train_dl):
    x, y = x.to(device), y.to(device)
    optimizer.zero_grad()

    lrs.append(optimizer.param_groups[-1]['lr'])
    y_pred = model(x)
    loss = loss_fn(y_pred, y)
    loss.backward()
    optimizer.step()
    scheduler.step()

    # Convert once to a Python float: avoids repeated device syncs and keeps
    # `min_loss` from holding on to a graph-attached tensor.
    loss_value = loss.item()
    losses.append(loss_value)

    print(f"\rLoss: {loss_value:.4f}, lr {lrs[-1]:.6f}", end="")
    # A NaN/inf loss never compares greater than the threshold, so check it explicitly.
    if not np.isfinite(loss_value):
        break
    if loss_value < min_loss:
        min_loss = loss_value
    if loss_value > 20 * min_loss:
        break

In [None]:
# Loss against learning rate on a log x-axis; labelled so the figure stands on its own.
fig, ax = plt.subplots()
ax.plot(lrs, losses)
ax.set_xscale('log')
ax.set_xlabel("learning rate (last parameter group)")
ax.set_ylabel("training loss")
ax.set_title("LR range test")
plt.show()

Put the original weights back into the model, undoing the updates made during the learning-rate range test.

In [None]:
# Copy the saved values back into the live parameters in place.
# `weights` was captured before the model was moved to `device`; copy_()
# transfers across devices, whereas rebinding `p.data` would leave the
# parameters on the snapshot's device and make them share storage with the
# snapshot (so later training would overwrite it).
# Only parameters are restored; module buffers and optimizer state keep the
# values they reached during the LR search.
with torch.no_grad():
    for w, p in zip(weights, model.parameters()):
        p.copy_(w)