In [None]:
# %%capture
# !curl https://raw.githubusercontent.com/pytorch/xla/master/contrib/scripts/env-setup.py -o pytorch-xla-env-setup.py
!pip install timm
# !python pytorch-xla-env-setup.py --version 1.7

In [None]:
# Data preprocessing

import numpy as np 
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import StratifiedKFold


# Plotting

import matplotlib.pyplot as plt
import seaborn as sns


# ANN + ML

import torch
import torch.nn as nn
import lightgbm as lgb
from torch.optim import Adam
from torch.utils.data import DataLoader, Dataset
from torch.cuda.amp import GradScaler, autocast
from torch.optim.lr_scheduler import CosineAnnealingLR,CosineAnnealingWarmRestarts, ReduceLROnPlateau


# Image preprocessing

import cv2
import albumentations as A
from albumentations.pytorch import ToTensorV2


# Miscellaneous

import os
import gc
import sys
import math
import time
import pickle
import random
from tqdm.auto import tqdm
sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')
import timm
import warnings


warnings.filterwarnings('ignore')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# CFG

In [None]:
# general global variables
DATA_PATH = "../input/petfinder-pawpularity-score"
TRAIN_PATH = f"{DATA_PATH}/train"
TEST_PATH = f"{DATA_PATH}/test"

# Single source of truth for every tunable of the notebook.
# NOTE: 'epochs' used to be listed twice (4, then 20). Python keeps the last
# value of a duplicated key, so the effective value was always 20; only that
# one is kept here, at the key's original position.
CFG = {
    'batch_size' : 16,
    'num_workers' : 4,
    'n_fold': 5,
    'epochs' : 20, # [freeze_epo, + warmup_epo + cosine_epo]
    'lr' : 1e-4,
    'gamma':0.7,
    'img_size' : 384,
    'model_name': 'vit_base_patch32_384',
    'target_col':'Pawpularity',
    'train':True,
    'apex': False,
    'trn_fold':[0,1,2,3,4],
    'print_freq' : 100,
    'gradient_accumulation_steps':1,
    'device': 'GPU',
    'freeze_epo' : 0, # GradualWarmupSchedulerV2
    'warmup_epo' : 1, # GradualWarmupSchedulerV2
    'cosine_epo' : 19, # GradualWarmupSchedulerV2
    'scheduler':'CosineAnnealingLR',
    'T_0' : 10,
    'T_max': 3,
    'min_lr':1e-6,
    'weight_decay':1e-6,
    'max_grad_norm':1000,
    'seed' : 42,
    'nprocs':1
}

In [None]:
# Device-specific setup. The TPU branch installs torch_xla and imports the XLA
# helpers used by the training code; the GPU branch only re-imports the AMP
# helpers when CFG['apex'] is set.
if CFG['device'] == 'TPU':
    import ignite.distributed as idist
    os.system('curl https://raw.githubusercontent.com/pytorch/xla/master/contrib/scripts/env-setup.py -o pytorch-xla-env-setup.py')
    os.system('python pytorch-xla-env-setup.py --version nightly --apt-packages libomp5 libopenblas-dev')
    # `os.system('export VAR=...')` only changes the environment of a
    # short-lived subshell, so the settings never reached this process.
    # Set them on os.environ, before torch_xla is imported.
    os.environ['XLA_USE_BF16'] = '1'
    os.environ['XLA_TENSOR_ALLOCATOR_MAXSIZE'] = '100000000'
    import torch_xla
    import torch_xla.core.xla_model as xm
    import torch_xla.distributed.parallel_loader as pl
    import torch_xla.distributed.xla_multiprocessing as xmp

    # Scale the learning rate up and the per-process batch size down by the
    # number of processes. NOTE: this mutates CFG, so re-running the cell
    # applies the scaling again.
    CFG['lr'] = CFG['lr'] * CFG['nprocs']
    CFG['batch_size'] = CFG['batch_size'] // CFG['nprocs']

elif CFG['device'] == 'GPU' and CFG['apex']:
    from torch.cuda.amp import autocast, GradScaler

In [None]:
# Read the competition tables; the last expression previews the training rows.
train_df = pd.read_csv(f"{DATA_PATH}/train.csv")
test_df = pd.read_csv(f"{DATA_PATH}/test.csv")
train_df.head()

# Utils

In [None]:
def get_score(y_true, y_pred):
    """
    Root-mean-squared error between targets and predictions

    Arguments:
        y_true {array-like} -- Ground-truth values
        y_pred {array-like} -- Predicted values

    Returns:
        float -- RMSE
    """
    # The `squared=False` flag of mean_squared_error is deprecated in recent
    # scikit-learn releases and later removed; taking the square root of the
    # MSE gives the same number on every version.
    score = np.sqrt(mean_squared_error(y_true, y_pred)) # RMSE
    return score

def seed_everything(seed):
    """
    Seeds every random number generator in use so results are reproducible

    Arguments:
        seed {int} -- Value shared by all generators
    """
    os.environ["PYTHONHASHSEED"] = str(seed)

    # Python and NumPy generators
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators
    for seeder in (torch.manual_seed, torch.cuda.manual_seed):
        seeder(seed)

    # Deterministic cuDNN kernels, no autotuning
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# Seed the Python, NumPy and PyTorch generators with the configured seed.
seed_everything(seed=CFG['seed'])

# CV Split


In [None]:
# Give every row a validation fold, stratified on the target cut into 10 bins.
train_df["fold"] = -1
train_df["bins"] = pd.cut(train_df["Pawpularity"], bins=10, labels=False)
target = train_df["bins"]

skf = StratifiedKFold(n_splits=CFG['n_fold'])
for fold, (train_idx, val_idx) in enumerate(skf.split(train_df, target)):
    # A row belongs to the fold in which it is held out for validation.
    train_df.loc[val_idx, 'fold'] = fold

# "bins" is kept in the frame because the summary below groups on it.
train_df['fold'] = train_df['fold'].astype(int)
train_df.groupby(['fold','bins']).size()

In [None]:
# Persist the fold-annotated training frame to the working directory.
train_df.to_pickle('./train_fold.pkl')

# Dataset

In [None]:
class PawpularityDataset(Dataset):
    """
    Dataset yielding (image, Pawpularity) pairs for training and validation

    Arguments:
        df {pd.DataFrame} -- Frame with at least the "Id" and "Pawpularity" columns
        augs {callable or None} -- Transform called as augs(image=...)["image"];
                                   skipped when falsy
    """
    def __init__(self, df, augs=None):
        self.df = df
        self.augs = augs

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        image = self._load_image(self.df["Id"].iloc[index])

        # Apply image augmentations if available
        if self.augs:
            image = self.augs(image=image)["image"]

        return image, self.df["Pawpularity"].iloc[index]

    def _load_image(self, image_id):
        """Read `{TRAIN_PATH}/{image_id}.jpg` as a float32 RGB array divided by 255."""
        image_path = f"{TRAIN_PATH}/{image_id}.jpg"
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        # cv2.imread signals a missing or unreadable file by returning None;
        # fail here with the offending path instead of inside cvtColor.
        if image is None:
            raise FileNotFoundError(f"Could not read image: {image_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        return image

# Transforms

In [None]:
def get_train_augs():
    """
    Training transform: random resized crop, ImageNet normalisation, tensor conversion

    PawpularityDataset._load_image already divides pixels by 255, so Normalize
    is told the input range is [0, 1] (max_pixel_value=1.0). With the default
    of 255 the mean/std would be scaled by 255 a second time and every image
    would collapse to an almost constant tensor.
    """
    return A.Compose([A.RandomResizedCrop(CFG['img_size'],CFG['img_size'],scale = (0.85, 1.0)),
                         A.Normalize(mean = [0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225],
                                     max_pixel_value = 1.0),
                          ToTensorV2()])


def get_valid_augs():
    """
    Validation / inference transform: plain resize, ImageNet normalisation, tensor conversion

    Uses max_pixel_value=1.0 for the same reason as get_train_augs: the dataset
    hands over images that are already scaled to [0, 1].
    """
    return A.Compose([A.Resize(CFG['img_size'],CFG['img_size']),
                         A.Normalize(mean = [0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225],
                                     max_pixel_value = 1.0),
                          ToTensorV2()])

In [None]:
# Visual sanity check of the training pipeline on the first five samples.
train_dataset = PawpularityDataset(train_df, augs=get_train_augs())

for i in range(5):
    plt.figure(figsize=(4, 4))
    image, label = train_dataset[i]
    # Only index 0 along the first axis of the augmented sample is drawn, in grayscale.
    plt.imshow(image[0],cmap = 'gray')
    plt.title(f'label: {label}')
    plt.show()

# Model

In [None]:
class PawpularityModelCNN(nn.Module):
    """
    timm backbone whose `classifier` layer is replaced by a single-output linear head

    Arguments:
        model_name {str} -- Name passed to timm.create_model
        pretrained {bool} -- Forwarded to timm.create_model
    """
    def __init__(self, model_name = CFG['model_name'], pretrained = False):
        super().__init__()

        backbone = timm.create_model(model_name, pretrained=pretrained)
        # Remember the width feeding the original classifier, then bypass it.
        n_features = backbone.classifier.in_features
        backbone.classifier = nn.Identity()

        self.model = backbone
        self.features = n_features
        self.fc = nn.Linear(n_features, 1)

    def feature(self, image):
        """Backbone output with the classifier bypassed."""
        return self.model(image)

    def forward(self, image):
        """Single regression output per sample."""
        return self.fc(self.feature(image))

class PawpularityModelViT(nn.Module):
    """
    timm backbone whose `head` layer is replaced by a single-output linear head

    Arguments:
        model_name {str} -- Name passed to timm.create_model
        pretrained {bool} -- Forwarded to timm.create_model
    """
    def __init__(self, model_name=CFG['model_name'], pretrained=False):
        super().__init__()

        backbone = timm.create_model(model_name, pretrained=pretrained)
        # Remember the width feeding the original head, then bypass it.
        n_features = backbone.head.in_features
        backbone.head = nn.Identity()

        self.model = backbone
        self.features = n_features
        self.fc = nn.Linear(n_features, 1)

    def feature(self, image):
        """Backbone output with the head bypassed."""
        return self.model(image)

    def forward(self, image):
        """Single regression output per sample."""
        return self.fc(self.feature(image))

# Loss

In [None]:
class RMSELoss(nn.Module):
    """
    Root-mean-squared-error loss

    Arguments:
        eps {float} -- Added to the MSE before the square root is taken
    """
    def __init__(self, eps=1e-6):
        super().__init__()
        self.eps = eps
        self.mse = nn.MSELoss()

    def forward(self, yhat, y):
        # Targets are cast to float before being compared with the predictions.
        mean_sq_err = self.mse(yhat, y.float())
        return (mean_sq_err + self.eps).sqrt()

# Helper Function

In [None]:
# ====================================================
# Helper functions
# ====================================================
class AverageMeter(object):
    """Keeps the most recent value together with a running weighted mean"""
    def __init__(self):
        self.reset()

    def reset(self):
        """Forget everything recorded so far."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record `val`, counted `n` times, and refresh the running mean."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count


def asMinutes(s):
    """Format a duration given in seconds as '<minutes>m <seconds>s'."""
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover_seconds)


def timeSince(since, percent):
    """
    Elapsed time since `since` plus an estimate of the time remaining

    Arguments:
        since {float} -- Start timestamp as returned by time.time()
        percent {float} -- Fraction of the work already done (must be non-zero)
    """
    elapsed = time.time() - since
    # Extrapolate the total duration from the fraction completed so far.
    estimated_total = elapsed / (percent)
    remaining = estimated_total - elapsed
    return '%s (remain %s)' % (asMinutes(elapsed), asMinutes(remaining))

def train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device):
    """
    Runs one training epoch and returns the sample-weighted mean loss

    Arguments:
        fold {int} -- Fold number, only used to label log output
        train_loader {iterable} -- Yields (images, labels) batches and supports len()
        model {nn.Module} -- Network being trained
        criterion {callable} -- Loss called as criterion(predictions, labels)
        optimizer {torch.optim.Optimizer} -- Stepped every
                                             CFG['gradient_accumulation_steps'] batches
        epoch {int} -- Zero-based epoch index, only used for logging
        scheduler {object} -- Not used here (the caller steps it); kept so that
                              existing call sites keep working
        device {torch.device} -- Device the batches are moved to

    Returns:
        float -- Average loss over the epoch
    """
    on_gpu = CFG['device'] == 'GPU'
    on_tpu = CFG['device'] == 'TPU'
    accum_steps = CFG['gradient_accumulation_steps']
    log = xm.master_print if on_tpu else print

    if on_gpu:
        scaler = GradScaler()

    data_time = AverageMeter()
    losses = AverageMeter()
    # switch to train mode
    model.train()
    # start from clean gradients so nothing left over from an incomplete
    # accumulation window of a previous epoch leaks into this one
    optimizer.zero_grad()
    start = end = time.time()
    n_batches = len(train_loader)
    # reported as nan until the first optimizer step has computed a norm
    grad_norm = float('nan')

    for step, (images, labels) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        images = images.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        is_update_step = (step + 1) % accum_steps == 0

        if on_gpu:
            # only the forward pass and the loss run under autocast
            with autocast():
                y_preds = model(images)
                loss = criterion(y_preds.view(-1), labels)
            # record loss
            losses.update(loss.item(), batch_size)
            if accum_steps > 1:
                loss = loss / accum_steps
            scaler.scale(loss).backward()
            if is_update_step:
                # gradients are still multiplied by the loss scale here;
                # unscale them so the clipping threshold means what it says
                scaler.unscale_(optimizer)
                grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), CFG['max_grad_norm'])
                scaler.step(optimizer)
                scaler.update()
                optimizer.zero_grad()
        elif on_tpu:
            y_preds = model(images)
            # flatten the predictions (as the GPU path does) so the loss does
            # not broadcast them against the 1-D labels
            loss = criterion(y_preds.view(-1), labels)
            # record loss
            losses.update(loss.item(), batch_size)
            if accum_steps > 1:
                loss = loss / accum_steps
            loss.backward()
            if is_update_step:
                grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), CFG['max_grad_norm'])
                xm.optimizer_step(optimizer, barrier=True)
                optimizer.zero_grad()

        end = time.time()

        should_log = (step % CFG['print_freq'] == 0) or (step == (n_batches - 1))
        if should_log and (on_gpu or on_tpu):
            log('Epoch: [{0}][{1}/{2}] '
                'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                'Elapsed {remain:s} '
                'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                'Grad: {grad_norm:.4f}  '
                .format(
                    epoch+1, step, n_batches,
                    data_time=data_time, loss=losses,
                    remain=timeSince(start, float(step+1)/n_batches),
                    grad_norm=grad_norm,
                ))
            # Logged with the periodic message instead of once per batch.
            # The rate is read from the optimizer, which works for every
            # scheduler type (ReduceLROnPlateau has no get_lr()).
            log({f"[fold{fold}] loss": losses.val,
                 f"[fold{fold}] lr": optimizer.param_groups[0]['lr']})

    return losses.avg


def valid_fn(valid_loader, model, criterion, device):
    """
    Evaluates the model on a validation loader without tracking gradients

    Arguments:
        valid_loader {iterable} -- Yields (images, labels) batches and supports len()
        model {nn.Module} -- Network to evaluate (switched to eval mode)
        criterion {callable} -- Loss called as criterion(predictions.view(-1), labels)
        device {torch.device} -- Device the batches are moved to

    Returns:
        tuple -- (sample-weighted mean loss, numpy array with the raw model
                  outputs of all batches concatenated along the first axis)
    """
    on_gpu = CFG['device'] == 'GPU'
    on_tpu = CFG['device'] == 'TPU'
    log = xm.master_print if on_tpu else print

    data_time = AverageMeter()
    losses = AverageMeter()
    # switch to evaluation mode
    model.eval()
    preds = []
    start = end = time.time()
    n_batches = len(valid_loader)

    for step, (images, labels) in enumerate(valid_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        images = images.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        # compute loss
        with torch.no_grad():
            y_preds = model(images)
            loss = criterion(y_preds.view(-1), labels)
        losses.update(loss.item(), batch_size)
        # keep the raw outputs; their shape is not changed here
        preds.append(y_preds.to('cpu').numpy())
        end = time.time()

        should_log = (step % CFG['print_freq'] == 0) or (step == (n_batches - 1))
        if should_log and (on_gpu or on_tpu):
            log('EVAL: [{0}/{1}] '
                'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                'Elapsed {remain:s} '
                'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                .format(
                    step, n_batches,
                    data_time=data_time, loss=losses,
                    remain=timeSince(start, float(step+1)/n_batches),
                ))

    predictions = np.concatenate(preds)
    return losses.avg, predictions

In [None]:
# ====================================================
# Train loop
# ====================================================
def train_loop(folds, fold):
    """
    Trains and validates one cross-validation fold

    Rows of `folds` whose 'fold' column differs from `fold` are used for
    training, the remaining rows for validation. After every epoch the model
    is validated; whenever the validation RMSE improves, the model weights and
    the validation predictions are written to
    './{model_name}_fold{fold}_best_score.pth'.

    Arguments:
        folds {pd.DataFrame} -- Training frame with 'Id', the target column and 'fold'
        fold {int} -- Fold held out for validation

    Returns:
        pd.DataFrame -- Validation rows (index reset) with an added 'preds'
                        column holding the predictions of the best epoch

    Raises:
        ValueError -- CFG['device'], CFG['scheduler'] or CFG['model_name'] is not supported
        RuntimeError -- No epoch produced a score that could be kept as best
    """
    print(f"========== fold: {fold} training ==========")

    on_tpu = CFG['device'] == 'TPU'
    on_gpu = CFG['device'] == 'GPU'
    if not (on_tpu or on_gpu):
        raise ValueError(f"Unsupported CFG['device']: {CFG['device']!r} (expected 'GPU' or 'TPU')")
    multi_core_tpu = on_tpu and CFG['nprocs'] == 8
    log = xm.master_print if multi_core_tpu else print

    # ====================================================
    # loader
    # ====================================================
    trn_idx = folds[folds['fold'] != fold].index
    val_idx = folds[folds['fold'] == fold].index

    train_folds = folds.loc[trn_idx].reset_index(drop=True)
    valid_folds = folds.loc[val_idx].reset_index(drop=True)

    train_dataset = PawpularityDataset(train_folds,
                                 augs=get_train_augs())
    valid_dataset = PawpularityDataset(valid_folds,
                                 augs=get_valid_augs())

    train_loader = DataLoader(train_dataset,
                              batch_size=CFG['batch_size'],
                              shuffle=True,
                              num_workers=CFG['num_workers'], pin_memory=True, drop_last=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=CFG['batch_size'],
                              shuffle=False,
                              num_workers=CFG['num_workers'], pin_memory=True, drop_last=False)

    valid_labels = valid_folds[CFG['target_col']].values

    # ====================================================
    # scheduler
    # ====================================================
    def get_scheduler(optimizer):
        """Build the scheduler named by CFG['scheduler']."""
        name = CFG['scheduler']
        if name == 'ReduceLROnPlateau':
            # 'factor', 'patience' and 'eps' are optional in CFG; the
            # fallbacks are PyTorch's own defaults.
            return ReduceLROnPlateau(optimizer, mode='min', factor=CFG.get('factor', 0.1),
                                     patience=CFG.get('patience', 10), verbose=True,
                                     eps=CFG.get('eps', 1e-8))
        if name == 'CosineAnnealingLR':
            return CosineAnnealingLR(optimizer, T_max=CFG['T_max'], eta_min=CFG['min_lr'], last_epoch=-1)
        if name == 'CosineAnnealingWarmRestarts':
            return CosineAnnealingWarmRestarts(optimizer, T_0=CFG['T_0'], T_mult=1, eta_min=CFG['min_lr'], last_epoch=-1)
        if name == 'GradualWarmupSchedulerV2':
            # NOTE(review): GradualWarmupSchedulerV2 is not defined in the
            # visible part of this notebook; it must be provided before this
            # option is selected.
            scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, CFG['cosine_epo'])
            return GradualWarmupSchedulerV2(optimizer, multiplier=10, total_epoch=CFG['warmup_epo'], after_scheduler=scheduler_cosine)
        raise ValueError(f"Unknown CFG['scheduler']: {name!r}")

    # ====================================================
    # model & optimizer
    # ====================================================
    if on_tpu:
        device = xm.xla_device()
        xm.set_rng_state(CFG['seed'], device)
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def get_model(pretrained=False):
        """Build the wrapper class that matches CFG['model_name']."""
        if CFG['model_name'] == 'vit_base_patch32_384':
            return PawpularityModelViT(model_name=CFG['model_name'], pretrained=pretrained)
        if CFG['model_name'] == 'tf_efficientnet_b0_ns':
            return PawpularityModelCNN(CFG['model_name'], pretrained=pretrained)
        raise ValueError(f"Unknown CFG['model_name']: {CFG['model_name']!r}")

    model = get_model(pretrained=True)
    model.to(device)

    optimizer = Adam(model.parameters(), lr=CFG['lr'], weight_decay=CFG['weight_decay'], amsgrad=False)
    scheduler = get_scheduler(optimizer)

    # ====================================================
    # loop
    # ====================================================
    criterion = RMSELoss()
    print(f'Criterion: {criterion}')

    checkpoint_path = './{}_fold{}_best_score.pth'.format(CFG['model_name'], fold)
    best_score = np.inf
    best_preds = None

    for epoch in range(CFG['epochs']):

        start_time = time.time()

        # train
        if multi_core_tpu:
            para_train_loader = pl.ParallelLoader(train_loader, [device])
            avg_loss = train_fn(fold, para_train_loader.per_device_loader(device), model, criterion, optimizer, epoch, scheduler, device)
        else:
            avg_loss = train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device)

        # eval
        if multi_core_tpu:
            para_valid_loader = pl.ParallelLoader(valid_loader, [device])
            avg_val_loss, preds = valid_fn(para_valid_loader.per_device_loader(device), model, criterion, device)
            preds = idist.all_gather(torch.tensor(preds)).to('cpu').numpy()
            # Gather into a per-epoch variable: overwriting `valid_labels`
            # would gather the already gathered labels again next epoch.
            epoch_labels = idist.all_gather(torch.tensor(valid_labels)).to('cpu').numpy()
        else:
            avg_val_loss, preds = valid_fn(valid_loader, model, criterion, device)
            epoch_labels = valid_labels

        # Only ReduceLROnPlateau needs the monitored metric, and only the
        # warm-up scheduler is stepped with an explicit epoch index.
        if isinstance(scheduler, ReduceLROnPlateau):
            scheduler.step(avg_val_loss)
        elif CFG['scheduler'] == 'GradualWarmupSchedulerV2':
            scheduler.step(epoch)
        else:
            scheduler.step()

        score = get_score(epoch_labels, preds)

        elapsed = time.time() - start_time

        log(f'Epoch {epoch+1} - avg_train_loss: {avg_loss:.4f}  avg_val_loss: {avg_val_loss:.4f}  time: {elapsed:.0f}s')
        log(f'Epoch {epoch+1} - Score: {score:.4f}')

        if score < best_score:
            best_score = score
            best_preds = preds
            log(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
            torch.save({'model': model.state_dict(),'preds': preds}, checkpoint_path)

    # The best predictions are kept in memory rather than read back from the
    # checkpoint, so a stale file from an earlier run can never be picked up.
    if best_preds is None:
        raise RuntimeError(f"fold {fold}: no epoch produced a usable validation score")
    valid_folds['preds'] = best_preds

    return valid_folds

In [None]:
# Build the ViT regressor with pretrained weights (inspected by the parameter count below).
model = PawpularityModelViT(pretrained = True)

In [None]:
def count_parameters(model):
    """Return how many scalar parameters of `model` require gradients."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

# Report the number of parameters that require gradients.
print(f"The model has {count_parameters(model):,} trainable parameters")

# Main


In [None]:
# ====================================================
# main
# ====================================================
def main():
    """
    Trains every fold listed in CFG['trn_fold'] and reports out-of-fold scores

    Does nothing unless CFG['train'] is true. After each fold its score is
    printed; unless CFG['nprocs'] is 8, the score over all trained folds is
    printed as well and the out-of-fold frame is written to './oof_df.csv'.
    """

    def get_result(result_df):
        """Print the RMSE between the 'preds' column and the target column."""
        preds = result_df['preds'].values
        labels = result_df[CFG['target_col']].values
        score = get_score(labels, preds)
        print(f'Score: {score:<.4f}')

    if not CFG['train']:
        return

    # train
    # Per-fold frames are collected and concatenated once, instead of growing
    # a frame (starting from an empty one) inside the loop.
    fold_frames = []
    for fold in range(CFG['n_fold']):
        if fold not in CFG['trn_fold']:
            continue
        _oof_df = train_loop(train_df, fold)
        fold_frames.append(_oof_df)
        print(f"========== fold: {fold} result ==========")
        get_result(_oof_df)

    if CFG['nprocs'] != 8:
        if not fold_frames:
            # Nothing was trained, so there is no 'preds' column to score.
            print("No fold was trained; skipping the CV summary.")
            return
        oof_df = pd.concat(fold_frames)
        # CV result
        print(f"========== CV ==========")
        get_result(oof_df)
        # save result
        oof_df.to_csv('./oof_df.csv', index=False)

In [None]:
# Run cross-validated training (only does work when CFG['train'] is true).
main()

In [None]:
def get_features(test_loader, model, device):
    """
    Collects the output of model.feature for every batch of a loader

    Arguments:
        test_loader {iterable} -- Yields image batches and supports len()
        model {nn.Module} -- Model exposing a `feature` method (switched to eval mode)
        device {torch.device} -- Device the batches are moved to

    Returns:
        np.ndarray -- Per-batch outputs concatenated along the first axis
    """
    model.eval()
    collected = []
    progress = tqdm(enumerate(test_loader), total = len(test_loader))
    for _, images in progress:
        images = images.to(device)
        with torch.no_grad():
            batch_features = model.feature(images)
        collected.append(batch_features.to('cpu').numpy())
    return np.concatenate(collected)

In [None]:
class PawpularityDataset(Dataset):
    """
    Image-only dataset used for feature extraction

    NOTE: this redefines the PawpularityDataset declared earlier in the
    notebook. From this cell on, items are bare images (no label), so the
    training cells must not be re-run after it without re-running the
    original definition first. Images are still read from TRAIN_PATH.

    Arguments:
        df {pd.DataFrame} -- Frame with at least the "Id" column
        augs {callable or None} -- Transform called as augs(image=...)["image"];
                                   skipped when falsy
    """
    def __init__(self, df, augs=None):
        self.df = df
        self.augs = augs

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        image = self._load_image(self.df["Id"].iloc[index])

        # Apply image augmentations if available
        if self.augs:
            image = self.augs(image=image)["image"]

        return image

    def _load_image(self, image_id):
        """Read `{TRAIN_PATH}/{image_id}.jpg` as a float32 RGB array divided by 255."""
        image_path = f"{TRAIN_PATH}/{image_id}.jpg"
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        # cv2.imread signals a missing or unreadable file by returning None;
        # fail here with the offending path instead of inside cvtColor.
        if image is None:
            raise FileNotFoundError(f"Could not read image: {image_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        return image

In [None]:
# Run every fold's best checkpoint over all training images and keep the
# backbone features; IMG_FEATURES[k] holds the features from fold k's model.
IMG_FEATURES = []
test_dataset = PawpularityDataset(train_df, augs=get_valid_augs())
test_loader = DataLoader(
    test_dataset,
    batch_size=CFG['batch_size'] * 2,
    shuffle=False,
    num_workers=CFG['num_workers'],
    pin_memory=True,
    drop_last=False,
)

for fold in range(CFG['n_fold']):
    checkpoint_path = './{}_fold{}_best_score.pth'.format(CFG['model_name'], fold)
    model = PawpularityModelViT(pretrained=False)
    # Only the weights are needed; the checkpoint's 'preds' entry is ignored.
    state = torch.load(checkpoint_path, map_location=torch.device('cpu'))['model']
    model.load_state_dict(state)
    model.to(device)
    features = get_features(test_loader, model, device)
    IMG_FEATURES.append(features)
    # Drop the loaded weights and release cached GPU memory before the next fold.
    del state
    gc.collect()
    torch.cuda.empty_cache()

In [None]:
# Inspect the shape of the first fold's extracted feature array.
IMG_FEATURES[0].shape

In [None]:
def run_single_lightgbm(param, train, features, target, fold=0, categorical=None):
    """
    Trains LightGBM on one fold and returns its out-of-fold predictions

    The image features extracted by that fold's network (IMG_FEATURES[fold])
    are written into `train` as columns img_0..img_{D-1} first. This mutates
    the caller's frame.

    Arguments:
        param {dict} -- LightGBM parameters
        train {pd.DataFrame} -- Training frame with a 'fold' column
        features {list} -- Column names used as model inputs
        target {pd.Series} -- Target values, positionally aligned with `train`
        fold {int} -- Fold held out for validation (default: {0})
        categorical {list or None} -- Categorical feature names; None or empty
                                      means none (default: {None})

    Returns:
        tuple -- (oof array of len(train), zero outside the validation rows;
                  feature-importance frame; validation row positions)
    """
    categorical = list(categorical) if categorical else []

    # Column count follows the extracted features instead of a hard-coded 768.
    fold_features = IMG_FEATURES[fold]
    img_cols = [f"img_{i}" for i in range(fold_features.shape[1])]
    train[img_cols] = fold_features

    # Use row positions throughout: they are fed to .iloc and to plain numpy
    # indexing below, which index labels only satisfy for a default RangeIndex.
    fold_ids = train['fold'].values
    trn_idx = np.flatnonzero(fold_ids != fold)
    val_idx = np.flatnonzero(fold_ids == fold)
    print(f'train size : {len(trn_idx)}  valid size : {len(val_idx)}')

    if not categorical:
        trn_data = lgb.Dataset(train.iloc[trn_idx][features].values, label=target.iloc[trn_idx].values)
        val_data = lgb.Dataset(train.iloc[val_idx][features].values, label=target.iloc[val_idx].values)
    else:
        trn_data = lgb.Dataset(train.iloc[trn_idx][features], label=target.iloc[trn_idx].values, categorical_feature=categorical)
        val_data = lgb.Dataset(train.iloc[val_idx][features], label=target.iloc[val_idx].values, categorical_feature=categorical)

    num_round = 10000
    # The `early_stopping_rounds` / `verbose_eval` keyword arguments were
    # removed from lgb.train in LightGBM 4; the callbacks are the equivalent
    # spelling. `log_evaluation` was called `print_evaluation` in older releases.
    log_evaluation = getattr(lgb, 'log_evaluation', None) or getattr(lgb, 'print_evaluation')
    clf = lgb.train(param,
                    trn_data,
                    num_round,
                    valid_sets=[trn_data, val_data],
                    callbacks=[lgb.early_stopping(stopping_rounds=10),
                               log_evaluation(10)])
    print(f'Dumping model with pickle... lightgbm_fold{fold}.pkl')
    with open(f'./lightgbm_fold{fold}.pkl', 'wb') as fout:
        pickle.dump(clf, fout)

    oof = np.zeros(len(train))
    oof[val_idx] = clf.predict(train.iloc[val_idx][features], num_iteration=clf.best_iteration)
    score = get_score(target.iloc[val_idx].values, oof[val_idx])
    print(f"fold{fold} score: {score:<.5f}")

    fold_importance_df = pd.DataFrame()
    fold_importance_df["Feature"] = features
    fold_importance_df["importance"] = clf.feature_importance(importance_type='gain')
    fold_importance_df["fold"] = fold

    return oof, fold_importance_df, val_idx


def run_kfold_lightgbm(param, train, features, target, n_fold=5, categorical=None):
    """
    Runs run_single_lightgbm on every fold and scores the combined predictions

    Arguments:
        param {dict} -- LightGBM parameters
        train {pd.DataFrame} -- Training frame with a 'fold' column
        features {list} -- Column names used as model inputs
        target {pd.Series} -- Target values, positionally aligned with `train`
        n_fold {int} -- Number of folds to run (default: {5})
        categorical {list or None} -- Categorical feature names; None means
                                      none (default: {None})

    Returns:
        tuple -- (out-of-fold predictions; per-fold feature importances in one
                  frame; validation indices of all folds concatenated)
    """
    # Avoid a shared mutable default; the per-fold function always gets a list.
    categorical = [] if categorical is None else categorical

    oof = np.zeros(len(train))
    importance_frames = []
    val_idxes = []

    for fold in range(n_fold):
        print(f"===== Fold {fold} =====")
        _oof, fold_importance_df, val_idx = run_single_lightgbm(param,
                                                                train, features, target,
                                                                fold=fold, categorical=categorical)
        # Each fold fills only its own validation rows, so summing merges them.
        oof += _oof
        importance_frames.append(fold_importance_df)
        val_idxes.append(val_idx)

    # One concat at the end instead of growing a frame inside the loop.
    feature_importance_df = pd.concat(importance_frames, axis=0) if importance_frames else pd.DataFrame()

    val_idxes = np.concatenate(val_idxes)
    score = get_score(target.iloc[val_idxes].values, oof[val_idxes])
    print(f"CV score: {score:<.5f}")

    return oof, feature_importance_df, val_idxes


def show_feature_importance(feature_importance_df):
    """
    Plots the 50 features with the highest mean importance and saves the figure

    Arguments:
        feature_importance_df {pd.DataFrame} -- Frame with "Feature" and
                                                "importance" columns, one row per
                                                feature and fold
    """
    mean_importance = feature_importance_df[["Feature", "importance"]].groupby("Feature").mean()
    top_features = mean_importance.sort_values(by="importance", ascending=False)[:50].index

    # Keep the per-fold rows of the selected features only.
    keep_mask = feature_importance_df.Feature.isin(top_features)
    best_features = feature_importance_df.loc[keep_mask]
    plot_data = best_features.sort_values(by="importance",ascending=False)

    plt.figure(figsize=(8, 16))
    sns.barplot(x="importance", y="Feature", data=plot_data)
    plt.title('Features importance (averaged/folds)')
    plt.tight_layout()
    plt.savefig(f'./feature_importance_df_lightgbm.png')

In [None]:
# Second-stage model: LightGBM on the tabular flags plus the image features.
target = train_df[CFG['target_col']]

meta_features = ['Subject Focus', 'Eyes', 'Face', 'Near', 'Action', 'Accessory',
                 'Group', 'Collage', 'Human', 'Occlusion', 'Info', 'Blur']
img_features = [f"img_{i}" for i in range(768)]
features = meta_features + img_features

lgb_param = dict(
    objective='regression',
    metric='rmse',
    boosting_type='gbdt',
    learning_rate=0.01,
    seed=42,
    max_depth=-1,
    min_data_in_leaf=10,
    verbosity=-1,
)

oof, feature_importance_df, _ = run_kfold_lightgbm(
    lgb_param, train_df, features, target,
    n_fold=CFG['n_fold'], categorical=[],
)

show_feature_importance(feature_importance_df)
feature_importance_df.to_csv(f'./feature_importance_df.csv', index=False)

In [None]:
# Attach the LightGBM out-of-fold predictions, report the overall RMSE and save them.
train_df['pred'] = oof
score = get_score(train_df['Pawpularity'].values, train_df['pred'].values)
print(f"CV: {score:<.5f}")

oof_columns = ['Id', 'Pawpularity', 'pred']
train_df[oof_columns].to_pickle('./oof.pkl')
