kaggleコンペpetfinder　https://www.kaggle.com/c/petfinder-adoption-prediction
をfastaiやpytorch Lightningのような
ラッパーライブラリなしで実装したノートブック

参照
https://www.kaggle.com/yasufuminakama/petfinder-efficientnet-b0-starter-training

# Training efficientnet_b0

## 準備

### Library

In [1]:
import os
import gc
import sys
import math
import time
import random
import shutil
import seaborn as sns
import pickle
from pathlib import Path
from contextlib import contextmanager
from collections import defaultdict, Counter

import scipy as sp
import numpy as np
import pandas as pd

from sklearn import preprocessing
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold

from tqdm.auto import tqdm
from functools import partial

import cv2
from PIL import Image
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam, SGD
import torchvision.models as models
from torch.nn.parameter import Parameter
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau

import albumentations as transforms
from albumentations.pytorch import ToTensorV2
from albumentations import ImageOnlyTransform
from pytorch_lightning.loggers.wandb import WandbLogger
from pytorch_grad_cam.utils.image import show_cam_on_image
from pytorch_grad_cam import GradCAM, ScoreCAM, GradCAMPlusPlus, AblationCAM, XGradCAM, EigenCAM

sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')
import timm
import lightgbm as lgb

from torch.cuda.amp import autocast, GradScaler

import warnings
warnings.filterwarnings('ignore')
torch.backends.cudnn.benchmark = True

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

OUTPUT_DIR = './my_model/'
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

### Config

In [4]:
class CFG:
    apex=False
    debug=False
    print_freq=10
    num_workers=4
    size=512
    model_name='tf_efficientnet_b0_ns'
    scheduler='CosineAnnealingLR' # ['ReduceLROnPlateau', 'CosineAnnealingLR', 'CosineAnnealingWarmRestarts']
    epochs=3
    #factor=0.2 # ReduceLROnPlateau
    #patience=4 # ReduceLROnPlateau
    #eps=1e-6 # ReduceLROnPlateau
    T_max=3 # CosineAnnealingLR
    #T_0=3 # CosineAnnealingWarmRestarts
    lr=1e-4
    min_lr=1e-6
    batch_size=16
    weight_decay=1e-6
    gradient_accumulation_steps=1
    max_grad_norm=1000
    seed=42
    target_size=1
    target_col='Pawpularity'
    n_fold=2
    trn_fold=[0, 1]
    train=True
    grad_cam=True
    
if CFG.debug:
    CFG.epochs = 1
    train = train.sample(n=1000, random_state=CFG.seed).reset_index(drop=True)

### wandb

In [None]:
# ====================================================
# wandb
# ====================================================
import wandb
wandb.login

def class2dict(f):
    return dict((name, getattr(f, name)) for name in dir(f) if not name.startswith('__'))

run = wandb.init(project="petfinder_myproject", 
                 config=class2dict(CFG),
                 job_type="train")

## 前処理

### deta load

In [2]:
train = pd.read_csv('../input/petfinder-pawpularity-score/train.csv')
test = pd.read_csv('../input/petfinder-pawpularity-score/test.csv')

def get_train_file_path(image_id):
    return "../input/petfinder-pawpularity-score/train/{}.jpg".format(image_id)

def get_test_file_path(image_id):
    return "../input/petfinder-pawpularity-score/test/{}.jpg".format(image_id)

train['file_path'] = train['Id'].apply(get_train_file_path)
test['file_path'] = test['Id'].apply(get_test_file_path)

display(train.head())
display(test.head())

Unnamed: 0,Id,Subject Focus,Eyes,Face,Near,Action,Accessory,Group,Collage,Human,Occlusion,Info,Blur,Pawpularity,file_path
0,0007de18844b0dbbb5e1f607da0606e0,0,1,1,1,0,0,1,0,0,0,0,0,63,../input/petfinder-pawpularity-score/train/000...
1,0009c66b9439883ba2750fb825e1d7db,0,1,1,0,0,0,0,0,0,0,0,0,42,../input/petfinder-pawpularity-score/train/000...
2,0013fd999caf9a3efe1352ca1b0d937e,0,1,1,1,0,0,0,0,1,1,0,0,28,../input/petfinder-pawpularity-score/train/001...
3,0018df346ac9c1d8413cfcc888ca8246,0,1,1,1,0,0,0,0,0,0,0,0,15,../input/petfinder-pawpularity-score/train/001...
4,001dc955e10590d3ca4673f034feeef2,0,0,0,1,0,0,1,0,0,0,0,0,72,../input/petfinder-pawpularity-score/train/001...


Unnamed: 0,Id,Subject Focus,Eyes,Face,Near,Action,Accessory,Group,Collage,Human,Occlusion,Info,Blur,file_path
0,4128bae22183829d2b5fea10effdb0c3,1,0,1,0,0,1,1,0,0,1,0,1,../input/petfinder-pawpularity-score/test/4128...
1,43a2262d7738e3d420d453815151079e,0,1,0,0,0,0,1,1,0,0,0,0,../input/petfinder-pawpularity-score/test/43a2...
2,4e429cead1848a298432a0acad014c9d,0,0,0,1,0,1,1,1,0,1,1,1,../input/petfinder-pawpularity-score/test/4e42...
3,80bc3ccafcc51b66303c2c263aa38486,1,0,1,0,0,0,0,0,0,0,1,0,../input/petfinder-pawpularity-score/test/80bc...
4,8f49844c382931444e68dffbe20228f4,1,1,1,0,1,1,0,1,0,1,1,0,../input/petfinder-pawpularity-score/test/8f49...


### CV Split

In [None]:
num_bins = int(np.floor(1+np.log2(len(train))))
train["bins"] = pd.cut(train[CFG.target_col], bins=num_bins, labels=False)
Fold = StratifiedKFold(n_splits=CFG.n_fold, shuffle=True, random_state=CFG.seed)
for n, (train_index, val_index) in enumerate(Fold.split(train, train["bins"])):
    train.loc[val_index, 'fold'] = int(n)
train['fold'] = train['fold'].astype(int)
display(train.groupby(['fold', "bins"]).size())
train.to_pickle(OUTPUT_DIR+'train.pkl')

### Function

In [7]:
#seed値を固定
def set_seed(seed =42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic =True
set_seed(seed=CFG.seed)

def get_transforms(*, data):
    if data == 'train':
        return transforms.Compose([
            transforms.RandomResizedCrop(CFG.size, CFG.size, scale=(0.85, 1.0)),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            ToTensorV2(),
        ])

    elif data == 'valid':
        return transforms.Compose([
            transforms.Resize(CFG.size, CFG.size),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            ToTensorV2(),
        ])

### Dataset

In [6]:
class LoadDataset(Dataset):
    def __init__(self,df,transform =None,isTrain = True):
        self.df = df
        self.file_names = df['file_path'].values
        if(isTrain):
            self.labels = df[CFG.target_col].values
        self.transform = transform
    def __len__(self):
        return len(self.df)
    def __getitem__(self, idx,isTrain = True):
        file_path = self.file_names[idx]
        #cv2は画像読み込みなどに使われるライブラリ
        image = cv2.imread(file_path)
        #色空間を変換
        #TODO 変換しない場合を実験
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            image = self.transform(image=image)['image']
        if(isTrain):
            label = torch.tensor(self.labels[idx]).float()
            return image, label
        else:
            return image

In [None]:
train_dataset = LoadDataset(train, transform=get_transforms(data='train'))

## model

### model

In [8]:
class Model(nn.Module):
    def __init__(self, cfg, pretrained=False):
        super().__init__()
        self.cfg = cfg
        self.model = timm.create_model(self.cfg.model_name, pretrained=pretrained)
        self.n_features = self.model.classifier.in_features
        self.model.classifier = nn.Identity()
        self.fc = nn.Linear(self.n_features, self.cfg.target_size)

    def feature(self, image):
        feature = self.model(image)
        return feature
        
    def forward(self, image):
        feature = self.feature(image)
        output = self.fc(feature)
        return output

### Loss

In [None]:
class RMSELoss(nn.Module):
    def __init__(self, eps=1e-6):
        super().__init__()
        self.mse = nn.MSELoss()
        self.eps = eps

    def forward(self, yhat, y):
        loss = torch.sqrt(self.mse(yhat, y) + self.eps)
        return loss

### function



In [9]:
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

def init_logger(log_file=OUTPUT_DIR+'train.log'):
    from logging import getLogger, INFO, FileHandler,  Formatter,  StreamHandler
    logger = getLogger(__name__)
    logger.setLevel(INFO)
    handler1 = StreamHandler()
    handler1.setFormatter(Formatter("%(message)s"))
    handler2 = FileHandler(filename=log_file)
    handler2.setFormatter(Formatter("%(message)s"))
    logger.addHandler(handler1)
    logger.addHandler(handler2)
    return logger

LOGGER = init_logger()

def get_scheduler(optimizer):
        if CFG.scheduler=='ReduceLROnPlateau':
            scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=CFG.factor, patience=CFG.patience, verbose=True, eps=CFG.eps)
        elif CFG.scheduler=='CosineAnnealingLR':
            scheduler = CosineAnnealingLR(optimizer, T_max=CFG.T_max, eta_min=CFG.min_lr, last_epoch=-1)
        elif CFG.scheduler=='CosineAnnealingWarmRestarts':
            scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=CFG.T_0, T_mult=1, eta_min=CFG.min_lr, last_epoch=-1)
        return scheduler

def get_RMSE(y_true,y_pred):
    ## squared=FalseでRSCMになる。※TrueでMSE
    score = mean_squared_error(y_true= y_true ,y_pred=y_pred,squared=False)
    return score

#RSMEを出力
def get_result(result_df):
    preds = result_df['preds'].values
    labels = result_df[CFG.target_col].values
    score = get_RMSE(labels, preds)
    LOGGER.info(f'Score: {score:<.4f}')

def train_fn(fold,train_loader,model,criterion,optimizer,epoch,scheduler,device):
    model.train()
    losses = AverageMeter()
    global_step = 0
    for step,(images,labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        y_preds = model(images)
        loss = criterion(y_preds.view(-1), labels)

        # record loss
        losses.update(loss.item(), batch_size)

        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        loss.backward()
        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), CFG.max_grad_norm)
        if (step + 1) % CFG.gradient_accumulation_steps == 0:
            optimizer.step()
            optimizer.zero_grad()
            global_step += 1
        if step % CFG.print_freq == 0 or step == (len(train_loader)-1):
            print('Epoch: [{0}][{1}/{2}] '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  'Grad: {grad_norm:.4f}  '
                  'LR: {lr:.6f}  '
                  .format(epoch+1, step, len(train_loader),                         
                          loss=losses,
                          grad_norm=grad_norm,
                          lr=scheduler.get_lr()[0]))
        wandb.log({f"[fold{fold}] loss": losses.val,
                   f"[fold{fold}] lr": scheduler.get_lr()[0]})

def valid_fn(valid_loader, model, criterion, device):
    #推論モードに切り替え
    model.eval()
    losses = AverageMeter()
    preds = []
    for step, (images, labels) in enumerate(valid_loader):
        images = images.to(device)
        labels = labels.to(device)
        batch_size = labels.size(0)
        # compute loss
        with torch.no_grad():
            y_preds = model(images)
        loss = criterion(y_preds.view(-1), labels)
        losses.update(loss.item(), batch_size)
        # record accuracy
        preds.append(y_preds.to('cpu').numpy())
        if CFG.gradient_accumulation_steps > 1:
            loss = loss / CFG.gradient_accumulation_steps
        if step % CFG.print_freq == 0 or step == (len(valid_loader)-1):
            print('EVAL: [{0}/{1}] '
                  'Loss: {loss.val:.4f}({loss.avg:.4f}) '
                  .format(step, len(valid_loader),
                          loss=losses,
                          ))
    predictions = np.concatenate(preds)
    return losses.avg, predictions


### train loop

In [10]:
def train_loop(folds, fold):
    LOGGER.info(f"========== fold: {fold} training ==========")

    #dataset
    trn_idx = folds[folds['fold'] != fold].index
    val_idx = folds[folds['fold'] == fold].index
    train_folds = folds.loc[trn_idx].reset_index(drop=True)
    valid_folds = folds.loc[val_idx].reset_index(drop=True)
    valid_labels = valid_folds[CFG.target_col].values
    train_dataset = LoadDataset(train_folds, transform=get_transforms(data='train'))
    valid_dataset = LoadDataset(valid_folds, transform=get_transforms(data='train'))

    #dataloader
    train_loader = DataLoader(train_dataset,
                              batch_size=CFG.batch_size, 
                              shuffle=True, 
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=True)
    valid_loader = DataLoader(valid_dataset, 
                              batch_size=CFG.batch_size * 2, #TODO why
                              shuffle=False, 
                              num_workers=CFG.num_workers, pin_memory=True, drop_last=False)
    
    #model
    model = Model(CFG, pretrained=True)
    model.to(device)
    optimizer = Adam(model.parameters(),lr= CFG.lr,weight_decay=CFG.weight_decay,amsgrad=False)
    scheduler = get_scheduler(optimizer)
    criterion = RMSELoss()

    #train loop
    best_score = np.inf
    best_loss = np.inf
    for epoch in range(CFG.epochs):
        # train
        avg_loss = train_fn(fold, train_loader, model, criterion, optimizer, epoch, scheduler, device)
        
        # validation
        avg_val_loss, preds = valid_fn(valid_loader, model, criterion, device)

        if isinstance(scheduler, ReduceLROnPlateau):
            scheduler.step(avg_val_loss)
        elif isinstance(scheduler, CosineAnnealingLR):
            scheduler.step()
        elif isinstance(scheduler, CosineAnnealingWarmRestarts):
            scheduler.step()

        # scoring
        score = get_RMSE(valid_labels, preds)
        LOGGER.info(f'Epoch {epoch+1} - Score: {score:.4f}')
        wandb.log({f"[fold{fold}] epoch": epoch+1, 
                   f"[fold{fold}] avg_train_loss": avg_loss, 
                   f"[fold{fold}] avg_val_loss": avg_val_loss,
                   f"[fold{fold}] score": score})
        if score < best_score:
            best_score = score
            LOGGER.info(f'Epoch {epoch+1} - Save Best Score: {best_score:.4f} Model')
            torch.save({'model': model.state_dict(), 
                        'preds': preds},
                        OUTPUT_DIR+f'{CFG.model_name}_fold{fold}_best.pth')
        valid_folds['preds'] = torch.load(OUTPUT_DIR+f'{CFG.model_name}_fold{fold}_best.pth', 
                                      map_location=torch.device('cpu'))['preds']

    return valid_folds

## main

In [None]:
def main():
    if CFG.train:
        # train 
        oof_df = pd.DataFrame()
        for fold in range(CFG.n_fold):
            if fold in CFG.trn_fold:
                _oof_df = train_loop(train, fold)
                oof_df = pd.concat([oof_df, _oof_df])
                LOGGER.info(f"========== fold: {fold} result ==========")
                get_result(_oof_df)
        LOGGER.info(f"========== CV ==========")
        get_result(oof_df)
        #結果を保存
        oof_df.to_csv(OUTPUT_DIR+'oof_df.csv', index=False)
    wandb.finish()
if __name__ == '__main__':
    main()

# LGB [training]

## 設定

### directory設定

In [11]:
import os

OUTPUT_DIR = './my_model'
#TODO
MODEL_DIR = '../my_model/'
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

### Config

In [12]:

class CFG:
    num_workers=4
    size=512
    batch_size=32
    model_name='tf_efficientnet_b0_ns'
    seed=42
    target_size=1
    target_col='Pawpularity'
    n_fold=2

### CV split

In [13]:
train = pd.read_pickle('../my_model/train.pkl')

display(train.groupby(['fold', "bins"]).size())

fold  bins
0     0        165
      1        209
      2        551
      3       1014
      4        941
      5        650
      6        420
      7        267
      8        203
      9        137
      10        99
      11        70
      12        51
      13       179
1     0        165
      1        209
      2        550
      3       1015
      4        942
      5        649
      6        419
      7        266
      8        203
      9        137
      10        99
      11        70
      12        52
      13       180
dtype: int64

## Model

### function

In [16]:
def get_features(test_loader, model, device):
    model.eval()
    features = []
    tk0 = tqdm(enumerate(test_loader), total=len(test_loader))
    for step, (images) in tk0:
        images = images.to(device)
        batch_size = images.size(0)
        with torch.no_grad():
            feature = model.feature(images)
        features.append(feature.to('cpu').numpy())
    features = np.concatenate(features)
    return features

def run_single_lightgbm(param, train, features, target, fold=0, categorical=[]):
    #dataload
    train[[f"img_{i}" for i in np.arange(1280)]] = IMG_FEATURES[fold]
    trn_idx = train[train.fold != fold].index
    val_idx = train[train.fold == fold].index
    LOGGER.info(f'train size : {len(trn_idx)}  valid size : {len(val_idx)}')
    if categorical == []:
        trn_data = lgb.Dataset(train.iloc[trn_idx][features].values, label=target.iloc[trn_idx].values)
        val_data = lgb.Dataset(train.iloc[val_idx][features].values, label=target.iloc[val_idx].values)
    else:
        trn_data = lgb.Dataset(train.iloc[trn_idx][features], label=target.iloc[trn_idx].values, categorical_feature=categorical)
        val_data = lgb.Dataset(train.iloc[val_idx][features], label=target.iloc[val_idx].values, categorical_feature=categorical)
    num_round = 10000

    #train
    clf = lgb.train(param, 
                    trn_data,
                    num_round,
                    valid_sets=[trn_data, val_data],
                    verbose_eval=10,
                    early_stopping_rounds=10)
    LOGGER.info(f'Dumping model with pickle... lightgbm_fold{fold}.pkl')
    with open(OUTPUT_DIR+f'lightgbm_fold{fold}.pkl', 'wb') as fout:
        #モデルをシリアライズ化して保存
        pickle.dump(clf, fout)
    
    #予測
    oof = np.zeros(len(train))
    oof[val_idx] = clf.predict(train.iloc[val_idx][features], num_iteration=clf.best_iteration)
    score = get_RMSE(target.iloc[val_idx].values, oof[val_idx])
    LOGGER.info(f"fold{fold} score: {score:<.5f}")
    
    #future_importance
    fold_importance_df = pd.DataFrame()
    fold_importance_df["Feature"] = features
    fold_importance_df["importance"] = clf.feature_importance(importance_type='gain')
    fold_importance_df["fold"] = fold

    return oof, fold_importance_df, val_idx


def run_kfold_lightgbm(param, train, features, target, n_fold=5, categorical=[]):
    oof = np.zeros(len(train))
    feature_importance_df = pd.DataFrame()
    val_idxes = []
    
    for fold in range(n_fold):
        LOGGER.info(f"===== Fold {fold} =====")
        _oof, fold_importance_df, val_idx = run_single_lightgbm(param, 
                                                                train, features, target, 
                                                                fold=fold, categorical=categorical)
        oof += _oof
        feature_importance_df = pd.concat([feature_importance_df, fold_importance_df], axis=0)
        val_idxes.append(val_idx)
    
    val_idxes = np.concatenate(val_idxes)
    score = get_RMSE(target.iloc[val_idxes].values, oof[val_idxes])
    LOGGER.info(f"CV score: {score:<.5f}")
    
    return oof, feature_importance_df, val_idxes


def show_feature_importance(feature_importance_df):
    cols = (feature_importance_df[["Feature", "importance"]]
                .groupby("Feature").mean().sort_values(by="importance", ascending=False)[:50].index)
    best_features = feature_importance_df.loc[feature_importance_df.Feature.isin(cols)]
    plt.figure(figsize=(8, 16))
    sns.barplot(x="importance", y="Feature", data=best_features.sort_values(by="importance",ascending=False))
    plt.title('Features importance (averaged/folds)')
    plt.tight_layout()
    plt.savefig(OUTPUT_DIR+'feature_importance_df_lightgbm.png')

In [17]:
IMG_FEATURES = []
test_dataset = LoadDataset(train, transform=get_transforms(data='valid'),isTrain=False)
test_loader = DataLoader(test_dataset, 
                         batch_size=CFG.batch_size * 2, 
                         shuffle=False, 
                         num_workers=CFG.num_workers, pin_memory=True, drop_last=False)
for fold in range(CFG.n_fold):
    model = Model(CFG, pretrained=False)
    state = torch.load(MODEL_DIR+f'{CFG.model_name}_fold{fold}_best.pth', 
                       map_location=torch.device('cpu'))['model']
    model.load_state_dict(state)
    model.to(device)
    features = get_features(test_loader, model, device)
    IMG_FEATURES.append(features)
    del state; gc.collect()
    torch.cuda.empty_cache()

  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]


  0%|          | 0/155 [00:00<?, ?it/s]

  0%|          | 0/155 [00:00<?, ?it/s]


In [18]:
target = train['Pawpularity']
features = ['Subject Focus', 'Eyes', 'Face', 'Near', 'Action', 'Accessory',
            'Group', 'Collage', 'Human', 'Occlusion', 'Info', 'Blur'] + [f"img_{i}" for i in np.arange(1280)]

lgb_param = {
    'objective': 'regression',
    'metric': 'rmse',
    'boosting_type': 'gbdt',
    'learning_rate': 0.01,
    'seed': 42,
    'max_depth': -1,
    'min_data_in_leaf': 10,
    'verbosity': -1,
}

oof, feature_importance_df, _ = run_kfold_lightgbm(lgb_param, 
                                                   train, features, target, 
                                                   n_fold=5, categorical=[])

===== Fold 0 =====
train size : 4956  valid size : 4956
Dumping model with pickle... lightgbm_fold0.pkl


Training until validation scores don't improve for 10 rounds
[10]	training's rmse: 20.5749	valid_1's rmse: 20.5689
[20]	training's rmse: 20.5494	valid_1's rmse: 20.5641
[30]	training's rmse: 20.5276	valid_1's rmse: 20.5618
[40]	training's rmse: 20.51	valid_1's rmse: 20.5615
Early stopping, best iteration is:
[36]	training's rmse: 20.5166	valid_1's rmse: 20.5614


ValueError: DataFrame.dtypes for data must be int, float or bool.
Did not expect the data types in the following fields: img_0, img_1, img_2, img_3, img_4, img_5, img_6, img_7, img_8, img_9, img_10, img_11, img_12, img_13, img_14, img_15, img_16, img_17, img_18, img_19, img_20, img_21, img_22, img_23, img_24, img_25, img_26, img_27, img_28, img_29, img_30, img_31, img_32, img_33, img_34, img_35, img_36, img_37, img_38, img_39, img_40, img_41, img_42, img_43, img_44, img_45, img_46, img_47, img_48, img_49, img_50, img_51, img_52, img_53, img_54, img_55, img_56, img_57, img_58, img_59, img_60, img_61, img_62, img_63, img_64, img_65, img_66, img_67, img_68, img_69, img_70, img_71, img_72, img_73, img_74, img_75, img_76, img_77, img_78, img_79, img_80, img_81, img_82, img_83, img_84, img_85, img_86, img_87, img_88, img_89, img_90, img_91, img_92, img_93, img_94, img_95, img_96, img_97, img_98, img_99, img_100, img_101, img_102, img_103, img_104, img_105, img_106, img_107, img_108, img_109, img_110, img_111, img_112, img_113, img_114, img_115, img_116, img_117, img_118, img_119, img_120, img_121, img_122, img_123, img_124, img_125, img_126, img_127, img_128, img_129, img_130, img_131, img_132, img_133, img_134, img_135, img_136, img_137, img_138, img_139, img_140, img_141, img_142, img_143, img_144, img_145, img_146, img_147, img_148, img_149, img_150, img_151, img_152, img_153, img_154, img_155, img_156, img_157, img_158, img_159, img_160, img_161, img_162, img_163, img_164, img_165, img_166, img_167, img_168, img_169, img_170, img_171, img_172, img_173, img_174, img_175, img_176, img_177, img_178, img_179, img_180, img_181, img_182, img_183, img_184, img_185, img_186, img_187, img_188, img_189, img_190, img_191, img_192, img_193, img_194, img_195, img_196, img_197, img_198, img_199, img_200, img_201, img_202, img_203, img_204, img_205, img_206, img_207, img_208, img_209, img_210, img_211, img_212, img_213, img_214, img_215, img_216, img_217, img_218, img_219, img_220, img_221, img_222, img_223, img_224, img_225, img_226, img_227, img_228, img_229, img_230, img_231, img_232, img_233, img_234, img_235, img_236, img_237, img_238, img_239, img_240, img_241, img_242, img_243, img_244, img_245, img_246, img_247, img_248, img_249, img_250, img_251, img_252, img_253, img_254, img_255, img_256, img_257, img_258, img_259, img_260, img_261, img_262, img_263, img_264, img_265, img_266, img_267, img_268, img_269, img_270, img_271, img_272, img_273, img_274, img_275, img_276, img_277, img_278, img_279, img_280, img_281, img_282, img_283, img_284, img_285, img_286, img_287, img_288, img_289, img_290, img_291, img_292, img_293, img_294, img_295, img_296, img_297, img_298, img_299, img_300, img_301, img_302, img_303, img_304, img_305, img_306, img_307, img_308, img_309, img_310, img_311, img_312, img_313, img_314, img_315, img_316, img_317, img_318, img_319, img_320, img_321, img_322, img_323, img_324, img_325, img_326, img_327, img_328, img_329, img_330, img_331, img_332, img_333, img_334, img_335, img_336, img_337, img_338, img_339, img_340, img_341, img_342, img_343, img_344, img_345, img_346, img_347, img_348, img_349, img_350, img_351, img_352, img_353, img_354, img_355, img_356, img_357, img_358, img_359, img_360, img_361, img_362, img_363, img_364, img_365, img_366, img_367, img_368, img_369, img_370, img_371, img_372, img_373, img_374, img_375, img_376, img_377, img_378, img_379, img_380, img_381, img_382, img_383, img_384, img_385, img_386, img_387, img_388, img_389, img_390, img_391, img_392, img_393, img_394, img_395, img_396, img_397, img_398, img_399, img_400, img_401, img_402, img_403, img_404, img_405, img_406, img_407, img_408, img_409, img_410, img_411, img_412, img_413, img_414, img_415, img_416, img_417, img_418, img_419, img_420, img_421, img_422, img_423, img_424, img_425, img_426, img_427, img_428, img_429, img_430, img_431, img_432, img_433, img_434, img_435, img_436, img_437, img_438, img_439, img_440, img_441, img_442, img_443, img_444, img_445, img_446, img_447, img_448, img_449, img_450, img_451, img_452, img_453, img_454, img_455, img_456, img_457, img_458, img_459, img_460, img_461, img_462, img_463, img_464, img_465, img_466, img_467, img_468, img_469, img_470, img_471, img_472, img_473, img_474, img_475, img_476, img_477, img_478, img_479, img_480, img_481, img_482, img_483, img_484, img_485, img_486, img_487, img_488, img_489, img_490, img_491, img_492, img_493, img_494, img_495, img_496, img_497, img_498, img_499, img_500, img_501, img_502, img_503, img_504, img_505, img_506, img_507, img_508, img_509, img_510, img_511, img_512, img_513, img_514, img_515, img_516, img_517, img_518, img_519, img_520, img_521, img_522, img_523, img_524, img_525, img_526, img_527, img_528, img_529, img_530, img_531, img_532, img_533, img_534, img_535, img_536, img_537, img_538, img_539, img_540, img_541, img_542, img_543, img_544, img_545, img_546, img_547, img_548, img_549, img_550, img_551, img_552, img_553, img_554, img_555, img_556, img_557, img_558, img_559, img_560, img_561, img_562, img_563, img_564, img_565, img_566, img_567, img_568, img_569, img_570, img_571, img_572, img_573, img_574, img_575, img_576, img_577, img_578, img_579, img_580, img_581, img_582, img_583, img_584, img_585, img_586, img_587, img_588, img_589, img_590, img_591, img_592, img_593, img_594, img_595, img_596, img_597, img_598, img_599, img_600, img_601, img_602, img_603, img_604, img_605, img_606, img_607, img_608, img_609, img_610, img_611, img_612, img_613, img_614, img_615, img_616, img_617, img_618, img_619, img_620, img_621, img_622, img_623, img_624, img_625, img_626, img_627, img_628, img_629, img_630, img_631, img_632, img_633, img_634, img_635, img_636, img_637, img_638, img_639, img_640, img_641, img_642, img_643, img_644, img_645, img_646, img_647, img_648, img_649, img_650, img_651, img_652, img_653, img_654, img_655, img_656, img_657, img_658, img_659, img_660, img_661, img_662, img_663, img_664, img_665, img_666, img_667, img_668, img_669, img_670, img_671, img_672, img_673, img_674, img_675, img_676, img_677, img_678, img_679, img_680, img_681, img_682, img_683, img_684, img_685, img_686, img_687, img_688, img_689, img_690, img_691, img_692, img_693, img_694, img_695, img_696, img_697, img_698, img_699, img_700, img_701, img_702, img_703, img_704, img_705, img_706, img_707, img_708, img_709, img_710, img_711, img_712, img_713, img_714, img_715, img_716, img_717, img_718, img_719, img_720, img_721, img_722, img_723, img_724, img_725, img_726, img_727, img_728, img_729, img_730, img_731, img_732, img_733, img_734, img_735, img_736, img_737, img_738, img_739, img_740, img_741, img_742, img_743, img_744, img_745, img_746, img_747, img_748, img_749, img_750, img_751, img_752, img_753, img_754, img_755, img_756, img_757, img_758, img_759, img_760, img_761, img_762, img_763, img_764, img_765, img_766, img_767, img_768, img_769, img_770, img_771, img_772, img_773, img_774, img_775, img_776, img_777, img_778, img_779, img_780, img_781, img_782, img_783, img_784, img_785, img_786, img_787, img_788, img_789, img_790, img_791, img_792, img_793, img_794, img_795, img_796, img_797, img_798, img_799, img_800, img_801, img_802, img_803, img_804, img_805, img_806, img_807, img_808, img_809, img_810, img_811, img_812, img_813, img_814, img_815, img_816, img_817, img_818, img_819, img_820, img_821, img_822, img_823, img_824, img_825, img_826, img_827, img_828, img_829, img_830, img_831, img_832, img_833, img_834, img_835, img_836, img_837, img_838, img_839, img_840, img_841, img_842, img_843, img_844, img_845, img_846, img_847, img_848, img_849, img_850, img_851, img_852, img_853, img_854, img_855, img_856, img_857, img_858, img_859, img_860, img_861, img_862, img_863, img_864, img_865, img_866, img_867, img_868, img_869, img_870, img_871, img_872, img_873, img_874, img_875, img_876, img_877, img_878, img_879, img_880, img_881, img_882, img_883, img_884, img_885, img_886, img_887, img_888, img_889, img_890, img_891, img_892, img_893, img_894, img_895, img_896, img_897, img_898, img_899, img_900, img_901, img_902, img_903, img_904, img_905, img_906, img_907, img_908, img_909, img_910, img_911, img_912, img_913, img_914, img_915, img_916, img_917, img_918, img_919, img_920, img_921, img_922, img_923, img_924, img_925, img_926, img_927, img_928, img_929, img_930, img_931, img_932, img_933, img_934, img_935, img_936, img_937, img_938, img_939, img_940, img_941, img_942, img_943, img_944, img_945, img_946, img_947, img_948, img_949, img_950, img_951, img_952, img_953, img_954, img_955, img_956, img_957, img_958, img_959, img_960, img_961, img_962, img_963, img_964, img_965, img_966, img_967, img_968, img_969, img_970, img_971, img_972, img_973, img_974, img_975, img_976, img_977, img_978, img_979, img_980, img_981, img_982, img_983, img_984, img_985, img_986, img_987, img_988, img_989, img_990, img_991, img_992, img_993, img_994, img_995, img_996, img_997, img_998, img_999, img_1000, img_1001, img_1002, img_1003, img_1004, img_1005, img_1006, img_1007, img_1008, img_1009, img_1010, img_1011, img_1012, img_1013, img_1014, img_1015, img_1016, img_1017, img_1018, img_1019, img_1020, img_1021, img_1022, img_1023, img_1024, img_1025, img_1026, img_1027, img_1028, img_1029, img_1030, img_1031, img_1032, img_1033, img_1034, img_1035, img_1036, img_1037, img_1038, img_1039, img_1040, img_1041, img_1042, img_1043, img_1044, img_1045, img_1046, img_1047, img_1048, img_1049, img_1050, img_1051, img_1052, img_1053, img_1054, img_1055, img_1056, img_1057, img_1058, img_1059, img_1060, img_1061, img_1062, img_1063, img_1064, img_1065, img_1066, img_1067, img_1068, img_1069, img_1070, img_1071, img_1072, img_1073, img_1074, img_1075, img_1076, img_1077, img_1078, img_1079, img_1080, img_1081, img_1082, img_1083, img_1084, img_1085, img_1086, img_1087, img_1088, img_1089, img_1090, img_1091, img_1092, img_1093, img_1094, img_1095, img_1096, img_1097, img_1098, img_1099, img_1100, img_1101, img_1102, img_1103, img_1104, img_1105, img_1106, img_1107, img_1108, img_1109, img_1110, img_1111, img_1112, img_1113, img_1114, img_1115, img_1116, img_1117, img_1118, img_1119, img_1120, img_1121, img_1122, img_1123, img_1124, img_1125, img_1126, img_1127, img_1128, img_1129, img_1130, img_1131, img_1132, img_1133, img_1134, img_1135, img_1136, img_1137, img_1138, img_1139, img_1140, img_1141, img_1142, img_1143, img_1144, img_1145, img_1146, img_1147, img_1148, img_1149, img_1150, img_1151, img_1152, img_1153, img_1154, img_1155, img_1156, img_1157, img_1158, img_1159, img_1160, img_1161, img_1162, img_1163, img_1164, img_1165, img_1166, img_1167, img_1168, img_1169, img_1170, img_1171, img_1172, img_1173, img_1174, img_1175, img_1176, img_1177, img_1178, img_1179, img_1180, img_1181, img_1182, img_1183, img_1184, img_1185, img_1186, img_1187, img_1188, img_1189, img_1190, img_1191, img_1192, img_1193, img_1194, img_1195, img_1196, img_1197, img_1198, img_1199, img_1200, img_1201, img_1202, img_1203, img_1204, img_1205, img_1206, img_1207, img_1208, img_1209, img_1210, img_1211, img_1212, img_1213, img_1214, img_1215, img_1216, img_1217, img_1218, img_1219, img_1220, img_1221, img_1222, img_1223, img_1224, img_1225, img_1226, img_1227, img_1228, img_1229, img_1230, img_1231, img_1232, img_1233, img_1234, img_1235, img_1236, img_1237, img_1238, img_1239, img_1240, img_1241, img_1242, img_1243, img_1244, img_1245, img_1246, img_1247, img_1248, img_1249, img_1250, img_1251, img_1252, img_1253, img_1254, img_1255, img_1256, img_1257, img_1258, img_1259, img_1260, img_1261, img_1262, img_1263, img_1264, img_1265, img_1266, img_1267, img_1268, img_1269, img_1270, img_1271, img_1272, img_1273, img_1274, img_1275, img_1276, img_1277, img_1278, img_1279

### feature_importance

In [None]:
show_feature_importance(feature_importance_df)
feature_importance_df.to_csv(OUTPUT_DIR+f'feature_importance_df.csv', index=False)

### モデル出力

In [None]:
train['pred'] = oof
score = get_RMSE(train['Pawpularity'].values, train['pred'].values)
LOGGER.info(f"CV: {score:<.5f}")
train[['Id', 'Pawpularity', 'pred']].to_pickle(OUTPUT_DIR+'oof.pkl')