In [None]:
import os
import gc
import sys
import math
import time
import random
import shutil
import seaborn as sns
import pickle
from pathlib import Path
from contextlib import contextmanager
from collections import defaultdict, Counter
import fastai
import torchvision.models as torch_models

import scipy as sp
import numpy as np
import pandas as pd

from sklearn import preprocessing
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import StratifiedKFold, GroupKFold, KFold

from tqdm.auto import tqdm
from functools import partial

import cv2
from PIL import Image
from IPython.core.display import display
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam, SGD
import torchvision.models as models
from torch.nn.parameter import Parameter
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau

import albumentations as transforms
from albumentations.pytorch import ToTensorV2
from albumentations import ImageOnlyTransform
from fastai.vision.all import *

sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')
import timm
import lightgbm as lgb

from torch.cuda.amp import autocast, GradScaler

import warnings
# Silence library warnings for the whole notebook (deliberate: keeps cell output readable).
warnings.filterwarnings('ignore')
# Let cuDNN benchmark convolution algorithms for the fixed input size used here.
torch.backends.cudnn.benchmark = True

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Directory for artefacts written by this notebook; exist_ok makes the cell
# safe to re-run and avoids the check-then-create race.
OUTPUT_DIR = './my_model/'
os.makedirs(OUTPUT_DIR, exist_ok=True)



In [None]:
# Report the installed fastai release and the compute device selected above.
for env_info in (fastai.__version__, device):
    print(env_info)

In [None]:
class CFG:
    """Notebook-wide settings, read as plain class attributes (``CFG.<name>``)."""

    # --- run control ---
    DEBUG = True          # when True the data cell trains 1 epoch on a 1000-row sample
    train = True
    grad_cam = True
    apex = False
    print_freq = 10
    num_workers = 4
    seed = 42

    # --- model ---
    model_name = 'vit_base_patch16_384'  # change to match the model
    size = 384                           # change to match the model
    isTransFormer = True                 # change to match the model
    target_size = 1
    target_col = 'Pawpularity'

    # --- optimisation ---
    scheduler = 'CosineAnnealingLR'  # ['ReduceLROnPlateau', 'CosineAnnealingLR', 'CosineAnnealingWarmRestarts']
    epochs = 3
    T_max = 3  # CosineAnnealingLR
    lr = 1e-4
    min_lr = 1e-6
    batch_size = 16
    weight_decay = 1e-6
    gradient_accumulation_steps = 1
    max_grad_norm = 1000

    # --- cross-validation ---
    n_fold = 2
    trn_fold = [0, 1]


In [None]:
# Load the competition metadata tables.
test = pd.read_csv('../input/petfinder-pawpularity-score/test.csv')
train = pd.read_csv('../input/petfinder-pawpularity-score/train.csv')

# Image path of every row, relative to the competition folder.
train['image_id'] = 'train/' + train['Id'] + '.jpg'
test['image_id'] = 'test/' + test['Id'] + '.jpg'

if CFG.DEBUG:
    # Smoke-test mode: one epoch on a small, reproducible subset.
    CFG.epochs = 1
    # Cap the sample size so a table with fewer than 1000 rows does not raise.
    train = train.sample(n=min(1000, len(train)), random_state=CFG.seed).reset_index(drop=True)

# Last expression of the cell so the preview is actually rendered.
train.head()

In [None]:
# Bin the continuous target (1 + log2(n) bins) so the folds can be stratified on it.
num_bins = int(np.floor(1 + np.log2(len(train))))
train["bins"] = pd.cut(train[CFG.target_col], bins=num_bins, labels=False)

Fold = StratifiedKFold(n_splits=CFG.n_fold, shuffle=True, random_state=CFG.seed)

# split() yields positional indices, so collect the fold ids in a positional
# array instead of writing through label-based .loc; this also keeps the
# column integer-typed from the start.
fold_ids = np.full(len(train), -1, dtype=int)
for n, (_, val_index) in enumerate(Fold.split(train, train["bins"])):
    fold_ids[val_index] = n
train['fold'] = fold_ids

display(train.groupby(['fold', "bins"]).size())
train.to_pickle(OUTPUT_DIR+'train.pkl')

In [None]:
# Store the target as floating point.
train['Pawpularity'] = train['Pawpularity'].astype(float)

In [None]:
def petfinder_rmse(input,target):
    """RMSE scaled by 100 between sigmoid(logits) and the target.

    Args:
        input: tensor of raw model outputs (logits); any shape, it is flattened.
        target: tensor of targets with the same number of elements as ``input``;
            it is flattened too, so ``(N,)`` and ``(N, 1)`` layouts both work.

    Returns:
        A 0-dim tensor: ``100 * sqrt(mean((sigmoid(input) - target) ** 2))``.
    """
    preds = torch.sigmoid(input.flatten())
    # Flatten the target as well: an (N, 1) target against an (N,) prediction
    # would otherwise broadcast to (N, N) and give a wrong value.
    return 100*torch.sqrt(F.mse_loss(preds, target.flatten()))
def run_fold(fold = 0, seed = CFG.seed, verbose = True):
    """Train one cross-validation fold and record its out-of-fold predictions.

    Rows of the global ``train`` frame whose ``fold`` column equals ``fold`` are
    held out for validation. After training, the validation predictions are
    written to ``train['oof']`` on the original target scale, the fold RMSE is
    printed, and the learner is exported to ``export_fold_{fold}.pkl``.

    Args:
        fold: fold index to hold out; must occur in ``train['fold']``.
        seed: seed passed to fastai's ``set_seed`` before the data loaders and
            the model are built.
        verbose: if True, additionally show a batch, run the LR finder and
            show sample results.

    Returns:
        The trained fastai ``Learner``.

    Raises:
        ValueError: if no row of ``train`` belongs to ``fold``.
    """
    print(f'Run with validation set = fold no. {fold}')

    # mark validation set (the fold ids live in the 'fold' column)
    valid_mask = train['fold'] == fold
    if not valid_mask.any():
        raise ValueError(f"fold {fold} has no rows in train['fold']")
    train['is_demo_valid'] = valid_mask

    set_seed(seed)

    # The transformer branch trains with BCE-with-logits, which needs targets in
    # [0, 1]; the target is on a 1-100 scale, so train on target / 100 and scale
    # the predictions back afterwards. The CNN branch predicts the raw range.
    target_scale = 100.0 if CFG.isTransFormer else 1.0
    fold_df = pd.DataFrame({
        'image_id': train['image_id'],
        'target': train[CFG.target_col] / target_scale,
        'is_demo_valid': valid_mask,
    })

    # define fastai dataloader (column 0 = file name, column 1 = label)
    dls = ImageDataLoaders.from_df(df = fold_df,
                                   path = '.',
                                   folder = '../input/petfinder-pawpularity-score',
                                   valid_col = 'is_demo_valid',
                                   y_block = RegressionBlock,
                                   item_tfms = CropPad(512),
                                   batch_tfms = aug_transforms(size=CFG.size),
                                   bs = CFG.batch_size)

    if verbose:
        dls.show_batch()

    if CFG.isTransFormer:
        model = timm.create_model(CFG.model_name, num_classes=dls.c)
        learn = Learner(dls, model,
                        loss_func=BCEWithLogitsLossFlat(),
                        metrics=AccumMetric(petfinder_rmse))
    else:
        # look the architecture up by name in torchvision.models
        learn = cnn_learner(dls,
                            getattr(torch_models, CFG.model_name),
                            metrics=[rmse],
                            y_range=(1, 100))

    # mixed precision training: always for the transformer, and in DEBUG runs
    # for the CNN; applied once so the callback is not registered twice
    if CFG.isTransFormer or CFG.DEBUG:
        learn = learn.to_fp16()

    if verbose:
        learn.lr_find()

    learn.fine_tune(CFG.epochs, 1e-3, cbs=[MixUp(.4), CutMix()])

    # get_preds applies the loss function's activation (sigmoid for
    # BCEWithLogitsLossFlat); flatten the (N, 1) tensor and undo the scaling.
    oof_preds, _ = learn.get_preds()
    train.loc[valid_mask, 'oof'] = oof_preds.flatten().numpy() * target_scale

    # RMSE via sqrt(MSE): works on every scikit-learn version, unlike squared=False
    score = np.sqrt(mean_squared_error(train.loc[valid_mask, CFG.target_col],
                                       train.loc[valid_mask, 'oof']))
    print(f"{score:0.5f}")

    # log to Weights & Biases only when the notebook has actually imported it
    if 'wandb' in globals():
        wandb.log({f"[fold{fold}] score": score})

    if verbose:
        learn.show_results()

    learn.export(f'export_fold_{fold}.pkl')

    return learn

In [None]:
for f in range(CFG.n_fold):
    # Drop the previous fold's learner and return its cached GPU memory before
    # the next model is built; otherwise two models are alive at once.
    learn = None
    gc.collect()
    torch.cuda.empty_cache()
    learn = run_fold(f, seed = CFG.seed, verbose = False)

In [None]:
# Overall out-of-fold RMSE; sqrt(MSE) is used because the `squared` keyword
# is no longer accepted by recent scikit-learn releases.
oof_rmse = np.sqrt(mean_squared_error(train['Pawpularity'], train['oof']))
print(f"rmse: {oof_rmse:0.5f}")

In [None]:
# Persist the training table with the columns added above, without the index.
train.to_csv(path_or_buf='train_plus.csv', index=False)

# Inference

In [None]:
# The sample submission defines the ids that must be predicted.
submission = pd.read_csv('../input/petfinder-pawpularity-score/sample_submission.csv')

# Working copy whose 'Id' column holds image paths relative to the competition folder.
test = submission.assign(Id='test/' + submission['Id'] + '.jpg')
test.head()

In [None]:
def inf_run(fold):
    """Predict the global ``test`` frame with the exported learner of one fold.

    Args:
        fold: fold index used in the export file name ``export_fold_{fold}.pkl``.

    Returns:
        The prediction tensor returned by ``learn.get_preds`` for the test items.
    """
    # NOTE: load_learner unpickles the file, so only load exports you trust.
    # Load on the GPU when one is available, otherwise fall back to the CPU.
    learn = load_learner(fname = Path(f'../input/petfinder2021-fastai-train/export_fold_{fold}.pkl'),
                         cpu = not torch.cuda.is_available())
    test_dl = learn.dls.test_dl(test)
    # plain forward pass over the test items (no test-time augmentation)
    preds, _ = learn.get_preds(dl=test_dl)

    return preds

In [None]:
# Placeholder value until the averaged predictions are filled in.
submission['Pawpularity'] = 0

# One prediction column per fold, named preds_<fold>.
for fold in range(CFG.n_fold):
    fold_preds = inf_run(fold)
    test[f'preds_{fold}'] = fold_preds.numpy()[:, 0]

In [None]:
# Preview the first rows of the test frame.
test.head(n=5)

In [None]:
# Average exactly the prediction columns that were produced: one per fold.
fold_cols = [f'preds_{i}' for i in range(CFG.n_fold)]

# The transformer learner is trained with BCEWithLogitsLossFlat, so get_preds
# returns sigmoid outputs in (0, 1); map them back to the 1-100 target range.
# NOTE(review): assumes the exported learners were produced by run_fold with
# the same CFG.isTransFormer setting -- confirm before submitting.
pred_scale = 100.0 if CFG.isTransFormer else 1.0

submission['Pawpularity'] = test[fold_cols].mean(axis=1) * pred_scale
submission.to_csv('submission.csv', index = False)
submission.head()