# PetFinder2021 simple Fastai Train

This is a simple fastai training notebook. The models are trained on the images only; the tabular metadata columns are not used as model inputs.

The Inference notebook can be found [here](https://www.kaggle.com/joatom/petfinder2021-simple-fastai-inference/).

A related discussion can be found [here](https://www.kaggle.com/c/petfinder-pawpularity-score/discussion/294822#1617055).

In [None]:
import pandas as pd
import numpy as np

import fastai

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import mean_squared_error

from tqdm import tqdm

from fastai.vision.all import *
from fastai.callback.all import *
import torchvision.models as torch_models

import os
import sys


In [None]:
# Display the installed fastai version string.
fastai.__version__

Set `DEBUG = False` for the full run. With `DEBUG = True` the notebook uses fewer folds, fewer epochs and only the first 500 training rows.

In [None]:
# Master switch: True selects a quick, reduced run; False the full run.
DEBUG = False

# (number of CV folds, number of fine-tuning epochs) for the chosen mode.
N_SPLITS, EPOCHS = (3, 3) if DEBUG else (7, 10)

# One fold assignment column is created per seed listed here.
SEEDS = [2021]

In [None]:
# Load the competition tables. Rows keep their on-disk order here; no
# shuffling is done in this cell.
train = pd.read_csv('../input/petfinder-pawpularity-score/train.csv')
test = pd.read_csv('../input/petfinder-pawpularity-score/test.csv')

# In debug mode keep only the first 500 training rows to speed things up.
if DEBUG:
    train = train.iloc[:500]


In [None]:
# Preview the first rows of the training table.
train.head()

In [None]:
# Bin the continuous target into 14 quantile buckets (integer bin codes);
# these codes are the stratification criterion for the fold split.
train['fold_crit'] = pd.qcut(train['Pawpularity'], q=14, labels=False)

# Show how many rows fall into each bucket.
train['fold_crit'].hist()

In [None]:
# Name of the regression target column and of the stratification column.
target = 'Pawpularity'
fold_crit = 'fold_crit'

# Columns that are identifiers, targets or fold bookkeeping, not features.
non_features = {'Id', 'Pawpularity', 'fold_crit', target}
non_features.update('kfold_' + str(s) for s in SEEDS)

# Keep the remaining columns in their DataFrame order. Building the list from
# a set difference would give an order that can change between runs, because
# string hashing is randomised per interpreter session.
features = [col for col in train.columns if col not in non_features]

In [None]:
for seed in SEEDS:
    # Stratified K-fold on the binned target (`fold_crit`), so every fold sees
    # a similar target distribution. One `kfold_<seed>` column per seed.
    skf = StratifiedKFold(n_splits = N_SPLITS, random_state = seed, shuffle = True)

    fold_col = 'kfold_' + str(seed)
    train[fold_col] = -1  # -1 marks "not assigned yet"
    fold_col_pos = train.columns.get_loc(fold_col)

    for f, (train_idx, valid_idx) in enumerate(skf.split(X = train[features+[target]], y = train[fold_crit].values)):
        # split() yields *positional* row indices, so write through .iloc.
        # Using .loc would treat them as index labels, which is only correct
        # while `train` has a default 0..n-1 index.
        train.iloc[valid_idx, fold_col_pos] = f

# Number of rows per fold for the first seed.
train.groupby(f'kfold_{SEEDS[0]}')[target].count()

In [None]:
# Build each image's path relative to the competition folder:
# "<subfolder>/<Id>.jpg" for both the train and the test table.
for frame, prefix in ((train, 'train/'), (test, 'test/')):
    frame['image_id'] = prefix + frame['Id'] + '.jpg'

train.head()

In [None]:
# Store the target as floating point (presumably because the regression
# learner expects float targets -- TODO confirm), then print the column
# dtypes and non-null counts.
train['Pawpularity'] = train['Pawpularity'].astype('float')
train.info()

In [None]:
# Histogram of the target values using 20 bins.
train.Pawpularity.hist(bins=20)

In [None]:
def run_fold(fold = 4, seed = SEEDS[0], verbose = True):
    """Train one cross-validation fold and record its out-of-fold predictions.

    Rows of the global ``train`` frame whose ``kfold_{seed}`` value equals
    ``fold`` form the validation set; all other rows are used for training.

    Side effects: overwrites ``train['is_demo_valid']``, fills ``train['oof']``
    for the validation rows, prints the fold RMSE and exports the learner to
    ``export_fold_{fold}.pkl``.

    Args:
        fold: Fold number to hold out for validation.
        seed: Seed selecting which ``kfold_{seed}`` column defines the folds.
        verbose: If true, additionally show a batch, run the learning-rate
            finder and show sample results.

    Returns:
        The trained fastai learner.
    """
    print(f'Run with validation set = fold no. {fold}')

    # Boolean mask of the validation rows; computed once and reused below.
    valid_mask = train[f'kfold_{seed}'] == fold
    train['is_demo_valid'] = valid_mask

    # define fastai dataloader
    dls = ImageDataLoaders.from_df(df =  train[['image_id', 'Pawpularity', 'is_demo_valid']],
                                   path = '.',
                                   folder = '../input/petfinder-pawpularity-score',
                                   valid_col = 'is_demo_valid',
                                   y_block=RegressionBlock,
                                   item_tfms=CropPad(512),
                                   batch_tfms=aug_transforms(size=224),
                                   bs = 16)

    if verbose:
        dls.show_batch()

    learn = cnn_learner(dls,
                        torch_models.resnext101_32x8d,
                        metrics=[rmse],
                        y_range= (torch.Tensor([1,100]))
                       )

    # NOTE(review): mixed precision is enabled only in DEBUG runs; confirm
    # this is intended and not an inverted condition.
    if DEBUG:
        # mixed precision training
        learn = learn.to_fp16()

    if verbose:
        learn.lr_find()

    learn.fine_tune(EPOCHS, 1e-3, cbs=[MixUp(.4), CutMix()])
    oof_preds, _ = learn.get_preds()

    # Flatten to 1-D so the column assignment does not depend on pandas
    # coercing a 2-D tensor (predictions presumably have shape (n, 1) --
    # TODO confirm).
    train.loc[valid_mask, 'oof'] = oof_preds.flatten().numpy()

    # RMSE as sqrt(MSE): the `squared=False` argument of mean_squared_error
    # is deprecated in scikit-learn 1.4 and removed in later releases.
    fold_rmse = np.sqrt(mean_squared_error(train.loc[valid_mask, 'Pawpularity'],
                                           train.loc[valid_mask, 'oof']))
    print(f"rmse real: {fold_rmse:0.5f}")

    if verbose:
        learn.show_results()

    learn.export(f'export_fold_{fold}.pkl')

    return learn

In [None]:
# Optional trial run of a single fold (run_fold's default fold); left disabled.
#learn = run_fold(verbose=False)

In [None]:
# Train one model per fold using the first seed's fold assignment. After the
# loop `learn` refers to the learner of the last fold.
for fold_idx in range(N_SPLITS):
    learn = run_fold(fold=fold_idx, seed=SEEDS[0], verbose=False)

In [None]:

# RMSE between the target and the `oof` column over the whole training table.
# Computed as sqrt(MSE) because the `squared=False` argument of
# mean_squared_error is deprecated in scikit-learn 1.4 and removed later.
print(f"rmse: {np.sqrt(mean_squared_error(train['Pawpularity'], train['oof'])):0.5f}")

In [None]:
# Write the training table with all its current columns to CSV, without the index.
train.to_csv('train_plus.csv',index=False)

In [None]:
# Scatter plot of out-of-fold predictions (x) against the true target (y).
oof_vs_target = train[['oof', 'Pawpularity']]
oof_vs_target.plot.scatter(x='oof', y='Pawpularity')

In [None]:
# One histogram each for the `oof` column and the target column.
train[['oof','Pawpularity']].hist()

In [None]:
# Preview the first rows of the training table in its current state.
train.head()