# Fastai training with Resnet50

In [None]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import fastai
from fastai import *
from fastai.vision import *
from fastai.vision.all import *
from fastai.data.all import *
from fastai.metrics import *
from fastai.imports import *
from fastai.basics import *

#img augmentation:
import torch
import torchvision
from torch.utils.data import Dataset
from torchvision import transforms
from torchvision.transforms import Normalize

import os, sys

# Loading, preparing data

Here I read in the CSV files and reshape the dataset into the structure needed for the training phase.

In [None]:
path = Path('../input/petfinder-pawpularity-score')
!dir {path}

In [None]:
sample_s = pd.read_csv(path/'sample_submission.csv')
sample_s.head()

In [None]:
data_train = pd.read_csv(path/'train.csv')
data_test = pd.read_csv(path/'test.csv')
data_train.head()

In [None]:
data_test.head()

In [None]:
data_train.info()

The dataset does not appear to contain any null values.

In [None]:
# get multilabel col-names 
# from: https://stackoverflow.com/questions/38334296/reversing-one-hot-encoding-in-pandas

def change_labels_nolabel(df):
    """Return a copy of ``df`` without the twelve one-hot metadata columns.

    ``DataFrame.drop`` raises ``KeyError`` if any of the columns is absent.
    """
    onehot_cols = (
        'Subject Focus', 'Eyes', 'Face', 'Near', 'Action', 'Accessory',
        'Group', 'Collage', 'Human', 'Occlusion', 'Info', 'Blur',
    )
    return df.drop(columns=list(onehot_cols))


#Set target label for regression:
# (https://www.kaggle.com/tanlikesmath/petfinder-pawpularity-eda-fastai-starter)
def new_target_nolabel(data_train):
    """Add the regression target ``Target_Value`` = ``Pawpularity`` / 100.

    Parameters
    ----------
    data_train : pandas.DataFrame
        Must contain a ``Pawpularity`` column.

    Returns
    -------
    pandas.DataFrame
        A new frame with the extra ``Target_Value`` column. The frame passed
        in is left untouched, so calling this function has no side effects
        on the caller's data.
    """
    return data_train.assign(Target_Value=data_train['Pawpularity'] / 100.)

def get_suff_to_df_nolabel(df, test=False):
    """Append ``.jpg`` to every ``Id`` and keep only the columns used downstream.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``Id`` column.
    test : bool, default False
        When false the result has the columns ``Id``, ``Pawpularity`` and
        ``Target_Value``; when true it has only ``Id``.

    Returns
    -------
    pandas.DataFrame
        A new frame; ``df`` itself is not modified.
    """
    keep = ['Id'] if test else ['Id', 'Pawpularity', 'Target_Value']
    # assign() builds a new frame instead of overwriting the caller's Id column.
    with_suffix = df.assign(Id=df['Id'].apply(lambda x: f'{x}.jpg'))
    # reindex (not plain selection) preserves the existing behaviour of
    # producing an all-NaN column when a requested column is absent.
    return with_suffix.reindex(columns=keep)


In [None]:
# Test frame: drop the one-hot columns, then add the ".jpg" suffix to Id.
data_test = (
    data_test
    .pipe(change_labels_nolabel)
    .pipe(get_suff_to_df_nolabel, test=True)
)

# Training frame: same clean-up, plus the Target_Value regression column.
data_train = (
    data_train
    .pipe(change_labels_nolabel)
    .pipe(new_target_nolabel)
    .pipe(get_suff_to_df_nolabel)
)

In [None]:
data_train.head()

In [None]:
data_test.head()

# Preparing dataloader

In [None]:
from torch.linalg import solve

bs=32
size=224

# Resize takes a single ResizeMethod. A one-element list never compares equal
# to ResizeMethod.Squish, so the squish setting was silently not honoured.
method = ResizeMethod.Squish  # alternatives: ResizeMethod.Pad, ResizeMethod.Crop
item_tfms = Resize(size, method)

# The module-level name `Normalize` is torchvision's class (imported after the
# fastai star-imports), which is not a fastai batch transform. Import fastai's
# version under an unambiguous name.
from fastai.vision.all import Normalize as FastaiNormalize, imagenet_stats

# Build the full batch pipeline as one list. The previous code passed
# `batch_tfms.append(...)` as the argument; list.append() returns None, so the
# dataloaders were created with batch_tfms=None and no augmentation at all.
batch_tfms = [
    *aug_transforms(do_flip=True, max_lighting=0.1, max_zoom=1.1, max_warp=0.1,
                    max_rotate=20, p_affine=0.75, p_lighting=0.75),
    FastaiNormalize.from_stats(*imagenet_stats),  # normalize with imagenet stats
]
#batch_tfms = setup_aug_tfms([Brightness(), Contrast(), Hue(), Saturation()])

# fn_col=0 -> 'Id' (file name), label_col=2 -> 'Target_Value' in data_train.
data = ImageDataLoaders.from_df(data_train, path, valid_pct=0.2, fn_col=0, label_col=2, folder='train',
                                seed=999,
                                bs=bs,
                                num_workers=8,
                                y_block=RegressionBlock, #(ImageBlock, MultiCategoryBlock),
                                item_tfms=item_tfms,
                                batch_tfms=batch_tfms,
                               )
                                
#data.cuda()


In [None]:
# checking out the shape of training samples and labels
xshape, yshape = data.one_batch()
print(f'Input image shape: {xshape.shape}, target shape: {yshape.shape}.')

In [None]:
data.show_batch(figsize=(10,10))

# Training

In [None]:
cd /kaggle/working

In [None]:
# Definition of metrics
# by: https://www.kaggle.com/tanlikesmath/petfinder-pawpularity-eda-fastai-starter

def petfinder_rmse(input,target):
    """Root-mean-square error rescaled by 100.

    Parameters
    ----------
    input : torch.Tensor
        Raw model outputs (logits); a sigmoid is applied here.
    target : torch.Tensor
        Ground-truth values on the same scale as the sigmoid output
        (in this notebook: ``Target_Value`` = ``Pawpularity`` / 100).

    Returns
    -------
    torch.Tensor
        Scalar tensor ``100 * sqrt(mse(sigmoid(input), target))``.
    """
    preds = torch.sigmoid(input.flatten())
    # Flatten the target as well: an (N, 1) target would otherwise broadcast
    # against the (N,) predictions into an N x N difference matrix.
    return 100*torch.sqrt(F.mse_loss(preds, target.flatten()))

In [None]:
import torchvision.models as models

resnet_pytorch = models.resnet50

In [None]:
resnet_pytorch

In [None]:
# Directory handed to the learner as `model_dir` (used by the later
# learner.save / learner.load calls and, presumably, SaveModelCallback — confirm).
model_dir = '/kaggle/working/'
# ResNet50 learner on `data`, trained with BCE-with-logits against the scaled
# target and reporting petfinder_rmse (0-100 scale) as the metric.
learner = cnn_learner(data, resnet_pytorch, loss_func=BCEWithLogitsLossFlat(), opt_func=Adam, metrics=petfinder_rmse, model_dir=model_dir).to_fp16() # fp16 for faster calculation

In [None]:
learner.lr_find()

In [None]:
# petfinder_rmse is lower-is-better, but fastai's tracker callbacks default to
# comp=np.greater for any monitor whose name lacks 'loss' or 'error'.
# Without comp=np.less, early stopping would treat a rising RMSE as improvement.
learner.fit_one_cycle(7, slice(3e-6, 6e-4), cbs=[SaveModelCallback(), EarlyStoppingCallback(monitor='petfinder_rmse', comp=np.less, patience=3)])

In [None]:
learner.save('model_1')

In [None]:
learner.lr_find(suggest_funcs=minimum)

In [None]:
# comp=np.less: RMSE must decrease to count as an improvement (the default for
# a monitor name without 'loss'/'error' is np.greater).
learner.fit_one_cycle(7, slice(1e-5, 4e-3), cbs=[SaveModelCallback(), EarlyStoppingCallback(monitor='petfinder_rmse', comp=np.less, patience=3)])

In [None]:
# comp=np.less: RMSE must decrease to count as an improvement (the default for
# a monitor name without 'loss'/'error' is np.greater).
learner.fit_one_cycle(10, slice(1e-5, 1e-2), cbs=[SaveModelCallback(), EarlyStoppingCallback(monitor='petfinder_rmse', comp=np.less, patience=3)])

In [None]:
learner.save('model_2')

In [None]:
learner.lr_find()

In [None]:
# comp=np.less: RMSE must decrease to count as an improvement (the default for
# a monitor name without 'loss'/'error' is np.greater).
learner.fit_one_cycle(10, slice(1e-5, 1e-2), cbs=[SaveModelCallback(), EarlyStoppingCallback(monitor='petfinder_rmse', comp=np.less, patience=3)])

No further improvement was reached after the 3rd epoch of the run saved as model_2, so model_2 is used as the final model.

In [None]:
learner.load('model_2')

In [None]:
# resnet structure and parameters
learner.summary()

**With bs=64 the best RMSE was 18.4;
with bs=32 the best RMSE was 18.27.**

# Inference

ref: https://www.kaggle.com/warotjanpinitrat/lovely-doggo-with-bonky-fastai-timm

In [None]:
data_test.head()

In [None]:
# Build the file list from data_test so that prediction i belongs to row i of
# the submission frame. Predictions are later written into data_test by
# position, and a directory listing is not guaranteed to follow the CSV order.
# data_test['Id'] already carries the '.jpg' suffix at this point.
tst_files = [path/'test'/fname for fname in data_test['Id']]
len(tst_files)

In [None]:
tst_dl = learner.dls.test_dl(tst_files)

In [None]:
# Use the test dataloader created in the previous cell; `tst_dl_2` is not
# defined anywhere in this notebook and raised NameError on a fresh kernel.
preds, _ = learner.get_preds(dl=tst_dl)

In [None]:
preds

In [None]:
all_preds = []
all_preds.append(preds)
# Scale the stacked values, not the list: `all_preds*100` repeats the Python
# list 100 times and leaves the mean on the 0-1 scale.
np.mean(np.stack(all_preds)) * 100

In [None]:
# Average over the (currently single) entries of all_preds.
preds = np.mean(np.stack(all_preds), axis=0)
# Flatten explicitly so a column-shaped (n, 1) prediction array becomes a plain
# 1-D column, then rescale from the 0-1 training target back to 0-100.
data_test['Pawpularity'] = preds.reshape(-1) * 100

In [None]:
data_test

In [None]:
# data_test['Id'] carries the '.jpg' suffix that was added for the dataloader.
# The submission ids are expected to match the bare ids from test.csv, so strip
# the suffix on the way out (a no-op for ids without it).
submission = data_test.assign(Id=data_test['Id'].str.replace(r'\.jpg$', '', regex=True))
submission.to_csv('submission.csv',index=False)