In [None]:
!pip install timm

In [None]:
import sys
from timm import create_model
from timm.data.mixup import Mixup
from fastai.vision.all import *
from sklearn.model_selection import StratifiedKFold
import gc
from sklearn.metrics import mean_squared_error

# --- Reproducibility -------------------------------------------------------
# One seed drives fastai's set_seed, the torch CPU/CUDA generators, the
# stratified fold split and the DataLoaders.
seed=1
set_seed(seed, reproducible=True)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True
# `use_deterministic_algorithms` is a function: assigning True to it (as the
# previous version did) replaces the function object and enables nothing.
# warn_only=True keeps ops that lack a deterministic kernel from raising.
torch.use_deterministic_algorithms(True, warn_only=True)

# --- Run configuration -----------------------------------------------------
BATCH_SIZE = 16      # batch size passed to the fastai DataLoaders
resize = 288         # size handed to the Resize item transform
num_workers = 2      # DataLoader worker processes
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
dataset_path = Path('../input/petfinder-pawpularity-score')

In [None]:
# --- Training metadata -----------------------------------------------------
train_df = pd.read_csv(dataset_path/'train.csv')

# Turn each image Id into the path of its .jpg file, then drop the Id column.
train_df['path'] = train_df['Id'].map(lambda x:str(dataset_path/'train'/x)+'.jpg')
train_df = train_df.drop(columns=['Id'])
# Shuffle the rows with an explicit random_state so that re-running this cell
# always yields the same order (and therefore the same folds downstream).
train_df = train_df.sample(frac=1, random_state=seed).reset_index(drop=True)
# Regression target rescaled by 1/100.
train_df['norm_score'] = train_df['Pawpularity']/100

# --- Test metadata ---------------------------------------------------------
test_df = pd.read_csv(dataset_path/'test.csv')
# Placeholder target so the test frame has the same columns as the train frame.
test_df['Pawpularity'] = [1]*len(test_df)
test_df['path'] = test_df['Id'].map(lambda x:str(dataset_path/'test'/x)+'.jpg')
test_df = test_df.drop(columns=['Id'])

sample_df = pd.read_csv(dataset_path/'sample_submission.csv')

train_df.head()

In [None]:
# --- Target distribution ---------------------------------------------------
len_df = len(train_df)
print(f"There are {len_df} images")

# Give each histogram its own axes: without an explicit `ax`, pandas draws on
# the current figure, so the second histogram of this cell would be painted
# over the first one. (`plt` is provided by the fastai star import.)
_, score_ax = plt.subplots(figsize=(10, 5))
train_df['Pawpularity'].hist(ax=score_ax)
print(f"The mean Pawpularity score is {train_df['Pawpularity'].mean()}")
print(f"The median Pawpularity score is {train_df['Pawpularity'].median()}")
print(f"The standard deviation of the Pawpularity score is {train_df['Pawpularity'].std()}")

print(f"There are {len(train_df['Pawpularity'].unique())} unique values of Pawpularity score")

train_df['norm_score'] = train_df['Pawpularity']/100
train_df['norm_score']

# --- Image size of one sample ----------------------------------------------
# Open inside a context manager so the underlying file handle is released.
with Image.open(train_df['path'][1]) as im:
    width, height = im.size
print(width,height)

# --- Bins used to stratify the folds ---------------------------------------
# Number of bins: floor(1 + log2(number of rows)).
num_bins = int(np.floor(1+np.log2(len(train_df))))
num_bins

# labels=False stores the integer bin index instead of an interval label.
train_df['bins'] = pd.cut(train_df['norm_score'], bins=num_bins, labels=False)
_, bins_ax = plt.subplots()
train_df['bins'].hist(ax=bins_ax)

In [None]:
# --- Stratified fold assignment --------------------------------------------
N_FOLDS = 10

# -1 marks "not assigned yet"; every row must receive a fold id below.
train_df['fold'] = -1
# Look the column up by name. Relying on position -1 silently writes into the
# wrong column when this cell is re-run after another column has been added.
fold_col = train_df.columns.get_loc('fold')

strat_kfold = StratifiedKFold(n_splits=N_FOLDS, random_state=seed, shuffle=True)
# The second element of each split is the held-out part; those rows are the
# ones that belong to fold `i`.
for i, (_, valid_index) in enumerate(strat_kfold.split(train_df.index, train_df['bins'])):
    train_df.iloc[valid_index, fold_col] = i

train_df['fold'] = train_df['fold'].astype('int')
assert (train_df['fold'] >= 0).all(), "some rows were not assigned to a fold"

train_df.fold.value_counts().plot.bar()
train_df[train_df['fold']==0]['bins'].value_counts()
train_df[train_df['fold']==1]['bins'].value_counts()

In [None]:
def petfinder_rmse(input,target):
    """Root-mean-squared error between sigmoid(input) and target, times 100.

    Args:
        input: raw model outputs (logits); flattened before the sigmoid.
        target: targets on the same scale as the sigmoid output; flattened so
            that a trailing singleton dimension cannot trigger broadcasting
            inside ``mse_loss``.

    Returns:
        A scalar tensor holding ``100 * sqrt(mse)``.
    """
    # torch.sigmoid replaces the deprecated F.sigmoid.
    probs = torch.sigmoid(input.flatten())
    return 100*torch.sqrt(F.mse_loss(probs, target.flatten()))

def get_data(folds):
    """Build the fastai DataLoaders that hold out one cross-validation fold.

    Args:
        folds: fold id; rows of ``train_df`` whose ``fold`` column equals this
            value form the validation set, all other rows are used for training.

    Returns:
        ``(paw_dls, splitter)``: the DataLoaders and the ``IndexSplitter`` that
        produced the train/validation split, so callers can recover the
        validation indices.

    Raises:
        ValueError: if no row belongs to the requested fold.
    """
    train_df_f = train_df.copy()
    train_df_f['is_valid'] = (train_df_f['fold'] == folds)

    # Split on the requested fold. The previous version drew an unseeded random
    # 20% split here and ignored `folds`, so every "fold" was validated on rows
    # unrelated to the stratified assignment.
    valid_idx = np.flatnonzero(train_df_f['is_valid'].to_numpy()).tolist()
    if not valid_idx:
        raise ValueError(f"fold {folds!r} does not match any row of train_df")
    splitter = IndexSplitter(valid_idx)

    block = DataBlock(blocks=(ImageBlock, RegressionBlock),
                      get_x=ColReader('path'),
                      get_y=ColReader('norm_score'),
                      splitter=splitter,
                      item_tfms=Resize(resize),
                     )

    paw_dls = block.dataloaders(train_df_f,
                                bs=BATCH_SIZE,
                                num_workers=num_workers,
                                seed=seed)

    return paw_dls, splitter


def get_learner(fold_num):
    """Create a mixed-precision fastai Learner for the given fold.

    Args:
        fold_num: fold id forwarded to ``get_data``.

    Returns:
        ``(learn, splitter)``: the Learner (pretrained timm resnet18,
        BCE-with-logits loss, ``petfinder_rmse`` metric, MixUp callback) and
        the splitter returned by ``get_data``.
    """
    fold_dls, fold_splitter = get_data(fold_num)

    # The output width of the head comes from the DataLoaders' `c` attribute.
    backbone = create_model('resnet18', pretrained=True, num_classes=fold_dls.c)

    learner = Learner(
        fold_dls,
        backbone,
        loss_func=BCEWithLogitsLossFlat(),
        metrics=petfinder_rmse,
        cbs=[MixUp(0.2)],
    )

    return learner.to_fp16(), fold_splitter

In [None]:
# --- Train, predict and score per fold --------------------------------------
# all_preds: one tensor of test-set predictions per trained fold.
# pred:      validation predictions multiplied by 100; -1 marks rows that have
#            not been predicted yet. Stored as float so writing predictions
#            does not change the column dtype.
all_preds = []
train_df['pred'] = -1.0

for i in range(7,8):  # only fold 7 is trained in this run

    print(f'Fold {i} results')

    learn, splitter = get_learner(fold_num=i)

    learn.fit_one_cycle(1, 2e-5, cbs=[SaveModelCallback(), EarlyStoppingCallback(monitor='petfinder_rmse', comp=np.less, patience=1)])

    learn.recorder.plot_loss()

    learn.save(f'model_fold_{i}')

    # Build the inference loaders from the learner's own DataLoaders instead of
    # constructing a second DataBlock/DataLoaders on every iteration.
    test_dl = learn.dls.test_dl(test_df)
    preds, _ = learn.tta(dl=test_dl, n=5, beta=0)
    all_preds.append(preds)

    # The splitter yields positional indices, hence iloc.
    val_idx = list(splitter(range(len(train_df)))[1])
    val_df = train_df.iloc[val_idx]
    val_pred, _ = learn.tta(dl=learn.dls.test_dl(val_df), n=5, beta=0)
    print(val_df['Pawpularity'][:5], val_pred[:5])

    # Flatten to a 1-D array on the 0-100 scale before scoring and storing.
    val_scores = val_pred.flatten().numpy() * 100
    # np.sqrt(MSE) instead of `squared=False`, which newer scikit-learn
    # releases no longer accept.
    score = np.sqrt(mean_squared_error(val_df['Pawpularity'], val_scores))
    print(f'Fold {i} | Score: {score}')
    train_df.loc[val_df.index, 'pred'] = val_scores

    # Drop the references first, collect them, then release cached GPU memory.
    del learn, test_dl
    gc.collect()
    torch.cuda.empty_cache()

    # Running score over every row predicted so far.
    scored = train_df['pred'] != -1
    cv_score = np.sqrt(mean_squared_error(train_df.loc[scored, 'Pawpularity'],
                                          train_df.loc[scored, 'pred']))
    print(f'CV Score: {cv_score}')