In [None]:
import numpy as np
import os
import pandas as pd
from sklearn import datasets
from sklearn import model_selection
import gc
from fastai.vision.all import *

In [None]:
# Seed every random number generator used by the notebook.
seed = 999
set_seed(seed, reproducible=True)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True
# `torch.use_deterministic_algorithms` is a function: assigning `True` to it
# replaces the function with a bool and enables nothing, so it has to be called.
# `warn_only=True` lets ops that lack a deterministic implementation keep
# running (with a warning) instead of raising.
try:
    torch.use_deterministic_algorithms(True, warn_only=True)
except TypeError:
    # This PyTorch build does not accept `warn_only`; strict mode could abort
    # on unsupported ops, so the flag is deliberately left untouched here.
    pass

In [None]:
if not os.path.exists('/root/.cache/torch/hub/checkpoints/'):
    os.makedirs('/root/.cache/torch/hub/checkpoints/')
!cp '../input/swin-transformer/swin_large_patch4_window7_224_22kto1k.pth' '/root/.cache/torch/hub/checkpoints/swin_large_patch4_window7_224_22kto1k.pth'

In [None]:
import sys
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')
from timm import create_model

In [None]:
#best ensemble

In [None]:
# Re-seed before the data pipeline is built.
set_seed(999, reproducible=True)
# Batch size handed as `bs` to the ImageDataLoaders.from_df calls in this notebook.
BATCH_SIZE = 8

Let's check what data is available to us:

In [None]:
# Root directory of the competition dataset; list what it contains.
dataset_path = Path('../input/petfinder-pawpularity-score/')
dataset_path.ls()

In [None]:
# Read train.csv into a DataFrame and preview its first rows.
train_df = pd.read_csv(dataset_path/'train.csv')
train_df.head()

In [None]:
# Replace the `Id` column with the full path of each training image, then
# shuffle the rows and renumber the index.
train_df = (
    train_df
    .assign(path=train_df['Id'].map(lambda img_id: str(dataset_path/'train'/img_id) + '.jpg'))
    .drop(columns=['Id'])
    .sample(frac=1)  # shuffle dataframe
    .reset_index(drop=True)
)
train_df.head()

Okay, let's check how many images are available in the training dataset:

In [None]:
# Number of rows (one per training image).
len_df = train_df.shape[0]
print(f"There are {len_df} images")

Let's check the distribution of the Pawpularity Score:

In [None]:
# Histogram plus summary statistics of the raw target.
pawpularity = train_df['Pawpularity']
pawpularity.hist(figsize=(10, 5))
print(f"The mean Pawpularity score is {pawpularity.mean()}")
print(f"The median Pawpularity score is {pawpularity.median()}")
print(f"The standard deviation of the Pawpularity score is {pawpularity.std()}")

In [None]:
# Count distinct target values (dropna=False so missing values would be counted too).
n_unique_scores = train_df['Pawpularity'].nunique(dropna=False)
print(f"There are {n_unique_scores} unique values of Pawpularity score")

Note that the Pawpularity score is an integer, so in addition to being a regression problem, it could also be treated as a 100-class classification problem. Alternatively, if the Pawpularity score is normalized to lie between 0 and 1, it can be treated like a binary classification problem, with the normalized score serving as a soft label:

In [None]:
# Scale the 0-100 target into the unit interval; this column is the training label.
train_df['norm_score'] = train_df['Pawpularity'].div(100)
train_df['norm_score']

Let's check an example image to see what it looks like:

In [None]:
# Open the image stored at index label 1 and report its dimensions.
sample_image_path = train_df['path'].loc[1]
im = Image.open(sample_image_path)
width, height = im.size
print(width, height)

In [None]:
# Render the sample image opened in the previous cell.
im

## Data loading
After my quick 'n dirty EDA, let's load the data into fastai as DataLoaders objects. We're using the normalized score as the label. I use some fairly basic augmentations here.

In [None]:
if not os.path.exists('/root/.cache/torch/hub/checkpoints/'):
    os.makedirs('/root/.cache/torch/hub/checkpoints/')
!cp '../input/swin-large-transformer/swin_large_patch4_window12_384_22kto1k.pth' '/root/.cache/torch/hub/checkpoints/swin_large_patch4_window12_384_22kto1k.pth'


In [None]:
#Sturges' rule
num_bins = int(np.floor(1+(3.3)*(np.log2(len(train_df)))))
# num_bins

In [None]:
# Cut the normalized score into `num_bins` intervals; labels=False stores the
# integer bin index, which is used as the stratification key for the folds.
train_df['bins'] = pd.cut(train_df['norm_score'], bins=num_bins, labels=False)
train_df['bins'].hist()

In [None]:
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold

N_FOLDS = 5

# Every row starts unassigned (-1) and then receives the index of the one fold
# in which it is held out for validation.
train_df['fold'] = -1
# Address the column by name: writing to position -1 only works while `fold`
# happens to be the last column of the frame.
fold_col = train_df.columns.get_loc('fold')

strat_kfold = StratifiedKFold(n_splits=N_FOLDS, random_state=seed, shuffle=True)
# split() yields (train positions, held-out positions); only the held-out
# positions are needed to label the fold.
for i, (_, valid_index) in enumerate(strat_kfold.split(train_df.index, train_df['bins'])):
    train_df.iloc[valid_index, fold_col] = i

train_df['fold'] = train_df['fold'].astype('int')
assert (train_df['fold'] >= 0).all(), "some rows were not assigned to a fold"

train_df.fold.value_counts().plot.bar()

In [None]:
# First rows that are held out in fold 0.
train_df.loc[train_df['fold'].eq(0)].head()

In [None]:
# Bin distribution inside fold 0 (check that stratification worked).
train_df.loc[train_df['fold'].eq(0), 'bins'].value_counts()

In [None]:
# Bin distribution inside fold 1, for comparison with fold 0.
train_df.loc[train_df['fold'].eq(1), 'bins'].value_counts()

In [None]:
def petfinder_rmse(input,target):
    """RMSE on the 0-100 scale between sigmoid(input) and target.

    `input` holds raw model outputs (logits) and is flattened before the
    sigmoid; `target` holds scores already scaled into [0, 1]. The result is
    multiplied by 100.
    """
    # torch.sigmoid replaces F.sigmoid, which emits a deprecation warning on
    # older PyTorch releases; the computed values are the same.
    return 100*torch.sqrt(F.mse_loss(torch.sigmoid(input.flatten()), target))

In [None]:
def get_data_384(fold):
    """Return DataLoaders at 384px where rows with `fold == fold` form the validation set."""
    # Flag the held-out fold on a new frame; the shared `train_df` is not modified.
    fold_df = train_df.assign(is_valid=train_df['fold'] == fold)

    loader_kwargs = dict(
        valid_col='is_valid',      # boolean column marking validation rows
        seed=999,
        fn_col='path',             # column holding the image file path
        label_col='norm_score',    # column holding the regression target
        y_block=RegressionBlock,
        bs=BATCH_SIZE,
        num_workers=8,
        shuffle=False,
        item_tfms=Resize(384),
        batch_tfms=setup_aug_tfms([Brightness(), Contrast(), Hue(), Saturation()]),
    )
    return ImageDataLoaders.from_df(fold_df, **loader_kwargs)

In [None]:
def get_data_224(fold):
    """Return DataLoaders at 224px where rows with `fold == fold` form the validation set."""
    # Flag the held-out fold on a new frame; the shared `train_df` is not modified.
    fold_df = train_df.assign(is_valid=train_df['fold'] == fold)

    loader_kwargs = dict(
        valid_col='is_valid',      # boolean column marking validation rows
        seed=999,
        fn_col='path',             # column holding the image file path
        label_col='norm_score',    # column holding the regression target
        y_block=RegressionBlock,
        bs=BATCH_SIZE,
        num_workers=8,
        shuffle=False,
        item_tfms=Resize(224),
        batch_tfms=setup_aug_tfms([Brightness(), Contrast(), Hue(), Saturation()]),
    )
    return ImageDataLoaders.from_df(fold_df, **loader_kwargs)

In [None]:
#Valid Kfolder size
the_data = get_data_384(0)
#assert (len(the_data.train) + len(the_data.valid)) == (len(train_df)//BATCH_SIZE)

In [None]:
def get_learner_384(fold_num):
    """Create a Learner for fold `fold_num` around a pretrained `swin_large_patch4_window12_384`.

    The model is built with `num_classes` taken from the DataLoaders (`.c`),
    trained with `BCEWithLogitsLossFlat` and monitored with `petfinder_rmse`.
    """
    fold_dls = get_data_384(fold_num)
    backbone = create_model('swin_large_patch4_window12_384', pretrained=True, num_classes=fold_dls.c)
    return Learner(fold_dls, backbone, loss_func=BCEWithLogitsLossFlat(), metrics=petfinder_rmse)

In [None]:
def get_learner_224(fold_num):
    """Create a Learner for fold `fold_num` around a pretrained `swin_large_patch4_window7_224`.

    The model is built with `num_classes` taken from the DataLoaders (`.c`),
    trained with `BCEWithLogitsLossFlat` and monitored with `petfinder_rmse`.
    """
    fold_dls = get_data_224(fold_num)
    backbone = create_model('swin_large_patch4_window7_224', pretrained=True, num_classes=fold_dls.c)
    return Learner(fold_dls, backbone, loss_func=BCEWithLogitsLossFlat(), metrics=petfinder_rmse)

In [None]:
# Read test.csv into a DataFrame and preview its first rows.
test_df = pd.read_csv(dataset_path/'test.csv')
test_df.head()

In [None]:
# Placeholder target: every test row gets Pawpularity = 1.
test_df['Pawpularity'] = [1]*len(test_df)
# Replace the `Id` column with the full path of each test image.
test_df['path'] = test_df['Id'].map(lambda x:str(dataset_path/'test'/x)+'.jpg')
test_df = test_df.drop(columns=['Id'])
# NOTE(review): this recomputes the *train* frame's norm_score (already set
# earlier) rather than adding one to test_df. Later cells take the test
# features with `.iloc[:, :-2]`, which relies on test_df ending in exactly
# [Pawpularity, path]; adding a column to test_df here would shift that
# slice -- confirm intent before changing.
train_df['norm_score'] = train_df['Pawpularity']/100

In [None]:
#get_learner(fold_num=0).lr_find(end_lr=3e-2)

In [None]:
import gc

In [None]:
class eval_after_N_steps(Callback):
    """Validate every `n_step` batches and save the model whenever the score improves.

    Parameters
    ----------
    fold : identifier used in the checkpoint name `best_model_fold_{fold}`;
        the callback does nothing while it is None.
    n_step : number of `before_batch` calls between two evaluations; the
        callback does nothing while it is None or 0.
    start_epoch : evaluations are skipped until
        `int(counter / steps_per_epoch) >= start_epoch`.
    steps_per_epoch : batches counted as one epoch for the `start_epoch`
        check (defaults to the previously hard-coded 247).

    The tracked score is the mean of the per-batch `petfinder_rmse` values
    over the validation DataLoader.
    """

    def __init__(self,fold=None,n_step=None,start_epoch=0,steps_per_epoch=247):
        self.counter = 0
        self.start_epoch = start_epoch
        self.n_step = n_step
        self.fold = fold
        self.steps_per_epoch = steps_per_epoch
        self.best_rmse = 100000000

    def before_batch(self):
        self.counter = self.counter + 1
        # Disabled unless both a fold name and a positive step interval were given.
        if self.fold is None or not self.n_step:
            return
        if self.counter % self.n_step != 0:
            return
        if int(self.counter/self.steps_per_epoch) < self.start_epoch:
            return

        current_rmse_loss = self._mean_batch_rmse()
        if current_rmse_loss < self.best_rmse:
            self.best_rmse = current_rmse_loss
            self.learn.save(f'best_model_fold_{self.fold}')
            print(f'best_rmse ----> {self.best_rmse}')

    def _mean_batch_rmse(self):
        """Mean of the per-batch RMSE over the validation set, computed in eval mode."""
        model = self.learn.model
        was_training = model.training
        # Evaluate in eval mode and put the previous mode back afterwards, so
        # stochastic layers neither distort the score nor stay switched off.
        model.eval()
        batch_scores = []
        try:
            with torch.no_grad():
                # Use the learner this callback is attached to, not a global `learn`.
                for xb,yb in self.learn.dls.valid:
                    preds = model(xb)
                    batch_scores.append(float(petfinder_rmse(preds,yb)))
        finally:
            model.train(was_training)
        return np.mean(np.array(batch_scores,dtype='float'))


In [None]:
def petfinder_diff(input,target):
    """Sum of squared errors between flattened `input` and `target`, with each error scaled by 100."""
    scaled_error = 100*(input.flatten()-target)
    return torch.sum(scaled_error**2)

In [None]:
# True when the test image folder holds exactly 8 files.
len(os.listdir('../input/petfinder-pawpularity-score/test')) == 8

In [None]:
import numpy as np
import joblib
# Both import methods supported
from cuml import Ridge
from cuml.linear_model import Ridge

# One (n_test, n_features) matrix per fold; averaged over folds in a later cell.
all_preds = []

# True when the test folder holds exactly 8 files (presumably the public sample
# test set -- confirm). Kept for parity with the original cell; the feature
# extraction below no longer needs to special-case it.
debug = len(os.listdir('../input/petfinder-pawpularity-score/test')) == 8


def _make_inference_dls(size):
    """Build DataLoaders at `size` px; used only as the template for `test_dl`."""
    return ImageDataLoaders.from_df(train_df, #pass in train DataFrame
                                    valid_pct=0.2, #80-20 train-validation random split
                                    seed=999, #seed
                                    fn_col='path', #column holding the image path
                                    label_col='norm_score', #column holding the target
                                    y_block=RegressionBlock, #The type of target
                                    bs=BATCH_SIZE, #pass in batch size
                                    num_workers=8,
                                    shuffle=False,
                                    item_tfms=Resize(size), #pass in item_tfms
                                    batch_tfms=setup_aug_tfms([Brightness(), Contrast(), Hue(), Saturation()]))


def _pooled_features(model, dl):
    """Return the outputs of `model.avgpool` for every item of `dl`, shape (n_items, n_features)."""
    captured = {}

    def _store_output(module, inputs, output):
        captured['avgpool'] = output.detach()

    # Register the hook once and always remove it: registering inside the batch
    # loop stacks one more hook on the module for every batch.
    handle = model.avgpool.register_forward_hook(_store_output)
    per_batch = []
    try:
        with torch.no_grad():
            for batch in dl:
                model(batch[0])
                feats = captured['avgpool'].cpu().numpy()
                # Flatten each batch to (batch, features) before stacking, so a
                # shorter final batch needs no special handling.
                per_batch.append(feats.reshape(-1, feats.shape[1]))
    finally:
        handle.remove()
    return np.concatenate(per_batch, axis=0)


def _predict_with_backbone(learn, size, svr_path):
    """Run one backbone on the test set.

    Returns `(plain_preds, tta_preds, svr_preds, extra_features)` where
    `plain_preds` / `tta_preds` are the two outputs of `learn.tta`, `svr_preds`
    come from the model stored at `svr_path` fed with pooled features plus the
    tabular columns, and `extra_features` are those tabular columns.
    """
    dls = _make_inference_dls(size)
    test_dl = dls.test_dl(test_df)
    tta_dl = dls.test_dl(test_df)
    (tta_preds, plain_preds), _ = learn.tta(dl=tta_dl, n=4,beta=None)

    # All columns except the last two of the test frame.
    extra_features = test_dl.items.iloc[:,:-2].values
    head_inputs = np.concatenate((_pooled_features(learn.model, test_dl), extra_features),axis=1)
    svr_model = joblib.load(svr_path)
    svr_preds = svr_model.predict(head_inputs)
    return plain_preds, tta_preds, svr_preds, extra_features


# NOTE: the per-fold blender models (blender_model_for_svr_NN_swin384/224 and
# final_blender_model_fold{i}) used to be loaded and evaluated here, but their
# outputs were discarded before use, so that dead work has been dropped. The
# features stacked into `all_preds` are unchanged.
for i in range(N_FOLDS):
    print(f'Fold {i} results')

    #swin 384
    print('swin_384')
    learn = get_learner_384(fold_num=i)
    if i == 2:
        # The fold-2 checkpoint is stored under a different dataset layout.
        state = torch.load('../input/transformer384-fold2/best_model_fold_2.pth')
    else:
        state = torch.load(f'../input/transformer384-fold{i}/models/best_model_fold_{i}.pth')
    learn.model.load_state_dict(state['model'])

    preds_test_384, preds_test_384_tta, svr_preds_head_swin384, _ = _predict_with_backbone(
        learn, 384,
        f'../input/petfindder-swin-384-svr-training-with-extra-data/svr_head_model_swin384_fold{i}')

    # Free the 384px model before the 224px model is loaded.
    del learn, state
    torch.cuda.empty_cache()
    gc.collect()

    #swin 224
    print('without_svr_head')
    learn = get_learner_224(fold_num=i)
    state = torch.load(f'../input/swintransformermodels/models/best_model_fold_{i}.pth')
    learn.model.load_state_dict(state['model'])
    learn.model.cuda()

    preds_test_224, preds_test_224_tta, svr_preds_head_swin224, extra_test_features = _predict_with_backbone(
        learn, 224,
        f'../input/fork-of-swin-224-svr-training-meta-data/svr_head_model_swin224_fold{i}')

    del learn, state
    torch.cuda.empty_cache()
    gc.collect()

    #ensamble
    print('ensamble')
    # Column order matters: the final blender in a later cell consumes this matrix as-is.
    X = pd.DataFrame()
    X['preds_test_224'] = np.array(preds_test_224).reshape(-1)
    X['svr_preds_head_swin224'] = np.array(svr_preds_head_swin224).reshape(-1)
    X['preds_test_224_tta'] = np.array(preds_test_224_tta).reshape(-1)

    X['preds_test_384'] = np.array(preds_test_384).reshape(-1)
    X['svr_preds_head_swin384'] = np.array(svr_preds_head_swin384).reshape(-1)
    X['preds_test_384_tta'] = np.array(preds_test_384_tta).reshape(-1)

    meta_cols = ['Subject_Focus','Eyes','Face','Near','Action','Accessory','Group','Collage','Human','Occlusion','Info','Blur']
    X[meta_cols] = np.array(extra_test_features)

    all_preds.append(X.values)
    del X, preds_test_224, preds_test_224_tta, preds_test_384, preds_test_384_tta
    gc.collect()


In [None]:
# Average the per-fold feature matrices element-wise.
X_test = np.stack(all_preds).mean(axis=0)
X_test.shape

In [None]:
# Load the final blender and apply it to the fold-averaged features.
final_blender_path = '../input/best-ensamble-with-training-final-blender/final_blender_model'
result_ridge = joblib.load(final_blender_path)
final_preds = result_ridge.predict(X_test)
preds = final_preds

In [None]:
# Scale the blended predictions by 100 (presumably back to the 0-100
# Pawpularity scale, since the training label was Pawpularity/100 -- confirm).
preds_2 = preds*100

In [None]:
#pseudo-labeling

In [None]:
# Reload the raw tables and turn each `Id` into the full path of its image.
path = Path('../input/petfinder-pawpularity-score')
df_train = pd.read_csv(path/'train.csv')
df_test  = pd.read_csv(path/'test.csv')

train_prefix = str(path) + '/train/'
test_prefix = str(path) + '/test/'
df_train['Id'] = df_train['Id'].map(lambda img_id: train_prefix + img_id + '.jpg')
df_test['Id'] = df_test['Id'].map(lambda img_id: test_prefix + img_id + '.jpg')

In [None]:
# Training rows have a known label, so their sampling window collapses to a point.
df_train = df_train.assign(
    min_Pawpularity=df_train['Pawpularity'],
    max_Pawpularity=df_train['Pawpularity'],
)

In [None]:
def _pseudo_label_window(pred):
    """Return the (min, max) window around `pred`: +/-4, shifted to stay inside [0, 100]."""
    if not (pred - 4 >= 0):
        # Window would start below 0: pin the lower edge at 0, keep the width.
        return 0, pred + 4 - (pred - 4)
    if pred + 4 <= 100:
        return pred - 4, pred + 4
    # Window would end above 100: pin the upper edge at 100, keep the width.
    return pred - 4 - (pred + 4 - 100), 100.0


# Use the ensemble prediction as the pseudo label of every test row and attach
# the window its training target will later be sampled from.
df_test['Pawpularity'] = preds_2
label_windows = [_pseudo_label_window(pred) for pred in list(preds_2)]
min_test_Pawpularity = [low for low, _ in label_windows]
max_test_Pawpularity = [high for _, high in label_windows]
df_test['min_Pawpularity'] = np.array(min_test_Pawpularity).reshape(-1)
df_test['max_Pawpularity'] = np.array(max_test_Pawpularity).reshape(-1)

In [None]:
# Output range shared by the two activation modules below.
ymin = 0.00001
ymax = 100

class scaledSigmoid(nn.Module):
    """Sigmoid whose (0, 1) output is rescaled to the (ymin, ymax) range."""
    def forward(self, input):
        return torch.sigmoid(input) * (ymax - ymin) + ymin

class clampedReLU(nn.Module):
    """Clamp every element of the input into [ymin, ymax].

    Returns a new tensor: the previous masked assignment overwrote the
    caller's tensor in place.
    """
    def forward(self, input):
        return torch.clamp(input, min=ymin, max=ymax)
    

In [None]:
def petfinder_rmse(input,target,segmoid=True):
    """RMSE on the 0-100 scale between the (optionally sigmoid-ed) input and target.

    `input` is flattened first. With `segmoid` true it is treated as raw
    logits and passed through a sigmoid; otherwise it is compared to `target`
    as is. The result is multiplied by 100.

    This definition replaces the two-argument `petfinder_rmse` defined earlier
    in the notebook for every cell that runs after it.
    """
    scores = input.flatten()
    if segmoid:
        # torch.sigmoid replaces F.sigmoid, which emits a deprecation warning
        # on older PyTorch releases; the computed values are the same.
        scores = torch.sigmoid(scores)
    return 100*torch.sqrt(F.mse_loss(scores, target))

In [None]:
# Labelled training rows followed by the pseudo-labelled test rows.
data_with_psudolabel = pd.concat((df_train,df_test),axis=0)

def get_x(r):
    """Return the image path stored in the row's `Id` column."""
    return r['Id']

def get_y(r):
    """Pick one value from arange(min_Pawpularity, max_Pawpularity + 1) at random, divided by 100.

    For rows whose min and max coincide this is simply the label / 100.
    """
    return random.choice(list(np.arange(r['min_Pawpularity'],r['max_Pawpularity'] + 1)))/100

def get_dls(bs,size,df,mult=1):
    """Build DataLoaders and Datasets for image regression from `df`.

    bs   : batch size of the DataLoaders.
    size : target size given to `Resize`.
    df   : frame providing `Id`, `min_Pawpularity` and `max_Pawpularity`.
    mult : accepted for compatibility; not used.

    Returns `(dls, dsets)`.
    """
    # `size` and `df` are now honoured; previously Resize(224) and the global
    # `data_with_psudolabel` were used regardless of the arguments.
    dblock = DataBlock(blocks=(ImageBlock, RegressionBlock),
                       # Only the item at index 1 is held out for validation,
                       # so all other rows are used for training.
                       splitter=IndexSplitter([1]),
                       get_x=get_x,
                       get_y=get_y,
                       item_tfms=Resize(size),
                       batch_tfms=setup_aug_tfms([Flip()]))
    dls = dblock.dataloaders(df,bs=bs)
    dsets = dblock.datasets(df)
    return dls,dsets

dls_psudolabel,dsets = get_dls(bs=8,size=224,df=data_with_psudolabel)
dls_psudolabel.train.one_batch()[0].shape
dls_psudolabel.show_batch()

In [None]:
# Here `debug` is False only when the test table has exactly 8 rows; in every
# other case the pseudo-label model below is trained.
debug = df_test.shape[0] != 8

if debug:
    model = create_model('swin_large_patch4_window7_224', pretrained=True, num_classes=1)
    learn = Learner(dls_psudolabel,model,loss_func=BCEWithLogitsLossFlat(), metrics=petfinder_rmse)
    learn.unfreeze()
    learn.fit_one_cycle(17,2e-5)

In [None]:
# Predict the test set with the pseudo-label model (only when it was trained).
if debug:
    test_dls = dls_psudolabel.test_dl(df_test)
    preds_1 = learn.get_preds(dl=test_dls)

In [None]:
# Release cached GPU memory and collect garbage after the training branch.
if debug:
    torch.cuda.empty_cache()
    gc.collect()

In [None]:
# Fill the sample submission with the final scores and write submission.csv.
sample_df = pd.read_csv(dataset_path/'sample_submission.csv')
if debug:
    # Blend: 90% ensemble prediction, 10% pseudo-label model (rescaled by 100).
    pseudo_model_scores = np.array(preds_1[0]*100).reshape(-1)
    sample_df['Pawpularity'] = 0.9 * preds_2 + 0.1 * pseudo_model_scores
else:
    sample_df['Pawpularity'] = preds_2

sample_df.to_csv('submission.csv',index=False)
pd.read_csv('submission.csv').head()