In [None]:
import gc
import os
import sys

sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')

from IPython.display import display
from timm import create_model

from fastai.vision.all import *

In [None]:
# Reproducibility settings: seed the torch RNGs and request deterministic kernels.
seed = 402
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True

# Deterministic cuBLAS needs a fixed workspace size; `setdefault` keeps any value
# that was already exported in the environment.
os.environ.setdefault("CUBLAS_WORKSPACE_CONFIG", ":4096:8")

# `use_deterministic_algorithms` is a function: assigning `True` to it only
# overwrites the function and never enables the mode, so it has to be called.
# `warn_only=True` downgrades "no deterministic implementation" errors to
# warnings so inference is not aborted by a single unsupported op.
try:
    torch.use_deterministic_algorithms(True, warn_only=True)
except TypeError:
    # Older torch releases do not accept the `warn_only` keyword.
    torch.use_deterministic_algorithms(True)

# 1 Data

In [None]:
class Data:
    """Reads the competition CSVs and builds the dataloaders used for inference.

    Attributes:
        batch_size: batch size recorded at construction time (no method of this
            class reads it).
        dataset_path: `Path` of the directory holding `train.csv`, `test.csv`
            and the `train/` and `test/` image folders.
        train_val_df: shuffled training frame with `path`, `norm_score`, `bins`
            and `fold` columns added.
        test_df: test frame with `path` plus placeholder `Pawpularity` and
            `norm_score` columns.
    """

    def __init__(self, data_path='../input/petfinder-pawpularity-score/', n_split=5, batch_size=None):
        """Load and preprocess the train and test CSVs found under `data_path`.

        Args:
            data_path: directory containing `train.csv` and `test.csv`.
            n_split: number of stratified folds assigned to the training frame.
            batch_size: value stored on `self.batch_size`. When omitted, the
                notebook-level `batch_size` global is used if it exists
                (the previous behaviour), otherwise `None`.
        """
        if batch_size is None:
            # Backward compatibility: this attribute used to be read straight
            # from a global defined in a later cell.
            batch_size = globals().get('batch_size')
        self.batch_size = batch_size
        # Honour the argument instead of a second hard-coded copy of the path.
        self.dataset_path = Path(data_path)
        train_val_df = self.preprocess_df(pd.read_csv(self.dataset_path/'train.csv'), is_train=True)
        self.test_df = self.preprocess_df(pd.read_csv(self.dataset_path/'test.csv'), is_train=False)
        self.train_val_df = self.get_split_df(train_val_df, n_split=n_split)

    def preprocess_df(self, df, is_train):
        """Return a copy of `df` with image paths and a score scaled by 1/100.

        The `Id` column is replaced by `path` (`<dataset>/<train|test>/<Id>.jpg`)
        and `norm_score = Pawpularity / 100` is added. Test frames get a
        placeholder `Pawpularity` of 1. Training frames are additionally
        shuffled, using the notebook-level `seed`, and re-indexed.

        Args:
            df: frame with an `Id` column (and `Pawpularity` when `is_train`).
            is_train: selects the image sub-directory and whether to shuffle.

        Returns:
            A new DataFrame; the frame passed in is left untouched.
        """
        df = df.copy()  # do not write new columns into the caller's frame
        if is_train:
            img_dirname = "train"
        else:
            img_dirname = "test"
            df['Pawpularity'] = 1  # placeholder label so both frames share a schema
        img_dir = self.dataset_path/img_dirname
        df['path'] = df['Id'].map(lambda x: str(img_dir/x)+'.jpg')
        df = df.drop(columns=['Id'])
        df['norm_score'] = df['Pawpularity']/100
        if is_train:
            # Seeded so the row order (and therefore the folds) repeats across runs.
            df = df.sample(frac=1, random_state=seed).reset_index(drop=True)
        return df

    def get_split_df(self, df, n_split=5):
        """Bin `norm_score` and assign each row a stratified fold id.

        Adds a `bins` column (equal-width bins of `norm_score`) and a `fold`
        column in `0..n_split-1`, where a row's fold is the split in which it
        is held out. Also prints the bin count and plots the bin histogram and
        the fold sizes.

        Args:
            df: frame with a `norm_score` column.
            n_split: number of folds.

        Returns:
            A copy of `df` with the `bins` and `fold` columns added.
        """
        from sklearn.model_selection import StratifiedKFold

        df = df.copy()

        # NOTE(review): this resembles Sturges' rule (1 + log2(n), or
        # 1 + 3.322*log10(n)) but multiplies log2 by 3.3, which yields more
        # bins. Left unchanged because altering it changes the fold
        # assignment - confirm whether it is intended.
        num_bins = int(np.floor(1+(3.3)*(np.log2(len(df)))))
        print(num_bins)

        df['bins'] = pd.cut(df['norm_score'], bins=num_bins, labels=False)
        print()
        display(df['bins'].hist())
        plt.show()

        df['fold'] = -1
        # Address the column by name rather than assuming it is the last one.
        fold_col = df.columns.get_loc('fold')

        strat_kfold = StratifiedKFold(n_splits=n_split, random_state=seed, shuffle=True)
        # The second element of each split holds the held-out row positions.
        for i, (_, holdout_index) in enumerate(strat_kfold.split(df.index, df['bins'])):
            df.iloc[holdout_index, fold_col] = i

        df['fold'] = df['fold'].astype('int')
        df.fold.value_counts().plot.bar()
        plt.show()
        return df

    def get_test_dls(self, img_size, batch_size):
        """Build `ImageDataLoaders` over the training frame for a given image size.

        The loaders are created from a copy of `train_val_df` with a seeded
        random 20% validation split. Callers derive a test loader from the
        result with `dls.test_dl(...)`.

        Args:
            img_size: size passed to the `Resize` item transform.
            batch_size: batch size of the loaders.

        Returns:
            The constructed `ImageDataLoaders`.
        """
        train_df_f = self.train_val_df.copy()
        dls = ImageDataLoaders.from_df(train_df_f,  # training DataFrame
                                       valid_pct=0.2,  # random hold-out fraction
                                       seed=seed,  # seed for the random split
                                       fn_col='path',  # column holding the image file path
                                       label_col='norm_score',  # column holding the regression target
                                       y_block=RegressionBlock,  # the type of target
                                       bs=batch_size,  # batch size
                                       num_workers=2,
                                       item_tfms=Resize(img_size),  # per-item resize
                                       batch_tfms=setup_aug_tfms([Brightness(), Contrast(), Hue(), Saturation()]))  # batch-level lighting augmentations
        return dls

# 2 Inference

In [None]:
def petfinder_rmse(input,target):
    """Root-mean-squared error on the 0-100 score scale.

    `input` holds raw logits: it is flattened and passed through a sigmoid.
    `target` holds scores already divided by 100. Both are flattened before
    the comparison so that an `(N, 1)` target is not broadcast against the
    `(N,)` predictions into an `(N, N)` error matrix.

    NOTE(review): presumably this must stay defined under this exact name
    because the pickled learners loaded later in the notebook reference it -
    confirm before renaming.

    Args:
        input: tensor of logits with one value per sample.
        target: tensor of normalised scores with the same number of elements.

    Returns:
        Scalar tensor: `100 * sqrt(mean((sigmoid(input) - target) ** 2))`.
    """
    preds = torch.sigmoid(input.flatten())
    return 100*torch.sqrt(F.mse_loss(preds, target.flatten()))

In [None]:
# Ensemble configuration.
n_split=5
with_1k = True  # not read anywhere in this cell; kept so the notebook namespace is unchanged
batch_size = 8

# (image size, directory holding one `model_fold_<k>.pkl` per fold)
model_variants = [
    (224, "../input/pet-findder-img-size-224-1k"),
    (384, "../input/pet-findder-img-size-384-1k"),
]

data = Data(n_split=n_split)

# One prediction tensor per (fold, image size) pair, in fold-major order.
all_preds = []
for fold in range(n_split):
    print(f'Fold {fold} results')
    for img_size, model_dir in model_variants:
        dls = data.get_test_dls(batch_size=batch_size, img_size=img_size)
        test_dl = dls.test_dl(data.test_df)
        # `load_learner` unpickles the file, so only point it at trusted model files.
        learn = load_learner(f"{model_dir}/model_fold_{fold}.pkl")
        preds, _ = learn.tta(dl=test_dl, n=5, beta=0)
        all_preds.append(preds)

        # Release this model before the next one is loaded, so two learners are
        # never alive at the same time. Collect garbage first so the cached
        # CUDA blocks it frees can actually be returned.
        del learn, dls, test_dl
        gc.collect()
        torch.cuda.empty_cache()
    
# for fold in range(10):
#     dls = data.get_test_dls(batch_size=batch_size, img_size=224)
#     test_dl = dls.test_dl(data.test_df)
#     learn = load_learner(f"../input/pet-findder-img-size-224-1k-fold10/model_fold_{fold}.pkl")
#     preds, _ = learn.tta(dl=test_dl, n=5, beta=0)
#     all_preds.append(preds)
    
#     del learn, dls, test_dl
#     torch.cuda.empty_cache()
#     gc.collect()

In [None]:
# Average the predictions of every model and write the submission file.
submission_path = 'submission.csv'
sample_df = pd.read_csv('../input/petfinder-pawpularity-score/sample_submission.csv')

stacked_preds = np.stack(all_preds)  # one slice per entry of `all_preds`
preds = stacked_preds.mean(axis=0)  # In regression, median is often better than mean.

# Predictions are on the normalised scale; the submission expects 0-100.
sample_df['Pawpularity'] = 100 * preds
sample_df.to_csv(submission_path, index=False)

# Read the file back to show what was actually written.
display(pd.read_csv(submission_path).head())