<h1 style="color: #6cb4e4;  text-align: center;  padding: 0.25em;  border-top: solid 2.5px #6cb4e4;  border-bottom: solid 2.5px #6cb4e4;  background: -webkit-repeating-linear-gradient(-45deg, #f0f8ff, #f0f8ff 3px,#e9f4ff 3px, #e9f4ff 7px);  background: repeating-linear-gradient(-45deg, #f0f8ff, #f0f8ff 3px,#e9f4ff 3px, #e9f4ff 7px);height:45px;">
<b>Info</b></h1> 

In [None]:
# Append the pytorch-image-models checkout under ../input to sys.path before
# importing create_model from timm.
import sys
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')
from timm import create_model

In [None]:
from fastai.vision.all import *

In [None]:
import torch

# Select CUDA device 0 and make it the current device for this process.
device = torch.device("cuda", 0)
torch.cuda.set_device(device)

In [None]:
# Notebook-wide constants: the seed handed to set_seed and the number of folds.
SEED = 999
N_FOLDS = 10
set_seed(SEED, reproducible=True)

<a id=#cbb></a>
<h2 style="color: #6cb4e4; background: #dfefff;  box-shadow: 0px 0px 0px 5px #dfefff;  border: dashed 4px white;  padding: 0.2em 0.5em;">
<b>CFG Models
</b></h2> 

In [None]:
# Ensemble member 1: blend weight, loader settings, timm architecture name and
# the checkpoint files found in its directory.
MODELS_1_W = 0.55
MODELS_1_BATCH_SIZE = 256
MODELS_1_IMG_SIZE = 224
MODELS_1_NAME = "swin_large_patch4_window7_224"
MODEL_1_DIR = "../input/petfinder-z111-20220103065520/Z111-swin_large_patch4_window7_224-Mixup02___MaxEpochModels"
# glob.glob gives no ordering guarantee; sorting makes the checkpoint list (and
# therefore the fold index each file is paired with later) deterministic.
MODELS_1 = sorted(glob.glob(f"{MODEL_1_DIR}/*.pth"))
# ---------------------------------------------------------------- #
# Ensemble member 2: same layout as above.
MODELS_2_W = 0.45
MODELS_2_BATCH_SIZE = 64
MODELS_2_IMG_SIZE = 384
MODELS_2_NAME = "swin_large_patch4_window12_384"
MODEL_2_DIR = "../input/petfinder-z152-20220107111844/Z152-swin_large_patch4_window12_384-BaseZ1411___MaxEpochModels"
MODELS_2 = sorted(glob.glob(f"{MODEL_2_DIR}/*.pth"))

<a id=#cbb></a>
<h2 style="color: #6cb4e4; background: #dfefff;  box-shadow: 0px 0px 0px 5px #dfefff;  border: dashed 4px white;  padding: 0.2em 0.5em;">
<b>Premodel Init
</b></h2> 

In [None]:
if not os.path.exists('/root/.cache/torch/hub/checkpoints/'):
    os.makedirs('/root/.cache/torch/hub/checkpoints/')
!cp ../input/timm-swin/* /root/.cache/torch/hub/checkpoints/


In [None]:
# List the contents of the torch hub checkpoint cache directory.
!ls /root/.cache/torch/hub/checkpoints/

In [None]:
# Root directory of the competition data; the trailing .ls() displays its entries.
dataset_path = Path('../input/petfinder-pawpularity-score/')
dataset_path.ls()

In [None]:
# Read train.csv from the dataset directory into a DataFrame.
train_df = pd.read_csv(dataset_path/'train.csv')

In [None]:
# Derive each image's .jpg path from its Id, discard the Id column, then
# shuffle all rows and renumber the index from zero.
train_df['path'] = [str(dataset_path/'train'/img_id) + '.jpg' for img_id in train_df['Id']]
train_df = (
    train_df
    .drop(columns=['Id'])
    .sample(frac=1)
    .reset_index(drop=True)
)

In [None]:
# Number of rows in the training frame.
len_df = len(train_df)

In [None]:
# Regression target: Pawpularity divided by 100; the bare expression displays it.
train_df['norm_score'] = train_df['Pawpularity'].div(100)
train_df['norm_score']

In [None]:
# Print the pixel dimensions of one training image (row label 1).
# The context manager closes the file handle that Image.open would otherwise
# leave open for the rest of the session.
with Image.open(train_df['path'][1]) as im:
    width, height = im.size
print(width,height)

## Data loading
After my quick 'n dirty EDA, let's load the data into fastai as DataLoaders objects. We're using the normalized score as the label. I use some fairly basic augmentations here.

In [None]:
# Re-seed the RNGs and request deterministic behaviour before building folds.
seed=999
set_seed(seed, reproducible=True)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True
# torch.use_deterministic_algorithms is a function: assigning True to the
# attribute only replaces it with a bool and enables nothing, so call it.
# warn_only=True makes ops without a deterministic implementation warn
# instead of raise; PyTorch builds lacking the function or the keyword are
# left as they were.
try:
    torch.use_deterministic_algorithms(True, warn_only=True)
except (AttributeError, TypeError) as err:
    print(f'deterministic algorithms not enabled: {err}')

In [None]:
# Sturges' rule: bin count = floor(1 + log2(number of samples)).
n_samples = len(train_df)
num_bins = int(np.floor(1 + np.log2(n_samples)))


In [None]:
# Cut norm_score into num_bins intervals; labels=False stores the integer bin
# index rather than an interval label. These bins are the stratification key below.
train_df['bins'] = pd.cut(train_df['norm_score'], bins=num_bins, labels=False)


In [None]:
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold

# Label every row with the index of the stratified fold in which it is held out.
train_df['fold'] = -1

strat_kfold = StratifiedKFold(n_splits=N_FOLDS, random_state=seed, shuffle=True)
# Resolve the column position by name; a hard-coded -1 is only right while
# 'fold' happens to be the last column of the frame.
fold_col = train_df.columns.get_loc('fold')
# split() yields (train positions, held-out positions); only the held-out
# positions receive the fold number.
for i, (_, val_index) in enumerate(strat_kfold.split(train_df.index, train_df['bins'])):
    train_df.iloc[val_index, fold_col] = i

train_df['fold'] = train_df['fold'].astype('int')



In [None]:
def petfinder_rmse(input,target):
    """Return 100 * RMSE between sigmoid(input) and target.

    Args:
        input: tensor of raw model outputs; it is flattened before the sigmoid.
        target: tensor of targets compared against the flattened activations.

    Returns:
        A scalar tensor: the square root of the mean squared error, times 100.
    """
    activations = torch.sigmoid(input.flatten())
    mse = F.mse_loss(activations, target)
    return 100 * torch.sqrt(mse)

In [None]:
def get_data(fold,img_size,batch_size):
    """Build fastai DataLoaders whose validation set is the rows of one fold.

    Args:
        fold: value matched against train_df['fold'] to mark validation rows.
        img_size: size given to the item-level Resize; the batch-level crop
            uses int(img_size*0.9).
        batch_size: batch size passed to the loaders.

    Returns:
        The object returned by ImageDataLoaders.from_df.
    """
    # assign() returns a new frame, so the global train_df never gains the
    # is_valid column.
    fold_df = train_df.assign(is_valid=train_df['fold'] == fold)

    # Augmentations applied at batch level.
    augmentations = setup_aug_tfms([
        Flip(),
        Zoom(),
        Resize(int(img_size*0.9), method='crop'),
        Brightness(),
        Contrast(),
        Hue(),
        Saturation(),
    ])

    return ImageDataLoaders.from_df(
        fold_df,
        valid_col='is_valid',      # boolean column marking validation rows
        seed=999,
        fn_col='path',             # column holding the image file path
        label_col='norm_score',    # column holding the regression target
        y_block=RegressionBlock,
        bs=batch_size,
        num_workers=8,
        item_tfms=Resize(img_size),
        batch_tfms=augmentations,
    )

In [None]:
def get_learner(fold_num, model_arc, img_size, batch_size):
    """Create a fastai Learner around a timm model for one fold.

    Args:
        fold_num: fold index forwarded to get_data as the validation fold.
        model_arc: architecture name handed to timm's create_model.
        img_size: image size forwarded to get_data.
        batch_size: batch size forwarded to get_data.

    Returns:
        A Learner using BCEWithLogitsLossFlat as loss and petfinder_rmse as
        metric, with empty path and model_dir.
    """
    loaders = get_data(fold_num, img_size, batch_size)
    # The head size comes from the loaders' `c` attribute.
    network = create_model(model_arc, pretrained=True, num_classes=loaders.c)
    return Learner(
        loaders,
        network,
        loss_func=BCEWithLogitsLossFlat(),
        metrics=petfinder_rmse,
        path='',
        model_dir='',
    )

In [None]:
# Read test.csv from the dataset directory into a DataFrame.
test_df = pd.read_csv(dataset_path/'test.csv')

In [None]:
# Give the test frame the same columns the training frame has: a placeholder
# Pawpularity of 1, the image path built from Id, and the derived norm_score.
test_df['Pawpularity'] = 1
test_df['path'] = test_df['Id'].map(lambda x:str(dataset_path/'test'/x)+'.jpg')
test_df = test_df.drop(columns=['Id'])
# This line used to recompute train_df['norm_score'] (already set in an
# earlier cell) and left test_df without the column; compute it on test_df.
test_df['norm_score'] = test_df['Pawpularity']/100

In [None]:
import gc

<a id=#cbb></a>
<h2 style="color: #6cb4e4; background: #dfefff;  box-shadow: 0px 0px 0px 5px #dfefff;  border: dashed 4px white;  padding: 0.2em 0.5em;">
<b>Inference
<small>Model 1 (swin_large_patch4_window7_224)</small>
</b></h2> 

In [None]:
# Per-checkpoint test predictions of model 1, one entry per loop iteration.
all_preds_1 = []

for fold_idx, weights_file in zip(range(N_FOLDS), MODELS_1):
    rule = '# ------------------------------------------- #'
    print(rule)
    print(f'# Fold {fold_idx}')
    print(f'# {weights_file}')
    print(rule)

    # Build the learner for this fold, then load the saved checkpoint; the
    # '.pth' suffix is stripped from the name before it is passed to load().
    learn = get_learner(
        fold_num=fold_idx,
        model_arc=MODELS_1_NAME,
        img_size=MODELS_1_IMG_SIZE,
        batch_size=MODELS_1_BATCH_SIZE,
    )
    learn = learn.load(weights_file.replace('.pth',''))

    # This DataLoaders object is only used below to create the test loader.
    dls = ImageDataLoaders.from_df(
        train_df,
        valid_pct=0.2,             # random 80/20 train/validation split
        seed=SEED,
        fn_col='path',             # column holding the image file path
        label_col='norm_score',    # column holding the regression target
        y_block=RegressionBlock,
        bs=MODELS_1_BATCH_SIZE,
        num_workers=8,
        item_tfms=Resize(MODELS_1_IMG_SIZE),
        batch_tfms=setup_aug_tfms([
            Flip(),
            Zoom(),
            Resize(int(MODELS_1_IMG_SIZE*0.9), method='crop'),
            Brightness(),
            Contrast(),
            Hue(),
            Saturation(),
        ]),
    )

    # Predict on the test images with test-time augmentation (n=5, beta=0).
    test_dl = dls.test_dl(test_df)
    preds, _ = learn.tta(dl=test_dl, n=5, beta=0)
    all_preds_1.append(preds)

    # Drop the learner and release cached GPU memory before the next checkpoint.
    del learn
    torch.cuda.empty_cache()
    gc.collect()



<a id=#cbb></a>
<h2 style="color: #6cb4e4; background: #dfefff;  box-shadow: 0px 0px 0px 5px #dfefff;  border: dashed 4px white;  padding: 0.2em 0.5em;">
<b>Inference
<small>Model 2 (swin_large_patch4_window12_384)</small>
</b></h2> 

In [None]:
# Per-checkpoint test predictions of model 2, one entry per loop iteration.
all_preds_2 = []

for fold_idx, weights_file in zip(range(N_FOLDS), MODELS_2):
    rule = '# ------------------------------------------- #'
    print(rule)
    print(f'# Fold {fold_idx}')
    print(f'# {weights_file}')
    print(rule)

    # Build the learner for this fold, then load the saved checkpoint; the
    # '.pth' suffix is stripped from the name before it is passed to load().
    learn = get_learner(
        fold_num=fold_idx,
        model_arc=MODELS_2_NAME,
        img_size=MODELS_2_IMG_SIZE,
        batch_size=MODELS_2_BATCH_SIZE,
    )
    learn = learn.load(weights_file.replace('.pth',''))

    # This DataLoaders object is only used below to create the test loader.
    dls = ImageDataLoaders.from_df(
        train_df,
        valid_pct=0.2,             # random 80/20 train/validation split
        seed=SEED,
        fn_col='path',             # column holding the image file path
        label_col='norm_score',    # column holding the regression target
        y_block=RegressionBlock,
        bs=MODELS_2_BATCH_SIZE,
        num_workers=8,
        item_tfms=Resize(MODELS_2_IMG_SIZE),
        batch_tfms=setup_aug_tfms([
            Flip(),
            Zoom(),
            Resize(int(MODELS_2_IMG_SIZE*0.9), method='crop'),
            Brightness(),
            Contrast(),
            Hue(),
            Saturation(),
        ]),
    )

    # Predict on the test images with test-time augmentation (n=5, beta=0).
    test_dl = dls.test_dl(test_df)
    preds, _ = learn.tta(dl=test_dl, n=5, beta=0)
    all_preds_2.append(preds)

    # Drop the learner and release cached GPU memory before the next checkpoint.
    del learn
    torch.cuda.empty_cache()
    gc.collect()

<h1 style="color: #6cb4e4;  text-align: center;  padding: 0.25em;  border-top: solid 2.5px #6cb4e4;  border-bottom: solid 2.5px #6cb4e4;  background: -webkit-repeating-linear-gradient(-45deg, #f0f8ff, #f0f8ff 3px,#e9f4ff 3px, #e9f4ff 7px);  background: repeating-linear-gradient(-45deg, #f0f8ff, #f0f8ff 3px,#e9f4ff 3px, #e9f4ff 7px);height:45px;">
<b>Make Submission</b></h1> 

In [None]:
# Mean of all model-1 predictions, scaled by 100 like the submission values.
# The scaling must happen after stacking: `all_preds_1*100` repeats the Python
# list 100 times instead of multiplying the predictions.
np.mean(np.stack(all_preds_1)) * 100

In [None]:
# Mean of all model-2 predictions, scaled by 100 like the submission values.
# The scaling must happen after stacking: `all_preds_2*100` repeats the Python
# list 100 times instead of multiplying the predictions.
np.mean(np.stack(all_preds_2)) * 100

In [None]:
"""
    Weight
"""
# Average each model's per-checkpoint predictions along the checkpoint axis,
# then blend the two averages with the configured weights.
preds1 = np.stack(all_preds_1).mean(axis=0)
preds2 = np.stack(all_preds_2).mean(axis=0)
preds = preds1*MODELS_1_W + preds2*MODELS_2_W

In [None]:
# Put the blended predictions, multiplied by 100, into the sample submission's
# Pawpularity column and write the result without the index.
sample_df = pd.read_csv(dataset_path/'sample_submission.csv').assign(Pawpularity=preds*100)
sample_df.to_csv('submission.csv', index=False)

In [None]:
# Read the written file back and display its first rows as a check.
pd.read_csv('submission.csv').head()

<h1 style="color: #6cb4e4;  text-align: center;  padding: 0.25em;  border-top: solid 2.5px #6cb4e4;  border-bottom: solid 2.5px #6cb4e4;  background: -webkit-repeating-linear-gradient(-45deg, #f0f8ff, #f0f8ff 3px,#e9f4ff 3px, #e9f4ff 7px);  background: repeating-linear-gradient(-45deg, #f0f8ff, #f0f8ff 3px,#e9f4ff 3px, #e9f4ff 7px);height:45px;">
<b>EOF</b></h1> 