In [None]:
import sys
import os
import sys
import json
import math
import random
import cv2
timm_path = "../input/timm-pytorch-image-models/pytorch-image-models-master"
sys.path.append(timm_path)
import timm
from timm.data import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import model_selection
from sklearn.metrics import mean_squared_error
from tqdm.notebook import tqdm
import random
import glob
import albumentations as A
from albumentations.pytorch import ToTensorV2
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset,DataLoader
from torch import optim
from torchvision import transforms
from transformers import  get_cosine_schedule_with_warmup
import wandb
import warnings
# Silence library warnings so the notebook output stays readable.
warnings.filterwarnings('ignore')

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

In [None]:
def set_seed(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy and PyTorch, and exports
    ``PYTHONHASHSEED``. When CUDA is available, all CUDA generators are
    seeded too and ``torch.backends.cudnn.deterministic`` is enabled.

    Args:
        seed: Integer seed shared by all generators.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)

    # CPU-side generators.
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    if not torch.cuda.is_available():
        return

    # GPU-side generators plus the cuDNN determinism flag.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True


set_seed(42)

In [None]:
# Load the training metadata and assign every row to one of 10 stratified folds.
df = pd.read_csv('../input/pawpular-upd-csv/train_updated.csv')

# Number of target bins: ceil(2 * n^(1/3)). The bins only serve as
# stratification labels for the fold split below.
num_bins = int(np.ceil(2*((len(df))**(1./3))))
df['bins'] = pd.cut(df['Pawpularity'], bins=num_bins, labels=False)
df['kfold'] = -1

# Resolve the column position by name. A positional `-1` would silently write
# the fold ids into the wrong column whenever `kfold` is not the last column
# (for example when the CSV already contains a `kfold` column).
kfold_col = df.columns.get_loc('kfold')

strat_kfold = model_selection.StratifiedKFold(n_splits=10, random_state=42, shuffle=True)
# split() yields (train_indices, test_indices); the second element is the
# held-out part, and those rows are the ones tagged with the fold number.
for i, (_, holdout_index) in enumerate(strat_kfold.split(df.index, df['bins'])):
    df.iloc[holdout_index, kfold_col] = i

df['kfold'] = df['kfold'].astype('int')

In [None]:
import cuml, pickle
from cuml.svm import SVR


def _train_image_path(x):
    """Return the JPEG location of the training image with Id ``x``."""
    return f"../input/petfinder-pawpularity-score/train/{x}.jpg"


# One image path per training row, in row order.
df['path'] = list(map(_train_image_path, df["Id"].values))
print('RAPIDS version',cuml.__version__,'\n')

In [None]:
class Model(nn.Module):
    """timm ``swin_large_patch4_window12_384_in22k`` backbone with a one-unit linear head.

    ``forward`` returns both the backbone features and the raw output of
    ``fc3_B`` (no activation is applied inside the model).

    Args:
        pretrained: Forwarded to ``timm.create_model``. Defaults to ``False``,
            which builds the architecture without fetching pretrained weights.
    """

    def __init__(self, pretrained=False):
        super().__init__()
        # The argument is now honoured; it used to be ignored in favour of a
        # hard-coded ``pretrained=False``.
        self.backbone = timm.create_model(
            "swin_large_patch4_window12_384_in22k",
            pretrained=pretrained,
            num_classes=0,
            drop_rate=0.0,
            drop_path_rate=0.0,
            global_pool='',
        )
        self.pool = nn.AdaptiveAvgPool2d(1)
        # `fc3_A` and `do` are not used in forward(). `fc3_A` still contributes
        # parameters to the state dict, so removing it would change the keys
        # expected by load_state_dict -- presumably the saved checkpoints
        # contain it; verify before deleting.
        self.fc3_A = nn.Linear(1536,12)
        self.fc3_B = nn.Linear(1536,1)
        self.do = nn.Dropout(p=0.3)

    def forward(self, image):
        """Return ``(features, prediction)`` for a batch of images.

        Args:
            image: Input batch passed straight to the backbone.

        Returns:
            Tuple of the backbone features (flattened to 2-D when the backbone
            emits a 4-D tensor) and the output of ``fc3_B`` on those features.
        """
        features = self.backbone(image)

        # Backbones that return a 4-D feature map (the original comment
        # mentions efficientnet models) are pooled and flattened here.
        # NOTE(review): confirm the output layout of the pinned timm version
        # for this Swin model with global_pool=''.
        if features.dim() == 4:
            features = self.pool(features)
            features = features.view(features.shape[0], -1)

        prediction = self.fc3_B(features)

        return features, prediction

In [None]:
# Side length (pixels) of the square images fed to the network.
image_size = 384

# Deterministic inference-time preprocessing: resize, normalise with the
# ImageNet statistics imported from timm, then convert to a tensor.
val_aug = A.Compose([
    A.Resize(height=image_size, width=image_size, p=1.0),
    A.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD),
    ToTensorV2(),
])

In [None]:
class Pets(Dataset):
    """Image-only dataset that reads the files listed in a DataFrame's ``path`` column.

    Args:
        df: DataFrame with a ``path`` column holding image file paths.
        augs: Optional callable invoked as ``augs(image=rgb_array)`` that
            returns a mapping with an ``'image'`` entry. When ``None`` the
            RGB array is returned as loaded.
    """

    def __init__(self, df, augs=None):
        self.df = df
        self.augs = augs

    def __len__(self):
        """Return the number of rows (images) in the DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load, colour-convert and optionally transform the image at position ``idx``.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        # Positional lookup: samplers hand out 0..len-1, which only matches
        # label-based `.loc` when the frame has a fresh RangeIndex.
        img_src = self.df['path'].iloc[idx]

        # cv2.imread signals failure by returning None instead of raising,
        # so fail here with the offending path rather than inside cvtColor.
        image = cv2.imread(img_src)
        if image is None:
            raise FileNotFoundError(f"Could not read image: {img_src}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # `augs` defaults to None; calling it unconditionally would raise.
        if self.augs is None:
            return image

        transformed = self.augs(image=image)
        return transformed['image']

In [None]:
def inference_func(oof , path):
    """Run one checkpoint over every image listed in ``oof``.

    Args:
        oof: DataFrame with a ``path`` column of image files. Its index is
            reset before it is wrapped in a ``Pets`` dataset.
        path: File path of a saved ``Model`` state dict.

    Returns:
        Tuple ``(PREDS1, PREDS2)`` of NumPy arrays in input row order:
        ``PREDS1`` is the first output of ``Model`` (the backbone features),
        ``PREDS2`` is the second output passed through a sigmoid and
        multiplied by 100.
    """
    oof_dataset = Pets(oof.reset_index(drop=True), augs=val_aug)
    oof_loader = DataLoader(oof_dataset, batch_size=16, shuffle=False, num_workers=4)

    m = Model(False)
    # map_location lets a checkpoint saved from a GPU load on a CPU-only
    # machine too; without it torch.load tries to restore the tensors onto
    # the device they were saved from.
    m.load_state_dict(torch.load(path, map_location='cpu'))
    m = m.to(device)
    m.eval()

    PREDS1 = []
    PREDS2 = []
    with torch.no_grad():
        for images in tqdm(oof_loader):
            x = images.to(device)
            output1, output2 = m(x)
            # Head output -> sigmoid -> scaled to 0..100.
            output2 = output2.sigmoid() * 100
            PREDS1.append(output1.cpu())
            PREDS2.append(output2.cpu())

    PREDS1 = torch.cat(PREDS1).numpy()
    PREDS2 = torch.cat(PREDS2).numpy()
    return PREDS1, PREDS2

In [None]:
# Checkpoint file for each of the 10 folds, indexed by fold number.
path = [
    "../input/paw-models-384-large-all/swin_large_patch4_window12_384 - all - Fold 0 - val rmse 17.2610.pth",
    '../input/paw-models-384-large-all/swin_large_patch4_window12_384 - all - Fold 1 - val rmse 17.2024.pth',
    '../input/paw-models-384-large-all/swin_large_patch4_window12_384 - all - Fold 2 - val rmse 16.8636.pth',
    '../input/paw-models-384-large-all/swin_large_patch4_window12_384 - all - Fold 3 - val rmse 17.7805.pth',
    '../input/paw-models-384-large-all/swin_large_patch4_window12_384 - all - Fold 4 - val rmse 16.9883.pth',
    '../input/paw-models-384-large-all/swin_large_patch4_window12_384 - all - Fold 5 - val rmse 16.5446.pth',
    '../input/paw-models-384-large-all/swin_large_patch4_window12_384 - all - Fold 6 - val rmse 16.8143.pth',
    '../input/paw-models-384-large-all/swin_large_patch4_window12_384 - all - Fold 7 - val rmse 16.8518.pth',
    '../input/paw-models-384-large-all/swin_large_patch4_window12_384 - all - Fold 8 - val rmse 16.6582.pth',
    '../input/paw-models-384-large-all/swin_large_patch4_window12_384 - all - Fold 9 - val rmse 17.6446.pth',
]

In [None]:
# Directory prefix (with trailing slash) holding the pickled per-fold SVR
# models named SVR_fold_<k>.pkl. Set to None to extract train embeddings and
# fit/save the SVRs in the fold loop instead of loading them.
LOAD_SVR_FROM_PATH = '../input/svm-swin-trainv2/'
from sklearn.metrics import mean_squared_error

In [None]:
# Per-fold test predictions: network head (…predictions) and SVR (…predictions2).
super_final_predictions = []
super_final_predictions2 = []
# The out-of-fold containers are not filled anywhere in this cell.
super_final_oof_predictions = []
super_final_oof_predictions2 = []
super_final_oof_true = []

# Loop-invariant inputs: the test metadata is identical for every fold, so it
# is read once here instead of once per fold.
df_test = pd.read_csv("../input/petfinder-pawpularity-score/test.csv")
df_test['path'] = [f"../input/petfinder-pawpularity-score/test/{x}.jpg" for x in df_test["Id"].values]

# Metadata column names; not used inside this loop.
dense_features = [
    'Subject Focus', 'Eyes', 'Face', 'Near', 'Action', 'Accessory',
    'Group', 'Collage', 'Human', 'Occlusion', 'Info', 'Blur'
]

for fold_ in range(10):
    print('#'*25)
    print('### FOLD',fold_+1)
    print('#'*25)

    # Held-out rows of this fold; not used further inside this loop.
    df_valid = df[df.kfold == fold_]

    name = f"SVR_fold_{fold_}.pkl"
    if LOAD_SVR_FROM_PATH is None:
        ##################
        # EXTRACT TRAIN EMBEDDINGS
        df_train = df[df.kfold != fold_]

        print('Extracting train embedding...')
        embedx,_ = inference_func(df_train , path[fold_])

        ##################
        # FIT RAPIDS SVR
        print('Fitting SVR...')
        clf = SVR(C=20.0)
        clf.fit(embedx.astype('float32'), df_train.Pawpularity.values.astype('int32'))

        ##################
        # SAVE RAPIDS SVR
        # Context manager so the file is flushed and closed deterministically.
        with open(name, "wb") as svr_file:
            pickle.dump(clf, svr_file)

    else:
        ##################
        # LOAD RAPIDS SVR
        print('Loading SVR...',LOAD_SVR_FROM_PATH+name)
        # NOTE(security): pickle.load can execute arbitrary code; only point
        # LOAD_SVR_FROM_PATH at files you produced or trust.
        with open(LOAD_SVR_FROM_PATH+name, "rb") as svr_file:
            clf = pickle.load(svr_file)

    ##################
    # TEST PREDICTIONS
    print('Predicting test...')
    # Embeddings feed the SVR; the second output is the network's own
    # prediction, already scaled by inference_func.
    embedt,final_test_predictions = inference_func(df_test , path[fold_])

    final_test_predictions2 = clf.predict(embedt)
    super_final_predictions.append(final_test_predictions)
    super_final_predictions2.append(final_test_predictions2)

In [None]:
# Blend weight given to the SVR predictions; the network head gets (1 - best_w).
best_w = 0.4

# Average over folds. column_stack puts one column per fold, so the mean along
# axis 1 yields one value per test row. The results get their own names: the
# per-fold lists are left untouched so this cell can be re-run safely.
# Rebinding the lists to the averaged arrays made a second run fail.
nn_test_mean = np.mean(np.column_stack(super_final_predictions), axis=1)
svr_test_mean = np.mean(np.column_stack(super_final_predictions2), axis=1)

df_test["Pawpularity"] = (1-best_w)*nn_test_mean + best_w*svr_test_mean
df_test = df_test[["Id", "Pawpularity"]]
df_test.to_csv("submission.csv", index=False)
df_test.head()