In [None]:
import numpy as np 
import pandas as pd 
import scipy as sp 
import matplotlib.pyplot as plt 
from matplotlib import animation, rc
import cv2 
from PIL import Image 

from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Read the train and test metadata tables, then preview the first training rows.
train_csv = "../input/petfinder-pawpularity-score/train.csv"
test_csv = "../input/petfinder-pawpularity-score/test.csv"
train, test = pd.read_csv(train_csv), pd.read_csv(test_csv)
train.head()

In [None]:

# Annotation columns that make up the feature vector for the similarity search.
use_col = ['Subject Focus', 'Eyes', 'Face', 'Near', 'Action', 'Accessory',
       'Group', 'Collage', 'Human', 'Occlusion', 'Info', 'Blur']


# Path of every image file, derived from the row's Id.
train["img_file"] = [f"../input/petfinder-pawpularity-score/train/{f}.jpg" for f in train.Id.to_list()]
test["img_file"] = [f"../input/petfinder-pawpularity-score/test/{f}.jpg" for f in test.Id.to_list()]

# One feature row per Id, indexed by Id. The feature columns are selected
# *before* aggregating so that max() is not also computed over columns that
# would be discarded right afterwards (e.g. the img_file strings).
train_feature = train.groupby("Id")[use_col].max()
test_feature = test.groupby("Id")[use_col].max()

train_feature.head()

In [None]:

'''
Cosine similarity between every training row and every test row,
computed on the feature columns of train_feature / test_feature.

Layout of the resulting df_sparse table:

index:   train Id
columns: test Id

so df_sparse[test_id] is a Series of similarities indexed by train Id.
'''

# Retained so that any other cell referring to these names keeps working.
last_train_index = train_feature.shape[0]
df = pd.concat([train_feature, test_feature])

# Compare the train rows directly against the test rows. This yields the same
# values as slicing the train-vs-test corner out of the full
# (train + test) x (train + test) matrix, without allocating the whole square.
train_sparse = sp.sparse.csr_matrix(train_feature.values)
test_sparse = sp.sparse.csr_matrix(test_feature.values)
df_sparse = pd.DataFrame(
    cosine_similarity(train_sparse, test_sparse),
    index=train_feature.index,
    columns=test_feature.index,
)

df_sparse.head()

# Search for similar training images

In [None]:

'''
Helper functions for the similarity search and for animating its results.
By default, the 10 training images whose annotation data is most similar
to a test image are shown one frame at a time.
'''


# Use the "jshtml" HTML representation when displaying matplotlib animations.
rc("animation", html="jshtml")

def find_similar_train_id(test_id, n=10):
    """Return the ``n`` rows of ``df_sparse`` most similar to ``test_id``.

    Looks up the ``test_id`` column of the global ``df_sparse`` similarity
    table and ranks its entries from highest to lowest value.

    Args:
        test_id: Column label of ``df_sparse`` to look up.
        n: Number of top-ranked entries to keep.

    Returns:
        DataFrame with a single ``similar`` column holding the similarity
        values, indexed by the labels of the selected ``df_sparse`` rows.
    """
    ranked = df_sparse[test_id].sort_values(ascending=False)
    return ranked.iloc[:n].to_frame(name="similar")


def decode_img(img_f):
    """Load an image file as a 224x224 RGB image.

    Args:
        img_f: Path of the image file to read.

    Returns:
        The image read by OpenCV, converted from BGR to RGB channel order
        and resized to 224x224.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``img_f``.
    """
    img = cv2.imread(img_f)
    if img is None:
        # cv2.imread reports a missing or unreadable file by returning None
        # rather than raising; fail here with the offending path instead of
        # letting cvtColor raise an opaque error.
        raise FileNotFoundError(f"Could not read image file: {img_f}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return cv2.resize(img, (224, 224))


def create_animation(ims, ids):
    """Build a slideshow animation that steps through ``ims``.

    Args:
        ims: Sequence of images to display; the first one is drawn initially.
        ids: Value shown in the red ``test_id=...`` title.

    Returns:
        An ``animation.FuncAnimation`` with one frame per image and a
        1000 ms interval between frames.
    """
    fig, ax = plt.subplots(figsize=(6, 6))
    ax.axis("off")
    ax.set_title(f"test_id={ids}", c="r")
    shown = ax.imshow(ims[0])

    def animate_f(i):
        # Swap the pixel data of the existing image artist for frame i.
        shown.set_array(ims[i])
        return [shown]

    return animation.FuncAnimation(fig, animate_f, frames=len(ims), interval=1000)


def find_similar_animation(test_id):
    """Decode the training images most similar to ``test_id``.

    Args:
        test_id: Id passed on to ``find_similar_train_id``.

    Returns:
        List of decoded images, one per row returned by
        ``find_similar_train_id`` and in the same order.
    """
    neighbour_ids = find_similar_train_id(test_id).index.to_list()
    return [
        decode_img(train.loc[train.Id == sim, "img_file"].values[0])
        for sim in neighbour_ids
    ]


def find_similar_score(test_id):
    """Return the Pawpularity of the training row most similar to ``test_id``.

    Args:
        test_id: Id passed on to ``find_similar_train_id``.

    Returns:
        The ``Pawpularity`` value of the first ``train`` row whose Id equals
        the top-ranked Id from ``find_similar_train_id(test_id, 1)``.
    """
    nearest_id = find_similar_train_id(test_id, 1).index.values[0]
    is_nearest = train.Id == nearest_id
    return train.loc[is_nearest, "Pawpularity"].values[0]
    

In [None]:
# Pick two test Ids to demonstrate the search on. A locally seeded generator
# makes the same Ids come up on every Restart & Run All, so the displayed
# results are reproducible; change SAMPLE_SEED to look at other test Ids.
SAMPLE_SEED = 0
random_test_id = np.random.default_rng(SAMPLE_SEED).choice(test.Id.to_list(), 2)

find_similar_train_id(random_test_id[0]).style.background_gradient(cmap="Blues")

In [None]:
# Decode the images most similar to the first sampled test Id and play them
# back as an animation titled with that Id.
query_id = random_test_id[0]
a = find_similar_animation(query_id)
create_animation(a, query_id)

# A simple Submission 

In [None]:

'''
Apply the similarity search to the test data: every test Id receives the
Pawpularity of its most similar training row.
'''

test_ids = test.Id.to_list()
similar_list = [find_similar_score(test_id) for test_id in test_ids]

sub = pd.read_csv("../input/petfinder-pawpularity-score/sample_submission.csv")
# Match predictions to submission rows by Id rather than by position, so a
# sample_submission.csv ordered differently from test.csv cannot silently
# receive another row's score.
# NOTE(review): assumes sample_submission.csv has an "Id" column like test.csv.
score_by_id = dict(zip(test_ids, similar_list))
predicted = sub["Id"].map(score_by_id)
if predicted.isna().any():
    raise ValueError("sample_submission.csv contains Ids that are missing from test.csv")
sub["Pawpularity"] = predicted
sub.to_csv("submission.csv", index=False)
