# imports

## dependencies

In [1]:
import joblib
import pandas as pd
from sklearn.neighbors import NearestNeighbors
import pickle


In [2]:
#!pip install scikit-learn==0.24.2

## files

In [3]:
# model_path = "../data/models_anime_map_knn_model.joblib"
# pickle.load(open('../model.sav', 'rb'))

In [4]:
def get_anime():
    """Load the anime catalogue CSV and expose its ``MAL_ID`` column as ``anime_id``.

    Returns:
        pandas.DataFrame: the contents of ``../data/anime_df_relevant_PG.csv``
        with the ``MAL_ID`` column renamed to ``anime_id``; every other column
        is passed through unchanged.
    """
    column_aliases = {'MAL_ID' : 'anime_id'}
    catalogue = pd.read_csv("../data/anime_df_relevant_PG.csv")
    renamed_catalogue = catalogue.rename(columns=column_aliases)
    return renamed_catalogue

In [5]:
def get_data():
    """Load the user/anime ratings table from the processed-data CSV.

    Returns:
        pandas.DataFrame: the raw contents of
        ``active_users_df_10PlusRatings_partial.csv`` (the saved run shows the
        columns ``user_id``, ``anime_id`` and ``rating``).
    """
    # NOTE(review): the saved output also shows an 'Unnamed: 0' column, which
    # looks like a previously written index; it is returned as-is here.
    return pd.read_csv('../data/processed_data/active_users_df_10PlusRatings_partial.csv')
# Sanity check: render the loaded ratings table (pandas truncates the display).
get_data()

Unnamed: 0,user_id,anime_id,rating
0,0,67,9
1,0,6702,7
2,0,242,10
3,0,21,10
4,0,24,9
...,...,...,...
822413,4998,6758,7
822414,4998,2001,8
822415,4998,5005,8
822416,4998,12431,7


In [6]:
def get_model():
    """Load and return the pickled model stored at ``../model.sav``.

    Returns:
        The object unpickled from the file (the saved run shows a
        ``NearestNeighbors(metric='cosine')`` estimator).

    Raises:
        FileNotFoundError: if ``../model.sav`` does not exist.
    """
    # SECURITY: unpickling can execute arbitrary code, so only load a model
    # file that comes from a trusted source.
    # The context manager closes the handle deterministically instead of
    # leaving it open until the file object is garbage collected.
    with open('../model.sav', 'rb') as model_file:
        return pickle.load(model_file)

In [7]:
# Sanity check: load the pickled model and display its repr.
get_model()

NearestNeighbors(metric='cosine')

In [8]:
def process_data():
    """Build the feature matrix and the matching anime-name lookup table.

    Steps:
      1. Load the ratings and divide ``rating`` by 10.
      2. Keep only ratings whose ``anime_id`` appears in the anime catalogue
         (inner merge) and pivot them to one row per anime and one column per
         user, filling missing ratings with 0.
      3. One-hot encode the comma-separated ``Genres`` column and append the
         genre columns to the pivot (inner merge on ``anime_id``).
      4. Build a name table with one row per pivot row, in pivot row order.

    Returns:
        tuple: ``(pivot_df, anime_name_pivot_df)``.
        ``pivot_df`` is indexed by ``anime_id`` with user columns followed by
        genre columns. ``anime_name_pivot_df`` has the columns ``anime_id`` and
        ``Name`` and a 0..n-1 index, so that row ``i`` names ``pivot_df.iloc[i]``.
    """
    ratings_df = get_data()
    # assign() produces a new frame rather than mutating the loaded one in place.
    ratings_df = ratings_df.assign(rating=ratings_df['rating'] / 10)

    anime_df = get_anime()
    anime_name_df = anime_df[['anime_id', 'Name']]
    rated_df = ratings_df.merge(anime_name_df, on='anime_id', how='inner')
    pivot_df = rated_df.pivot_table(index='anime_id', columns='user_id', values='rating').fillna(0)

    genres_df = anime_df[['anime_id', 'Genres']]
    genre_dummies = genres_df['Genres'].str.get_dummies(sep=", ")
    # `columns=` already selects the axis, so no extra `axis` argument is needed.
    genres_encoded_df = pd.concat(objs=[genres_df.drop(columns='Genres'), genre_dummies], axis=1)
    genres_encoded_df = genres_encoded_df.set_index('anime_id')

    pivot_df = pivot_df.merge(genres_encoded_df, how='inner', left_index=True, right_index=True)

    # Callers use the positional index of the name table with pivot_df.iloc, so
    # derive the table from pivot_df.index itself. Building it separately from
    # the merged ratings can drift out of step with the pivot (e.g. pivot_table
    # drops an anime whose ratings are all NaN).
    name_by_id = anime_name_df.drop_duplicates(subset='anime_id').set_index('anime_id')['Name']
    anime_name_pivot_df = pd.DataFrame({
        'anime_id': pivot_df.index.values,
        'Name': name_by_id.reindex(pivot_df.index).values,
    })

    return pivot_df, anime_name_pivot_df

# predict

In [9]:
def recomendation_10PlusRatings(anime_name, nb_recomendation = 10):
    """Return the titles nearest to ``anime_name`` according to the saved model.

    Args:
        anime_name: exact title, compared for equality against the ``Name``
            column of the name table returned by ``process_data``.
        nb_recomendation: number of neighbours to return in addition to the
            first hit (the model is queried for ``nb_recomendation + 1``).

    Returns:
        list: ``[name, distance]`` pairs in the order the model returns them.
        The first pair is always reported with distance ``0``.
        NOTE(review): this assumes the model's nearest hit is the query title
        itself, which the saved run shows for 'Naruto' -- confirm for titles
        with identical feature rows.

    Raises:
        IndexError: if no row of the name table has ``Name == anime_name``.
    """
    pivot_df, anime_name_pivot_df = process_data()
    model = get_model()

    matching_rows = anime_name_pivot_df.index[anime_name_pivot_df['Name'] == anime_name].tolist()
    if not matching_rows:
        # Same exception type as the bare list lookup raised, but with a
        # message that names the title that was not found.
        raise IndexError(f"anime {anime_name!r} was not found in the ratings data")
    index_nb = matching_rows[0]

    query_vector = pivot_df.iloc[index_nb, :].values.reshape(1, -1)
    distances, indices = model.kneighbors(query_vector, n_neighbors = nb_recomendation + 1)
    # Flatten once instead of on every loop iteration.
    flat_distances = distances.flatten()
    flat_indices = indices.flatten()

    results = []
    for rank, (row_position, distance) in enumerate(zip(flat_indices, flat_distances)):
        # Row positions returned by the model are mapped back to anime ids via the pivot index.
        neighbour_id = pivot_df.index[row_position]
        # Boolean mask instead of a string-built query; a distinct local name
        # keeps the `anime_name` argument intact.
        id_matches = anime_name_pivot_df['anime_id'] == neighbour_id
        neighbour_name = anime_name_pivot_df.loc[id_matches, 'Name'].iloc[0]
        results.append([neighbour_name, 0 if rank == 0 else distance])
    return results

In [10]:
# Example query with the default of 10 recommendations: the first entry is
# reported with distance 0, followed by the 10 nearest neighbours.
recomendation_10PlusRatings('Naruto')

[['Naruto', 0],
 ['Naruto: Shippuuden', 0.18049078542744768],
 ['Death Note', 0.32724375187989996],
 ['Bleach', 0.35922338431707146],
 ['Shingeki no Kyojin', 0.3764965221555553],
 ['Fullmetal Alchemist: Brotherhood', 0.4092187913883153],
 ['Code Geass: Hangyaku no Lelouch', 0.4141593159792041],
 ['Sword Art Online', 0.4145034622526954],
 ['One Piece', 0.4324693299705893],
 ['Fairy Tail', 0.43374727465720364],
 ['One Punch Man', 0.440672137299862]]