Bare minimum for 
- consumption of model.joblib
- export of properly formatted .json

# imports

## dependencies

In [1]:
import joblib
import pandas as pd
from sklearn.neighbors import NearestNeighbors


MODEL_PATH = "../data/anime_map_data_animelist_100plus_PG_knn_model.joblib"
ANIME_DF_PATH = "../data/partial_knn_data/anime_df_relevant_PG.csv"
PIVOT_DF_NAME = "anime_map_data_animelist_100plus_PG_PCA_vector_df"
ANIME_ID_NAME = "anime_map_data_animelist_100plus_PG_anime_id_df"

In [2]:
#!pip install scikit-learn==0.24.2

## files

In [2]:
model_path = "../data/models_anime_map_knn_model.joblib"

In [7]:
def get_anime():
    anime_df_relevant_PG = pd.read_csv(ANIME_DF_PATH)
    return anime_df_relevant_PG.rename(columns={'MAL_ID' : 'anime_id'})

In [8]:
def get_data(name_file):
    data = pd.read_csv(f'../data/{name_file}.csv')
    return data

In [9]:
def get_model(path):
    return joblib.load(path)

In [7]:
def process_data(name_file):
    data_users_df = get_data(name_file)
    data_users_df['rating'] = data_users_df['rating']/10
    
    anime_df_relevant_PG = get_anime()
    anime_name_df = anime_df_relevant_PG[['anime_id','Name']]
    data_users_df_merge = data_users_df.merge(anime_name_df, on = 'anime_id', how='inner')
    pivot_df = data_users_df_merge.pivot_table(index='anime_id',columns='user_id',values='rating').fillna(0)
    
    anime_Genres_df = anime_df_relevant_PG[['anime_id','Genres']]
    anime_Genres_df_encoded = pd.concat(objs = [anime_Genres_df.drop(columns = 'Genres', axis =1), anime_Genres_df['Genres'].str.get_dummies(sep=", ")], axis = 1)
    anime_Genres_df_encoded = anime_Genres_df_encoded.set_index('anime_id')
    
    pivot_df = pivot_df.merge(anime_Genres_df_encoded, how='inner',left_index=True, right_index=True)
    anime_name_pivot_df = data_users_df_merge[['anime_id','Name']].drop_duplicates()
    anime_name_pivot_df = anime_name_pivot_df.sort_values('anime_id')
    anime_name_pivot_df = anime_name_pivot_df.reset_index().drop(columns = 'index')
    
    return pivot_df, anime_name_pivot_df

# predict

In [8]:
def recommendation_10PlusRatings(anime_name, nb_recomendation = 10):
    pivot_df, anime_name_pivot_df = process_data('active_users_df_10PlusRatings_partial')
    model = get_model(model_path)
    index_nb = anime_name_pivot_df.index[anime_name_pivot_df['Name'] == anime_name].tolist()[0]
    distances, indices = model.kneighbors(pivot_df.iloc[index_nb,:].values.reshape(1, -1), n_neighbors = nb_recomendation + 1)

    prediction = []
    for i in range(1, len(distances.flatten())):
        prediction.append([pivot_df.index[indices.flatten()[i]],distances.flatten()[i]])
    results = {}
    for i in range(len(prediction)):
        anime_name = anime_name_pivot_df.query(f'anime_id == {prediction[i][0]}').iloc[0].Name
        distance = prediction[i][1]
        results[f'{anime_name}'] = distance
    return results

In [13]:
predict_Naruto = recommendation_10PlusRatings('Naruto', 30)

In [14]:
predict_Naruto

{'Naruto: Shippuuden': 0.18049078542744768,
 'Death Note': 0.32724375187989996,
 'Bleach': 0.35922338431707146,
 'Shingeki no Kyojin': 0.3764965221555553,
 'Fullmetal Alchemist: Brotherhood': 0.4092187913883153,
 'Code Geass: Hangyaku no Lelouch': 0.4141593159792041,
 'Sword Art Online': 0.4145034622526954,
 'One Piece': 0.4324693299705893,
 'Fairy Tail': 0.43374727465720364,
 'One Punch Man': 0.440672137299862,
 'Code Geass: Hangyaku no Lelouch R2': 0.44317776537599496,
 'Tokyo Ghoul': 0.4478750610328671,
 'Boku no Hero Academia': 0.4509773475530082,
 'Fullmetal Alchemist': 0.45734048611945144,
 'Ao no Exorcist': 0.459692800542125,
 'Hunter x Hunter (2011)': 0.4624201558967598,
 'Angel Beats!': 0.4655259992943921,
 'Dragon Ball Z': 0.4667113626750077,
 'Boku no Hero Academia 2nd Season': 0.4783955337192296,
 'No Game No Life': 0.4788718820530793,
 'Toradora!': 0.4801106307799001,
 'Highschool of the Dead': 0.48100086207446635,
 'Nanatsu no Taizai': 0.48689829546741004,
 'Shingeki no K

### Export model.joblib as pickle / sav file

In [97]:
model_joblib = get_model(model_path)

In [98]:
model_joblib

NearestNeighbors(metric='cosine')

In [99]:
import pickle

In [102]:
pickle.dump(model_joblib, open('model.sav',"wb"))

In [103]:
ls

FrontEnd_Explo.ipynb  model.sav


In [104]:
pwd


'/home/mijka/code/mijkami/AnimeMap_front/notebook'

# create anime_name_pivot_df

In [None]:
RATING_COMPLETED_MODEL_PATH = "data/anime_map_data_rating_complete_100plus_PG_knn_model.joblib"
RATING_COMPLETED_PIVOT_DF_NAME = "anime_map_data_rating_complete_100plus_PG_PCA_vector_df"

In [32]:
anime_df = get_anime()

In [33]:
anime_df

Unnamed: 0,anime_id,Name,Genres,Studios
0,1,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",Sunrise
1,5,Cowboy Bebop: Tengoku no Tobira,"Action, Drama, Mystery, Sci-Fi, Space",Bones
2,6,Trigun,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen",Madhouse
3,7,Witch Hunter Robin,"Action, Mystery, Police, Supernatural, Drama, ...",Sunrise
4,8,Bouken Ou Beet,"Adventure, Fantasy, Shounen, Supernatural",Toei Animation
...,...,...,...,...
11043,47398,Kimetsu Gakuen: Valentine-hen,Comedy,Unknown
11044,47402,Heikousen,"Music, Romance","10Gauge, Studio DURIAN"
11045,47614,Nu Wushen de Canzhuo Spring Festival Special,"Slice of Life, Comedy",Unknown
11046,47616,Yakusoku no Neverland 2nd Season: Michishirube,"Mystery, Psychological, Supernatural, Thriller...",CloverWorks


In [34]:
ANIME_ID_COMPLETE_NAME = 'anime_map_data_rating_complete_100plus_PG_anime_id_df'
anime_id_completed = get_data(ANIME_ID_COMPLETE_NAME)

In [35]:
anime_id_completed

Unnamed: 0,anime_id
0,1
1,5
2,6
3,7
4,8
...,...
10942,47398
10943,47402
10944,47614
10945,47616


In [36]:
anime_name_pivot_completed_df = anime_id_completed.merge(anime_df[['anime_id', "Name"]], on='anime_id', how='inner')

In [37]:
anime_name_pivot_completed_df

Unnamed: 0,anime_id,Name
0,1,Cowboy Bebop
1,5,Cowboy Bebop: Tengoku no Tobira
2,6,Trigun
3,7,Witch Hunter Robin
4,8,Bouken Ou Beet
...,...,...
10942,47398,Kimetsu Gakuen: Valentine-hen
10943,47402,Heikousen
10944,47614,Nu Wushen de Canzhuo Spring Festival Special
10945,47616,Yakusoku no Neverland 2nd Season: Michishirube


In [52]:
anime_name_pivot_completed_df

Unnamed: 0,anime_id,Name
0,1,Cowboy Bebop
1,5,Cowboy Bebop: Tengoku no Tobira
2,6,Trigun
3,7,Witch Hunter Robin
4,8,Bouken Ou Beet
...,...,...
10942,47398,Kimetsu Gakuen: Valentine-hen
10943,47402,Heikousen
10944,47614,Nu Wushen de Canzhuo Spring Festival Special
10945,47616,Yakusoku no Neverland 2nd Season: Michishirube


In [53]:
anime_name_pivot_completed_df.to_csv('../data/anime_map_data_rating_complete_100plus_PG_anime_name_pivot_df.csv', index=False)

In [39]:
anime_name_pivot_completed_df

Unnamed: 0,anime_id,Name
0,1,Cowboy Bebop
1,5,Cowboy Bebop: Tengoku no Tobira
2,6,Trigun
3,7,Witch Hunter Robin
4,8,Bouken Ou Beet
...,...,...
10942,47398,Kimetsu Gakuen: Valentine-hen
10943,47402,Heikousen
10944,47614,Nu Wushen de Canzhuo Spring Festival Special
10945,47616,Yakusoku no Neverland 2nd Season: Michishirube


In [10]:
anime_id = get_data(ANIME_ID_NAME)
anime_df = get_anime()
anime_name_pivot_df = anime_id.merge(anime_df, on='anime_id', how='inner')

In [11]:
anime_id

Unnamed: 0,anime_id
0,1
1,5
2,6
3,7
4,8
...,...
11043,47398
11044,47402
11045,47614
11046,47616


In [12]:
anime_df

Unnamed: 0,anime_id,Name,Genres,Studios
0,1,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space",Sunrise
1,5,Cowboy Bebop: Tengoku no Tobira,"Action, Drama, Mystery, Sci-Fi, Space",Bones
2,6,Trigun,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen",Madhouse
3,7,Witch Hunter Robin,"Action, Mystery, Police, Supernatural, Drama, ...",Sunrise
4,8,Bouken Ou Beet,"Adventure, Fantasy, Shounen, Supernatural",Toei Animation
...,...,...,...,...
11043,47398,Kimetsu Gakuen: Valentine-hen,Comedy,Unknown
11044,47402,Heikousen,"Music, Romance","10Gauge, Studio DURIAN"
11045,47614,Nu Wushen de Canzhuo Spring Festival Special,"Slice of Life, Comedy",Unknown
11046,47616,Yakusoku no Neverland 2nd Season: Michishirube,"Mystery, Psychological, Supernatural, Thriller...",CloverWorks


In [14]:
anime_name_pivot_df = anime_name_pivot_df[['anime_id', "Name"]]

In [45]:
anime_name_pivot_df

Unnamed: 0,anime_id,Name
0,1,Cowboy Bebop
1,5,Cowboy Bebop: Tengoku no Tobira
2,6,Trigun
3,7,Witch Hunter Robin
4,8,Bouken Ou Beet
...,...,...
11043,47398,Kimetsu Gakuen: Valentine-hen
11044,47402,Heikousen
11045,47614,Nu Wushen de Canzhuo Spring Festival Special
11046,47616,Yakusoku no Neverland 2nd Season: Michishirube


In [17]:
anime_name_pivot_df.to_csv('../data/anime_map_data_animelist_100plus_PG_anime_name_pivot_df.csv', index=False)

In [18]:
ls ../data


anime_df_relevant_PG.csv
[0m[01;32manime_map_data_animelist_100plus_PG_anime_id_df.csv[0m*
anime_map_data_animelist_100plus_PG_anime_name_pivot_df.csv
[01;32manime_map_data_animelist_100plus_PG_knn_model.joblib[0m*
[01;32manime_map_data_animelist_100plus_PG_PCA_vector_df.csv[0m*
[01;34mpartial_knn_data[0m/


In [19]:
ANIME_NAME_PIVOT_NAME = "anime_map_data_animelist_100plus_PG_anime_name_pivot_df"

In [49]:
anime_name_pivot_df = get_data(ANIME_NAME_PIVOT_NAME)

In [23]:
index_nb = anime_name_pivot_df.index[anime_name_pivot_df['Name'] == 'Naruto'].tolist()[0]

In [50]:
anime_name_pivot_df

Unnamed: 0,anime_id,Name
0,1,Cowboy Bebop
1,5,Cowboy Bebop: Tengoku no Tobira
2,6,Trigun
3,7,Witch Hunter Robin
4,8,Bouken Ou Beet
...,...,...
11043,47398,Kimetsu Gakuen: Valentine-hen
11044,47402,Heikousen
11045,47614,Nu Wushen de Canzhuo Spring Festival Special
11046,47616,Yakusoku no Neverland 2nd Season: Michishirube


In [51]:
anime_name_pivot_completed_df

Unnamed: 0,anime_id,Name
0,1,Cowboy Bebop
1,5,Cowboy Bebop: Tengoku no Tobira
2,6,Trigun
3,7,Witch Hunter Robin
4,8,Bouken Ou Beet
...,...,...
10942,47398,Kimetsu Gakuen: Valentine-hen
10943,47402,Heikousen
10944,47614,Nu Wushen de Canzhuo Spring Festival Special
10945,47616,Yakusoku no Neverland 2nd Season: Michishirube


In [29]:
def recommendation_10PlusRatings(anime_name, nb_recomendation):
    pivot_df = get_data(PIVOT_DF_NAME)
    anime_name_pivot_df = get_data(ANIME_NAME_PIVOT_NAME)
    model = get_model(MODEL_PATH)
    index_nb = anime_name_pivot_df.index[anime_name_pivot_df['Name'] == anime_name].tolist()[0]
    distances, indices = model.kneighbors(pivot_df.iloc[index_nb,:].values.reshape(1, -1), n_neighbors = nb_recomendation + 1)

    prediction = []
    for i in range(1, len(distances.flatten())):
        prediction.append([pivot_df.index[indices.flatten()[i]],distances.flatten()[i]])
    results = {}
    for i in range(len(prediction)):
        anime_name = anime_name_pivot_df.iloc[prediction[i][0]].Name
        distance = prediction[i][1]
        results[f'{anime_name}'] = distance
    return results


In [30]:
recommendation_10PlusRatings('Naruto', 5)

{'Naruto: Shippuuden': 0.16479568859309957,
 'Death Note': 0.28978191191199754,
 'Bleach': 0.31862997487679345,
 'Shingeki no Kyojin': 0.3200006084059118,
 'Fullmetal Alchemist: Brotherhood': 0.34244184214998474}

In [54]:
import joblib
import pandas as pd
from sklearn.neighbors import NearestNeighbors


NOTATION_MODEL_PATH = "../data/anime_map_data_animelist_100plus_PG_knn_model.joblib"
NOTATION_PIVOT_DF_NAME = "anime_map_data_animelist_100plus_PG_PCA_vector_df"
NOTATION_ANIME_NAME_PIVOT_NAME = "anime_map_data_animelist_100plus_PG_anime_name_pivot_df"

RATING_COMPLETED_MODEL_PATH = "../data/anime_map_data_rating_complete_100plus_PG_knn_model.joblib"
RATING_COMPLETED_PIVOT_DF_NAME = "anime_map_data_rating_complete_100plus_PG_PCA_vector_df"
RATING_COMPLETED_ANIME_NAME_PIVOT_NAME = "anime_map_data_rating_complete_100plus_PG_anime_name_pivot_df"


def get_data(name_file):
    data = pd.read_csv(f'../data/{name_file}.csv')
    return data

def get_model(path):
    return joblib.load(path)

def recommendation_10PlusRatings(anime_name, nb_recomendation, model):
    if model == 'notation':
        pivot_df = get_data(NOTATION_PIVOT_DF_NAME)
        anime_name_pivot_df = get_data(NOTATION_ANIME_NAME_PIVOT_NAME)
        model = get_model(NOTATION_MODEL_PATH)
    elif model == 'completed':
        pivot_df = get_data(RATING_COMPLETED_ANIME_NAME_PIVOT_NAME)
        anime_name_pivot_df = get_data(RATING_COMPLETED_PIVOT_DF_NAME)
        model = get_model(RATING_COMPLETED_MODEL_PATH)

    index_nb = anime_name_pivot_df.index[anime_name_pivot_df['Name'] == anime_name].tolist()[0]
    distances, indices = model.kneighbors(pivot_df.iloc[index_nb,:].values.reshape(1, -1), n_neighbors = nb_recomendation + 1)
    
    prediction = []
    for i in range(1, len(distances.flatten())):
        prediction.append([pivot_df.index[indices.flatten()[i]],distances.flatten()[i]])
    results = {}
    for i in range(len(prediction)):
        anime_name = anime_name_pivot_df.iloc[prediction[i][0]].Name
        distance = prediction[i][1]
        results[f'{anime_name}'] = distance
    return results

In [42]:
recommendation_10PlusRatings('Naruto', 10, 'notation')

{'Naruto: Shippuuden': 0.16479568859309957,
 'Death Note': 0.28978191191199754,
 'Bleach': 0.31862997487679345,
 'Shingeki no Kyojin': 0.3200006084059118,
 'Fullmetal Alchemist: Brotherhood': 0.34244184214998474,
 'Sword Art Online': 0.3471770420949094,
 'Code Geass: Hangyaku no Lelouch': 0.36965757726627224,
 'One Punch Man': 0.37465993577686285,
 'Tokyo Ghoul': 0.3842327761099542,
 'Fairy Tail': 0.38772256888731504}

In [55]:
anime_name_pivot_df = get_data(RATING_COMPLETED_ANIME_NAME_PIVOT_NAME)

In [56]:
anime_name_pivot_df

Unnamed: 0,anime_id,Name
0,1,Cowboy Bebop
1,5,Cowboy Bebop: Tengoku no Tobira
2,6,Trigun
3,7,Witch Hunter Robin
4,8,Bouken Ou Beet
...,...,...
10942,47398,Kimetsu Gakuen: Valentine-hen
10943,47402,Heikousen
10944,47614,Nu Wushen de Canzhuo Spring Festival Special
10945,47616,Yakusoku no Neverland 2nd Season: Michishirube


In [57]:
recommendation_10PlusRatings('Naruto', 10, 'completed')

KeyError: 'Name'

# Last version


## add anime_id & update results to add anime_id in json

In [22]:
import joblib
import pandas as pd
from sklearn.neighbors import NearestNeighbors


NOTATION_MODEL_PATH = "../data/anime_map_data_animelist_100plus_PG_knn_model.joblib"
NOTATION_PIVOT_DF_NAME = "anime_map_data_animelist_100plus_PG_PCA_vector_df"
NOTATION_ANIME_NAME_PIVOT_NAME = "anime_map_data_animelist_100plus_PG_anime_name_pivot_df"

RATING_COMPLETED_MODEL_PATH = "../data/anime_map_data_rating_complete_100plus_PG_knn_model.joblib"
RATING_COMPLETED_PIVOT_DF_NAME = "anime_map_data_rating_complete_100plus_PG_PCA_vector_df"
RATING_COMPLETED_ANIME_NAME_PIVOT_NAME = "anime_map_data_rating_complete_100plus_PG_anime_name_pivot_df"


def get_data(name_file):
    data = pd.read_csv(f'../data/{name_file}.csv')
    return data

def get_model(path):
    return joblib.load(path)

def recommendation_10PlusRatings(anime_name, nb_recomendation):
    pivot_df = get_data(NOTATION_PIVOT_DF_NAME)
    anime_name_pivot_df = get_data(NOTATION_ANIME_NAME_PIVOT_NAME)
    model = get_model(NOTATION_MODEL_PATH)


    index_nb = anime_name_pivot_df.index[anime_name_pivot_df['Name'] == anime_name].tolist()[0]
    distances, indices = model.kneighbors(pivot_df.iloc[index_nb,:].values.reshape(1, -1), n_neighbors = nb_recomendation + 1)
    
    prediction = []
    for i in range(1, len(distances.flatten())):
        prediction.append([pivot_df.index[indices.flatten()[i]],distances.flatten()[i]])
    results = {}
    for i in range(len(prediction)):
        # add anime_id & update results to add anime_id in json
        anime_name = anime_name_pivot_df.iloc[prediction[i][0]].Name
        anime_id = anime_name_pivot_df.iloc[prediction[i][0]].anime_id
        distance = prediction[i][1]
        results[f'{anime_name}'] = {'distance': distance, 'anime_id' : anime_id}
    return results




In [17]:
get_data(RATING_COMPLETED_ANIME_NAME_PIVOT_NAME)

Unnamed: 0,anime_id,Name
0,1,Cowboy Bebop
1,5,Cowboy Bebop: Tengoku no Tobira
2,6,Trigun
3,7,Witch Hunter Robin
4,8,Bouken Ou Beet
...,...,...
10942,47398,Kimetsu Gakuen: Valentine-hen
10943,47402,Heikousen
10944,47614,Nu Wushen de Canzhuo Spring Festival Special
10945,47616,Yakusoku no Neverland 2nd Season: Michishirube


In [24]:
result = recommendation_10PlusRatings('Cowboy Bebop', 10)

In [30]:
result

{'Samurai Champloo': {'distance': 0.3805723268402532, 'anime_id': 205},
 'Cowboy Bebop: Tengoku no Tobira': {'distance': 0.39646133981958054,
  'anime_id': 5},
 'Neon Genesis Evangelion': {'distance': 0.40337796274787074, 'anime_id': 30},
 'Tengen Toppa Gurren Lagann': {'distance': 0.418607496450369,
  'anime_id': 2001},
 'Death Note': {'distance': 0.43934515752334347, 'anime_id': 1535},
 'FLCL': {'distance': 0.4395010086087435, 'anime_id': 227},
 'Trigun': {'distance': 0.44110443159952506, 'anime_id': 6},
 'Fullmetal Alchemist: Brotherhood': {'distance': 0.44708171983283784,
  'anime_id': 5114},
 'Akira': {'distance': 0.4499154004011452, 'anime_id': 47},
 'Code Geass: Hangyaku no Lelouch': {'distance': 0.45451363937797506,
  'anime_id': 1575}}

In [29]:
result['Samurai Champloo']['anime_id']

205