In [69]:
import pickle
import pandas as pd
import numpy as np
from sklearn.metrics import pairwise_distances
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.models import Model, Sequential
from pathlib import Path
import matplotlib.pyplot as plt

In [70]:
import os as os

anime_datapath = os.path.join(os.path.dirname(os.getcwd()), 'Dataset','anime.csv')
rating1_datapath = os.path.join(os.path.dirname(os.getcwd()), 'Dataset','rating1.csv')
rating2_datapath = os.path.join(os.path.dirname(os.getcwd()), 'Dataset','rating1.csv')

In [71]:
rating1 = pd.read_csv(rating1_datapath)

rating1

Unnamed: 0,user_id,anime_id,rating
0,1,20,-1
1,1,24,-1
2,1,79,-1
3,1,226,-1
4,1,241,-1
...,...,...,...
3906863,36791,2795,7
3906864,36791,2904,10
3906865,36791,3270,10
3906866,36791,3299,8


In [72]:
rating2 = pd.read_csv(rating2_datapath)

rating2

Unnamed: 0,user_id,anime_id,rating
0,1,20,-1
1,1,24,-1
2,1,79,-1
3,1,226,-1
4,1,241,-1
...,...,...,...
3906863,36791,2795,7
3906864,36791,2904,10
3906865,36791,3270,10
3906866,36791,3299,8


In [73]:
rating_data = pd.concat([rating1, rating2],axis=0)
rating_data.reset_index(inplace=True)
rating_data.drop(columns='index', inplace=True)
rating_data.loc[rating_data['rating'] ==-1,'rating'] = 0

rating_data

Unnamed: 0,user_id,anime_id,rating
0,1,20,0
1,1,24,0
2,1,79,0
3,1,226,0
4,1,241,0
...,...,...,...
7813731,36791,2795,7
7813732,36791,2904,10
7813733,36791,3270,10
7813734,36791,3299,8


In [74]:
user_ids = rating_data["user_id"].unique().tolist()
user2user_encoded = {x: i for i, x in enumerate(user_ids)}
userencoded2user = {i: x for i, x in enumerate(user_ids)}
anime_ids = rating_data["anime_id"].unique().tolist()
anime2anime_encoded = {x: i for i, x in enumerate(anime_ids)}
anime_encoded2anime = {i: x for i, x in enumerate(anime_ids)}
rating_data["user"] = rating_data["user_id"].map(user2user_encoded)
rating_data["anime"] = rating_data["anime_id"].map(anime2anime_encoded)

In [75]:
num_users = len(user2user_encoded)
num_animes = len(anime2anime_encoded)
rating_data["rating"] = rating_data["rating"].values.astype(np.float32)
# min and max ratings will be used to normalize the ratings later
min_rating = min(rating_data["rating"])
max_rating = max(rating_data["rating"])

print(
    "Number of users: {}, Number of Anime: {}, Min rating: {}, Max rating: {}".format(
        num_users, num_animes, min_rating, max_rating))

Number of users: 36791, Number of Anime: 9750, Min rating: 0.0, Max rating: 10.0


In [76]:
anime2anime_encoded

{20: 0,
 24: 1,
 79: 2,
 226: 3,
 241: 4,
 355: 5,
 356: 6,
 442: 7,
 487: 8,
 846: 9,
 936: 10,
 1546: 11,
 1692: 12,
 1836: 13,
 2001: 14,
 2025: 15,
 2144: 16,
 2787: 17,
 2993: 18,
 3455: 19,
 4063: 20,
 4214: 21,
 4224: 22,
 4581: 23,
 4744: 24,
 4898: 25,
 4999: 26,
 5034: 27,
 5277: 28,
 5667: 29,
 5781: 30,
 5958: 31,
 6163: 32,
 6205: 33,
 6324: 34,
 6500: 35,
 6547: 36,
 6682: 37,
 6707: 38,
 6747: 39,
 6773: 40,
 6793: 41,
 7088: 42,
 7148: 43,
 7593: 44,
 7739: 45,
 7858: 46,
 8074: 47,
 8407: 48,
 8424: 49,
 8525: 50,
 8630: 51,
 8841: 52,
 9041: 53,
 9062: 54,
 9136: 55,
 9181: 56,
 9330: 57,
 9367: 58,
 9515: 59,
 9581: 60,
 9675: 61,
 9750: 62,
 9790: 63,
 9919: 64,
 10067: 65,
 10073: 66,
 10076: 67,
 10079: 68,
 10080: 69,
 10209: 70,
 10578: 71,
 10604: 72,
 10719: 73,
 10790: 74,
 10793: 75,
 10794: 76,
 10805: 77,
 10897: 78,
 11161: 79,
 11266: 80,
 11617: 81,
 11737: 82,
 11757: 83,
 11759: 84,
 11771: 85,
 12293: 86,
 12549: 87,
 12729: 88,
 13357: 89,
 13367: 9

In [77]:
rating_data.head()

Unnamed: 0,user_id,anime_id,rating,user,anime
0,1,20,0.0,0,0
1,1,24,0.0,0,1
2,1,79,0.0,0,2
3,1,226,0.0,0,3
4,1,241,0.0,0,4


In [78]:
# rating_data = rating_data.sample(frac=1, random_state=42) # randomly sample the data-set
x = rating_data[["user", "anime"]].values
# Normalize the targets between 0 and 1. Makes it easy to train.
y = rating_data["rating"].apply(lambda x: (x - min_rating) / (max_rating - min_rating)).values
# Assuming training on 90% of the data and validating on 10%.
train_indices = int(0.9 * rating_data.shape[0])
x_train, x_val, y_train, y_val = (
    x[:train_indices],
    x[train_indices:],
    y[:train_indices],
    y[train_indices:],
)

In [79]:
from sklearn.model_selection import train_test_split
x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.1, random_state=42)

In [80]:
x_train.shape

(7032362, 2)

In [81]:
y_train.shape

(7032362,)

In [82]:
# ## defining the model
# embedding_size = 50

# user_ips= layers.Input(shape=[1])
# user_embedding = layers.Embedding(num_users,embedding_size,embeddings_initializer="he_normal",embeddings_regularizer=keras.regularizers.l2(1e-6))(user_ips)
# # Embedding layer will create 610 vectors look-up table of 50 dimension each
# user_vect= layers.Flatten()(user_embedding)

# anime_ips= layers.Input(shape=[1])
# anime_embedding = layers.Embedding(num_animes, embedding_size, embeddings_initializer="he_normal",embeddings_regularizer=keras.regularizers.l2(1e-6))(anime_ips)
# anime_vect= layers.Flatten()(anime_embedding)

# prod = layers.dot(inputs=[user_vect, anime_vect],axes=1)

# dense1= layers.Dense(150, activation='relu', kernel_initializer="he_normal")(prod)
# dense2= layers.Dense(50, activation='relu',kernel_initializer="he_normal")(dense1)
# dense3= layers.Dense(1,activation='sigmoid')(dense2)

# model = Model([user_ips, anime_ips], dense3)
# model.compile(optimizer='adam',loss='mean_squared_error')

# keras.utils.plot_model(model,show_shapes=True)

In [83]:
# history = model.fit([x_train[:,0], x_train[:,1]], y_train, batch_size=64,epochs=10,verbose=1)

In [84]:
# pickle.dump(model, open('model.pkl','wb'))

In [85]:
model = pd.read_pickle('model.pkl')

In [86]:
pred=model.predict([x_train[4:5,0], x_train[4:5,1]])

pred

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 132ms/step




array([[0.6655904]], dtype=float32)

In [87]:
anime_dataset = pd.read_csv(anime_datapath, encoding = 'latin')

anime_dataset

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266
...,...,...,...,...,...,...,...
12289,9316,Toushindai My Lover: Minami tai Mecha-Minami,Yaoi,OVA,1,4.15,211
12290,5543,Under World,Yaoi,OVA,1,4.28,183
12291,5621,Violence Gekiga David no Hoshi,Yaoi,OVA,4,4.88,219
12292,6133,Violence Gekiga Shin David no Hoshi: Inma Dens...,Yaoi,OVA,1,4.98,175


In [88]:
anime_rating_data = anime_dataset.copy()

In [89]:
rating_data

Unnamed: 0,user_id,anime_id,rating,user,anime
0,1,20,0.0,0,0
1,1,24,0.0,0,1
2,1,79,0.0,0,2
3,1,226,0.0,0,3
4,1,241,0.0,0,4
...,...,...,...,...,...
7813731,36791,2795,7.0,36790,2258
7813732,36791,2904,10.0,36790,658
7813733,36791,3270,10.0,36790,3464
7813734,36791,3299,8.0,36790,973


In [90]:
row = rating_data[rating_data['user_id'] == 2]
row

Unnamed: 0,user_id,anime_id,rating,user,anime
153,2,11771,10.0,1,85
154,2,12189,0.0,1,153
155,2,16417,0.0,1,154
3907021,2,11771,10.0,1,85
3907022,2,12189,0.0,1,153
3907023,2,16417,0.0,1,154


In [91]:
# user_id = rating_data.userId.sample(1).iloc[0]
user_id = rating_data.loc[rating_data['user_id']==6,'user_id'].iloc[0]
print(user_id)
animes_watched_by_user = rating_data[rating_data.user_id == user_id]
animes_not_watched = anime_rating_data[ ~anime_rating_data["anime_id"].isin(animes_watched_by_user.anime_id.values)]["anime_id"]
animes_not_watched

6


0        32281
1         5114
2        28977
3         9253
4         9969
         ...  
12289     9316
12290     5543
12291     5621
12292     6133
12293    26081
Name: anime_id, Length: 12257, dtype: int64

In [92]:
animes_watched_by_user.head()

Unnamed: 0,user_id,anime_id,rating,user,anime
769,6,20,0.0,5,0
770,6,59,0.0,5,603
771,6,228,0.0,5,604
772,6,317,0.0,5,605
773,6,1311,0.0,5,606


In [93]:
animes_not_watched

0        32281
1         5114
2        28977
3         9253
4         9969
         ...  
12289     9316
12290     5543
12291     5621
12292     6133
12293    26081
Name: anime_id, Length: 12257, dtype: int64

In [94]:
## basically inner join between animes_not_watched and anime ids from rating_data table (to be sure that the anime ids are not out of place)
animes_not_watched = list(
    set(animes_not_watched).intersection(set(anime2anime_encoded.keys()))
)
animes_not_watched

[32768,
 1,
 5,
 6,
 7,
 8,
 32775,
 32776,
 32777,
 32778,
 32779,
 32781,
 15,
 16,
 17,
 18,
 19,
 32783,
 32784,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 32801,
 32802,
 32800,
 32803,
 32804,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 32826,
 32828,
 61,
 60,
 63,
 64,
 65,
 66,
 67,
 68,
 62,
 69,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 32844,
 79,
 80,
 81,
 82,
 83,
 84,
 85,
 86,
 87,
 88,
 89,
 90,
 91,
 92,
 93,
 94,
 95,
 96,
 97,
 98,
 99,
 100,
 101,
 102,
 103,
 104,
 105,
 106,
 32869,
 108,
 109,
 110,
 111,
 32879,
 113,
 114,
 115,
 116,
 117,
 32886,
 119,
 120,
 121,
 122,
 123,
 124,
 125,
 126,
 127,
 128,
 129,
 130,
 131,
 132,
 133,
 134,
 135,
 136,
 137,
 138,
 139,
 32906,
 141,
 142,
 143,
 144,
 145,
 146,
 147,
 148,
 149,
 150,
 32915,
 152,
 153,
 154,
 155,
 156,
 157,
 158,
 159,
 160,
 161,
 32927,
 162,
 164,
 165,
 166,
 32935,
 168,
 169,
 170,
 167,
 32934,
 173,
 174,
 175,
 32943,
 177,
 17

In [95]:
animes_not_watched_index = [[anime2anime_encoded.get(x)] for x in animes_not_watched]  # get the indexes of the animes not watched by the user
animes_not_watched_index

[[8963],
 [880],
 [881],
 [231],
 [1743],
 [3566],
 [8964],
 [8965],
 [8966],
 [8967],
 [8968],
 [8969],
 [264],
 [1744],
 [265],
 [266],
 [882],
 [8970],
 [8971],
 [267],
 [5182],
 [1],
 [883],
 [1528],
 [2246],
 [1745],
 [6336],
 [268],
 [625],
 [626],
 [1712],
 [3339],
 [5441],
 [8975],
 [8976],
 [8977],
 [804],
 [805],
 [269],
 [1746],
 [270],
 [1332],
 [1747],
 [1748],
 [2054],
 [2285],
 [1749],
 [806],
 [3472],
 [4169],
 [271],
 [2247],
 [8365],
 [602],
 [807],
 [1750],
 [272],
 [627],
 [628],
 [1007],
 [273],
 [274],
 [1751],
 [4895],
 [1503],
 [232],
 [1728],
 [2078],
 [3247],
 [2070],
 [2696],
 [8982],
 [2],
 [2079],
 [2248],
 [3210],
 [5248],
 [3211],
 [2697],
 [2080],
 [2698],
 [3212],
 [3248],
 [1713],
 [3452],
 [3249],
 [2223],
 [2224],
 [2699],
 [1333],
 [1299],
 [1570],
 [1752],
 [2545],
 [1741],
 [1753],
 [2200],
 [808],
 [1754],
 [2055],
 [3885],
 [2249],
 [2686],
 [3250],
 [629],
 [3087],
 [1755],
 [884],
 [3251],
 [3252],
 [5348],
 [3886],
 [3117],
 [809],
 [233],
 [

In [96]:
user_encoder = user2user_encoded.get(user_id)
user_encoder

5

In [97]:
user_anime_array = np.hstack(([[user_encoder]] * len(animes_not_watched), animes_not_watched_index))
user_anime_array ## 1st column is user_index and 2nd col is the list of anime indexes not watched by the user

array([[   5, 8963],
       [   5,  880],
       [   5,  881],
       ...,
       [   5, 8960],
       [   5, 8961],
       [   5, 8962]])

In [98]:
ratings = model.predict([user_anime_array[:,0],user_anime_array[:,1]]).flatten()
ratings

[1m 30/304[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 2ms/step   



[1m304/304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step


array([0.5997213 , 0.1305527 , 0.12796123, ..., 0.5997213 , 0.5997213 ,
       0.5997213 ], dtype=float32)

In [99]:
top_ratings_indices = ratings.argsort()[-10:][::-1] # indices of highest 10 ratings

In [100]:
recommended_anime_ids = [anime_encoded2anime.get(animes_not_watched_index[x][0]) for x in top_ratings_indices]

In [101]:
print("Showing recommendations for user: {}".format(user_id))
print("====" * 9)
print("animes with high ratings from user")
print("----" * 8)
top_animes_user = (
    animes_watched_by_user.sort_values(by="rating", ascending=False)
    .head(5)
    .anime_id.values
)
anime_rating_data_rows = anime_rating_data[anime_rating_data["anime_id"].isin(top_animes_user)]
for row in anime_rating_data_rows.itertuples():
    print(row.name, ":", row.genre)


Showing recommendations for user: 6
animes with high ratings from user
--------------------------------
Kaichou wa Maid-sama! : Comedy, Romance, School, Shoujo
Gosick : Drama, Historical, Mystery, Romance
Naruto : Action, Comedy, Martial Arts, Shounen, Super Power
Deadman Wonderland : Action, Horror, Sci-Fi
Black?Rock Shooter (OVA) : Action, Drama, School, Slice of Life


In [102]:
print("----" * 8)
print("Top 10 anime recommendations")
print("----" * 8)
recommended_animes = anime_rating_data[anime_rating_data["anime_id"].isin(recommended_anime_ids)]
for row in recommended_animes.itertuples():
    print(row.name, ":", row.genre)


--------------------------------
Top 10 anime recommendations
--------------------------------
Robotan (1986) : Comedy, Kids
Byston Well Monogatari: Garzey no Tsubasa : Fantasy
Tenshi no Drop : Comedy, Ecchi, Shoujo Ai, Shounen
Tayu Tayu : Harem, Yaoi, Romance
Kansen: Inyoku no Rensa : Yaoi
V.G.Neo The Animation : Action, Yaoi, Martial Arts
Charm Point 1: Sister&#039;s Rondo : Yaoi
Kinbaku no Yakata: Ryakudatsu : Yaoi
Flashback Game : Yaoi
Detective File 1: Kindan no Ai : Yaoi


In [None]:
o_model = rating_data.pivot_table(index='user_id', columns='anime_id', values='rating')
o_model.fillna(0, inplace=True)

o_model

anime_id,1,5,6,7,8,15,16,17,18,19,...,34238,34239,34240,34252,34283,34324,34325,34349,34367,34475
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,8.0,0.0,0.0,6.0,0.0,6.0,6.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36787,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
36788,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
36789,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
36790,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
o_model_similarity = 1 - pairwise_distances(o_model,metric='cosine')
np.fill_diagonal(o_model_similarity,0)


o_model_similarity = pd.DataFrame(o_model_similarity)
o_model_similarity.index = np.sort(rating_data['user_id'].unique())
o_model_similarity.columns = np.sort(rating_data['user_id'].unique())

o_model_similarity

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,36782,36783,36784,36785,36786,36787,36788,36789,36790,36791
1,0.000000,0.000000,0.101277,0.0,0.028218,0.0,0.079514,0.153449,0.000000,0.278011,...,0.000000,0.000000,0.120550,0.000000,0.032543,0.140359,0.050656,0.083858,0.000000,0.000000
2,0.000000,0.000000,0.135037,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,0.101277,0.135037,0.000000,0.0,0.204494,0.0,0.153641,0.074136,0.000000,0.203559,...,0.038336,0.097274,0.100114,0.121223,0.039111,0.060651,0.234722,0.245566,0.000000,0.013002
4,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
5,0.028218,0.000000,0.204494,0.0,0.000000,0.0,0.243342,0.046187,0.028218,0.005230,...,0.074472,0.115497,0.063214,0.075550,0.112185,0.008977,0.207466,0.084961,0.018812,0.031586
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36787,0.140359,0.000000,0.060651,0.0,0.008977,0.0,0.028409,0.086151,0.000000,0.312170,...,0.000000,0.000000,0.067681,0.000000,0.018271,0.000000,0.048754,0.114339,0.000000,0.030408
36788,0.050656,0.000000,0.234722,0.0,0.207466,0.0,0.090391,0.031092,0.000000,0.096568,...,0.041088,0.133358,0.123003,0.076685,0.134469,0.048754,0.000000,0.168181,0.000000,0.048078
36789,0.083858,0.000000,0.245566,0.0,0.084961,0.0,0.056461,0.125002,0.000000,0.093254,...,0.000000,0.112723,0.195682,0.242577,0.021442,0.114339,0.168181,0.000000,0.000000,0.023070
36790,0.000000,0.000000,0.000000,0.0,0.018812,0.0,0.057828,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [None]:
o_model_similarity1 = pd.DataFrame(
    {
        'user-1' : o_model_similarity.idxmax().index,
        'user-2' : o_model_similarity.idxmax().values
    }
)

o_model_similarity1

Unnamed: 0,user-1,user-2
0,1,1776
1,2,671
2,3,13920
3,4,1
4,5,27040
...,...,...
36786,36787,19002
36787,36788,4427
36788,36789,9065
36789,36790,1227


In [None]:
# pickle.dump(o_model_similarity1, open('other_model.pkl','wb'))

In [106]:
anime_dataset.head()

Unnamed: 0,anime_id,name,genre,type,episodes,rating,members
0,32281,Kimi no Na wa.,"Drama, Romance, School, Supernatural",Movie,1,9.37,200630
1,5114,Fullmetal Alchemist: Brotherhood,"Action, Adventure, Drama, Fantasy, Magic, Mili...",TV,64,9.26,793665
2,28977,Gintama°,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.25,114262
3,9253,Steins;Gate,"Sci-Fi, Thriller",TV,24,9.17,673572
4,9969,Gintama&#039;,"Action, Comedy, Historical, Parody, Samurai, S...",TV,51,9.16,151266


In [110]:
def create_result(s,x):
    anime_id = []
    rating = []

    for i in s[s['user-2'] == x].loc[:,'user-1']:
        for j in rating_data[(rating_data['user_id'] == i)].index:
            k = rating_data.loc[j,'anime_id']
            l = rating_data.loc[j,'rating']
            anime_id.append(k)
            rating.append(l)

    result = pd.DataFrame(
        {
            'anime_id' : anime_id,
            'rating' : rating
        }
    )

    return result
def get_anime_id(x):
    d = []
    for i in rating_data[(rating_data['user_id'] == x)].loc[:,'anime_id'].values:
        d.append(i)
    
    return d
def drop(a,b):

    drop = []

    for i in range(len(a)):
        for j in b:
            c = a.loc[i,'anime_id']
            if c == j:
                drop.append(i)

    return drop
def fillter_anime_id(x, y):

    x.drop(index=y, inplace= True)
    x.drop_duplicates(inplace=True)

    return x
def set_name(x):
    if book[book['anime_id'] == x].shape[0] == 0:
        return np.nan
    else:
        return book[book['anime_id'] == x].loc[:,'Book-Title'].values[0]
def set_Author(x):
    if book[book['anime_id'] == x].shape[0] == 0:
        return np.nan
    else:
        return book[book['anime_id'] == x].loc[:,'Book-Author'].values[0]
def set_Publisher(x):
    if book[book['anime_id'] == x].shape[0] == 0:
        return np.nan
    else:
        return book[book['anime_id'] == x].loc[:,'Publisher'].values[0]
def set_Image(x):
    if book[book['anime_id'] == x].shape[0] == 0:
        return np.nan
    else:
        return book[book['anime_id'] == x].loc[:,'Image-URL-L'].values[0]
def predect(similarity,anime_id):

    o = create_result(similarity,anime_id)
    k = get_anime_id(anime_id)
    n = drop(o,k)
    e = fillter_anime_id(o,n)
    e = e.sort_values(by='rating', ascending= False).head(75)
    e['name'] = e['anime_id'].apply(lambda x: set_name(x))
    e['genre'] = e['anime_id'].apply(lambda x: set_Author(x))
    e.dropna(inplace=True)

    return e.sort_values(by='rating', ascending= False).head(5)

In [112]:
df = predect(o_model_similarity1,36787)

df

Unnamed: 0,anime_id,rating,name,genre


In [118]:
o_model_similarity1[o_model_similarity1['user-1'] == 36787]

Unnamed: 0,user-1,user-2
36786,36787,19002


In [None]:
rating_data.loc[rating_data['rating'] ==-1,'rating']

In [None]:
o_model_similarity1[o_model_similarity1['user-1'] == 36787]

TypeError: Cannot index by location index with a non-integer key