In [1]:
import pandas as pd
import tensorflow as tf

import numpy as np
import pandas as pd

from collections import defaultdict
import matplotlib.pyplot as plt

In [2]:
def extract_weights(name, model):
    weight_layer = model.get_layer(name)
    weights = weight_layer.get_weights()[0]
    weights = weights / np.linalg.norm(weights, axis = 1).reshape((-1, 1))
    return weights

In [3]:
def getAnimeName(anime_id):
    try:
        name = df[df.anime_id == anime_id].eng_version.values[0]
        if name is np.nan:
            name = df[df.anime_id == anime_id].Name.values[0]
    except:
        print('error')
    
    return name

In [4]:
def getSypnopsis(anime):
    if isinstance(anime, int):
        return sypnopsis_df[sypnopsis_df.MAL_ID == anime].sypnopsis.values[0]
    if isinstance(anime, str):
        return sypnopsis_df[sypnopsis_df.Name == anime].sypnopsis.values[0]

In [5]:
def getAnimeFrame(anime):
    if isinstance(anime, int):
        return df[df.anime_id == anime]
    if isinstance(anime, str):
        return df[df.eng_version == anime]

In [6]:
def find_similar_animes(name, n=10, return_dist=False, neg=False):
    try:
        index = getAnimeFrame(name).anime_id.values[0]
        encoded_index = anime2anime_encoded.get(index)
        weights = anime_weights
        
        dists = np.dot(weights, weights[encoded_index])
        sorted_dists = np.argsort(dists)
        
        n = n + 1            
        
        if neg:
            closest = sorted_dists[:n]
        else:
            closest = sorted_dists[-n:]

        print('animes closest to {}'.format(name))

        if return_dist:
            return dists, closest
        
        rindex = df

        SimilarityArr = []

        for close in closest:
            decoded_id = anime_encoded2anime.get(close)
            sypnopsis = getSypnopsis(decoded_id)
            anime_frame = getAnimeFrame(decoded_id)
            
            anime_name = anime_frame.eng_version.values[0]
            genre = anime_frame.Genres.values[0]
            similarity = float(dists[close])
            SimilarityArr.append({"anime_id": decoded_id, "name": anime_name,
                                  "similarity": similarity,"genre": genre,
                                  'sypnopsis': sypnopsis})

        return SimilarityArr

    except Exception as e:
        print(e)
        print('{}!, Not Found in Anime list'.format(name))

In [7]:
def find_similar_users(item_input:int, n=10,return_dist=False, neg=False):
    try:
        index = item_input
        encoded_index = user2user_encoded.get(index)
        weights = user_weights
    
        dists = np.dot(weights, weights[encoded_index])
        sorted_dists = np.argsort(dists)
        
        n = n + 1
        
        if neg:
            closest = sorted_dists[:n]
        else:
            closest = sorted_dists[-n:]

        print('> users similar to #{}'.format(item_input))

        if return_dist:
            return dists, closest
        
        rindex = df
        SimilarityArr = []
        
        for close in closest:
            similarity = dists[close]

            if isinstance(item_input, int):
                decoded_id = user_encoded2user.get(close)
                SimilarityArr.append({"similar_users": decoded_id, 
                                      "similarity": similarity})
        print(SimilarityArr)
        Frame = pd.DataFrame(SimilarityArr).sort_values(by="similarity", 
                                                        ascending=False)
        
        return Frame
    
    except Exception as e:
        print('find_similar_users')
        print(e)

In [8]:
def getFavGenre(frame, plot=False):
    try:
        frame.dropna(inplace=False)
        all_genres = defaultdict(int)
        
        genres_list = []
        for genres in frame['Genres']:
            if isinstance(genres, str):
                for genre in genres.split(','):
                    if genre.strip() not in genres_list:
                        genres_list.append(genre.strip())
                        all_genres[genre.strip()] += 1    
        
        print("> Genre list:")
        print(genres_list)
        return(genres_list)
    
    except Exception as e:
        print("getFavGenre")
        print(e)

In [9]:
def get_user_preferences(user_id, plot=False, verbose=0):
    try:    
        animes_watched_by_user = rating_df[rating_df.user_id==user_id]
        user_rating_percentile = np.percentile(animes_watched_by_user.rating, 75)
        animes_watched_by_user = animes_watched_by_user[animes_watched_by_user.rating >= user_rating_percentile]
        top_animes_user = (
            animes_watched_by_user.sort_values(by="rating", ascending=False)#.head(10)
            .anime_id.values
        )
        
        anime_df_rows = df[df["anime_id"].isin(top_animes_user)]
        anime_df_rows = anime_df_rows[["anime_id","eng_version", "Genres"]]
        
        if verbose != 0:
            print("> User #{} has rated {} movies (avg. rating = {:.1f})".format(
            user_id, len(animes_watched_by_user),
            animes_watched_by_user['rating'].mean(),
            ))
        
            print('> preferred genres')
        
        if plot:
            genres_list = getFavGenre(anime_df_rows, plot)
            return anime_df_rows, genres_list#.eng_version.values
        
        return anime_df_rows
    except Exception as e:
        print("get_user_preferences")
        print(e)

In [10]:
def get_recommended_animes(similar_users, n=10):
    recommended_animes = []
    anime_list = []
    for user_id in similar_users.similar_users.values:
        user_pref = get_user_preferences(user_id, plot=True, verbose=1)
        pref_list, genres = get_user_preferences(int(user_id), verbose=0)
        pref_list = pref_list[~ pref_list.eng_version.isin(user_pref.eng_version.values)]
        anime_list.append(pref_list.eng_version.values)
        
    anime_list = pd.DataFrame(anime_list)
    sorted_list = pd.DataFrame(pd.Series(anime_list.values.ravel()).value_counts()).head(n)
    
    for i, anime_name in enumerate(sorted_list.index):        
        n_user_pref = sorted_list[sorted_list.index == anime_name].values[0][0]
        if isinstance(anime_name, str):
            try:
                frame = getAnimeFrame(anime_name)
                anime_id = frame.anime_id.values[0]
                genre = frame.Genres.values[0]
                sypnopsis = getSypnopsis(int(anime_id))
                recommended_animes.append({#"anime_id": anime_id ,
                                            "n": n_user_pref,
                                            "anime_name": anime_name, 
                                            "Genres": genre, 
                                            "sypnopsis": sypnopsis})
            except:
                pass
    
    return recommended_animes

In [11]:
MODEL_PATH = './saved_model/my_model.h5'
model = tf.keras.models.load_model(MODEL_PATH)

FileNotFoundError: [Errno 2] Unable to synchronously open file (unable to open file: name = './saved_model/my_model.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [12]:
MODEL_PATH = '../saved_model/my_model.h5'
model = tf.keras.models.load_model(MODEL_PATH)



In [13]:
INPUT_DIR = 'E:/anime-recommendation/data'
rating_df = pd.read_csv(INPUT_DIR + '/animelist.csv', 
                            usecols=["user_id", "anime_id", "rating"]
                            #, nrows=90000000
                            )

In [14]:
n_ratings = rating_df['user_id'].value_counts()
rating_df = rating_df[rating_df['user_id'].isin(n_ratings[n_ratings >= 400].index)].copy()

In [15]:
min_rating = min(rating_df['rating'])
max_rating = max(rating_df['rating'])
avg_rating = np.mean(rating_df['rating'])
rating_df['rating'] = rating_df["rating"].apply(lambda x: (x - min_rating) / (max_rating - min_rating)).values.astype(np.float64)

In [16]:
duplicates = rating_df.duplicated()

if duplicates.sum() > 0:
    print('> {} duplicates'.format(duplicates.sum()))
    rating_df = rating_df[~duplicates]

> 1 duplicates


In [17]:
user_ids = rating_df["user_id"].unique().tolist()
user2user_encoded = {x: i for i, x in enumerate(user_ids)}
user_encoded2user = {i: x for i, x in enumerate(user_ids)}
rating_df["user"] = rating_df["user_id"].map(user2user_encoded)
n_users = len(user2user_encoded)

In [18]:
anime_ids = rating_df["anime_id"].unique().tolist()
anime2anime_encoded = {x: i for i, x in enumerate(anime_ids)}
anime_encoded2anime = {i: x for i, x in enumerate(anime_ids)}
rating_df["anime"] = rating_df["anime_id"].map(anime2anime_encoded)
n_animes = len(anime2anime_encoded)

In [19]:
type(user2user_encoded)

dict

In [20]:
user2user_encoded

{2: 0,
 6: 1,
 12: 2,
 16: 3,
 17: 4,
 19: 5,
 21: 6,
 41: 7,
 42: 8,
 44: 9,
 47: 10,
 53: 11,
 55: 12,
 60: 13,
 66: 14,
 73: 15,
 74: 16,
 85: 17,
 89: 18,
 90: 19,
 94: 20,
 98: 21,
 102: 22,
 108: 23,
 111: 24,
 112: 25,
 120: 26,
 121: 27,
 122: 28,
 135: 29,
 145: 30,
 146: 31,
 147: 32,
 153: 33,
 155: 34,
 156: 35,
 172: 36,
 174: 37,
 184: 38,
 190: 39,
 193: 40,
 194: 41,
 198: 42,
 204: 43,
 205: 44,
 209: 45,
 214: 46,
 219: 47,
 222: 48,
 227: 49,
 228: 50,
 235: 51,
 238: 52,
 240: 53,
 243: 54,
 248: 55,
 251: 56,
 252: 57,
 257: 58,
 264: 59,
 267: 60,
 272: 61,
 274: 62,
 275: 63,
 284: 64,
 285: 65,
 286: 66,
 290: 67,
 291: 68,
 293: 69,
 300: 70,
 301: 71,
 306: 72,
 308: 73,
 310: 74,
 313: 75,
 314: 76,
 316: 77,
 320: 78,
 321: 79,
 324: 80,
 325: 81,
 326: 82,
 327: 83,
 330: 84,
 336: 85,
 340: 86,
 345: 87,
 346: 88,
 349: 89,
 350: 90,
 366: 91,
 367: 92,
 371: 93,
 372: 94,
 375: 95,
 381: 96,
 382: 97,
 386: 98,
 389: 99,
 398: 100,
 405: 101,
 406: 102,
 

In [21]:
user_encoded2user

{0: 2,
 1: 6,
 2: 12,
 3: 16,
 4: 17,
 5: 19,
 6: 21,
 7: 41,
 8: 42,
 9: 44,
 10: 47,
 11: 53,
 12: 55,
 13: 60,
 14: 66,
 15: 73,
 16: 74,
 17: 85,
 18: 89,
 19: 90,
 20: 94,
 21: 98,
 22: 102,
 23: 108,
 24: 111,
 25: 112,
 26: 120,
 27: 121,
 28: 122,
 29: 135,
 30: 145,
 31: 146,
 32: 147,
 33: 153,
 34: 155,
 35: 156,
 36: 172,
 37: 174,
 38: 184,
 39: 190,
 40: 193,
 41: 194,
 42: 198,
 43: 204,
 44: 205,
 45: 209,
 46: 214,
 47: 219,
 48: 222,
 49: 227,
 50: 228,
 51: 235,
 52: 238,
 53: 240,
 54: 243,
 55: 248,
 56: 251,
 57: 252,
 58: 257,
 59: 264,
 60: 267,
 61: 272,
 62: 274,
 63: 275,
 64: 284,
 65: 285,
 66: 286,
 67: 290,
 68: 291,
 69: 293,
 70: 300,
 71: 301,
 72: 306,
 73: 308,
 74: 310,
 75: 313,
 76: 314,
 77: 316,
 78: 320,
 79: 321,
 80: 324,
 81: 325,
 82: 326,
 83: 327,
 84: 330,
 85: 336,
 86: 340,
 87: 345,
 88: 346,
 89: 349,
 90: 350,
 91: 366,
 92: 367,
 93: 371,
 94: 372,
 95: 375,
 96: 381,
 97: 382,
 98: 386,
 99: 389,
 100: 398,
 101: 405,
 102: 406,
 

In [22]:
anime_encoded2anime

{0: 24833,
 1: 235,
 2: 36721,
 3: 40956,
 4: 31933,
 5: 5042,
 6: 7593,
 7: 21,
 8: 35446,
 9: 24,
 10: 22,
 11: 38034,
 12: 17251,
 13: 5762,
 14: 31580,
 15: 33253,
 16: 35028,
 17: 9513,
 18: 368,
 19: 11633,
 20: 31964,
 21: 33486,
 22: 31740,
 23: 1470,
 24: 1575,
 25: 2904,
 26: 1535,
 27: 28223,
 28: 226,
 29: 38671,
 30: 32872,
 31: 15,
 32: 71,
 33: 7661,
 34: 245,
 35: 263,
 36: 5258,
 37: 270,
 38: 24703,
 39: 15451,
 40: 8074,
 41: 11061,
 42: 28961,
 43: 34542,
 44: 14719,
 45: 20899,
 46: 26055,
 47: 34933,
 48: 37086,
 49: 1604,
 50: 22535,
 51: 189,
 52: 34599,
 53: 29575,
 54: 10620,
 55: 32182,
 56: 23755,
 57: 20,
 58: 1735,
 59: 35581,
 60: 5040,
 61: 30276,
 62: 34134,
 63: 6893,
 64: 11499,
 65: 29786,
 66: 16498,
 67: 28171,
 68: 32282,
 69: 9253,
 70: 3455,
 71: 22319,
 72: 10851,
 73: 40658,
 74: 29809,
 75: 25397,
 76: 2852,
 77: 1635,
 78: 1636,
 79: 40337,
 80: 39085,
 81: 857,
 82: 9721,
 83: 36625,
 84: 6987,
 85: 8577,
 86: 9849,
 87: 4010,
 88: 16642,
 

In [23]:
n_animes

17560

In [24]:
n_users

91641

In [25]:
rating_df

Unnamed: 0,user_id,anime_id,rating,user,anime
213,2,24833,0.0,0,0
214,2,235,1.0,0,1
215,2,36721,0.0,0,2
216,2,40956,0.0,0,3
217,2,31933,0.0,0,4
...,...,...,...,...,...
109224268,353398,34086,0.0,91640,4428
109224269,353398,17909,0.0,91640,1453
109224270,353398,32924,0.0,91640,4837
109224271,353398,24627,0.0,91640,2095


In [26]:
anime_weights = extract_weights('anime_embedding', model)
user_weights = extract_weights('user_embedding', model)

In [27]:
df = pd.read_csv(INPUT_DIR + '/anime.csv', low_memory=True)
df = df.replace("Unknown", np.nan)

In [28]:
df['anime_id'] = df['MAL_ID']
df["eng_version"] = df['English name']
df['eng_version'] = df.anime_id.apply(lambda x: getAnimeName(x))

In [29]:
df = df[["anime_id", "eng_version", 
         "Score", "Genres", "Episodes", 
         "Type", "Premiered", "Members"]]

In [30]:
cols = ["MAL_ID", "Name", "Genres", "sypnopsis"]
sypnopsis_df = pd.read_csv(INPUT_DIR + '/anime_with_synopsis.csv', usecols=cols)

In [31]:
users = []
    for i in range(5):
        ratings_per_user = rating_df.groupby('user_id').size()
        random_user = ratings_per_user[ratings_per_user < 500].sample(1, random_state=None).index[0]
        users.append(random_user)
users

IndentationError: unexpected indent (2447959008.py, line 2)

In [32]:
users = []
for i in range(5):
    ratings_per_user = rating_df.groupby('user_id').size()
    random_user = ratings_per_user[ratings_per_user < 500].sample(1, random_state=None).index[0]
    users.append(random_user)
users

[np.int64(141802),
 np.int64(33051),
 np.int64(40666),
 np.int64(44619),
 np.int64(239055)]

In [33]:
user_pref = []

In [34]:
for user in users:
    user_pref.append(get_recommendations(user))

NameError: name 'get_recommendations' is not defined

In [35]:
def get_recommendations(user_id):
    try:
        user_id = np.int64(user_id)
        animes_watched_by_user = rating_df[rating_df.user_id==user_id]
        anime_not_watched_df = df[
            ~df["anime_id"].isin(animes_watched_by_user.anime_id.values)
        ]
        
        anime_not_watched = list(
            set(anime_not_watched_df['anime_id']).intersection(set(anime2anime_encoded.keys()))
        )

        anime_not_watched = [[anime2anime_encoded.get(x)] for x in anime_not_watched]

        user_encoder = user2user_encoded.get(user_id)

        user_anime_array = np.hstack(
            ([[user_encoder]] * len(anime_not_watched), anime_not_watched)
        )

        user_anime_array = [user_anime_array[:, 0], user_anime_array[:, 1]]
        ratings = model.predict(user_anime_array).flatten()

        top_ratings_indices = (-ratings).argsort()[:10]

        recommended_anime_ids = [
            anime_encoded2anime.get(anime_not_watched[x][0]) for x in top_ratings_indices
        ]
        
        Results = []
        top_rated_ids = []

        for index, anime_id in enumerate(anime_not_watched):
            rating = ratings[index]
            rating = float(np.nan_to_num(rating, nan=0.0, posinf=1.0, neginf=0.0))
            id_ = anime_encoded2anime.get(anime_id[0])
            
            if id_ in recommended_anime_ids:
                top_rated_ids.append(id_)
                try:
                    condition = (df.anime_id == id_)
                    name = df[condition]['eng_version'].values[0]
                    genre = df[condition].Genres.values[0]
                    score = df[condition].Score.values[0]
                    sypnopsis = getSypnopsis(int(id_))
                except:
                    continue
                    
                Results.append({"anime_id": int(id_), 
                                "name": str(name), 
                                "pred_rating": float(rating),
                                "genre": str(genre), 
                                'sypnopsis': str(sypnopsis)})
        return Results
    
    except Exception as e:
        print(e)

In [36]:
for user in users:
    user_pref.append(get_recommendations(user))

[1m535/535[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m536/536[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step  
[1m536/536[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step  
[1m535/535[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step 
[1m536/536[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step  


In [37]:
user_pref

[[{'anime_id': 33929,
   'name': 'Boku no Hero Academia: Sukue! Kyuujo Kunren!',
   'pred_rating': 0.9571894407272339,
   'genre': 'Action, Comedy, School, Shounen, Super Power',
   'sypnopsis': 'UA High School must regain the public\'s confidence after the surprise villain attack during class 1-A\'s training session. Although some of the teachers were gravely injured in the attack, Izuku "Deku" Midoriya and his classmates must continue to learn and train, and utilize their quirks in varying environments and circumstances. Boku no Hero Academia: Sukue! Kyuujo Kunren! follows class 1-A as they attempt to finally complete their training. However, there\'s a masked figure roaming around the training center. Have the villains responsible for the previous incident returned to finish the job? If so, are the students ready to fight back?'},
  {'anime_id': 42076,
   'name': 'How to Eat Life',
   'pred_rating': 0.9689503908157349,
   'genre': 'Music, Horror, Supernatural',
   'sypnopsis': 'usic

In [38]:
def get_recommendations(user_id):
    try:
        user_id = np.int64(user_id)
        animes_watched_by_user = rating_df[rating_df.user_id==user_id]
        anime_not_watched_df = df[
            ~df["anime_id"].isin(animes_watched_by_user.anime_id.values)
        ]
        
        anime_not_watched = list(
            set(anime_not_watched_df['anime_id']).intersection(set(anime2anime_encoded.keys()))
        )

        anime_not_watched = [[anime2anime_encoded.get(x)] for x in anime_not_watched]

        user_encoder = user2user_encoded.get(user_id)

        user_anime_array = np.hstack(
            ([[user_encoder]] * len(anime_not_watched), anime_not_watched)
        )

        user_anime_array = [user_anime_array[:, 0], user_anime_array[:, 1]]
        ratings = model.predict(user_anime_array).flatten()

        top_ratings_indices = (-ratings).argsort()[:10]

        recommended_anime_ids = [
            anime_encoded2anime.get(anime_not_watched[x][0]) for x in top_ratings_indices
        ]
        
        Results = []
        top_rated_ids = []

        for index, anime_id in enumerate(anime_not_watched):
            rating = ratings[index]
            rating = float(np.nan_to_num(rating, nan=0.0, posinf=1.0, neginf=0.0))
            id_ = anime_encoded2anime.get(anime_id[0])
            
            if id_ in recommended_anime_ids:
                top_rated_ids.append(id_)
                Results.append({"anime_id": int(id_)})
        return Results
    
    except Exception as e:
        print(e)

In [39]:
for user in users:
    user_pref.append(get_recommendations(user))

[1m535/535[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step  
[1m536/536[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step 
[1m536/536[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step  
[1m535/535[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step 
[1m536/536[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step


In [40]:
user_pref

[[{'anime_id': 33929,
   'name': 'Boku no Hero Academia: Sukue! Kyuujo Kunren!',
   'pred_rating': 0.9571894407272339,
   'genre': 'Action, Comedy, School, Shounen, Super Power',
   'sypnopsis': 'UA High School must regain the public\'s confidence after the surprise villain attack during class 1-A\'s training session. Although some of the teachers were gravely injured in the attack, Izuku "Deku" Midoriya and his classmates must continue to learn and train, and utilize their quirks in varying environments and circumstances. Boku no Hero Academia: Sukue! Kyuujo Kunren! follows class 1-A as they attempt to finally complete their training. However, there\'s a masked figure roaming around the training center. Have the villains responsible for the previous incident returned to finish the job? If so, are the students ready to fight back?'},
  {'anime_id': 42076,
   'name': 'How to Eat Life',
   'pred_rating': 0.9689503908157349,
   'genre': 'Music, Horror, Supernatural',
   'sypnopsis': 'usic

In [41]:
def get_recommendations(user_id):
    try:
        animes_watched_by_user = rating_df[rating_df.user_id == user_id]
        anime_not_watched_df = df[~df["anime_id"].isin(animes_watched_by_user.anime_id.values)]
        
        anime_not_watched = [
            anime2anime_encoded.get(x) for x in anime_not_watched_df["anime_id"]
            if x in anime2anime_encoded
        ]
        
        user_encoder = user2user_encoded.get(user_id)
        user_anime_array = [np.full(len(anime_not_watched), user_encoder), anime_not_watched]

        ratings = model.predict(user_anime_array).flatten()
        top_ratings_indices = (-ratings).argsort()[:10]

        return [anime_encoded2anime.get(anime_not_watched[x]) for x in top_ratings_indices]
    
    except Exception as e:
        print(e)
        return []

In [42]:
user_pref = []
for user in users:
    user_pref.append(get_recommendations(user))
user_pref

Unrecognized data type: x=[array([36683, 36683, 36683, ..., 36683, 36683, 36683]), [859, 1000, 1009, 9251, 31, 3421, 6250, 2487, 10, 7866, 9, 2150, 1001, 1590, 7153, 943, 2305, 1575, 4902, 1129, 2558, 1599, 766, 912, 2564, 515, 6150, 6578, 1286, 855, 545, 862, 3540, 3728, 727, 730, 844, 3725, 848, 4706, 32, 579, 161, 1319, 1439, 927, 2184, 937, 3562, 4600, 8085, 4601, 4073, 4607, 4608, 6836, 6365, 3561, 3828, 3847, 3827, 4071, 4193, 5086, 1380, 930, 932, 3742, 3581, 2770, 6688, 843, 6037, 3530, 6632, 7167, 2570, 4955, 7806, 6151, 4739, 2716, 11934, 7813, 6579, 6601, 3729, 821, 3610, 3463, 3614, 1793, 1612, 5508, 7588, 2476, 4365, 4366, 1325, 1533, 1335, 2483, 1346, 4488, 4486, 4487, 4665, 3629, 637, 3630, 3013, 3737, 2647, 4716, 663, 496, 6468, 976, 3520, 4541, 4651, 814, 670, 5490, 1619, 1403, 6635, 3571, 1408, 2406, 961, 3834, 1581, 7791, 984, 2632, 6659, 1002, 3746, 6661, 4540, 6146, 987, 6583, 6148, 2800, 7522, 51, 287, 2594, 2595, 1488, 5399, 1558, 3315, 6076, 977, 6266, 6273, 101

[[], [], [], [], []]

In [43]:
def get_recommendations(user_id):
    try:
        user_id = np.int64(user_id)
        animes_watched_by_user = rating_df[rating_df.user_id==user_id]
        anime_not_watched_df = df[
            ~df["anime_id"].isin(animes_watched_by_user.anime_id.values)
        ]
        
        anime_not_watched = list(
            set(anime_not_watched_df['anime_id']).intersection(set(anime2anime_encoded.keys()))
        )

        anime_not_watched = [[anime2anime_encoded.get(x)] for x in anime_not_watched]

        user_encoder = user2user_encoded.get(user_id)

        user_anime_array = np.hstack(
            ([[user_encoder]] * len(anime_not_watched), anime_not_watched)
        )

        user_anime_array = [user_anime_array[:, 0], user_anime_array[:, 1]]
        ratings = model.predict(user_anime_array).flatten()

        top_ratings_indices = (-ratings).argsort()[:10]

        recommended_anime_ids = [
            anime_encoded2anime.get(anime_not_watched[x][0]) for x in top_ratings_indices
        ]
        top_rated_ids = []

        for index, anime_id in enumerate(anime_not_watched):
            id_ = anime_encoded2anime.get(anime_id[0])
            
            if id_ in recommended_anime_ids:
                top_rated_ids.append(id_)
        return top_rated_ids
    
    except Exception as e:
        print(e)

In [44]:
user_pref = []
for user in users:
    user_pref.append(get_recommendations(user))
user_pref

[1m535/535[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step  
[1m536/536[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m536/536[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m535/535[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step  
[1m536/536[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step


[[33929, 42076, 22961, 23775, 23777, 24655, 24913, 37608, 31121, 31156],
 [41458, 42082, 39453, 44408, 46630, 47623, 48422, 48492, 30814, 32581],
 [4811, 7494, 7647, 8063, 9074, 32949, 10083, 10743, 28223, 30367],
 [37348, 7329, 40410, 34626, 17437, 19363, 19775, 24893, 38096, 29623],
 [4097, 39174, 7472, 40909, 42497, 36104, 36561, 28587, 31658, 31674]]

In [45]:
def get_recommendations(user_id):
    try:
        user_id = np.int64(user_id)
        animes_watched_by_user = rating_df[rating_df.user_id==user_id]
        anime_not_watched_df = df[
            ~df["anime_id"].isin(animes_watched_by_user.anime_id.values)
        ]
        
        anime_not_watched = list(
            set(anime_not_watched_df['anime_id']).intersection(set(anime2anime_encoded.keys()))
        )

        anime_not_watched = [[anime2anime_encoded.get(x)] for x in anime_not_watched]

        user_encoder = user2user_encoded.get(user_id)

        user_anime_array = np.hstack(
            ([[user_encoder]] * len(anime_not_watched), anime_not_watched)
        )

        user_anime_array = [user_anime_array[:, 0], user_anime_array[:, 1]]
        ratings = model.predict(user_anime_array).flatten()

        top_ratings_indices = (-ratings).argsort()[:50]

        recommended_anime_ids = [
            anime_encoded2anime.get(anime_not_watched[x][0]) for x in top_ratings_indices
        ]
        top_rated_ids = []

        for index, anime_id in enumerate(anime_not_watched):
            id_ = anime_encoded2anime.get(anime_id[0])
            
            if id_ in recommended_anime_ids:
                top_rated_ids.append(id_)
        return top_rated_ids
    
    except Exception as e:
        print(e)

In [46]:
user_pref = []
for user in users:
    user_pref.append(get_recommendations(user))
user_pref

[1m535/535[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step  
[1m536/536[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step  
[1m536/536[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step  
[1m535/535[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m536/536[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step  


[[37402,
  39314,
  33929,
  39989,
  40105,
  40868,
  42076,
  42378,
  43590,
  12503,
  33071,
  35459,
  35466,
  14617,
  35551,
  15039,
  16866,
  36305,
  22961,
  23775,
  23777,
  24151,
  24655,
  24807,
  24913,
  24991,
  25291,
  37608,
  25781,
  26449,
  37921,
  37923,
  37924,
  27939,
  28285,
  28299,
  30458,
  31121,
  31156,
  31165,
  31297,
  31326,
  31553,
  31765,
  38921,
  32291,
  32343,
  32561,
  32634,
  32698],
 [38867,
  39110,
  39401,
  39403,
  39428,
  39488,
  39758,
  40621,
  8425,
  41277,
  41443,
  41458,
  41522,
  42082,
  42144,
  39453,
  42482,
  42542,
  42765,
  42766,
  42948,
  42966,
  42979,
  39158,
  43517,
  39504,
  43603,
  39507,
  43751,
  44075,
  44076,
  44389,
  44408,
  41448,
  12189,
  35026,
  39626,
  46630,
  39627,
  47623,
  48422,
  48466,
  48492,
  18983,
  26295,
  26321,
  30814,
  30927,
  38871,
  32581],
 [2815,
  33129,
  33220,
  37089,
  37401,
  4811,
  5774,
  39164,
  39911,
  7494,
  40410,
  76

In [47]:
from collections import defaultdict

In [48]:
def find_common_anime(user_recommendations, min_count=3):
    anime_count = defaultdict(int)
    for recommendations in user_recommendations:
        for anime_id in set(recommendations):
            anime_count[anime_id] += 1

    common_anime = [anime_id for anime_id, count in anime_count.items() if count >= min_count]

    return common_anime


In [49]:
common_anime_ids = find_common_anime(user_pref, min_count=3)

In [50]:
common_anime_ids

[]

In [51]:
def get_recommendations(user_id):
    try:
        user_id = np.int64(user_id)
        animes_watched_by_user = rating_df[rating_df.user_id==user_id]
        anime_not_watched_df = df[
            ~df["anime_id"].isin(animes_watched_by_user.anime_id.values)
        ]
        
        anime_not_watched = list(
            set(anime_not_watched_df['anime_id']).intersection(set(anime2anime_encoded.keys()))
        )

        anime_not_watched = [[anime2anime_encoded.get(x)] for x in anime_not_watched]

        user_encoder = user2user_encoded.get(user_id)

        user_anime_array = np.hstack(
            ([[user_encoder]] * len(anime_not_watched), anime_not_watched)
        )

        user_anime_array = [user_anime_array[:, 0], user_anime_array[:, 1]]
        ratings = model.predict(user_anime_array).flatten()

        top_ratings_indices = (-ratings).argsort()[:100]

        recommended_anime_ids = [
            anime_encoded2anime.get(anime_not_watched[x][0]) for x in top_ratings_indices
        ]
        top_rated_ids = []

        for index, anime_id in enumerate(anime_not_watched):
            id_ = anime_encoded2anime.get(anime_id[0])
            
            if id_ in recommended_anime_ids:
                top_rated_ids.append(id_)
        return top_rated_ids
    
    except Exception as e:
        print(e)

In [52]:
users = []
for i in range(5):
    ratings_per_user = rating_df.groupby('user_id').size()
    random_user = ratings_per_user[ratings_per_user < 500].sample(1, random_state=None).index[0]
    users.append(random_user)

for user in users:
    user_pref.append(get_recommendations(user))

[1m535/535[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m534/534[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step  
[1m536/536[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step 
[1m535/535[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step  
[1m536/536[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step 


In [53]:
common_anime_ids = find_common_anime(user_pref, min_count=3)

In [54]:
common_anime_ids

[48492, 21855]

In [55]:
common_anime_ids = find_common_anime(user_pref, min_count=2)
common_anime_ids

[39314,
 15039,
 32343,
 47623,
 48422,
 32581,
 39110,
 48466,
 8425,
 48492,
 26015,
 15037,
 2815,
 40410,
 39911,
 38410,
 16918,
 19363,
 443,
 21855,
 28025,
 185,
 186,
 372,
 32182,
 17913,
 15689,
 48470,
 48481,
 22535,
 40052,
 14075,
 25013,
 21995]

In [56]:
get_user_preferences(352464)

Unnamed: 0,anime_id,eng_version,Genres
25,44,Samurai X:Trust and Betrayal,"Action, Historical, Drama, Romance, Martial Ar..."
26,45,Rurouni Kenshin,"Action, Adventure, Comedy, Historical, Romance..."
27,46,Samurai X:The Motion Picture,"Samurai, Historical, Drama, Shounen"
30,49,Oh! My Goddess,"Comedy, Magic, Romance, Seinen, Supernatural"
31,50,Ah! My Goddess,"Comedy, Supernatural, Magic, Romance, Seinen"
...,...,...,...
10341,30091,Chaos Dragon,"Action, Fantasy, Supernatural"
10407,30191,Durarara!! x2 Shou:My Heart Is in the Pattern ...,"Action, Mystery, Supernatural"
10459,30296,Chivalry of a Failed Knight,"Action, Romance, Ecchi, Fantasy, School"
10463,30307,Monster Musume:Everyday Life with Monster Girls,"Harem, Comedy, Romance, Ecchi, Fantasy, Seinen"


In [57]:
get_user_preferences(352464, verbose=1)

> User #352464 has rated 454 movies (avg. rating = 0.0)
> preferred genres


Unnamed: 0,anime_id,eng_version,Genres
25,44,Samurai X:Trust and Betrayal,"Action, Historical, Drama, Romance, Martial Ar..."
26,45,Rurouni Kenshin,"Action, Adventure, Comedy, Historical, Romance..."
27,46,Samurai X:The Motion Picture,"Samurai, Historical, Drama, Shounen"
30,49,Oh! My Goddess,"Comedy, Magic, Romance, Seinen, Supernatural"
31,50,Ah! My Goddess,"Comedy, Supernatural, Magic, Romance, Seinen"
...,...,...,...
10341,30091,Chaos Dragon,"Action, Fantasy, Supernatural"
10407,30191,Durarara!! x2 Shou:My Heart Is in the Pattern ...,"Action, Mystery, Supernatural"
10459,30296,Chivalry of a Failed Knight,"Action, Romance, Ecchi, Fantasy, School"
10463,30307,Monster Musume:Everyday Life with Monster Girls,"Harem, Comedy, Romance, Ecchi, Fantasy, Seinen"


In [58]:
type(get_user_preferences(352464))

pandas.core.frame.DataFrame

In [59]:
def get_user_preferences(user_id, plot=False, verbose=0):
    try:    
        animes_watched_by_user = rating_df[rating_df.user_id==user_id]
        user_rating_percentile = np.percentile(animes_watched_by_user.rating, 75)
        animes_watched_by_user = animes_watched_by_user[animes_watched_by_user.rating >= user_rating_percentile]
        top_animes_user = (
            animes_watched_by_user.sort_values(by="rating", ascending=False)#.head(10)
            .anime_id.values
        )
        
        anime_df_rows = df[df["anime_id"].isin(top_animes_user)]
        anime_df_rows = anime_df_rows[["anime_id","eng_version", "Genres"]]
        
        if verbose != 0:
            print("> User #{} has rated {} movies (avg. rating = {:.1f})".format(
            user_id, len(animes_watched_by_user),
            animes_watched_by_user['rating'].mean(),
            ))
        
            print('> preferred genres')
        
        if plot:
            genres_list = getFavGenre(anime_df_rows, plot)
            return anime_df_rows, genres_list#.eng_version.values
        
        return anime_df_rows
    except Exception as e:
        print("get_user_preferences")
        print(e)

In [60]:
get_user_preferences(352464)

Unnamed: 0,anime_id,eng_version,Genres
25,44,Samurai X:Trust and Betrayal,"Action, Historical, Drama, Romance, Martial Ar..."
26,45,Rurouni Kenshin,"Action, Adventure, Comedy, Historical, Romance..."
27,46,Samurai X:The Motion Picture,"Samurai, Historical, Drama, Shounen"
30,49,Oh! My Goddess,"Comedy, Magic, Romance, Seinen, Supernatural"
31,50,Ah! My Goddess,"Comedy, Supernatural, Magic, Romance, Seinen"
...,...,...,...
10341,30091,Chaos Dragon,"Action, Fantasy, Supernatural"
10407,30191,Durarara!! x2 Shou:My Heart Is in the Pattern ...,"Action, Mystery, Supernatural"
10459,30296,Chivalry of a Failed Knight,"Action, Romance, Ecchi, Fantasy, School"
10463,30307,Monster Musume:Everyday Life with Monster Girls,"Harem, Comedy, Romance, Ecchi, Fantasy, Seinen"


In [61]:
anime2anime_encoded

{24833: 0,
 235: 1,
 36721: 2,
 40956: 3,
 31933: 4,
 5042: 5,
 7593: 6,
 21: 7,
 35446: 8,
 24: 9,
 22: 10,
 38034: 11,
 17251: 12,
 5762: 13,
 31580: 14,
 33253: 15,
 35028: 16,
 9513: 17,
 368: 18,
 11633: 19,
 31964: 20,
 33486: 21,
 31740: 22,
 1470: 23,
 1575: 24,
 2904: 25,
 1535: 26,
 28223: 27,
 226: 28,
 38671: 29,
 32872: 30,
 15: 31,
 71: 32,
 7661: 33,
 245: 34,
 263: 35,
 5258: 36,
 270: 37,
 24703: 38,
 15451: 39,
 8074: 40,
 11061: 41,
 28961: 42,
 34542: 43,
 14719: 44,
 20899: 45,
 26055: 46,
 34933: 47,
 37086: 48,
 1604: 49,
 22535: 50,
 189: 51,
 34599: 52,
 29575: 53,
 10620: 54,
 32182: 55,
 23755: 56,
 20: 57,
 1735: 58,
 35581: 59,
 5040: 60,
 30276: 61,
 34134: 62,
 6893: 63,
 11499: 64,
 29786: 65,
 16498: 66,
 28171: 67,
 32282: 68,
 9253: 69,
 3455: 70,
 22319: 71,
 10851: 72,
 40658: 73,
 29809: 74,
 25397: 75,
 2852: 76,
 1635: 77,
 1636: 78,
 40337: 79,
 39085: 80,
 857: 81,
 9721: 82,
 36625: 83,
 6987: 84,
 8577: 85,
 9849: 86,
 4010: 87,
 16642: 88,
 

In [62]:
def find_similar_animes(name, n=10, return_dist=False, neg=False):
    try:
        index = getAnimeFrame(name).anime_id.values[0]
        print(index)
        encoded_index = anime2anime_encoded.get(index)
        weights = anime_weights
        
        dists = np.dot(weights, weights[encoded_index])
        sorted_dists = np.argsort(dists)
        
        n = n + 1            
        closest = sorted_dists[-n:]
        
        SimilarityArr = []

        for close in closest:
            decoded_id = anime_encoded2anime.get(close)
            SimilarityArr.append({"anime_id": decoded_id})

        return SimilarityArr

    except Exception as e:
        print(e)
        print('{}!, Not Found in Anime list'.format(name))


In [63]:
find_similar_animes(368)

368


[{'anime_id': 2411},
 {'anime_id': 7456},
 {'anime_id': 1787},
 {'anime_id': 2413},
 {'anime_id': 2852},
 {'anime_id': 2432},
 {'anime_id': 5347},
 {'anime_id': 2873},
 {'anime_id': 1401},
 {'anime_id': 972},
 {'anime_id': 368}]

In [64]:
def find_similar_animes(name, n=10, return_dist=False, neg=False):
    try:
        index = getAnimeFrame(name).anime_id.values[0]
        print(index)
        encoded_index = anime2anime_encoded.get(index)
        weights = anime_weights
        
        dists = np.dot(weights, weights[encoded_index])
        sorted_dists = np.argsort(dists)
        
        n = n + 1            
        closest = sorted_dists[-n:]
        
        SimilarityArr = []

        for close in closest:
            decoded_id = anime_encoded2anime.get(close)
            SimilarityArr.append(decoded_id)

        return SimilarityArr

    except Exception as e:
        print(e)
        print('{}!, Not Found in Anime list'.format(name))

In [65]:
find_similar_animes(368)

368


[2411, 7456, 1787, 2413, 2852, 2432, 5347, 2873, 1401, 972, 368]

In [66]:
find_similar_animes(2904)

2904


[356, 10087, 121, 11741, 16498, 9253, 6547, 5114, 1535, 1575, 2904]

In [1]:
get_group_recommendations()

NameError: name 'get_group_recommendations' is not defined

In [2]:
import pandas as pd
import tensorflow as tf

import numpy as np
import pandas as pd

from collections import defaultdict
import matplotlib.pyplot as plt

from fastapi import FastAPI

app = FastAPI()

def extract_weights(name, model):
    weight_layer = model.get_layer(name)
    weights = weight_layer.get_weights()[0]
    weights = weights / np.linalg.norm(weights, axis = 1).reshape((-1, 1))
    return weights

def getAnimeName(anime_id):
    try:
        name = df[df.anime_id == anime_id].eng_version.values[0]
        if name is np.nan:
            name = df[df.anime_id == anime_id].Name.values[0]
    except:
        print('error')
    
    return name

def getSypnopsis(anime):
    if isinstance(anime, int):
        return sypnopsis_df[sypnopsis_df.MAL_ID == anime].sypnopsis.values[0]
    if isinstance(anime, str):
        return sypnopsis_df[sypnopsis_df.Name == anime].sypnopsis.values[0]

def getAnimeFrame(anime):
    if isinstance(anime, int):
        return df[df.anime_id == anime]
    if isinstance(anime, str):
        return df[df.eng_version == anime]

def find_similar_animes(name, n=10, return_dist=False, neg=False):
    try:
        index = getAnimeFrame(name).anime_id.values[0]
        encoded_index = anime2anime_encoded.get(index)
        weights = anime_weights
        
        dists = np.dot(weights, weights[encoded_index])
        sorted_dists = np.argsort(dists)
        
        n = n + 1            
        closest = sorted_dists[-n:]
        
        SimilarityArr = []

        for close in closest:
            decoded_id = anime_encoded2anime.get(close)
            SimilarityArr.append(decoded_id)

        return SimilarityArr

    except Exception as e:
        print(e)
        print('{}!, Not Found in Anime list'.format(name))

def find_similar_users(item_input:int, n=10,return_dist=False, neg=False):
    try:
        index = item_input
        encoded_index = user2user_encoded.get(index)
        weights = user_weights
    
        dists = np.dot(weights, weights[encoded_index])
        sorted_dists = np.argsort(dists)
        
        n = n + 1
        
        if neg:
            closest = sorted_dists[:n]
        else:
            closest = sorted_dists[-n:]

        print('> users similar to #{}'.format(item_input))

        if return_dist:
            return dists, closest
        
        rindex = df
        SimilarityArr = []
        
        for close in closest:
            similarity = dists[close]

            if isinstance(item_input, int):
                decoded_id = user_encoded2user.get(close)
                SimilarityArr.append({"similar_users": decoded_id, 
                                      "similarity": similarity})
        print(SimilarityArr)
        Frame = pd.DataFrame(SimilarityArr).sort_values(by="similarity", 
                                                        ascending=False)
        
        return Frame
    
    except Exception as e:
        print('find_similar_users')
        print(e)

def getFavGenre(frame, plot=False):
    try:
        frame.dropna(inplace=False)
        all_genres = defaultdict(int)
        
        genres_list = []
        for genres in frame['Genres']:
            if isinstance(genres, str):
                for genre in genres.split(','):
                    if genre.strip() not in genres_list:
                        genres_list.append(genre.strip())
                        all_genres[genre.strip()] += 1    
        
        print("> Genre list:")
        print(genres_list)
        return(genres_list)
    
    except Exception as e:
        print("getFavGenre")
        print(e)

def get_user_preferences(user_id, plot=False, verbose=0):
    try:    
        animes_watched_by_user = rating_df[rating_df.user_id==user_id]
        user_rating_percentile = np.percentile(animes_watched_by_user.rating, 75)
        animes_watched_by_user = animes_watched_by_user[animes_watched_by_user.rating >= user_rating_percentile]
        top_animes_user = (
            animes_watched_by_user.sort_values(by="rating", ascending=False)#.head(10)
            .anime_id.values
        )
        
        anime_df_rows = df[df["anime_id"].isin(top_animes_user)]
        anime_df_rows = anime_df_rows[["anime_id","eng_version", "Genres"]]
        
        if verbose != 0:
            print("> User #{} has rated {} movies (avg. rating = {:.1f})".format(
            user_id, len(animes_watched_by_user),
            animes_watched_by_user['rating'].mean(),
            ))
        
        
        if plot:
            print('> preferred genres')
            genres_list = getFavGenre(anime_df_rows, plot)
            return anime_df_rows, genres_list#.eng_version.values
        
        return anime_df_rows
    except Exception as e:
        print("get_user_preferences")
        print(e)

def GetRandomUsers():
    users = []
    for i in range(5):
        ratings_per_user = rating_df.groupby('user_id').size()
        random_user = ratings_per_user[ratings_per_user < 500].sample(1, random_state=None).index[0]
        users.append(random_user)
    return users

def GetGroupRecommendation(users):
    user_pref = []
    for user in users:
        user_pref.append(get_recommendations(user))
    common_anime_ids = find_common_anime(user_pref, min_count=3)
    return common_anime_ids

def find_common_anime(user_recommendations, min_count=3):
    anime_count = defaultdict(int)
    for recommendations in user_recommendations:
        for anime_id in set(recommendations):
            anime_count[anime_id] += 1

    common_anime = [anime_id for anime_id, count in anime_count.items() if count >= min_count]

    return common_anime

In [3]:
MODEL_PATH = '../saved_model/my_model.h5'
model = tf.keras.models.load_model(MODEL_PATH)



In [4]:
INPUT_DIR = 'E:/anime-recommendation/data'
rating_df = pd.read_csv(INPUT_DIR + '/animelist.csv', 
                            usecols=["user_id", "anime_id", "rating"]
                            #, nrows=90000000
                            )

n_ratings = rating_df['user_id'].value_counts()
rating_df = rating_df[rating_df['user_id'].isin(n_ratings[n_ratings >= 400].index)].copy()

min_rating = min(rating_df['rating'])
max_rating = max(rating_df['rating'])
avg_rating = np.mean(rating_df['rating'])
rating_df['rating'] = rating_df["rating"].apply(lambda x: (x - min_rating) / (max_rating - min_rating)).values.astype(np.float64)

user_ids = rating_df["user_id"].unique().tolist()
user2user_encoded = {x: i for i, x in enumerate(user_ids)}
user_encoded2user = {i: x for i, x in enumerate(user_ids)}
rating_df["user"] = rating_df["user_id"].map(user2user_encoded)
n_users = len(user2user_encoded)

anime_ids = rating_df["anime_id"].unique().tolist()
anime2anime_encoded = {x: i for i, x in enumerate(anime_ids)}
anime_encoded2anime = {i: x for i, x in enumerate(anime_ids)}
rating_df["anime"] = rating_df["anime_id"].map(anime2anime_encoded)
n_animes = len(anime2anime_encoded)

anime_weights = extract_weights('anime_embedding', model)
user_weights = extract_weights('user_embedding', model)

df = pd.read_csv(INPUT_DIR + '/anime.csv', low_memory=True)
df = df.replace("Unknown", np.nan)

df['anime_id'] = df['MAL_ID']
df["eng_version"] = df['English name']
df['eng_version'] = df.anime_id.apply(lambda x: getAnimeName(x))

df = df[["anime_id", "eng_version", 
         "Score", "Genres", "Episodes", 
         "Type", "Premiered", "Members"]]

cols = ["MAL_ID", "Name", "Genres", "sypnopsis"]
sypnopsis_df = pd.read_csv(INPUT_DIR + '/anime_with_synopsis.csv', usecols=cols)

In [5]:
get_group_recommendations()

NameError: name 'get_group_recommendations' is not defined

In [6]:
def get_recommendations(user_id, n=500):
    try:
        user_id = np.int64(user_id)
        animes_watched_by_user = rating_df[rating_df.user_id==user_id]
        anime_not_watched_df = df[
            ~df["anime_id"].isin(animes_watched_by_user.anime_id.values)
        ]
        
        anime_not_watched = list(
            set(anime_not_watched_df['anime_id']).intersection(set(anime2anime_encoded.keys()))
        )

        anime_not_watched = [[anime2anime_encoded.get(x)] for x in anime_not_watched]

        user_encoder = user2user_encoded.get(user_id)

        user_anime_array = np.hstack(
            ([[user_encoder]] * len(anime_not_watched), anime_not_watched)
        )

        user_anime_array = [user_anime_array[:, 0], user_anime_array[:, 1]]
        ratings = model.predict(user_anime_array).flatten()

        top_ratings_indices = (-ratings).argsort()[:int(n)]

        recommended_anime_ids = [
            anime_encoded2anime.get(anime_not_watched[x][0]) for x in top_ratings_indices
        ]
        top_rated_ids = []

        for index, anime_id in enumerate(anime_not_watched):
            id_ = anime_encoded2anime.get(anime_id[0])
            
            if id_ in recommended_anime_ids:
                top_rated_ids.append(id_)
        return top_rated_ids

SyntaxError: incomplete input (3197483944.py, line 36)

In [7]:
def get_recommendations(user_id, n=500):
    try:
        user_id = np.int64(user_id)
        animes_watched_by_user = rating_df[rating_df.user_id==user_id]
        anime_not_watched_df = df[
            ~df["anime_id"].isin(animes_watched_by_user.anime_id.values)
        ]
        
        anime_not_watched = list(
            set(anime_not_watched_df['anime_id']).intersection(set(anime2anime_encoded.keys()))
        )

        anime_not_watched = [[anime2anime_encoded.get(x)] for x in anime_not_watched]

        user_encoder = user2user_encoded.get(user_id)

        user_anime_array = np.hstack(
            ([[user_encoder]] * len(anime_not_watched), anime_not_watched)
        )

        user_anime_array = [user_anime_array[:, 0], user_anime_array[:, 1]]
        ratings = model.predict(user_anime_array).flatten()

        top_ratings_indices = (-ratings).argsort()[:int(n)]

        recommended_anime_ids = [
            anime_encoded2anime.get(anime_not_watched[x][0]) for x in top_ratings_indices
        ]
        top_rated_ids = []

        for index, anime_id in enumerate(anime_not_watched):
            id_ = anime_encoded2anime.get(anime_id[0])
            
            if id_ in recommended_anime_ids:
                top_rated_ids.append(id_)
        return top_rated_ids
    except Exception as e:
        print(e)

In [8]:
def get_group_recommendations():
    try:
        users = GetRandomUsers()
        user_pref = []
        for user in users:
            user_pref.append(get_recommendations(user))
        common_anime_ids = find_common_anime(user_pref, min_count=3)
        return common_anime_ids
    
    except Exception as e: 
        print("/get-group-recommendation")
        print(e)

In [9]:
get_group_recommendations()

[1m536/536[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step
[1m536/536[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m535/535[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step  
[1m537/537[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m536/536[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step


[[14349,
  36904,
  6280,
  2313,
  6505,
  6675,
  31321,
  731,
  15077,
  9204,
  37985,
  5365,
  29943,
  38410,
  38815,
  5,
  18,
  137,
  185,
  186,
  570,
  27663,
  5397,
  5578,
  8743,
  2666,
  13629]]

In [10]:
type(get_group_recommendations())

[1m534/534[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step  
[1m536/536[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m536/536[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m534/534[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step  
[1m534/534[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step  


list

In [11]:
def get_group_recommendations():
    try:
        users = GetRandomUsers()
        user_pref = []
        for user in users:
            user_pref.append(get_recommendations(user))
        common_anime_ids = find_common_anime(user_pref, min_count=3)
        return {"anime_ids": common_anime_ids, "user_ids": users}
    
    except Exception as e: 
        print("/get-group-recommendation")
        print(e)

In [12]:
get_group_recommendations()

[1m534/534[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step  
[1m534/534[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step  
[1m536/536[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m537/537[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m536/536[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step  


[16894,
 2815,
 11113,
 37985,
 40215,
 9547,
 7664,
 42496,
 42497,
 32380,
 30347,
 24415,
 532,
 3081,
 1239,
 38234,
 26055,
 5712,
 2952,
 36466]

In [13]:
def get_group_recommendations():
    try:
        users = GetRandomUsers()
        user_pref = []
        for user in users:
            user_pref.append(get_recommendations(user))
        common_anime_ids = find_common_anime(user_pref, min_count=3)
        return {"anime_ids": common_anime_ids, "user_ids": (int)users}
    
    except Exception as e: 
        print("/get-group-recommendation")
        print(e)

In [14]:
get_group_recommendations()

[1m535/535[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m536/536[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m534/534[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step
[1m536/536[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step
[1m537/537[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step


{'anime_ids': [65,
  2150,
  2235,
  644,
  13281,
  7465,
  30263,
  5712,
  20159,
  12067,
  1943,
  2117,
  2407,
  4471,
  16782,
  5177,
  34240,
  24459],
 'user_ids': [np.int64(223683),
  np.int64(332916),
  np.int64(244477),
  np.int64(150007),
  np.int64(96040)]}

In [15]:
def get_group_recommendations():
    try:
        users = GetRandomUsers()
        user_pref = []
        for user in users:
            user_pref.append(get_recommendations(user))
        common_anime_ids = find_common_anime(user_pref, min_count=3)
        return {"anime_ids": common_anime_ids, "user_ids": (int)users}
    
    except Exception as e: 
        print("/get-group-recommendation")
        print(e)

SyntaxError: invalid syntax. Perhaps you forgot a comma? (302297665.py, line 8)

In [16]:
def get_group_recommendations():
    try:
        users = GetRandomUsers()
        user_pref = []
        for user in users:
            user_pref.append(get_recommendations(user))
        common_anime_ids = find_common_anime(user_pref, min_count=3)
        return {"anime_ids": common_anime_ids, "user_ids": users}
    
    except Exception as e: 
        print("/get-group-recommendation")
        print(e)

In [17]:
get_user_preferences(223683)

Unnamed: 0,anime_id,eng_version,Genres
0,1,Cowboy Bebop,"Action, Adventure, Comedy, Drama, Sci-Fi, Space"
1,5,Cowboy Bebop:The Movie,"Action, Drama, Mystery, Sci-Fi, Space"
2,6,Trigun,"Action, Sci-Fi, Adventure, Comedy, Drama, Shounen"
3,7,Witch Hunter Robin,"Action, Mystery, Police, Supernatural, Drama, ..."
8,18,Initial D Fourth Stage,"Action, Cars, Sports, Drama, Seinen"
...,...,...,...
4509,6211,Tokyo Magnitude 8.0,Drama
4649,6573,Darker than Black:Gemini of the Meteor,"Action, Sci-Fi, Mystery, Super Power"
5683,9253,Steins;Gate,"Thriller, Sci-Fi"
6006,9969,Gintama Season 2,"Action, Sci-Fi, Comedy, Historical, Parody, Sa..."
