In [2]:
import pandas as pd
import tensorflow as tf
import numpy as np

from collections import defaultdict


def getAnimeName(anime_id):
    try:
        name = df[df.anime_id == anime_id].eng_version.values[0]
        if name is np.nan:
            name = df[df.anime_id == anime_id].Name.values[0]
    except:
        print('error')
    
    return name


MODEL_PATH = './saved_model/my_model.h5'
model = tf.keras.models.load_model(MODEL_PATH)

#global variables initialization

INPUT_DIR = 'E:/anime-recommendation/data'
rating_df = pd.read_csv(INPUT_DIR + '/animelist.csv', 
                            usecols=["user_id", "anime_id", "rating"]
                            #, nrows=90000000
                            )

n_ratings = rating_df['user_id'].value_counts()
rating_df = rating_df[rating_df['user_id'].isin(n_ratings[n_ratings >= 400].index)].copy()

min_rating = min(rating_df['rating'])
max_rating = max(rating_df['rating'])
avg_rating = np.mean(rating_df['rating'])
rating_df['rating'] = rating_df["rating"].apply(lambda x: (x - min_rating) / (max_rating - min_rating)).values.astype(np.float64)

# encoding categorical data start

user_ids = rating_df["user_id"].unique().tolist()
user2user_encoded = {x: i for i, x in enumerate(user_ids)}
user_encoded2user = {i: x for i, x in enumerate(user_ids)}
rating_df["user"] = rating_df["user_id"].map(user2user_encoded)
n_users = len(user2user_encoded)

anime_ids = rating_df["anime_id"].unique().tolist()
anime2anime_encoded = {x: i for i, x in enumerate(anime_ids)}
anime_encoded2anime = {i: x for i, x in enumerate(anime_ids)}
rating_df["anime"] = rating_df["anime_id"].map(anime2anime_encoded)
n_animes = len(anime2anime_encoded)

#encoding categorical data end

df = pd.read_csv(INPUT_DIR + '/anime.csv', low_memory=True)
df = df.replace("Unknown", np.nan)

df['anime_id'] = df['MAL_ID']
df["eng_version"] = df['English name']
df['eng_version'] = df.anime_id.apply(lambda x: getAnimeName(x))

df = df[["anime_id", "eng_version", "Genres", "Score"]]
print("Server ready... ")

def extract_weights(name, model):
    weight_layer = model.get_layer(name)
    weights = weight_layer.get_weights()[0]
    weights = weights / np.linalg.norm(weights, axis = 1).reshape((-1, 1))
    return weights


# recommendation function

def get_recommendations(user_id, n=500):
    try:
        user_id = np.int64(user_id)
        animes_watched_by_user = rating_df[rating_df.user_id==user_id]
        anime_not_watched_df = df[
            ~df["anime_id"].isin(animes_watched_by_user.anime_id.values)
        ]
        
        anime_not_watched = list(
            set(anime_not_watched_df['anime_id']).intersection(set(anime2anime_encoded.keys()))
        )

        anime_not_watched = [[anime2anime_encoded.get(x)] for x in anime_not_watched]

        user_encoder = user2user_encoded.get(user_id)

        user_anime_array = np.hstack(
            ([[user_encoder]] * len(anime_not_watched), anime_not_watched)
        )

        user_anime_array = [user_anime_array[:, 0], user_anime_array[:, 1]]
        ratings = model.predict(user_anime_array).flatten()

        top_ratings_indices = (-ratings).argsort()[:int(n)]

        recommended_anime_ids = [
            anime_encoded2anime.get(anime_not_watched[x][0]) for x in top_ratings_indices
        ]
        top_rated_ids = []

        for index, anime_id in enumerate(anime_not_watched):
            id_ = anime_encoded2anime.get(anime_id[0])
            
            if id_ in recommended_anime_ids:
                top_rated_ids.append(id_)
        return top_rated_ids
    
    except Exception as e:
        print(e)




Server ready... 


In [3]:
get_recommendations(105315, 10)

[1m535/535[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step


[2512, 2514, 4418, 37893, 6438, 40591, 19511, 22335, 22661, 31994]

In [7]:
from sklearn.metrics import confusion_matrix, classification_report

def evaluate_recommendations(user_id):
    true_animes = set(rating_df[(rating_df.user_id == user_id) & (rating_df.rating >= 7)]['anime_id'])
    predicted_animes = set(get_recommendations(user_id) or [])  # Ensure it's a set

    print(true_animes)
    all_anime = set(true_animes).union(predicted_animes)
    y_true = [1 if anime in true_animes else 0 for anime in all_anime]
    y_pred = [1 if anime in predicted_animes else 0 for anime in all_anime]

    if len(y_true) == 0:  # Handle case where there are no recommendations or true labels
        print(f"No recommendations or true data for user {user_id}")
        return None

    cm = confusion_matrix(y_true, y_pred)
    print("Confusion Matrix:\n", cm)
    print("\nClassification Report:\n", classification_report(y_true, y_pred))
    return cm

# Example usage
user_id = 0
evaluate_recommendations(user_id)


Invalid dtype: object
set()
No recommendations or true data for user 0


In [8]:
rating_df[(rating_df.user_id == 0) & (rating_df.rating >= 7)]['anime_id']

Series([], Name: anime_id, dtype: int64)

In [9]:
rating_df[(rating_df["user_id"] == 0) & (rating_df["rating"] >= 7)]['anime_id']

Series([], Name: anime_id, dtype: int64)

In [10]:
rating_df

Unnamed: 0,user_id,anime_id,rating,user,anime
213,2,24833,0.0,0,0
214,2,235,1.0,0,1
215,2,36721,0.0,0,2
216,2,40956,0.0,0,3
217,2,31933,0.0,0,4
...,...,...,...,...,...
109224268,353398,34086,0.0,91640,4428
109224269,353398,17909,0.0,91640,1453
109224270,353398,32924,0.0,91640,4837
109224271,353398,24627,0.0,91640,2095


In [11]:
rating_df[(rating_df["rating"] >= 7)]['anime_id']

Series([], Name: anime_id, dtype: int64)

In [12]:
rating_df["rating"]

213          0.0
214          1.0
215          0.0
216          0.0
217          0.0
            ... 
109224268    0.0
109224269    0.0
109224270    0.0
109224271    0.0
109224272    0.0
Name: rating, Length: 71418114, dtype: float64

In [15]:
rating_df[(rating_df["anime_id"] >= 7.0)]

Unnamed: 0,user_id,anime_id,rating,user,anime
213,2,24833,0.0,0,0
214,2,235,1.0,0,1
215,2,36721,0.0,0,2
216,2,40956,0.0,0,3
217,2,31933,0.0,0,4
...,...,...,...,...,...
109224268,353398,34086,0.0,91640,4428
109224269,353398,17909,0.0,91640,1453
109224270,353398,32924,0.0,91640,4837
109224271,353398,24627,0.0,91640,2095


In [16]:
rating_df[(rating_df["rating"] >= 7.0)]

Unnamed: 0,user_id,anime_id,rating,user,anime


In [17]:
rating_df[(rating_df["rating"] >= 7.0)]

Unnamed: 0,user_id,anime_id,rating,user,anime


In [19]:
rating_df[(int(rating_df["rating"]) >= 3.0)]

TypeError: cannot convert the series to <class 'int'>

In [20]:
user_id = 0

In [21]:
INPUT_DIR = 'E:/anime-recommendation/data'
rating_df = pd.read_csv(INPUT_DIR + '/animelist.csv', 
                            usecols=["user_id", "anime_id", "rating"]
                            #, nrows=90000000
                            )

n_ratings = rating_df['user_id'].value_counts()
rating_df = rating_df[rating_df['user_id'].isin(n_ratings[n_ratings >= 400].index)].copy()

MemoryError: Unable to allocate 1.00 MiB for an array with shape (131072,) and data type int64

In [22]:
user_id

0

In [23]:
user_id = 105315

In [24]:
INPUT_DIR = 'E:/anime-recommendation/data'
rating_df = pd.read_csv(INPUT_DIR + '/animelist.csv', 
                            usecols=["user_id", "anime_id", "rating"]
                            #, nrows=90000000
                            )

n_ratings = rating_df['user_id'].value_counts()
rating_df = rating_df[rating_df['user_id'].isin(n_ratings[n_ratings >= 400].index)].copy()

MemoryError: Unable to allocate 2.44 GiB for an array with shape (3, 109224747) and data type int64

In [25]:
rating_df[(rating_df["user_id"] == 105315)]['anime_id']

32455124    39783
32455125    37823
32455126    41074
32455127    40852
32455128    40454
            ...  
32455590    14355
32455591    36726
32455592    10495
32455593    43325
32455594    37976
Name: anime_id, Length: 471, dtype: int64

In [35]:
rating_df[(rating_df["user_id"] == 105315) & (rating_df["rating"] >= np.float64(7.0))]['anime_id']

Series([], Name: anime_id, dtype: int64)

In [31]:
rating_df[(rating_df["user_id"] == 105315) & (rating_df["rating"] >= 7.0)]

Unnamed: 0,user_id,anime_id,rating,user,anime


In [32]:

rating_df["rating"].dtype

dtype('float64')

In [39]:
rating_df[(rating_df["user_id"] == 105315)]['rating']

32455124    0.0
32455125    0.7
32455126    0.8
32455127    0.0
32455128    0.0
           ... 
32455590    0.0
32455591    0.0
32455592    0.0
32455593    0.0
32455594    0.0
Name: rating, Length: 471, dtype: float64

In [40]:
rating_df[(np.float64(rating_df["rating"]) >= np.float64(0.7))]['anime_id']

MemoryError: Unable to allocate 220. MiB for an array with shape (28796952, 1) and data type int64

In [42]:
from sklearn.metrics import confusion_matrix, classification_report

def evaluate_recommendations(user_id):
    true_animes = set(rating_df[(rating_df.user_id == user_id) & (rating_df.rating >= 0.8)]['anime_id'])
    predicted_animes = set(get_recommendations(user_id) or [])  # Ensures a valid set

    all_anime = true_animes.union(predicted_animes)  # Combine all anime IDs
    y_true = [1 if anime in true_animes else 0 for anime in all_anime]
    y_pred = [1 if anime in predicted_animes else 0 for anime in all_anime]

    if not y_true:  # If there's no data, avoid further computation
        print(f"No recommendations or relevant data for user {user_id}")
        return None

    cm = confusion_matrix(y_true, y_pred)
    print("Confusion Matrix:\n", cm)
    
    # 🛠️ FIX: Handle undefined metrics by setting `zero_division=0`
    print("\nClassification Report:\n", classification_report(y_true, y_pred, zero_division=0))
    
    return cm

# Example usage
user_id = 105315
evaluate_recommendations(user_id)


[1m535/535[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step
Confusion Matrix:
 [[  0 500]
 [190   0]]

Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00     500.0
           1       0.00      0.00      0.00     190.0

    accuracy                           0.00     690.0
   macro avg       0.00      0.00      0.00     690.0
weighted avg       0.00      0.00      0.00     690.0



array([[  0, 500],
       [190,   0]])

In [43]:
def get_recommendations(user_id, n=500):
    try:
        # Ensure user_id is of correct type
        user_id = np.int64(user_id)

        # Get all the available animes in the system
        all_animes = df['anime_id'].unique()

        # Encode all anime IDs using the encoder
        all_animes_encoded = [anime2anime_encoded.get(x) for x in all_animes if anime2anime_encoded.get(x) is not None]
        
        # Get the user encoder (mapping user_id to a numerical value)
        user_encoder = user2user_encoded.get(user_id)
        
        # Prepare the input data for prediction (user and anime IDs)
        user_anime_array = np.hstack(
            ([[user_encoder]] * len(all_animes_encoded), all_animes_encoded)
        )

        # Separate the data into user and anime ID columns for prediction
        user_anime_array = [user_anime_array[:, 0], user_anime_array[:, 1]]

        # Get the predicted ratings for all animes
        ratings = model.predict(user_anime_array).flatten()

        # Get the top 'n' anime recommendations based on the highest predicted ratings
        top_ratings_indices = (-ratings).argsort()[:int(n)]

        # Get the anime IDs corresponding to the top recommendations
        recommended_anime_ids = [
            anime_encoded2anime.get(all_animes_encoded[x]) for x in top_ratings_indices
        ]
        
        return recommended_anime_ids
    
    except Exception as e:
        print(e)
        return []


In [44]:
user_id = 105315
evaluate_recommendations(user_id)

all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)
Confusion Matrix:
 [[  0   0]
 [190   0]]

Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00       0.0
           1       0.00      0.00      0.00     190.0

    accuracy                           0.00     190.0
   macro avg       0.00      0.00      0.00     190.0
weighted avg       0.00      0.00      0.00     190.0



array([[  0,   0],
       [190,   0]])