In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Add, Activation, Lambda, BatchNormalization, Concatenate, Dropout, Input, Embedding, Dot, Reshape, Dense, Flatten
from tensorflow.keras.callbacks import Callback, ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping, ReduceLROnPlateau
from wordcloud import WordCloud
from collections import defaultdict
import matplotlib.pyplot as plt
from IPython.display import FileLink
%matplotlib inline

In [2]:
INPUT_DIR = 'E:/anime-recommendation/data'
rating_df = pd.read_csv(INPUT_DIR + '/animelist.csv', 
                            usecols=["user_id", "anime_id", "rating"]
                            #, nrows=90000000
                            )

In [3]:
n_ratings = rating_df['user_id'].value_counts()
rating_df = rating_df[rating_df['user_id'].isin(n_ratings[n_ratings >= 400].index)].copy()
len(rating_df)

71418114

In [4]:
# Scaling BTW (0 , 1.0)
min_rating = min(rating_df['rating'])
max_rating = max(rating_df['rating'])
avg_rating = np.mean(rating_df['rating'])
rating_df['rating'] = rating_df["rating"].apply(lambda x: (x - min_rating) / (max_rating - min_rating)).values.astype(np.float64)

AvgRating = np.mean(rating_df['rating'])

In [5]:
duplicates = rating_df.duplicated()

if duplicates.sum() > 0:
    print('> {} duplicates'.format(duplicates.sum()))
    rating_df = rating_df[~duplicates]

print('> {} duplicates'.format(rating_df.duplicated().sum()))

> 1 duplicates
> 0 duplicates


In [6]:
g = rating_df.groupby('user_id')['rating'].count()
top_users = g.dropna().sort_values(ascending=False)[:20]
top_r = rating_df.join(top_users, rsuffix='_r', how='inner', on='user_id')

g = rating_df.groupby('anime_id')['rating'].count()
top_animes = g.dropna().sort_values(ascending=False)[:20]
top_r = top_r.join(top_animes, rsuffix='_r', how='inner', on='anime_id')

pd.crosstab(top_r.user_id, top_r.anime_id, top_r.rating, aggfunc=np.sum)

  pd.crosstab(top_r.user_id, top_r.anime_id, top_r.rating, aggfunc=np.sum)


anime_id,226,1535,1575,2001,2167,4224,5081,5114,6547,6746,9253,9989,10620,11757,15809,16498,19815,20507,22319,30276
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
20807,1.0,1.0,1.0,1.0,0.8,1.0,0.9,1.0,1.0,1.0,1.0,0.9,1.0,1.0,0.8,1.0,1.0,1.0,1.0,1.0
50485,0.0,0.6,1.0,1.0,0.9,1.0,1.0,1.0,1.0,0.9,1.0,0.9,0.6,1.0,1.0,0.9,1.0,1.0,1.0,0.8
63900,0.7,0.0,0.0,0.0,0.0,0.0,0.0,0.9,0.0,0.0,0.0,0.8,0.0,0.0,0.0,0.8,0.0,0.8,0.8,0.8
68042,0.8,1.0,0.9,1.0,0.7,0.9,1.0,0.9,0.7,0.8,0.9,0.8,0.4,0.4,0.6,0.9,0.3,0.2,0.5,0.9
85472,0.7,0.0,0.0,1.0,0.9,0.9,0.0,0.0,1.0,0.0,0.8,0.0,0.8,0.9,0.8,0.8,0.7,0.7,0.7,0.9
92529,0.9,0.9,1.0,1.0,0.0,0.8,0.0,0.9,0.9,0.0,1.0,0.0,0.8,0.9,0.8,0.9,0.9,0.8,0.8,1.0
122341,0.2,0.3,0.2,0.7,0.4,0.3,0.3,0.8,0.5,0.6,0.7,0.3,0.2,0.1,0.3,0.1,0.4,0.4,0.2,0.6
131988,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
140590,0.3,0.6,0.5,0.8,0.6,0.8,0.8,0.9,0.5,0.8,0.9,0.7,0.3,0.4,0.6,0.5,0.7,0.7,0.5,0.7
147331,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
# Encoding categorical data
user_ids = rating_df["user_id"].unique().tolist()
user2user_encoded = {x: i for i, x in enumerate(user_ids)}
user_encoded2user = {i: x for i, x in enumerate(user_ids)}
rating_df["user"] = rating_df["user_id"].map(user2user_encoded)
n_users = len(user2user_encoded)

anime_ids = rating_df["anime_id"].unique().tolist()
anime2anime_encoded = {x: i for i, x in enumerate(anime_ids)}
anime_encoded2anime = {i: x for i, x in enumerate(anime_ids)}
rating_df["anime"] = rating_df["anime_id"].map(anime2anime_encoded)
n_animes = len(anime2anime_encoded)

print("Num of users: {}, Num of animes: {}".format(n_users, n_animes))
print("Min rating: {}, Max rating: {}".format(min(rating_df['rating']), max(rating_df['rating'])))

Num of users: 91641, Num of animes: 17560
Min rating: 0.0, Max rating: 1.0


In [8]:
# Shuffle
rating_df = rating_df.sample(frac=1, random_state=73)

X = rating_df[['user', 'anime']].values
y = rating_df["rating"]

In [9]:
# Split
test_set_size = 10000 #10k for test set
train_indices = rating_df.shape[0] - test_set_size 

X_train, X_test, y_train, y_test = (
    X[:train_indices],
    X[train_indices:],
    y[:train_indices],
    y[train_indices:],
)

print('> Train set ratings: {}'.format(len(y_train)))
print('> Test set ratings: {}'.format(len(y_test)))

> Train set ratings: 71408113
> Test set ratings: 10000


In [10]:
MODEL_PATH = './saved_model/my_model.h5'
model = tf.keras.models.load_model(MODEL_PATH)

FileNotFoundError: [Errno 2] Unable to synchronously open file (unable to open file: name = './saved_model/my_model.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [11]:
MODEL_PATH = '../saved_model/my_model.h5'
model = tf.keras.models.load_model(MODEL_PATH)



In [12]:
def extract_weights(name, model):
    weight_layer = model.get_layer(name)
    weights = weight_layer.get_weights()[0]
    weights = weights / np.linalg.norm(weights, axis = 1).reshape((-1, 1))
    return weights

anime_weights = extract_weights('anime_embedding', model)
user_weights = extract_weights('user_embedding', model)

In [13]:
df = pd.read_csv(INPUT_DIR + '/anime.csv', low_memory=True)
df = df.replace("Unknown", np.nan)


In [14]:
def evaluate_model(model, X_test, y_test):
    """
    Evaluates the trained model on the test set and prints key performance metrics.

    Parameters:
        model: Loaded Keras model
        X_test: List of test input arrays [users, animes]
        y_test: Actual ratings in the test set
    """
    # Get predictions
    y_pred = model.predict(X_test)
    
    # Calculate metrics
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)
    
    # Print results
    print(f"Mean Absolute Error (MAE): {mae:.4f}")
    print(f"Mean Squared Error (MSE): {mse:.4f}")
    print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
    print(f"R² Score: {r2:.4f}")

    return {"MAE": mae, "MSE": mse, "RMSE": rmse, "R2": r2}


In [15]:
metrics = evaluate_model(model, X_test_array, y_test)
metrics  

NameError: name 'X_test_array' is not defined

In [16]:
# Shuffle and split test data
rating_df = rating_df.sample(frac=1, random_state=73)
train_indices = rating_df.shape[0] - test_set_size 

# Extract test data
X_test = rating_df[['user', 'anime']].values[train_indices:]
y_test = rating_df["rating"].values[train_indices:]

X_test_array = [X_test[:, 0], X_test[:, 1]]  # Prepare test input format


In [17]:
metrics = evaluate_model(model, X_test_array, y_test)
metrics  # Display metrics as output

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step    


NameError: name 'mean_absolute_error' is not defined

In [18]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [19]:
metrics = evaluate_model(model, X_test_array, y_test)
metrics  # Display metrics as output

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Mean Absolute Error (MAE): 0.1620
Mean Squared Error (MSE): 0.0550
Root Mean Squared Error (RMSE): 0.2345
R² Score: 0.6277


{'MAE': 0.1620166979283857,
 'MSE': 0.05497119133816982,
 'RMSE': np.float64(0.2344593596727796),
 'R2': 0.6276701567864817}

In [1]:

def evaluate_recommendations(user_id):
    # True labels: Anime the user actually watched and rated highly (e.g., 8+ rating)
    true_animes = set(rating_df[(rating_df.user_id == user_id) & (rating_df.rating >= 8)]['anime_id'])

    # Predicted labels: Anime recommended by the model
    predicted_animes = set(get_recommendations(user_id))

    # Create labels (1 for liked, 0 for not liked)
    all_anime = set(true_animes).union(set(predicted_animes))  # Combine both sets
    y_true = [1 if anime in true_animes else 0 for anime in all_anime]  # Ground truth
    y_pred = [1 if anime in predicted_animes else 0 for anime in all_anime]  # Model predictions

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)

    # Print results
    print("Confusion Matrix:\n", cm)
    print("\nClassification Report:\n", classification_report(y_true, y_pred))

    return cm


In [2]:
metrics = evaluate_model(model, X_test_array, y_test)

NameError: name 'evaluate_model' is not defined

In [3]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Add, Activation, Lambda, BatchNormalization, Concatenate, Dropout, Input, Embedding, Dot, Reshape, Dense, Flatten
from tensorflow.keras.callbacks import Callback, ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping, ReduceLROnPlateau
from wordcloud import WordCloud
from collections import defaultdict
import matplotlib.pyplot as plt
from IPython.display import FileLink
%matplotlib inline

INPUT_DIR = 'E:/anime-recommendation/data'
rating_df = pd.read_csv(INPUT_DIR + '/animelist.csv', 
                            usecols=["user_id", "anime_id", "rating"]
                            #, nrows=90000000
                            )

n_ratings = rating_df['user_id'].value_counts()
rating_df = rating_df[rating_df['user_id'].isin(n_ratings[n_ratings >= 400].index)].copy()
len(rating_df)

# Scaling BTW (0 , 1.0)
min_rating = min(rating_df['rating'])
max_rating = max(rating_df['rating'])
avg_rating = np.mean(rating_df['rating'])
rating_df['rating'] = rating_df["rating"].apply(lambda x: (x - min_rating) / (max_rating - min_rating)).values.astype(np.float64)

AvgRating = np.mean(rating_df['rating'])

duplicates = rating_df.duplicated()

if duplicates.sum() > 0:
    print('> {} duplicates'.format(duplicates.sum()))
    rating_df = rating_df[~duplicates]

print('> {} duplicates'.format(rating_df.duplicated().sum()))

g = rating_df.groupby('user_id')['rating'].count()
top_users = g.dropna().sort_values(ascending=False)[:20]
top_r = rating_df.join(top_users, rsuffix='_r', how='inner', on='user_id')

g = rating_df.groupby('anime_id')['rating'].count()
top_animes = g.dropna().sort_values(ascending=False)[:20]
top_r = top_r.join(top_animes, rsuffix='_r', how='inner', on='anime_id')

pd.crosstab(top_r.user_id, top_r.anime_id, top_r.rating, aggfunc=np.sum)

# Encoding categorical data
user_ids = rating_df["user_id"].unique().tolist()
user2user_encoded = {x: i for i, x in enumerate(user_ids)}
user_encoded2user = {i: x for i, x in enumerate(user_ids)}
rating_df["user"] = rating_df["user_id"].map(user2user_encoded)
n_users = len(user2user_encoded)

anime_ids = rating_df["anime_id"].unique().tolist()
anime2anime_encoded = {x: i for i, x in enumerate(anime_ids)}
anime_encoded2anime = {i: x for i, x in enumerate(anime_ids)}
rating_df["anime"] = rating_df["anime_id"].map(anime2anime_encoded)
n_animes = len(anime2anime_encoded)

print("Num of users: {}, Num of animes: {}".format(n_users, n_animes))
print("Min rating: {}, Max rating: {}".format(min(rating_df['rating']), max(rating_df['rating'])))

# Shuffle
rating_df = rating_df.sample(frac=1, random_state=73)

X = rating_df[['user', 'anime']].values
y = rating_df["rating"]

# Split
test_set_size = 10000 #10k for test set
train_indices = rating_df.shape[0] - test_set_size 

X_train, X_test, y_train, y_test = (
    X[:train_indices],
    X[train_indices:],
    y[:train_indices],
    y[train_indices:],
)

print('> Train set ratings: {}'.format(len(y_train)))
print('> Test set ratings: {}'.format(len(y_test)))

MODEL_PATH = './saved_model/my_model.h5'
model = tf.keras.models.load_model(MODEL_PATH)

def extract_weights(name, model):
    weight_layer = model.get_layer(name)
    weights = weight_layer.get_weights()[0]
    weights = weights / np.linalg.norm(weights, axis = 1).reshape((-1, 1))
    return weights

anime_weights = extract_weights('anime_embedding', model)
user_weights = extract_weights('user_embedding', model)

df = pd.read_csv(INPUT_DIR + '/anime.csv', low_memory=True)
df = df.replace("Unknown", np.nan)

def evaluate_model(model, X_test, y_test):
    """
    Evaluates the trained model on the test set and prints key performance metrics.

    Parameters:
        model: Loaded Keras model
        X_test: List of test input arrays [users, animes]
        y_test: Actual ratings in the test set
    """
    # Get predictions
    y_pred = model.predict(X_test)
    
    # Calculate metrics
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)
    
    # Print results
    print(f"Mean Absolute Error (MAE): {mae:.4f}")
    print(f"Mean Squared Error (MSE): {mse:.4f}")
    print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
    print(f"R² Score: {r2:.4f}")

    return {"MAE": mae, "MSE": mse, "RMSE": rmse, "R2": r2}


def evaluate_recommendations(user_id):
    # True labels: Anime the user actually watched and rated highly (e.g., 8+ rating)
    true_animes = set(rating_df[(rating_df.user_id == user_id) & (rating_df.rating >= 8)]['anime_id'])

    # Predicted labels: Anime recommended by the model
    predicted_animes = set(get_recommendations(user_id))

    # Create labels (1 for liked, 0 for not liked)
    all_anime = set(true_animes).union(set(predicted_animes))  # Combine both sets
    y_true = [1 if anime in true_animes else 0 for anime in all_anime]  # Ground truth
    y_pred = [1 if anime in predicted_animes else 0 for anime in all_anime]  # Model predictions

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)

    # Print results
    print("Confusion Matrix:\n", cm)
    print("\nClassification Report:\n", classification_report(y_true, y_pred))

    return cm


> 1 duplicates
> 0 duplicates


  pd.crosstab(top_r.user_id, top_r.anime_id, top_r.rating, aggfunc=np.sum)


Num of users: 91641, Num of animes: 17560


KeyboardInterrupt: 

In [4]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Add, Activation, Lambda, BatchNormalization, Concatenate, Dropout, Input, Embedding, Dot, Reshape, Dense, Flatten
from tensorflow.keras.callbacks import Callback, ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping, ReduceLROnPlateau
from wordcloud import WordCloud
from collections import defaultdict
import matplotlib.pyplot as plt
from IPython.display import FileLink
%matplotlib inline

INPUT_DIR = 'E:/anime-recommendation/data'
rating_df = pd.read_csv(INPUT_DIR + '/animelist.csv', 
                            usecols=["user_id", "anime_id", "rating"]
                            #, nrows=90000000
                            )

n_ratings = rating_df['user_id'].value_counts()
rating_df = rating_df[rating_df['user_id'].isin(n_ratings[n_ratings >= 400].index)].copy()
len(rating_df)

# Scaling BTW (0 , 1.0)
min_rating = min(rating_df['rating'])
max_rating = max(rating_df['rating'])
avg_rating = np.mean(rating_df['rating'])
rating_df['rating'] = rating_df["rating"].apply(lambda x: (x - min_rating) / (max_rating - min_rating)).values.astype(np.float64)

AvgRating = np.mean(rating_df['rating'])

duplicates = rating_df.duplicated()

if duplicates.sum() > 0:
    print('> {} duplicates'.format(duplicates.sum()))
    rating_df = rating_df[~duplicates]

print('> {} duplicates'.format(rating_df.duplicated().sum()))

g = rating_df.groupby('user_id')['rating'].count()
top_users = g.dropna().sort_values(ascending=False)[:20]
top_r = rating_df.join(top_users, rsuffix='_r', how='inner', on='user_id')

g = rating_df.groupby('anime_id')['rating'].count()
top_animes = g.dropna().sort_values(ascending=False)[:20]
top_r = top_r.join(top_animes, rsuffix='_r', how='inner', on='anime_id')


# Encoding categorical data
user_ids = rating_df["user_id"].unique().tolist()
user2user_encoded = {x: i for i, x in enumerate(user_ids)}
user_encoded2user = {i: x for i, x in enumerate(user_ids)}
rating_df["user"] = rating_df["user_id"].map(user2user_encoded)
n_users = len(user2user_encoded)

anime_ids = rating_df["anime_id"].unique().tolist()
anime2anime_encoded = {x: i for i, x in enumerate(anime_ids)}
anime_encoded2anime = {i: x for i, x in enumerate(anime_ids)}
rating_df["anime"] = rating_df["anime_id"].map(anime2anime_encoded)
n_animes = len(anime2anime_encoded)

print("Num of users: {}, Num of animes: {}".format(n_users, n_animes))
print("Min rating: {}, Max rating: {}".format(min(rating_df['rating']), max(rating_df['rating'])))

# Shuffle
rating_df = rating_df.sample(frac=1, random_state=73)

X = rating_df[['user', 'anime']].values
y = rating_df["rating"]

# Split
test_set_size = 10000 #10k for test set
train_indices = rating_df.shape[0] - test_set_size 

X_train, X_test, y_train, y_test = (
    X[:train_indices],
    X[train_indices:],
    y[:train_indices],
    y[train_indices:],
)

print('> Train set ratings: {}'.format(len(y_train)))
print('> Test set ratings: {}'.format(len(y_test)))

MODEL_PATH = './saved_model/my_model.h5'
model = tf.keras.models.load_model(MODEL_PATH)

def extract_weights(name, model):
    weight_layer = model.get_layer(name)
    weights = weight_layer.get_weights()[0]
    weights = weights / np.linalg.norm(weights, axis = 1).reshape((-1, 1))
    return weights

anime_weights = extract_weights('anime_embedding', model)
user_weights = extract_weights('user_embedding', model)

df = pd.read_csv(INPUT_DIR + '/anime.csv', low_memory=True)
df = df.replace("Unknown", np.nan)

def evaluate_model(model, X_test, y_test):
    """
    Evaluates the trained model on the test set and prints key performance metrics.

    Parameters:
        model: Loaded Keras model
        X_test: List of test input arrays [users, animes]
        y_test: Actual ratings in the test set
    """
    # Get predictions
    y_pred = model.predict(X_test)
    
    # Calculate metrics
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)
    
    # Print results
    print(f"Mean Absolute Error (MAE): {mae:.4f}")
    print(f"Mean Squared Error (MSE): {mse:.4f}")
    print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
    print(f"R² Score: {r2:.4f}")

    return {"MAE": mae, "MSE": mse, "RMSE": rmse, "R2": r2}


def evaluate_recommendations(user_id):
    # True labels: Anime the user actually watched and rated highly (e.g., 8+ rating)
    true_animes = set(rating_df[(rating_df.user_id == user_id) & (rating_df.rating >= 8)]['anime_id'])

    # Predicted labels: Anime recommended by the model
    predicted_animes = set(get_recommendations(user_id))

    # Create labels (1 for liked, 0 for not liked)
    all_anime = set(true_animes).union(set(predicted_animes))  # Combine both sets
    y_true = [1 if anime in true_animes else 0 for anime in all_anime]  # Ground truth
    y_pred = [1 if anime in predicted_animes else 0 for anime in all_anime]  # Model predictions

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)

    # Print results
    print("Confusion Matrix:\n", cm)
    print("\nClassification Report:\n", classification_report(y_true, y_pred))

    return cm


> 1 duplicates
> 0 duplicates
Num of users: 91641, Num of animes: 17560
Min rating: 0.0, Max rating: 1.0
> Train set ratings: 71408113
> Test set ratings: 10000


FileNotFoundError: [Errno 2] Unable to synchronously open file (unable to open file: name = './saved_model/my_model.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [5]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Add, Activation, Lambda, BatchNormalization, Concatenate, Dropout, Input, Embedding, Dot, Reshape, Dense, Flatten
from tensorflow.keras.callbacks import Callback, ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping, ReduceLROnPlateau
from wordcloud import WordCloud
from collections import defaultdict
import matplotlib.pyplot as plt
from IPython.display import FileLink
%matplotlib inline

INPUT_DIR = 'E:/anime-recommendation/data'
rating_df = pd.read_csv(INPUT_DIR + '/animelist.csv', 
                            usecols=["user_id", "anime_id", "rating"]
                            #, nrows=90000000
                            )

n_ratings = rating_df['user_id'].value_counts()
rating_df = rating_df[rating_df['user_id'].isin(n_ratings[n_ratings >= 400].index)].copy()
len(rating_df)

# Scaling BTW (0 , 1.0)
min_rating = min(rating_df['rating'])
max_rating = max(rating_df['rating'])
avg_rating = np.mean(rating_df['rating'])
rating_df['rating'] = rating_df["rating"].apply(lambda x: (x - min_rating) / (max_rating - min_rating)).values.astype(np.float64)

AvgRating = np.mean(rating_df['rating'])

duplicates = rating_df.duplicated()

if duplicates.sum() > 0:
    print('> {} duplicates'.format(duplicates.sum()))
    rating_df = rating_df[~duplicates]

print('> {} duplicates'.format(rating_df.duplicated().sum()))

g = rating_df.groupby('user_id')['rating'].count()
top_users = g.dropna().sort_values(ascending=False)[:20]
top_r = rating_df.join(top_users, rsuffix='_r', how='inner', on='user_id')

g = rating_df.groupby('anime_id')['rating'].count()
top_animes = g.dropna().sort_values(ascending=False)[:20]
top_r = top_r.join(top_animes, rsuffix='_r', how='inner', on='anime_id')

pd.crosstab(top_r.user_id, top_r.anime_id, top_r.rating, aggfunc=np.sum)

# Encoding categorical data
user_ids = rating_df["user_id"].unique().tolist()
user2user_encoded = {x: i for i, x in enumerate(user_ids)}
user_encoded2user = {i: x for i, x in enumerate(user_ids)}
rating_df["user"] = rating_df["user_id"].map(user2user_encoded)
n_users = len(user2user_encoded)

anime_ids = rating_df["anime_id"].unique().tolist()
anime2anime_encoded = {x: i for i, x in enumerate(anime_ids)}
anime_encoded2anime = {i: x for i, x in enumerate(anime_ids)}
rating_df["anime"] = rating_df["anime_id"].map(anime2anime_encoded)
n_animes = len(anime2anime_encoded)

print("Num of users: {}, Num of animes: {}".format(n_users, n_animes))
print("Min rating: {}, Max rating: {}".format(min(rating_df['rating']), max(rating_df['rating'])))

# Shuffle
rating_df = rating_df.sample(frac=1, random_state=73)

X = rating_df[['user', 'anime']].values
y = rating_df["rating"]

# Split
test_set_size = 10000 #10k for test set
train_indices = rating_df.shape[0] - test_set_size 

X_train, X_test, y_train, y_test = (
    X[:train_indices],
    X[train_indices:],
    y[:train_indices],
    y[train_indices:],
)

print('> Train set ratings: {}'.format(len(y_train)))
print('> Test set ratings: {}'.format(len(y_test)))

MODEL_PATH = '../saved_model/my_model.h5'
model = tf.keras.models.load_model(MODEL_PATH)

def extract_weights(name, model):
    weight_layer = model.get_layer(name)
    weights = weight_layer.get_weights()[0]
    weights = weights / np.linalg.norm(weights, axis = 1).reshape((-1, 1))
    return weights

anime_weights = extract_weights('anime_embedding', model)
user_weights = extract_weights('user_embedding', model)

df = pd.read_csv(INPUT_DIR + '/anime.csv', low_memory=True)
df = df.replace("Unknown", np.nan)

def evaluate_model(model, X_test, y_test):
    """
    Evaluates the trained model on the test set and prints key performance metrics.

    Parameters:
        model: Loaded Keras model
        X_test: List of test input arrays [users, animes]
        y_test: Actual ratings in the test set
    """
    # Get predictions
    y_pred = model.predict(X_test)
    
    # Calculate metrics
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)
    
    # Print results
    print(f"Mean Absolute Error (MAE): {mae:.4f}")
    print(f"Mean Squared Error (MSE): {mse:.4f}")
    print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
    print(f"R² Score: {r2:.4f}")

    return {"MAE": mae, "MSE": mse, "RMSE": rmse, "R2": r2}


def evaluate_recommendations(user_id):
    # True labels: Anime the user actually watched and rated highly (e.g., 8+ rating)
    true_animes = set(rating_df[(rating_df.user_id == user_id) & (rating_df.rating >= 8)]['anime_id'])

    # Predicted labels: Anime recommended by the model
    predicted_animes = set(get_recommendations(user_id))

    # Create labels (1 for liked, 0 for not liked)
    all_anime = set(true_animes).union(set(predicted_animes))  # Combine both sets
    y_true = [1 if anime in true_animes else 0 for anime in all_anime]  # Ground truth
    y_pred = [1 if anime in predicted_animes else 0 for anime in all_anime]  # Model predictions

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)

    # Print results
    print("Confusion Matrix:\n", cm)
    print("\nClassification Report:\n", classification_report(y_true, y_pred))

    return cm


ParserError: Error tokenizing data. C error: Calling read(nbytes) on source failed. Try engine='python'.

In [6]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Add, Activation, Lambda, BatchNormalization, Concatenate, Dropout, Input, Embedding, Dot, Reshape, Dense, Flatten
from tensorflow.keras.callbacks import Callback, ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping, ReduceLROnPlateau
from wordcloud import WordCloud
from collections import defaultdict
import matplotlib.pyplot as plt
from IPython.display import FileLink
%matplotlib inline

In [7]:
rating_df

Unnamed: 0,user_id,anime_id,rating,user,anime
28551215,92529,2299,0.0,23948,11200
86371831,279246,2001,0.9,72463,757
75048666,242991,15417,0.9,62985,1808
107942884,349258,37982,0.7,90552,1774
86707495,280319,3272,0.0,72755,7671
...,...,...,...,...,...
63115062,204556,36649,0.8,52964,3381
108425491,350789,34662,0.9,90972,1789
78226595,253208,34561,0.0,65683,1094
92866082,300007,12879,0.0,77931,1757


In [8]:
user_ids

[2,
 6,
 12,
 16,
 17,
 19,
 21,
 41,
 42,
 44,
 47,
 53,
 55,
 60,
 66,
 73,
 74,
 85,
 89,
 90,
 94,
 98,
 102,
 108,
 111,
 112,
 120,
 121,
 122,
 135,
 145,
 146,
 147,
 153,
 155,
 156,
 172,
 174,
 184,
 190,
 193,
 194,
 198,
 204,
 205,
 209,
 214,
 219,
 222,
 227,
 228,
 235,
 238,
 240,
 243,
 248,
 251,
 252,
 257,
 264,
 267,
 272,
 274,
 275,
 284,
 285,
 286,
 290,
 291,
 293,
 300,
 301,
 306,
 308,
 310,
 313,
 314,
 316,
 320,
 321,
 324,
 325,
 326,
 327,
 330,
 336,
 340,
 345,
 346,
 349,
 350,
 366,
 367,
 371,
 372,
 375,
 381,
 382,
 386,
 389,
 398,
 405,
 406,
 413,
 414,
 418,
 423,
 426,
 428,
 431,
 432,
 436,
 437,
 438,
 440,
 442,
 444,
 445,
 446,
 455,
 457,
 459,
 464,
 467,
 468,
 469,
 478,
 481,
 483,
 484,
 486,
 493,
 498,
 500,
 516,
 517,
 519,
 524,
 526,
 529,
 531,
 538,
 542,
 547,
 549,
 559,
 563,
 564,
 566,
 569,
 571,
 577,
 590,
 593,
 596,
 601,
 603,
 608,
 613,
 614,
 617,
 620,
 629,
 631,
 639,
 642,
 643,
 644,
 649,
 652,
 655

In [9]:
MODEL_PATH = '../saved_model/my_model.h5'
model = tf.keras.models.load_model(MODEL_PATH)



In [10]:
MODEL_PATH = './saved_model/my_model.h5'
model = tf.keras.models.load_model(MODEL_PATH)

FileNotFoundError: [Errno 2] Unable to synchronously open file (unable to open file: name = './saved_model/my_model.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [11]:
def extract_weights(name, model):
    weight_layer = model.get_layer(name)
    weights = weight_layer.get_weights()[0]
    weights = weights / np.linalg.norm(weights, axis = 1).reshape((-1, 1))
    return weights



In [12]:
anime_weights

NameError: name 'anime_weights' is not defined

In [13]:
anime_weights = extract_weights('anime_embedding', model)
user_weights = extract_weights('user_embedding', model)



In [14]:
anime_weights

array([[-0.07572285, -0.06128556, -0.10664532, ...,  0.02076363,
         0.07852753, -0.12656632],
       [ 0.17172639,  0.09843872,  0.08381804, ..., -0.06545132,
         0.01367777, -0.01569617],
       [ 0.06262159,  0.04926249,  0.13620275, ..., -0.01060472,
         0.11352816, -0.01517436],
       ...,
       [-0.04081078, -0.13276866,  0.03978901, ...,  0.11427706,
        -0.11117698, -0.13471437],
       [ 0.11241616, -0.06669106, -0.13638647, ...,  0.06394409,
         0.10491727, -0.07412891],
       [-0.07345021, -0.03141955,  0.01581988, ...,  0.14019403,
        -0.11126844, -0.10670616]], dtype=float32)

In [15]:

df = pd.read_csv(INPUT_DIR + '/anime.csv', low_memory=True)
df = df.replace("Unknown", np.nan)



In [16]:
def evaluate_model(model, X_test, y_test):
    """
    Evaluates the trained model on the test set and prints key performance metrics.

    Parameters:
        model: Loaded Keras model
        X_test: List of test input arrays [users, animes]
        y_test: Actual ratings in the test set
    """
    # Get predictions
    y_pred = model.predict(X_test)
    
    # Calculate metrics
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)
    
    # Print results
    print(f"Mean Absolute Error (MAE): {mae:.4f}")
    print(f"Mean Squared Error (MSE): {mse:.4f}")
    print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
    print(f"R² Score: {r2:.4f}")

    return {"MAE": mae, "MSE": mse, "RMSE": rmse, "R2": r2}


def evaluate_recommendations(user_id):
    # True labels: Anime the user actually watched and rated highly (e.g., 8+ rating)
    true_animes = set(rating_df[(rating_df.user_id == user_id) & (rating_df.rating >= 8)]['anime_id'])

    # Predicted labels: Anime recommended by the model
    predicted_animes = set(get_recommendations(user_id))

    # Create labels (1 for liked, 0 for not liked)
    all_anime = set(true_animes).union(set(predicted_animes))  # Combine both sets
    y_true = [1 if anime in true_animes else 0 for anime in all_anime]  # Ground truth
    y_pred = [1 if anime in predicted_animes else 0 for anime in all_anime]  # Model predictions

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)

    # Print results
    print("Confusion Matrix:\n", cm)
    print("\nClassification Report:\n", classification_report(y_true, y_pred))

    return cm


In [17]:
conf_matrix = evaluate_recommendations('105315')

NameError: name 'get_recommendations' is not defined

In [18]:

def get_recommendations(user_id, n=500):
    try:
        user_id = np.int64(user_id)
        animes_watched_by_user = rating_df[rating_df.user_id==user_id]
        anime_not_watched_df = df[
            ~df["anime_id"].isin(animes_watched_by_user.anime_id.values)
        ]
        
        anime_not_watched = list(
            set(anime_not_watched_df['anime_id']).intersection(set(anime2anime_encoded.keys()))
        )

        anime_not_watched = [[anime2anime_encoded.get(x)] for x in anime_not_watched]

        user_encoder = user2user_encoded.get(user_id)

        user_anime_array = np.hstack(
            ([[user_encoder]] * len(anime_not_watched), anime_not_watched)
        )

        user_anime_array = [user_anime_array[:, 0], user_anime_array[:, 1]]
        ratings = model.predict(user_anime_array).flatten()

        top_ratings_indices = (-ratings).argsort()[:int(n)]

        recommended_anime_ids = [
            anime_encoded2anime.get(anime_not_watched[x][0]) for x in top_ratings_indices
        ]
        top_rated_ids = []

        for index, anime_id in enumerate(anime_not_watched):
            id_ = anime_encoded2anime.get(anime_id[0])
            
            if id_ in recommended_anime_ids:
                top_rated_ids.append(id_)
        return top_rated_ids
    
    except Exception as e:
        print(e)


In [19]:
def evaluate_recommendations(user_id):
    # True labels: Anime the user actually watched and rated highly (e.g., 8+ rating)
    true_animes = set(rating_df[(rating_df.user_id == user_id) & (rating_df.rating >= 8)]['anime_id'])

    # Predicted labels: Anime recommended by the model
    predicted_animes = set(get_recommendations(user_id))

    # Create labels (1 for liked, 0 for not liked)
    all_anime = set(true_animes).union(set(predicted_animes))  # Combine both sets
    y_true = [1 if anime in true_animes else 0 for anime in all_anime]  # Ground truth
    y_pred = [1 if anime in predicted_animes else 0 for anime in all_anime]  # Model predictions

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)

    # Print results
    print("Confusion Matrix:\n", cm)
    print("\nClassification Report:\n", classification_report(y_true, y_pred))

    return cm

In [20]:
conf_matrix = evaluate_recommendations('105315')

'anime_id'


TypeError: 'NoneType' object is not iterable

In [21]:
import seaborn as sns


In [22]:

sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()

NameError: name 'conf_matrix' is not defined

In [23]:
get_recommendations(105315)

'anime_id'


In [24]:
anime_id

NameError: name 'anime_id' is not defined

In [25]:
get_recommendations(105315,500)

'anime_id'


In [26]:

def get_recommendations(user_id, n=500):
    try:
        user_id = np.int64(user_id)
        animes_watched_by_user = rating_df[rating_df.user_id==user_id]
        anime_not_watched_df = df[
            ~df["anime_id"].isin(animes_watched_by_user.anime_id.values)
        ]
        
        anime_not_watched = list(
            set(anime_not_watched_df['anime_id']).intersection(set(anime2anime_encoded.keys()))
        )

        anime_not_watched = [[anime2anime_encoded.get(x)] for x in anime_not_watched]

        user_encoder = user2user_encoded.get(user_id)

        user_anime_array = np.hstack(
            ([[user_encoder]] * len(anime_not_watched), anime_not_watched)
        )

        user_anime_array = [user_anime_array[:, 0], user_anime_array[:, 1]]
        ratings = model.predict(user_anime_array).flatten()

        top_ratings_indices = (-ratings).argsort()[:int(n)]

        recommended_anime_ids = [
            anime_encoded2anime.get(anime_not_watched[x][0]) for x in top_ratings_indices
        ]
        top_rated_ids = []

        for index, anime_id in enumerate(anime_not_watched):
            id_ = anime_encoded2anime.get(anime_id[0])
            
            if id_ in recommended_anime_ids:
                top_rated_ids.append(id_)
        return top_rated_ids
    
    except Exception as e:
        print(e)


In [27]:
get_recommendations(105315)

'anime_id'


In [28]:
type(get_recommendations(105315))

'anime_id'


NoneType

In [29]:
type(get_recommendations('105315'))

'anime_id'


NoneType

In [30]:
model

<Functional name=functional, built=True>

In [35]:
def evaluate_recommendations(user_id):
    # True labels: Anime the user actually watched and rated highly (e.g., 8+ rating)
    true_animes = set(rating_df[(rating_df.user_id == user_id) & (rating_df.rating >= 8)]['anime_id'])

    # Predicted labels: Anime recommended by the model
    predicted_animes = set([
    32775,
    32997,
    442,
    459,
    460,
    461,
    462,
    463,
    464,
    465,
    466,
    531,
    594,
    33499,
    761,
    762,
    779,
    780,
    781,
    33606,
    936,
    950,
    1074,
    1094,
    1118,
    1120,
    1278,
    1358,
    1363,
    1364,
    1365,
    1366,
    1367,
    1368,
    1369,
    32853,
    1505,
    1506,
    1686,
    32902,
    1894,
    2006,
    2020,
    2107,
    2144,
    2171,
    2248,
    2385,
    2386,
    2397,
    2472,
    2490,
    2512,
    2513,
    2514,
    2515,
    2597,
    2680,
    2729,
    2847,
    2889,
    33142,
    2937,
    2951,
    35752,
    35806,
    33187,
    36043,
    36125,
    36215,
    36286,
    3738,
    3782,
    3783,
    3848,
    33338,
    36744,
    4134,
    4138,
    4155,
    4280,
    4282,
    4383,
    4418,
    4437,
    4447,
    37341,
    37370,
    37507,
    4835,
    37596,
    4890,
    32760,
    37814,
    37873,
    5117,
    37893,
    5168,
    37954,
    5204,
    5205,
    38002,
    5252,
    5298,
    5299,
    38106,
    5348,
    5397,
    38163,
    5458,
    5460,
    38234,
    38414,
    5661,
    33692,
    5799,
    5962,
    38732,
    38733,
    38770,
    38793,
    32806,
    6115,
    38935,
    6198,
    6231,
    33811,
    32813,
    6325,
    39093,
    32816,
    32817,
    6421,
    39195,
    6438,
    39213,
    6460,
    6462,
    6467,
    6533,
    6566,
    39360,
    6609,
    33885,
    6692,
    39470,
    33910,
    6793,
    39568,
    39576,
    6831,
    6862,
    33929,
    6878,
    33934,
    39689,
    6945,
    33949,
    33950,
    39815,
    39836,
    7164,
    40031,
    7295,
    40083,
    7367,
    40176,
    7472,
    40262,
    40286,
    40313,
    40337,
    40394,
    40417,
    40591,
    40690,
    40776,
    40807,
    40815,
    40827,
    40841,
    34178,
    40936,
    8187,
    8245,
    8246,
    41011,
    41053,
    41061,
    8331,
    8358,
    41132,
    8408,
    8410,
    41271,
    8609,
    8646,
    8718,
    8740,
    41531,
    41542,
    41555,
    32915,
    41673,
    8964,
    9032,
    41945,
    32928,
    32930,
    40914,
    42057,
    9366,
    34430,
    9515,
    32943,
    34467,
    32946,
    34480,
    42488,
    9733,
    42507,
    34503,
    34514,
    42600,
    42603,
    42629,
    9910,
    9963,
    9982,
    9999,
    10075,
    10083,
    42984,
    42996,
    10302,
    34614,
    34615,
    10370,
    10445,
    10471,
    10513,
    10531,
    10589,
    10659,
    10686,
    10703,
    10720,
    10766,
    10810,
    34723,
    43704,
    34742,
    10999,
    34766,
    11113,
    34777,
    44059,
    34855,
    11553,
    11679,
    11687,
    11689,
    11691,
    11701,
    34904,
    11795,
    11869,
    12069,
    12117,
    34982,
    45053,
    33069,
    35110,
    35111,
    45598,
    12859,
    12979,
    13053,
    13153,
    13231,
    13261,
    39605,
    13403,
    35229,
    35262,
    13667,
    13795,
    13837,
    13839,
    13857,
    14027,
    35370,
    14123,
    14189,
    35386,
    35403,
    14317,
    47307,
    35459,
    14735,
    14837,
    15021,
    15323,
    15417,
    15423,
    15525,
    39705,
    35714,
    16239,
    16331,
    16347,
    16444,
    16468,
    35840,
    16560,
    16576,
    16604,
    16610,
    16870,
    16894,
    16916,
    17010,
    17068,
    17259,
    36010,
    17341,
    17391,
    17535,
    36071,
    17699,
    17717,
    17743,
    36104,
    18177,
    18397,
    18425,
    18429,
    18441,
    18661,
    36289,
    18795,
    33323,
    19123,
    19137,
    19285,
    19511,
    19669,
    20021,
    20039,
    20449,
    20463,
    20613,
    20743,
    20767,
    20801,
    20811,
    20871,
    20945,
    20977,
    21065,
    21201,
    21419,
    21479,
    21571,
    21659,
    21667,
    21701,
    21751,
    21875,
    22049,
    22059,
    22335,
    22537,
    22567,
    22661,
    22677,
    39258,
    22961,
    22983,
    22985,
    23247,
    23299,
    23301,
    23441,
    23735,
    23775,
    23777,
    24277,
    24365,
    24415,
    24429,
    24591,
    24751,
    24789,
    24823,
    24991,
    25161,
    25303,
    25313,
    25389,
    25647,
    25781,
    25805,
    25861,
    25975,
    26013,
    26359,
    27891,
    27957,
    28069,
    28257,
    28285,
    28431,
    28479,
    28683,
    28715,
    28755,
    28789,
    28843,
    28861,
    29027,
    29101,
    29103,
    29105,
    38412,
    29585,
    29731,
    29755,
    29757,
    29778,
    29946,
    30165,
    30167,
    30191,
    30206,
    30234,
    30289,
    30347,
    30364,
    30370,
    30404,
    30405,
    30458,
    30514,
    30714,
    30771,
    30825,
    30885,
    30907,
    30915,
    31014,
    31051,
    31098,
    31115,
    31138,
    31289,
    31297,
    31327,
    31483,
    31490,
    31551,
    31553,
    31561,
    31608,
    31665,
    31668,
    31683,
    31704,
    31733,
    31736,
    31757,
    31758,
    31772,
    31783,
    38883,
    31873,
    31908,
    31909,
    31972,
    31994,
    32005,
    32051,
    32122,
    32228,
    32264,
    32282,
    32311,
    32346,
    32365,
    32446,
    32473,
    39035,
    32561,
    32587,
    32600,
    32695,
    32698,
    32728,
    32759,
    32764
])

    # Create labels (1 for liked, 0 for not liked)
    all_anime = set(true_animes).union(set(predicted_animes))  # Combine both sets
    y_true = [1 if anime in true_animes else 0 for anime in all_anime]  # Ground truth
    y_pred = [1 if anime in predicted_animes else 0 for anime in all_anime]  # Model predictions

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)

    # Print results
    print("Confusion Matrix:\n", cm)
    print("\nClassification Report:\n", classification_report(y_true, y_pred))

    return cm

In [32]:
evaluate_recommendations(105315)

NameError: name 'confusion_matrix' is not defined

In [33]:
from sklearn.metrics import confusion_matrix, classification_report

In [34]:
evaluate_recommendations(105315)

Confusion Matrix:
 [[ 0 10]
 [ 0  0]]

Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00      10.0
           1       0.00      0.00      0.00       0.0

    accuracy                           0.00      10.0
   macro avg       0.00      0.00      0.00      10.0
weighted avg       0.00      0.00      0.00      10.0



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


array([[ 0, 10],
       [ 0,  0]])

In [36]:
evaluate_recommendations(105315)

Confusion Matrix:
 [[  0 500]
 [  0   0]]

Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00     500.0
           1       0.00      0.00      0.00       0.0

    accuracy                           0.00     500.0
   macro avg       0.00      0.00      0.00     500.0
weighted avg       0.00      0.00      0.00     500.0



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


array([[  0, 500],
       [  0,   0]])

In [39]:
from sklearn.metrics import confusion_matrix, classification_report

def evaluate_recommendations(user_id):
    true_animes = set(rating_df[(rating_df.user_id == user_id) & (rating_df.rating >= 8)]['anime_id'])
    predicted_animes = set(get_recommendations(user_id) or [])  # Ensures a valid set

    all_anime = true_animes.union(predicted_animes)  # Combine all anime IDs
    y_true = [1 if anime in true_animes else 0 for anime in all_anime]
    y_pred = [1 if anime in predicted_animes else 0 for anime in all_anime]

    if not y_true:  # If there's no data, avoid further computation
        print(f"No recommendations or relevant data for user {user_id}")
        return None

    cm = confusion_matrix(y_true, y_pred)
    print("Confusion Matrix:\n", cm)
    print(true_animes)
    # 🛠️ FIX: Handle undefined metrics by setting `zero_division=0`
    print("\nClassification Report:\n", classification_report(y_true, y_pred, zero_division=0))
    
    return cm

# Example usage
user_id = 105315
evaluate_recommendations(user_id)


'anime_id'
No recommendations or relevant data for user 105315


In [40]:
def evaluate_recommendations(user_id):
    # True labels: Anime the user actually watched and rated highly (e.g., 8+ rating)
    true_animes = set(rating_df[(rating_df.user_id == user_id) & (rating_df.rating >= 8)]['anime_id'])

    # Predicted labels: Anime recommended by the model
    predicted_animes = set([
    32775,
    32997,
    442,
    459,
    460,
    461,
    462,
    463,
    464,
    465,
    466,
    531,
    594,
    33499,
    761,
    762,
    779,
    780,
    781,
    33606,
    936,
    950,
    1074,
    1094,
    1118,
    1120,
    1278,
    1358,
    1363,
    1364,
    1365,
    1366,
    1367,
    1368,
    1369,
    32853,
    1505,
    1506,
    1686,
    32902,
    1894,
    2006,
    2020,
    2107,
    2144,
    2171,
    2248,
    2385,
    2386,
    2397,
    2472,
    2490,
    2512,
    2513,
    2514,
    2515,
    2597,
    2680,
    2729,
    2847,
    2889,
    33142,
    2937,
    2951,
    35752,
    35806,
    33187,
    36043,
    36125,
    36215,
    36286,
    3738,
    3782,
    3783,
    3848,
    33338,
    36744,
    4134,
    4138,
    4155,
    4280,
    4282,
    4383,
    4418,
    4437,
    4447,
    37341,
    37370,
    37507,
    4835,
    37596,
    4890,
    32760,
    37814,
    37873,
    5117,
    37893,
    5168,
    37954,
    5204,
    5205,
    38002,
    5252,
    5298,
    5299,
    38106,
    5348,
    5397,
    38163,
    5458,
    5460,
    38234,
    38414,
    5661,
    33692,
    5799,
    5962,
    38732,
    38733,
    38770,
    38793,
    32806,
    6115,
    38935,
    6198,
    6231,
    33811,
    32813,
    6325,
    39093,
    32816,
    32817,
    6421,
    39195,
    6438,
    39213,
    6460,
    6462,
    6467,
    6533,
    6566,
    39360,
    6609,
    33885,
    6692,
    39470,
    33910,
    6793,
    39568,
    39576,
    6831,
    6862,
    33929,
    6878,
    33934,
    39689,
    6945,
    33949,
    33950,
    39815,
    39836,
    7164,
    40031,
    7295,
    40083,
    7367,
    40176,
    7472,
    40262,
    40286,
    40313,
    40337,
    40394,
    40417,
    40591,
    40690,
    40776,
    40807,
    40815,
    40827,
    40841,
    34178,
    40936,
    8187,
    8245,
    8246,
    41011,
    41053,
    41061,
    8331,
    8358,
    41132,
    8408,
    8410,
    41271,
    8609,
    8646,
    8718,
    8740,
    41531,
    41542,
    41555,
    32915,
    41673,
    8964,
    9032,
    41945,
    32928,
    32930,
    40914,
    42057,
    9366,
    34430,
    9515,
    32943,
    34467,
    32946,
    34480,
    42488,
    9733,
    42507,
    34503,
    34514,
    42600,
    42603,
    42629,
    9910,
    9963,
    9982,
    9999,
    10075,
    10083,
    42984,
    42996,
    10302,
    34614,
    34615,
    10370,
    10445,
    10471,
    10513,
    10531,
    10589,
    10659,
    10686,
    10703,
    10720,
    10766,
    10810,
    34723,
    43704,
    34742,
    10999,
    34766,
    11113,
    34777,
    44059,
    34855,
    11553,
    11679,
    11687,
    11689,
    11691,
    11701,
    34904,
    11795,
    11869,
    12069,
    12117,
    34982,
    45053,
    33069,
    35110,
    35111,
    45598,
    12859,
    12979,
    13053,
    13153,
    13231,
    13261,
    39605,
    13403,
    35229,
    35262,
    13667,
    13795,
    13837,
    13839,
    13857,
    14027,
    35370,
    14123,
    14189,
    35386,
    35403,
    14317,
    47307,
    35459,
    14735,
    14837,
    15021,
    15323,
    15417,
    15423,
    15525,
    39705,
    35714,
    16239,
    16331,
    16347,
    16444,
    16468,
    35840,
    16560,
    16576,
    16604,
    16610,
    16870,
    16894,
    16916,
    17010,
    17068,
    17259,
    36010,
    17341,
    17391,
    17535,
    36071,
    17699,
    17717,
    17743,
    36104,
    18177,
    18397,
    18425,
    18429,
    18441,
    18661,
    36289,
    18795,
    33323,
    19123,
    19137,
    19285,
    19511,
    19669,
    20021,
    20039,
    20449,
    20463,
    20613,
    20743,
    20767,
    20801,
    20811,
    20871,
    20945,
    20977,
    21065,
    21201,
    21419,
    21479,
    21571,
    21659,
    21667,
    21701,
    21751,
    21875,
    22049,
    22059,
    22335,
    22537,
    22567,
    22661,
    22677,
    39258,
    22961,
    22983,
    22985,
    23247,
    23299,
    23301,
    23441,
    23735,
    23775,
    23777,
    24277,
    24365,
    24415,
    24429,
    24591,
    24751,
    24789,
    24823,
    24991,
    25161,
    25303,
    25313,
    25389,
    25647,
    25781,
    25805,
    25861,
    25975,
    26013,
    26359,
    27891,
    27957,
    28069,
    28257,
    28285,
    28431,
    28479,
    28683,
    28715,
    28755,
    28789,
    28843,
    28861,
    29027,
    29101,
    29103,
    29105,
    38412,
    29585,
    29731,
    29755,
    29757,
    29778,
    29946,
    30165,
    30167,
    30191,
    30206,
    30234,
    30289,
    30347,
    30364,
    30370,
    30404,
    30405,
    30458,
    30514,
    30714,
    30771,
    30825,
    30885,
    30907,
    30915,
    31014,
    31051,
    31098,
    31115,
    31138,
    31289,
    31297,
    31327,
    31483,
    31490,
    31551,
    31553,
    31561,
    31608,
    31665,
    31668,
    31683,
    31704,
    31733,
    31736,
    31757,
    31758,
    31772,
    31783,
    38883,
    31873,
    31908,
    31909,
    31972,
    31994,
    32005,
    32051,
    32122,
    32228,
    32264,
    32282,
    32311,
    32346,
    32365,
    32446,
    32473,
    39035,
    32561,
    32587,
    32600,
    32695,
    32698,
    32728,
    32759,
    32764
])

    # Create labels (1 for liked, 0 for not liked)
    all_anime = set(true_animes).union(set(predicted_animes))  # Combine both sets
    y_true = [1 if anime in true_animes else 0 for anime in all_anime]  # Ground truth
    y_pred = [1 if anime in predicted_animes else 0 for anime in all_anime]  # Model predictions

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)

    # Print results
    print("Confusion Matrix:\n", cm)
    print("\nClassification Report:\n", classification_report(y_true, y_pred))

    return cm

In [41]:
user_id = 105315
evaluate_recommendations(user_id)

Confusion Matrix:
 [[  0 500]
 [  0   0]]

Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00     500.0
           1       0.00      0.00      0.00       0.0

    accuracy                           0.00     500.0
   macro avg       0.00      0.00      0.00     500.0
weighted avg       0.00      0.00      0.00     500.0



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


array([[  0, 500],
       [  0,   0]])