In [13]:
import pandas as pd
import numpy as np
import scipy.sparse as sparse
import random
import math
from collections import Counter

In [7]:
# Column names imposed on the CSV files; header=0 makes pandas discard each
# file's own header row in favour of these names.
interaction_columns = ['user_id', 'item_id', 'rating']
item_columns = ['anime_id', 'name', 'genre', 'type', 'episodes', 'rating', 'members']

# Training interactions, item catalogue, and held-out validation interactions.
df_train = pd.read_csv('dataset/train.csv', names=interaction_columns, header=0)
df_items = pd.read_csv('dataset/anime.csv', names=item_columns, header=0)
df_test = pd.read_csv('dataset/validation.csv', names=interaction_columns, header=0)

# Last expression of the cell: preview the first training rows.
df_train.head()

Unnamed: 0,user_id,item_id,rating
0,40748,9926,-1
1,35757,79,10
2,18266,51,-1
3,31006,8795,7
4,68084,14837,8


In [8]:
# Give every distinct user / item id a contiguous 0-based index, numbered in
# order of first appearance in the training data.
unique_user_ids = df_train['user_id'].unique()
unique_item_ids = df_train['item_id'].unique()

user_id_map = {uid: idx for idx, uid in enumerate(unique_user_ids)}
item_id_map = {iid: idx for idx, iid in enumerate(unique_item_ids)}
# Inverse lookup: matrix column index -> original item id.
reverse_item_id_map = {idx: iid for iid, idx in item_id_map.items()}

# Translate the id columns into matrix coordinates in one vectorised pass
# instead of a Python-level loop over every training row.
rows = df_train['user_id'].map(user_id_map).to_numpy(dtype=np.int64)
cols = df_train['item_id'].map(item_id_map).to_numpy(dtype=np.int64)
# One "1" per training row: the rating value itself is not used here.
data = np.ones(len(df_train), dtype=int)

# Users are rows, items are columns. Entries that share the same (row, col)
# coordinate are summed by the csr_matrix constructor.
user_item_matrix = sparse.csr_matrix((data, (rows, cols)), shape=(len(user_id_map), len(item_id_map)))

In [9]:
def recommend_random(user_idx, user_items, N=10, rng=None):
    """Pick up to ``N`` random item indices the user has not interacted with.

    Parameters
    ----------
    user_idx : int
        Row index of the user. Not used by this baseline; kept so existing
        calls that pass it keep working.
    user_items : scipy.sparse CSR row
        The user's row of the interaction matrix. Its ``indices`` are treated
        as the already-seen items and ``shape[1]`` as the catalogue size.
    N : int, default 10
        Maximum number of items to return. If fewer unseen items exist, all
        of them are returned.
    rng : random.Random, optional
        Generator used for sampling. Defaults to the module-level ``random``
        functions; pass a seeded ``random.Random`` for reproducible output.

    Returns
    -------
    list of int
        Distinct unseen item (column) indices, in random order.
    """
    seen_items = set(user_items.indices.tolist())
    # Take the catalogue size from the row that was passed in, so the function
    # does not depend on a global matrix being defined (or being the same one).
    n_items = user_items.shape[1]
    # Ascending candidate order keeps seeded sampling deterministic.
    unseen_items = [item for item in range(n_items) if item not in seen_items]

    sampler = random if rng is None else rng
    # Never request more items than are available.
    return sampler.sample(unseen_items, min(N, len(unseen_items)))

def show_recommendations_random(user_id, n):
    """Return the catalogue names of ``n`` random unseen items for ``user_id``.

    The raw user id is translated to its matrix row, random unseen column
    indices are drawn for that row, and those indices are mapped back to
    item ids. The result is the ``name`` column of ``df_items`` restricted to
    rows whose ``anime_id`` is among the drawn ids (rows keep ``df_items``
    order). Raises ``KeyError`` if ``user_id`` is not in ``user_id_map``.
    """
    row_idx = user_id_map[user_id]
    picked = recommend_random(row_idx, user_item_matrix[row_idx], N=n)
    picked_item_ids = [reverse_item_id_map[i] for i in picked]
    is_picked = df_items['anime_id'].isin(picked_item_ids)
    return df_items[is_picked]['name']

In [10]:
# Smoke test: print the names of 10 random unseen items for user 20881.
# The sample is random, so the names differ from run to run.
print(show_recommendations_random(user_id=20881, n=10))


1514                               Slayers Evolution-R
1524     Detective Conan Bonus File: Fantasista Flower
1953                                  The Sky Crawlers
2443                                 Fushigi Yuugi OVA
2830                                    Buki yo Saraba
2941                            Deadman Wonderland OVA
3043                             Arslan Senki (TV) OVA
5566                                       Sekkou Boys
11369                                           Garden
12272                                        Nudl Nude
Name: name, dtype: object


In [11]:
# Ground truth: each validation user mapped to the list of item ids they
# appear with in the validation file.
user_items_test = df_test.groupby('user_id')['item_id'].apply(list).to_dict()
recommendations_random = {}

for user_id in user_items_test:
    # Users that never appear in training have no matrix row; skip them.
    if user_id not in user_id_map:
        continue

    user_idx = user_id_map[user_id]
    user_items = user_item_matrix[user_idx]
    recs = recommend_random(user_idx, user_items, N=10)
    # Store original item ids (not matrix column indices) so the lists are
    # directly comparable with the validation data.
    recommendations_random[user_id] = [reverse_item_id_map[i] for i in recs]

def evaluate_model(recommendations, ground_truth, n=10):
    """Score top-``n`` recommendation lists against held-out interactions.

    Parameters
    ----------
    recommendations : dict
        Maps user -> ordered sequence of recommended item ids (best first).
    ground_truth : dict
        Maps user -> iterable of relevant item ids. Users that are missing
        from this dict are skipped.
    n : int, default 10
        Cut-off rank. Only the first ``n`` recommendations of each user are
        scored, so longer lists cannot inflate the ``@n`` metrics.

    Returns
    -------
    tuple
        ``(MAP@n, NDCG@n, Recall@n)``, each averaged over the scored users.
        ``(0.0, 0.0, 0.0)`` when no user could be scored.

    Notes
    -----
    Average precision is the mean of precision@k over the ranks k that hold
    a hit, i.e. it is normalised by the number of hits, not by the number of
    relevant items.
    """
    map_scores = []
    ndcg_scores = []
    recall_scores = []

    for user, rec_items in recommendations.items():
        if user not in ground_truth:
            continue

        relevant_items = set(ground_truth[user])
        # Apply the cut-off before scoring; hits[k-1] is 1 when rank k is relevant.
        hits = [1 if item in relevant_items else 0 for item in list(rec_items)[:n]]

        # MAP: precision@k at every rank k that is a hit, then averaged.
        precisions = []
        hit_count = 0
        for rank, hit in enumerate(hits, start=1):
            if hit:
                hit_count += 1
                precisions.append(hit_count / rank)
        map_score = np.mean(precisions) if precisions else 0

        # NDCG: binary-gain DCG divided by the DCG of an ideal list in which
        # the first min(|relevant|, n) ranks are all hits.
        dcg = sum(hit / math.log2(rank + 1) for rank, hit in enumerate(hits, start=1))
        ideal_hits = min(len(relevant_items), n)
        idcg = sum(1.0 / math.log2(rank + 1) for rank in range(1, ideal_hits + 1))
        ndcg = dcg / idcg if idcg > 0 else 0

        # Recall: share of the relevant items found within the top n.
        recall = sum(hits) / len(relevant_items) if relevant_items else 0

        map_scores.append(map_score)
        ndcg_scores.append(ndcg)
        recall_scores.append(recall)

    # np.mean([]) would yield NaN plus a RuntimeWarning; report zeros instead.
    if not map_scores:
        return 0.0, 0.0, 0.0

    return np.mean(map_scores), np.mean(ndcg_scores), np.mean(recall_scores)

def diversity(recommendations):
    """Return the share of distinct items among all recommended slots.

    ``recommendations`` maps user -> iterable of recommended items. The result
    is ``(# distinct items across all users) / (total # of recommendations)``,
    or ``0`` when nothing was recommended at all.
    """
    slot_count = 0
    distinct_items = set()
    for recs in recommendations.values():
        for item in recs:
            slot_count += 1
            distinct_items.add(item)

    if slot_count > 0:
        return len(distinct_items) / slot_count
    return 0

def calculate_novelty(recommendations, train_df=None):
    """Return the mean self-information of the recommended items.

    Parameters
    ----------
    recommendations : dict
        Maps user -> iterable of recommended item ids.
    train_df : pandas.DataFrame, optional
        Frame with an ``item_id`` column, one row per interaction, used to
        estimate item popularity. Defaults to the notebook-level ``df_train``,
        looked up when the function is called.

    Returns
    -------
    float
        Average of ``-ln(count(item) / total_interactions)`` over every
        recommended slot, or ``0`` when there are no recommendations. Items
        absent from ``train_df`` are counted as having one interaction.
    """
    if train_df is None:
        # Resolve the default at call time: a default bound in the signature
        # is captured when the def cell runs and goes stale if the training
        # frame is reloaded afterwards.
        train_df = df_train

    item_counts = Counter(train_df['item_id'])
    total_interactions = sum(item_counts.values())

    # Natural-log self-information of each recommended item's popularity.
    novelty_scores = [
        -math.log(item_counts.get(item, 1) / total_interactions)
        for rec_items in recommendations.values()
        for item in rec_items
    ]

    return np.mean(novelty_scores) if novelty_scores else 0


In [None]:
# Ranking quality of the random baseline against the validation interactions.
ranking_scores_random = evaluate_model(recommendations_random, user_items_test, n=10)
map_random, ndcg_random, recall_random = ranking_scores_random

# List-level properties that need no ground truth.
div_random = diversity(recommendations_random)
nov_random = calculate_novelty(recommendations_random)

# Report everything rounded to four decimals.
print(f"MAP@10: {map_random:.4f}")
print(f"NDCG@10: {ndcg_random:.4f}")
print(f"Recall@10: {recall_random:.4f}")
print(f"Diversity: {div_random:.4f}")
print(f"Novelty: {nov_random:.4f}")

MAP@10: 0.0012
NDCG@10: 0.0018
Recall@10: 0.0041
Diversity: 0.1903
Novelty: 9.0035
