In [13]:
import pandas as pd
from collections import defaultdict
import numpy as np
import surprise
from surprise import accuracy
from surprise.model_selection import PredefinedKFold

In [2]:
# WARNING: Only run once or data will be lost

# Instance datasets
train_file = pd.read_csv('dataset/train.csv', sep=',', header=0)
test_file = pd.read_csv('dataset/validation.csv', sep=',', header=0)
anime_file = pd.read_csv('dataset/anime.csv', sep=',', header=0)

train_file.to_csv("dataset/train.csv", index=False, header=False)  # Remove 1st row from CSV
test_file.to_csv("dataset/validation.csv", index=False, header=False)
anime_file.to_csv("dataset/anime.csv", index=False, header=False)

train_file = pd.read_csv('dataset/train.csv', names = ['user_id','item_id','rating'] ,sep=',', header=None) 
test_file = pd.read_csv('dataset/validation.csv', names = ['user_id','item_id','rating'], sep=',', header=None)
anime_file = pd.read_csv('dataset/anime.csv', names = ['anime_id','name','genre','type','episodes','rating','members'], sep=',', header=None)

train_file.head()

Unnamed: 0,user_id,item_id,rating
0,40748,9926,-1
1,35757,79,10
2,18266,51,-1
3,31006,8795,7
4,68084,14837,8


In [3]:
# Convert data 
reader = surprise.Reader(line_format='user item rating', sep=',', rating_scale=(1,10))
data = surprise.Dataset.load_from_folds([("dataset/train.csv", "dataset/validation.csv")], reader=reader)
pkf = PredefinedKFold()
trainset, testset = next(pkf.split(data))
a_testset = trainset.build_anti_testset()

In [4]:
myItemKnn = surprise.KNNBasic(k=7, sim_options={'name': 'pearson', 'user_based': False})
myItemKnn.fit(trainset)

Computing the pearson similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x124b47b50>

In [5]:
predictions = myItemKnn.test(a_testset)

In [6]:
RMSE_VALUE = accuracy.rmse(predictions)
print(RMSE_VALUE)

RMSE: 0.0162
0.016166101738789324


Lets check in depth these results

In [7]:
print(predictions[0:5])

[Prediction(uid='40748', iid='79', r_ui=6.165656934306569, est=6.165656934306569, details={'was_impossible': True, 'reason': 'Not enough neighbors.'}), Prediction(uid='40748', iid='51', r_ui=6.165656934306569, est=6.165656934306569, details={'was_impossible': True, 'reason': 'Not enough neighbors.'}), Prediction(uid='40748', iid='8795', r_ui=6.165656934306569, est=6.165656934306569, details={'was_impossible': True, 'reason': 'Not enough neighbors.'}), Prediction(uid='40748', iid='14837', r_ui=6.165656934306569, est=6.165656934306569, details={'was_impossible': True, 'reason': 'Not enough neighbors.'}), Prediction(uid='40748', iid='1536', r_ui=6.165656934306569, est=6.165656934306569, details={'was_impossible': True, 'reason': 'Not enough neighbors.'})]


In [8]:
valid_predictions = [pred for pred in predictions if not pred.details['was_impossible']]
print(valid_predictions[0:5])

[Prediction(uid='31006', iid='1239', r_ui=6.165656934306569, est=6.0, details={'actual_k': 1, 'was_impossible': False}), Prediction(uid='46414', iid='20159', r_ui=6.165656934306569, est=5.0, details={'actual_k': 1, 'was_impossible': False}), Prediction(uid='67409', iid='8675', r_ui=6.165656934306569, est=7.0, details={'actual_k': 1, 'was_impossible': False}), Prediction(uid='32545', iid='31737', r_ui=6.165656934306569, est=7.0, details={'actual_k': 1, 'was_impossible': False}), Prediction(uid='70120', iid='27899', r_ui=6.165656934306569, est=7.0, details={'actual_k': 1, 'was_impossible': False})]


In [9]:
print(f"Valid predictions: {len(valid_predictions)}")
print(f"All predictions: {len(predictions)}")
print(f"rate: {len(valid_predictions) / len(predictions)}")

Valid predictions: 2086
All predictions: 78455796
rate: 2.6588220454738615e-05


We see that the situation with UserKNN happened again.

Lets now analize a top 10 generated list via predictions.

In [10]:
def get_top_n(predictions, n=10):
    """Devuelve las N-mejores recomendaciones para cada usuario de un set de predicción.

    Args:
        predictions(lista de objetos Prediction): La lista de predicción obtenida del método test.
        n(int): El número de recomendaciónes por usuario

    Returns:
    Un diccionario donde las llaves son ids de usuario y los valores son listas de tuplas:
        [(item id, rating estimation), ...] de tamaño n.
    """

    # First map the predictions to each user.
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # Then sort the predictions for each user and retrieve the k highest ones.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n

# Predict top 10 ratings of user 31006
top_n = get_top_n(predictions, n=10)
print(top_n["31006"])

[('9926', 6.165656934306569), ('79', 6.165656934306569), ('51', 6.165656934306569), ('14837', 6.165656934306569), ('1536', 6.165656934306569), ('1241', 6.165656934306569), ('8668', 6.165656934306569), ('6325', 6.165656934306569), ('1887', 6.165656934306569), ('258', 6.165656934306569)]


In [15]:
def apk(actual, predicted, k=10):
    if not actual:
        return 0.0
    predicted = predicted[:k]
    score = 0.0
    hits = 0.0
    for i, p in enumerate(predicted):
        if p in actual and p not in predicted[:i]:
            hits += 1.0
            score += hits / (i + 1.0)
    return score / min(len(actual), k)

def mapk(actual_dict, predicted_dict, k=10):
    return np.mean([apk(actual_dict[u], [iid for iid, _ in predicted_dict[u]], k) for u in predicted_dict if u in actual_dict])

def ndcg_at_k(actual_dict, predicted_dict, k=10):
    def dcg(relevance_scores):
        return sum([rel / np.log2(idx + 2) for idx, rel in enumerate(relevance_scores)])

    scores = []
    for u in predicted_dict:
        if u not in actual_dict:
            continue
        pred_items = [iid for iid, _ in predicted_dict[u][:k]]
        actual_items = actual_dict[u]
        relevance = [1 if iid in actual_items else 0 for iid in pred_items]
        ideal_relevance = sorted(relevance, reverse=True)
        if dcg(ideal_relevance) == 0:
            scores.append(0.0)
        else:
            scores.append(dcg(relevance) / dcg(ideal_relevance))
    return np.mean(scores)

def recall_at_k(actual_dict, predicted_dict, k=10):
    recalls = []
    for u in predicted_dict:
        if u not in actual_dict:
            continue
        pred_items = [iid for iid, _ in predicted_dict[u][:k]]
        actual_items = actual_dict[u]
        if not actual_items:
            continue
        hit_count = len(set(pred_items) & actual_items)
        recalls.append(hit_count / len(actual_items))
    return np.mean(recalls)

def diversity(top_n, sim_matrix):
    diversities = []
    for user, recs in top_n.items():
        sims = []
        items = [iid for iid, _ in recs]
        for i in range(len(items)):
            for j in range(i+1, len(items)):
                sims.append(sim_matrix[items[i]].get(items[j], 0))
        if sims:
            diversities.append(1 - np.mean(sims))
    return np.mean(diversities)

def calculate_novelty(top_n, item_popularity, total_items):
    novelty_scores = []
    for user, recs in top_n.items():
        for iid, _ in recs:
            freq = item_popularity.get(iid, 1)
            novelty_scores.append(np.log2(total_items / freq))  
    return np.mean(novelty_scores)

true_items = defaultdict(set)
for uid, iid, true_r in testset:
    true_items[uid].add(iid)

total_items = train_file['item_id'].nunique()

item_popularity = defaultdict(int)
for _, iid, _ in trainset.all_ratings():
    raw_iid = trainset.to_raw_iid(iid)
    item_popularity[raw_iid] += 1

sim_matrix = defaultdict(dict)
for inner_iid1 in range(len(myItemKnn.sim)):
    raw_iid1 = trainset.to_raw_iid(inner_iid1)
    for inner_iid2 in range(len(myItemKnn.sim[inner_iid1])):
        raw_iid2 = trainset.to_raw_iid(inner_iid2)
        sim_matrix[raw_iid1][raw_iid2] = myItemKnn.sim[inner_iid1][inner_iid2]

In [17]:
map_score = mapk(true_items, top_n, k=10)
ndcg_score = ndcg_at_k(true_items, top_n, k=10)
recall_score = recall_at_k(true_items, top_n, k=10)
div_score = diversity(top_n, sim_matrix)
nov_score = calculate_novelty(top_n, item_popularity, total_items)

print(f"MAP@10: {map_score:.4f}")
print(f"NDCG@10: {ndcg_score:.4f}")
print(f"Recall@10: {recall_score:.4f}")
print(f"Diversity: {div_score:.4f}")
print(f"Novelty: {nov_score:.4f}")

MAP@10: 0.0010
NDCG@10: 0.0019
Recall@10: 0.0037
Diversity: 1.0000
Novelty: 8.4038


Lets try to modify the parameters to have more valid predictions.

In [18]:
myItemKnn = surprise.KNNBasic(k=5, sim_options={'name': 'cosine', 'user_based': False})
myItemKnn.fit(trainset)
predictions = myItemKnn.test(a_testset)

Computing the cosine similarity matrix...
Done computing similarity matrix.


In [19]:
print(predictions[0:5])

[Prediction(uid='40748', iid='79', r_ui=6.165656934306569, est=6.165656934306569, details={'was_impossible': True, 'reason': 'Not enough neighbors.'}), Prediction(uid='40748', iid='51', r_ui=6.165656934306569, est=6.165656934306569, details={'was_impossible': True, 'reason': 'Not enough neighbors.'}), Prediction(uid='40748', iid='8795', r_ui=6.165656934306569, est=6.165656934306569, details={'was_impossible': True, 'reason': 'Not enough neighbors.'}), Prediction(uid='40748', iid='14837', r_ui=6.165656934306569, est=6.165656934306569, details={'was_impossible': True, 'reason': 'Not enough neighbors.'}), Prediction(uid='40748', iid='1536', r_ui=6.165656934306569, est=6.165656934306569, details={'was_impossible': True, 'reason': 'Not enough neighbors.'})]


In [20]:
valid_predictions = [pred for pred in predictions if not pred.details['was_impossible']]
print(valid_predictions[0:5])

[Prediction(uid='40748', iid='2904', r_ui=6.165656934306569, est=1, details={'actual_k': 1, 'was_impossible': False}), Prediction(uid='40748', iid='10702', r_ui=6.165656934306569, est=1, details={'actual_k': 1, 'was_impossible': False}), Prediction(uid='35757', iid='6045', r_ui=6.165656934306569, est=10, details={'actual_k': 1, 'was_impossible': False}), Prediction(uid='35757', iid='3652', r_ui=6.165656934306569, est=10, details={'actual_k': 1, 'was_impossible': False}), Prediction(uid='35757', iid='6746', r_ui=6.165656934306569, est=10, details={'actual_k': 1, 'was_impossible': False})]


In [21]:
print(f"Valid predictions: {len(valid_predictions)}")
print(f"All predictions: {len(predictions)}")
print(f"rate: {len(valid_predictions) / len(predictions)}")

Valid predictions: 373449
All predictions: 78455796
rate: 0.004759992493097642


In [22]:
RMSE_VALUE = accuracy.rmse(predictions)
print(RMSE_VALUE)

RMSE: 0.2124
0.21242250627319215


In [23]:
# Predict top 10 ratings of user 31006
top_n = get_top_n(predictions, n=10)
print(top_n["31006"])

[('986', 7.0), ('1303', 7.0), ('849', 7.0), ('3702', 7.0), ('433', 7.0), ('3791', 7.0), ('16355', 7.0), ('5204', 7.0), ('9926', 6.165656934306569), ('79', 6.165656934306569)]


In [24]:
map_score = mapk(true_items, top_n, k=10)
ndcg_score = ndcg_at_k(true_items, top_n, k=10)
recall_score = recall_at_k(true_items, top_n, k=10)
div_score = diversity(top_n, sim_matrix)
nov_score = calculate_novelty(top_n, item_popularity, total_items)

print(f"MAP@10: {map_score:.4f}")
print(f"NDCG@10: {ndcg_score:.4f}")
print(f"Recall@10: {recall_score:.4f}")
print(f"Diversity: {div_score:.4f}")
print(f"Novelty: {nov_score:.4f}")

MAP@10: 0.0014
NDCG@10: 0.0027
Recall@10: 0.0052
Diversity: 0.9995
Novelty: 7.9690
