In [1]:
import pandas as pd
from surprise import KNNWithMeans, Dataset, Reader
from surprise.model_selection import cross_validate, KFold
from surprise import accuracy
from collections import defaultdict
import math

# Function to get movie names
def get_movie_names():
    movie_names = {}
    with open('ml-100k/u.item', encoding='latin-1') as f:
        for line in f:
            parts = line.split('|')
            movie_id = int(parts[0])
            movie_name = parts[1]
            movie_names[movie_id] = movie_name
    return movie_names

# Function to get Top-N recommendations
def get_top_n(predictions, n=10):
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]
    return top_n

# NDCG calculation class
class NDCGAtK:
    def __init__(self, k=10):
        self.k = k

    def compute_ndcg(self, predictions, top_n):
        dcg = 0
        idcg = 0
        for i, (_, est) in enumerate(top_n[:self.k]):
            rank = i + 1
            dcg += (2 ** est - 1) / math.log(rank + 1, 2)
            idcg += (2 ** (self.k - rank) - 1) / math.log(rank + 1, 2)
        return dcg / idcg if idcg > 0 else 0

# Loading dataset and evaluation
data = Dataset.load_builtin('ml-100k')
kf = KFold(n_splits=5)
movie_names = get_movie_names()

# Lists to store results
mae_scores = []
precisions = []
recalls = []
ndcg_scores = []

# 5-Fold Cross-Validation
for trainset, testset in kf.split(data):
    algo = KNNWithMeans(k=50, sim_options={'name': 'pearson', 'user_based': True})
    algo.fit(trainset)
    predictions = algo.test(testset)
    
    # Calculate MAE
    mae = accuracy.mae(predictions, verbose=True)
    mae_scores.append(mae)
    
    # Compute Top-10 recommendations
    top_n = get_top_n(predictions, n=10)
    
    # Calculate Precision@10 and Recall@10
    precision = sum([1 for uid, user_ratings in top_n.items() if any([true_r >= 4.0 for (_, true_r) in user_ratings])]) / len(top_n)
    recall = precision  # Precision and Recall@10 are calculated the same way
    precisions.append(precision)
    recalls.append(recall)
    
    # Calculate NDCG@10
    ndcg = NDCGAtK(k=10).compute_ndcg(predictions, top_n)
    ndcg_scores.append(ndcg)

# Calculate average results
avg_mae = sum(mae_scores) / len(mae_scores)
avg_precision = sum(precisions) / len(precisions)
avg_recall = sum(recalls) / len(recalls)
avg_ndcg = sum(ndcg_scores) / len(ndcg_scores)

# Display results in table format
print("\nResults Table:")
print("| MAE | Precision@10 | Recall@10 | NDCG@10 |")
for i in range(len(mae_scores)):
    print(f"| {mae_scores[i]:.3f} | {precisions[i]:.3f} | {recalls[i]:.3f} | {ndcg_scores[i]:.3f} |")
print(f"| {avg_mae:.3f} | {avg_precision:.3f} | {avg_recall:.3f} | {avg_ndcg:.3f} |\n")

# Display recommendations
top_n_recommendations = get_top_n(predictions, n=10)
print("Top-10 Recommendations for Sample Users:")
for uid, user_ratings in list(top_n_recommendations.items())[:5]:  # Show the first 5 users
    print(f"Recommendations for User {uid}:")
    for iid, est in user_ratings:
        print(f"  Movie: {movie_names[int(iid)]} (Movie ID: {iid}), Estimated Rating: {est:.2f}")
    print("\n")


Computing the pearson similarity matrix...
Done computing similarity matrix.
MAE:  0.7406
Computing the pearson similarity matrix...
Done computing similarity matrix.
MAE:  0.7436
Computing the pearson similarity matrix...
Done computing similarity matrix.
MAE:  0.7342
Computing the pearson similarity matrix...
Done computing similarity matrix.
MAE:  0.7493
Computing the pearson similarity matrix...
Done computing similarity matrix.
MAE:  0.7490

Results Table:
| MAE | Precision@10 | Recall@10 | NDCG@10 |
| 0.741 | 0.709 | 0.709 | 0.000 |
| 0.744 | 0.703 | 0.703 | 0.000 |
| 0.734 | 0.728 | 0.728 | 0.000 |
| 0.749 | 0.739 | 0.739 | 0.000 |
| 0.749 | 0.688 | 0.688 | 0.000 |
| 0.743 | 0.713 | 0.713 | 0.000 |

Top-10 Recommendations for Sample Users:
Recommendations for User 892:
  Movie: Silence of the Lambs, The (1991) (Movie ID: 98), Estimated Rating: 4.85
  Movie: Godfather: Part II, The (1974) (Movie ID: 187), Estimated Rating: 4.74
  Movie: Maltese Falcon, The (1941) (Movie ID: 484),