In [14]:
import pandas as pd

ground_truth_rankings = pd.read_csv('ground_truth.csv', index_col=0).to_dict()
predicted_rankings = pd.read_csv('predicted_rankings.csv', index_col=0).to_dict()

In [15]:
total_ground_truth = sum(len(v) for v in ground_truth_rankings.values())
total_predicted = sum(len(v) for v in predicted_rankings.values())

print(f"Total ground truth items: {total_ground_truth}")
print(f"Total predicted items: {total_predicted}")



Total ground truth items: 616
Total predicted items: 616


In [4]:
print("🔍 Ground Truth Rankings (values):")
for user_id, items in ground_truth_rankings.items():
    print(f"{user_id}: {items}")

print("\n🔮 Predicted Rankings (values):")
for user_id, items in predicted_rankings.items():
    print(f"{user_id}: {items}")


🔍 Ground Truth Rankings (values):
movies: {196: '[580, 257, 13, 173, 110]', 186: '[288, 258, 1083, 12, 269]', 22: '[932, 688, 926, 405, 998]', 244: '[732, 468, 235, 144, 121]', 166: '[294, 748, 346, 286, 687]', 298: '[742, 679, 275, 91, 276]', 115: '[237, 69, 596, 969, 496]', 62: '[569, 1028, 271, 931, 924]', 286: '[229, 401, 325, 790, 455]', 200: '[982, 48, 235, 1028, 934]', 210: '[230, 926, 662, 243, 763]', 224: '[92, 980, 715, 544, 991]', 303: '[398, 145, 63, 1086, 363]', 122: '[214, 1268, 69, 57, 11]', 194: '[582, 756, 1206, 67, 228]', 234: '[12, 243, 403, 557, 874]', 119: '[755, 916, 294, 410, 931]', 299: '[235, 94, 17, 1227, 749]', 308: '[472, 1028, 452, 97, 309]', 95: '[768, 573, 715, 398, 552]', 160: '[230, 864, 969, 21, 832]', 301: '[1230, 423, 395, 411, 53]', 225: '[1443, 286, 479, 245, 143]', 290: '[650, 151, 473, 449, 180]', 181: '[1137, 1358, 1317, 1352, 544]', 278: '[258, 882, 245, 302, 301]', 276: '[43, 1095, 796, 1413, 1090]', 7: '[669, 440, 324, 70, 145]', 10: '[319, 6

In [5]:
def is_stringified_list(val):
    return isinstance(val, str) and val.strip().startswith('[') and val.strip().endswith(']')

print("🔍 Ground Truth Rankings - Stringified List Check:")
for user_id, items in ground_truth_rankings.items():
    print(f"{user_id}: type = {type(items)} | Is stringified list? {is_stringified_list(items) if isinstance(items, str) else 'No'}")

print("\n🔮 Predicted Rankings - Stringified List Check:")
for user_id, items in predicted_rankings.items():
    print(f"{user_id}: type = {type(items)} | Is stringified list? {is_stringified_list(items) if isinstance(items, str) else 'No'}")


🔍 Ground Truth Rankings - Stringified List Check:
movies: type = <class 'dict'> | Is stringified list? No

🔮 Predicted Rankings - Stringified List Check:
predicted_ranking: type = <class 'dict'> | Is stringified list? No


In [14]:
import numpy as np
from math import log2
import ast

def preprocess_rankings(rankings):
    """
    Convert stringified list values in rankings to actual lists.
    
    Args:
        rankings (dict): Dictionary with user_id as keys and stringified lists as values.
    
    Returns:
        dict: Dictionary with user_id as keys and lists of movie IDs as values.
    """
    processed = {}
    for user_id, value in rankings.items():
        if isinstance(value, list) and len(value) == 1 and isinstance(value[0], str):
            try:
                parsed_list = ast.literal_eval(value[0])
                if isinstance(parsed_list, list):
                    processed[user_id] = [str(item) for item in parsed_list]
                else:
                    raise ValueError(f"Parsed value for user {user_id} is not a list: {parsed_list}")
            except (ValueError, SyntaxError) as e:
                print(f"Error parsing value for user {user_id}: {value[0]}, Error: {e}")
                processed[user_id] = []
        else:
            processed[user_id] = [str(item) for item in value]
    return processed

def calculate_metrics(predicted, ground_truth):
    """
    Calculate average MAP, MRR, and NDCG across all users for predicted and ground truth rankings.
    
    Args:
        predicted (dict): {user_id: [movie_id1, movie_id2, ..., movie_id5]}
        ground_truth (dict): {user_id: [movie_id1, movie_id2, ..., movie_id5]}
    
    Returns:
        dict: {"MAP": float, "MRR": float, "NDCG": float} with average metrics
    """
    def average_precision(pred, gt):
        """Calculate Average Precision for a single user."""
        score = 0.0
        num_hits = 0
        # Relevance: 5 for rank 1, 4 for rank 2, ..., 1 for rank 5
        gt_relevance = {movie: 6 - i for i, movie in enumerate(gt, 1)}
        
        for i, movie in enumerate(pred, 1):
            if movie in gt_relevance:
                num_hits += 1
                precision = num_hits / i
                # Normalize relevance to [0, 1] for AP
                normalized_rel = gt_relevance[movie] / 5.0  # Max relevance = 5
                score += precision * normalized_rel
        
        # Normalize by number of relevant items
        num_relevant = len(gt_relevance)
        ap = score / num_relevant if num_hits > 0 else 0.0
        return ap

    def reciprocal_rank(pred, gt):
        """Calculate Reciprocal Rank for a single user."""
        for i, movie in enumerate(pred, 1):
            if movie in gt:
                for j, gt_movie in enumerate(gt, 1):
                    if gt_movie == movie:
                        return 1.0 / j
        return 0.0

    def ndcg(pred, gt):
        """Calculate NDCG for a single user."""
        gt_relevance = {movie: 6 - i for i, movie in enumerate(gt, 1)}
        
        # DCG for predicted
        dcg = 0.0
        for i, movie in enumerate(pred, 1):
            if movie in gt_relevance:
                rel = gt_relevance[movie]
                dcg += rel / log2(i + 1)
        
        # IDCG: DCG of ideal ranking
        ideal_rels = sorted(gt_relevance.values(), reverse=True)
        idcg = sum(rel / log2(i + 1) for i, rel in enumerate(ideal_rels, 1))
        
        return dcg / idcg if idcg > 0 else 0.0

    # Preprocess predicted and ground truth
    predicted = preprocess_rankings(predicted)
    ground_truth = preprocess_rankings(ground_truth)
    
    # Calculate metrics for each user
    map_scores = []
    mrr_scores = []
    ndcg_scores = []
    
    for user_id in predicted:
        if user_id in ground_truth:
            pred = predicted[user_id]
            gt = ground_truth[user_id]
            
            if not pred or not gt:
                continue
                
            map_scores.append(average_precision(pred, gt))
            mrr_scores.append(reciprocal_rank(pred, gt))
            ndcg_scores.append(ndcg(pred, gt))
    
    # Return mean metrics
    return {
        "MAP": np.mean(map_scores) if map_scores else 0.0,
        "MRR": np.mean(mrr_scores) if mrr_scores else 0.0,
        "NDCG": np.mean(ndcg_scores) if ndcg_scores else 0.0
    }

if __name__ == "__main__":
    metrics = calculate_metrics(predicted_rankings, ground_truth_rankings)
    print("Average Metrics:", metrics)

Average Metrics: {'MAP': 0.6, 'MRR': 0.4631222943722943, 'NDCG': 0.861289209501837}
