In [1]:
import numpy as np

def precision_at_k(dataframe, k):
    """Precision@k for the one-true-item-per-row setting.

    Args:
        dataframe: Frame whose rows carry a ``'true_item'`` value and a ranked
            ``'predicted_items'`` sequence.
        k: Cut-off; only the first ``k`` predictions of each row are inspected.

    Returns:
        The number of rows whose true item is among the first ``k`` predictions,
        divided by ``len(dataframe) * k``.
    """
    # Count one hit for every row that contains its true item in the top-k slice.
    hit_count = sum(
        1
        for _, user_row in dataframe.iterrows()
        if user_row['true_item'] in user_row['predicted_items'][:k]
    )
    return hit_count / (len(dataframe) * k)

def recall_at_k(dataframe, k):
    """Recall@k when every row has exactly one true item (equals the hit ratio).

    Args:
        dataframe: Frame whose rows carry a ``'true_item'`` value and a ranked
            ``'predicted_items'`` sequence.
        k: Cut-off; only the first ``k`` predictions of each row are inspected.

    Returns:
        The fraction of rows whose true item is among the first ``k``
        predictions, or 0.0 when the frame has no rows.
    """
    # Computed inline instead of delegating to hit_ratio_at_k: that helper is only
    # defined in a later cell, so the delegation raises NameError when this cell
    # is the first one executed on a fresh kernel.
    n_users = len(dataframe)
    if n_users == 0:
        return 0.0
    hits = sum(
        true_item in predicted[:k]
        for true_item, predicted in zip(dataframe['true_item'], dataframe['predicted_items'])
    )
    return hits / n_users

def mrr_at_k(dataframe, k):
    """Mean reciprocal rank of the true item within the first k predictions.

    Args:
        dataframe: Frame whose rows carry a ``'true_item'`` value and a ranked
            ``'predicted_items'`` list.
        k: Cut-off; only the first ``k`` predictions of each row are inspected.

    Returns:
        The sum of ``1 / rank`` (1-based rank of the true item, rows without a
        hit contribute nothing) divided by the number of rows.
    """
    reciprocal_ranks = []
    for _, user_row in dataframe.iterrows():
        try:
            position = user_row['predicted_items'][:k].index(user_row['true_item'])
        except ValueError:
            # The true item is not in the top-k slice: this row adds nothing.
            continue
        reciprocal_ranks.append(1 / (position + 1))
    return sum(reciprocal_ranks) / len(dataframe)

def ndcg_at_k(dataframe, k):
    """Mean nDCG@k with binary relevance (1 where a prediction equals the true item).

    Args:
        dataframe: Frame whose rows carry a ``'true_item'`` value and a ranked
            ``'predicted_items'`` sequence.
        k: Cut-off; only the first ``k`` predictions of each row are scored.

    Returns:
        The per-row nDCG values summed and divided by the number of rows. A row
        whose ideal DCG is zero (no hit in the top-k) scores 0.
    """
    def discounted_gain(relevances):
        # The 0-based position i is discounted by log2(i + 2).
        gain = 0
        for position, relevance in enumerate(relevances):
            gain += relevance / np.log2(position + 2)
        return gain

    per_user_scores = []
    for _, user_row in dataframe.iterrows():
        target = user_row['true_item']
        relevances = [
            1 if candidate == target else 0
            for candidate in user_row['predicted_items'][:k]
        ]
        # The ideal ranking is the same relevance values, best first.
        best_case = sorted(relevances, reverse=True)
        actual = discounted_gain(relevances)
        ideal = discounted_gain(best_case)
        if ideal != 0:
            per_user_scores.append(actual / ideal)
        else:
            per_user_scores.append(0)
    return sum(per_user_scores) / len(dataframe)

# === Choose the cut-off K and score the recommendations ===
# NOTE(review): `user_item_df` is not assigned anywhere in this cell; unless it
# already exists in the kernel, the first call below raises NameError.
k = 5
precision = precision_at_k(user_item_df, k)
recall = recall_at_k(user_item_df, k)
mrr = mrr_at_k(user_item_df, k)
ndcg = ndcg_at_k(user_item_df, k)

# === Report every metric to four decimal places ===
print(
    f"\n📊 Evaluation Metrics @ {k}:",
    f"Precision@{k}: {precision:.4f}",
    f"Recall@{k}:    {recall:.4f}",
    f"MRR@{k}:       {mrr:.4f}",
    f"nDCG@{k}:      {ndcg:.4f}",
    sep="\n",
)


NameError: name 'user_item_df' is not defined

In [2]:
import pandas as pd
import numpy as np

# === Step 1: Load the CSV ===
file_path = "reliability_scores_matrics.csv"
# Keep the raw table under its own name; `df` is reserved for the merged
# per-user evaluation frame built in Step 7, so the two are never confused.
scores_raw = pd.read_csv(file_path)

# === Step 2: Show the structure of the data ===
print("\n📌 Available columns:")
print(scores_raw.columns.tolist())
print("\n🔍 Sample data:")
print(scores_raw.head())

# === Step 3: Define columns ===
# Replace with actual column names from your dataset
USER_COL = 'User ID'               # Grouping key: one recommendation list per user
GROUND_TRUTH_COL = 'Item ID'       # True item per user
PREDICTED_COL = 'top_ij'           # Assumed as predicted score for ranking
TOP_K = 5                          # Length of each user's predicted list

# Fail early with a readable message when the CSV schema is not the expected one.
missing_cols = {USER_COL, GROUND_TRUTH_COL, PREDICTED_COL} - set(scores_raw.columns)
if missing_cols:
    raise KeyError(f"{file_path} is missing required column(s): {sorted(missing_cols)}")

# === Step 4: Generate Top-K Predictions Per User ===
# Within each user, rows are ordered from the highest to the lowest score.
df_sorted = scores_raw.sort_values(by=[USER_COL, PREDICTED_COL], ascending=[True, False])
top_k_df = df_sorted.groupby(USER_COL).head(TOP_K)

# === Step 5: Create predicted list per user ===
user_predicted = (
    top_k_df.groupby(USER_COL)[GROUND_TRUTH_COL]
    .apply(list)
    .rename('predicted_items')
    .reset_index()
)

# === Step 6: Get true item per user (assuming highest 'top_ij' is ground truth) ===
# GroupBy.first() returns the first *non-null* value of each column, which is not
# necessarily the top-ranked row; head(1) takes the actual first row per user.
# NOTE(review): the ground truth is chosen with the same score that orders the
# predictions, so true_item is predicted_items[0] for every user (barring missing
# item IDs). For any k >= 1 this pins Hit Ratio, MRR and nDCG at 1.0 and Precision
# at 1/k regardless of ranking quality. Confirm whether held-out labels exist.
user_true = (
    df_sorted.groupby(USER_COL)
    .head(1)[[USER_COL, GROUND_TRUTH_COL]]
    .rename(columns={GROUND_TRUTH_COL: 'true_item'})
    .reset_index(drop=True)
)

# === Step 7: Merge both ===
df = pd.merge(user_true, user_predicted, on=USER_COL)

# === Step 8: Define evaluation metrics ===
def hit_ratio_at_k(dataframe, k):
    """Fraction of rows whose true item appears among the first k predictions.

    Args:
        dataframe: Frame whose rows carry a ``'true_item'`` value and a ranked
            ``'predicted_items'`` sequence.
        k: Cut-off; only the first ``k`` predictions of each row are inspected.

    Returns:
        ``hits / len(dataframe)``, or 0.0 when the frame has no rows (the
        unguarded division would raise ZeroDivisionError).
    """
    n_users = len(dataframe)
    if n_users == 0:
        return 0.0
    # Walk the two columns directly; iterrows() would build a Series per row.
    hits = sum(
        true_item in predicted[:k]
        for true_item, predicted in zip(dataframe['true_item'], dataframe['predicted_items'])
    )
    return hits / n_users

def precision_at_k(dataframe, k):
    """Precision@k for the one-true-item-per-row setting.

    Args:
        dataframe: Frame whose rows carry a ``'true_item'`` value and a ranked
            ``'predicted_items'`` sequence.
        k: Cut-off; only the first ``k`` predictions of each row are inspected.

    Returns:
        ``hits / (len(dataframe) * k)``, or 0.0 when the frame has no rows (the
        unguarded division would raise ZeroDivisionError).
    """
    n_users = len(dataframe)
    if n_users == 0:
        return 0.0
    # Walk the two columns directly; iterrows() would build a Series per row.
    hits = sum(
        true_item in predicted[:k]
        for true_item, predicted in zip(dataframe['true_item'], dataframe['predicted_items'])
    )
    return hits / (n_users * k)

def recall_at_k(dataframe, k):
    """Recall@k; with one relevant item per row this is exactly the hit ratio.

    Args:
        dataframe: Frame passed through unchanged to ``hit_ratio_at_k``.
        k: Cut-off passed through unchanged to ``hit_ratio_at_k``.

    Returns:
        Whatever ``hit_ratio_at_k(dataframe, k)`` returns.
    """
    hit_ratio = hit_ratio_at_k(dataframe, k)
    return hit_ratio

def mrr_at_k(dataframe, k):
    """Mean reciprocal rank of the true item within the first k predictions.

    Args:
        dataframe: Frame whose rows carry a ``'true_item'`` value and a ranked
            ``'predicted_items'`` sequence.
        k: Cut-off; only the first ``k`` predictions of each row are inspected.

    Returns:
        The sum of ``1 / rank`` (1-based rank of the true item; rows without a
        hit contribute nothing) divided by the number of rows, or 0.0 when the
        frame has no rows.
    """
    n_users = len(dataframe)
    if n_users == 0:
        return 0.0
    rr_total = 0.0
    for true_item, predicted in zip(dataframe['true_item'], dataframe['predicted_items']):
        # Copy the slice into a list so any sliceable sequence works; .index()
        # is not available on every sequence type (e.g. numpy arrays).
        top_k = list(predicted[:k])
        try:
            rr_total += 1 / (top_k.index(true_item) + 1)
        except ValueError:
            # The true item is not in the top-k slice: this row adds nothing.
            continue
    return rr_total / n_users

def ndcg_at_k(dataframe, k):
    """Mean nDCG@k with binary relevance (1 where a prediction equals the true item).

    Args:
        dataframe: Frame whose rows carry a ``'true_item'`` value and a ranked
            ``'predicted_items'`` sequence.
        k: Cut-off; only the first ``k`` predictions of each row are scored.

    Returns:
        The per-row nDCG values summed and divided by the number of rows. Rows
        with no hit in the top-k score 0. Returns 0.0 when the frame has no rows
        (the unguarded division would raise ZeroDivisionError).
    """
    n_users = len(dataframe)
    if n_users == 0:
        return 0.0

    def dcg(rel_list):
        # The 0-based position idx is discounted by log2(idx + 2).
        return sum(rel / np.log2(idx + 2) for idx, rel in enumerate(rel_list))

    total_ndcg = 0.0
    for true_item, predicted in zip(dataframe['true_item'], dataframe['predicted_items']):
        rel = [1 if item == true_item else 0 for item in predicted[:k]]
        # The ideal ranking is the same relevance values, best first.
        idcg_score = dcg(sorted(rel, reverse=True))
        if idcg_score != 0:
            # DCG is only needed when there is at least one hit to normalise.
            total_ndcg += dcg(rel) / idcg_score
    return total_ndcg / n_users

# === Step 9: Report every metric at the chosen cut-off ===
k = 5
print(f"\n📊 Evaluation Metrics @ {k}:")
# One print call, one metric per output line.
print(
    f"Hit Ratio@{k}:  {hit_ratio_at_k(df, k):.4f}",
    f"Precision@{k}:  {precision_at_k(df, k):.4f}",
    f"Recall@{k}:     {recall_at_k(df, k):.4f}",
    f"MRR@{k}:         {mrr_at_k(df, k):.4f}",
    f"nDCG@{k}:        {ndcg_at_k(df, k):.4f}",
    sep="\n",
)

# === Step 10: Persist the per-user frame (row index omitted) ===
df.to_csv("user_predictions_evaluation.csv", index=False)
print("\n✅ File saved as 'user_predictions_evaluation.csv'")



📌 Available columns:
['User ID', 'Item ID', 'h_ij', 'd_ij', 'avg_score', 'review_rank', 'n_prime', 'z_ij', 'most_ij', 'sum_z', 'q_ij', 'sum_q', 'top_ij']

🔍 Sample data:
          User ID     Item ID      h_ij      d_ij  avg_score  review_rank  \
0   AO94DHGC771SJ  0528881469  0.000000  0.653309   0.156307            1   
1   AMO214LNFCEI4  0528881469  0.163296  0.202288   0.211416            2   
2  A3N7T0DY83Y4IG  0528881469  0.698923  0.089906   0.451825            3   
3  A1H8PY3QHMQQA0  0528881469  0.137781  0.040832   0.141980            4   
4  A24EV6RXELQZ63  0528881469  0.000000  0.013665   0.038471            5   

   n_prime      z_ij   most_ij     sum_z      q_ij     sum_q    top_ij  
0        5  1.000000  0.683242  1.463611  4.000000  6.416667  0.623377  
1        5  0.250000  0.170810  1.463611  1.500000  6.416667  0.233766  
2        5  0.111111  0.075916  1.463611  0.666667  6.416667  0.103896  
3        5  0.062500  0.042703  1.463611  0.250000  6.416667  0.038961  
4