In [21]:
import pandas as pd
import numpy as np
import os

import json

In [22]:
def accuracy_at_k(preds_df, true_items, max_k=25):
    """Reference (loop-based) top-k accuracy for k = 1 … max_k.

    Row ``i`` of ``preds_df`` holds one score per column. The column labels are
    ranked by descending score, and the row counts as a hit at cutoff ``k`` when
    ``true_items.iloc[i]`` is among the first ``k`` labels.

    Args:
        preds_df: DataFrame of scores, one row per sample and one column per
            candidate label. Rows are read positionally.
        true_items: Series holding the expected label of each row, also read
            positionally.
        max_k: Largest cutoff to evaluate.

    Returns:
        A list of ``max_k`` floats; entry ``k - 1`` is the fraction of rows whose
        true label is within the top ``k``. A label that is not a column of
        ``preds_df`` never counts as a hit. Returns ``[]`` when ``max_k < 1``.

    Raises:
        ZeroDivisionError: If ``true_items`` is empty and ``max_k >= 1``.
    """
    if max_k < 1:
        return []

    n_samples = len(true_items)
    # first_hit_at[k] = number of rows whose true label first enters the top-k at k.
    first_hit_at = [0] * (max_k + 1)

    for i in range(n_samples):
        # The ranking does not depend on k, so sort each row once instead of
        # once per cutoff.
        ranked = preds_df.iloc[i].sort_values(ascending=False).index[:max_k]
        target = true_items.iloc[i]
        if target not in ranked:
            continue
        # Find the smallest cutoff that contains the label; every larger cutoff
        # contains it as well.
        for k in range(1, max_k + 1):
            if target in ranked[:k]:
                first_hit_at[k] += 1
                break

    accs = []
    correct = 0
    for k in range(1, max_k + 1):
        correct += first_hit_at[k]
        accs.append(correct / n_samples)
    return accs

def fast_accuracy_at_k(preds_df: pd.DataFrame, true_items: pd.Series, max_k: int = 25):
    """Vectorised top-k accuracy for k = 1 … max_k.

    Each row of ``preds_df`` holds one score per column. The columns are ranked
    by descending score, and a row counts as a hit at cutoff ``k`` when the
    column labelled ``true_items[i]`` is among the ``k`` best-scoring columns.

    Args:
        preds_df: Score matrix, one row per sample and one column per candidate
            label.
        true_items: Expected label for each row, aligned with ``preds_df`` by
            position. Labels that are not columns of ``preds_df`` never match.
        max_k: Largest cutoff to evaluate. It may exceed the number of columns;
            cutoffs past the column count repeat the value at the column count,
            because the top-k then already contains every column.

    Returns:
        A list of ``max_k`` floats; entry ``k - 1`` is the fraction of rows whose
        true label is within the top ``k``. Returns ``[]`` when ``max_k <= 0``
        and all zeros when ``preds_df`` has no columns.
    """
    if max_k <= 0:
        return []

    scores = preds_df.to_numpy()
    n_items = scores.shape[1]
    if n_items == 0:
        # No candidates to rank, so nothing can ever match.
        return [0.0] * max_k

    # argpartition rejects kth >= number of columns, so never ask for more
    # ranks than there are candidates.
    k_eff = min(max_k, n_items)

    # Partial sort: the k_eff best-scoring columns of each row come first.
    topk_indices = np.argpartition(-scores, np.arange(k_eff), axis=1)[:, :k_eff]
    topk_scores = np.take_along_axis(scores, topk_indices, axis=1)

    # Fully sort only the top-k items
    sorted_topk_idx = np.argsort(-topk_scores, axis=1)
    topk_sorted_indices = np.take_along_axis(topk_indices, sorted_topk_idx, axis=1)

    # Map true_items to column positions; unknown labels become NaN, which
    # compares unequal to every position below.
    item_to_index = {item: i for i, item in enumerate(preds_df.columns)}
    true_indices = true_items.map(item_to_index).to_numpy()

    # Match matrix of shape (num_samples, k_eff): True where the rank-j column
    # is the row's true label.
    matches = (topk_sorted_indices == true_indices[:, None])

    # Cumulative accuracy: a hit at rank j is a hit for every cutoff >= j.
    acc_at_k = matches.cumsum(axis=1).clip(0, 1).mean(axis=0)

    if k_eff < max_k:
        # Beyond the column count the top-k cannot grow, so accuracy stays flat.
        acc_at_k = np.concatenate([acc_at_k, np.full(max_k - k_eff, acc_at_k[-1])])

    return acc_at_k.tolist()

In [23]:
top_k = 50
results_dir = 'original_results'

# Running sum of (accuracy@k * number of rows) over all files; dividing by
# total_count afterwards gives the row-weighted mean accuracy@k.
results = np.zeros(top_k)

total_count = 0

# os.listdir returns entries in arbitrary order, so sort before slicing to make
# the selection reproducible, and ignore anything that is not a parquet file.
result_files = sorted(f for f in os.listdir(results_dir) if f.endswith('.parquet'))

# NOTE(review): only the first two files are evaluated — confirm this cap is
# intentional and not a leftover from debugging.
for file in result_files[:2]:
    print(f'Loading ./{results_dir}/{file}')
    df_results = pd.read_parquet(f'./{results_dir}/{file}')
    if len(df_results) == 0:
        # A mean over zero rows is NaN, and NaN * 0 would poison the running sum.
        print('\tEmpty file, skipping')
        continue
    print('\tCalculating...')
    # NOTE(review): column 0 is used as the true labels while the whole frame
    # (column 0 included) is passed as the score matrix — confirm that column 0
    # should not be dropped from the scores.
    results += np.array(fast_accuracy_at_k(df_results, df_results[0], max_k=top_k)) * len(df_results)
    total_count += len(df_results)

Loading ./original_results/original_results_ordered_0.parquet
	Calculating...
Loading ./original_results/original_results_ordered_1.parquet
	Calculating...


In [24]:
# Write the row-weighted mean accuracy@k (accumulated sums divided by the total
# number of rows) to a text file.
np.savetxt(fname='original_accuracy.txt', X=results / total_count)