<a href="https://colab.research.google.com/github/nikhilsingh13/PythonHacks/blob/main/ndcg_sample_nb1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd

In [2]:
# DCG
def dcg_at_k(relevance, k):
    """Compute DCG@K"""

    relevance = np.asarray(relevance)[:k]
    dcg = np.sum(relevance / np.log2(np.arange(2, relevance.size + 2)))
    return dcg

In [3]:
#NDCG
def ndcg_at_k(true_relevance, predicted_scores, k=None):
    """NDCG@K by sorting predictions and normalizing with IDCG(ideal))"""
    if k is None:
        k = len(true_relevance)

    # Sort true relevance by predicted scores as per a ranking system
    sorted_indices = np.argsort(predicted_scores)[::-1]
    # print(sorted_indices)
    ranked_relevance = np.asarray(true_relevance)[sorted_indices]
    # print(ranked_relevance)

    # DCG and IDCG
    dcg = dcg_at_k(ranked_relevance, k)
    idcg = dcg_at_k(sorted(true_relevance, reverse=True), k)

    ndcg = dcg / idcg if idcg > 0 else 0

    return ndcg

In [4]:
# sample data

movies = ["Movie A", "Movie B", "Movie C", "Movie D", "Movie E"]
true_relevance_scores = [10, 0, 0, 1, 5]  # explicit ratings provided by user

# Prediction / recommendation by any rec-sys algo
# Case 1: Poorly ranked predictions (bad ordering)
predicted_scores_1 = [0.1, 0.2, 0.3, 4, 70]
ndcg1 = ndcg_at_k(true_relevance_scores, predicted_scores_1, k=5)

# Case 2: Slightly better predictions
predicted_scores_2 = [0.5, 0.1, 1.0, 0.5, 5.0]
ndcg2 = ndcg_at_k(true_relevance_scores, predicted_scores_2, k=5)

# Case 3: Ideal ranking (should return NDCG=1.0)
ndcg3 = ndcg_at_k(true_relevance_scores, true_relevance_scores, k=5)

# Case 4: NDCG with truncation at k=4
pred_scores_4 = [0.05, 1.1, 1.0, 0.5, 0.0]
ndcg4 = ndcg_at_k(true_relevance_scores, pred_scores_4, k=4)

# Case 5: Ties in predictions affecting ranking
pred_scores_5 = [1, 0, 0, 0, 1]
ndcg5 = ndcg_at_k(true_relevance_scores, pred_scores_5, k=1)

In [5]:
df = pd.DataFrame({
    "Scenario": ["Bad Predictions", "Better Predictions", "Ideal Ranking", "Truncated (k=4)", "Tied Scores"],
    "k": [5,5,5,4,5],
    "NDCG score": [ndcg1, ndcg2, ndcg3, ndcg4, ndcg5]
})

df

Unnamed: 0,Scenario,k,NDCG score
0,Bad Predictions,5,0.695694
1,Better Predictions,5,0.7182
2,Ideal Ranking,5,1.0
3,Truncated (k=4),4,0.352024
4,Tied Scores,5,0.5


# Script Complete