# Ranking Evaluation

The measures below can be used to assess the quality of a ranked list. This might be a ranked list of results for a search query or a ranked list of recommendations.

In [None]:
import scipy.stats as stats
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, mean_absolute_error

### precision@k

Measures the proportion of the top-k ranked items that are relevant.

In [None]:
def hits_at(ranked_items, rel_items, k):
    """Count how many of the first ``k`` ranked items are relevant.

    Args:
        ranked_items: Indexable sequence of items, best-ranked first.
        rel_items: Container of relevant items (anything supporting ``in``).
        k: Cutoff position. If it exceeds ``len(ranked_items)`` the whole
            ranking is inspected; if it is zero or negative nothing is.

    Returns:
        The number of inspected positions whose item is in ``rel_items``.
    """
    cutoff = min(k, len(ranked_items))
    return sum(1 for pos in range(cutoff) if ranked_items[pos] in rel_items)

In [None]:
# Toy example: a ranking of five items, of which "b" and "e" are relevant.
ranked_items = ["a","b","c","d","e"]
rel_items = ["b","e"]

In [None]:
# Count the relevant items among the top 5 positions of the ranking.
hits_at(ranked_items, rel_items, 5)

2

In [None]:
def precision_at(ranked_items, rel_items, k):
    """Return precision@k: relevant hits in the top ``k``, divided by ``k``.

    The denominator is always ``k``, even when ``ranked_items`` holds fewer
    than ``k`` entries. A ``k`` of 0 raises ``ZeroDivisionError``.

    Args:
        ranked_items: Sequence of items, best-ranked first.
        rel_items: Container of relevant items.
        k: Cutoff position.

    Returns:
        The fraction ``hits / k`` as a float.
    """
    return hits_at(ranked_items, rel_items, k) / k

In [None]:
# precision@3: relevant hits within the first three ranked items, divided by 3.
precision_at(ranked_items, rel_items,3)

0.3333333333333333

In [None]:
def recall_at(ranked_items, true_items, k):
    """Return recall@k: the share of relevant items found in the top ``k``.

    Args:
        ranked_items: Sequence of items, best-ranked first.
        true_items: Container of relevant (ground-truth) items.
        k: Cutoff position.

    Returns:
        ``hits / len(true_items)`` as a float.

    Raises:
        ZeroDivisionError: If ``true_items`` is empty.
    """
    hits = hits_at(ranked_items, true_items, k)
    # Normalize by the relevant set that was passed in; relying on a
    # notebook-level ``rel_items`` global would give wrong results whenever
    # the argument and the global differ.
    return hits / len(true_items)

In [None]:
# recall@5: relevant hits within the first five ranked items, divided by the
# number of relevant items.
recall_at(ranked_items, rel_items,5)

1.0

### Average Precision

In [None]:
def average_precision(ranked_items, rel_items):
    """Return the average precision of a ranking.

    Walks the ranking from the top; at every position holding a relevant
    item, the precision up to and including that position is added to a
    running sum. The sum is then divided by the total number of relevant
    items, so relevant items missing from the ranking lower the score.

    Args:
        ranked_items: Iterable of items, best-ranked first.
        rel_items: Sized container of relevant items.

    Returns:
        The average precision.

    Raises:
        ZeroDivisionError: If ``rel_items`` is empty.
    """
    num_hits = 0
    precision_sum = 0
    for rank, candidate in enumerate(ranked_items, start=1):
        if candidate not in rel_items:
            continue
        num_hits += 1
        # Precision at this rank = relevant items seen so far / rank.
        precision_sum += num_hits / rank
    return precision_sum / len(rel_items)
    

In [None]:
# Two alternative rankings to be scored against the same three relevant items.
# NOTE: this rebinds the notebook-level `rel_items` defined in an earlier cell.
ranked_items1 = ["a","t","e","s","c","f"]
ranked_items2 = ["a","t","g","s", "d","e"]
rel_items = ["a","d","e"]

In [None]:
# Average precision of the first ranking, whose relevant items sit at
# ranks 1 ("a") and 3 ("e"); "d" is never retrieved.
average_precision(ranked_items1, rel_items)

0.5555555555555555

### Reciprocal Rank

In [None]:
# See http://en.wikipedia.org/wiki/Mean_reciprocal_rank
def reciprocal_rank(ranked_items, rel_items):
    """Return the reciprocal rank of the first relevant item.

    Args:
        ranked_items: Iterable of items, best-ranked first.
        rel_items: Container of relevant items.

    Returns:
        ``1 / rank`` of the first item in ``ranked_items`` that is also in
        ``rel_items`` (ranks are 1-based), or ``0.0`` when the ranking
        contains no relevant item.
    """
    for rank, item in enumerate(ranked_items, start=1):
        if item in rel_items:
            return 1 / rank
    # No relevant item was ranked: score 0 rather than falling through to an
    # implicit None, so results can be averaged into a mean reciprocal rank.
    return 0.0
            

In [None]:
# Reciprocal rank of the first relevant item in the ranking.
# NOTE: this uses whichever `ranked_items` / `rel_items` were most recently
# bound in the notebook session.
reciprocal_rank(ranked_items, rel_items)

1.0

### Normalized Discounted Cumulative Gain

In [None]:
# Computes the ideal DCG given the number of positive items
def compute_IDCG(n):
    """Return the ideal DCG for ``n`` relevant items with binary gains.

    Sums the discount ``1 / log2(rank + 1)`` over ranks ``1..n``, i.e. the
    DCG of a ranking whose first ``n`` positions are all relevant.

    Args:
        n: Number of relevant (positive) items.

    Returns:
        The accumulated discount; ``0`` when ``n`` is zero or negative.
    """
    ideal = 0
    for rank in range(1, n + 1):
        ideal += 1 / np.log2(rank + 1)
    return ideal

In [None]:
def NDCG(ranked_items, rel_items):
    """Return the normalized discounted cumulative gain of a ranking.

    Every relevant item contributes a gain of ``1 / log2(position + 2)``,
    where ``position`` is its 0-based index in ``ranked_items``. The total is
    divided by the ideal DCG for ``len(rel_items)`` relevant items.

    Args:
        ranked_items: Sequence of items, best-ranked first.
        rel_items: Sized container of relevant items.

    Returns:
        ``dcg / idcg``.
    """
    ideal = compute_IDCG(len(rel_items))
    gain = 0
    for position, item in enumerate(ranked_items):
        if item in rel_items:
            gain += 1 / np.log2(position + 2)
    return gain / ideal
    

In [None]:
# Example ranking for NDCG: the relevant items "a", "d" and "e" appear at
# positions 1, 4 and 5 of the ranked list.
ranked_items = ["a","t","g","d","e","s"]
rel_items = ["a","d","e"]

In [None]:
# NDCG of the example ranking: its DCG divided by the ideal DCG for the
# number of relevant items.
NDCG(ranked_items, rel_items)

0.8529278650606567