<a href="https://colab.research.google.com/github/nmarkin/Rec-Sys-Okko/blob/main/metrics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 0. Configuration

In [None]:
#TODO - add formulae in line with functions

# 1. Modules and functions

In [5]:
import numpy as np
from math import log2

# 2. Main

## 2.1. Precision@K

In [None]:
def precission_at_k(y_true: np.array, y_pred: np.array, k: int) -> float:
    """
    y_true: true labels
    y_pred: predicted lables
    k: cutoff length
    """

    if sum(y_true) == 0:
        return -1

    argsort = np.argsort(y_pred)[::-1]
    y_true_sorted = y_true[argsort]
    true_positives = y_true_sorted[:k].sum()

    return true_positives / k


In [None]:
# example array
y_true = np.array([1, 0, 0, 1, 0, 0])
y_pred = np.array([6, 2, 3, 5, 4, 1])

In [None]:
argsort = np.argsort(y_pred)[::-1]
argsort

array([0, 3, 4, 2, 1, 5])

In [None]:
y_true_sorted = y_true[argsort]
y_true_sorted

array([1, 1, 0, 0, 0, 0])

In [None]:
true_positives = y_true_sorted[:6].sum()
true_positives

2

In [None]:
# as expected
precission_at_k(y_true, y_pred, k = 6)

0.3333333333333333

## 2.2. AP@K, MAP@K

In [None]:
def average_precision(y_true: np.array, y_pred: np.array, k: int) -> float:

    if sum(y_true) == 0:
        return -1

    if len(y_pred) > k:
        y_pred = y_pred[:k]

    argsort = np.argsort(y_pred)[::-1]
    y_true_sorted = y_true[argsort]

    num_hits = 0
    score = 0

    for i, p in enumerate(y_true_sorted, 1):
        if p == 1:
            num_hits += 1
            score += num_hits / i
    if num_hits == 0:
        output = 0
    
    else:
        output = score / min(len(y_true), k)

    return output


In [None]:
average_precision(y_true, y_pred, k = 3)

0.3333333333333333

## 2.3. MRR

In [None]:
def reciprocal_rank(y_true: np.array, y_pred: np.array) -> float:
    
    argsort = np.argsort(y_pred)[::-1]
    y_true_sorted = y_true[argsort]
    for i, val in enumerate(y_true_sorted, 1):     
        if val == 1:
            return 1 / i
    return 0


In [None]:
# example array for MRR
y_true = np.array([1, 0, 0, 1, 0, 0])
y_pred = np.array([0, 2, 3, 3.5, 4, 1])

In [None]:
reciprocal_rank(y_true, y_pred)

0.5

## 2.4. NDCG

In [None]:
def compute_gain(y_value: float, gain_scheme: str) -> float:
    
    gain = {'exp2': 2 ** y_value - 1,
            'const': y_value}

    return float(gain[gain_scheme])

In [None]:
def dcg(y_true: np.array, y_pred: np.array, gain_scheme: str) -> float:
    
    dcg = 0
    argsort = np.argsort(y_pred)[::-1]
    y_true_sorted = y_true[argsort]

    for idx, val in enumerate(y_true_sorted, 1):
        gain = compute_gain(val, gain_scheme)
        dcg += gain / log2(idx + 1)
        
    return dcg


In [None]:
def ndcg(y_true: np.array, ys_pred: np.array, gain_scheme: str = 'const') -> float:
    
    # pred dcg then we calc the same to find max possible
    preds_dcg = dcg(y_true, ys_pred, gain_scheme)
    max_possible_dcg = dcg(y_true, y_true, gain_scheme)

    return preds_dcg / max_possible_dcg


In [None]:
y_pred = np.array([6, 5, 4, 3, 2, 1]) # some score to sort
y_true = np.array([3, 2, 3, 0, 1, 2])

In [None]:
ndcg(y_true, y_pred, 'exp2')

0.9488107485678985

# TODO
- Write MAP@K function;
- Modify ndcg() such that it incomporates cutoff param K

In [None]:
# MAP@K
def map(y_true: np.array, y_pred: np.array, k: int) -> float:
    return np.mean([average_precision(a,p,k) for a,p in zip(y_true, y_pred)])

In [8]:
# ngcd 

def compute_gain(y_value: float, gain_scheme: str) -> float:
    
    gain = {'exp2': 2 ** y_value - 1,
            'const': y_value}

    return float(gain[gain_scheme])

def dcg(y_true: np.array, y_pred: np.array, gain_scheme: str, k: int = None) -> float:
    
    dcg = 0
    argsort = np.argsort(y_pred)[::-1]
    y_true_sorted = y_true[argsort]

    if k is not None:
        argsort = argsort[:k]
        y_true_sorted = y_true_sorted[:k]
    for idx, val in enumerate(y_true_sorted, 1):
        gain = compute_gain(val, gain_scheme)
        dcg += gain / log2(idx + 1)
        
    return dcg

def ndcg(y_true: np.array, ys_pred: np.array, gain_scheme: str = 'const', k: int = None) -> float:
    
    # pred dcg then we calc the same to find max possible
    preds_dcg = dcg(y_true, ys_pred, gain_scheme, k)
    max_possible_dcg = dcg(y_true, y_true, gain_scheme, k)

    return preds_dcg / max_possible_dcg

In [11]:
ndcg(y_true, y_pred, 'exp2', 3)

0.9594535145926796