# Punto 1: Métricas de evaluación de IR
## Integrantes
* Juan Esteban Arboleda
* Luccas Rojas

A continuación, se implementan las métricas de evaluación

# Importación de librerías

In [2]:
import numpy as np
import math

# Precisión

In [3]:
def presition(relevance: list) -> float:
    """
    Computes the precision of a query result, i.e. the fraction of the
    returned documents that are relevant.

        Params
        ------
            relevance: list
                Binary vector with one entry per returned document, in
                ranking order. 1 marks a relevant document, 0 marks a
                non-relevant one.

        Returns
        -------
            The sum of the vector divided by its length.
    """
    judgments = np.array(relevance)
    relevant_returned = judgments.sum()

    return relevant_returned / len(judgments)

presition([0,0,0,1])  # 1 relevant document out of 4 returned -> 0.25

0.25

# Precisión @ k

In [4]:
def presition_at_k(relevance: list, k: int) -> float:
    """
    Computes the precision @ k of a query result: the precision of the
    ranking restricted to its first k positions.

        Params
        ------
            relevance: list
                Binary vector with one entry per returned document, in
                ranking order. 1 marks a relevant document, 0 marks a
                non-relevant one.

            k: int
                Cut-off position; only the first k entries are evaluated.

        Returns
        -------
            The precision of relevance[:k].
    """
    return presition(relevance[:k])

presition_at_k([0,0,0,1], 1)  # the top-1 document is not relevant -> 0.0

0.0

# Recall @ K

In [5]:
def recall_at_k(relevance: list, n_relevant_docs, k):
    """
    Computes the recall @ k of a query result: the share of all the
    relevant documents that appears within the first k positions.

        Params
        ------
            relevance: list
                Binary vector with one entry per returned document, in
                ranking order. 1 marks a relevant document, 0 marks a
                non-relevant one.

            n_relevant_docs: int
                Total number of documents that are relevant to the query.

            k: int
                Cut-off position; only the first k entries are evaluated.

        Returns
        -------
            The sum of the first k entries divided by n_relevant_docs.
    """
    top_k = np.array(relevance)[:k]
    relevant_in_top_k = top_k.sum()

    return relevant_in_top_k / n_relevant_docs

recall_at_k([0,0,0,1], 4, 1)  # none of the 4 relevant documents is in the top 1 -> 0.0

0.0

# Average precision

In [6]:
def average_presition(relevance: list) -> float:
    """
    Returns the average precision of a query result: the mean of the
    precision @ k values taken at every position k that holds a relevant
    document.

        Params
        ------
            relevance: list
                A binary vector. The kth element of the vector
                represents whether the kth returned document is relevant
                to the query. 1 means it is relevant, 0 means it is not.
                The relevance list MUST contain all the relevant
                documents, because the number of 1s in the vector is
                used as the total number of relevant documents.

        Returns
        -------
            The average precision as a float. 0.0 is returned when the
            vector is empty or contains no relevant document.
    """
    relevant_seen = 0
    precision_sum = 0.0

    # Single pass: at a relevant position, precision @ k is simply the
    # number of relevant documents seen so far divided by the rank.
    for rank, is_relevant in enumerate(relevance, start=1):
        if is_relevant == 1:
            relevant_seen += 1
            precision_sum += relevant_seen / rank

    # No relevant document means there is nothing to average over.
    if relevant_seen == 0:
        return 0.0

    return precision_sum / relevant_seen

average_presition([0,1,0,1,1,1,1])  # relevant documents at ranks 2, 4, 5, 6 and 7

0.5961904761904762

# MAP

In [7]:
def mean_average_presition(relevances_lst: list) -> float:
    """
    Returns the mean average precision (MAP) of a series of query
    results: the arithmetic mean of the average precision of each query.

        Params
        ------
            relevances_lst: list
                A list of binary vectors. Each vector represents a query
                result. The kth element of a vector represents whether
                the kth returned document is relevant to its query.
                1 means it is relevant, 0 means it is not. Each vector
                MUST contain all the relevant documents to its query.

        Returns
        -------
            The mean of the per-query average precision values, or 0.0
            when relevances_lst contains no query.
    """
    n_queries = len(relevances_lst)

    # Without queries there is nothing to average; avoid dividing by zero.
    if n_queries == 0:
        return 0.0

    total = sum(average_presition(relevance) for relevance in relevances_lst)

    return total / n_queries

# DCG @ K

In [10]:
def dcg_i(relevance: list, i: int) -> float:
    """
    Returns the discounted gain of the ith result,
    i.e. relevance_i / log2(max(i, 2)).

        Params
        ------
            relevance: list
                A numeric vector where the kth component of the vector
                represents the relevance of the kth returned document.

            i: int
                1-based position of the document in the ranking.

        Returns
        -------
            The relevance at position i divided by its discount. The
            discount is 1 for positions 1 and 2, since log2(2) == 1.
    """
    return relevance[i - 1] / math.log2(max(i, 2))


def dcg_at_k(relevance: list, k: int) -> float:
    """
    Returns the DCG @ k of a query result: the sum of the discounted
    gains of the first k returned documents.

        Params
        ------
            relevance: list
                A numeric vector where the kth component of the vector
                represents the relevance of the kth returned document.

            k: int
                Position until which the metric should be evaluated.
                When k is larger than the number of returned documents,
                all the returned documents are used.

        Returns
        -------
            The DCG @ k as a float. 0.0 when no position is evaluated.
    """
    # Clamp the cut-off so a k beyond the end of the ranking does not
    # index past the last returned document.
    depth = max(0, min(k, len(relevance)))

    return float(sum(dcg_i(relevance, i) for i in range(1, depth + 1)))

dcg_at_k([4,4,3,0,0,1,3,3,3,0], 6)  # DCG over the first 6 of 10 graded results

10.279642067948915

# NDCG

In [9]:
def ndcg_at_k(relevance: list, k: int) -> float:
    """
    Returns the normalized DCG @ k of a query result: the DCG @ k of the
    ranking divided by the DCG @ k of the same relevance values sorted
    in decreasing order (the ideal ranking).

        Params
        ------
            relevance: list
                A numeric vector where the kth component of the vector
                represents the relevance of the kth returned document.
                The vector is not modified.

            k: int
                Position until which the metric should be evaluated.
                When k is larger than the number of returned documents,
                all the returned documents are used.

        Returns
        -------
            The NDCG @ k as a float. 0.0 is returned when the ideal DCG
            is zero (for instance, when every relevance value is 0 or
            when no position is evaluated).
    """
    # Clamp the cut-off so a k beyond the end of the ranking does not
    # index past the last returned document.
    depth = max(0, min(k, len(relevance)))

    def discounted_sum(scores) -> float:
        # Gain at each 1-based rank divided by log2(max(rank, 2)), so
        # ranks 1 and 2 are left undiscounted.
        return sum(
            score / math.log2(max(rank, 2))
            for rank, score in enumerate(scores[:depth], start=1)
        )

    # sorted() builds a new list, so the caller's vector is left intact
    # and any sequence type is accepted, not only list.
    ideal_ranking = sorted(relevance, reverse=True)
    ideal_dcg = discounted_sum(ideal_ranking)

    # With a zero ideal DCG the ratio is undefined; report 0.0 instead of
    # dividing by zero.
    if ideal_dcg == 0:
        return 0.0

    return float(discounted_sum(relevance) / ideal_dcg)
        

ndcg_at_k([4,4,3,0,0,1,3,3,3,0], 6)  # NDCG over the first 6 of 10 graded results

0.7424602308163405