In [1]:
import numpy as np
import pandas as pd

In [2]:
# Toy evaluation table: one row per item (A..J), holding the model's predicted
# score and the reference ("ground truth") score for that item.
# NOTE: the name `eval` shadows Python's builtin `eval` for the rest of the
# notebook; it is kept because every later cell refers to it.
eval = pd.DataFrame(
    data=[
        # prediction, ground_truth
        [0.81, 0.88],
        [0.75, 0.77],
        [0.50, 0.07],
        [0.77, 0.54],
        [0.59, 0.39],
        [0.72, 0.76],
        [0.49, 0.44],
        [0.96, 0.52],
        [0.84, 0.45],
        [0.91, 0.82],
    ],
    columns=['prediction', 'ground_truth'],
    index=list('ABCDEFGHIJ'),
)

print(eval)

   prediction  ground_truth
A        0.81          0.88
B        0.75          0.77
C        0.50          0.07
D        0.77          0.54
E        0.59          0.39
F        0.72          0.76
G        0.49          0.44
H        0.96          0.52
I        0.84          0.45
J        0.91          0.82


In [3]:
# RMSE calculation
def rmse(predictions, targets):
    """Return the root mean squared error between predictions and targets.

    Args:
        predictions: Predicted values. Must support element-wise subtraction
            and exponentiation and expose ``.mean()`` on the result
            (e.g. ``pd.Series`` or ``np.ndarray``).
        targets: Reference values, same length/alignment as ``predictions``.

    Returns:
        The square root of the mean of the squared differences.
    """
    squared_errors = (predictions - targets) ** 2
    mean_squared_error = squared_errors.mean()
    return mean_squared_error ** 0.5

# RMSE is evaluated on the raw score columns, not on the rank columns.
print('RMSE:', rmse(eval['prediction'], eval['ground_truth']))


RMSE: 0.2531797780234432


In [4]:
# MAE calculation
def mae(predictions, targets):
    """Return the mean absolute error between predictions and targets.

    Args:
        predictions: Predicted values (e.g. ``pd.Series`` or ``np.ndarray``).
        targets: Reference values, same length/alignment as ``predictions``.

    Returns:
        The mean of the absolute element-wise differences.
    """
    errors = predictions - targets
    absolute_errors = abs(errors)
    return absolute_errors.mean()

# MAE is evaluated on the raw score columns, not on the rank columns.
print('MAE:', mae(eval['prediction'], eval['ground_truth']))


MAE: 0.196


In [5]:
# Add a rank column for each score column. With ascending=False the largest
# score receives rank 1. The sample data contains no tied scores, so each
# column ends up holding the ranks 1..10 (stored as floats).
# This mutates the shared `eval` frame; the rank-based metric cells below
# read these two columns.
eval['pred_rank'] = eval['prediction'].rank(ascending=False)
eval['gt_rank'] = eval['ground_truth'].rank(ascending=False)

print(eval)

   prediction  ground_truth  pred_rank  gt_rank
A        0.81          0.88        4.0      1.0
B        0.75          0.77        6.0      3.0
C        0.50          0.07        9.0     10.0
D        0.77          0.54        5.0      5.0
E        0.59          0.39        8.0      9.0
F        0.72          0.76        7.0      4.0
G        0.49          0.44       10.0      8.0
H        0.96          0.52        1.0      6.0
I        0.84          0.45        3.0      7.0
J        0.91          0.82        2.0      2.0


In [6]:
# Pearson correlation
def pearson_correlation(predictions, targets):
    """Return Pearson's correlation coefficient between two samples.

    Args:
        predictions: First sample (e.g. ``pd.Series`` or ``np.ndarray``).
        targets: Second sample, same length/alignment as ``predictions``.

    Returns:
        The sum of products of the mean-centred samples divided by the square
        root of the product of their sums of squared deviations.
    """
    # Deviations of each sample from its own mean.
    pred_dev = predictions - predictions.mean()
    target_dev = targets - targets.mean()

    co_deviation = np.sum(pred_dev * target_dev)
    pred_ss = (pred_dev ** 2).sum()
    target_ss = (target_dev ** 2).sum()
    return co_deviation / np.sqrt(pred_ss * target_ss)

# Note: the rank columns are passed here, so this is Pearson's r computed on
# ranks rather than on the raw scores.
print('Pearson correlation:', pearson_correlation(eval['pred_rank'], eval['gt_rank']))

Pearson correlation: 0.5515151515151515


In [7]:
# Spearman correlation
def spearman_correlation(predictions, targets):
    """Return Spearman's rho via the rank-difference shortcut formula.

    Computes ``1 - 6 * sum(d_i ** 2) / (n * (n ** 2 - 1))`` where ``d_i`` is
    the element-wise difference of the two inputs, so both inputs are expected
    to already be ranks.

    Args:
        predictions: Ranks of the predicted scores.
        targets: Ranks of the reference scores, same length as ``predictions``.

    Returns:
        The value of the shortcut formula above.
    """
    n = len(predictions)
    squared_rank_gaps = (predictions - targets) ** 2
    numerator = 6 * np.sum(squared_rank_gaps)
    denominator = n * (n ** 2 - 1)
    return 1 - numerator / denominator

# spearman_correlation works on element-wise rank differences, so the rank
# columns (not the raw scores) are passed.
print('Spearman correlation:', spearman_correlation(eval['pred_rank'], eval['gt_rank']))

Spearman correlation: 0.5515151515151515


In [8]:
# Kendall rank correlation
def kendall_rank_correlation(predictions, targets):
    """Return Kendall's tau-a between two equally long sequences.

    Every unordered pair ``(i, j)`` is classified as concordant (both
    sequences order the pair the same way), discordant (they order it in
    opposite ways) or tied (equal in at least one sequence). The result is
    ``(concordant - discordant) / (n * (n - 1) / 2)``.

    Tied pairs contribute zero. Previously they were counted as discordant,
    which biased the coefficient downwards whenever ties were present.

    Args:
        predictions: 1-D sequence of scores or ranks (``pd.Series``,
            ``np.ndarray`` or list). Values are compared positionally.
        targets: 1-D sequence of the same length as ``predictions``.

    Returns:
        Tau-a as a Python float in ``[-1, 1]``, or ``nan`` when fewer than
        two items are given (no pairs exist).

    Raises:
        ValueError: If the inputs are not 1-D or differ in length.
    """
    pred = np.asarray(predictions, dtype=float)
    targ = np.asarray(targets, dtype=float)
    if pred.ndim != 1 or pred.shape != targ.shape:
        raise ValueError('predictions and targets must be 1-D and of equal length')

    n = pred.size
    if n < 2:
        return float('nan')

    # Index pairs (i, j) with i < j, i.e. every unordered pair exactly once.
    first, second = np.triu_indices(n, k=1)
    pred_order = np.sign(pred[first] - pred[second])
    targ_order = np.sign(targ[first] - targ[second])

    # The sign product is +1 for concordant, -1 for discordant and 0 for tied
    # pairs, so its sum equals (concordant - discordant).
    concordant_minus_discordant = np.sum(pred_order * targ_order)
    total_pairs = n * (n - 1) / 2
    return float(concordant_minus_discordant / total_pairs)

# Kendall's coefficient depends only on the pairwise ordering of its inputs;
# the rank columns are passed here.
print('Kendall rank correlation:', kendall_rank_correlation(eval['pred_rank'], eval['gt_rank']))

Kendall rank correlation: 0.37777777777777777


In [9]:
# Binarise the ground truth: an item counts as "observed" (relevant) when its
# ground_truth score is strictly greater than 0.5, otherwise it is not.
# This mutates the shared `eval` frame; the classification/ranking metric
# cells below read the new boolean column.
eval['gt_observed'] = eval['ground_truth'] > 0.5

print(eval)

   prediction  ground_truth  pred_rank  gt_rank  gt_observed
A        0.81          0.88        4.0      1.0         True
B        0.75          0.77        6.0      3.0         True
C        0.50          0.07        9.0     10.0        False
D        0.77          0.54        5.0      5.0         True
E        0.59          0.39        8.0      9.0        False
F        0.72          0.76        7.0      4.0         True
G        0.49          0.44       10.0      8.0        False
H        0.96          0.52        1.0      6.0         True
I        0.84          0.45        3.0      7.0        False
J        0.91          0.82        2.0      2.0         True


In [10]:
# AUC
def auc(observation: pd.Series, prediction: pd.Series):
    """Return the area under the ROC curve via pairwise comparison.

    AUC is the fraction of (positive, negative) item pairs in which the
    positive item receives the higher score. A pair with equal scores counts
    as half a correct pair (the Mann-Whitney convention); previously such ties
    earned no credit at all.

    Args:
        observation: Binary labels; entries equal to 1/True are positives and
            entries equal to 0/False are negatives.
        prediction: Scores where a higher value means "more likely positive".
            Compared positionally with ``observation``.

    Returns:
        The AUC as a float in ``[0, 1]``, or ``nan`` when there is no
        positive or no negative item (no pairs to compare).
    """
    labels = np.asarray(observation)
    scores = np.asarray(prediction, dtype=float)

    positive_scores = scores[labels == 1]
    negative_scores = scores[labels == 0]
    total_pairs = positive_scores.size * negative_scores.size
    if total_pairs == 0:
        return float('nan')

    # margin[p, q] = score of positive p minus score of negative q.
    margin = positive_scores[:, None] - negative_scores[None, :]
    correct_pairs = np.count_nonzero(margin > 0)
    tied_pairs = np.count_nonzero(margin == 0)
    return (correct_pairs + 0.5 * tied_pairs) / total_pairs

# Labels are the boolean gt_observed column; scores are the raw predictions
# (auc treats a higher prediction as stronger evidence for "observed").
print('AUC:', auc(eval['gt_observed'], eval['prediction']))


AUC: 0.8333333333333334


In [11]:
# Precision, Recall, F1
def precision_recall_f1(observation: pd.Series, prediction: pd.Series, k: int):
    """Return precision@k, recall@k and F1@k, each rounded to two decimals.

    An item is "retrieved" when its rank is ``<= k`` and "relevant" when its
    observation equals 1/True.

    F1 is derived from the exact hit counts and rounded once at the end.
    Previously it was computed from the already-rounded precision and recall,
    which could shift the second decimal. It is also defined as 0 when there
    are no hits, instead of evaluating 0/0.

    Args:
        observation: Binary relevance labels (1/True = relevant).
        prediction: 1-based predicted ranks (1 = best), compared positionally
            with ``observation``.
        k: Rank cut-off; must be non-zero (a plain ``0`` raises
            ``ZeroDivisionError`` as before).

    Returns:
        Tuple ``(precision, recall, f1)``. Recall is 0 when no item is
        relevant.
    """
    relevant = np.asarray(observation) == 1
    ranks = np.asarray(prediction)

    n_relevant = int(np.count_nonzero(relevant))
    hits = int(np.count_nonzero(relevant & (ranks <= k)))

    precision = hits / k
    recall = hits / n_relevant if n_relevant else 0.0
    # 2PR / (P + R) with P = hits / k and R = hits / n_relevant simplifies to
    # 2 * hits / (k + n_relevant); guard the zero-hit case explicitly.
    f1 = 2 * hits / (k + n_relevant) if hits else 0.0

    return np.round(precision, 2), np.round(recall, 2), np.round(f1, 2)

# k = 5: items with pred_rank <= 5 form the retrieved set; gt_observed marks
# the relevant items.
precision, recall, f1 = precision_recall_f1(eval['gt_observed'], eval['pred_rank'], 5)
print('Precision:', precision)
print('Recall:', recall)
print('F1:', f1)

Precision: 0.8
Recall: 0.67
F1: 0.73


In [12]:
# Mean Average Precision (MAP)
def mean_average_precision(observation: pd.Series, prediction: pd.Series):
    """Return the average precision of a single ranked list.

    Relevant items are visited in order of increasing rank; the i-th relevant
    item at rank ``r`` contributes a precision of ``i / r``. The result is the
    mean of these contributions over all relevant items. (With only one
    ranking, "mean" average precision equals that ranking's average
    precision.)

    Args:
        observation: Binary relevance labels (1/True = relevant).
        prediction: 1-based predicted ranks (1 = best), compared positionally
            with ``observation``. The rank value itself is used as the
            denominator, so raw scores must not be passed here.

    Returns:
        Average precision as a float. Returns 0.0 when no item is relevant
        (this case previously raised ``ZeroDivisionError``).
    """
    relevant = np.asarray(observation) == 1
    relevant_ranks = np.sort(np.asarray(prediction, dtype=float)[relevant])
    if relevant_ranks.size == 0:
        return 0.0

    # After sorting, the i-th entry (1-based) is the rank at which the i-th
    # relevant item is encountered.
    hits_so_far = np.arange(1, relevant_ranks.size + 1)
    return float(np.mean(hits_so_far / relevant_ranks))

# pred_rank is passed as `prediction`: the function divides by this value, so
# it has to be a 1-based rank rather than a raw score.
print('Mean Average Precision:', mean_average_precision(eval['gt_observed'], eval['pred_rank']))

Mean Average Precision: 0.8734126984126983


In [13]:
# Normalized Cumulative Reciprocal Rank (NCRR)
def ncrr(observation: pd.Series, prediction: pd.Series):
    """Return the normalized cumulative reciprocal rank of a ranked list.

    The achieved score is the sum of ``observation / rank`` over all items;
    it is divided by the score of an ideal list in which the relevant items
    occupy ranks ``1 .. number_of_relevant_items``.

    Args:
        observation: Binary relevance labels (1/True = relevant).
        prediction: 1-based predicted ranks (1 = best), aligned with
            ``observation``.

    Returns:
        Achieved cumulative reciprocal rank divided by the ideal one.
    """
    ranked = pd.DataFrame(
        {'observation': observation, 'prediction': prediction}
    ).sort_values(by='prediction', ascending=True)

    # Reciprocal-rank contribution of every item, best rank first.
    contributions = np.fromiter(
        (seen / rank for seen, rank in zip(ranked['observation'], ranked['prediction'])),
        dtype=float,
    )
    achieved = np.sum(contributions)

    n_relevant = np.sum(observation)
    best_possible = np.sum(1 / np.arange(1, n_relevant + 1))
    return achieved / best_possible

# ncrr divides each observation by its `prediction` value, so the rank column
# (not the raw score) is passed.
print('Normalized Cumulative Reciprocal Rank:', ncrr(eval['gt_observed'], eval['pred_rank']))

Normalized Cumulative Reciprocal Rank: 0.9222546161321672


In [14]:
# Discounted Cumulative Gain (DCG)
def dcg(observation: pd.Series, prediction: pd.Series, k: int):
    """Return the discounted cumulative gain of the top-k ranked items.

    Items are ordered by increasing rank and the first ``k`` contribute
    ``observation / log2(rank + 1)`` each.

    Args:
        observation: Gain of each item (1/True = relevant for binary labels).
        prediction: 1-based predicted ranks (1 = best), compared positionally
            with ``observation``.
        k: Number of top-ranked items to include. Values larger than the
            number of items use all items (this previously raised
            ``IndexError``); values ``<= 0`` give 0.0.

    Returns:
        The DCG@k as a float.
    """
    gains = np.asarray(observation, dtype=float)
    ranks = np.asarray(prediction, dtype=float)

    # Positions of the k best-ranked items; slicing clamps k to the list size.
    top_k = np.argsort(ranks, kind='stable')[:max(k, 0)]
    return np.sum(gains[top_k] / np.log2(ranks[top_k] + 1))

# DCG over the 5 best-ranked items; pred_rank supplies the position used in
# the log2 discount.
print('Discounted Cumulative Gain:', dcg(eval['gt_observed'], eval['pred_rank'], 5))

Discounted Cumulative Gain: 2.4484591188793923


In [15]:
# Normalized Discounted Cumulative Gain (NDCG)
def ndcg(observation: pd.Series, prediction: pd.Series, k: int):
    """Return the normalized discounted cumulative gain at cut-off k.

    DCG@k is the sum of ``observation / log2(rank + 1)`` over the ``k``
    best-ranked items. It is divided by the ideal DCG@k, i.e. the DCG of a
    list whose top positions are filled with relevant items.

    The ideal list can only contain as many relevant items as actually exist,
    so the ideal DCG is summed over ``min(k, number_of_relevant_items)``
    positions. Previously it was always summed over ``k`` positions, which
    made a perfect ranking score below 1 whenever fewer than ``k`` items were
    relevant.

    Args:
        observation: Binary relevance labels (1/True = relevant).
        prediction: 1-based predicted ranks (1 = best), compared positionally
            with ``observation``.
        k: Number of top-ranked items to include. Values larger than the
            number of items use all items (this previously raised
            ``IndexError``).

    Returns:
        NDCG@k as a float in ``[0, 1]`` for binary labels and untied 1..n
        ranks. Returns 0.0 when the ideal DCG is zero (no relevant items or
        ``k <= 0``).
    """
    gains = np.asarray(observation, dtype=float)
    ranks = np.asarray(prediction, dtype=float)

    cutoff = max(min(k, gains.size), 0)
    top_k = np.argsort(ranks, kind='stable')[:cutoff]
    score = np.sum(gains[top_k] / np.log2(ranks[top_k] + 1))

    # Number of positions an ideal ranking could fill with relevant items.
    ideal_hits = min(cutoff, int(np.count_nonzero(gains)))
    if ideal_hits == 0:
        return 0.0
    ideal_score = np.sum(1 / np.log2(np.arange(1, ideal_hits + 1) + 1))
    return score / ideal_score

# NDCG over the 5 best-ranked items; pred_rank supplies the position used in
# the log2 discount.
print('Normalized Discounted Cumulative Gain:', ndcg(eval['gt_observed'], eval['pred_rank'], 5))

Normalized Discounted Cumulative Gain: 0.830419897363192
