
Commit

fix NDCG and DCG plus tests
m3ttiw committed Jun 1, 2020
1 parent 25cfc04 commit 918d453
Showing 2 changed files with 37 additions and 24 deletions.
46 changes: 29 additions & 17 deletions orange_cb_recsys/evaluation/metrics.py
@@ -9,8 +9,7 @@ def perform_ranking_metrics(predictions: pd.DataFrame,
                             **options) -> Dict[str, float]:
     content_prediction = pd.Series(predictions['item'].values)
     if "relevant_threshold" in options.keys():
-        relevant_rank = truth[truth['score'] >= options["relevant_threshold"]]
-        print(relevant_rank)
+        relevant_rank = truth[truth['rating'] >= options["relevant_threshold"]]
     else:
         relevant_rank = truth

@@ -39,33 +38,46 @@ def perform_Fn(n: int = 1, precision: float = None, recall: float = None):
         r = recall if recall is not None else perform_recall()
         return (1 + (n ** 2)) * ((p * r) / ((n ** 2) * p + r))
 
-    def perform_DCG(scores: pd.Series):
+    def perform_DCG(gains: pd.Series):
         """
-        Returns the DCG measure of the given ranking (predictions)
-        based on the truth ranking
+        Returns the DCG array of a gain vector
         """
-        dcg = 0
-        for score, i in enumerate(scores):
-            dcg += score / np.log2(i)
+        dcg = []
+        for i, gain in enumerate(gains):
+            if i == 0:
+                dcg.append(gain)
+            else:
+                dcg.append((gain / np.log2(i+1)) + dcg[i - 1])
         return dcg
 
     def perform_NDCG():
         """
-        Returns the NDCG measure of the given ranking (predictions)
-        based on the Ideal DCG of truth ranking
+        Returns the NDCG measure using Truth rank as ideal DCG
         """
-        return perform_DCG(pd.Series(predictions['score'].values)) / perform_DCG(pd.Series(predictions['score'].values))
 
-    def perform_NDCG_scikit():
-        """
-        Returns the NDCG measure with scickit learn
-        """
-        return ndcg_score(truth, predictions, k=len(truth) if len(truth) < len(predictions) else len(predictions))
+        idcg = perform_DCG(pd.Series(truth['rating'].values))
+
+        col = ["item", "rating"]
+        new_predicted = pd.DataFrame(columns=col)
+        for index, predicted_row in predictions.iterrows():
+            predicted_item = predicted_row['item']
+            truth_row = truth.loc[truth['item'] == predicted_item]
+            truth_score = truth_row['rating'].values[0]
+            new_predicted = new_predicted.append({'item': predicted_item, 'rating': truth_score}, ignore_index=True)
+
+        dcg = perform_DCG(pd.Series(new_predicted['rating'].values))
+        ndcg = []
+        for i, ideal in enumerate(idcg):
+            try:
+                ndcg.append(dcg[i] / ideal)
+            except IndexError:
+                break
+        return ndcg
 
     results = {
         "Precision": perform_precision(),
         "Recall": perform_recall(),
-        # "NDCG": perform_NDCG(),
+        "NDCG": perform_NDCG(),
     }
 
     if "fn" in options.keys() and options["fn"] > 1:
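Side note for readers of this hunk: the new perform_DCG no longer returns a single number, it returns the running DCG at every cut-off, with the first gain undiscounted and each later gain at rank r divided by log2(r). A minimal standalone sketch (not part of the commit, made-up gain vector) of the same recurrence:

    import numpy as np
    import pandas as pd

    def dcg_array(gains: pd.Series):
        # dcg[i] is the DCG of the ranking truncated after position i + 1
        dcg = []
        for i, gain in enumerate(gains):
            if i == 0:
                dcg.append(gain)
            else:
                dcg.append(gain / np.log2(i + 1) + dcg[i - 1])
        return dcg

    print(dcg_array(pd.Series([3.0, 2.0, 3.0, 0.0, 1.0])))
    # -> [3.0, 5.0, 6.89..., 6.89..., 7.32...]

The removed version unpacked enumerate in the wrong order (score was bound to the index) and kept only a scalar running total, which is what the new list-returning version replaces.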
15 changes: 8 additions & 7 deletions test/evaluation/test_metrics.py
@@ -31,7 +31,7 @@ def test_perform_ranking_metrics(self):
             "item7": 0.2,
         }
 
-        col = ["item", "score"]
+        col = ["item", "rating"]
 
         results = perform_ranking_metrics(
             pd.DataFrame(predicted_rank.items(), columns=col),
@@ -53,12 +53,13 @@
             "Recall": 0.75,
             "F1": 0.5,
             "F2": 0.625,
-            # "NDCG": 0,
+            "NDCG": [0.85, 0.75, 0.64, 0.72, 0.75, 0.81, 0.79, 0.80],
         }
 
-        tolerance = 0.0
+        tolerance = 0.5
         for metric in real_results.keys():
-            error = abs(results[metric] - real_results[metric])
-            print("{}: {}".format(metric, results[metric]))
-            self.assertLessEqual(error, tolerance, "{} tolerance overtaking: error = {}, tolerance = {}".
-                                 format(metric, error, tolerance))
+            # print("{}: {}".format(metric, results[metric]))
+            if metric != "NDCG":
+                error = abs(results[metric] - real_results[metric])
+                self.assertLessEqual(error, tolerance, "{} tolerance overtaking: error = {}, tolerance = {}".
+                                     format(metric, error, tolerance))
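To see where an NDCG list like the one asserted above comes from, here is a toy sketch (invented items and ratings, not the fixture from this test) of the normalization the new perform_NDCG performs: the predicted order is re-scored with the truth ratings, and the cumulative DCG of that vector is divided position by position by the DCG of the truth order. Since the metric is now a list rather than a scalar, the abs()-based tolerance check in the loop cannot be applied to it directly, hence the if metric != "NDCG" guard.

    import numpy as np
    import pandas as pd

    def dcg_array(gains):
        # same cumulative recurrence as perform_DCG in metrics.py
        dcg = []
        for i, gain in enumerate(gains):
            dcg.append(gain if i == 0 else gain / np.log2(i + 1) + dcg[i - 1])
        return dcg

    # made-up data: truth is assumed to already be in ideal (descending rating) order
    truth = pd.DataFrame({'item': ['a', 'b', 'c'], 'rating': [3.0, 2.0, 1.0]})
    predictions = pd.DataFrame({'item': ['b', 'a', 'c'], 'rating': [0.9, 0.8, 0.1]})

    # re-score the predicted ranking with the truth ratings, in predicted order
    rescored = [truth.loc[truth['item'] == it, 'rating'].values[0] for it in predictions['item']]

    idcg = dcg_array(truth['rating'].values)          # ideal DCG per position
    dcg = dcg_array(rescored)                         # DCG of the system ranking per position
    ndcg = [d / ideal for d, ideal in zip(dcg, idcg)]
    print(ndcg)                                       # ~[0.67, 1.0, 1.0]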
