Adds mapk implementation #50
Changes from 2 commits
@@ -107,7 +107,7 @@ def catalog_coverage(predicted: List[list], catalog: list, k: int) -> float:
     catalog_coverage = round(L_predictions/(len(catalog)*1.0)*100,2)
     return catalog_coverage

-def _ark(actual: list, predicted: list, k=10) -> int:
+def _ark(actual: list, predicted: list, k=10) -> float:
     """
     Computes the average recall at k.
     Parameters

@@ -120,7 +120,7 @@ def _ark(actual: list, predicted: list, k=10) -> int:
         Number of predictions to consider
     Returns:
     -------
-    score : int
+    score : float
         The average recall at k.
     """
     if len(predicted)>k:

@@ -139,7 +139,69 @@ def _ark(actual: list, predicted: list, k=10) -> int:

     return score / len(actual)

-def mark(actual: List[list], predicted: List[list], k=10) -> int:
+def _pk(actual: list, predicted: list, k) -> float:

> Question: Let me know if this implementation is too different than the
> Update: I removed
> This is great! thanks for this improvement!
""" | ||
Computes precision at k. | ||
Parameters | ||
---------- | ||
actual : list | ||
A list of actual items to be predicted | ||
predicted : list | ||
An ordered list of predicted items | ||
k : int | ||
Number of predictions to consider | ||
Returns: | ||
------- | ||
score : float | ||
The precision at k. | ||
""" | ||
|
||
if len(predicted) > k: | ||
predicted = predicted[:k] | ||
|
||
if not predicted or not actual: | ||
return 0.0 | ||
|
||
return sum([item in actual for item in set(predicted)]) / len(predicted) | ||
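For intuition, a quick worked example of what `_pk` returns (an editor's sketch; the inputs are made up and assume the implementation above):

    actual = ['a', 'b', 'c']
    predicted = ['a', 'z', 'b', 'q']

    # k=2: only the top-2 predictions ['a', 'z'] are considered; one is relevant
    print(_pk(actual, predicted, 2))  # 1/2 = 0.5
    # k=4: all four are considered; 'a' and 'b' are relevant
    print(_pk(actual, predicted, 4))  # 2/4 = 0.5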
+
+def _apk(actual: list, predicted: list, k=10) -> float:
+    """
+    Computes the average precision at k.
+    Parameters
+    ----------
+    actual : list
+        A list of actual items to be predicted
+    predicted : list
+        An ordered list of predicted items
+    k : int, default = 10
+        Number of predictions to consider
+    Returns:
+    -------
+    score : float
+        The average precision at k.
+    """
+
+    if len(predicted) > k:
+        predicted = predicted[:k]
+
+    if not predicted or not actual:
+        return 0
+
+    score = 0
+    true_positives = 0
+
+    for i, p in enumerate(predicted):
+        if p in actual and p not in predicted[:i]:
+            score += _pk(actual, predicted, i + 1)
+            true_positives += 1
+
+    if score == 0:
+        return 0
+
+    return score / true_positives
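Similarly for `_apk` (editor's sketch, same assumptions): precision is sampled at each rank where a new relevant item first appears, then averaged over the number of hits:

    actual = ['A', 'B', 'Y']
    predicted = ['A', 'Z', 'B']

    # hits at rank 1 ('A') and rank 3 ('B'): p@1 = 1/1, p@3 = 2/3
    print(_apk(actual, predicted, k=3))  # (1 + 2/3) / 2 ≈ 0.8333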
+
+
+def mark(actual: List[list], predicted: List[list], k=10) -> float:

> drive by: this should return a float, not an int
""" | ||
Computes the mean average recall at k. | ||
Parameters | ||
|
@@ -152,11 +214,29 @@ def mark(actual: List[list], predicted: List[list], k=10) -> int: | |
example: [['X', 'Y', 'Z'], ['X', 'Y', 'Z']] | ||
Returns: | ||
------- | ||
mark: int | ||
mark: float | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. drive by |
||
The mean average recall at k (mar@k) | ||
""" | ||
return np.mean([_ark(a,p,k) for a,p in zip(actual, predicted)]) | ||
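`mark` then just averages `_ark` over users; a minimal usage sketch (hypothetical inputs):

    actual = [['A', 'B', 'X'], ['A', 'B', 'Y']]     # one list of relevant items per user
    predicted = [['X', 'Y', 'Z'], ['X', 'Y', 'Z']]  # one ordered prediction list per user

    mar_at_3 = mark(actual, predicted, k=3)  # mean of the per-user average-recall@k scores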
+def mapk(actual: List[list], predicted: List[list], k=10) -> int:
+    """
+    Computes the mean average precision at k.
+    Parameters
+    ----------
+    actual : a list of lists
+        Actual items to be predicted
+        example: [['A', 'B', 'X'], ['A', 'B', 'Y']]
+    predicted : a list of lists
+        Ordered predictions
+        example: [['X', 'Y', 'Z'], ['X', 'Y', 'Z']]
+    Returns:
+    -------
+    mapk: int
+        The mean average precision at k (map@k)
+    """
+    return np.mean([_apk(a,p,k) for a,p in zip(actual, predicted)])
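Drive-by note in the same spirit as the review comments: like the old `mark` signature, `mapk` is annotated `-> int` (and its docstring says `mapk: int`), but `np.mean` returns a float, so `-> float` would presumably be correct here too. A minimal usage sketch (hypothetical inputs, mirroring the test added below):

    actual = [['A', 'B', 'X'], ['A', 'B', 'Y']]
    predicted = [['X', 'Y', 'Z'], ['A', 'Z', 'B']]

    map_at_5 = mapk(actual, predicted, k=5)  # mean of the per-user _apk scores, here 11/12 ≈ 0.9167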
 def personalization(predicted: List[list]) -> float:
     """
     Personalization measures recommendation similarity across users.
@@ -80,7 +80,7 @@ def test_catalog_coverage(self):

     def test_mark(self):
         """
-        Test mean absolute recall @ k (MAPK) function
+        Test mean absolute recall @ k (MARK) function

> drive by

         """

         # GIVEN test MAR@K metrics

@@ -95,8 +95,164 @@ def test_mark(self):
             k=test_k
         )

         # THEN the mean absolute recall @ k should equal the expected value
         self.assertEqual(mean_abs_recall_k, 0.25)
+    def test_mapk(self):
+        """
+        Test mean absolute precision @ k (MAPK) function
+        """
+
+        # GIVEN test MAP@K metrics
+        test_actual = [['A', 'B', 'X'], ['A', 'B', 'Y']]
+        test_predicted = [['X', 'Y', 'Z'], ['A', 'Z', 'B']]
+        test_k = 5
+
+        # WHEN metrics.mapk is run
+        mean_abs_precision_k = metrics.mapk(
+            actual=test_actual,
+            predicted=test_predicted,
+            k=test_k
+        )
+
+        # THEN the mean absolute precision @ k should equal the average
+        # precision over the two sets of predictions
+        self.assertEqual(mean_abs_precision_k, ((1) + ((1 + (2/3)) / 2)) / 2)
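The reviewer below asks for the math in these tests to be checked; this expected value does work out (editor's verification, assuming the `_apk` implementation above):

    # user 1: actual ['A','B','X'], predicted ['X','Y','Z']
    #   one hit, 'X' at rank 1 -> AP = (1/1) / 1 = 1
    # user 2: actual ['A','B','Y'], predicted ['A','Z','B']
    #   hits at ranks 1 and 3 -> AP = (1/1 + 2/3) / 2 = 5/6
    # mean over the two users: (1 + 5/6) / 2 = 11/12
    assert abs(((1) + ((1 + (2/3)) / 2)) / 2 - 11/12) < 1e-12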
+
+    def test_pk(self):
+        """
+        Test precision@k computation
+        """
+
+        self.assertEqual(
+            metrics._pk(
+                actual = ['a', 'b', 'c'],
+                predicted = ['a', 'z', 'b'],
+                k = 3
+            ),
+            2/3
+        )
+
+
+    def test_apk(self):

> It would be great if someone could check the numbers in these tests to make sure the math is actually correct :)

+        """
+        Test mean absolute precision @ k (APK) function
+        """
+
+        ## Predictions align with Stanford slides
+        ## https://web.stanford.edu/class/cs276/handouts/EvaluationNew-handout-1-per.pdf
+        self.assertEqual(
+            metrics._apk(
+                ['a', 'b', 'c', 'd', 'e', 'f'],
+                ['a', 'x', 'b', 'c', 'd', 'e', 'q', 'y', 'z', 'f'],
+                10
+            ),
+            0.7749999999999999
+        )
+
+        actual = ["A", "B", "X"]
+        predicted = ["X", "Y", "Z"]
+
+        self.assertEqual(
+            metrics._apk(actual, predicted, 1),
+            1
+        )
+        self.assertEqual(
+            metrics._apk(actual, predicted, 2),
+            1
+        )
+        self.assertEqual(
+            metrics._apk(actual, predicted, 3),
+            1
+        )
+
+        actual = ["A", "B", "X"]
+        predicted = ["foo", "B", "A"]
+
+        self.assertEqual(
+            metrics._apk(actual, predicted, 1),
+            0
+        )
+        self.assertEqual(
+            metrics._apk(actual, predicted, 2),
+            1/2
+        )
+        self.assertEqual(
+            metrics._apk(actual, predicted, 3),
+            ((1/2) + (2/3)) / 2
+        )
+
+        ## You shouldn't get extra credit for
+        ## predicting the same thing twice
+        actual = ["A", "B", "X"]
+        predicted = ["A", "A", "Z"]
+
+        self.assertEqual(
+            metrics._apk(actual, predicted, 3),
+            1
+        )
+
+        ## If k is less than the number of predictions
+        ## made, we effectively "don't know" about
+        ## the other predictions
+        actual = ["A", "B", "X"]
+        predicted = ["A", "B", "Z"]
+
+        self.assertEqual(
+            metrics._apk(actual, predicted, 3),
+            1
+        )
+
+        ## High K values don't change things
+        actual = ["A", "B", "X"]
+        predicted = ["A", "A", "Z"]
+
+        self.assertEqual(
+            metrics._apk(actual, predicted, 3),
+            metrics._apk(actual, predicted, 1000)
+        )
+
+        ## Returns 0 if no predictions exist
+        actual = ["A", "B", "X"]
+        predicted = []
+
+        self.assertEqual(
+            metrics._apk(actual, predicted, 1),
+            0
+        )
+
+        self.assertEqual(
+            metrics._apk(actual, predicted, 3),
+            0
+        )
+
+        self.assertEqual(
+            metrics._apk(actual, predicted, 100),
+            0
+        )
+
+        ## Returns correctly for single prediction
+        actual = ["A", "B", "X"]
+
+        self.assertEqual(
+            metrics._apk(actual, ["B"], 1),
+            1
+        )
+
+        self.assertEqual(
+            metrics._apk(actual, ["B"], 3),
+            1
+        )
+
+        self.assertEqual(
+            metrics._apk(actual, ["Z"], 100),
+            0
+        )
+
+        self.assertEqual(
+            metrics._apk(actual, ["B", "B", "B"], 100),
+            1
+        )
+

     def test_personalization(self):
         """