Adds mapk implementation #50

Merged (7 commits, Feb 21, 2023)
Changes from 2 commits
88 changes: 84 additions & 4 deletions recmetrics/metrics.py
@@ -107,7 +107,7 @@ def catalog_coverage(predicted: List[list], catalog: list, k: int) -> float:
catalog_coverage = round(L_predictions/(len(catalog)*1.0)*100,2)
return catalog_coverage

def _ark(actual: list, predicted: list, k=10) -> int:
def _ark(actual: list, predicted: list, k=10) -> float:
Contributor Author:

drive by

"""
Computes the average recall at k.
Parameters
@@ -120,7 +120,7 @@ def _ark(actual: list, predicted: list, k=10) -> int:
Number of predictions to consider
Returns:
-------
score : int
score : float
The average recall at k.
"""
if len(predicted)>k:
@@ -139,7 +139,69 @@ def _ark(actual: list, predicted: list, k=10) -> int:

return score / len(actual)

def mark(actual: List[list], predicted: List[list], k=10) -> int:
def _pk(actual: list, predicted: list, k) -> float:
Contributor Author:

Question: Let me know if this implementation is too different from the _ark / mark one -- I figured it made sense to just factor this out. But in broader terms, maybe it makes sense to use the recommender_precision method inside _apk?

Contributor Author:

Update: I removed _pk in favor of just using the pre-existing precision calc. Hopefully that refactor is okay!
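For reference, here is a minimal sketch of what that refactor could look like, with the precision calculation inlined into _apk rather than kept in a separate _pk helper. The function name and final shape are assumptions for illustration only; the later commits in this PR are the source of truth.

```python
# Illustrative sketch only -- not part of this diff.
def _apk_inlined(actual: list, predicted: list, k: int = 10) -> float:
    if len(predicted) > k:
        predicted = predicted[:k]

    if not predicted or not actual:
        return 0.0

    score = 0.0
    true_positives = 0

    for i, p in enumerate(predicted):
        if p in actual and p not in predicted[:i]:
            top_i = predicted[:i + 1]
            # same precision@(i+1) expression _pk uses, applied to the top i+1 items
            score += sum(item in actual for item in set(top_i)) / len(top_i)
            true_positives += 1

    return score / true_positives if true_positives else 0.0
```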


This is great! thanks for this improvement!

"""
Computes precision at k.
Parameters
----------
actual : list
A list of actual items to be predicted
predicted : list
An ordered list of predicted items
k : int
Number of predictions to consider
Returns:
-------
score : float
The precision at k.
"""

if len(predicted) > k:
predicted = predicted[:k]

if not predicted or not actual:
return 0.0

return sum([item in actual for item in set(predicted)]) / len(predicted)
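A quick sanity check of _pk, mirroring the test_pk case added further down in this PR (assumes _pk is in scope, e.g. run alongside the definitions above):

```python
# 'a' and 'b' appear in actual, 'z' does not, so precision@3 is 2/3
assert _pk(actual=['a', 'b', 'c'], predicted=['a', 'z', 'b'], k=3) == 2 / 3
```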

def _apk(actual: list, predicted: list, k=10) -> float:
"""
Computes the average precision at k.
Parameters
----------
actual : list
A list of actual items to be predicted
predicted : list
An ordered list of predicted items
k : int, default = 10
Number of predictions to consider
Returns:
-------
score : float
The average precision at k.
"""

if len(predicted) > k:
predicted = predicted[:k]

if not predicted or not actual:
return 0

score = 0
true_positives = 0

for i, p in enumerate(predicted):
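# count each distinct correct item only once; repeating a prediction earns no extra credit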
if p in actual and p not in predicted[:i]:
score += _pk(actual, predicted, i + 1)
true_positives += 1

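# no hits means true_positives is 0, so return early to avoid dividing by zero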
if score == 0:
return 0

return score / true_positives


def mark(actual: List[list], predicted: List[list], k=10) -> float:
Contributor Author:

drive by: this should return a float, not an int

"""
Computes the mean average recall at k.
Parameters
@@ -152,11 +214,29 @@ def mark(actual: List[list], predicted: List[list], k=10) -> int:
example: [['X', 'Y', 'Z'], ['X', 'Y', 'Z']]
Returns:
-------
mark: int
mark: float
Contributor Author:

drive by

The mean average recall at k (mar@k)
"""
return np.mean([_ark(a,p,k) for a,p in zip(actual, predicted)])

def mapk(actual: List[list], predicted: List[list], k=10) -> float:
"""
Computes the mean average precision at k.
Parameters
----------
actual : a list of lists
Actual items to be predicted
example: [['A', 'B', 'X'], ['A', 'B', 'Y']]
predicted : a list of lists
Ordered predictions
example: [['X', 'Y', 'Z'], ['X', 'Y', 'Z']]
Returns:
-------
mapk: float
The mean average precision at k (map@k)
"""
return np.mean([_apk(a,p,k) for a,p in zip(actual, predicted)])
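A quick usage sketch of the new mapk, reusing the two-user example from the test added below (illustrative only; assumes the module path matches the file shown above):

```python
from recmetrics.metrics import mapk

# User 1: only the rank-1 prediction 'X' is a hit   -> AP@5 = 1
# User 2: hits at ranks 1 ('A') and 3 ('B')         -> AP@5 = (1 + 2/3) / 2
# MAP@5 is the mean of the two, roughly 0.9167
actual = [['A', 'B', 'X'], ['A', 'B', 'Y']]
predicted = [['X', 'Y', 'Z'], ['A', 'Z', 'B']]
print(mapk(actual, predicted, k=5))
```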

def personalization(predicted: List[list]) -> float:
"""
Personalization measures recommendation similarity across users.
162 changes: 159 additions & 3 deletions tests/test_metrics.py
@@ -80,7 +80,7 @@ def test_catalog_coverage(self):

def test_mark(self):
"""
Test mean absolute recall @ k (MAPK) function
Test mean absolute recall @ k (MARK) function
Contributor Author:

drive by

"""

# GIVEN test MAR@K metrics
@@ -95,8 +95,164 @@ def test_mark(self):
k=test_k
)

# THEN the mean absolute recall @ k should equal the expected value
self.assertEqual(mean_abs_recall_k, 0.25)

def test_mapk(self):
"""
Test mean average precision @ k (MAPK) function
"""

# GIVEN test MAP@K metrics
test_actual = [['A', 'B', 'X'], ['A', 'B', 'Y']]
test_predicted = [['X', 'Y', 'Z'], ['A', 'Z', 'B']]
test_k = 5

# WHEN metrics.mapk is run
mean_abs_precision_k = metrics.mapk(
actual=test_actual,
predicted=test_predicted,
k=test_k
)

# THEN the mean average precision @ k should equal the average
# precision over the two sets of predictions
self.assertEqual(mean_abs_precision_k, ((1) + ((1 + (2/3)) / 2)) / 2)
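# Breakdown of the expected value above:
#   user 1: predicted ['X', 'Y', 'Z'] vs actual ['A', 'B', 'X'] -- only rank 1 hits, so AP@5 = 1
#   user 2: predicted ['A', 'Z', 'B'] vs actual ['A', 'B', 'Y'] -- hits at ranks 1 and 3, so AP@5 = (1 + 2/3) / 2
#   MAP@5 = (1 + (1 + 2/3) / 2) / 2, roughly 0.9167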

def test_pk(self):
"""
Test precision@k computation
"""

self.assertEqual(
metrics._pk(
actual = ['a', 'b', 'c'],
predicted = ['a', 'z', 'b'],
k = 3
),
2/3
)


def test_apk(self):
Contributor Author:

It would be great if someone could check the numbers in these tests to make sure the math is actually correct :)

"""
Test average precision @ k (APK) function
"""

## Predictions align with Stanford slides
## https://web.stanford.edu/class/cs276/handouts/EvaluationNew-handout-1-per.pdf
self.assertEqual(
metrics._apk(
['a', 'b', 'c', 'd', 'e', 'f'],
['a', 'x', 'b', 'c', 'd', 'e', 'q', 'y', 'z', 'f'],
10
),
0.7749999999999999
)
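# Hand check of the value above: hits occur at ranks 1, 3, 4, 5, 6 and 10,
# contributing precisions 1/1, 2/3, 3/4, 4/5, 5/6 and 6/10. Their sum is 4.65,
# and dividing by the 6 hits gives 0.775 (0.7749999999999999 in floating point).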


actual = ["A", "B", "X"]
predicted = ["X", "Y", "Z"]

self.assertEqual(
metrics._apk(actual, predicted, 1),
1
)
self.assertEqual(
metrics._apk(actual, predicted, 2),
1
)
self.assertEqual(
metrics._apk(actual, predicted, 3),
1
)

actual = ["A", "B", "X"]
predicted = ["foo", "B", "A"]

self.assertEqual(
metrics._apk(actual, predicted, 1),
0
)
self.assertEqual(
metrics._apk(actual, predicted, 2),
1/2
)
self.assertEqual(
metrics._apk(actual, predicted, 3),
((1/2) + (2/3)) / 2
)

## You shouldn't get extra credit for
## predicting the same thing twice
actual = ["A", "B", "X"]
predicted = ["A", "A", "Z"]

self.assertEqual(
metrics._apk(actual, predicted, 3),
1
)

## A wrong prediction after the last hit, and actual items that
## are never predicted, don't lower the score, since we normalize
## by the number of hits
actual = ["A", "B", "X"]
predicted = ["A", "B", "Z"]

self.assertEqual(
metrics._apk(actual, predicted, 3),
1
)

## High K values don't change things
actual = ["A", "B", "X"]
predicted = ["A", "A", "Z"]

self.assertEqual(
metrics._apk(actual, predicted, 3),
metrics._apk(actual, predicted, 1000)
)

## Returns 0 if no predictions exist
actual = ["A", "B", "X"]
predicted = []

self.assertEqual(
metrics._apk(actual, predicted, 1),
0
)

self.assertEqual(
metrics._apk(actual, predicted, 3),
0
)

self.assertEqual(
metrics._apk(actual, predicted, 100),
0
)

## Returns correctly for single prediction
actual = ["A", "B", "X"]

self.assertEqual(
metrics._apk(actual, ["B"], 1),
1
)

self.assertEqual(
metrics._apk(actual, ["B"], 3),
1
)

self.assertEqual(
metrics._apk(actual, ["Z"], 100),
0
)

self.assertEqual(
metrics._apk(actual, ["B", "B", "B"], 100),
1
)


def test_personalization(self):
"""