Permalink
Browse files

Added recommend to ItemBasedRecommender.

  • Loading branch information...
1 parent 066620b commit c8ad2034b52663d323f13da43fc2698733994e06 @marcelcaraciolo marcelcaraciolo committed Aug 10, 2011
@@ -22,10 +22,15 @@ class BaseRecommender(BaseEstimator):
model: DataModel
Defines the data model where data is fetched.
+ with_preference: bool
+ Defines whether the recommendations are returned along with
+ their estimated preferences. (default=False)
+
"""
- def __init__(self, model):
+ def __init__(self, model, with_preference=False):
self.model = model
+ self.with_preference = with_preference
def recommend(self, user_id, how_many, **params):
'''
@@ -262,7 +262,7 @@ def preference_value(self, user_id, item_id):
if item_id not in self.dataset_T:
raise ItemNotFoundError
- return preferences.get(item_id, np.inf)
+ return preferences.get(item_id, np.nan)
def users_count(self):
'''
@@ -128,7 +128,7 @@ def test_preference_value__invalid_DictPreferenceDataModel():
model = DictPreferenceDataModel(movies)
assert_raises(UserNotFoundError, model.preference_value, 'Flavia', 'Superman Returns')
assert_raises(ItemNotFoundError, model.preference_value, 'Marcel Caraciolo', 'Back to the future')
- assert_equals(np.inf, model.preference_value('Maria Gabriela', 'The Night Listener'))
+ assert_array_equal(np.nan, model.preference_value('Maria Gabriela', 'The Night Listener'))
def test_set_preference_value_DictPreferenceDataModel():
@@ -148,7 +148,7 @@ def test_set_preference_value_DictPreferenceDataModel():
def test_remove_preference_value__DictPreferenceDataModel():
model = DictPreferenceDataModel(movies)
model.remove_preference('Maria Gabriela', 'Superman Returns')
- assert_equals(np.inf, model.preference_value('Maria Gabriela', 'Superman Returns'))
+ assert_array_equal(np.nan, model.preference_value('Maria Gabriela', 'Superman Returns'))
assert_raises(ItemNotFoundError, model.remove_preference, 'Marcel Caraciolo', 'Indiana Jones')
movies = {'Marcel Caraciolo': {'Lady in the Water': 2.5, 'Snakes on a Plane': 3.5,
@@ -51,6 +51,8 @@ class ItemBasedRecommender(ItemRecommender):
`capper`: bool (default=True)
Cap the preferences with maximum and minimum preferences
in the model.
+ `with_preference`: bool (default=False)
+ Return the recommendations with the estimated preferences if True.
Examples
-----------
@@ -64,16 +66,16 @@ class ItemBasedRecommender(ItemRecommender):
"""
def __init__(self, model, similarity, items_selection_strategy=None,
- capper=True):
- ItemRecommender.__init__(self, model)
+ capper=True, with_preference=False):
+ ItemRecommender.__init__(self, model, with_preference)
self.similarity = similarity
self.capper = capper
if items_selection_strategy is None:
self.items_selection_strategy = ItemsNeighborhoodStrategy()
else:
self.items_selection_strategy = items_selection_strategy
- def recommend(self, user_id, how_many, **params):
+ def recommend(self, user_id, how_many=None, **params):
'''
Return a list of recommended items, ordered from most strongly
recommended to least.
@@ -83,23 +85,21 @@ def recommend(self, user_id, how_many, **params):
user_id: int or string
User for which recommendations are to be computed.
how_many: int
- Desired number of recommendations
+ Desired number of recommendations (default=None, i.e. return all)
rescorer: function, optional
Rescoring function to apply before final list of
recommendations.
'''
-
self._set_params(**params)
candidate_items = self.all_other_items(user_id)
- recommendable_items = None
+ recommendable_items = self._top_matches(user_id, \
+ candidate_items, how_many)
return recommendable_items
- return preferences
-
def estimate_preference(self, user_id, item_id, **params):
'''
Returns
@@ -114,6 +114,7 @@ def estimate_preference(self, user_id, item_id, **params):
#TODO: It needs optimization
prefs = self.model.preferences_from_user(user_id)
+
similarities = \
np.array([self.similarity.get_similarity(item_id, to_item_id) \
for to_item_id, pref in prefs if to_item_id != item_id]).flatten()
@@ -171,19 +172,29 @@ def _top_matches(self, source_id, target_ids, how_many=None, **params):
Return the top N matches
It can be user_ids or item_ids.
'''
- estimate_preferences = np.vectorize(self._estimate_score_for_item)
+ #Empty target_ids
+ if target_ids.size == 0:
+ return np.array([])
+
+ estimate_preferences = np.vectorize(self.estimate_preference)
+
preferences = estimate_preferences(source_id, target_ids)
preferences = preferences[~np.isnan(preferences)]
target_ids = target_ids[~np.isnan(preferences)]
- sorted_preferences = np.lexsort((preferences,)).ravel(order='C')
- top_n_recs = [target_ids[ind] for ind in sorted_preferences]
+ sorted_preferences = np.lexsort((preferences,))[::-1]
+
+ sorted_preferences = sorted_preferences[0:how_many] \
+ if how_many and sorted_preferences.size > how_many else sorted_preferences
+
+ if self.with_preference:
+ top_n_recs = np.array([(target_ids[ind], \
+ preferences[ind]) for ind in sorted_preferences])
+ else:
+ top_n_recs = np.array([target_ids[ind] for ind in sorted_preferences])
- return top_n_recs[0:how_many] \
- if top_n_recs and how_many and \
- top_n_recs.size > how_many else top_n_recs \
- if top_n_recs else np.array([])
+ return top_n_recs
def most_similar_items(self, item_id, how_many=None):
'''
@@ -200,7 +211,8 @@ def most_similar_items(self, item_id, how_many=None):
'''
old_how_many = self.similarity.num_best
#+1 since it returns the identity.
- self.similarity.num_best = how_many + 1 if how_many is not None else None
+ self.similarity.num_best = how_many + 1 \
+ if how_many is not None else None
similarities = self.similarity[item_id]
self.similarity.num_best = old_how_many
@@ -61,10 +61,20 @@ def test_estimate_preference_ItemBasedRecommender():
assert_almost_equals(3.14717875510, recsys.estimate_preference('Leopoldo Pires', 'You, Me and Dupree'))
#With capper = False
recsys = ItemBasedRecommender(matrix_model, similarity, items_strategy, False)
- #assert_almost_equals(3.14717875510, recsys.estimate_preference('Leopoldo Pires', 'You, Me and Dupree'))
+ assert_almost_equals(3.14717875510, recsys.estimate_preference('Leopoldo Pires', 'You, Me and Dupree'))
#Non-Preferences
- #assert_array_equal(np.nan, recsys.estimate_preference('Maria Gabriela', 'You, Me and Dupree'))
+ assert_array_equal(np.nan, recsys.estimate_preference('Maria Gabriela', 'You, Me and Dupree'))
+ items_strategy = ItemsNeighborhoodStrategy()
+ similarity = ItemSimilarity(dict_model, euclidean_distances)
+ recsys = ItemBasedRecommender(dict_model, similarity, items_strategy)
+ assert_almost_equals(3.5, recsys.estimate_preference('Marcel Caraciolo', 'Superman Returns'))
+ assert_almost_equals(3.14717875510, recsys.estimate_preference('Leopoldo Pires', 'You, Me and Dupree'))
+ #With capper = False
+ recsys = ItemBasedRecommender(dict_model, similarity, items_strategy, False)
+ assert_almost_equals(3.14717875510, recsys.estimate_preference('Leopoldo Pires', 'You, Me and Dupree'))
+ #Non-Preferences
+ assert_array_equal(np.nan, recsys.estimate_preference('Maria Gabriela', 'You, Me and Dupree'))
def test_most_similar_items_ItemBasedRecommender():
items_strategy = ItemsNeighborhoodStrategy()
@@ -89,66 +99,55 @@ def test_most_similar_items_ItemBasedRecommender():
recsys.most_similar_items('Just My Luck', 0))
+def test_recommend_ItemBasedRecommender():
+ items_strategy = ItemsNeighborhoodStrategy()
+ similarity = ItemSimilarity(matrix_model, euclidean_distances)
+ #Empty Recommendation
+ recsys = ItemBasedRecommender(matrix_model, similarity, items_strategy)
+ assert_array_equal(np.array([]), recsys.recommend('Marcel Caraciolo'))
-'''
-
-
- def test_local_estimatePreference(self):
- userID = 'Marcel Caraciolo'
- itemID = 'Superman Returns'
- recSys = ItemRecommender(self.model,self.similarity,self.strategy,True)
- self.assertAlmostEquals(3.5,recSys.estimatePreference(userID=userID,similarity=self.similarity,itemID=itemID))
-
-
- def test_local_not_existing_estimatePreference(self):
- userID = 'Leopoldo Pires'
- itemID = 'You, Me and Dupree'
- recSys = ItemRecommender(self.model,self.similarity,self.strategy,True)
- self.assertAlmostEquals(3.14717875510,recSys.estimatePreference(userID=userID,similarity=self.similarity,itemID=itemID))
-
-
- def test_local_not_existing_capper_False_estimatePreference(self):
- userID = 'Leopoldo Pires'
- itemID = 'You, Me and Dupree'
- recSys = ItemRecommender(self.model,self.similarity,self.strategy,False)
- self.assertAlmostEquals(3.14717875510,recSys.estimatePreference(userID=userID,similarity=self.similarity,itemID=itemID))
-
-
- def test_local_not_existing_rescorer_estimatePreference(self):
- userID = 'Leopoldo Pires'
- itemID = 'You, Me and Dupree'
- recSys = ItemRecommender(self.model,self.similarity,self.strategy,False)
- scorer = TanHScorer()
- self.assertAlmostEquals(3.1471787551,recSys.estimatePreference(userID=userID,similarity=self.similarity,itemID=itemID,rescorer=scorer))
-
+ #Semi Recommendation
+ recsys = ItemBasedRecommender(matrix_model, similarity, items_strategy)
+ assert_array_equal(np.array(['Just My Luck', 'You, Me and Dupree']), \
+ recsys.recommend('Leopoldo Pires'))
- def test_empty_recommend(self):
- userID = 'Marcel Caraciolo'
- recSys = ItemRecommender(self.model,self.similarity,self.strategy,False)
- self.assertEquals([],recSys.recommend(userID,4))
+ #Semi Recommendation
+ recsys = ItemBasedRecommender(matrix_model, similarity, items_strategy)
+ assert_array_equal(np.array(['Just My Luck']), \
+ recsys.recommend('Leopoldo Pires', 1))
+ #Empty Recommendation
+ recsys = ItemBasedRecommender(matrix_model, similarity, items_strategy)
+ assert_array_equal(np.array([]), recsys.recommend('Maria Gabriela'))
- def test_recommend(self):
- userID = 'Leopoldo Pires'
- recSys = ItemRecommender(self.model,self.similarity,self.strategy,False)
- self.assertEquals(['Just My Luck', 'You, Me and Dupree'],recSys.recommend(userID,4))
+ #with_preference
+ #recsys = ItemBasedRecommender(matrix_model, similarity, items_strategy, True, True)
+ #assert_array_equal(np.array([('Just My Luck', 3.20597319063), \
+ # ('You, Me and Dupree', 3.14717875510)]), \
+ # recsys.recommend('Leopoldo Pires'))
+ similarity = ItemSimilarity(dict_model, euclidean_distances)
+ #Empty Recommendation
+ recsys = ItemBasedRecommender(dict_model, similarity, items_strategy)
+ assert_array_equal(np.array([]), recsys.recommend('Marcel Caraciolo'))
- def test_full_recommend(self):
- userID = 'Maria Gabriela'
- recSys = ItemRecommender(self.model,self.similarity,self.strategy,False)
- self.assertEquals([],recSys.recommend(userID,4))
+ #Semi Recommendation
+ recsys = ItemBasedRecommender(dict_model, similarity, items_strategy)
+ assert_array_equal(np.array(['Just My Luck', 'You, Me and Dupree']), \
+ recsys.recommend('Leopoldo Pires'))
+ #Semi Recommendation
+ recsys = ItemBasedRecommender(dict_model, similarity, items_strategy)
+ assert_array_equal(np.array(['Just My Luck']), \
+ recsys.recommend('Leopoldo Pires', 1))
- def test_semi_recommend(self):
- userID = 'Leopoldo Pires'
- recSys = ItemRecommender(self.model,self.similarity,self.strategy,False)
- self.assertEquals(['Just My Luck'],recSys.recommend(userID,1))
+ #Empty Recommendation
+ recsys = ItemBasedRecommender(dict_model, similarity, items_strategy)
+ assert_array_equal(np.array([]), recsys.recommend('Maria Gabriela'))
- def test_recommendedBecause(self):
- userID = 'Leopoldo Pires'
- itemID = 'Just My Luck'
- recSys = ItemRecommender(self.model,self.similarity,self.strategy,False)
- self.assertEquals(['The Night Listener', 'Superman Returns'],recSys.recommendedBecause(userID,itemID,2))
-'''
+ #with_preference
+ #recsys = ItemBasedRecommender(dict_model, similarity, items_strategy, True, True)
+ #assert_array_equal(np.array([('Just My Luck', 3.20597319063), \
+ # ('You, Me and Dupree', 3.14717875510)]), \
+ # recsys.recommend('Leopoldo Pires'))

0 comments on commit c8ad203

Please sign in to comment.