Skip to content

Commit

Permalink
Merge pull request #70 from orygens/master
Browse files (browse the repository at this point in the history)
Bug Fixes.
  • Loading branch information
brunojm committed Oct 12, 2011
2 parents 0c0482a + 15f4056 commit e1218a4
Show file tree
Hide file tree
Showing 7 changed files with 145 additions and 100 deletions.
10 changes: 5 additions & 5 deletions scikits/crab/models/classes.py
Expand Up @@ -366,12 +366,12 @@ def __unicode__(self):
lines = matrix.split('\n')
headers = [repr(self)[1:-1]]
if self._item_ids.size:
col_headers = [('%-8s' % item[:8]) for item in self._item_ids[:5]]
col_headers = [('%-8s' % unicode(item)[:8]) for item in self._item_ids[:5]]
headers.append(' ' + (' '.join(col_headers)))

if self._user_ids.size:
for (i, line) in enumerate(lines):
lines[i] = ('%-8s' % self._user_ids[i][:8]) + line
lines[i] = ('%-8s' % unicode(self._user_ids[i])[:8]) + line
for (i, line) in enumerate(headers):
if i > 0:
headers[i] = ' ' * 8 + line
Expand Down Expand Up @@ -640,7 +640,7 @@ def preference_value(self, user_id, item_id):
if not item_id_loc[0].size:
raise ItemNotFoundError('item_id in the model not found')

return 1.0 if self.index[user_id_loc, item_id_loc].flatten()[0] else 0.0
return 1.0 if self.index[user_id_loc, item_id_loc].flatten()[0] else np.NaN

def set_preference(self, user_id, item_id, value=None):
'''
Expand Down Expand Up @@ -737,12 +737,12 @@ def __unicode__(self):
lines = matrix.split('\n')
headers = [repr(self)[1:-1]]
if self._item_ids.size:
col_headers = [('%-8s' % item[:8]) for item in self._item_ids[:5]]
col_headers = [('%-8s' % unicode(item)[:8]) for item in self._item_ids[:5]]
headers.append(' ' + (' '.join(col_headers)))

if self._user_ids.size:
for (i, line) in enumerate(lines):
lines[i] = ('%-8s' % self._user_ids[i][:8]) + line
lines[i] = ('%-8s' % unicode(self._user_ids[i])[:8]) + line
for (i, line) in enumerate(headers):
if i > 0:
headers[i] = ' ' * 8 + line
Expand Down
46 changes: 44 additions & 2 deletions scikits/crab/models/tests/test_models.py
Expand Up @@ -5,6 +5,8 @@
from ..classes import MatrixPreferenceDataModel, \
MatrixBooleanPrefDataModel
from ..utils import UserNotFoundError, ItemNotFoundError
from ...datasets import load_sample_songs


#Simple Movies DataSet
movies = {'Marcel Caraciolo': {'Lady in the Water': 2.5, 'Snakes on a Plane': 3.5,
Expand Down Expand Up @@ -83,6 +85,24 @@ def test_basic_methods_MatrixPreferenceDataModel():

assert("MatrixPreferenceDataModel (8 by 6)" in model.__str__())

#SampleSongs DataSet
songs = load_sample_songs()
model = MatrixPreferenceDataModel(songs.data)
assert_equals(model.dataset, songs.data)
assert_array_equal(np.array([1, 2, 3, 4, 5, 6, 7, 8]),
model.user_ids())
assert_array_equal(np.array([1, 2, 3, 4, 5, 6, 7, 8]), model.item_ids())
assert_equals(True, model.has_preference_values())
assert_equals(8, model.users_count())
assert_equals(8, model.items_count())
assert_equals(5.0, model.maximum_preference_value())
assert_equals(1.0, model.minimum_preference_value())
assert_equals([(1, 2.5), (2, 3.5), (3, 5.0), (4, 2.0), (5, 4.5), (6, 1.5), (7, 2.0)], model[1])
elements = [pref for pref in model]
assert_array_equal([(1, 2.5), (2, 3.5), (3, 5.0), (4, 2.0), (5, 4.5), (6, 1.5), (7, 2.0)],
elements[0][1])
assert("MatrixPreferenceDataModel (8 by 8)" in model.__str__())


def test_preferences_from_user_exists_MatrixPreferenceDataModel():
model = MatrixPreferenceDataModel(movies)
Expand Down Expand Up @@ -191,6 +211,11 @@ def test_remove_preference_value_MatrixPreferenceDataModel():
'Maria Gabriela': []
}

#SampleSongs DataSet in boolean form: one entry per key (presumably the
#user id), each mapping to the list of item ids recorded for that key.
#The order inside every list is kept exactly as written.
songs_boolean = {
    1: [1, 2, 3, 4, 5, 6, 7],
    2: [1, 2, 3, 5, 6],
    3: [1, 2, 3, 4, 5, 6],
    4: [1, 3, 4, 5, 6, 7, 8],
    5: [1, 2, 3, 4, 6, 7, 8],
    6: [2, 3, 4, 6, 7, 8],
    7: [2, 3, 4, 5, 6, 8],
    8: [8, 1, 4, 5, 7],
}


def test_basic_methods_MatrixBooleanPrefDataModel():
#Empty Dataset
Expand Down Expand Up @@ -222,6 +247,23 @@ def test_basic_methods_MatrixBooleanPrefDataModel():
'Superman Returns', 'The Night Listener'], elements[0][1])
assert("MatrixBooleanPrefDataModel (8 by 6)" in model.__str__())

songs = load_sample_songs()
model = MatrixBooleanPrefDataModel(songs.data)
assert_equals(model.dataset, songs_boolean)
assert_array_equal(np.array([1, 2, 3, 4, 5, 6, 7, 8]),
model.user_ids())
assert_array_equal(np.array([1, 2, 3, 4, 5, 6, 7, 8]), model.item_ids())
assert_equals(False, model.has_preference_values())
assert_equals(8, model.users_count())
assert_equals(8, model.items_count())
assert_equals(1.0, model.maximum_preference_value())
assert_equals(0.0, model.minimum_preference_value())
assert_array_equal([1, 2, 3, 4, 5, 6, 7], model[1])
elements = [pref for pref in model]
assert_array_equal([1, 2, 3, 4, 5, 6, 7],
elements[0][1])
assert("MatrixBooleanPrefDataModel (8 by 8)" in model.__str__())


def test_preferences_from_user_exists_MatrixBooleanPrefDataModel():
model = MatrixBooleanPrefDataModel(movies_boolean)
Expand Down Expand Up @@ -287,7 +329,7 @@ def test_preference_value__invalid_MatrixBooleanPrefDataModel():
model = MatrixBooleanPrefDataModel(movies_boolean)
assert_raises(UserNotFoundError, model.preference_value, 'Flavia', 'Superman Returns')
assert_raises(ItemNotFoundError, model.preference_value, 'Marcel Caraciolo', 'Back to the future')
assert_array_equal(0.0, model.preference_value('Maria Gabriela', 'The Night Listener'))
assert_array_equal(np.NaN, model.preference_value('Maria Gabriela', 'The Night Listener'))


def test_set_preference_value_MatrixBooleanPrefDataModel():
Expand All @@ -307,5 +349,5 @@ def test_set_preference_value_MatrixBooleanPrefDataModel():
def test_remove_preference_value_MatrixBooleanPrefDataModel():
model = MatrixBooleanPrefDataModel(movies_boolean)
model.remove_preference('Maria Gabriela', 'Superman Returns')
assert_array_equal(0.0, model.preference_value('Maria Gabriela', 'Superman Returns'))
assert_array_equal(np.NaN, model.preference_value('Maria Gabriela', 'Superman Returns'))
assert_raises(ItemNotFoundError, model.remove_preference, 'Marcel Caraciolo', 'Indiana Jones')
60 changes: 30 additions & 30 deletions scikits/crab/recommenders/knn/classes.py
Expand Up @@ -95,12 +95,10 @@ class ItemBasedRecommender(ItemRecommender):
>>> recsys = ItemBasedRecommender(model, similarity, items_strategy)
>>> #Return the recommendations for the given user.
>>> recsys.recommend('Leopoldo Pires')
array(['Just My Luck', 'You, Me and Dupree'],\
dtype='|S18')
['Just My Luck', 'You, Me and Dupree']
>>> #Return the 2 explanations for the given recommendation.
>>> recsys.recommended_because('Leopoldo Pires', 'Just My Luck',2)
array(['The Night Listener', 'Superman Returns'],\
dtype='|S18')
['The Night Listener', 'Superman Returns']
Notes
-----------
Expand Down Expand Up @@ -163,12 +161,16 @@ def estimate_preference(self, user_id, item_id, **params):
item. If a preference cannot be estimated, returns None.
'''
preference = self.model.preference_value(user_id, item_id)

if not np.isnan(preference):
return preference

#TODO: It needs optimization
prefs = self.model.preferences_from_user(user_id)

if not self.model.has_preference_values():
prefs = [(pref, 1.0) for pref in prefs]

similarities = \
np.array([self.similarity.get_similarity(item_id, to_item_id) \
for to_item_id, pref in prefs if to_item_id != item_id]).flatten()
Expand Down Expand Up @@ -241,21 +243,21 @@ def _top_matches(self, source_id, target_ids, how_many=None, **params):

preferences = estimate_preferences(source_id, target_ids)

preferences = preferences[~np.isnan(preferences)]
preference_values = preferences[~np.isnan(preferences)]
target_ids = target_ids[~np.isnan(preferences)]

sorted_preferences = np.lexsort((preferences,))[::-1]
sorted_preferences = np.lexsort((preference_values,))[::-1]

sorted_preferences = sorted_preferences[0:how_many] \
if how_many and sorted_preferences.size > how_many \
else sorted_preferences

if self.with_preference:
top_n_recs = np.array([(target_ids[ind], \
preferences[ind]) for ind in sorted_preferences])
top_n_recs = [(target_ids[ind], \
preferences[ind]) for ind in sorted_preferences]
else:
top_n_recs = np.array([target_ids[ind]
for ind in sorted_preferences])
top_n_recs = [target_ids[ind]
for ind in sorted_preferences]

return top_n_recs

Expand All @@ -280,7 +282,7 @@ def most_similar_items(self, item_id, how_many=None):
self.similarity.num_best = old_how_many

return np.array([item for item, pref in similarities \
if item != item_id])
if item != item_id and not np.isnan(pref)])

def recommended_because(self, user_id, item_id, how_many=None, **params):
'''
Expand Down Expand Up @@ -332,11 +334,11 @@ def recommended_because(self, user_id, item_id, how_many=None, **params):
else sorted_preferences

if self.with_preference:
top_n_recs = np.array([(item_ids[ind], \
prefs[ind]) for ind in sorted_preferences])
top_n_recs = [(item_ids[ind], \
prefs[ind]) for ind in sorted_preferences]
else:
top_n_recs = np.array([item_ids[ind]
for ind in sorted_preferences])
top_n_recs = [item_ids[ind]
for ind in sorted_preferences]

return top_n_recs

Expand Down Expand Up @@ -421,12 +423,10 @@ class UserBasedRecommender(UserRecommender):
>>> recsys = UserBasedRecommender(model, similarity, nhood_strategy)
>>> #Return the recommendations for the given user.
>>> recsys.recommend('Leopoldo Pires')
array(['Just My Luck', 'You, Me and Dupree'],\
dtype='|S18')
['Just My Luck', 'You, Me and Dupree']
>>> #Return the 2 explanations for the given recommendation.
>>> recsys.recommended_because('Leopoldo Pires', 'Just My Luck',2)
array(['Lorena Abreu', 'Marcel Caraciolo'],\
dtype='|S16')
['Lorena Abreu', 'Marcel Caraciolo']
Notes
-----------
Expand Down Expand Up @@ -581,7 +581,7 @@ def most_similar_users(self, user_id, how_many=None):
similarities = self.similarity[user_id]
self.similarity.num_best = old_how_many
return np.array([to_user_id for to_user_id, pref in similarities \
if user_id != to_user_id])
if user_id != to_user_id and not np.isnan(pref)])

def recommend(self, user_id, how_many=None, **params):
'''
Expand Down Expand Up @@ -631,21 +631,21 @@ def _top_matches(self, source_id, target_ids, how_many=None, **params):

preferences = estimate_preferences(source_id, target_ids)

preferences = preferences[~np.isnan(preferences)]
preference_values = preferences[~np.isnan(preferences)]
target_ids = target_ids[~np.isnan(preferences)]

sorted_preferences = np.lexsort((preferences,))[::-1]
sorted_preferences = np.lexsort((preference_values,))[::-1]

sorted_preferences = sorted_preferences[0:how_many] \
if how_many and sorted_preferences.size > how_many \
else sorted_preferences

if self.with_preference:
top_n_recs = np.array([(target_ids[ind], \
preferences[ind]) for ind in sorted_preferences])
top_n_recs = [(target_ids[ind], \
preferences[ind]) for ind in sorted_preferences]
else:
top_n_recs = np.array([target_ids[ind]
for ind in sorted_preferences])
top_n_recs = [target_ids[ind]
for ind in sorted_preferences]

return top_n_recs

Expand Down Expand Up @@ -699,10 +699,10 @@ def recommended_because(self, user_id, item_id, how_many=None, **params):
else sorted_preferences

if self.with_preference:
top_n_recs = np.array([(user_ids[ind], \
prefs[ind]) for ind in sorted_preferences])
top_n_recs = [(user_ids[ind], \
prefs[ind]) for ind in sorted_preferences]
else:
top_n_recs = np.array([user_ids[ind]
for ind in sorted_preferences])
top_n_recs = [user_ids[ind]
for ind in sorted_preferences]

return top_n_recs

0 comments on commit e1218a4

Please sign in to comment.