In [154]:
import pandas as pd
import numpy as np
from lightfm.data import Dataset
from lightfm import LightFM
from ast import literal_eval
import itertools
from lightfm.evaluation import auc_score

In [155]:
users = {'user_id': [0, 1, 2], 'name': ['Jana', 'Ilya', 'Mansur']}
users = pd.DataFrame(data=users)
users

Unnamed: 0,user_id,name
0,0,Jana
1,1,Ilya
2,2,Mansur


In [156]:
recipes = {'id': [0, 1, 2, 3, 4, 5, 6, 7, 8], 
          'name': ['Lamb Curry', 'Chicken Curry', 'Vegetable Curry', 
                   'Fried Rice', 'Vegetable Fried Rice', 'Lamb Fried Rice', 
                   'Pho Soup', 'Lamb Soup', 'Lens Soup'],
          'ingredients': 
               [['lamb', 'curry', 'oil', 'water', 'salt'], 
               ['chicken', 'curry', 'oil', 'water', 'salt'],
               ['curry', 'oil', 'water', 'salt'],           
           
               ['rice', 'chicken', 'eggs', 'beans', 'butter', 'salt'], 
               ['rice', 'eggs', 'butter', 'beans', 'salt'], 
               ['rice', 'lamb', 'eggs', 'butter', 'salt'],           
           
               ['noodle', 'beef', 'kardamon', 'oil', 'water', 'salt', 'pepper', 'chile', 'onions'], 
               ['lamb', 'potatoes', 'carrots', 'kardamon', 'oil', 'water', 'salt', 'pepper', 'onions'],
               ['lens', 'potatoes', 'carrots', 'oil', 'water', 'salt', 'pepper', 'onions']],
           'tags':
               ['non-veg', 'non-veg', 'veg', 'non-veg', 'veg', 'non-veg', 'non-veg', 'non-veg', 'veg']
          }
recipes = pd.DataFrame(data=recipes)
recipes

Unnamed: 0,id,name,ingredients,tags
0,0,Lamb Curry,"[lamb, curry, oil, water, salt]",non-veg
1,1,Chicken Curry,"[chicken, curry, oil, water, salt]",non-veg
2,2,Vegetable Curry,"[curry, oil, water, salt]",veg
3,3,Fried Rice,"[rice, chicken, eggs, beans, butter, salt]",non-veg
4,4,Vegetable Fried Rice,"[rice, eggs, butter, beans, salt]",veg
5,5,Lamb Fried Rice,"[rice, lamb, eggs, butter, salt]",non-veg
6,6,Pho Soup,"[noodle, beef, kardamon, oil, water, salt, pep...",non-veg
7,7,Lamb Soup,"[lamb, potatoes, carrots, kardamon, oil, water...",non-veg
8,8,Lens Soup,"[lens, potatoes, carrots, oil, water, salt, pe...",veg


In [158]:
#rating = {'user_id':  [0,0,0,0,0,0,0,0,0,  1,1,1,1,1,1,1,1,1,  2,2,2,2,2,2,2,2,2],
#          'recipe_id': [0,1,2,3,4,5,6,7,8,  0,1,2,3,4,5,6,7,8,  0,1,2,3,4,5,6,7,8],
#          'rating':   [1,1.7,2,0,1,np.nan,np.nan,4,5,  
#                       5,4.5,np.nan,3,0,2,1,1.5,np.nan,
#                       np.nan,3,2.7,5,np.nan,4,1.5,0,1]}

rating = {'user_id':  [0,0,0,0,0,0,0,0,0,  1,1,1,1,1,1,1,1,1,  2,2,2,2,2,2,2,2,2],
          'recipe_id': [0,1,2,3,4,5,6,7,8,  0,1,2,3,4,5,6,7,8,  0,1,2,3,4,5,6,7,8],
          'rating':   [1,1.7,2,0,1,0,4,4,5,  
                       5,4.5,3.7,3,0,2,1,1.5,0,
                       2,3,2.7,5,4,4,1.5,0,1]}

rating = pd.DataFrame(data=rating)
rating = rating.dropna()
rating.head(10)

Unnamed: 0,user_id,recipe_id,rating
0,0,0,1.0
1,0,1,1.7
2,0,2,2.0
3,0,3,0.0
4,0,4,1.0
5,0,5,0.0
6,0,6,4.0
7,0,7,4.0
8,0,8,5.0
9,1,0,5.0


###### Create Dataset

In [160]:
from lightfm.data import Dataset
from ast import literal_eval

unique_feature_names = set(itertools.chain.from_iterable(recipes['ingredients']))

dataset = Dataset()
dataset.fit(rating['user_id'].unique(), rating['recipe_id'].unique(), item_features=unique_feature_names)

num_users, num_items = dataset.interactions_shape()
print('Num users: {}, num_items: {}.'.format(num_users, num_items))

Num users: 3, num_items: 9.


###### Building unique feature names

In [141]:
#from ast import literal_eval

#unique_feature_names = set(itertools.chain.from_iterable(recipes['ingredients']))

##unique_tags = set(recipes['tags'])

##unique_feature_names.update(unique_tags)

#dataset.fit_partial(items=rating['recipe_id'].unique(), item_features=unique_feature_names)

#### Building the interactions matrix

In [163]:
(interactions, weights) = dataset.build_interactions(((r['user_id'], r['recipe_id']) 
                                                      for i,r in rating.iterrows()))
print(repr(interactions))

interactions.todense()

<3x9 sparse matrix of type '<class 'numpy.int32'>'
	with 27 stored elements in COOrdinate format>


matrix([[1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1]], dtype=int32)

In [164]:
weights.todense()

matrix([[1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1.]], dtype=float32)

In [166]:
f_tuple = ((r['id'], r['ingredients']) for i,r in recipes.iterrows())
item_features = dataset.build_item_features(f_tuple)
print(repr(item_features))

item_features.todense()

<9x28 sparse matrix of type '<class 'numpy.float32'>'
	with 65 stored elements in Compressed Sparse Row format>


matrix([[0.16666667, 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.16666667, 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.16666667, 0.        , 0.        ,
         0.        , 0.16666667, 0.16666667, 0.        , 0.16666667,
         0.        , 0.        , 0.        ],
        [0.        , 0.16666667, 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.16666667, 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.16666667, 0.        , 0.        ,
         0.16666667, 0.        , 0.16666667, 0.        , 0.16666667,
         0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.2       , 0.        , 0.        ,
         0.        , 0.        , 0.        , 0.        , 0.        ,
         0.2       , 0.        , 0.        , 0.        , 0.        ,
         0.

In [167]:
user_id_map, user_feature_map, item_id_map, item_feature_map = dataset.mapping()
item_feature_map

dataset.mapping()

({0: 0, 1: 1, 2: 2},
 {0: 0, 1: 1, 2: 2},
 {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8},
 {0: 0,
  1: 1,
  2: 2,
  3: 3,
  4: 4,
  5: 5,
  6: 6,
  7: 7,
  8: 8,
  'lens': 9,
  'oil': 10,
  'pepper': 11,
  'noodle': 12,
  'butter': 13,
  'carrots': 14,
  'potatoes': 15,
  'beef': 16,
  'salt': 17,
  'kardamon': 18,
  'onions': 19,
  'chicken': 20,
  'lamb': 21,
  'water': 22,
  'rice': 23,
  'curry': 24,
  'chile': 25,
  'beans': 26,
  'eggs': 27})

In [168]:
model = LightFM(loss='warp')
model.fit(interactions,
      item_features=item_features,
      sample_weight=weights,
      epochs=2)

train_auc = auc_score(model,
                      interactions,
                      item_features=item_features
                     ).mean()
print('Hybrid training set AUC: %s' % train_auc)

Hybrid training set AUC: nan


##### Find Similar Items

In [172]:
recipe_names = recipes['name'].values

def similar_items(item_id, item_features, model, N=4):
    (item_biased, item_representations) = model.get_item_representations(features=item_features)
    # Cosine similarity
    scores = item_representations.dot(item_representations[item_id])
    #for s in scores:
    #    print(s)
    item_norms = np.linalg.norm(item_representations, axis=1)
    scores /= item_norms
    best = np.argpartition(scores, -N)[-N:]
    
    top_similar_items = recipe_names[np.argsort(-scores)]
    print("Recommended item: %s" % top_similar_items[1:N])
    
    return sorted(zip(best, scores[best] / item_norms[item_id]),
                  key=lambda x: -x[1])

result = similar_items(0, item_features, model) # Lamb Curry

Recommended item: ['Chicken Curry' 'Vegetable Curry' 'Lamb Fried Rice']


In [178]:
result = similar_items(5, item_features, model) # Lamb Fried Rice

Recommended item: ['Fried Rice' 'Lamb Curry' 'Vegetable Fried Rice']


In [176]:
result = similar_items(8, item_features, model) # Lens Soup

Recommended item: ['Lamb Soup' 'Pho Soup' 'Vegetable Curry']


In [153]:
recipes

Unnamed: 0,id,name,ingredients,tags
0,0,Lamb Curry,"[lamb, curry, oil, water, salt]",non-veg
1,1,Chicken Curry,"[chicken, curry, oil, water, salt]",non-veg
2,2,Vegetable Curry,"[curry, oil, water, salt]",veg
3,3,Fried Rice,"[rice, chicken, eggs, beans, butter, salt]",non-veg
4,4,Vegetable Fried Rice,"[rice, eggs, butter, beans, salt]",veg
5,5,Lamb Fried Rice,"[rice, lamb, eggs, butter, salt]",non-veg
6,6,Pho Soup,"[noodle, beef, kardamon, oil, water, salt, pep...",non-veg
7,7,Lamb Soup,"[lamb, potatoes, carrots, kardamon, oil, water...",non-veg
8,8,Lens Soup,"[lens, potatoes, carrots, oil, water, salt, pe...",veg


In [184]:
recipe_names = recipes['name'].values

def sample_recommendation(model, data, user_id):
    n_user, n_items = data.shape
    scores = model.predict(user_id, np.arange(n_items))
    #print("Scores:")
    #for i in scores:
    #    print(i)
    top_items_for_user = recipe_names[np.argsort(-scores)]
    print("Recommended item: %s" % top_items_for_user[:3])
    
sample_recommendation(model, interactions, 0) # Jana

Recommended item: ['Vegetable Fried Rice' 'Lens Soup' 'Vegetable Curry']


In [185]:
sample_recommendation(model, interactions, 1) # Ilya

Recommended item: ['Vegetable Curry' 'Lamb Curry' 'Pho Soup']


In [186]:
sample_recommendation(model, interactions, 2) # Mansur

Recommended item: ['Lamb Curry' 'Chicken Curry' 'Pho Soup']
