# Importing necessary libraries

In [45]:
import pandas as pd
import numpy as np
from scipy import sparse
from lightfm import LightFM
from lightfm.evaluation import auc_score
from lightfm.data import Dataset
import itertools

# Creating dummy datasets 

In [46]:
# utility functions
def get_interactions_format(data, user, item):
        return [
            (
                row[user],
                row[item]
            )
            for idx, row in data.iterrows()
        ]

def serialize_list(in_list):
        return list(itertools.chain.from_iterable(in_list))

def prepare_features_format(data, id, feature_columns):
        for row in data.iterrows():
            yield (row[1][id], [str(row[1][feature]) for feature in feature_columns])

def get_uniques_by_columns(dataframe, columns):
    uniques = list()
    dataframe = dataframe.applymap(str)
    for col in columns:
        uniques.append(dataframe[col].unique())
    return serialize_list(uniques)

def flagFeatures(data, columns):
    for c in columns:
        data[c] = [c+':'+x for x in data[c].values]
    return data

def format_newuser_input(user_feature_map, user_feature_list):
    num_features = len(user_feature_list)
    normalised_val = 1.0 
    target_indices = []
    for feature in user_feature_list:
        try:
            target_indices.append(user_feature_map[feature])
        except KeyError:
            print("new user feature encountered '{}'".format(feature))
            pass
    #print("target indices: {}".format(target_indices))
    new_user_features = np.zeros(len(user_feature_map.keys()))
    for i in target_indices:
        new_user_features[i] = normalised_val
    new_user_features = sparse.csr_matrix(new_user_features)
    return(new_user_features)

def getrecommendations(model, user, interactions, item_map, user_map, user_features):
    
    n_users, n_items = interactions.shape
    pred = model.predict(user_map[user], np.arange(n_items), user_features=user_features)
    dd = { 'user': user,
        'score' : pred,
        'item': item_map.values(),
        'item_index': item_map.keys(),
        'purchased': np.array(interactions.todense()[user_map[user], :]).flatten()
    }
    dd = pd.DataFrame(dd)
    dd = dd.sort_values(by='score', ascending=False)
    return dd

In [47]:
# create dummy dataset
interaction = {'user': ['u1','u1','u2','u2','u3','u3','u3'], 
        'item': ['i1','i3','i2','i3','i1','i4','i2'], 
        'r': [1,2,1,3,4,5,2]
       }
df = pd.DataFrame(interaction,  columns = ['user', 'item', 'r'])

print(':::::::: user / item :::::::::')
print(df)

# dummy users features
ux = {'user': ['u1', 'u2' , 'u3'], 
        'f1': ['1', '0', '1'], 
        'f2': ['1', '1', '1'],
        'f3': ['0', '0', '1'],
        'loc': ['del', 'mum', 'del']
       }
ux_features = pd.DataFrame(ux,  columns = ['user', 'f1', 'f2', 'f3', 'loc'])
print(':::::::: user features :::::::::')
print(ux_features)


:::::::: user / item :::::::::
  user item  r
0   u1   i1  1
1   u1   i3  2
2   u2   i2  1
3   u2   i3  3
4   u3   i1  4
5   u3   i4  5
6   u3   i2  2
:::::::: user features :::::::::
  user f1 f2 f3  loc
0   u1  1  1  0  del
1   u2  0  1  0  mum
2   u3  1  1  1  del


In [48]:
ux = flagFeatures(ux_features, ['f1', 'f2', 'f3', 'loc'])
unique_user_features = get_uniques_by_columns(ux, ['f1', 'f2', 'f3', 'loc'])

ii = get_interactions_format(df, 'user', 'item')
uu = prepare_features_format(ux_features, 'user', ['f1', 'f2', 'f3', 'loc'])

unique_users = get_uniques_by_columns(ux_features, ['user'])
unique_items = get_uniques_by_columns(df, ['item'])

user_map = dict(zip(unique_users, range(0, len(unique_users))))
item_map = dict(zip(unique_items, range(0, len(unique_items))))

In [49]:
# create datasets fro ligthFM
dataset = Dataset()
dataset.fit(
    users=unique_users,
    items=unique_items, 
    user_features = unique_user_features
)

interactions, weights = dataset.build_interactions(ii)
user_features = dataset.build_user_features(uu, normalize=False)

In [50]:
# explore matrices

print(':::::::: interactions :::::::::')
print(interactions.todense())

print(":::::: user features :::::")
print(user_features.todense())

:::::::: interactions :::::::::
[[1 1 0 0]
 [0 1 1 0]
 [1 0 1 1]]
:::::: user features :::::
[[1. 0. 0. 1. 0. 1. 1. 0. 1. 0.]
 [0. 1. 0. 0. 1. 1. 1. 0. 0. 1.]
 [0. 0. 1. 1. 0. 1. 0. 1. 1. 0.]]


In [51]:
model = LightFM(loss='warp')
model.fit(interactions, 
      user_features=user_features, # we have built the sparse matrix above
      # sample_weight=weights, # spase matrix representing how much value to give to user u and item i interaction: i.e ratings
      epochs=20)

<lightfm.lightfm.LightFM at 0x7faec9cd2310>

In [52]:
train_auc = auc_score(model,
                    interactions,
                    user_features=user_features
                    ).mean()
print('Hybrid training set AUC: %s' % train_auc)

Hybrid training set AUC: 0.8055556


In [53]:
# predict for existing user
user_x = 1
n_users, n_items = interactions.shape # no of users * no of items
pp = model.predict(user_x, np.arange(n_items), user_features=user_features) # means predict for all

In [54]:
r = getrecommendations(model, 'u1', interactions, item_map, user_map, user_features)
print(r.loc[r.purchased == 0])

r = getrecommendations(model, 'u2', interactions, item_map, user_map, user_features)
print(r.loc[r.purchased == 0])

r = getrecommendations(model, 'u3', interactions, item_map, user_map, user_features)
print(r.loc[r.purchased == 0])

  user     score  item item_index  purchased
2   u1 -2.374612     2         i2        0.0
3   u1 -3.292638     3         i4        0.0
  user     score  item item_index  purchased
0   u2 -2.323379     0         i1        0.0
3   u2 -3.280339     3         i4        0.0
  user     score  item item_index  purchased
1   u3 -2.217167     1         i3        0.0


In [55]:
user_id_map, user_feature_map, item_id_map, item_feature_map = dataset.mapping()
user_feature_map

{'u1': 0,
 'u2': 1,
 'u3': 2,
 'f1:1': 3,
 'f1:0': 4,
 'f2:1': 5,
 'f3:0': 6,
 'f3:1': 7,
 'loc:del': 8,
 'loc:mum': 9}

In [60]:
user_feature_list = ['f1:1', 'f2:1', 'f3:1', 'loc:del']
new_user_features = format_newuser_input(user_feature_map, user_feature_list)
# here 0 means pick the first row of the user_features sparse matrix
model.predict(0, np.arange(n_items), user_features=new_user_features) 

array([-1.7631356, -1.8461933, -1.8855019, -2.421931 ], dtype=float32)

In [59]:
print(ux_features)
print(user_feature_list)

  user    f1    f2    f3      loc
0   u1  f1:1  f2:1  f3:0  loc:del
1   u2  f1:0  f2:1  f3:0  loc:mum
2   u3  f1:1  f2:1  f3:1  loc:del
['f1:1', 'f2:1', 'f3:1', 'loc:del']
