# Importing necessary libraries

In [2]:
import pandas as pd
import numpy as np
from lightfm import LightFM



# Creating dummy datasets 

In [3]:
# create dummy dataset
data = {'user': ['u1','u1','u2','u2', 'u3', 'u3', 'u3'], 
        'item': ['i1', 'i3', 'i2', 'i3', 'i1', 'i4', 'i2'], 
        'r': [.1,.2,.1,.3,.4,.5,.2]
       }
df = pd.DataFrame(data,  columns = ['user', 'item', 'r'])
df

Unnamed: 0,user,item,r
0,u1,i1,0.1
1,u1,i3,0.2
2,u2,i2,0.1
3,u2,i3,0.3
4,u3,i1,0.4
5,u3,i4,0.5
6,u3,i2,0.2


In [4]:
#dummy item features
data = {'user': ['u1','u2','u3'], 
        'f1': [1, 0, 1], 
        'f2': [1, 1, 1],
        'f3': [0, 0, 1],
        'loc': ['del', 'mum', 'del']
       }
features = pd.DataFrame(data,  columns = ['user', 'f1', 'f2', 'f3', 'loc'])
features

Unnamed: 0,user,f1,f2,f3,loc
0,u1,1,1,0,del
1,u2,0,1,0,mum
2,u3,1,1,1,del


# Creating user features

In [5]:
uf = []
col = ['f1']*len(features.f1.unique()) + ['f2']*len(features.f2.unique()) + ['f3']*len(features.f3.unique()) + ['loc']*len(features['loc'].unique())
unique_f1 = list(features.f1.unique()) + list(features.f2.unique()) + list(features.f3.unique()) + list(features['loc'].unique())
#print('f1:', unique_f1)
for x,y in zip(col, unique_f1):
    res = str(x)+ ":" +str(y)
    uf.append(res)
    print(res)


f1:1
f1:0
f2:1
f3:0
f3:1
loc:del
loc:mum


# Fitting the dataset

In [6]:
from lightfm.data import Dataset
# we call fit to supply userid, item id and user/item features
dataset1 = Dataset()
dataset1.fit(
        df['user'].unique(), # all the users
        df['item'].unique(), # all the items
        #user_features = ['f1:1', 'f1:0', 'f2:1', 'f2:0', 'f3:1', 'f3:0', 'loc:mum', 'loc:del']
        user_features = uf
)

In [7]:
# plugging in the interactions and their weights
(interactions, weights) = dataset1.build_interactions([(x[0], x[1], x[2]) for x in df.values ])


In [8]:

interactions.todense()



matrix([[1, 1, 0, 0],
        [0, 1, 1, 0],
        [1, 0, 1, 1]], dtype=int32)

In [9]:
weights.todense()

matrix([[0.1, 0.2, 0. , 0. ],
        [0. , 0.3, 0.1, 0. ],
        [0.4, 0. , 0.2, 0.5]], dtype=float32)

# Building user features

In [10]:
def feature_colon_value(my_list):
    """
    Takes as input a list and prepends the columns names to respective values in the list.
    For example: if my_list = [1,1,0,'del'],
    resultant output = ['f1:1', 'f2:1', 'f3:0', 'loc:del']
   
    """
    result = []
    ll = ['f1:','f2:', 'f3:', 'loc:']
    aa = my_list
    for x,y in zip(ll,aa):
        res = str(x) +""+ str(y)
        result.append(res)
    return result


In [11]:
ad_subset = features[["f1", 'f2','f3', 'loc']] 
ad_list = [list(x) for x in ad_subset.values]
feature_list = []
for item in ad_list:
    feature_list.append(feature_colon_value(item))
    print(feature_colon_value(item))
print(f'Final output: {feature_list}')    

['f1:1', 'f2:1', 'f3:0', 'loc:del']
['f1:0', 'f2:1', 'f3:0', 'loc:mum']
['f1:1', 'f2:1', 'f3:1', 'loc:del']
Final output: [['f1:1', 'f2:1', 'f3:0', 'loc:del'], ['f1:0', 'f2:1', 'f3:0', 'loc:mum'], ['f1:1', 'f2:1', 'f3:1', 'loc:del']]


In [12]:
user_tuple = list(zip(features.user, feature_list))
user_tuple


[('u1', ['f1:1', 'f2:1', 'f3:0', 'loc:del']),
 ('u2', ['f1:0', 'f2:1', 'f3:0', 'loc:mum']),
 ('u3', ['f1:1', 'f2:1', 'f3:1', 'loc:del'])]

In [13]:
user_features = dataset1.build_user_features(user_tuple, normalize= False)
user_features.todense()

matrix([[1., 0., 0., 1., 0., 1., 1., 0., 1., 0.],
        [0., 1., 0., 0., 1., 1., 1., 0., 0., 1.],
        [0., 0., 1., 1., 0., 1., 0., 1., 1., 0.]], dtype=float32)

In [17]:
user_features.todense() # rows are the users and columns are the user features : total 10 features. WHY 10 see below


matrix([[1., 0., 0., 1., 0., 1., 1., 0., 1., 0.],
        [0., 1., 0., 0., 1., 1., 1., 0., 0., 1.],
        [0., 0., 1., 1., 0., 1., 0., 1., 1., 0.]], dtype=float32)

In [14]:
user_id_map, user_feature_map, item_id_map, item_feature_map = dataset1.mapping()
dataset1.mapping()

({'u1': 0, 'u2': 1, 'u3': 2},
 {'u1': 0,
  'u2': 1,
  'u3': 2,
  'f1:1': 3,
  'f1:0': 4,
  'f2:1': 5,
  'f3:0': 6,
  'f3:1': 7,
  'loc:del': 8,
  'loc:mum': 9},
 {'i1': 0, 'i3': 1, 'i2': 2, 'i4': 3},
 {'i1': 0, 'i3': 1, 'i2': 2, 'i4': 3})

In [15]:
user_feature_map


{'u1': 0,
 'u2': 1,
 'u3': 2,
 'f1:1': 3,
 'f1:0': 4,
 'f2:1': 5,
 'f3:0': 6,
 'f3:1': 7,
 'loc:del': 8,
 'loc:mum': 9}

# Training the model

In [16]:

model = LightFM(loss='warp')
model.fit(interactions, # spase matrix representing whether user u and item i interacted
      user_features= user_features, # we have built the sparse matrix above
      sample_weight= weights, # spase matrix representing how much value to give to user u and item i inetraction: i.e ratings
      epochs=10)

<lightfm.lightfm.LightFM at 0x1a18858d50>

# Evaluating the model

In [17]:
from lightfm.evaluation import auc_score
train_auc = auc_score(model,
                      interactions,
                      user_features=user_features
                     ).mean()
print('Hybrid training set AUC: %s' % train_auc)


Hybrid training set AUC: 0.6111111


# Prediction for KNOWN user

In [18]:

# predict for existing user
user_x = user_id_map['u3']
n_users, n_items = interactions.shape # no of users * no of items
model.predict(user_x, np.arange(n_items)) # means predict for all


array([-0.0981927 , -0.04535889, -0.01995798, -0.20764589])

# Prediction for NEW user

In [48]:
# predict for new user
user_feature_list = ['f1:1', 'f2:1', 'f3:0', 'loc:del']

In [53]:
from scipy import sparse
def format_newuser_input(user_feature_map, user_feature_list):
  #user_feature_map = user_feature_map  
  num_features = len(user_feature_list)
  normalised_val = 1.0 
  target_indices = []
  for feature in user_feature_list:
    try:
        target_indices.append(user_feature_map[feature])
    except KeyError:
        print("new user feature encountered '{}'".format(feature))
        pass
  #print("target indices: {}".format(target_indices))
  new_user_features = np.zeros(len(user_feature_map.keys()))
  for i in target_indices:
    new_user_features[i] = normalised_val
  new_user_features = sparse.csr_matrix(new_user_features)
  return(new_user_features)

In [54]:
new_user_features = format_newuser_input(user_feature_map, user_feature_list)

In [31]:
new_user_features.todense()


matrix([[0., 0., 0., 1., 0., 1., 1., 0., 1., 0.]])

In [55]:

model.predict(0, np.arange(n_items), user_features=new_user_features) # Here 0 means pick the first row of the user_features sparse matrix

array([-1.64527702, -1.36543167, -1.10002422, -1.63429642])

In [65]:
model.predict_rank(test, user_features = user_features).todense()

matrix([[3., 0., 0., 0.],
        [0., 1., 0., 0.],
        [3., 0., 0., 2.]], dtype=float32)

If you notice, for user 0, i.e. first row, the rank for item 0 is 3, thats because it has the lowest predict score in output [78]. 

In [64]:
test.todense()

matrix([[1., 0., 0., 0.],
        [0., 1., 0., 0.],
        [1., 0., 0., 1.]], dtype=float32)

In the above matrix, whereever there is a 1, only those interactions (u,i) pair will hv a ranking.

In [85]:
print(test)

  (0, 0)	1.0
  (2, 3)	1.0
  (2, 0)	1.0
  (1, 1)	1.0


In [78]:
model.predict(0, np.arange(n_items), user_features = user_features)

array([-1.86071086, -1.56120646, -1.30620074, -1.85903156])

In [82]:
model.predict(1, np.arange(n_items), user_features = user_features)

array([-1.63636696, -1.35404575, -1.03478122, -1.60591865])

In [83]:
model.predict(2, np.arange(n_items), user_features = user_features)

array([-1.79751194, -1.5171963 , -1.21272945, -1.77229905])

In [81]:
model.predict_rank(test,user_features = user_features)

<3x4 sparse matrix of type '<class 'numpy.float32'>'
	with 4 stored elements in Compressed Sparse Row format>

In [86]:
import scipy.stats as ss

res = model.predict(0, np.arange(n_items), user_features=new_user_features)

ss.rankdata(res)

array([1., 3., 4., 2.])

In [87]:
res

array([-1.64527702, -1.36543167, -1.10002422, -1.63429642])

Higher the rank the better

In [106]:
user_features.todense()

matrix([[1., 0., 0., 1., 0., 1., 1., 0., 1., 0.],
        [0., 1., 0., 0., 1., 1., 1., 0., 0., 1.],
        [0., 0., 1., 1., 0., 1., 0., 1., 1., 0.]], dtype=float32)

In [121]:
from requests import put, get
import requests
from pandas.io.json import json_normalize 

url = 'http://127.0.0.1:1221/vs'
params = {'uid': 'BWf6M8RVnhRLbng1VUAdhtCLEG72', 'num1': 12
         }
response = requests.get(url, params)
d = response.json()
json_normalize(d, 'res')

Unnamed: 0,user,episode,progress,r_est_SVD,r_est_KNN,rank_lightfm
0,cGILP7WVa4XwVILmjxp0ZVI7gfC3,7d69c61b-2937-4f1e-a1f3-dd53ba863130,98.33,999.0,999.0,999.0
1,cGILP7WVa4XwVILmjxp0ZVI7gfC3,f1be1f0b-6359-41a6-b921-c5a6307f90bb,98.33,92.794784,94.245662,0.328915
2,cGILP7WVa4XwVILmjxp0ZVI7gfC3,949243f9-09eb-4110-950c-f9a15182c6b7,98.33,999.0,999.0,999.0
3,cGILP7WVa4XwVILmjxp0ZVI7gfC3,"tag:soundcloud,2010:tracks/772779739",98.33,92.624375,94.245662,0.329188
4,cGILP7WVa4XwVILmjxp0ZVI7gfC3,2890ec96-8327-11ea-a096-b747bcb91e95,98.33,92.531928,94.245662,0.329029
5,2IU3mIjL5uNgOnrEM128PEPgPLz1,aca62eec-c140-415e-8d27-e06e808de84a,94.23,93.752177,93.976973,0.328142
6,2IU3mIjL5uNgOnrEM128PEPgPLz1,cc344c6e-84d3-11ea-bddc-af6bd1d75ec3,94.23,93.466062,93.976973,0.329994
7,lAKy9OUT97RaTqJ7XicGHDUDouT2,516c3808-aaa7-11ea-88c6-43ea29ee4991,99.98,94.414046,999.0,0.328714
8,lAKy9OUT97RaTqJ7XicGHDUDouT2,ff99c9f6-948e-11ea-b40e-b79c47374336,99.98,93.858811,999.0,0.322251
