In [1]:
import pandas as pd
import scipy.sparse as sparse
import numpy as np
import random
from lightfm import LightFM
from sklearn.metrics.pairwise import cosine_similarity
from tqdm.notebook import tqdm
%matplotlib inline

In [2]:
data = pd.read_csv('ratings.dat', header=0, names=['user_a', 'user_b', 'rating'])

In [3]:
for_delete = (data.groupby('user_a').count()['user_b'] > 50)
for_delete = for_delete[for_delete]

In [4]:
index_for_delete = []
for index, row in tqdm(enumerate(data.user_a)):
    if row in for_delete:
        index_for_delete.append(index)

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




In [5]:
df = data.drop(pd.Index(index_for_delete))

In [6]:
user_a = list(np.sort(df.user_a.unique())) 
user_b = list(df.user_b.unique()) 
rating = list(df.rating) 

rows = df.user_a.astype('category').cat.codes 
cols = df.user_b.astype('category').cat.codes 
purchases_sparse = sparse.csr_matrix((rating, (rows, cols)), shape=(len(user_a), len(user_b)))

In [7]:
def make_train(ratings, pct_test = 0.2):
    test_set = ratings.copy() 
    test_set[test_set != 0] = 1 
    training_set = ratings.copy() 
    nonzero_inds = training_set.nonzero() 
    nonzero_pairs = list(zip(nonzero_inds[0], nonzero_inds[1])) 
    random.seed(0) 
    num_samples = int(np.ceil(pct_test*len(nonzero_pairs))) 
    samples = random.sample(nonzero_pairs, num_samples) 
    user_inds = [index[0] for index in samples] 
    item_inds = [index[1] for index in samples] 
    training_set[user_inds, item_inds] = 0 
    training_set.eliminate_zeros() 
    return training_set, test_set, list(set(user_inds))  

In [8]:
product_train, product_test, product_users_altered = make_train(purchases_sparse, pct_test = 0.2)

In [9]:
model = LightFM(loss='warp')
model.fit_partial(product_train, epochs=40, num_threads=4)

<lightfm.lightfm.LightFM at 0x7faa3c01c208>

In [10]:


def display_item_to_items_recommendations(model,item_id):

    user_arr = np.array(user_b) 
    
    item_id = np.where(user_arr == item_id)[0][0]
    print(item_id)
  
    return cosine_similarity(
           model.item_embeddings)[item_id].argsort()[-5:][::-1]


display_item_to_items_recommendations(model, 1095)

21653


array([21653, 51423, 20883, 61411, 23509])