# LightFM is a Python implementation of a hybrid recommendation algorithms for both implicit and explicit feedbacks

It is a hybrid content-collaborative model which represents users and items as linear combinations of their content features’ latent factors. The model learns embeddings or latent representations of the users and items in such a way that it encodes user preferences over items. These representations produce scores for every item for a given user; items scored highly are more likely to be interesting to the user.

The user and item embeddings are estimated for every feature, and these features are then added together to be the final representations for users and items.

For example, for user i, the model retrieves the i-th row of the feature matrix to find the features with non-zero weights. The embeddings for these features will then be added together to become the user representation e.g. if user 10 has weight 1 in the 5th column of the user feature matrix, and weight 3 in the 20th column, the user 10’s representation is the sum of embedding for the 5th and the 20th features multiplying their corresponding weights. The representation for each items is computed in the same approach.

In [1]:
pip install lightfm

Note: you may need to restart the kernel to use updated packages.


In [2]:
import sys
import os

import itertools
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import lightfm
from lightfm import LightFM
from lightfm.data import Dataset
from lightfm import cross_validation

# Import LightFM's evaluation metrics
from lightfm.evaluation import precision_at_k as lightfm_prec_at_k
from lightfm.evaluation import recall_at_k as lightfm_recall_at_k

# Import repo's evaluation metrics
from recommenders.evaluation.python_evaluation import (
    precision_at_k, recall_at_k)

from recommenders.utils.timer import Timer
from recommenders.datasets import movielens
from recommenders.models.lightfm.lightfm_utils import (
    track_model_metrics, prepare_test_df, prepare_all_predictions,
    compare_metric, similar_users, similar_items)

print("System version: {}".format(sys.version))
print("LightFM version: {}".format(lightfm.__version__))



System version: 3.8.5 (default, Sep  3 2020, 21:29:08) [MSC v.1916 64 bit (AMD64)]
LightFM version: 1.16


In [3]:
# default number of recommendations
K = 10
# percentage of data used for testing
TEST_PERCENTAGE = 0.25
# model learning rate
LEARNING_RATE = 0.25
# no of latent factors
NO_COMPONENTS = 20
# no of epochs to fit model
NO_EPOCHS = 20
# no of threads to fit model
NO_THREADS = 32
# regularisation for both user and item features
ITEM_ALPHA=1e-6
USER_ALPHA=1e-6
# seed for pseudonumber generations
SEEDNO = 42

In [4]:
filter_reviews = pd.read_csv('C:\\Users\\PAVANI\\Documents\\Reading course\\Reading course\\Recommendation systems\\8Lkh_with_Sentiment_analysis.csv')


In [5]:
filter_reviews = filter_reviews.drop('Unnamed: 0', axis=1)
ratings = filter_reviews[['user_name', 'item_id', 'review_polarity']]

In [6]:
genre_data = pd.read_csv('3k_game_Data.csv')

In [7]:
genre_data = genre_data[['item_id', 'genre']]

In [8]:
final_dataframe = pd.merge(ratings, genre_data, on='item_id')

In [9]:
final_dataframe = final_dataframe.rename(columns={'userId': 'userID', 'itemId': 'itemID', 'rating': 'rating'})

In [10]:
final_dataframe.head()

Unnamed: 0,user_name,item_id,review_polarity,genre
0,coolguyrift,32430,3,Action
1,bort,32430,3,Action
2,ヽ(｡･ω･｡)ﾉ,32430,3,Action
3,lowkey,32430,2,Action
4,greg,32430,2,Action


In [11]:
dataset = Dataset()


In [12]:
dataset.fit(users=final_dataframe['userID'], 
            items=final_dataframe['itemID'])
num_users, num_items = dataset.interactions_shape()
print(f'Num users: {num_users}, num_items: {num_items}.')

KeyError: 'userID'

# Building Interaction Matrix

In [None]:
(interactions, weights) = dataset.build_interactions(final_dataframe.iloc[:, 0:3].values)

In [None]:
train_interactions, test_interactions = cross_validation.random_train_test_split(
    interactions, test_percentage=TEST_PERCENTAGE,random_state=np.random.RandomState(SEEDNO))

In [None]:
print(f"Shape of train interactions: {train_interactions.shape}")
print(f"Shape of test interactions: {test_interactions.shape}")

In [108]:
model1 = LightFM(loss='warp', no_components=NO_COMPONENTS, 
                 learning_rate=LEARNING_RATE,                 
                 random_state=np.random.RandomState(SEEDNO))
model1.fit(interactions=train_interactions,
          epochs=NO_EPOCHS);


In [112]:
uids, iids, interaction_data = cross_validation._shuffle(
    interactions.row, interactions.col, interactions.data, 
    random_state=np.random.RandomState(SEEDNO))

cutoff = int((1.0 - TEST_PERCENTAGE) * len(uids))
test_idx = slice(cutoff, None)

In [110]:
dic_users={}
i=1
for index, row in final_dataframe.iterrows():
    if row['userID'] not in dic_users:
        dic_users[row['userID']] = i
        i+=1
    
len(dic_users)

9145

In [111]:
def get_recommendations(username):

    # get the list of the movie ids
    unique_ids = final_dataframe['itemID'].unique()
    ids = final_dataframe.loc[final_dataframe['userID']== username, 'itemID']

    # remove the rated movies for the recommendations
    movies_to_predict = np.setdiff1d(unique_ids,ids)
    return movies_to_predict

movies_to_predict = get_recommendations('lumpy')
print(len(movies_to_predict))
my_recs = []
for iid in movies_to_predict:
    my_recs.append((iid, model1.predict(dic_users['lumpy'],iid)))
 
df_result = pd.DataFrame(my_recs, columns=['item_id', 'predictions']).sort_values('predictions', ascending=False).head(20)
df_result


2954


TypeError: object of type 'numpy.int64' has no len()

In [113]:
uid_map, ufeature_map, iid_map, ifeature_map = dataset.mapping()

In [114]:
ifeature_map

{32430: 0,
 10180: 1,
 230050: 2,
 33230: 3,
 231200: 4,
 266510: 5,
 220240: 6,
 39120: 7,
 239160: 8,
 4000: 9,
 107410: 10,
 25800: 11,
 388410: 12,
 227860: 13,
 433340: 14,
 9900: 15,
 243950: 16,
 50300: 17,
 421020: 18,
 221910: 19,
 431240: 20,
 221100: 21,
 330830: 22,
 319630: 23,
 349700: 24,
 227300: 25,
 204450: 26,
 302830: 27,
 360640: 28,
 264280: 29,
 203290: 30,
 11450: 31,
 239200: 32,
 212680: 33,
 394230: 34,
 452060: 35,
 240: 36,
 456670: 37,
 391460: 38,
 215530: 39,
 674940: 40,
 352520: 41,
 17500: 42,
 588430: 43,
 230230: 44,
 368370: 45,
 202970: 46,
 485380: 47,
 205190: 48,
 630: 49,
 254200: 50,
 233270: 51,
 236110: 52,
 322110: 53,
 433850: 54,
 110800: 55,
 259080: 56,
 3590: 57,
 363600: 58,
 208650: 59,
 271590: 60,
 8190: 61,
 301910: 62,
 252490: 63,
 48700: 64,
 4500: 65,
 221680: 66,
 241930: 67,
 391540: 68,
 312990: 69,
 303210: 70,
 298610: 71,
 384300: 72,
 367600: 73,
 16450: 74,
 12120: 75,
 248630: 76,
 16720: 77,
 347430: 78,
 55140: 79,

In [115]:
with Timer() as test_time:
    test_df = prepare_test_df(test_idx, uids, iids, uid_map, iid_map, weights)
print(f"Took {test_time.interval:.1f} seconds for prepare and predict test data.")  
time_reco1 = test_time.interval

Took 31.2 seconds for prepare and predict test data.


In [116]:
test_df.sample(5)

Unnamed: 0,userID,itemID,rating
128074,hunk,333930,3.0
107668,cuki,271860,3.0
77390,all might,41700,3.0
76153,nick,45760,6.0
133896,cozmin,224600,3.0


In addition, the predictions of all unseen user-item pairs (e.g. removing those seen in the training data) can be prepared as follows:

In [118]:
with Timer() as test_time:
    all_predictions = prepare_all_predictions(final_dataframe, uid_map, iid_map, 
                                              interactions=train_interactions,
                                              model=model1, 
                                              num_threads=NO_THREADS)
print(f"Took {test_time.interval:.1f} seconds for prepare and predict all data.")
time_reco2 = test_time.interval

MemoryError: 

In [None]:
all_predictions.sample(5)