In [1]:
!pip install lightfm > /dev/null

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from lightfm import LightFM
from lightfm.data import Dataset
from lightfm.evaluation import auc_score

In [3]:
# Version of the libraries used
print('\n'.join(f'{m.__name__} - {m.__version__}'
                for m in globals().values()
                if getattr(m, '__version__', None)))

numpy - 1.25.2
pandas - 1.5.3
seaborn - 0.13.1


In [4]:
# For Google Colaboratory
from google.colab import drive

drive.mount('/content/gdrive/')
%cd '/content/gdrive/MyDrive/Master Ingénieur IA [OC]/Slides Projets/Projet #10/'

Mounted at /content/gdrive/
/content/gdrive/MyDrive/Master Ingénieur IA [OC]/Slides Projets/Projet #10


In [5]:
# Load data
metadata = pd.read_csv('./classic_data/articles_metadata.csv')
click_sample = pd.read_csv('./classic_data/clicks_sample.csv')

---

### **Method 1 (without features)**

In [6]:
# Create a Dataset object
dataset = Dataset()

# Add the entities (articles and users) to the dataset
dataset.fit(users=(row['user_id'] for index, row in click_sample.iterrows()),
            items=(row['article_id'] for index, row in metadata.iterrows()))

In [7]:
# Build interactions between users and articles
(interactions, weights) = dataset.build_interactions((row['user_id'], row['click_article_id'])
                                                      for index, row in click_sample.iterrows())

In [8]:
# Create the template
model = LightFM(loss='warp')

In [9]:
# Train the model
model.fit(interactions, epochs=30)

<lightfm.lightfm.LightFM at 0x7d5d1a143d60>

In [10]:
# Evaluate the model with ROC AUC
train_auc = auc_score(model, interactions).mean()
print('ROC AUC: %s' % train_auc)

ROC AUC: 0.9999829


In [11]:
user_id = click_sample['user_id']
user_id = 1

# Get internal mapping of user ID
user_id_internal = dataset.mapping()[0][user_id]

# Predict scores for all articles for this user
scores = model.predict(user_id_internal, np.arange(interactions.shape[1]))

# Get the first 5 clues from the articles in descending order of score
top_items = np.argsort(-scores)[:5]

# Get the IDs of the first 5 recommended articles
recommended_article_ids = [dataset.mapping()[2][idx] for idx in top_items]

print('The first 5 articles recommended for user {} are : {}'.format(user_id, recommended_article_ids))

The first 5 articles recommended for user 1 are : [119592, 96663, 284847, 108854, 235840]


---

### **Method 2 (with all features)**

In [12]:
dataset = Dataset()

dataset.fit(users=(row['user_id'] for index, row in click_sample.iterrows()),
            items=(row['article_id'] for index, row in metadata.iterrows()))

In [13]:
# Add article features (e.g. category_id, publisher_id, words_count)
dataset.fit_partial(items=(row['article_id'] for index, row in metadata.iterrows()),
                    item_features=(row['category_id'] for index, row in metadata.iterrows()))

# Build interactions between users and articles
(interactions, weights) = dataset.build_interactions((row['user_id'], row['click_article_id'])
                                                      for index, row in click_sample.iterrows())

# Build article features
item_features = dataset.build_item_features(((row['article_id'], [row['category_id']])
                                             for index, row in metadata.iterrows()))

In [14]:
model = LightFM(loss='warp')

In [15]:
model.fit(interactions, item_features=item_features, epochs=30)

<lightfm.lightfm.LightFM at 0x7d5d1a143a90>

In [16]:
train_auc = auc_score(model, interactions, item_features=item_features).mean()
print('ROC AUC: %s' % train_auc)

ROC AUC: 0.999989


In [17]:
user_id = click_sample['user_id']
user_id = 1

user_id_internal = dataset.mapping()[0][user_id]

scores = model.predict(user_id_internal, np.arange(interactions.shape[1]))

top_items = np.argsort(-scores)[:5]

recommended_article_ids = [dataset.mapping()[2][idx] for idx in top_items]

print('The first 5 articles recommended for user {} are : {}'.format(user_id, recommended_article_ids))

The first 5 articles recommended for user 1 are : [96663, 119592, 284847, 235840, 68866]
