# LightFM

[making.lyst.com/lightfm](https://making.lyst.com/lightfm/docs/lightfm.html#lightfm)

## Constantes

In [1]:
K = 4
TEST_PERCENTAGE = 0.25
LEARNING_RATE = 0.25
LOSS_FUNCTION = 'warp'
NO_COMPONENTS = 20
NO_EPOCHS = 20
NO_THREADS = 32
ITEM_ALPHA = 1e-6
USER_ALPHA = 1e-6

SEED = 42

## Importaciones

In [2]:
import pandas as pd

In [106]:
!python -m pip install git+https://github.com/daviddavo/lightfm

Collecting git+https://github.com/daviddavo/lightfm
  Cloning https://github.com/daviddavo/lightfm to /tmp/pip-req-build-4grfxrvl
  Running command git clone --filter=blob:none --quiet https://github.com/daviddavo/lightfm /tmp/pip-req-build-4grfxrvl
  Resolved https://github.com/daviddavo/lightfm to commit f0eb500ead54ab65eb8e1b3890337a7223a35114
  Preparing metadata (setup.py) ... [?25ldone


In [107]:
!python -m pip install recommenders



In [3]:
import os
import sys
import itertools
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import lightfm
from lightfm import LightFM
from lightfm.data import Dataset
from lightfm import cross_validation
from lightfm.evaluation import precision_at_k as lightfm_prec_at_k
from lightfm.evaluation import recall_at_k as lightfm_recall_at_k

from recommenders.evaluation.python_evaluation import precision_at_k, recall_at_k
from recommenders.utils.timer import Timer
from recommenders.datasets import movielens
from recommenders.models.lightfm.lightfm_utils import (
    track_model_metrics,
    prepare_test_df,
    prepare_all_predictions,
    compare_metric,
    similar_users,
    similar_items,
)
from recommenders.utils.notebook_utils import store_metadata

print("System version: {}".format(sys.version))
print("LightFM version: {}".format(lightfm.__version__))


System version: 3.10.18 (main, Jun  5 2025, 13:14:17) [GCC 11.2.0]
LightFM version: 1.17


top-level pandera module will be **removed in a future version of pandera**.
If you're using pandera to validate pandas objects, we highly recommend updating
your import:

```
# old import
import pandera as pa

# new import
import pandera.pandas as pa
```

If you're using pandera to validate objects from other compatible libraries
like pyspark or polars, see the supported libraries section of the documentation
for more information on how to import pandera:

https://pandera.readthedocs.io/en/stable/supported_libraries.html


```
```



### Dataframe

In [4]:
n_users = int(11000/8)
random_state = SEED
ratings = pd.read_csv("../data/player_cards.csv")
sampled_users = ratings["tag"].drop_duplicates().sample(n=n_users, random_state=random_state)
ratings = ratings[ratings["tag"].isin(sampled_users)]
card_cols = [col for col in ratings.columns if col != "tag"]
ratings = pd.melt(ratings, id_vars="tag", value_vars=card_cols, var_name="card", value_name="rating")
ratings.rename(columns={"tag": "player"}, inplace=True)
df = ratings[ratings["rating"] == 2]

In [5]:
df

Unnamed: 0,player,card,rating
0,#VY08Q8V89,26000000,2
6,#G2ULJQVVC,26000000,2
13,#PQCGQ2QR,26000000,2
14,#9PQ00CRRJ,26000000,2
16,#VLQ82UQCJ,26000000,2
...,...,...,...
166277,#JYYU2P9RC,28000026,2
166289,#200JPP8YC2,28000026,2
166305,#PVP99UQUV,28000026,2
166325,#VGG0U8QU0,28000026,2


### Preparación datos

In [6]:
data = df.rename(columns={"player": "userID", "card": "itemID"})
data

Unnamed: 0,userID,itemID,rating
0,#VY08Q8V89,26000000,2
6,#G2ULJQVVC,26000000,2
13,#PQCGQ2QR,26000000,2
14,#9PQ00CRRJ,26000000,2
16,#VLQ82UQCJ,26000000,2
...,...,...,...
166277,#JYYU2P9RC,28000026,2
166289,#200JPP8YC2,28000026,2
166305,#PVP99UQUV,28000026,2
166325,#VGG0U8QU0,28000026,2


Se esconden 4 cartas para test, para simular el proceso de darle 4 cartas al LLM y que complete el resto del mazo (de 8 cartas).

In [7]:
np.random.seed(42)

grouped = data.groupby("userID")

train_rows = []
test_rows = []

for user, group in grouped:
    if len(group) < 8:
        continue

    shuffled = group.sample(frac=1)

    train = shuffled.iloc[:4]
    test = shuffled.iloc[4:]

    train_rows.append(train)
    test_rows.append(test)

df_train = pd.concat(train_rows)
df_test = pd.concat(test_rows)

print(df_train.shape, df_test.shape)

(5468, 3) (5468, 3)


In [8]:
dataset = Dataset()

dataset.fit(users=data["userID"], items=data["itemID"])

num_players, num_cards = dataset.interactions_shape()
print(f'Num players: {num_players}, num cards: {num_cards}.')

Num players: 1375, num cards: 121.


In [9]:
(interactions, weights) = dataset.build_interactions(data.iloc[:, 0:3].values)
(train_interactions, _) = dataset.build_interactions(df_train.iloc[:, 0:3].values)
(test_interactions, _) = dataset.build_interactions(df_test.iloc[:, 0:3].values)

In [10]:
print(f"Shape of train interactions: {train_interactions.shape}")
print(f"Shape of test interactions: {test_interactions.shape}")

Shape of train interactions: (1375, 121)
Shape of test interactions: (1375, 121)


### Entrenamiento modelo

In [11]:
model = LightFM(loss=LOSS_FUNCTION, no_components=NO_COMPONENTS,
    learning_rate=LEARNING_RATE,
    random_state=np.random.RandomState(SEED))

In [12]:
model.fit(interactions=train_interactions,
    epochs=NO_EPOCHS);

### Preparación de datos para evaluación

In [13]:
uids, iids, interaction_data = cross_validation._shuffle(
    interactions.row, interactions.col, interactions.data,
    random_state=np.random.RandomState(SEED))

cutoff = int((1.0 - TEST_PERCENTAGE) * len(uids))
test_idx = slice(cutoff, None)

In [14]:
uid_map, ufeature_map, iid_map, ifeature_map = dataset.mapping()

In [15]:
with Timer() as test_time:
    test_df = prepare_test_df(test_idx, uids, iids, uid_map, iid_map, weights)
print(f"Took {test_time.interval:.1f} seconds for prepare and predict test data.")
time_reco1 = test_time.interval

Took 0.1 seconds for prepare and predict test data.


In [16]:
test_df.sample(5, random_state=SEED)

Unnamed: 0,userID,itemID,rating
859,#LYCPJGCY,27000006,2.0
1521,#JRVVYRYQ0,26000055,2.0
1322,#2Q009QRQU,26000077,2.0
1352,#V8PVRJCGJ,26000005,2.0
2031,#JJRV0QYUL,26000072,2.0


In [17]:
with Timer() as test_time:
    all_predictions = prepare_all_predictions(data, uid_map, iid_map,
        interactions=train_interactions,
        model=model,
        num_threads=NO_THREADS)
print(f"Took {test_time.interval:.1f} seconds for prepare and predict all data.")
time_reco2 = test_time.interval

Took 216.3 seconds for prepare and predict all data.


In [18]:
all_predictions.sample(5, random_state=SEED)

Unnamed: 0,userID,itemID,prediction
34756,#YLLCCJ8CU,28000018,-2.798141
50562,#2PPC02V,26000007,-4.536026
116454,#C9YU2VYUV,26000024,-2.129728
66073,#UVC8JQYRR,26000080,-8.862637
46885,#QVV82LLV2,26000093,-13.315029


### Evaluación

In [19]:
with Timer() as test_time:
    eval_precision = precision_at_k(rating_true=test_df,
        rating_pred=all_predictions, k=K)
    eval_recall = recall_at_k(test_df, all_predictions, k=K)
time_reco3 = test_time.interval

with Timer() as test_time:
    eval_precision_lfm = lightfm_prec_at_k(model, test_interactions,
        train_interactions, k=K).mean()
    eval_recall_lfm = lightfm_recall_at_k(model, test_interactions,
        train_interactions, k=K).mean()
time_lfm = test_time.interval

print(
    "------ Using Repo's evaluation methods ------",
    f"Precision@{K}:\t{eval_precision:.6f}",
    f"Recall@{K}:\t{eval_recall:.6f}",
    "\n------ Using LightFM evaluation methods ------",
    f"Precision@{K}:\t{eval_precision_lfm:.6f}",
    f"Recall@{K}:\t{eval_recall_lfm:.6f}",
    sep='\n')

------ Using Repo's evaluation methods ------
Precision@4:	0.039495
Recall@4:	0.073439

------ Using LightFM evaluation methods ------
Precision@4:	0.128749
Recall@4:	0.128749
