# Implementación LightFM

## Librerias

In [14]:
from lightfm import LightFM
from lightfm.data import Dataset
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from lightfm.evaluation import precision_at_k, recall_at_k
from lightfm.evaluation import auc_score
from itertools import product
import json

## Datasets

In [15]:
df_postulantes = pd.read_csv('postulantes_procesados.csv', index_col=0)
df_establecimientos = pd.read_csv('establecimientos_procesados.csv', index_col=0)
df_postulaciones_training = pd.read_csv('postulaciones_training.csv', index_col=0)
df_postulaciones_testing = pd.read_csv('postulaciones_testing.csv', index_col=0)

In [16]:
for col in df_establecimientos.columns:
  if ((col != 'LATITUD') and (col != 'LONGITUD')):
    df_establecimientos[col] = df_establecimientos[col].astype(int)

In [17]:
df_establecimientos.drop(columns=['COD_COM_RBD', 'LATITUD', 'LONGITUD'], inplace=True)

In [18]:
dataset = Dataset()
dataset.fit(
    users=df_postulaciones_training['mrun'].unique(),
    items=df_postulaciones_training['rbd'].unique(),
    item_features=[col for col in df_establecimientos.columns if col != 'RBD']
)

In [19]:
# Construir las interacciones de entrenamiento
(interactions, weights) = dataset.build_interactions(
    (row['mrun'], row['rbd']) for _, row in df_postulaciones_training.iterrows()
)

In [20]:
# Construir las características de los ítems
item_features = dataset.build_item_features(
    [
        (row['RBD'], {col: int(row[col]) for col in df_establecimientos.columns if col != 'RBD'})
        for _, row in df_establecimientos.iterrows()
    ]
)

In [21]:
model = LightFM(loss='bpr')
model.fit_partial(interactions, item_features=item_features)

<lightfm.lightfm.LightFM at 0x7fdf12a678e0>

In [22]:
# Construir las interacciones de prueba
(test_interactions, test_weights) = dataset.build_interactions(
    (row['mrun'], row['rbd']) for _, row in df_postulaciones_testing.iterrows()
)


In [23]:
# Evaluar el modelo
train_precision = precision_at_k(model=model, test_interactions=interactions, item_features=item_features, k=10).mean()
test_precision = precision_at_k(model=model, test_interactions=test_interactions, item_features=item_features, k=10, train_interactions=interactions).mean()

train_auc = auc_score(model=model, test_interactions=interactions, item_features=item_features).mean()
test_auc = auc_score(model=model, test_interactions=test_interactions, item_features=item_features, train_interactions=interactions).mean()

train_recall = recall_at_k(model=model, test_interactions=interactions, item_features=item_features, k=10).mean()
test_recall = recall_at_k(model=model, test_interactions=test_interactions, item_features=item_features, k=10, train_interactions=interactions).mean()

print('Precision: train %.10f, test %.10f.' % (train_precision, test_precision))
print('AUC: train %.10f, test %.10f.' % (train_auc, test_auc))
print('Recall: train %.10f, test %.10f.' % (train_recall, test_recall))

Precision: train 0.0031891256, test 0.0018145132.
AUC: train 0.5448706746, test 0.5664147139.
Recall: train 0.0097201398, test 0.0112207712.


In [25]:
predicciones_lightfm = {}
lista_id_usuarios = list(dataset.mapping()[0].keys())
lista_id_establecimientos = list(dataset.mapping()[2].keys())


x = 0
for _,usuario in df_postulaciones_testing.iterrows():
    x += 1
    print(x)
    if x > 10:
        break
    (test_interactions, test_weights) = dataset.build_interactions(
        (usuario['mrun'], row['rbd']) for _, row in df_postulaciones_testing.iterrows()
    )
    ranks = model.predict_rank(test_interactions, item_features=item_features)
    non_zero_values = ranks.data
    row_indices, col_indices = ranks.nonzero()

    non_zero_elements = list(zip(row_indices, col_indices, non_zero_values))
    non_zero_elements = [(int(row[2]), int(row[0]), int(row[1])) for row in non_zero_elements]
    non_zero_elements.sort()
    predicciones_lightfm[int(lista_id_usuarios[non_zero_elements[0][1]])] = [int(lista_id_establecimientos[non_zero_elements[i][2]]) for i in range(20)]

with open('recomendaciones_lightfm.json', 'w') as output:
    json.dump(predicciones_lightfm, output)

1
2
3
4
5
6
7
8
9
10
11
