In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from tqdm import tqdm, tqdm_notebook
import time, os
import lightfm as lfm
from lightfm import data
from lightfm import cross_validation
from lightfm import evaluation

## Comparativa Metodos KNN y Busqueda de hiperParametros

In [4]:
#! pip install lightfm

### https://making.lyst.com/lightfm/docs/lightfm.html

In [2]:
df = pd.read_csv('./data/ratings_train.csv')
mg = pd.read_csv('./data/movie_genres.csv')

## Init Lightfm

In [3]:
ds = lfm.data.Dataset()
ds.fit(users=df["userID"].unique(), items=mg["movieID"].unique())
ds.interactions_shape()

(2113, 10197)

In [4]:
(interactions, weights) = ds.build_interactions(df[['userID', 'movieID', 'rating']].itertuples(index=False))
interactions

<2113x10197 sparse matrix of type '<class 'numpy.int32'>'
	with 770089 stored elements in COOrdinate format>

In [5]:
train, test = lfm.cross_validation.random_train_test_split(interactions, test_percentage=0.2)
model = lfm.LightFM(no_components=20)
model.fit(train, epochs=10, num_threads=2)

<lightfm.lightfm.LightFM at 0x7fd432d42c10>

In [6]:
train_precision = lfm.evaluation.precision_at_k(model, train, k=10, num_threads=2).mean()
test_precision  = lfm.evaluation.precision_at_k(model, test,  k=10, num_threads=2).mean()
print('Precision en train: {}'.format(train_precision))
print('Precision en test: {}'.format(test_precision))

Precision en train: 0.5128726959228516
Precision en test: 0.13170617818832397


## Item Features

In [7]:
ds = lfm.data.Dataset()
ds.fit(users=df["userID"].unique(), items=mg["movieID"].unique(), item_features=mg["genre"].unique())
ds.interactions_shape()

(2113, 10197)

In [8]:
mg["genre"].unique()


array(['Adventure', 'Animation', 'Children', 'Comedy', 'Fantasy',
       'Romance', 'Drama', 'Action', 'Crime', 'Thriller', 'Horror',
       'Mystery', 'Sci-Fi', 'IMAX', 'Documentary', 'War', 'Musical',
       'Film-Noir', 'Western', 'Short'], dtype=object)

In [9]:
item_features = ds.build_item_features([(i, mg.loc[mg["movieID"] == i, "genre"].values) for i in mg["movieID"].values])

In [35]:
(interactions, weights) = ds.build_interactions(df[['userID', 'movieID', 'rating']].itertuples(index=False))
interactions

<2113x10197 sparse matrix of type '<class 'numpy.int32'>'
	with 770089 stored elements in COOrdinate format>

In [36]:
train, test = lfm.cross_validation.random_train_test_split(interactions, test_percentage=0.2)
model = lfm.LightFM(no_components=20)
model.fit(train, epochs=10, item_features=item_features, num_threads=2)

<lightfm.lightfm.LightFM at 0x7fed10cc71d0>

In [37]:
train_precision = lfm.evaluation.precision_at_k(model, train, item_features=item_features, k=10, num_threads=2).mean()
test_precision  = lfm.evaluation.precision_at_k(model, test,  item_features=item_features, k=10, num_threads=2).mean()
print('Precision en train: {}'.format(train_precision))
print('Precision en test: {}'.format(test_precision))

Precision en train: 0.046142928302288055
Precision en test: 0.010379146784543991
