In [None]:
from collections import defaultdict

from lightfm import LightFM
from lightfm import evaluation
import numpy as np
import pandas as pd
from sklearn import model_selection

import resources as re

In [None]:
# MovieLens data, downloaded from https://grouplens.org/datasets/movielens/
# Ratings, users and items are each read from a tab-separated file.
ratings_df = pd.read_csv('data/ratings.tsv', sep='\t')
users_df = pd.read_csv('data/users.tsv', sep='\t')
items_df = pd.read_csv('data/items.tsv', sep='\t')

In [None]:
# The raw user and item IDs start at 1; shift them down by one so that they
# start at 0. Note that re-running this cell shifts the IDs a second time.
ratings_df['user'] = ratings_df['user'].sub(1)
ratings_df['item'] = ratings_df['item'].sub(1)
users_df['user'] = users_df['user'].sub(1)
items_df['item'] = items_df['item'].sub(1)

In [None]:
# Preview the first rows of the ratings table
ratings_df.head()

In [None]:
# Preview the first rows of the users table
users_df.head()

In [None]:
# Preview the first rows of the items table
items_df.head()

In [None]:
# User feature names: three fixed names plus one entry per distinct
# occupation value found in users_df. `re` is the project's `resources`
# module here, not the standard-library regex module.
user_feature_names = np.hstack(['age', 'female', 'male', users_df.occupation.unique()])
user_features_dict = re.build_dict(user_feature_names)
user_features_df = re.build_users_dataframe(users_df, user_features_dict)

In [None]:
# Preview the first rows of the user features table
user_features_df.head()

In [None]:
# Item feature names: 'release' plus the names of the last 19 columns of
# items_df (presumably the genre indicator columns -- TODO confirm against
# the data file).
item_feature_names = np.hstack(['release', items_df.columns[-19:]])
item_features_dict = re.build_dict(item_feature_names)
item_features_df = re.build_items_dataframe(items_df, item_features_dict)

In [None]:
# Preview the first rows of the item features table
item_features_df.head()

In [None]:
# Dataset statistics; the user/item/feature counts are used further down as
# the dimensions passed to re.build_interaction_matrix.
length = ratings_df.shape[0]
# dropna=False keeps the count identical to len(Series.unique())
num_users = ratings_df['user'].nunique(dropna=False)
num_items = ratings_df['item'].nunique(dropna=False)
num_user_features, num_item_features = len(user_features_dict), len(item_features_dict)

In [None]:
# Show the dataset statistics computed above
length, num_users, num_items, num_user_features, num_item_features

In [None]:
# A single shuffled split with 10% of the ratings held out for testing
# (one fold of a quasi-k-fold scheme); the seed keeps it reproducible.
ss = model_selection.ShuffleSplit(n_splits=1, random_state=19, test_size=0.1)
train_index, test_index = next(ss.split(ratings_df))

In [None]:
# Number of ratings that ended up in the train and test splits
len(train_index), len(test_index)

In [None]:
# Pick the rows of each split out of ratings_df by position
train_df, test_df = (ratings_df.iloc[rows] for rows in (train_index, test_index))

In [None]:
# Transforming the final data to sparse matrices.
# Every call follows the same shape:
#   (number of rows, number of columns, source frame, filter function, filter kwargs)

# Interactions: users x items, built from ratings with the collaborative filter
train_data = re.build_interaction_matrix(num_users, num_items, train_df,
                                         re.collaborative_filter, {'min_rating': 3})
test_data = re.build_interaction_matrix(num_users, num_items,
                                        test_df, re.collaborative_filter, {'min_rating': 3})

# Features: entities x feature names, built with the content filter
user_features = re.build_interaction_matrix(num_users, num_user_features,
                                            user_features_df, re.content_filter, {'kind': 'user'})
# The item matrix used to be built from the data frame alone, unlike the three
# calls above; it now passes dimensions and filter the same way.
# NOTE(review): {'kind': 'item'} mirrors the 'user' call by analogy -- confirm
# the accepted value against resources.content_filter.
item_features = re.build_interaction_matrix(num_items, num_item_features,
                                            item_features_df, re.content_filter, {'kind': 'item'})

In [None]:
# Model hyperparameters, gathered in one place so they are easy to tune
model_params = {
    'no_components': 5,
    'loss': 'warp',
    'learning_schedule': 'adagrad',
    'random_state': 19,
}
model = LightFM(**model_params)

# Train on the interaction matrix only (pure collaborative filtering).
# To train a hybrid model instead, also pass user_features=user_features and
# item_features=item_features here and to every evaluation/prediction call.
model.fit(train_data, epochs=5, num_threads=2)

In [None]:
# Score the model: auc_score is averaged with .mean() for the training and
# the held-out interactions. No feature matrices are passed, matching the
# way the model was fit.
# NOTE(review): the test score is computed without train_interactions --
# confirm whether known training positives should be excluded from it.
auc_score_train, auc_score_test = (
    evaluation.auc_score(model, interactions, num_threads=2).mean()
    for interactions in (train_data, test_data)
)

In [None]:
# Show the mean AUC on the train and test interactions
auc_score_train, auc_score_test

In [None]:
# And getting a recommendation!!
user_id = 2  # The user for recommending movies
year = 1995  # A filter: only items released after this year are kept

# The model was fit (and evaluated) on the interaction matrix alone, so it must
# be queried the same way. Passing feature matrices to a model trained without
# them makes LightFM look up embeddings by feature column instead of by
# user/item ID, which yields meaningless scores. If the model is ever re-fit
# with user/item features, pass those same matrices here as well.
scores = model.predict(user_id, np.arange(num_items), num_threads=2)

# Rank every item for this user, best score first
scores_index_sorted = scores.argsort()[::-1]  # Sorting, getting indices and reversing
# Boolean mask, one entry per item.
# NOTE(review): assumes row i of item_features_df (and of items_df) describes
# item ID i -- confirm against resources.build_items_dataframe.
after_95 = item_features_df.release.values > year  # Filtering by year
after_95_sorted = after_95[scores_index_sorted]  # Mask reordered to follow the ranking
recommendation = scores_index_sorted[after_95_sorted]  # Ranked item IDs that pass the filter

items_df.title.values[recommendation][:100]  # Titles of the top 100 recommendations