In [19]:
import numpy as np
from lightfm.data import Dataset
from lightfm.evaluation import precision_at_k
from lightfm import LightFM
import pandas as pd
import scipy.sparse as sp

def get_df_matrix_mappings(df, user, item):
    """Map entities in interactions df to row and column indices
    Parameters
    ----------
    df : DataFrame Interactions DataFrame.
    user : str User Column.
    item : str Item Column.
    Returns
    -------
    uid_to_idx : dict maps user ID's to index
    iid_to_idx : dict maps item ID's to index

    """
    # Create mappings
    uid_to_idx = {}
    for (idx, rid) in enumerate(df[user].unique().tolist()):
        uid_to_idx[rid] = idx
        

    iid_to_idx = {}
    for (idx, cid) in enumerate(df[item].unique().tolist()):
        iid_to_idx[cid] = idx

    return uid_to_idx, iid_to_idx


def df_to_matrix(df, user, item):
    """Take interactions dataframe and convert to a sparse matrix
    Parameters
    ----------
    df : DataFrame
    user : str
    item : str
    Returns
    -------
    interactions : sparse csr matrix
    uid_to_idx : dict
    iid_to_idx : dict
    """

    uid_to_idx, iid_to_idx = get_df_matrix_mappings(df,user,item)

    def map_ids(row, mapper):
        return mapper[row]

    I = df[user].apply(map_ids, args=[uid_to_idx]).to_numpy()
    J = df[item].apply(map_ids, args=[iid_to_idx]).to_numpy()
    
    V = np.ones(I.shape[0])
    interactions = sp.coo_matrix((V, (I, J)), dtype=np.float64)
    interactions = interactions.tocsr()
    
    return interactions, uid_to_idx, iid_to_idx

def subset_to_matrix(interactions, uid_to_idx, iid_to_idx, ratings, subset):

    diff = ratings.merge(subset, how = 'outer' ,indicator=True).loc[lambda x : x['_merge']=='left_only']
    
    user_list = diff['userId'].values
    movie_list = diff['movieId'].values

    sub_mat = interactions.copy().tolil()
    
    for user,movie in zip(user_list,movie_list):
        uidx = uid_to_idx[user]
        iidx = iid_to_idx[movie]

        sub_mat[uidx, iidx] = 0.
    
    return sub_mat.tocsr()

In [20]:
colnames=['userId', 'movieId', 'rating', 'timestamp']
    
ratings = pd.read_csv('ml-latest-small/ratings.csv', names=colnames)
train_df = pd.read_csv('combined.csv')#'train-split-large.csv')
val_df = pd.read_csv('combined_val.csv')#'valid-split-large.csv')
test_df = pd.read_csv('combined_test.csv')#'test-split-large.csv')

train_df.reset_index(inplace=True)

interaction_matrix, uid_to_idx, iid_to_idx = df_to_matrix(ratings[['userId','movieId']], 'userId', 'movieId')

train = subset_to_matrix(interaction_matrix, uid_to_idx, iid_to_idx, ratings[['userId','movieId']], train_df[['userId', 'movieId']])
val = subset_to_matrix(interaction_matrix, uid_to_idx, iid_to_idx, ratings[['userId','movieId']], val_df[['userId', 'movieId']])
test = subset_to_matrix(interaction_matrix, uid_to_idx, iid_to_idx, ratings[['userId','movieId']], test_df[['userId', 'movieId']])

model = LightFM(loss='warp')
    
model.fit(train, epochs=10)
    
train_precision = precision_at_k(model, train, k=100).mean()
val_precision = precision_at_k(model, val, k=100).mean()
test_precision = precision_at_k(model, test, k=100).mean()
    
print(train_precision)
print(val_precision)
print(test_precision)

0.30636662
0.07851852
0.069322035
