In [27]:
import numpy as np
import pandas as pd
import scipy.sparse as sp
from lightfm.evaluation import precision_at_k
from lightfm import LightFM

def matrix_mappings(df, user, item):
    """Build ID -> positional-index lookups for the user and item columns.

    Indices are assigned in order of first appearance of each distinct
    value in the corresponding column, starting at 0.

    Parameters
    ----------
    df : DataFrame
        Interactions DataFrame.
    user : str
        Name of the user column in ``df``.
    item : str
        Name of the item column in ``df``.

    Returns
    -------
    uid_to_idx : dict
        Maps each distinct user ID to its index.
    iid_to_idx : dict
        Maps each distinct item ID to its index.
    """

    def _index_by_first_seen(column):
        # unique() keeps first-occurrence order; tolist() yields plain Python
        # scalars, so the dict keys are native objects rather than numpy ones.
        distinct_ids = df[column].unique().tolist()
        return {entity: position for position, entity in enumerate(distinct_ids)}

    uid_to_idx = _index_by_first_seen(user)
    iid_to_idx = _index_by_first_seen(item)

    return uid_to_idx, iid_to_idx


def sparse_matrix(df, user, item):
    """Turn an interactions DataFrame into a sparse user x item matrix.

    Each row of ``df`` contributes a value of 1.0 at
    (index of its user, index of its item). The matrix is assembled in COO
    form and converted to CSR; that conversion sums entries that share the
    same coordinates, so a (user, item) pair occurring more than once in
    ``df`` ends up with a value greater than 1.

    Parameters
    ----------
    df : DataFrame
        Interactions DataFrame.
    user : str
        Name of the user column in ``df``.
    item : str
        Name of the item column in ``df``.

    Returns
    -------
    interactions : sparse csr matrix
        float64 matrix with users on rows and items on columns.
    uid_to_idx : dict
        User ID -> row index, as produced by ``matrix_mappings``.
    iid_to_idx : dict
        Item ID -> column index, as produced by ``matrix_mappings``.
    """
    uid_to_idx, iid_to_idx = matrix_mappings(df, user, item)

    # Translate every interaction's raw IDs into matrix coordinates.
    row_positions = df[user].apply(lambda uid: uid_to_idx[uid]).to_numpy()
    col_positions = df[item].apply(lambda iid: iid_to_idx[iid]).to_numpy()

    # One unit-weight entry per interaction row.
    weights = np.ones(row_positions.shape[0])

    interactions = sp.coo_matrix(
        (weights, (row_positions, col_positions)), dtype=np.float64
    ).tocsr()

    return interactions, uid_to_idx, iid_to_idx

def split_matrix(interactions, uid_to_idx, iid_to_idx, ratings, childset,
                 user='userId', item='movieId'):
    """Restrict a full interaction matrix to the pairs present in one split.

    Every (user, item) pair that appears in ``ratings`` but not in
    ``childset`` is set to zero in a copy of ``interactions``; all other
    entries are left as they are. The input matrix is not modified.

    Parameters
    ----------
    interactions : sparse matrix
        Full user x item matrix (anything offering ``.copy().tolil()``).
    uid_to_idx : dict
        User ID -> row index of ``interactions``.
    iid_to_idx : dict
        Item ID -> column index of ``interactions``.
    ratings : DataFrame
        Parent set of interactions; must contain the ``user`` and ``item``
        columns.
    childset : DataFrame
        Subset of interactions to keep; must contain the ``user`` and
        ``item`` columns.
    user : str, optional
        Name of the user column in both frames (default ``'userId'``).
    item : str, optional
        Name of the item column in both frames (default ``'movieId'``).

    Returns
    -------
    sub_mat : sparse csr matrix
        Same shape as ``interactions``, with the held-out pairs zeroed.

    Raises
    ------
    KeyError
        If a held-out pair refers to a user or item missing from the
        index mappings.
    """
    # Join on the (user, item) key only, so unrelated columns the two frames
    # happen to share (e.g. a rating value) cannot prevent a pair from
    # matching. A left join is sufficient: only rows tagged 'left_only'
    # (in ratings, absent from childset) are needed.
    merged = ratings.merge(childset, how='left', on=[user, item], indicator=True)
    held_out = merged.loc[merged['_merge'] == 'left_only']

    # LIL supports cheap single-element assignment; CSR does not.
    sub_mat = interactions.copy().tolil()

    for uid, iid in zip(held_out[user].values, held_out[item].values):
        sub_mat[uid_to_idx[uid], iid_to_idx[iid]] = 0.

    return sub_mat.tocsr()

In [28]:
# --- Load data -------------------------------------------------------------
# Column labels are supplied explicitly via `names=`, so pandas treats every
# line of the file, including the first one, as a data row.
# NOTE(review): if ratings.csv ships with its own header line, that line is
# read as data here - confirm the file has no header.
col_names = ['userId', 'movieId', 'rating', 'timestamp']
ratings = pd.read_csv('ml-latest-small/ratings.csv', names=col_names)

# Pre-computed train / validation / test splits.
train_df = pd.read_csv('train-split-large.csv')  # (commented-out alternative: 'combined.csv')
val_df = pd.read_csv('valid-split-large.csv')    # (commented-out alternative: 'combined_val.csv')
test_df = pd.read_csv('test-split-large.csv')    # (commented-out alternative: 'combined_test.csv')

# Moves the existing index of the training frame into a regular column.
train_df = train_df.reset_index()

# --- Build the interaction matrices ----------------------------------------
# Only the (user, item) pairs are needed; rating values are not used.
id_pairs = ratings[['userId', 'movieId']]
interaction_matrix, uid_to_idx, iid_to_idx = sparse_matrix(id_pairs, 'userId', 'movieId')

# One matrix per split, all with the shape and index mappings of the full
# matrix; pairs outside the respective split are zeroed.
train, val, test = (
    split_matrix(interaction_matrix, uid_to_idx, iid_to_idx,
                 id_pairs, split_df[['userId', 'movieId']])
    for split_df in (train_df, val_df, test_df)
)

# --- Fit the LightFM model -------------------------------------------------
model = LightFM(loss='warp', random_state=2016)
model.fit(train, epochs=10)

# --- Evaluate --------------------------------------------------------------
# Mean precision@100 over users, per split.
# NOTE(review): val/test are scored without `train_interactions`, so items a
# user already interacted with in training are not excluded from the ranked
# list - confirm this is intended.
train_precision = precision_at_k(model, train, k=100).mean()
val_precision = precision_at_k(model, val, k=100).mean()
test_precision = precision_at_k(model, test, k=100).mean()

for split_precision in (train_precision, val_precision, test_precision):
    print(split_precision)

0.066614166
0.024320988
0.02
