In [1]:
import pandas as pd
import numpy as np
from lightfm.datasets import fetch_movielens

data = fetch_movielens('movielens', indicator_features=False, genre_features=True)

print('original train')
print(np.unique(data['train'].data))
print(data['train'].__repr__())
print('original test')
print(np.unique(data['test'].data))
print(data['test'].__repr__())

# binarizing traing examples as in the original lightfm paper to use the logistic loss
data['train'].data = np.array([-1, 1])[1 * (data['train'].data >= 4)]
data['test'].data = np.array([-1, 1])[1 * (data['test'].data >= 4)]

# should keep only positive test interactions
data['test_positive_only'] = data['test'].copy()
data['test_positive_only'].data = 1 *(data['test_positive_only'].data>=1)
data['test_positive_only'].eliminate_zeros()

train = data['train']
test = data['test']
test_positives = data['test_positive_only']

print('train')
print(np.unique(data['train'].data))
print(data['train'].__repr__())
print('test')
print(np.unique(data['test'].data))
print(data['test'].__repr__())
print('test_positive_only')
print(np.unique(data['test_positive_only'].data))
print(data['test_positive_only'].__repr__())

item_features = data['item_features']
tag_labels = data['item_feature_labels']
print('There are %s distinct item features, with values like %s.' % (item_features.shape[1], tag_labels[:3].tolist()))



original train
[1 2 3 4 5]
<943x1682 sparse matrix of type '<class 'numpy.int32'>'
	with 90570 stored elements in COOrdinate format>
original test
[1 2 3 4 5]
<943x1682 sparse matrix of type '<class 'numpy.int32'>'
	with 9430 stored elements in COOrdinate format>
train
[-1  1]
<943x1682 sparse matrix of type '<class 'numpy.int64'>'
	with 90570 stored elements in COOrdinate format>
test
[-1  1]
<943x1682 sparse matrix of type '<class 'numpy.int64'>'
	with 9430 stored elements in COOrdinate format>
test_positive_only
[1]
<943x1682 sparse matrix of type '<class 'numpy.int64'>'
	with 5469 stored elements in COOrdinate format>
There are 19 distinct item features, with values like ['genre:unknown', 'genre:Action', 'genre:Adventure'].


In [2]:
train_df = pd.DataFrame.from_dict({
        'user': train.row,
        'item': train.col,
        'rating': train.data,
    })

test_df = pd.DataFrame.from_dict({
        'user': test.row,
        'item': test.col,
        'rating': test.data,
    })

print(train_df.shape)
train_df.head()

test_user_ids = test_df.user.unique()
all_user_ids = train_df.user.unique()
all_item_ids = np.unique(data['item_features'].tocoo().row)

def to_all_user_items(user_ids, item_ids):
    return pd.DataFrame.from_dict(
        {'user': np.repeat(user_ids, len(item_ids)),
         'item': np.tile(item_ids, len(user_ids))})

all_user_items = to_all_user_items(all_user_ids, all_item_ids)
print(all_user_items.shape)
print(test_user_ids.shape)
all_user_items.head()

(90570, 3)
(1586126, 2)
(943,)


Unnamed: 0,item,user
0,0,0
1,1,0
2,2,0
3,3,0
4,4,0


## Tensforflow model

In [4]:
import tensorflow as tf
import datetime as dt
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

%matplotlib inline

## Prediction

In [22]:
def all_predictions_to_hits(all_user_items, all_predicted_values, ground_truth_user_items):
    predicted_ratings = all_user_items.assign(predicted_rating=lambda _: all_predicted_values)
    predicted_ranks = predicted_ratings.groupby('user')['predicted_rating'].rank(ascending=False, method='max')
    predicted_ratings['rank'] = predicted_ranks.values - 1

    ground_truth_hits = pd.merge(
        left=ground_truth_user_items,
        right=predicted_ratings,
        on=['user', 'item'], how='left')
    return ground_truth_hits

In [23]:
def all_predicted_hits(predict_function, ground_truth_df, split_size=1000):
    user_ids = ground_truth_df.user.unique()
    item_ids = ground_truth_df.item.unique()
    user_ids_splits = np.array_split(user_ids, len(user_ids) / split_size)
    user_items_splits = (to_all_user_items(user_ids_split, item_ids) for user_ids_split in user_ids_splits)
    hits_for_user_splits = [all_predictions_to_hits(
            split_user_items, 
            all_predicted_values=predict_function(split_user_items),
            ground_truth_user_items=ground_truth_df[ground_truth_df.user.isin(split_user_items.user.unique())])
        for split_user_items in user_items_splits]
    return pd.concat(hits_for_user_splits)

In [24]:
def mean_reciprocal_rank(predicted_ranks_df):
    return predicted_ranks_df\
        .assign(rec_rank=lambda df:1 / (df['rank'] + 1))\
        .groupby('user')['rec_rank'].max()\


In [30]:
def sample_batch(positives_df, batch_size, positive_ratio=.33):
    n_positives = int(batch_size * positive_ratio)
    n_negatives = batch_size - n_positives
    negatives = pd.DataFrame.from_dict({
        'user': np.random.choice(all_user_ids, replace=True, size=n_negatives),
        'item': np.random.choice(all_item_ids, replace=True, size=n_negatives),
        'rating': np.repeat(0, n_negatives)
        })
    return pd.concat([positives_df.sample(n_positives), negatives], axis=0)

# if train has both positives and negatives
def sample_batch(positives_and_negatives_df, batch_size):
    batch_df = positives_and_negatives_df.sample(batch_size)
    return batch_df.assign(rating = lambda df: np.maximum(df.rating, 0))

test_samples = sample_batch(train_df, batch_size=5)

In [31]:
inputs.to_feed_dict(test_samples, with_ratings=True)

{<tf.Tensor 'user_ids:0' shape=(?,) dtype=int32>: array([933, 682, 408, 706, 436], dtype=int32),
 <tf.Tensor 'item_ids:0' shape=(?,) dtype=int32>: array([532, 326, 126, 448, 411], dtype=int32),
 <tf.Tensor 'ratings:0' shape=(?,) dtype=float32>: array([0, 1, 1, 0, 0])}

In [68]:
N_USERS, N_ITEMS = train.shape


class Placeholders:
    
    def __init__(self, batch_size=None):
        self.user_ids = tf.placeholder(tf.int32, shape=[batch_size], name='user_ids')
        self.item_sparse_features = tf.sparse_placeholder(tf.int32, name='item_features')
        self.item_ids = tf.placeholder(tf.int32, shape=[batch_size], name='item_ids')
        self.ratings = tf.placeholder(tf.float32, shape=[batch_size], name='ratings')

    def to_feed_dict(self, user_items_df, with_ratings=False):
        features_dict = {
            self.user_ids: user_items_df.user.values,
            self.item_ids: user_items_df.item.values
        }
        
        if with_ratings:
            features_dict[self.ratings] = user_items_df.rating.values

        return features_dict


class UserItem2BinaryModel:
    def __init__(self, dimensionality=30):
        self.dimensionality = dimensionality
        
        with tf.name_scope('B'):
            self.user_biases =  tf.Variable(tf.random_normal(shape=[N_USERS, 1], stddev=0.01, mean=0))
            tf.summary.histogram('user_biases', self.user_biases)

        with tf.name_scope('B'):
            self.item_biases =  tf.Variable(tf.random_normal(shape=[N_ITEMS, 1], stddev=0.01, mean=0))
            tf.summary.histogram('item_biases', self.item_biases)

        with tf.name_scope('X'):
            self.user_factors = tf.Variable(tf.random_normal([N_USERS, self.dimensionality], stddev=0.01, mean=0))
            tf.summary.histogram('user_factors', self.user_factors)
            
        with tf.name_scope('Y'):
            self.item_factors = tf.Variable(tf.random_normal([N_ITEMS, self.dimensionality], stddev=0.01, mean=0))
            tf.summary.histogram('item_factors', self.item_factors)

    def user_bias(self, user_ids):
        with tf.name_scope('B_user'):
            return tf.squeeze(tf.nn.embedding_lookup(params=self.user_biases, ids=user_ids), name='B_user')

    def item_bias(self, item_ids):
        with tf.name_scope('C_item'):
            return tf.squeeze(tf.nn.embedding_lookup(params=self.item_biases, ids=item_ids), name='C_item')

    def user_item_product(self, user_ids, item_ids):
        with tf.name_scope('X_user'):
            batch_user_factors = tf.squeeze(tf.nn.embedding_lookup(self.user_factors, user_ids))
        with tf.name_scope('Y_item'):
            batch_item_factors = tf.squeeze(tf.nn.embedding_lookup(self.item_factors, item_ids))
        with tf.name_scope('dot'):
            factors_prediction = tf.reduce_mean(
                tf.mul(batch_user_factors, batch_item_factors), reduction_indices=1)
        return factors_prediction

    def predictions(self, user_ids, item_ids):
        with tf.name_scope('inference'):
            return tf.add(
                self.user_item_product(user_ids, item_ids), 
                tf.add(self.user_bias(user_ids), self.item_bias(item_ids), name='biases'),
                name='logits')
                                 

def log_loss(predictions, targets):
    """ targets as one-hot encodings
    """
    with tf.name_scope('log_loss'):
        return tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=predictions, targets=targets))

In [69]:
import os

LEARNING_RATE = 0.01
N_ITER = 101
BATCH_SIZE = 1024
N_STEP_SUMMARY = 20
LOG_DIR = '/tmp/tfrecs_logs'

with tf.Graph().as_default():
    model = UserItem2BinaryModel(dimensionality=10)
    inputs = Placeholders()
    
    logits = model.predictions(inputs.user_ids, inputs.item_ids)
    
    loss = log_loss(logits, inputs.ratings)
    tf.summary.scalar('train_loss', loss)
    summary = tf.summary.merge_all()
    test_summary = tf.summary.scalar('test_loss', loss)

    train_step = tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss)
                
    def perform_step(step, train, test, summary_writer):
        batch_samples = sample_batch(train_df, BATCH_SIZE)

        _, loss_value, summary_value = sess.run(
            fetches=[train_step, loss, summary], 
            feed_dict=inputs.to_feed_dict(batch_samples, with_ratings=True))
        
        summary_writer.add_summary(summary_value, global_step=step)

        if step % N_STEP_SUMMARY == 0:

            test_samples = sample_batch(test_df, BATCH_SIZE)
            test_loss_value, test_summary_value = sess.run(
                fetches=[loss, test_summary],
                feed_dict=inputs.to_feed_dict(test_samples, with_ratings=True))
            summary_writer.add_summary(test_summary_value, global_step=step)

            # predicting on all users and items
            all_prediction_values = logits.eval(feed_dict=inputs.to_feed_dict(all_user_items))
            
            print('Step %d: batch/test log loss = %.3f/%.3f, train/test MRR = %.3f/%.3f' % (
                    step, loss_value, test_loss_value, 
                    mean_reciprocal_rank(all_predictions_to_hits(
                        all_user_items, all_prediction_values, train_df.query("rating > 0"))).mean(),
                    mean_reciprocal_rank(all_predictions_to_hits(
                        all_user_items, all_prediction_values, test_df.query("rating > 0"))).mean()
                ))

        summary_writer.flush()

    with tf.Session() as sess:

        summary_writer = tf.summary.FileWriter(LOG_DIR + '/{:%Y%m%d%H%M%S}'.format(dt.datetime.now()), sess.graph)

        print('Starting training')
        sess.run(tf.global_variables_initializer())
        
        for step in range(N_ITER):
            perform_step(step, train_df, test_df, summary_writer)

        train_hits = all_predicted_hits(
            lambda user_items: logits.eval(inputs.to_feed_dict(user_items)),
            train_df, split_size=100)

        test_hits = all_predicted_hits(
            lambda user_items: logits.eval(inputs.to_feed_dict(user_items)),
            test_df.query("rating > 0"), split_size=100)

Starting training
Step 0: batch/test log loss = 0.693/0.692, train/test MRR = 0.237/0.053
Step 20: batch/test log loss = 0.661/0.661, train/test MRR = 0.480/0.128
Step 40: batch/test log loss = 0.633/0.643, train/test MRR = 0.496/0.137
Step 60: batch/test log loss = 0.612/0.627, train/test MRR = 0.560/0.157
Step 80: batch/test log loss = 0.598/0.610, train/test MRR = 0.546/0.143
Step 100: batch/test log loss = 0.588/0.601, train/test MRR = 0.550/0.143


In [59]:
print(train_hits.shape)
print(test_hits.shape)
        
print(mean_reciprocal_rank(train_hits).mean())
print(mean_reciprocal_rank(test_hits).mean())
test_hits.head(5)

(90570, 5)
(5469, 5)
0.502520948841
0.112540172658


Unnamed: 0,item,rating,user,predicted_rating,rank
0,19,1,0,0.146599,412.0
1,32,1,0,-0.150742,669.0
2,60,1,0,0.379277,254.0
3,159,1,0,0.048568,512.0
4,170,1,0,0.455228,199.0


In [26]:
from scipy import sparse

def to_sparse_ranks(df, dtype=np.float32):
    return sparse.csr_matrix((df['rank'], (df.user, df.item)), dtype=dtype)

from lightfm._lightfm_fast import CSRMatrix, calculate_auc_from_rank

def hits_to_auc(test_hits):
    """
    Simplification of https://github.com/lyst/lightfm/blob/master/lightfm/evaluation.py#L136
    """
    ranks = to_sparse_ranks(test_hits)
    auc = np.zeros(ranks.shape[0], dtype=np.float32)
    num_train_positives = np.zeros(ranks.shape[0], dtype=np.int32)
    calculate_auc_from_rank(CSRMatrix(sparse.csr_matrix(ranks, dtype=np.float32)), num_train_positives, ranks.data, auc, num_threads=1)

    return auc

def hits_to_mrr(test_hits):
    """ From https://github.com/lyst/lightfm/blob/master/lightfm/evaluation.py#L206
    """
    ranks = to_sparse_ranks(test_hits)

    ranks.data = 1.0 / (ranks.data + 1.0)
    return np.squeeze(np.array(ranks.max(axis=1).todense()))


print(
    hits_to_mrr(test_hits).shape,
    hits_to_mrr(test_hits).mean(),
    hits_to_auc(test_hits).mean())

# TODO investigate why test_hits doesn't cover all test users
print(mean_reciprocal_rank(test_hits).shape, mean_reciprocal_rank(test_hits).mean())

(943,) 0.162602 0.847304
(934,) 0.164169242148


## Multi-class classification

In [24]:
class User2MulticClassItemsModel:
    def __init__(self, dimensionality=10):
        self.dimensionality = dimensionality
            
        with tf.name_scope('item_biases'):
            self.item_biases =  tf.Variable(tf.random_normal(shape=[N_ITEMS], stddev=0.01, mean=0), name='item_features_biases')
            tf.summary.histogram('item_biases', self.item_biases)

        with tf.name_scope('user_factors'):
            self.user_factors = tf.Variable(tf.random_normal([N_USERS, self.dimensionality], stddev=0.01, mean=0), name='users')
            tf.summary.histogram('user_factors', self.user_factors)
            
        with tf.name_scope('item_factors'):
            self.item_factors = tf.Variable(tf.random_normal([N_ITEMS, self.dimensionality], stddev=0.01, mean=0), name='users')
            tf.summary.histogram('item_factors', self.item_factors)

    def output_items_scores(self, user_ids):
        with tf.name_scope('user_item_product'):
            return self.item_biases + tf.matmul(
                tf.nn.embedding_lookup(self.user_factors, user_ids),
                tf.transpose(self.item_factors), name='user_to_all_items_logits')

def multiclass_loss(logits, target_item_ids):
    with tf.name_scope('loss'):
        return tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits,
                labels=target_item_ids))
    
import os

LEARNING_RATE = 0.001
N_ITER = 201
BATCH_SIZE = 1024
N_STEP_SUMMARY = 20
LOG_DIR = '/tmp/tf_logs'

with tf.Graph().as_default():
    inputs = Placeholders()
        
    model = User2MulticClassItemsModel(30)
    logits = model.output_items_scores(inputs.user_ids)
    loss = multiclass_loss(logits, inputs.item_ids)
    
    tf.summary.scalar('train_loss', loss)
    summary = tf.summary.merge_all()
    test_summary = tf.summary.scalar('test_loss', loss)

    train_step = tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss)
                
    def perform_step(step, train, test, summary_writer):
        batch_samples = train_df.query("rating == 1").sample(BATCH_SIZE)

        _, loss_value, summary_value = sess.run(
            fetches=[train_step, loss, summary], 
            feed_dict=inputs.to_feed_dict(batch_samples, with_ratings=False))
        
        summary_writer.add_summary(summary_value, global_step=step)

        if step % N_STEP_SUMMARY == 0:

            test_samples = sample_batch(test_df, BATCH_SIZE)
            test_loss_value, test_summary_value = sess.run(
                fetches=[loss, test_summary],
                feed_dict=inputs.to_feed_dict(test_samples, with_ratings=True))
            summary_writer.add_summary(test_summary_value, global_step=step)

            # predicting on all users and items
            all_prediction_values = logits.eval(feed_dict={inputs.user_ids: all_user_ids}).ravel()
            print('Step %d: batch/test log loss = %.3f/%.3f, train/test MRR = %.3f/%.3f' % (
                    step, loss_value, test_loss_value, 
                    mean_reciprocal_rank(all_predictions_to_hits(
                        all_user_items, all_prediction_values, train_df.query("rating > 0"))).mean(),
                    mean_reciprocal_rank(all_predictions_to_hits(
                        all_user_items, all_prediction_values, test_df.query("rating > 0"))).mean()
                ))

        summary_writer.flush()

    with tf.Session() as sess:

        summary_writer = tf.summary.FileWriter(LOG_DIR + '/{:%Y%m%d%H%M%S}'.format(dt.datetime.now()), sess.graph)

        print('Starting training')
        sess.run(tf.global_variables_initializer())
        
        for step in range(N_ITER):
            perform_step(step, train_df, test_df, summary_writer)

        all_prediction_values = logits.eval(feed_dict={inputs.user_ids: all_user_ids}).ravel()
        train_hits = all_predictions_to_hits(
            all_user_items, all_prediction_values,
            train_df.query("rating > 0"))

        test_hits = all_predictions_to_hits(
            all_user_items, all_prediction_values,
            test_df.query("rating > 0"))

Starting training
Step 0: batch/test log loss = 7.428/7.427, train/test MRR = 0.095/0.026
Step 20: batch/test log loss = 7.408/7.410, train/test MRR = 0.362/0.085
Step 40: batch/test log loss = 7.378/7.385, train/test MRR = 0.392/0.098
Step 60: batch/test log loss = 7.301/7.339, train/test MRR = 0.409/0.107
Step 80: batch/test log loss = 7.184/7.262, train/test MRR = 0.469/0.123
Step 100: batch/test log loss = 7.022/7.152, train/test MRR = 0.535/0.153
Step 120: batch/test log loss = 6.851/7.060, train/test MRR = 0.488/0.132
Step 140: batch/test log loss = 6.752/6.963, train/test MRR = 0.507/0.134
Step 160: batch/test log loss = 6.634/6.887, train/test MRR = 0.574/0.157
Step 180: batch/test log loss = 6.555/6.850, train/test MRR = 0.578/0.164
Step 200: batch/test log loss = 6.547/6.831, train/test MRR = 0.569/0.164


In [25]:
print(
    hits_to_mrr(test_hits).shape,
    hits_to_mrr(test_hits).mean(),
    hits_to_auc(test_hits).mean())

(943,) 0.162602 0.847304


## LightFM comparison

In [60]:
from lightfm.evaluation import reciprocal_rank, auc_score, recall_at_k

score_functions = [reciprocal_rank, auc_score, recall_at_k]
stat_functions = [np.mean, np.std, np.median, np.size]

def scores_stats(scores, stat_functions):
    return [f(scores) for f in stat_functions]

pd.DataFrame(
    index=[s.__name__ for s in score_functions],
    data=[scores_stats(
        score(cf_model, 
              test_interactions=data['test'], train_interactions=data['train']),
        stat_functions)
        for score in score_functions],
    columns=[s.__name__ for s in stat_functions])

Unnamed: 0,mean,std,median,size
reciprocal_rank,0.500372,0.384439,0.333333,943
auc_score,0.844125,0.115616,0.873672,943
recall_at_k,0.178155,0.131675,0.2,943


In [1]:
from lightfm import LightFM
from lightfm.evaluation import reciprocal_rank, auc_score, recall_at_k, precision_at_k

cf_model = LightFM(loss='logistic', item_alpha=0.0001, no_components=10, learning_rate=0.001)
cf_model.fit(train, epochs=20)

print('Collaborative filtering train/test MRR: %.3f / %.3f'
      % (reciprocal_rank(cf_model, data['train']).mean(),
         reciprocal_rank(cf_model, data['test']).mean()))

print('Collaborative filtering train/test AUC: %.3f / %.3f'
      % (auc_score(cf_model, data['train']).mean(),
         auc_score(cf_model, data['test'], train_interactions=None).mean()))



NameError: name 'train' is not defined

In [56]:
from lightfm import LightFM
from lightfm.evaluation import reciprocal_rank, auc_score, recall_at_k

cf_model = LightFM(loss='bpr', item_alpha=0.0001, no_components=20)
cf_model.fit(train, epochs=10)

print('Collaborative filtering train/test MRR: %.3f / %.3f'
      % (reciprocal_rank(cf_model, data['train']).mean(),
         reciprocal_rank(cf_model, data['test']).mean()))

print('Collaborative filtering train/test AUC: %.3f / %.3f'
      % (auc_score(cf_model, data['train']).mean(),
         auc_score(cf_model, data['test'], train_interactions=None).mean()))

Collaborative filtering train/test MRR: 0.807 / 0.254
Collaborative filtering train/test AUC: 0.853 / 0.826


In [28]:
from lightfm import LightFM
from lightfm.evaluation import reciprocal_rank, auc_score

cf_model = LightFM(loss='warp', item_alpha=0.0001, no_components=20)
cf_model.fit(train, epochs=10)

print('Collaborative filtering train/test MRR: %.3f / %.3f'
      % (reciprocal_rank(cf_model, data['train']).mean(),
         reciprocal_rank(cf_model, data['test']).mean()))

print('Collaborative filtering train/test AUC: %.3f / %.3f'
      % (auc_score(cf_model, data['train']).mean(),
         auc_score(cf_model, data['test'], train_interactions=None).mean()))

Collaborative filtering train/test MRR: 0.825 / 0.272
Collaborative filtering train/test AUC: 0.922 / 0.889
