# RePlay recommender models comparison

### Dataset
We will compare RePlay models on __MovieLens 1m__. 

### Dataset preprocessing: 
Ratings greater than or equal to 3 are considered as positive interactions.

### Data split
The dataset is split by date so that the last 20% of interactions are placed in the test part. Cold items and users are dropped.

### Predict:
We will predict top-10 most relevant films for each user.

### Metrics
Quality metrics used: __hitrate@k__ for k = 1, 5, 10 and __ndcg@k, map@k, mrr@k__ for k = 10.

Additional metrics used: __coverage@k__ and __surprisal@k__ for k = 10.

In [1]:
# Reload imported modules automatically before each cell is executed,
# so edits to library code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Turn off jedi for tab completion in this IPython session.
%config Completer.use_jedi = False

In [3]:
import logging
import pandas as pd
import time


from pyspark.sql import functions as sf, types as st

from replay.data_preparator import DataPreparator
from replay.experiment import Experiment
from replay.metrics import Coverage, HitRate, MRR, MAP, NDCG, Surprisal
from replay.models import (
    ALSWrap, 
    ADMMSLIM, 
    ClassifierRec, 
    KNN, 
    LightFMWrap, 
    MultVAE, 
    NeuroMF, 
    SLIM, 
    Stack,
    PopRec, 
    RandomRec, 
    Wilson, 
    Word2VecRec
)

from replay.models.base_rec import HybridRecommender
from replay.session_handler import State
from replay.splitters import DateSplitter
from replay.utils import get_log_info

In [4]:
# Logger named "replay"; `full_pipeline` below uses it to report progress.
logger = logging.getLogger("replay")

In [5]:
# Spark session provided by RePlay's State; the bare `spark` displays it.
spark = State().session
spark

In [6]:
# Show INFO-level messages from RePlay (pipeline progress, best parameters).
# `logging` is already imported in the imports cell, so no extra import is needed.
State().logger.setLevel(logging.INFO)

In [7]:
K = 10  # number of recommendations per user; also the cut-off for most metrics
K_list_metrics = [1, 5, 10]  # cut-offs passed to HitRate
BUDGET = 20  # default `budget` passed to `optimize` and to Stack
SEED = 12345  # seed passed to the models that accept one

## 0. Preprocessing <a name='data-preparator'></a>

### 0.1 Data loading

In [8]:
from rs_datasets import MovieLens

# MovieLens 1m; `info()` prints a preview of the ratings, users and items tables.
data = MovieLens("1m")
data.info()

ratings


Unnamed: 0,user_id,item_id,rating,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968



users


Unnamed: 0,user_id,gender,age,occupation,zip_code
0,1,F,1,10,48067
1,2,M,56,16,70072
2,3,M,25,15,55117



items


Unnamed: 0,item_id,title,genres
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance





#### log preprocessing

In [9]:
# Convert the interaction log to a Spark dataframe.
# Keys are the column names used by RePlay, values are the dataset's columns.
ratings_columns = {
    "user_id": "user_id",
    "item_id": "item_id",
    "relevance": "rating",
    "timestamp": "timestamp",
}
preparator = DataPreparator()
log = preparator.transform(data=data.ratings, columns_names=ratings_columns)
print(get_log_info(log))

total lines: 1000209, total users: 6040, total items: 3706


In [10]:
# Ratings of 3 and above count as positive feedback;
# every positive interaction gets relevance = 1.
positive_rating = sf.col('relevance') >= 3
only_positives_log = (
    log
    .filter(positive_rating)
    .withColumn('relevance', sf.lit(1))
)
only_positives_log.count()

836478

In [11]:
# No features yet: sections 2.1-2.4 run with both set to None.
# `item_features` is reassigned in section 2.5.
user_features=None
item_features=None

### 0.2. Data split

In [12]:
# Split by date into train and test; cold users and items are dropped.
splitter_options = dict(
    test_start=0.2,
    drop_cold_items=True,
    drop_cold_users=True,
)
train_spl = DateSplitter(**splitter_options)
train, test = train_spl.split(only_positives_log)
for part_name, part in (('train', train), ('test', test)):
    print('{} info:\n'.format(part_name), get_log_info(part))

train info:
 total lines: 669181, total users: 5397, total items: 3569
test info:
 total lines: 86542, total users: 1139, total items: 3279


In [13]:
# Split `train` again with the same splitter: hyperparameters are tuned on
# opt_train / opt_val (see `full_pipeline`), so `test` is not used for tuning.
opt_train, opt_val = train_spl.split(train)
opt_train.count(), opt_val.count()

(535343, 24241)

In [14]:
# Negative feedback (ratings below 3, relevance = 0) is used by the
# Classifier and Wilson models.
only_negatives_log = log.filter(sf.col('relevance') < 3).withColumn('relevance', sf.lit(0.))
# Earliest timestamp of the test part: only negatives before it go into training.
test_start = test.agg(sf.min('timestamp')).collect()[0][0]

# Train log with both positive and negative feedback.
# `unionByName` matches columns by name, so a difference in column order
# between `train` and `log` cannot silently mix columns (plain `union` is positional).
pos_neg_train = (
    train
    .withColumn('relevance', sf.lit(1))
    .unionByName(only_negatives_log.filter(sf.col('timestamp') < test_start))
)
pos_neg_train.count()

798993

In [15]:
# Preview two rows of the training log.
train.show(2)

+-------+-------+---------+-------------------+
|user_id|item_id|relevance|          timestamp|
+-------+-------+---------+-------------------+
|    637|   3930|        1|2000-12-02 05:30:12|
|    637|   3932|        1|2000-12-02 05:53:52|
+-------+-------+---------+-------------------+
only showing top 2 rows



# 1. Metrics definition

In [16]:
# The Experiment evaluates every added prediction against `test`.
# Each metric is mapped to its cut-off(s); Coverage and Surprisal are built from `train`.
metrics_at_k = {
    MAP(): K,
    NDCG(): K,
    HitRate(): K_list_metrics,
    Coverage(train): K,
    Surprisal(train): K,
    MRR(): K,
}
e = Experiment(test, metrics_at_k)

# 2. Models training

## 2.1. Non-personalized models

In [17]:
# name -> [model, search space]; 'no_opt' means `full_pipeline` skips
# hyperparameter search and fits the model as constructed here.
non_personalized_models = {
    'Popular Recommender': [PopRec(), 'no_opt'],
    'Random Recommender (uniform)': [
        RandomRec(seed=SEED, distribution='uniform'),
        'no_opt',
    ],
    'Random Recommender (popularity-based)': [
        RandomRec(seed=SEED, distribution='popular_based'),
        {"alpha": [-0.5, 100]},
    ],
    'Wilson Recommender': [Wilson(), 'no_opt'],
}

In [18]:
def fit_predict_add_res(name, model, experiment, train, suffix=''):
    """
    Run fit_predict for `model`, time it, add the predictions to `experiment`
    and print the current leaderboard.

    Reads the notebook-level `K`, `test`, `pos_neg_train`, `item_features`
    and `user_features`.

    :param name: model name; `name + suffix` is the row label in `experiment.results`
    :param model: recommender to fit and predict with
    :param experiment: Experiment the predictions are added to
    :param train: log passed to `fit_predict`; replaced by `pos_neg_train`
        for Wilson and ClassifierRec models
    :param suffix: string appended to `name` in the results table
    """
    result_name = name + suffix
    # monotonic clock: the elapsed time is not affected by system clock adjustments
    start_time = time.perf_counter()

    fit_predict_params = {'log': train, 'k': K, 'users': test.select('user_id').distinct()}
    if isinstance(model, (Wilson, ClassifierRec)):
        # these models are trained on positive and negative feedback
        fit_predict_params['log'] = pos_neg_train

    if isinstance(model, HybridRecommender):
        fit_predict_params['item_features'] = item_features
        fit_predict_params['user_features'] = user_features

    pred = model.fit_predict(**fit_predict_params)
    # the result is discarded; presumably this forces Spark to materialise the
    # predictions so that the measured time covers the actual computation
    pred.count()
    fit_predict_time = time.perf_counter() - start_time

    experiment.add_result(result_name, pred)
    experiment.results.loc[result_name, 'fit_pred_time'] = fit_predict_time

    ndcg_column = 'NDCG@{}'.format(K)
    shown_columns = [ndcg_column, 'MRR@{}'.format(K), 'Coverage@{}'.format(K), 'fit_pred_time']
    print(experiment.results[shown_columns].sort_values(ndcg_column, ascending=False))

In [19]:
def full_pipeline(models, experiment, train, suffix='', budget=BUDGET):
    """
    Optimise (when requested) and evaluate every model in `models`.

    `models` maps a display name to `[model, params]`. When `params` is
    'no_opt' the model is used as is. Any other value (including None) is
    passed as `param_grid` to `model.optimize` on `opt_train` / `opt_val`;
    the best parameters are set on the model and their repr is stored in
    `experiment.results`. Every model is then passed to `fit_predict_add_res`.
    """
    for name, (model, params) in models.items():
        model.logger.info(msg='{} started'.format(name))
        needs_optimization = params != 'no_opt'
        if needs_optimization:
            model.logger.info(msg='{} optimization started'.format(name))
            optimize_kwargs = dict(
                param_grid=params,
                item_features=item_features,
                user_features=user_features,
                k=K,
                budget=budget,
            )
            best_params = model.optimize(opt_train, opt_val, **optimize_kwargs)
            model.set_params(**best_params)
            logger.info(msg='best params for {} are: {}'.format(name, best_params))
            experiment.results.loc[name + suffix, 'params'] = repr(best_params)

        logger.info(msg='{} fit_predict started'.format(name))
        fit_predict_add_res(name, model, experiment, train, suffix)

In [None]:
%%time
# Evaluate the non-personalized baselines; results are added to `e`.
full_pipeline(non_personalized_models, e, train)

In [21]:
# Leaderboard so far, best NDCG first; the column name follows `K`.
e.results.sort_values('NDCG@{}'.format(K), ascending=False)

Unnamed: 0,Coverage@10,HitRate@1,HitRate@5,HitRate@10,MAP@10,MRR@10,NDCG@10,Surprisal@10,fit_pred_time,params
Popular Recommender,0.033903,0.28446,0.53029,0.645303,0.157194,0.390414,0.243614,0.118354,12.300273,
Wilson Recommender,0.017092,0.083406,0.34504,0.414399,0.045002,0.180976,0.092121,0.26219,10.033913,
Random Recommender (popularity-based),0.653965,0.060579,0.255487,0.381914,0.028404,0.141636,0.069369,0.317856,6.827084,{'alpha': 1.2948997611910968}
Random Recommender (uniform),0.960773,0.032485,0.107112,0.183494,0.009075,0.067583,0.025557,0.53693,7.897583,


In [22]:
# Save the results collected so far (after section 2.1).
e.results.to_csv('res_21_rel_1.csv')

## 2.2  Personalized models without features

In [23]:
# name -> [model, search space]. None is not 'no_opt', so `full_pipeline`
# calls `optimize` with `param_grid=None` (presumably the model's default
# search space); LightFM gets an explicit range for `no_components`.
common_models = {
    'ADMM SLIM': [ADMMSLIM(seed=SEED), None],
    'Implicit ALS': [ALSWrap(seed=SEED), None],
    'Explicit ALS': [ALSWrap(seed=SEED, implicit_prefs=False), None],
    'KNN': [KNN(), None],
    'LightFM': [
        LightFMWrap(random_state=SEED),
        {"no_components": [8, 512]},
    ],
    'SLIM': [SLIM(seed=SEED), None],
}

In [None]:
%%time
# Tune and evaluate the personalized models without features.
full_pipeline(common_models, e, train)

In [25]:
# Leaderboard so far, best NDCG first; the column name follows `K`.
e.results.sort_values('NDCG@{}'.format(K), ascending=False)

Unnamed: 0,Coverage@10,HitRate@1,HitRate@5,HitRate@10,MAP@10,MRR@10,NDCG@10,Surprisal@10,fit_pred_time,params
Implicit ALS,0.13281,0.305531,0.569798,0.685689,0.171672,0.419297,0.265372,0.162866,10.127573,{'rank': 8}
LightFM,0.151303,0.317823,0.574188,0.698859,0.167327,0.431049,0.262777,0.168066,10.833092,{'no_components': 8}
SLIM,0.040347,0.310799,0.567164,0.669008,0.171509,0.418741,0.26137,0.123728,12.456171,"{'beta': 4.528603379741062, 'lambda_': 0.01886..."
KNN,0.055758,0.294996,0.555751,0.65496,0.166407,0.408699,0.256174,0.137584,17.963558,"{'num_neighbours': 56, 'shrink': 99}"
Popular Recommender,0.033903,0.28446,0.53029,0.645303,0.157194,0.390414,0.243614,0.118354,12.300273,
ADMM SLIM,0.366769,0.188762,0.460053,0.590869,0.084121,0.303578,0.159086,0.236767,77.647394,"{'lambda_1': 0.0017369838173267552, 'lambda_2'..."
Wilson Recommender,0.017092,0.083406,0.34504,0.414399,0.045002,0.180976,0.092121,0.26219,10.033913,
Random Recommender (popularity-based),0.653965,0.060579,0.255487,0.381914,0.028404,0.141636,0.069369,0.317856,6.827084,{'alpha': 1.2948997611910968}
Random Recommender (uniform),0.960773,0.032485,0.107112,0.183494,0.009075,0.067583,0.025557,0.53693,7.897583,
Explicit ALS,0.265621,0.013169,0.055312,0.093064,0.004738,0.032544,0.013044,0.684305,13.875534,{'rank': 60}


In [26]:
# Save the results collected so far (after section 2.2).
e.results.to_csv('res_22_rel_1.csv')

## 2.3 Neural models

In [27]:
# Each network is evaluated twice: as constructed ('no_opt') and after a
# hyperparameter search over the ranges below (None: `param_grid=None`).
nets = {
    'MultVAE with default parameters': [MultVAE(), 'no_opt'],
    'NeuroMF with default parameters': [NeuroMF(), 'no_opt'],
    'Word2Vec with default parameters': [Word2VecRec(seed=SEED), 'no_opt'],
    'MultVAE with optimized parameters': [
        MultVAE(),
        {
            "learning_rate": [0.0001, 0.5],
            "dropout": [0, 0.5],
            "l2_reg": [1e-9, 5],
        },
    ],
    'NeuroMF with optimized parameters': [
        NeuroMF(),
        {
            "learning_rate": [0.0001, 0.5],
            "l2_reg": [1e-4, 5],
            "count_negative_sample": [1, 20],
        },
    ],
    'Word2Vec with optimized parameters': [Word2VecRec(seed=SEED), None],
}

In [None]:
%%time
# Neural models are tuned with budget=10 instead of the default BUDGET.
full_pipeline(nets, e, train, budget=10)

In [29]:
# Leaderboard so far, best NDCG first; the column name follows `K`.
e.results.sort_values('NDCG@{}'.format(K), ascending=False)

Unnamed: 0,Coverage@10,HitRate@1,HitRate@5,HitRate@10,MAP@10,MRR@10,NDCG@10,Surprisal@10,fit_pred_time,params
Implicit ALS,0.13281,0.305531,0.569798,0.685689,0.171672,0.419297,0.265372,0.162866,10.127573,{'rank': 8}
LightFM,0.151303,0.317823,0.574188,0.698859,0.167327,0.431049,0.262777,0.168066,10.833092,{'no_components': 8}
SLIM,0.040347,0.310799,0.567164,0.669008,0.171509,0.418741,0.26137,0.123728,12.456171,"{'beta': 4.528603379741062, 'lambda_': 0.01886..."
KNN,0.055758,0.294996,0.555751,0.65496,0.166407,0.408699,0.256174,0.137584,17.963558,"{'num_neighbours': 56, 'shrink': 99}"
Popular Recommender,0.033903,0.28446,0.53029,0.645303,0.157194,0.390414,0.243614,0.118354,12.300273,
MultVAE with default parameters,0.032222,0.286216,0.519754,0.658472,0.154847,0.39379,0.243479,0.121923,32.609149,
MultVAE with optimized parameters,0.030821,0.287094,0.543459,0.640035,0.150969,0.395733,0.237955,0.122875,26.977435,"{'learning_rate': 0.010693178531368242, 'dropo..."
NeuroMF with optimized parameters,0.076772,0.021949,0.524144,0.653205,0.114313,0.243165,0.198788,0.231221,2791.187742,"{'learning_rate': 0.004837890834754644, 'l2_re..."
NeuroMF with default parameters,0.257495,0.187006,0.501317,0.626866,0.110592,0.317911,0.193122,0.235454,350.737231,
ADMM SLIM,0.366769,0.188762,0.460053,0.590869,0.084121,0.303578,0.159086,0.236767,77.647394,"{'lambda_1': 0.0017369838173267552, 'lambda_2'..."


In [30]:
# Save the results collected so far (after section 2.3).
e.results.to_csv('res_23_rel_1.csv')

## 2.4 Ensembles of recommenders

In [32]:
# The base models copy hyperparameters from the model instances in
# `common_models`, on which `full_pipeline` called `set_params` in section 2.2.
tuned_lightfm = common_models['LightFM'][0]
tuned_knn = common_models['KNN'][0]
tuned_als = common_models['Implicit ALS'][0]

stack_models = [
    LightFMWrap(random_state=SEED, no_components=tuned_lightfm.no_components),
    KNN(num_neighbours=tuned_knn.num_neighbours, shrink=tuned_knn.shrink),
    ALSWrap(seed=SEED, rank=tuned_als.rank),
]
ensembles = {
    'Stack Recommender (LightFM + KNN + ALS)': [
        Stack(models=stack_models, n_folds=3, budget=BUDGET, seed=SEED),
        'no_opt',
    ],
}

In [33]:
# Raise RePlay log verbosity to DEBUG before the ensemble run;
# the level is not reset afterwards, so it also applies to the following cells.
State().logger.setLevel(logging.DEBUG)

In [None]:
%%time
# Fit and evaluate the Stack ensemble ('no_opt': no outer hyperparameter search).
full_pipeline(ensembles, e, train)

In [35]:
# Leaderboard so far, best NDCG first; the column name follows `K`.
e.results.sort_values('NDCG@{}'.format(K), ascending=False)

Unnamed: 0,Coverage@10,HitRate@1,HitRate@5,HitRate@10,MAP@10,MRR@10,NDCG@10,Surprisal@10,fit_pred_time,params
Implicit ALS,0.13281,0.305531,0.569798,0.685689,0.171672,0.419297,0.265372,0.162866,10.127573,{'rank': 8}
LightFM,0.151303,0.317823,0.574188,0.698859,0.167327,0.431049,0.262777,0.168066,10.833092,{'no_components': 8}
SLIM,0.040347,0.310799,0.567164,0.669008,0.171509,0.418741,0.26137,0.123728,12.456171,"{'beta': 4.528603379741062, 'lambda_': 0.01886..."
Stack Recommender (LightFM + KNN + ALS),0.057439,0.304653,0.562774,0.661106,0.169578,0.416054,0.26057,0.136999,1057.291856,
KNN,0.055758,0.294996,0.555751,0.65496,0.166407,0.408699,0.256174,0.137584,17.963558,"{'num_neighbours': 56, 'shrink': 99}"
Popular Recommender,0.033903,0.28446,0.53029,0.645303,0.157194,0.390414,0.243614,0.118354,12.300273,
MultVAE with default parameters,0.032222,0.286216,0.519754,0.658472,0.154847,0.39379,0.243479,0.121923,32.609149,
MultVAE with optimized parameters,0.030821,0.287094,0.543459,0.640035,0.150969,0.395733,0.237955,0.122875,26.977435,"{'learning_rate': 0.010693178531368242, 'dropo..."
NeuroMF with optimized parameters,0.076772,0.021949,0.524144,0.653205,0.114313,0.243165,0.198788,0.231221,2791.187742,"{'learning_rate': 0.004837890834754644, 'l2_re..."
NeuroMF with default parameters,0.257495,0.187006,0.501317,0.626866,0.110592,0.317911,0.193122,0.235454,350.737231,


In [37]:
# Weights of each base recommender in the fitted Stack ensemble.
ensembles['Stack Recommender (LightFM + KNN + ALS)'][0].params

{'LightFMWrap': 0.9461744498894694,
 'KNN': 0.4653759137084064,
 'ALSWrap': 0.4443127394321243}

In [38]:
# Save the results collected so far (after section 2.4).
e.results.to_csv('res_24_rel_1.csv')

## 2.5 Models considering features

### 2.5.1 item features preprocessing

In [39]:
%%time
item_features = DataPreparator().transform(
    data=data.items,
    columns_names={
        "item_id": "item_id"
    }
)

CPU times: user 41.9 ms, sys: 0 ns, total: 41.9 ms
Wall time: 129 ms


In [40]:
# Preview the raw item table (genres and title are still strings).
item_features.show(2)

+-------+--------------------+----------------+
|item_id|              genres|           title|
+-------+--------------------+----------------+
|      1|Animation|Childre...|Toy Story (1995)|
|      2|Adventure|Childre...|  Jumanji (1995)|
+-------+--------------------+----------------+
only showing top 2 rows



In [41]:
# The release year is taken from the title: 4 characters starting 5 from the
# end, e.g. "Toy Story (1995)" -> 1995.
# Relies on `item_features` still having the `title` column; it is replaced
# by genre flags a few cells below.
release_year = sf.substring(sf.col('title'), -5, 4).cast(st.IntegerType())
year = item_features.select('item_id', release_year.alias('year'))
year.show(2)

+-------+----+
|item_id|year|
+-------+----+
|      1|1995|
|      2|1995|
+-------+----+
only showing top 2 rows



In [42]:
# Split the pipe-separated genre string into an array column.
# The pattern is a regular expression, so the pipe is escaped; a raw string
# keeps the backslash without relying on an invalid "\|" escape sequence.
genres = (
    State().session.createDataFrame(data.items[["item_id", "genres"]])
    .select(
        "item_id",
        sf.split("genres", r"\|").alias("genres")
    )
)

In [43]:
# Distinct genre names, excluding the "(no genres listed)" placeholder.
distinct_genres = (
    genres
    .select(sf.explode("genres").alias("genre"))
    .distinct()
    .filter('genre <> "(no genres listed)"')
)
genres_list = distinct_genres.toPandas()["genre"].tolist()

In [44]:
# Show the genre names that become feature columns.
genres_list

['Documentary',
 'Fantasy',
 'Adventure',
 'War',
 'Animation',
 'Comedy',
 'Thriller',
 'Film-Noir',
 'Crime',
 'Sci-Fi',
 'Musical',
 'Mystery',
 "Children's",
 'Drama',
 'Horror',
 'Western',
 'Romance',
 'Action']

In [45]:
from pyspark.sql.functions import col, lit, array_contains
from pyspark.sql.types import IntegerType

# One 0/1 column per genre, built in a single `select` instead of a chain of
# `withColumn` calls in a loop (each call adds a projection to the query plan).
genre_flags = [
    array_contains(col("genres"), genre).astype(IntegerType()).alias(genre)
    for genre in genres_list
]
# `item_id` followed by the genre columns; the `genres` array itself is not kept.
item_features = genres.select("item_id", *genre_flags).cache()
item_features.count()

3883

In [46]:
# Add the release year to the genre flags.
# NOTE(review): `item_features` is overwritten with its own join result, so
# re-running this cell alone would presumably join `year` a second time.
item_features = item_features.join(year, on='item_id', how='inner')
item_features.count()

3883

In [47]:
# Cache the joined feature table (the pre-join table was cached separately above).
item_features.cache()

DataFrame[item_id: int, Documentary: int, Fantasy: int, Adventure: int, War: int, Animation: int, Comedy: int, Thriller: int, Film-Noir: int, Crime: int, Sci-Fi: int, Musical: int, Mystery: int, Children's: int, Drama: int, Horror: int, Western: int, Romance: int, Action: int, year: int]

In [48]:
# Preview the final item features: genre flags plus year.
item_features.show(3)

+-------+-----------+-------+---------+---+---------+------+--------+---------+-----+------+-------+-------+----------+-----+------+-------+-------+------+----+
|item_id|Documentary|Fantasy|Adventure|War|Animation|Comedy|Thriller|Film-Noir|Crime|Sci-Fi|Musical|Mystery|Children's|Drama|Horror|Western|Romance|Action|year|
+-------+-----------+-------+---------+---+---------+------+--------+---------+-----+------+-------+-------+----------+-----+------+-------+-------+------+----+
|      1|          0|      0|        0|  0|        1|     1|       0|        0|    0|     0|      0|      0|         1|    0|     0|      0|      0|     0|1995|
|      2|          0|      1|        1|  0|        0|     0|       0|        0|    0|     0|      0|      0|         1|    0|     0|      0|      0|     0|1995|
|      3|          0|      0|        0|  0|        0|     1|       0|        0|    0|     0|      0|      0|         0|    0|     0|      0|      1|     0|1995|
+-------+-----------+-------+-----

### 2.5.2 Models training

In [49]:
# `item_features` is no longer None here, so `full_pipeline` passes the
# genre/year features to `optimize` and, for HybridRecommender models,
# to `fit_predict`.
models_with_features = {
    'Classifier Recommender': [ClassifierRec(), 'no_opt'],
    'LightFM with item features': [
        LightFMWrap(random_state=SEED),
        {"no_components": [8, 512]},
    ],
}

In [None]:
%%time
# Evaluate the models that use item features.
full_pipeline(models_with_features, e, train)

In [51]:
# Final leaderboard, best NDCG first; the column name follows `K`.
e.results.sort_values('NDCG@{}'.format(K), ascending=False)

Unnamed: 0,Coverage@10,HitRate@1,HitRate@5,HitRate@10,MAP@10,MRR@10,NDCG@10,Surprisal@10,fit_pred_time,params
Implicit ALS,0.13281,0.305531,0.569798,0.685689,0.171672,0.419297,0.265372,0.162866,10.127573,{'rank': 8}
LightFM,0.151303,0.317823,0.574188,0.698859,0.167327,0.431049,0.262777,0.168066,10.833092,{'no_components': 8}
SLIM,0.040347,0.310799,0.567164,0.669008,0.171509,0.418741,0.26137,0.123728,12.456171,"{'beta': 4.528603379741062, 'lambda_': 0.01886..."
Stack Recommender (LightFM + KNN + ALS),0.057439,0.304653,0.562774,0.661106,0.169578,0.416054,0.26057,0.136999,1057.291856,
KNN,0.055758,0.294996,0.555751,0.65496,0.166407,0.408699,0.256174,0.137584,17.963558,"{'num_neighbours': 56, 'shrink': 99}"
LightFM with item features,0.231718,0.287972,0.585601,0.690957,0.159831,0.412271,0.254673,0.194597,86.704299,{'no_components': 78}
Popular Recommender,0.033903,0.28446,0.53029,0.645303,0.157194,0.390414,0.243614,0.118354,12.300273,
MultVAE with default parameters,0.032222,0.286216,0.519754,0.658472,0.154847,0.39379,0.243479,0.121923,32.609149,
MultVAE with optimized parameters,0.030821,0.287094,0.543459,0.640035,0.150969,0.395733,0.237955,0.122875,26.977435,"{'learning_rate': 0.010693178531368242, 'dropo..."
NeuroMF with optimized parameters,0.076772,0.021949,0.524144,0.653205,0.114313,0.243165,0.198788,0.231221,2791.187742,"{'learning_rate': 0.004837890834754644, 'l2_re..."


In [52]:
# Save the full results table (after section 2.5).
e.results.to_csv('res_25_rel_1.csv')

# 3. Results

The best results in terms of both quality and fit/predict time were shown by commonly used models such as implicit ALS, SLIM and LightFM.