<i>Copyright (c) Microsoft Corporation. All rights reserved.</i>

<i>Licensed under the MIT License.</i>

# Benchmark with Movielens dataset

This notebook compares several algorithms on the Movielens dataset.



## 0 Global settings

In [1]:
import sys
sys.path.append("../")
import os
import json
import pandas as pd
import numpy as np
import seaborn as sns
import pyspark
import torch
import fastai
import tensorflow as tf
import surprise

from reco_utils.common.general_utils import get_number_processors
from reco_utils.common.gpu_utils import get_cuda_version, get_cudnn_version
from reco_utils.dataset import movielens
from reco_utils.dataset.python_splitters import python_stratified_split

from benchmark_utils import * #TODO: change this

# Report the interpreter and library versions this benchmark was run with.
print(f"System version: {sys.version}")
print(f"Pandas version: {pd.__version__}")
print(f"PySpark version: {pyspark.__version__}")
print(f"Surprise version: {surprise.__version__}")
print(f"PyTorch version: {torch.__version__}")
print(f"Fast AI version: {fastai.__version__}")
print(f"Tensorflow version: {tf.__version__}")
print(f"CUDA version: {get_cuda_version()}")
print(f"CuDNN version: {get_cudnn_version()}")

# Processor count as reported by the reco_utils helper; kept in a variable for later use.
n_cores = get_number_processors()
print(f"Number of cores: {n_cores}")

%load_ext autoreload
%autoreload 2

System version: 3.6.8 |Anaconda, Inc.| (default, Dec 30 2018, 01:22:34) 
[GCC 7.3.0]
Pandas version: 0.24.1
PySpark version: 2.3.1
Surprise version: 1.0.6
PyTorch version: 1.0.0
Fast AI version: 1.0.46
Tensorflow version: 1.12.0
CUDA version: CUDA Version 9.2.148
CuDNN version: 7.2.1
Number of cores: 6


In [3]:
# Model parameters: training length for each group of algorithms
EPOCHS_CPU = 30  # used as "n_epochs" in svd_params
EPOCHS_PYSPARK = 15  # used as "maxIter" in als_params
EPOCHS_GPU = 5  # used as "epochs" in fastai_params and "n_epochs" in ncf_params


In [4]:
# Hide fastai progress bar
# NOTE(review): hide_fastai_progress_bar is not defined in this notebook;
# presumably it is provided by the `benchmark_utils` star import - confirm.
hide_fastai_progress_bar()

In [5]:
# Fix random seeds to make sure our runs are reproducible.
# NOTE(review): SEED is not defined in this notebook; presumably it is provided
# by the `benchmark_utils` star import - confirm.
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

In [6]:
# Label of the environment associated with each algorithm key.
environments = dict(
    als="pyspark",
    sar="python_cpu",
    svd="python_cpu",
    fastai="python_gpu",
    ncf="python_gpu",
)

# Kinds of evaluation run for each algorithm key: "rating" and/or "ranking".
metrics = dict(
    als=["rating", "ranking"],
    sar=["ranking"],
    svd=["rating", "ranking"],
    fastai=["rating", "ranking"],
    ncf=["ranking"],
)

In [7]:
# Settings handed to the "als" trainer through the `params` registry below.
als_params = dict(
    rank=10,
    maxIter=EPOCHS_PYSPARK,
    implicitPrefs=False,
    alpha=0.1,
    regParam=0.05,
    coldStartStrategy="drop",
    nonnegative=False,
    userCol=USER_COL,
    itemCol=ITEM_COL,
    ratingCol=RATING_COL,
)

# Settings handed to the "sar" trainer.
sar_params = dict(
    remove_seen=True,
    similarity_type="jaccard",
    time_decay_coefficient=30,
    time_now=None,
    timedecay_formula=True,
    col_user=USER_COL,
    col_item=ITEM_COL,
    col_rating=RATING_COL,
    col_timestamp=TIMESTAMP_COL,
)

# Settings handed to the "svd" trainer.
svd_params = dict(
    n_factors=200,
    n_epochs=EPOCHS_CPU,
    lr_all=0.005,
    reg_all=0.02,
    random_state=SEED,
    verbose=False,
)

# Settings handed to the "fastai" trainer.
fastai_params = dict(
    n_factors=40,
    y_range=[0, 5.5],
    wd=1e-1,
    max_lr=5e-3,
    epochs=EPOCHS_GPU,
)

# Settings handed to the "ncf" trainer.
ncf_params = dict(
    model_type="NeuMF",
    n_factors=4,
    layer_sizes=[16, 8, 4],
    n_epochs=EPOCHS_GPU,
    batch_size=1024,
    learning_rate=1e-3,
    verbose=10,
)


# Registry: algorithm key -> parameter dict, looked up as params[algo] in the benchmark loop.
params = dict(
    als=als_params,
    sar=sar_params,
    svd=svd_params,
    fastai=fastai_params,
    ncf=ncf_params,
)

In [8]:
# Algorithm key -> function that converts the training frame before training.
# Keys missing here ("sar") use the identity fallback in the benchmark loop's .get() call.
prepare_training_data = dict(
    als=prepare_training_als,
    svd=prepare_training_svd,
    fastai=prepare_training_fastai,
    ncf=prepare_training_ncf,
)

In [9]:
# Algorithm key -> callable(train, test) that converts both frames before
# prediction and evaluation; the benchmark loop unpacks the result as (train, test).
# Keys missing here fall back to returning the frames unchanged, via the default
# passed to .get() in the benchmark loop.
prepare_metrics_data = {
    "als": lambda train, test: prepare_metrics_als(train, test),
    "fastai": lambda train, test: prepare_metrics_fastai(train, test),
}

In [10]:
# Algorithm key -> callable(params, data) that trains a model.
# The benchmark loop unpacks the result as (model, training time).
# The lambdas look up the train_* helpers at call time.
trainer = {
    "als": lambda params, data: train_als(params, data),
    "svd": lambda params, data: train_svd(params, data),
    "sar": lambda params, data: train_sar(params, data), 
    "fastai": lambda params, data: train_fastai(params, data),
    "ncf": lambda params, data: train_ncf(params, data),
}

In [11]:
# Algorithm key -> callable(model, test) producing rating predictions.
# The benchmark loop unpacks the result as (predictions, prediction time).
# Only algorithms whose `metrics` entry contains "rating" are listed.
rating_predictor = {
    "als": lambda model, test: predict_als(model, test),
    "svd": lambda model, test: predict_svd(model, test),
    "fastai": lambda model, test: predict_fastai(model, test),
}

In [12]:
# Algorithm key -> callable(model, test, train) producing top-k recommendations.
# The benchmark loop unpacks the result as (top-k scores, prediction time).
ranking_predictor = {
    "als": lambda model, test, train: recommend_k_als(model, test, train),
    "sar": lambda model, test, train: recommend_k_sar(model, test, train),
    "svd": lambda model, test, train: recommend_k_svd(model, test, train),
    "fastai": lambda model, test, train: recommend_k_fastai(model, test, train),
    "ncf": lambda model, test, train: recommend_k_ncf(model, test, train),
}

In [13]:
# Algorithm key -> callable(test, predictions) computing rating metrics.
# "als" uses the pyspark variant; the other algorithms use the python variant.
# The result is printed with json.dumps and merged into the summary record.
rating_evaluator = {
    "als": lambda test, predictions: rating_metrics_pyspark(test, predictions),
    "svd": lambda test, predictions: rating_metrics_python(test, predictions),
    "fastai": lambda test, predictions: rating_metrics_python(test, predictions)
}
    
    
# Algorithm key -> callable(test, predictions, k) computing ranking metrics at cut-off k.
# "als" uses the pyspark variant; the other algorithms use the python variant.
# The result is printed with json.dumps and merged into the summary record.
ranking_evaluator = {
    "als": lambda test, predictions, k: ranking_metrics_pyspark(test, predictions, k),
    "sar": lambda test, predictions, k: ranking_metrics_python(test, predictions, k),
    "svd": lambda test, predictions, k: ranking_metrics_python(test, predictions, k),
    "fastai": lambda test, predictions, k: ranking_metrics_python(test, predictions, k),
    "ncf": lambda test, predictions, k: ranking_metrics_python(test, predictions, k),
}

In [14]:
def generate_summary(data, algo, k, train_time, time_rating=np.nan, rating_metrics=None, time_ranking=np.nan, ranking_metrics=None):
    """Build one flat result record for a single benchmark run.

    Args:
        data: Dataset label, stored under "Data".
        algo: Algorithm label, stored under "Algo".
        k: Ranking cut-off, stored under "K".
        train_time: Training time, stored under "Train time".
        time_rating: Rating prediction time, stored under "Rating time".
            ``None`` is treated the same as the default NaN.
        rating_metrics (dict): Rating metrics merged into the record. When
            ``None``, NaN placeholders are used for "RMSE", "MAE", "R2" and
            "Explained Variance".
        time_ranking: Ranking prediction time, stored under "Ranking time".
            ``None`` is treated the same as the default NaN.
        ranking_metrics (dict): Ranking metrics merged into the record. When
            ``None``, NaN placeholders are used for "MAP", "nDCG@k",
            "Precision@k" and "Recall@k".

    Returns:
        dict: The run description, timings and metrics in a single flat dict.
    """
    # Callers may pass None explicitly for a step that was skipped; map it to NaN
    # so every missing value in the record uses the same marker as the metric
    # placeholders below.
    if time_rating is None:
        time_rating = np.nan
    if time_ranking is None:
        time_ranking = np.nan

    summary = {
        "Data": data,
        "Algo": algo,
        "K": k,
        "Train time": train_time,
        "Rating time": time_rating,
        "Ranking time": time_ranking,
    }
    if rating_metrics is None:
        rating_metrics = {
            "RMSE": np.nan,
            "MAE": np.nan,
            "R2": np.nan,
            "Explained Variance": np.nan,
        }
    if ranking_metrics is None:
        ranking_metrics = {
            "MAP": np.nan,
            "nDCG@k": np.nan,
            "Precision@k": np.nan,
            "Recall@k": np.nan,
        }
    # The caller's metric dicts are only read, never modified.
    summary.update(rating_metrics)
    summary.update(ranking_metrics)
    return summary

In [15]:
# Movielens data sizes to benchmark (options: 100k, 1m, 10m, or 20m)
data_sizes = [
    "100k",
    "1m",
]

# Algorithm keys to run, in this order, for every data size
algorithms = [
    "als",
    "svd",
    "sar",
    "ncf",
    "fastai",
]

In [16]:
%%time

# For each data size and each algorithm, a recommender is trained and evaluated,
# and one summary row is appended to df_results.
# Column order of the results table.
cols = ["Data", "Algo", "K", "Train time", "Rating time", "RMSE", "MAE", "R2", "Explained Variance", "Ranking time", "MAP", "nDCG@k", "Precision@k", "Recall@k", ]
df_results = pd.DataFrame(columns=cols)

for data_size in data_sizes:
    # Load the dataset
    df = movielens.load_pandas_df(
        size=data_size,
        header=[USER_COL, ITEM_COL, RATING_COL, TIMESTAMP_COL]
    )
    print("Size of Movielens {}: {}".format(data_size, df.shape))
    
    # Split the dataset; the same split is shared by every algorithm for this
    # data size (ratio=0.75: the recorded output shows roughly 75%/25% rows)
    df_train, df_test = python_stratified_split(df,
                                                ratio=0.75, 
                                                min_rating=1, 
                                                filter_by="item", 
                                                col_user=USER_COL, 
                                                col_item=ITEM_COL
                                                )
    print("Train set size: {}".format(df_train.shape))
    print("Test set size: {}".format(df_test.shape))
   
    # Loop through the algos
    for algo in algorithms:
        print("\nComputing {} algorithm on Movielens {}".format(algo, data_size))
          
        # Data prep for training set; algorithms without an entry in
        # prepare_training_data train on df_train unchanged
        train = prepare_training_data.get(algo, lambda x:x)(df_train)
        
        # Get model parameters
        model_params = params[algo]
          
        # Train the model
        model, time_train = trainer[algo](model_params, train)
        print("Training time: {}".format(time_train))
                
        # Predict and evaluate
        print("\nEvaluating with {}".format(algo))
        # NOTE: `train` is rebound here; from this point on it is the train set
        # in evaluation format, not the training input built above. Algorithms
        # without an entry in prepare_metrics_data get (df_train, df_test) as-is.
        train, test = prepare_metrics_data.get(algo, lambda x,y:(x,y))(df_train, df_test)
        
        if "rating" in metrics[algo]:   
            # Predict for rating
            preds, time_rating = rating_predictor[algo](model, test)
            print("Rating prediction time: {}".format(time_rating))
                        
            # Evaluate for rating
            ratings = rating_evaluator[algo](test, preds)
            print("Rating metrics: \n{}".format(json.dumps(ratings, indent=4, sort_keys=True)))
        else:
            # Rating evaluation is skipped for this algorithm
            ratings = None
            time_rating = None
        
        if "ranking" in metrics[algo]:
            # Predict for ranking
            top_k_scores, time_ranking = ranking_predictor[algo](model, test, train)
            print("Ranking prediction time: {}".format(time_ranking))
            
            # Evaluate for ranking
            rankings = ranking_evaluator[algo](test, top_k_scores, TOP_K)
            print("Ranking metrics: \n{}".format(json.dumps(rankings, indent=4, sort_keys=True)))
        else:
            # Ranking evaluation is skipped for this algorithm
            rankings = None
            time_ranking = None
            
        # Record results as a new row; row labels start at 1 and grow by one per run
        summary = generate_summary(data_size, algo, TOP_K, time_train, time_rating, ratings, time_ranking, rankings)
        df_results.loc[df_results.shape[0] + 1] = summary

4.93MB [00:01, 3.87MB/s]                            


Size of Movielens 100k: (100000, 4)
Train set size: (75066, 4)
Test set size: (24934, 4)

Computing als algorithm on Movielens 100k
Training time: 0:00:04.525615

Evaluating with als
Rating prediction time: 0:00:00.037825
Rating metrics: 
{
    "Explained Variance": 0.2576227331391404,
    "MAE": 0.7492501007129599,
    "R2": 0.26110288313602703,
    "RMSE": 0.9611774952919494
}
Ranking prediction time: 0:00:00.066403
Ranking metrics: 
{
    "MAP": 0.005953451154798406,
    "Precision@k": 0.05482502651113468,
    "Recall@k": 0.0183468268808136,
    "nDCG@k": 0.053792992815155614
}

Computing svd algorithm on Movielens 100k
Training time: 0:00:09.535290

Evaluating with svd
Rating prediction time: 0:00:02.710756
Rating metrics: 
{
    "Explained Variance": 0.2819863360397843,
    "MAE": 0.7443385332830427,
    "R2": 0.2819398068868513,
    "RMSE": 0.9453044174611394
}
Ranking prediction time: 0:00:13.131252
Ranking metrics: 
{
    "MAP": 0.015409627920705892,
    "Precision@k": 0.099151

0.00B [00:00, ?B/s]

Ranking metrics: 
{
    "MAP": 0.027356905704162698,
    "Precision@k": 0.14040296924708381,
    "Recall@k": 0.05665679617058913,
    "nDCG@k": 0.15886958210789323
}


5.92MB [00:01, 5.27MB/s]                            


Size of Movielens 1m: (1000209, 4)
Train set size: (750261, 4)
Test set size: (249948, 4)

Computing als algorithm on Movielens 1m
Training time: 0:00:03.275541

Evaluating with als
Rating prediction time: 0:00:00.013025
Rating metrics: 
{
    "Explained Variance": 0.4041969292713301,
    "MAE": 0.6807346835692278,
    "R2": 0.4097488621219778,
    "RMSE": 0.8620825822445642
}
Ranking prediction time: 0:00:00.052090
Ranking metrics: 
{
    "MAP": 0.0021579544184414574,
    "Precision@k": 0.0322179890674176,
    "Recall@k": 0.010284639696935318,
    "nDCG@k": 0.02590516971909417
}

Computing svd algorithm on Movielens 1m
Training time: 0:01:37.171597

Evaluating with svd
Rating prediction time: 0:00:25.726064
Rating metrics: 
{
    "Explained Variance": 0.36298478224632613,
    "MAE": 0.6989236238040821,
    "R2": 0.3629790170847784,
    "RMSE": 0.8914035737536283
}
Ranking prediction time: 0:03:16.591268
Ranking metrics: 
{
    "MAP": 0.01047125313776413,
    "Precision@k": 0.087907901

In [17]:
# Display the collected benchmark results (one row per data size and algorithm)
df_results

Unnamed: 0,Data,Algo,K,Train time,Rating time,RMSE,MAE,R2,Explained Variance,Ranking time,MAP,nDCG@k,Precision@k,Recall@k
1,100k,als,10,0:00:04.525615,0:00:00.037825,0.961177,0.74925,0.261103,0.257623,0:00:00.066403,0.005953,0.053793,0.054825,0.018347
2,100k,svd,10,0:00:09.535290,0:00:02.710756,0.945304,0.744339,0.28194,0.281986,0:00:13.131252,0.01541,0.111125,0.099152,0.034326
3,100k,sar,10,0:00:00.215702,,,,,,0:00:00.101380,0.003903,0.033111,0.040827,0.02401
4,100k,ncf,10,0:00:20.404797,,,,,,0:00:02.681851,0.091839,0.355767,0.309544,0.161812
5,100k,fastai,10,0:00:20.793080,0:00:00.040010,0.903492,0.714408,0.344057,0.344997,0:00:03.051865,0.027357,0.15887,0.140403,0.056657
6,1m,als,10,0:00:03.275541,0:00:00.013025,0.862083,0.680735,0.409749,0.404197,0:00:00.052090,0.002158,0.025905,0.032218,0.010285
7,1m,svd,10,0:01:37.171597,0:00:25.726064,0.891404,0.698924,0.362979,0.362985,0:03:16.591268,0.010471,0.096016,0.087908,0.024581
8,1m,sar,10,0:00:02.145341,,,,,,0:00:02.432556,0.002482,0.034275,0.042438,0.015325
9,1m,ncf,10,0:04:03.736511,,,,,,0:00:38.344802,0.053582,0.316194,0.289283,0.093617
10,1m,fastai,10,0:03:21.684542,0:00:00.421794,0.881981,0.701716,0.376375,0.378968,0:00:52.389096,0.024375,0.175191,0.160295,0.053431
