In [1]:
pip install surprise

Collecting surprise
  Downloading https://files.pythonhosted.org/packages/61/de/e5cba8682201fcf9c3719a6fdda95693468ed061945493dea2dd37c5618b/surprise-0.1-py2.py3-none-any.whl
Collecting scikit-surprise
[?25l  Downloading https://files.pythonhosted.org/packages/97/37/5d334adaf5ddd65da99fc65f6507e0e4599d092ba048f4302fe8775619e8/scikit-surprise-1.1.1.tar.gz (11.8MB)
[K     |████████████████████████████████| 11.8MB 11.7MB/s 
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.1-cp36-cp36m-linux_x86_64.whl size=1618287 sha256=91992834b9d0d78b2416891ae67ac4d6307adfd488c2c6800332309e3acd32f6
  Stored in directory: /root/.cache/pip/wheels/78/9c/3d/41b419c9d2aff5b6e2b4c0fc8d25c538202834058f9ed110d0
Successfully built scikit-surprise
Installing collected packages: scikit-surprise, surprise
Successfully installed scikit-surprise-1.1.1 surprise-0.1


In [106]:
import pandas as pd
import numpy as np
#KNNBasis
from surprise import KNNWithMeans, BaselineOnly
from surprise.model_selection import train_test_split
from surprise import accuracy

from surprise import Dataset, Reader

In [5]:
reader = Reader(line_format='user item rating timestamp', sep=',', skip_lines=1)
data = Dataset.load_from_file('ratings.csv', reader)
data

<surprise.dataset.DatasetAutoFolds at 0x7fd7089a5b70>

In [6]:
trainset, testset = train_test_split(data, test_size=.25)

In [60]:
metrics_dict={}

USER BASED - COSINE SIMILARITY 

In [61]:
#User Based - cosine sim 

sim_options = {
    "name": "cosine",
    "user_based": True
}

In [62]:
algo = KNNWithMeans(sim_options=sim_options)

In [63]:
algo.fit(trainset)

Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithMeans at 0x7fd7056055c0>

In [64]:
predictions = algo.test(testset)
#predictions

In [65]:
accuracy.rmse(predictions)
accuracy.mae(predictions)
metrics_dict.update({'User based - cosine':[accuracy.rmse(predictions), accuracy.mae(predictions)]})

RMSE: 0.9015
MAE:  0.6885
RMSE: 0.9015
MAE:  0.6885


In [66]:
metrics_dict

{'User based - cosine': [0.9015134874314839, 0.6884815481477338]}

In [67]:
## User based -  Pearson 
sim_options = {"name": "pearson_baseline",
               "shrinkage": 0,  # no shrinkage,
               "user_based": True, 
               }

In [68]:
algo = KNNWithMeans(sim_options=sim_options)

In [69]:
algo.fit(trainset)

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithMeans at 0x7fd705341da0>

In [70]:
predictions = algo.test(testset)

In [71]:
accuracy.rmse(predictions)
accuracy.mae(predictions)
metrics_dict.update({'User based - Pearson ':[accuracy.rmse(predictions), accuracy.mae(predictions)]})

RMSE: 0.9077
MAE:  0.6908
RMSE: 0.9077
MAE:  0.6908


In [72]:
# ITEM BASED - Cosine Sim 
sim_options = {"name": "cosine","user_based": False,}

In [73]:
algo = KNNWithMeans(sim_options=sim_options)

In [74]:
algo.fit(trainset)

Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithMeans at 0x7fd705341c18>

In [75]:
predictions = algo.test(testset)

In [76]:
accuracy.rmse(predictions)
accuracy.mae(predictions)
metrics_dict.update({'Item Based - cosine':[accuracy.rmse(predictions), accuracy.mae(predictions)]})

RMSE: 0.9034
MAE:  0.6878
RMSE: 0.9034
MAE:  0.6878


In [77]:
# ITEM BASED - PEARSON 
sim_options = {"name": "pearson_baseline",
               "shrinkage": 0,  # no shrinkage,
               "user_based": False, 
               }

In [78]:
algo = KNNWithMeans(sim_options=sim_options)

In [79]:
algo.fit(trainset)

Estimating biases using als...
Computing the pearson_baseline similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithMeans at 0x7fd705341940>

In [80]:
predictions = algo.test(testset)

In [81]:
accuracy.rmse(predictions)
accuracy.mae(predictions)
metrics_dict.update({'Item based - Pearson ':[accuracy.rmse(predictions), accuracy.mae(predictions)]})

RMSE: 0.9030
MAE:  0.6848
RMSE: 0.9030
MAE:  0.6848


In [82]:
# BaseLine Model 
bsl_options = {'method': 'als',
               'n_epochs': 5,
               'reg_u': 12,
               'reg_i': 5
               }

In [83]:
algo = BaselineOnly(bsl_options=bsl_options)

In [84]:
algo.fit(trainset)

Estimating biases using als...


<surprise.prediction_algorithms.baseline_only.BaselineOnly at 0x7fd705341e48>

In [85]:
predictions = algo.test(testset)

In [86]:
accuracy.rmse(predictions)
accuracy.mae(predictions)
metrics_dict.update({'BaseLine Model ':[accuracy.rmse(predictions), accuracy.mae(predictions)]})

RMSE: 0.8670
MAE:  0.6667
RMSE: 0.8670
MAE:  0.6667


In [87]:
#We have a single dictionary with metrics for all Variations 
metrics_dict

{'BaseLine Model ': [0.8669635962071377, 0.6666955425332983],
 'Item Based - cosine': [0.903415333542627, 0.6878341932222296],
 'Item based - Pearson ': [0.9030214059522983, 0.6848020023489908],
 'User based - Pearson ': [0.907663629300559, 0.6907515694842661],
 'User based - cosine': [0.9015134874314839, 0.6884815481477338]}

In [96]:
data_table = []
for model in metrics_dict.keys():
  line = [model, metrics_dict[model][0],metrics_dict[model][1]]
  data_table.append(line)
print(data_table)



[['User based - cosine', 0.9015134874314839, 0.6884815481477338], ['User based - Pearson ', 0.907663629300559, 0.6907515694842661], ['Item Based - cosine', 0.903415333542627, 0.6878341932222296], ['Item based - Pearson ', 0.9030214059522983, 0.6848020023489908], ['BaseLine Model ', 0.8669635962071377, 0.6666955425332983]]


In [97]:
from tabulate import tabulate

In [98]:
tabulate(data_table,tablefmt="pipe")

'|:---------------------|---------:|---------:|\n| User based - cosine  | 0.901513 | 0.688482 |\n| User based - Pearson | 0.907664 | 0.690752 |\n| Item Based - cosine  | 0.903415 | 0.687834 |\n| Item based - Pearson | 0.903021 | 0.684802 |\n| BaseLine Model       | 0.866964 | 0.666696 |'

In [101]:
header = ['Model',
          'RMSE',
          'MAE'
          ]
df = pd.DataFrame(data_table)
df.columns = header
df

Unnamed: 0,Model,RMSE,MAE
0,User based - cosine,0.901513,0.688482
1,User based - Pearson,0.907664,0.690752
2,Item Based - cosine,0.903415,0.687834
3,Item based - Pearson,0.903021,0.684802
4,BaseLine Model,0.866964,0.666696


In [103]:
#RMSE 
import plotly.express as px
fig = px.bar(df, x='Model', y='RMSE', title="RMSE")
fig.show()

In [104]:
#MAE
import plotly.express as px
fig = px.bar(df, x='Model', y='MAE', title="MAE")
fig.show()