In [1]:
from matrix_factorization import SVD

In [2]:
import numpy as np
import pandas as pd
import surprise  # Only used for comparison and getting the dataset, not required to run refactored code
from tqdm import tqdm  # Only used for timing purposes

from sklearn.metrics import root_mean_squared_error

# Prepare the dataset

In [3]:
# Load Surprise's built-in 'ml-100k' ratings dataset.
# NOTE(review): presumably this may prompt for / trigger a download on first use — confirm
# before relying on an unattended "Restart & Run All".
dataset = surprise.Dataset.load_builtin('ml-100k')

In [4]:
# Flatten the raw rating records into a DataFrame with one row per rating.
# The four column names are assigned positionally to each raw record.
df = pd.DataFrame(dataset.raw_ratings, columns=['user_id', 'item_id', 'ratings', 'timestamp'])

In [5]:
# Sanity check: row count, column dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100000 entries, 0 to 99999
Data columns (total 4 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   user_id    100000 non-null  object 
 1   item_id    100000 non-null  object 
 2   ratings    100000 non-null  float64
 3   timestamp  100000 non-null  object 
dtypes: float64(1), object(3)
memory usage: 3.1+ MB


# Refactored Code

## Fit the model and predict on the training data

In [6]:
# Fit the refactored SVD once per random seed (0..99) and predict back on the
# same (user, item) pairs it was trained on.
new_predictions = []
for i in tqdm(range(100)):
    svd = SVD(random_state=i, n_epochs=100, n_factors=10)
    svd.fit(X=df.loc[:, ['user_id', 'item_id']], y=df.loc[:, 'ratings'])
    new_predictions += [svd.predict(df.loc[:, ['user_id', 'item_id']])]
# Runs are stacked as rows; transpose so that each column holds one run.
all_new_predictions = np.transpose(np.vstack(new_predictions))

100%|█████████████████████████████████████████████████████████| 100/100 [00:29<00:00,  3.40it/s]


In [7]:
# One row per rating, one column per run (i.e. per random seed)
all_new_predictions.shape

(100000, 100)

# Surprise original package

## Create the trainset (required for training) and a helper function to generate predictions

In [8]:
# Surprise algorithms are fitted on a trainset object rather than a DataFrame;
# build one from the full dataset so it covers the same ratings as `df`.
trainset = dataset.build_full_trainset()

In [9]:
def get_df_surprise_svd_predictions(df, surprise_svd, trainset=None):
    """Return a fitted Surprise model's estimates for every row of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``'user_id'`` and ``'item_id'`` columns holding *raw* ids.
    surprise_svd : fitted Surprise algorithm
        Object exposing ``estimate(inner_uid, inner_iid)``.
    trainset : Surprise trainset, optional
        Used to translate raw ids to inner ids. Defaults to
        ``surprise_svd.trainset``, i.e. the trainset the model was fitted on,
        so the id mapping always matches the model and the function no longer
        relies on a notebook-global ``trainset`` variable.

    Returns
    -------
    numpy.ndarray
        1-D array with one estimate per row of ``df``, in row order.

    Raises
    ------
    Whatever ``to_inner_uid`` / ``to_inner_iid`` raise for ids the trainset
    does not know (not handled here).
    """
    if trainset is None:
        # The inner ids passed to `estimate` are only meaningful relative to
        # the trainset the model itself was fitted on.
        trainset = surprise_svd.trainset

    # NOTE(review): `estimate` is called directly (not `predict`), so any
    # post-processing `predict` would apply is bypassed — confirm intended.
    return np.asarray([
        surprise_svd.estimate(trainset.to_inner_uid(raw_user), trainset.to_inner_iid(raw_item))
        for raw_user, raw_item in zip(df['user_id'], df['item_id'])
    ])

## Fit the model and predict on the training data

In [10]:
# Fit Surprise's SVD once per random seed (0..99) and collect its estimates
# for every (user, item) pair in `df`.
surprise_predictions = []
for i in tqdm(range(100)):
    surprise_svd = surprise.SVD(random_state=i, n_epochs=100, n_factors=10)
    surprise_svd.fit(trainset)
    surprise_predictions += [get_df_surprise_svd_predictions(df, surprise_svd)]
# Runs are stacked as rows; transpose so that each column holds one run.
all_surprise_predictions = np.transpose(np.vstack(surprise_predictions))

100%|█████████████████████████████████████████████████████████| 100/100 [00:58<00:00,  1.71it/s]


In [11]:
# One row per rating, one column per run (i.e. per random seed)
all_surprise_predictions.shape

(100000, 100)

# Compare predictions from both models

## Compare RMSEs for each pair of runs (same random seed)

In [12]:
# Column i of each prediction matrix is one run; score both models' run i
# against the true ratings and record which one has the lower RMSE.
rmses = []
new_is_better = []
for i in range(all_surprise_predictions.shape[1]):
    new_rmse = root_mean_squared_error(y_true=df['ratings'], y_pred=all_new_predictions[:, i])
    surprise_rmse = root_mean_squared_error(y_true=df['ratings'], y_pred=all_surprise_predictions[:, i])
    # Tuple order matters downstream: position 0 is "new", position 1 is "surprise".
    rmses += [(new_rmse, surprise_rmse)]
    new_is_better += [new_rmse < surprise_rmse]

In [13]:
### Fraction of runs in which new has a lower RMSE than Surprise (close to 0.5 means the difference is just random)

In [14]:
# Share of runs in which the refactored SVD achieved the strictly lower RMSE.
np.asarray(new_is_better).mean()

0.49

In [15]:
### Compare the sum of RMSEs over all runs (column 0 = new, column 1 = Surprise)

In [16]:
# Each row of `rmses` is (new_rmse, surprise_rmse); sum down the rows, per column.
pd.DataFrame(rmses).sum(axis=0)

0    71.040092
1    71.037300
dtype: float64

## Pair every run and every rating, and tabulate the rate at which new is better (smaller absolute error) than Surprise

In [17]:
# Turn the true ratings into a column vector so they broadcast across every
# run (column) of the prediction matrices.
new_abs_errors = np.absolute(all_new_predictions - df['ratings'].to_numpy()[:, np.newaxis])
surprise_abs_errors = np.absolute(all_surprise_predictions - df['ratings'].to_numpy()[:, np.newaxis])

In [18]:
# Strict "<": exact ties count as "not better" for the refactored SVD.
np.mean(new_abs_errors < surprise_abs_errors)

0.4997735

## Distribution, across all ratings, of the standard deviation of each rating's predictions over the runs

In [19]:
# For each rating (row), take the standard deviation of its predictions across
# runs (axis=1), then summarise how those per-rating spreads are distributed.
std_distributions = pd.DataFrame({
    'new': pd.Series(all_new_predictions.std(axis=1)).describe(),
    'surprise': pd.Series(all_surprise_predictions.std(axis=1)).describe(),
})
std_distributions

Unnamed: 0,new,surprise
count,100000.0,100000.0
mean,0.224764,0.223489
std,0.088388,0.088877
min,0.02909,0.015741
25%,0.159804,0.15845
50%,0.210414,0.209205
75%,0.274993,0.273883
max,0.898698,0.849841
