## Application of ALS, WALS & SVD on MovieLens

In [11]:
#%load_ext watermark
%load_ext autoreload 
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [12]:
import os
import sys
import numpy as np
import pandas as pd
from pybpr import *
import matplotlib.pyplot as plt
from functools import partial
import itertools
import scipy.sparse as ss
import pathos.multiprocessing as mp
import tqdm

In [13]:
%%time
#df = load_movielens_data('ml-1m')
df = load_movielens_data('ml-100k')
df.head()

CPU times: user 19.1 ms, sys: 8.04 ms, total: 27.2 ms
Wall time: 56.6 ms


Unnamed: 0,user_id,item_id,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [14]:
%%time
dfshort = df[df['rating'] > 0]
cf = UserItemInteractions(
    name='MovieLens-1m',
    users = dfshort['user_id'], 
    items = dfshort['item_id'],
    min_num_rating_per_user=30,
    min_num_rating_per_item=10
)
cf.print_memory_usage()

--- Memory usage for MovieLens-1m:
Sparse User-Item matrix = 0.09 MB
User-Item dataframe df = 2.85 MB
Item dataframe df_item = 0.03 MB
Item dataframe df_user = 0.02 MB
---
CPU times: user 35.3 ms, sys: 3.18 ms, total: 38.5 ms
Wall time: 71.2 ms


In [15]:
%%time
cf.generate_train_test(user_test_ratio = 0.2)

CPU times: user 1.39 s, sys: 0 ns, total: 1.39 s
Wall time: 1.72 s


In [17]:
%%time
wals = MF_WALS(
    num_features=20, 
    reg_lambda=0., 
    weighting_strategy='same', 
    num_iters=20, 
    initial_std=0.1, 
    seed=None
)
wals.fit(cf.R_train)
wals_ndcg_score = cf.get_ndcg_metric(
    user_mat = wals.user_mat,
    item_mat=wals.item_mat,
    num_items = 10,
    test=True,
    truncate=True
)

MF_WALS: 100%|████████████████████████████████████████████| 20/20 [01:25<00:00,  4.27s/it]


CPU times: user 7min 16s, sys: 19min 44s, total: 27min 1s
Wall time: 1min 29s


In [18]:
%%time
als = MF_ALS(
    num_features=20, 
    reg_lambda=0., 
    num_iters=20, 
    initial_std=0.1, 
    seed=None
)
als.fit(cf.R_train)
als_ndcg_score = cf.get_ndcg_metric(
    user_mat = als.user_mat,
    item_mat=als.item_mat,
    num_items = 10,
    test=True,
    truncate=True
)

MF_ALS: 100%|████████████████████████████████████████████| 20/20 [00:00<00:00, 225.67it/s]


CPU times: user 3.51 s, sys: 11.4 s, total: 14.9 s
Wall time: 875 ms


In [19]:
%%time
svd = MF_SVD(
    num_features=20, 
    num_iters=100, 
    seed=None
)
svd.fit(cf.R_train)
svd_ndcg_score = cf.get_ndcg_metric(
    user_mat = svd.user_mat, 
    item_mat=svd.item_mat, 
    num_items = 10,
    test=True,
    truncate=True
)

CPU times: user 3.1 s, sys: 9.7 s, total: 12.8 s
Wall time: 698 ms


In [20]:
# Test NDCG scores, displayed in the order (ALS, WALS, SVD).
(als_ndcg_score, wals_ndcg_score, svd_ndcg_score)

(0.45894791526201023, 0.4585099780692262, 0.46026964320490765)

### Conclusions
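- ALS = WALS (with uniform weighting) = SVD in terms of ranking quality: the three test NDCG scores (top 10 items) are ≈0.459, ≈0.459 and ≈0.460 respectively, differing only in the third decimal place.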