In [1]:
%%capture
# ignore warnings in this cell
%load_ext autoreload
%autoreload 2

import os
os.environ["USE_NUMBA"] = "1"
import random

from eals.eals import ElementwiseAlternatingLeastSquares
from eals.util import create_user_items

In [2]:
# Data preparation
user_count = 2000
item_count = 1000
data_count = user_count * 20
new_user_count = 200
new_item_count = 100


def user_items():
    """Call create_user_items() with the benchmark sizes defined above.

    Every call invokes create_user_items() again, so each benchmark gets its
    own return value rather than sharing one object between models.

    Returns:
        Whatever create_user_items() returns for these sizes; it is passed
        directly to model.fit() / model.init_data() in the benchmark cells.
    """
    return create_user_items(
        user_count=user_count,
        item_count=item_count,
        data_count=data_count,
        new_user_count=new_user_count,
        new_item_count=new_item_count,
    )

In [3]:
# Benchmarking fit()

model = ElementwiseAlternatingLeastSquares(max_iter=10)
model.fit(user_items())  # warm up
%time model.fit(user_items())

CPU times: user 903 ms, sys: 89.9 ms, total: 993 ms
Wall time: 144 ms


In [4]:
# Benchmarking update_user()

# existing users only
model = ElementwiseAlternatingLeastSquares()
model.init_data(user_items())
model.update_user(random.randrange(user_count))  # warm up
%timeit model.update_user(random.randrange(user_count))

# new users only
model = ElementwiseAlternatingLeastSquares()
model.init_data(user_items())
model.update_user(random.randrange(user_count, user_count + new_user_count))  # warm up
%timeit model.update_user(random.randrange(user_count, user_count + new_user_count))

# mixed users
model = ElementwiseAlternatingLeastSquares()
model.init_data(user_items())
model.update_user(random.randrange(user_count + new_user_count))  # warm up
%timeit model.update_user(random.randrange(user_count + new_user_count))

25.3 µs ± 458 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
8.83 µs ± 102 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
24.3 µs ± 973 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [5]:
# Benchmarking update_item()

# existing items only
model = ElementwiseAlternatingLeastSquares()
model.init_data(user_items())
model.update_item(random.randrange(item_count))  # warm up
%timeit model.update_item(random.randrange(item_count))

# new items only
model = ElementwiseAlternatingLeastSquares()
model.init_data(user_items())
model.update_item(random.randrange(item_count, item_count + new_item_count))  # warm up
%timeit model.update_item(random.randrange(item_count, item_count + new_item_count))

# mixed items
model = ElementwiseAlternatingLeastSquares()
model.init_data(user_items())
model.update_item(random.randrange(item_count + new_item_count))  # warm up
%timeit model.update_item(random.randrange(item_count + new_item_count))

31.4 µs ± 147 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
8.57 µs ± 44.4 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
29.5 µs ± 321 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [6]:
# Benchmarking update_model()
model = ElementwiseAlternatingLeastSquares()
model.init_data(user_items())
model.update_model(random.randrange(user_count), random.randrange(item_count))  # warm up
%timeit -n 1000 model.update_model(random.randrange(user_count), random.randrange(item_count))

119 µs ± 2.28 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [7]:
# Benchmarking calc_loss() for csr matrices
model = ElementwiseAlternatingLeastSquares()
model.init_data(user_items())
model.calc_loss()  # warm up
%timeit model.calc_loss()

3.4 ms ± 56.7 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [8]:
# Benchmarking calc_loss() for lil matrices
model = ElementwiseAlternatingLeastSquares()
model.init_data(user_items())
model._convert_data_for_online_training()
model.calc_loss()  # warm up
%timeit model.calc_loss()

13 ms ± 213 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [9]:
# Run the external benchmark.py script against 2020-08-24-view0100.json with "-m csr".
# NOTE(review): -f appears to select the data file and -m the matrix format
# (judging by the script's printed output) -- confirm in benchmark.py.
%run benchmark.py -f 2020-08-24-view0100.json -m csr

Benchmarking calc_loss()
real data (2020-08-24-view0100.json), csr matrix
  load data: (use cache: 2020-08-24-view0100-train_data.npy) 2.457181930541992 sec
    user_count=1200777, item_count=110547
  setup: 3.7112250328063965 sec
  elapsed: 4.182994842529297 sec


In [10]:
# Run the external benchmark.py script against 2020-08-24-view0100.json with "-m lil".
# NOTE(review): -f appears to select the data file and -m the matrix format
# (judging by the script's printed output) -- confirm in benchmark.py.
%run benchmark.py -f 2020-08-24-view0100.json -m lil

Benchmarking calc_loss()
real data (2020-08-24-view0100.json), lil matrix
  load data: (use cache: 2020-08-24-view0100-train_data.npy) 2.387756109237671 sec
    user_count=1200777, item_count=110547
  setup: 30.067081928253174 sec
  elapsed: 11.525154113769531 sec
