In [80]:
import implicit
import pandas as pd
from scipy.sparse import coo_matrix

In [81]:
data = pd.read_csv('../data/raw/ua.base', sep='\t', header=None)
data = data.rename(columns={0: "user_id", 1: "item_id", 2: "rating", 3: "timestamp"})
data

Unnamed: 0,user_id,item_id,rating,timestamp
0,1,1,5,874965758
1,1,2,3,876893171
2,1,3,4,878542960
3,1,4,3,876893119
4,1,5,3,889751712
...,...,...,...,...
90565,943,1047,2,875502146
90566,943,1074,4,888640250
90567,943,1188,3,888640250
90568,943,1228,3,888640275


In [82]:
sparse_matrix = coo_matrix((data['rating'].astype(float),
                           (data['user_id'], data['item_id'])))

model = implicit.als.AlternatingLeastSquares(factors=10, regularization=0.02, iterations=600)

model.fit(sparse_matrix)


100%|██████████| 600/600 [00:19<00:00, 30.23it/s]


In [83]:
user_id = 1
user_items = sparse_matrix.T.tocsr()
recommendations = model.recommend(user_id, user_items[user_id])

for movie_id, score in zip(*recommendations):
    print(f"Movie ID: {movie_id}, Score: {score}")


Movie ID: 7, Score: 1.2085438966751099
Movie ID: 50, Score: 1.1923744678497314
Movie ID: 100, Score: 1.1910204887390137
Movie ID: 475, Score: 1.1523393392562866
Movie ID: 173, Score: 1.0932046175003052
Movie ID: 175, Score: 1.0863113403320312
Movie ID: 172, Score: 1.050254225730896
Movie ID: 179, Score: 1.040592908859253
Movie ID: 433, Score: 1.0340453386306763
Movie ID: 176, Score: 1.0265707969665527


In [84]:
test_data = pd.read_csv('../data/raw/ua.test', sep='\t', header=None)
test_data = test_data.rename(columns={0: "user_id", 1: "item_id", 2: "rating", 3: "timestamp"})
test_data

Unnamed: 0,user_id,item_id,rating,timestamp
0,1,20,4,887431883
1,1,33,4,878542699
2,1,61,4,878542420
3,1,117,3,874965739
4,1,155,2,878542201
...,...,...,...,...
9425,943,232,4,888639867
9426,943,356,4,888639598
9427,943,570,1,888640125
9428,943,808,4,888639868


In [85]:
all_items = test_data.item_id.unique()
test_data = test_data.drop('timestamp', axis=1)
test_data = test_data.groupby("user_id").agg(list)

In [86]:
all_items

array([  20,   33,   61, ..., 1258,  570,  808], dtype=int64)

Here is the metric - presicion


In [87]:
m_p_at_k = []

for user_id, (item_ids, ratings) in test_data.iterrows():

    items = list(zip(item_ids, ratings))
    positive_items = list(map(lambda x: x[0], list(filter(lambda x: x[1] >= 3, items))))

    rec_mov_ids, _ = model.recommend(user_id, user_items[user_id], N=7)
    p_at_k = sum([1 if x in positive_items else 0 for x in rec_mov_ids]) / len(rec_mov_ids)
    m_p_at_k.append(p_at_k)

print(m_p_at_k)
print(sum(m_p_at_k) / len(m_p_at_k))

[0.0, 0.0, 0.14285714285714285, 0.14285714285714285, 0.0, 0.0, 0.0, 0.14285714285714285, 0.14285714285714285, 0.0, 0.0, 0.14285714285714285, 0.0, 0.0, 0.0, 0.14285714285714285, 0.0, 0.14285714285714285, 0.14285714285714285, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.14285714285714285, 0.14285714285714285, 0.14285714285714285, 0.0, 0.14285714285714285, 0.14285714285714285, 0.0, 0.5714285714285714, 0.0, 0.0, 0.0, 0.2857142857142857, 0.14285714285714285, 0.42857142857142855, 0.14285714285714285, 0.14285714285714285, 0.14285714285714285, 0.0, 0.0, 0.42857142857142855, 0.14285714285714285, 0.2857142857142857, 0.2857142857142857, 0.0, 0.0, 0.0, 0.0, 0.2857142857142857, 0.0, 0.0, 0.0, 0.0, 0.14285714285714285, 0.0, 0.0, 0.14285714285714285, 0.0, 0.14285714285714285, 0.14285714285714285, 0.0, 0.14285714285714285, 0.2857142857142857, 0.14285714285714285, 0.14285714285714285, 0.2857142857142857, 0.0, 0.14285714285714285, 0.14285714285714285, 0.14285714285714285, 0.14285714285714285, 0.0, 0.2857142857142857