Требуется построить модель рекомендаций на основе скрытых факторов (implicit) на основе dataset’а \
https://grouplens.org/datasets/hetrec-2011/ (Delicious Bookmarks)


- Documentation: [implicit](https://implicit.readthedocs.io/en/latest/quickstart.html)\
**!pip install implicit**


- Requirements:
This library requires SciPy version 0.16 or later. Running on OSX requires an OpenMP compiler, which can be installed with homebrew:\
**!brew install gcc**.

In [92]:
import numpy as np
import pandas as pd
import scipy.sparse as sparse
# import matplotlib.pyplot as plt 

from tqdm import tqdm_notebook, tqdm
from implicit.als import AlternatingLeastSquares

import warnings
warnings.simplefilter('ignore')

# %matplotlib inline

In [63]:
raw_data = pd.read_table('./hetrec2011-delicious-2k/user_taggedbookmarks-timestamps.dat', usecols=['userID','bookmarkID','tagID'])

raw_data.head()

Unnamed: 0,userID,bookmarkID,tagID
0,8,1,1
1,8,2,1
2,8,7,1
3,8,7,6
4,8,7,7


In [64]:
raw_data = raw_data.groupby(['userID', 'bookmarkID'], as_index=False)['tagID'].count()
raw_data.columns = ['userID', 'bookmarkID', 'tagCount']

display(raw_data.head())
display(raw_data.info())

Unnamed: 0,userID,bookmarkID,tagCount
0,8,1,1
1,8,2,1
2,8,7,3
3,8,8,3
4,8,9,2


<class 'pandas.core.frame.DataFrame'>
Int64Index: 104799 entries, 0 to 104798
Data columns (total 3 columns):
userID        104799 non-null int64
bookmarkID    104799 non-null int64
tagCount      104799 non-null int64
dtypes: int64(3)
memory usage: 3.2 MB


None

In [102]:
data = raw_data.copy()
data = data.dropna()

In [103]:
data['userID'] = data['userID'].astype("category").cat.codes
data['bookmarkID'] = data['bookmarkID'].astype("category").cat.codes

In [104]:
data.head()

Unnamed: 0,userID,bookmarkID,tagCount
0,0,0,1
1,0,1,1
2,0,2,3
3,0,3,3
4,0,4,2


In [105]:
users = list(np.sort(data.userID.unique()))
bookmarks = list(np.sort(data.bookmarkID.unique()))
tags = list(data.tagCount)

print(users[:5])
print(bookmarks[:5])
print(tags[:5])

rows = data.userID.astype(int)
cols = data.bookmarkID.astype(int)

print()
print(len(users), len(bookmark), len(tags))
print(len(rows), len(cols))

data_sparse = sparse.csr_matrix((tags, (cols, rows)), shape=(len(bookmarks), len(users)))


# ------------//L4-part2//------------
# users = list(np.sort(data.user_id.unique()))
# artists = list(np.sort(data.artist_id.unique()))
# plays = list(data.plays)
# rows = data.user_id.astype(int)
# cols = data.artist_id.astype(int)
# print(users[:5])                              ->   [0, 1, 2, 3, 4]
# print(artists[:5])                            ->   [0, 1, 2, 3, 4]
# print(plays[:5])                              ->   [1099, 897, 717, 706, 691]
# print(len(users), len(artists), len(plays))   ->   358868 292363 17535449
# print(len(rows), len(cols))                   ->   17535449 17535449
# data_sparse = sparse.csr_matrix((plays, (cols, rows)), shape=(len(artists), len(users)))
# ------------//--------//------------

[0, 1, 2, 3, 4]
[0, 1, 2, 3, 4]
[1, 1, 3, 3, 2]

1867 69223 104799
104799 104799


In [106]:
model = AlternatingLeastSquares(factors=100)
model.fit(data_sparse)

HBox(children=(IntProgress(value=0, max=15), HTML(value='')))

In [161]:
userid = 10
user_items = data_sparse.T.tocsr()
recommendations = model.recommend(userid, user_items)

In [162]:
recommendations

[(655, 0.14221878),
 (647, 0.12029913),
 (654, 0.115144685),
 (646, 0.114259005),
 (635, 0.1108467),
 (648, 0.10973576),
 (644, 0.10944519),
 (627, 0.108881146),
 (642, 0.10791519),
 (656, 0.10789631)]

In [155]:
titles_bookmarks = pd.read_table('./hetrec2011-delicious-2k/bookmarks.dat', usecols=['id','title'], sep='\\n', delimiter='\\t')
# titles_bookmarks.head()

In [163]:
rec_ids = np.array(recommendations)[:, 0]
titles_bookmarks.query("id in @rec_ids")

Unnamed: 0,id,title
459,627,A List Apart: Articles: The Look That Says Book
466,635,"Free Wireframing Kits, UI Design Kits, PDFs an..."
473,642,A List Apart: Articles: Apps vs. the Web
475,644,30 Mind Blowing Music Website Designs | Dzine ...
477,646,10 Lessons From Finland&#039;s Summer of Startups
478,647,50 Tremendous Grunge Wallpapers For Your Deskt...
483,654,10 Beautiful Sketches for Website Prototypes –...
484,655,Worldy Inspiration from Tourism Websites | Web...
485,656,Five Best Personal Project Management Tools


In [170]:
itemid = 15366
related = model.similar_items(itemid)

In [171]:
related

[(15366, 0.2319476),
 (15369, 0.23194419),
 (15380, 0.23194419),
 (15374, 0.23192249),
 (15373, 0.2319089),
 (15365, 0.23190208),
 (15375, 0.23189825),
 (15378, 0.2318428),
 (15387, 0.23181821),
 (15376, 0.23181804)]

In [172]:
rel_ids = np.array(related)[:, 0]
titles_bookmarks.query("id in @rel_ids")

Unnamed: 0,id,title
10802,15373,Education/Projects/JetpackForLearning/Profiles...
10803,15374,Heuristics for User Interface Design
10804,15376,annalist » Neulich bei Twitter: Post-Privacy o...
10806,15380,"Drumbeat Learning, Freedom and the Web Festiva..."
10807,15387,Methodos e.V. - Startseite


In [None]:
pass