## ALS recommendation model for music recommendation on the implicit-feedback [last.fm 360K](http://ocelma.net/MusicRecommendationDataset/lastfm-360K.html) dataset

Use this model as a base model for building a recommendation ranking model on implicit-feedback data.

---
Library Importing

In [28]:
from implicit.datasets.lastfm import get_lastfm
from implicit.nearest_neighbours import bm25_weight
from implicit.als import AlternatingLeastSquares

import numpy as np 
import pandas as pd

---
Dataset loading

In [3]:
# Load the last.fm 360K dataset (the first call downloads it, as the progress
# bar in the output shows). The call returns three objects:
#   artists           - numpy array of artist names, shape (292385,)
#   users             - numpy array of hashed user ids, shape (358868,)
#   artist_user_plays - scipy CSR matrix of play counts, shape (artists, users)
artists, users, artist_user_plays = get_lastfm()

184MB [02:42, 1.14MB/s]                               


---
EDA

In [6]:
# Quick look at the artist-name array: container type, shape, first ten names.
print(type(artists), artists.shape, artists[:10], sep="\n")

<class 'numpy.ndarray'>
(292385,)
[' 2 ' ' 58725ab=>' ' 80lİ yillarin tÜrkÇe sÖzlÜ aŞk Şarkilari'
 ' amy winehouse' ' cours de la somme' ' fatboy slim' ' kanye west'
 ' mala rodriguez' ' mohamed lamine' ' oliver shanti & friends']


In [14]:
# Inspect the raw (artist x user) play-count matrix: container type, shape,
# number of stored entries, the CSR index arrays, and one sample artist row.
print(
    type(artist_user_plays),
    artist_user_plays.shape,
    artist_user_plays.nnz,
    artist_user_plays.indices,
    artist_user_plays.indptr,
    artist_user_plays.getrow(73470),
    sep="\n",
)


<class 'scipy.sparse.csr.csr_matrix'>
(292385, 358868)
17535606
[ 73470  97856 235382 ... 245468  84262  85674]
[       0        4        5 ... 17535604 17535605 17535606]
  (0, 15205)	31.0


In [13]:
# Quick look at the user-id array: container type, shape, first ten ids.
print(type(users), users.shape, users[:10], sep="\n")

<class 'numpy.ndarray'>
(358868,)
['00000c289a1829a808ac09c00daf10bc3c4e223b'
 '00001411dc427966b17297bf4d69e7e193135d89'
 '00004d2ac9316e22dc007ab2243d6fcb239e707d'
 '000063d3fe1cf2ba248b9e3c3f0334845a27a6bf'
 '00007a47085b9aab8af55f52ec8846ac479ac4fe'
 '0000c176103e538d5c9828e695fed4f7ae42dd01'
 '0000ee7dd906373efa37f4e1185bfe1e3f8695ae'
 '0000ef373bbd0d89ce796abae961f2705e8c1faf'
 '0000f687d4fe9c1ed49620fbc5ed5b0d7798ea20'
 '0001399387da41d557219578fb08b12afa25ab67']


In [15]:
# Re-weight the raw play counts with BM25 before training.
#
# The weighted matrix is bound to a NEW name on purpose: rebinding
# `artist_user_plays` to its own weighted version would make this cell
# non-idempotent (re-running it would apply BM25 on top of BM25) and would
# discard the raw counts inspected in the EDA cells above. The cost is that
# both the raw and the weighted matrix stay in memory.
artist_user_plays_bm25 = bm25_weight(artist_user_plays, K1=100, B=0.8)

# The model cells below index this matrix by user (`user_plays[userid]`), so
# transpose from (artist, user) to (user, artist) and convert back to CSR.
user_plays = artist_user_plays_bm25.T.tocsr()

In [24]:
# Inspect the weighted (user x artist) matrix: container type and the
# stored (artist index, weight) entries of the first user.
print(type(user_plays), user_plays[0], sep="\n")

<class 'scipy.sparse.csr.csr_matrix'>
  (0, 19370)	352.9751779640889
  (0, 19606)	434.397274202691
  (0, 37529)	305.27620607761054
  (0, 45568)	585.4341693344771
  (0, 45575)	808.3451406668643
  (0, 46258)	142.2391605697496
  (0, 51099)	48.74467472041867
  (0, 90948)	39.97596241359612
  (0, 92187)	113.34032335425246
  (0, 100655)	13.24056433503626
  (0, 103727)	21.558948808986425
  (0, 106705)	28.653924454702246
  (0, 106720)	191.3328220153283
  (0, 125500)	241.32859292780725
  (0, 126706)	19.335073524300682
  (0, 129399)	13.089673010275645
  (0, 137079)	89.54607771892809
  (0, 144255)	4.720524922651388
  (0, 144310)	102.37990343980374
  (0, 151783)	7.391922803698345
  (0, 154701)	9.001361851612097
  (0, 155258)	480.5849215582812
  (0, 165137)	93.71950862504329
  (0, 168001)	53.10572249143597
  (0, 169923)	118.06681082623732
  (0, 171703)	383.0111813385659
  (0, 172591)	93.61513273326891
  (0, 175819)	440.11576278969085
  (0, 177614)	586.6872756186044
  (0, 184096)	46.12449489515663
  

#[NOTE] Even though the dataset itself is an implicit-feedback collection, the ALS model here takes the (BM25-weighted) play count as a proxy rating of the user for each artist. Therefore, the model works like an explicit recommendation model.

---
Model Training

In [26]:
# Train an ALS matrix-factorisation model with 64 latent factors.
# ALS starts from randomly initialised factors, so the seed is fixed to make
# the recommendations below reproducible under "Restart & Run All".
model = AlternatingLeastSquares(factors=64, regularization=0.05, random_state=42)

# NOTE(review): the factor 2 scales every weighted play count before fitting
# (it appears to act as a confidence multiplier). Newer implicit releases
# expose an `alpha` argument for this purpose - confirm against the installed
# version before switching.
model.fit(2 * user_plays)

100%|██████████| 15/15 [03:31<00:00, 14.12s/it]


---
Model Evaluation & Prediction

In [31]:
# Pick one user (by row index) and request their top-20 artists.
# Already-played artists are kept in the result so they can be flagged
# in the table built in the next cell.
userid = 234
ids, scores = model.recommend(
    userid,
    user_plays[userid],
    N=20,
    filter_already_liked_items=False,
)

In [32]:
# Tabulate the recommendations: artist name, model score, and whether the
# user already has a stored play entry for that artist.
pd.DataFrame(
    {
        "artist": artists[ids],
        "score": scores,
        # np.isin replaces np.in1d, which is deprecated as of NumPy 2.0;
        # for 1-D `ids` the result is identical.
        "already_liked": np.isin(ids, user_plays[userid].indices),
    }
)

Unnamed: 0,artist,score,already_liked
0,ricky martin,1.019611,True
1,marc anthony,0.959728,False
2,juanes,0.947532,True
3,carlos santana,0.918384,False
4,enrique iglesias,0.907143,False
5,a.r. rahman,0.88911,False
6,shakira,0.88575,False
7,outlandish,0.862761,False
8,gipsy kings,0.85626,False
9,panjabi mc,0.855923,False
