In [1]:
import numpy as np
import pandas as pd
from scipy import sparse
from sklearn.decomposition import NMF

In [2]:
import scipy.sparse as sparse
import random
import implicit 

In [2]:
track_download = pd.read_csv('../../raw_data/track_download.csv')
track_download = track_download[['USER_ID', 'TRACK_ID']].assign(r=1)

In [4]:
track_like = pd.read_csv('../../raw_data/track_like.csv')
track_like = track_like[['USER_ID', 'TRACK_ID']].assign(r=5)

In [5]:
album_track_purchase = pd.read_csv('../../raw_data/album_track_purchase.csv')
track_purchase = album_track_purchase[['USER_ID', 'TRACK_ID']][~album_track_purchase.TRACK_ID.isna()].assign(r=10).drop_duplicates()
track_purchase.TRACK_ID = track_purchase.TRACK_ID.astype('int')

In [6]:
total = pd.concat([track_download, track_like, track_purchase])
total.columns = ['user', 'track', 'score']
total = total.groupby(['user', 'track']).score.sum().reset_index()

In [21]:
class Recommender:
    def __init__(self, rank = 10):
        self.rank = rank
        
    def fit(self, scores):
        scores.user = scores.user.astype('category')
        scores.track = scores.track.astype('category')
        mat = sparse.coo_matrix((
            scores.score.values.astype('float32'),
            (scores.user.cat.codes.values,
             scores.track.cat.codes.values))).tocsr()
        model = NMF(n_components=self.rank)
        model.fit(mat)
        self.model = model
        self.user_categories = scores.user.cat.categories
        self.track_categories = scores.track.cat.categories
        self.mat = mat
        self.user_index = dict(enumerate(scores.user.cat.categories))
        self.popular = scores.groupby('track').score.sum().reset_index().sort_values('score', ascending=False)['track'].values
        
    def recommend(self, user_id, count=5):
        if user_id not in self.user_index:
            return list(self.popular[:count])
        index = self.user_index[user_id]
        user_history = self.mat[index]
        score_pred = self.model.inverse_transform(self.model.transform(user_history))
        
        top_tracks = sorted(zip(scores, self.track_categories), reverse=True)
        top_tracks = [x[1] for x in top_tracks]
        
        previously_downloaded = set(self.track_categories[self.mat[index].nonzero()[1]])
        top_tracks = (x for x in top_tracks if x not in previously_downloaded)
        suggestions = list(islice(top_tracks, count))
        return suggestions

In [22]:
recommender = Recommender()
recommender.fit(total)

In [10]:
recommender.recommend(3568884)

[2867903, 2852012, 436926390, 2834013, 2832434]