In [1]:
import sys
import json
import pickle

import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix, load_npz, save_npz

In [2]:
# Put the sibling application directory on the import path; the project imports that
# follow ('models', 'utils.metrics') presumably resolve from there -- confirm.
# Guarded so that re-running this cell does not keep appending duplicate entries.
if '../app' not in sys.path:
    sys.path.append('../app')

In [3]:
from models import ALSModel
from utils.metrics import precision_at_k, recall_at_k, ap_k, map_k

In [4]:
# Raw held-out interactions; the columns used below are 'user_id' and 'song'.
test = pd.read_csv('../app/data/test.csv', sep=',')

# One row per user, with the distinct songs seen for that user in the test set
# stored in the 'actual' column (the ground truth for the metrics).
result = (
    test
    .groupby('user_id')['song']
    .unique()
    .rename('actual')
    .reset_index()
)

In [5]:
# Build the project's ALS wrapper from 'als_100.pkl'. The cells below rely on its
# `model`, `user_item_matrix`, `userid_to_id` and `get_recommendations` members.
# NOTE(review): the path is relative, so this depends on the notebook's working directory.
als_model = ALSModel('als_100.pkl')

In [8]:
# Sanity check: query the underlying model directly for internal user index 0,
# passing that user's row of the user-item matrix.
# The displayed result is a pair of arrays (int32, float32) -- presumably item
# indices and their scores; confirm against the model library.
als_model.model.recommend(0,
                          als_model.user_item_matrix[0])

(array([2465, 2457, 4057, 5541, 5434, 5532,  476, 7505, 2555,  473],
       dtype=int32),
 array([0.23887599, 0.1903713 , 0.17900366, 0.16791387, 0.15695928,
        0.15370278, 0.15346964, 0.14247274, 0.1420875 , 0.14184596],
       dtype=float32))

In [7]:
# Same sanity check, but starting from an external user id.
# Resolve the internal index once and use it for BOTH arguments: the interaction row
# passed to `recommend` must belong to the user being queried. The previous version
# hard-coded row 0, which only matched because this user presumably maps to index 0.
user_idx = als_model.userid_to_id['00055176fea33f6e027cd3302289378b']
res = als_model.model.recommend(user_idx,
                                als_model.user_item_matrix[user_idx])

In [10]:
# First element of the `recommend` result (the int32 array; presumably item indices).
res[0]

array([2465, 2457, 4057, 5541, 5434, 5532,  476, 7505, 2555,  473],
      dtype=int32)

In [6]:
# Attach the model's recommendations for every test user, one call per user id.
result['als_100'] = result['user_id'].apply(als_model.get_recommendations)

In [7]:
# Preview: ground-truth songs ('actual') next to the recommendations ('als_100').
result.head()

Unnamed: 0,user_id,actual,als_100
0,00055176fea33f6e027cd3302289378b,"[Imagine Dragons__I Bet My Life, Meghan Traino...","[Ed Sheeran__Thinking Out Loud, Mark Ronson__U..."
1,0007f3dd09c91198371454c608d47f22,"[Lana Del Rey__Black Beauty, Evanescence__My I...","[Ed Sheeran__The A Team, Ed Sheeran__I See Fir..."
2,000b0f32b5739f052b9d40fcc5c41079,"[Veronica Maggio__Hela huset, First Aid Kit__S...","[First Aid Kit__Emmylou, First Aid Kit__The Li..."
3,000c11a16c89aa4b14b328080f5954ee,"[Bastille__Pompeii, Pharrell Williams__Know Wh...","[Mark Ronson__Uptown Funk, Disclosure__Latch, ..."
4,00123e0f544dee3ab006aa7f1e5725a7,[Creedence Clearwater Revival__Long As I Can S...,"[Led Zeppelin__Immigrant Song, Led Zeppelin__W..."


In [8]:
def calculate_metric(metric, pred_col='als_100', frame=None):
    """Average a per-user ranking metric over an evaluation table.

    Parameters
    ----------
    metric : callable
        Called as ``metric(recommended, actual)`` once per row; any further
        arguments (e.g. the cut-off ``k``) are left at the metric's defaults.
    pred_col : str, default 'als_100'
        Column holding the recommendations to score. Parameterised so other
        models stored in the same table can be evaluated with this helper.
    frame : pandas.DataFrame, optional
        Table with an ``'actual'`` column and ``pred_col``. Defaults to the
        notebook-level ``result`` frame, which was the only supported input
        before this parameter existed.

    Returns
    -------
    The mean of the per-row metric values (as computed by ``Series.mean``).
    """
    if frame is None:
        # Backward-compatible default: score the notebook's global table.
        frame = result
    per_user = frame.apply(lambda row: metric(row[pred_col], row['actual']), axis=1)
    return per_user.mean()

In [9]:
# Score the 'als_100' recommendations with every metric, keyed by report label.
# The metrics are called with their default cut-off; the '_5' suffix presumably
# mirrors that default -- confirm in utils.metrics.
# NOTE(review): map_k is applied row by row like the other metrics; verify that it
# accepts a single (recommended, actual) pair rather than whole lists of users.
metrics = {
    label: calculate_metric(metric_fn)
    for label, metric_fn in {
        'precision_at_5': precision_at_k,
        'recall_at_5': recall_at_k,
        'ap_5': ap_k,
        'map_5': map_k,
    }.items()
}

In [11]:
# Spot check of mean precision via the shared helper, instead of repeating its
# row-wise apply inline; this computes the same value as metrics['precision_at_5'].
calculate_metric(precision_at_k)

0.029253249137234743

In [10]:
# Show all aggregated metrics for the 'als_100' recommendations.
metrics

{'precision_at_5': 0.029253249137234743,
 'recall_at_5': 0.03825415473480677,
 'ap_5': 0.015924321413711237,
 'map_5': 0.3952941274852943}