In [1]:
import sys
import json
import pickle

import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix, load_npz, save_npz

In [2]:
# Make the application package importable (the `models` and `utils` imports
# below resolve against this directory). The path is relative to the
# notebook's working directory. The membership guard keeps the cell
# idempotent: re-running it no longer stacks duplicate entries on sys.path.
if '../app' not in sys.path:
    sys.path.append('../app')

In [3]:
from models import ALSModel
from utils.metrics import precision_at_k, recall_at_k, ap_k, map_k

In [4]:
# Read the zipped test interactions, then collapse them to one row per user
# whose 'actual' column holds that user's distinct songs (the ground truth).
# Alternative input (previously used here): '../app/data/test.csv'.
test = pd.read_csv('../datasets/test.csv.zip', sep=',', compression='zip')
result = (
    test
    .groupby('user_id')['song']
    .unique()
    .reset_index()
    .rename(columns={'song': 'actual'})
)

In [5]:
# Preview the ground-truth table: one row per user_id, with 'actual' holding
# the array of distinct songs that user has in the test set.
result

Unnamed: 0,user_id,actual
0,00055176fea33f6e027cd3302289378b,"[Demi Lovato__Give Your Heart A Break, Ed Shee..."
1,0007f3dd09c91198371454c608d47f22,"[Ben Howard__Keep Your Head Up, Ed Sheeran__Sm..."
2,000b0f32b5739f052b9d40fcc5c41079,[Lars Winnerbäck__Om du lämnade mig nu]
3,000c11a16c89aa4b14b328080f5954ee,[Arctic Monkeys__Why'd You Only Call Me When Y...
4,00123e0f544dee3ab006aa7f1e5725a7,[Foster The People__Don't Stop (Color on the W...
...,...,...
13602,ffe32d5412269f3041c58cbf0dde3306,[U2__Even Better Than The Real Thing]
13603,ffec270eae226caa14ddaef291d73fff,[Afrojack__Ten Feet Tall]
13604,fff60baf392613ed33f745b89a9b38f7,"[Belinda Carlisle__Heaven Is A Place On Earth,..."
13605,fff616055993498d6127f3f467cf9f2b,"[Metric__Help I'm Alive, The Smiths__Asleep (2..."


In [38]:
# Build the ALS wrapper from 'als_best_params.pkl'
# (presumably a pickled model trained with tuned hyper-parameters — TODO confirm
# what ALSModel loads and which directory the relative path is resolved against).
als_model = ALSModel('als_best_params.pkl')

In [7]:
# Keep only the test users present in als_model.userid_to_id (presumably the
# users the model was trained on — TODO confirm), since recommendations are
# requested per user_id below.
# .copy() detaches the filtered rows from the original frame so that the column
# assignments in later cells write to an independent DataFrame instead of a
# slice (avoids SettingWithCopyWarning / ambiguous writes).
result = result[result['user_id'].isin(als_model.userid_to_id)].copy()

In [44]:
# Store each user's 5 recommendations from the current als_model in 'als'.
result['als'] = result['user_id'].map(
    lambda user_id: als_model.get_recommendations(user_id, n=5)
)

In [45]:
# Store each user's 20 recommendations from the current als_model in 'als_20'.
result['als_20'] = result['user_id'].map(
    lambda user_id: als_model.get_recommendations(user_id, n=20)
)

In [49]:
def calculate_metric(metric, col='als', k=5, df=None):
    """Average a per-user ranking metric over an evaluation frame.

    Args:
        metric: Callable invoked as ``metric(actual, predicted, k=k)`` once
            per row.
        col: Name of the column holding each user's recommendations.
        k: Cutoff forwarded to ``metric``.
        df: Frame containing an ``'actual'`` column and ``col``. When omitted,
            the notebook-level ``result`` frame is used (looked up at call
            time), which keeps existing calls working unchanged.

    Returns:
        The mean of the per-row scores, or NaN when the frame has no rows.
    """
    frame = result if df is None else df
    # Pair the two columns directly instead of DataFrame.apply(axis=1), which
    # builds a Series per row and yields a non-scalar mean on an empty frame.
    scores = [
        metric(actual, predicted, k=k)
        for actual, predicted in zip(frame['actual'], frame[col])
    ]
    return pd.Series(scores, dtype='float64').mean()

In [50]:
# Score the current model at both cutoffs: the first pass uses
# calculate_metric's defaults, the second reads the 'als_20' column with k=20.
# NOTE(review): map_k is applied row by row and averaged exactly like ap_k, yet
# the recorded map_* values are far from the ap_* values — confirm that map_k
# accepts a single user's lists rather than lists of lists.
metric_fns = (precision_at_k, recall_at_k, ap_k, map_k)
metrics = {
    name: calculate_metric(fn)
    for name, fn in zip(
        ('precision_at_5', 'recall_at_5', 'ap_5', 'map_5'),
        metric_fns,
    )
}
metrics.update(
    (name, calculate_metric(fn, col='als_20', k=20))
    for name, fn in zip(
        ('precision_at_20', 'recall_at_20', 'ap_20', 'map_20'),
        metric_fns,
    )
)

In [51]:
# Show the scores collected above for the model built from 'als_best_params.pkl'.
# NOTE(review): precision/recall here are far below the 'als_100.pkl' section
# further down — worth confirming this is expected before relying on them.
metrics

{'precision_at_5': 0.0030719482619240095,
 'recall_at_5': 0.0005434621785525629,
 'ap_5': 0.0014551333872271626,
 'map_5': 0.32837893486195835,
 'precision_at_20': 0.0027485852869846405,
 'recall_at_20': 0.002029893428875932,
 'ap_20': 0.0006770261699178088,
 'map_20': 0.22476323202795784}

#### Baseline ALS model (`als_100.pkl`)

In [23]:
# Rebind als_model to the wrapper built from 'als_100.pkl'; every cell below
# (and any earlier cell that is re-run) now uses this model.
# NOTE(review): `result` was filtered with the previous model's userid_to_id —
# confirm both models share the same user mapping.
als_model = ALSModel('als_100.pkl')

In [24]:
# Spot-check the model on a single user (this user_id appears in the
# ground-truth table above). n is left at its default here.
als_model.get_recommendations('000b0f32b5739f052b9d40fcc5c41079')

['First Aid Kit__Emmylou',
 'Avicii__Wake Me Up',
 "First Aid Kit__The Lion's Roar",
 'Veronica Maggio__Jag kommer',
 'First Aid Kit__My Silver Lining']

In [25]:
# Overwrite 'als' with 5 recommendations per user from the current als_model.
result['als'] = result['user_id'].map(
    lambda user_id: als_model.get_recommendations(user_id, n=5)
)

In [28]:
# Start a fresh metrics dict and score the 'als' column with calculate_metric's
# default column and cutoff.
metrics = {
    name: calculate_metric(fn)
    for name, fn in zip(
        ('precision_at_5', 'recall_at_5', 'ap_5', 'map_5'),
        (precision_at_k, recall_at_k, ap_k, map_k),
    )
}

In [29]:
# Show the cutoff-5 scores for the model built from 'als_100.pkl'.
metrics

{'precision_at_5': 0.10212390681266995,
 'recall_at_5': 0.03817604818122016,
 'ap_5': 0.02535091256888349,
 'map_5': 0.2598869777529553}

In [31]:
# Overwrite 'als_20' with 20 recommendations per user from the current als_model.
result['als_20'] = result['user_id'].map(
    lambda user_id: als_model.get_recommendations(user_id, n=20)
)

In [36]:
# Add the cutoff-20 scores to the existing metrics dict, reading 'als_20'.
metrics.update(
    (name, calculate_metric(fn, col='als_20', k=20))
    for name, fn in zip(
        ('precision_at_20', 'recall_at_20', 'ap_20', 'map_20'),
        (precision_at_k, recall_at_k, ap_k, map_k),
    )
)

In [37]:
# Show the combined cutoff-5 and cutoff-20 scores for the 'als_100.pkl' model.
metrics

{'precision_at_5': 0.10212390681266995,
 'recall_at_5': 0.03817604818122016,
 'ap_5': 0.02535091256888349,
 'map_5': 0.2598869777529553,
 'precision_at_20': 0.089259204821048,
 'recall_at_20': 0.1223279525172394,
 'ap_20': 0.04573830745996247,
 'map_20': 0.2591817789898237}