In [2]:
import os
import sys
import pickle
import pandas as pd
from pathlib import Path
sys.path.append('../app/utils')

from baseline import Baseline
from get_test import get_test
from baseline import Baseline
from metrics import precision_at_k
from scipy.sparse import load_npz, save_npz
from scipy.sparse import csr_matrix
from implicit.als import AlternatingLeastSquares

In [3]:
test = pd.read_csv('../datasets/test.csv.zip', compression='zip')
result = test.groupby('user_id')['song'].unique().reset_index().rename(columns={'song': 'actual'})

In [6]:
data_path = Path(os.getcwd()).parent / 'app' / 'data'

In [7]:
with open(data_path / 'userids.pkl', 'rb') as f:
    userids = pickle.load(f)

with open(data_path / 'itemids.pkl', 'rb') as f:
    itemids = pickle.load(f)

In [8]:
# user_item_matrix_ = load_npz('../app/data/train_sparse.npz')
# user_item_matrix_ = user_item_matrix_.astype('float')

In [9]:
user_item_matrix_ = load_npz('../app/data/train_user_item_matrix.npz')
user_item_matrix_ = user_item_matrix_.astype('float')

In [8]:
user_item_matrix_

<13607x10000 sparse matrix of type '<class 'numpy.float64'>'
	with 1938057 stored elements in Compressed Sparse Row format>

In [11]:
train = pd.read_csv('../datasets/train.csv.zip', sep=',', compression='zip')

In [None]:
train['dummy'] = 1

user_item_matrix = pd.pivot_table(train, 
                                  index='user_id', columns='song', 
                                  values='dummy',
                                  aggfunc='count', 
                                  fill_value=0)

user_item_matrix = user_item_matrix.astype('float')

In [11]:
# userids = user_item_matrix.index.values
# itemids = user_item_matrix.columns.values

In [35]:
# with open('../app/data/userids.pkl', 'wb') as f:
#     pickle.dump(userids, f)

In [36]:
# with open('../app/data/itemids.pkl', 'wb') as f:
#     pickle.dump(itemids, f)

In [12]:
sparse_matrix = user_item_matrix_

In [13]:
# sparse_matrix = csr_matrix(user_item_matrix).tocsr()

In [14]:
userid_to_id = {user_id: idx for idx, user_id in enumerate(userids)}
id_to_userid = {val: key for key, val in userid_to_id.items()}

item_to_id = {item_id: idx for idx, item_id in enumerate(itemids)}
id_to_item = {val: key for key, val in item_to_id.items()}

In [15]:
# def get_recommendations(model, user_id, user_items_matrix=sparse_matrix, n=5, *args, **kwargs):
#     recs = model.recommend(userid=userid_to_id[user_id], 
#                          user_items=user_items_matrix,   # на вход user-item matrix   
#                          N=n, # кол-во рекомендаций    
#                          filter_already_liked_items=False,   
#                          filter_items=None,   
#                          recalculate_user=True, 
#                          **kwargs)
    
#     return [id_to_item[rec[0]] for rec in recs]

In [25]:
user_item_matrix = sparse_matrix

In [29]:
def get_recommendations(model, user_id, n=5, **kwargs):
    user_index = userid_to_id[user_id]
    recs = model.recommend(
            userid=user_index, 
            user_items=user_item_matrix[user_index],   # на вход user-item matrix
            N=n, # кол-во рекомендаций
            filter_already_liked_items=False,   
            filter_items=None,   
            recalculate_user=False, 
            **kwargs
            )
    return [id_to_item[rec] for rec in recs[0]]

In [16]:
als_100 = AlternatingLeastSquares(factors=100, 
                              regularization=0.001,
                              iterations=15, 
                              calculate_training_loss=True, 
                              num_threads=4)

In [17]:
als_100.fit(sparse_matrix, show_progress=False)

In [21]:
result

Unnamed: 0,user_id,actual
0,00055176fea33f6e027cd3302289378b,"[Imagine Dragons__I Bet My Life, Meghan Traino..."
1,0007f3dd09c91198371454c608d47f22,"[Lana Del Rey__Black Beauty, Evanescence__My I..."
2,000b0f32b5739f052b9d40fcc5c41079,"[Veronica Maggio__Hela huset, First Aid Kit__S..."
3,000c11a16c89aa4b14b328080f5954ee,"[Bastille__Pompeii, Pharrell Williams__Know Wh..."
4,00123e0f544dee3ab006aa7f1e5725a7,[Creedence Clearwater Revival__Long As I Can S...
...,...,...
13614,ffe32d5412269f3041c58cbf0dde3306,"[Diana Ross__I'm Coming Out, Aretha Franklin__..."
13615,ffec270eae226caa14ddaef291d73fff,[Walk the Moon__Shut Up and Dance]
13616,fff60baf392613ed33f745b89a9b38f7,"[Shaggy__Boombastic, Interpol__Everything Is W..."
13617,fff616055993498d6127f3f467cf9f2b,"[Matt and Kim__Let's Go, CAZZETTE__Sleepless -..."


In [30]:
get_recommendations(als_100, '00055176fea33f6e027cd3302289378b')

['Ed Sheeran__Thinking Out Loud',
 'Ed Sheeran__Sing',
 "Ed Sheeran__Don't",
 'Mark Ronson__Uptown Funk',
 'Ed Sheeran__I See Fire']

In [44]:
result = result[result['user_id'].isin(userids)]

In [31]:
result['als_100'] = result['user_id'].apply(lambda x: get_recommendations(als_100, x))

In [32]:
result

Unnamed: 0,user_id,actual,als_100
0,00055176fea33f6e027cd3302289378b,"[Demi Lovato__Give Your Heart A Break, Ed Shee...","[Ed Sheeran__Thinking Out Loud, Ed Sheeran__Si..."
1,0007f3dd09c91198371454c608d47f22,"[Ben Howard__Keep Your Head Up, Ed Sheeran__Sm...","[Ed Sheeran__The A Team, Ed Sheeran__I See Fir..."
2,000b0f32b5739f052b9d40fcc5c41079,[Lars Winnerbäck__Om du lämnade mig nu],"[First Aid Kit__Emmylou, Avicii__Wake Me Up, F..."
3,000c11a16c89aa4b14b328080f5954ee,[Arctic Monkeys__Why'd You Only Call Me When Y...,"[Lorde__Royals, Haim__The Wire, Mark Ronson__U..."
4,00123e0f544dee3ab006aa7f1e5725a7,[Foster The People__Don't Stop (Color on the W...,"[Led Zeppelin__Immigrant Song, Led Zeppelin__W..."
...,...,...,...
13602,ffe32d5412269f3041c58cbf0dde3306,[U2__Even Better Than The Real Thing],"[Stevie Wonder__Signed, Sealed, Delivered (I'm..."
13603,ffec270eae226caa14ddaef291d73fff,[Afrojack__Ten Feet Tall],"[Calvin Harris__Under Control, Calvin Harris__..."
13604,fff60baf392613ed33f745b89a9b38f7,"[Belinda Carlisle__Heaven Is A Place On Earth,...","[Soft Cell__Tainted Love, Culture Club__Karma ..."
13605,fff616055993498d6127f3f467cf9f2b,"[Metric__Help I'm Alive, The Smiths__Asleep (2...","[Imagine Dragons__It's Time, Bon Iver__Skinny ..."


In [46]:
result

Unnamed: 0,user_id,actual,als_100
0,00055176fea33f6e027cd3302289378b,"[Imagine Dragons__I Bet My Life, Meghan Traino...","[Ed Sheeran__Thinking Out Loud, Mark Ronson__U..."
1,0007f3dd09c91198371454c608d47f22,"[Lana Del Rey__Black Beauty, Evanescence__My I...","[Ed Sheeran__The A Team, Ed Sheeran__I See Fir..."
2,000b0f32b5739f052b9d40fcc5c41079,"[Veronica Maggio__Hela huset, First Aid Kit__S...","[First Aid Kit__Emmylou, First Aid Kit__The Li..."
3,000c11a16c89aa4b14b328080f5954ee,"[Bastille__Pompeii, Pharrell Williams__Know Wh...","[Mark Ronson__Uptown Funk, Lorde__Royals, Haim..."
4,00123e0f544dee3ab006aa7f1e5725a7,[Creedence Clearwater Revival__Long As I Can S...,"[Led Zeppelin__Whole Lotta Love, Led Zeppelin_..."
...,...,...,...
13614,ffe32d5412269f3041c58cbf0dde3306,"[Diana Ross__I'm Coming Out, Aretha Franklin__...","[R.E.M.__Losing My Religion, Nirvana__Smells L..."
13615,ffec270eae226caa14ddaef291d73fff,[Walk the Moon__Shut Up and Dance],"[Imagine Dragons__Radioactive, Imagine Dragons..."
13616,fff60baf392613ed33f745b89a9b38f7,"[Shaggy__Boombastic, Interpol__Everything Is W...","[Guns N' Roses__Sweet Child O' Mine, Guns N' R..."
13617,fff616055993498d6127f3f467cf9f2b,"[Matt and Kim__Let's Go, CAZZETTE__Sleepless -...","[The Rolling Stones__Gimme Shelter, The Rollin..."


In [40]:
result

Unnamed: 0,user_id,actual,als_100
0,00055176fea33f6e027cd3302289378b,"[Imagine Dragons__I Bet My Life, Meghan Traino...","[Ed Sheeran__Thinking Out Loud, Mark Ronson__U..."
1,0007f3dd09c91198371454c608d47f22,"[Lana Del Rey__Black Beauty, Evanescence__My I...","[Ed Sheeran__The A Team, Ed Sheeran__I See Fir..."
2,000b0f32b5739f052b9d40fcc5c41079,"[Veronica Maggio__Hela huset, First Aid Kit__S...","[First Aid Kit__Emmylou, First Aid Kit__The Li..."
3,000c11a16c89aa4b14b328080f5954ee,"[Bastille__Pompeii, Pharrell Williams__Know Wh...","[Mark Ronson__Uptown Funk, Lorde__Royals, Disc..."
4,00123e0f544dee3ab006aa7f1e5725a7,[Creedence Clearwater Revival__Long As I Can S...,"[Led Zeppelin__Immigrant Song, Led Zeppelin__W..."
...,...,...,...
13614,ffe32d5412269f3041c58cbf0dde3306,"[Diana Ross__I'm Coming Out, Aretha Franklin__...","[Stevie Wonder__Signed, Sealed, Delivered (I'm..."
13615,ffec270eae226caa14ddaef291d73fff,[Walk the Moon__Shut Up and Dance],"[Calvin Harris__Under Control, Tiësto__Red Lig..."
13616,fff60baf392613ed33f745b89a9b38f7,"[Shaggy__Boombastic, Interpol__Everything Is W...","[Soft Cell__Tainted Love, Blondie__Call Me, Cu..."
13617,fff616055993498d6127f3f467cf9f2b,"[Matt and Kim__Let's Go, CAZZETTE__Sleepless -...","[Bon Iver__Holocene, Bon Iver__Skinny Love, Ba..."


In [57]:
import numpy as np

In [55]:
def precision_at_k(recommended_list, true_list, k=5) -> float:
    true_list = np.array(true_list)
    recommended_list = np.array(recommended_list)
    recommended_list = recommended_list[:k]
    
    flags = np.isin(true_list, recommended_list)
    precision = flags.sum() / len(recommended_list)
    
    return precision

In [35]:
result

Unnamed: 0,user_id,actual,als_100
0,00055176fea33f6e027cd3302289378b,"[Demi Lovato__Give Your Heart A Break, Ed Shee...","[Ed Sheeran__Thinking Out Loud, Ed Sheeran__Si..."
1,0007f3dd09c91198371454c608d47f22,"[Ben Howard__Keep Your Head Up, Ed Sheeran__Sm...","[Ed Sheeran__The A Team, Ed Sheeran__I See Fir..."
2,000b0f32b5739f052b9d40fcc5c41079,[Lars Winnerbäck__Om du lämnade mig nu],"[First Aid Kit__Emmylou, Avicii__Wake Me Up, F..."
3,000c11a16c89aa4b14b328080f5954ee,[Arctic Monkeys__Why'd You Only Call Me When Y...,"[Lorde__Royals, Haim__The Wire, Mark Ronson__U..."
4,00123e0f544dee3ab006aa7f1e5725a7,[Foster The People__Don't Stop (Color on the W...,"[Led Zeppelin__Immigrant Song, Led Zeppelin__W..."
...,...,...,...
13602,ffe32d5412269f3041c58cbf0dde3306,[U2__Even Better Than The Real Thing],"[Stevie Wonder__Signed, Sealed, Delivered (I'm..."
13603,ffec270eae226caa14ddaef291d73fff,[Afrojack__Ten Feet Tall],"[Calvin Harris__Under Control, Calvin Harris__..."
13604,fff60baf392613ed33f745b89a9b38f7,"[Belinda Carlisle__Heaven Is A Place On Earth,...","[Soft Cell__Tainted Love, Culture Club__Karma ..."
13605,fff616055993498d6127f3f467cf9f2b,"[Metric__Help I'm Alive, The Smiths__Asleep (2...","[Imagine Dragons__It's Time, Bon Iver__Skinny ..."


In [33]:
result.apply(lambda row: precision_at_k(row['actual'], row['als_100']), axis=1).mean()

0.10212390681266995

In [48]:
result.apply(lambda row: precision_at_k(row['als_100'], row['actual']), axis=1).mean()

0.0016588038350327198

In [42]:
result.apply(lambda row: precision_at_k(row['als_100'], row['actual']), axis=1).mean()

0.09997797195095087

In [37]:
with open('../app/models/als_100.pkl', 'wb') as f:
    pickle.dump(als_100, f)

In [53]:
save_npz('../data/train_user_item_matrix.npz', sparse_matrix)

In [50]:
als_256 = AlternatingLeastSquares(factors=256, 
                              regularization=0.001,
                              iterations=15, 
                              calculate_training_loss=True, 
                              num_threads=4)

In [51]:
als_256.fit(sparse_matrix.T, show_progress=True)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))




In [52]:
result['als_256'] = result['user_id'].apply(lambda x: get_recommendations(als_256, x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  result['als_256'] = result['user_id'].apply(lambda x: get_recommendations(als_256, x))


In [59]:
result.apply(lambda row: precision_at_k(row['als_256'], row['actual']), axis=1).mean()

0.010439811292040784

In [48]:
result.apply(lambda row: precision_at_k(row['als_256'], row['actual']), axis=1).mean()

0.09225346941772525