# Item Based Recommendation 

Recommendation based on track similarity. Two tracks are considered similar when the same user has listened to both of them.

In [1]:
import pandas as pd
import numpy as np
import sys
from scipy.sparse import *
from scipy import *
import pickle

In [2]:
# Load the training interactions, keeping only the user/track pair columns.
training = pd.read_csv("training.csv").loc[:, ['user', 'track']]
training.head(n=5)

Unnamed: 0,user,track
0,000c556aa021426138c47076d53dfbec,spotify:track:4t1iATlAmtRUidtyJkX83E
1,000c556aa021426138c47076d53dfbec,spotify:track:6pb5BBnIM5IM7R1cqag6rE
2,000c556aa021426138c47076d53dfbec,spotify:track:4H9637mkUDyk9Rq0WgDEwc
3,000c556aa021426138c47076d53dfbec,spotify:track:2frPJPY60CPvMwESF6rEXk
4,000c556aa021426138c47076d53dfbec,spotify:track:5ikdUUm6JbnEVnp35c7dvy


In [3]:
# Give every distinct track a dense integer id (its position in `tracks`) so it
# can serve as a row/column index of the track-by-track matrix.
tracks = training['track'].unique()
size = len(tracks)

# Empty (size x size) accumulator; a later cell adds co-occurrence counts to it.
# `np.int16` is spelled out instead of the bare `int16`, which only resolved
# through `from scipy import *` (NumPy aliases in the scipy namespace are
# deprecated). int16 caps each stored count at 32767.
items_sparse = csr_matrix((size, size), dtype=np.int16)

# Forward (id -> track) and reverse (track -> id) lookups.
id_track_dict = dict(enumerate(tracks))
track_id_dict = {track: i for i, track in id_track_dict.items()}

# Persist both mappings next to the notebook.
# NOTE(review): presumably read back by the recommender module - confirm.
with open('track_to_id.pickle', 'wb') as handle:
    pickle.dump(track_id_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)
with open('id_to_track.pickle', 'wb') as handle:
    pickle.dump(id_track_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [4]:
# Map each user id to the set of distinct tracks recorded for that user.
groups = training.groupby('user')
user_dict = {user_id: set(group['track']) for user_id, group in groups}

In [7]:
# Accumulate track co-occurrence counts into `items_sparse`: for every
# contributing user, items_sparse[a, b] grows by 1 for each ordered pair (a, b)
# of that user's tracks (including a == b, so the diagonal counts users per track).
#
# Only users with more than MIN_TRACKS and fewer than MAX_TRACKS tracks contribute.
# NOTE(review): thresholds are kept from the original code; presumably they drop
# low-signal users and bound the quadratic number of pairs per user - confirm.
#
# NOTE: this cell adds into the existing `items_sparse`; re-running it without
# first re-creating the empty matrix counts every user twice.
MIN_TRACKS = 25   # exclusive lower bound
MAX_TRACKS = 500  # exclusive upper bound

count = 0
# The loop variable is deliberately not called `tracks`, which would overwrite
# the array of unique tracks created when the id mappings were built.
for user_tracks in user_dict.values():
    track_ids = [track_id_dict[track] for track in user_tracks]
    sys.stdout.write("User: "+str(count)+" --- "+str(len(track_ids))+" tracks \r")
    sys.stdout.flush()
    count += 1
    if MIN_TRACKS < len(track_ids) < MAX_TRACKS:
        ids = np.asarray(track_ids)
        n_tracks = len(ids)
        # Build all n*n ordered pairs in one matrix per user instead of one
        # full-shape matrix (and one sparse addition) per track; the summed
        # result is the same.
        row = np.repeat(ids, n_tracks)   # a, a, ..., b, b, ...
        col = np.tile(ids, n_tracks)     # a, b, ..., a, b, ...
        data = np.ones(n_tracks * n_tracks, dtype=np.int16)
        acc = csr_matrix((data, (row, col)), shape=(size, size), dtype=np.int16)
        items_sparse += acc

User: 568 --- 1561 tracks  

In [8]:
# Write the accumulated track-by-track matrix to disk in SciPy's .npz format.
save_npz(file='items_sparse.npz', matrix=items_sparse)

## Eval

In [2]:
# Reload both splits, restricted to the user/track pair columns.
training, test = (
    pd.read_csv(csv_name)[['user', 'track']]
    for csv_name in ("training.csv", "test.csv")
)

In [3]:
# Preview the first rows of the test split (user and track columns).
test.head()

Unnamed: 0,user,track
0,000c556aa021426138c47076d53dfbec,spotify:track:6i5TjPi3OBBj4RhAIWOY0I
1,000c556aa021426138c47076d53dfbec,spotify:track:0QidGooAdHGlLqQNuyh9cY
2,000c556aa021426138c47076d53dfbec,spotify:track:2SWBfqj1FrS8t8z56G55rP
3,000c556aa021426138c47076d53dfbec,spotify:track:5TQ6L7Wzq1uY7xyvISGyzG
4,000c556aa021426138c47076d53dfbec,spotify:track:7ryorjLv0ReRSUmcwo659l


In [5]:
# Training split: user id -> set of distinct tracks recorded for that user.
groups = training.groupby('user')
user_dict_train = {user_id: set(group['track']) for user_id, group in groups}

In [10]:
# Test split: user id -> set of distinct tracks recorded for that user.
groups = test.groupby('user')
user_dict_test = {user_id: set(group['track']) for user_id, group in groups}

In [11]:
# Number of distinct test-split tracks for one sample user.
len(user_dict_test['000c556aa021426138c47076d53dfbec'])

268

In [12]:
# Number of distinct training-split tracks for the same sample user.
len(user_dict_train['000c556aa021426138c47076d53dfbec'])

1068

In [13]:
# Training tracks of the sample user; passed to the recommender as its input.
tracks_train = user_dict_train['000c556aa021426138c47076d53dfbec']

In [19]:
# Test tracks of the same user; used as the ground truth when scoring.
tracks_test = user_dict_test['000c556aa021426138c47076d53dfbec']

In [15]:
import ibrec

In [41]:
# Ask the project-local `ibrec` module for recommendations based on the sample
# user's training tracks.
# NOTE(review): 50000 presumably caps the number of returned tracks - confirm
# against ibrec.recommend.
tracks_result = ibrec.recommend(tracks_train, 50000)

In [42]:
# How many recommendations were actually returned.
len(tracks_result)

26285

In [43]:
def get_precision(result_query, result_given):
    """Return the share of ``result_query`` that also occurs in ``result_given``.

    Args:
        result_query: Collection of returned items; its length is the denominator.
        result_given: Collection of reference items to compare against.

    Returns:
        ``0`` when ``result_query`` is empty, otherwise the number of distinct
        items common to both collections divided by ``len(result_query)``.
    """
    n_returned = len(result_query)
    if not n_returned:
        return 0
    hits = set(result_given) & set(result_query)
    return len(hits) / n_returned
def get_recall(result_query, result_given):
    """Return the share of ``result_given`` that also occurs in ``result_query``.

    Args:
        result_query: Collection of returned items.
        result_given: Collection of reference items; its length is the denominator.

    Returns:
        ``0`` when ``result_given`` is empty, otherwise the number of distinct
        items common to both collections divided by ``len(result_given)``.
    """
    n_expected = len(result_given)
    if not n_expected:
        return 0
    hits = set(result_given) & set(result_query)
    return len(hits) / n_expected


# Score the recommendations against the test tracks of the same user.
# Sanity check for the helpers: get_precision([1, 2], [1, 2, 3, 4]) is 1.0 and
# get_recall([1, 2], [1, 2, 3, 4]) is 0.5.
precision, recall = (
    metric(tracks_result, tracks_test) for metric in (get_precision, get_recall)
)
print(precision, recall, sep="\n")

0.0022446262126688227
0.22014925373134328
