In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.sparse as sps
import time
%matplotlib inline

#train_final.csv - the training set of interactions
train_final = pd.read_csv('input/train_final.csv', delimiter = "\t");

#tracks_final.csv - supplementary information about the items
tracks_final = pd.read_csv('input/tracks_final.csv', delimiter = "\t");

#playlists_final.csv - supplementary information about the users
playlists_final = pd.read_csv('input/playlists_final.csv', delimiter = "\t");

#target_playlists.csv - the set of target playlists that will receive recommendations
target_playlists = pd.read_csv('input/target_playlists.csv');

#target_tracks.csv - the set of target items (tracks) to be recommended
target_tracks = pd.read_csv('input/target_tracks.csv');


In [129]:
import time

In [3]:
#This step is not needed yet, will make ratings worse! 

def get_relevant_tracks()
    #Now we want to remove some redundant stuff. 

    #We will remove all songs which are not occurring more than 10 times in train_final
    #Nevertheless, we still want to keep all tracks which are in the target tracks.  

    popularity = train_final.groupby(by="track_id").playlist_id.nunique().to_frame()

    #remove index name
    popularity.reset_index(level = 0, inplace = True)

    #Rename the columns
    popularity.columns = ['track_id','occurrences']

    #Remove all targeted tracks - TESTED, working as expected
    tracks_relevant = popularity[~popularity['track_id'].isin(target_tracks['track_id'])]

    #Remove tracks occurring less than 10 times
    tracks_relevant = tracks_relevant[tracks_relevant['occurrences'] > 10]

    #Add the targeteted tracks back again
    tracks_relevant = pd.concat([tracks_relevant, target_tracks])

    return(tracks_relevant)

    print("Removed %s redundant tracks which occured less than 10 times." %(tracks_final-tracks_relevant))

tracks_relevant = get_relevant_tracks()

#Remove irrelevant tracks from train_final and tracks_final
train_final = train_final[train_final['track_id'].isin(tracks_relevant['track_id'])]

print("Train_final now contains %s interactions. " %(train_final.shape[0]))

tracks_final = tracks_final[tracks_final['track_id'].isin(tracks_relevant['track_id'])]

print("Tracks_final now contains %s tracks. "%(tracks_final.shape[0]))

(41756, 2)

<57561x41756 sparse matrix of type '<class 'numpy.float64'>'
	with 0 stored elements in Compressed Sparse Row format>

In [6]:
#Now lets take a look at the tags.
tracks_final.head()

Unnamed: 0,track_id,artist_id,duration,playcount,album,tags
0,2972914,144,224000,49.0,[7],"[54087, 1757, 1718, 116712, 189631]"
1,2750239,246,157000,1.0,[8],"[189631, 3424, 177424, 46208, 205245]"
2,1550729,144,217000,554.0,[9],"[54087, 109806, 46869, 183258, 54337]"
3,2169950,144,207000,200.0,[9],"[54087, 70618, 207003, 109806, 116712]"
4,1903709,144,198000,5.0,[None],"[54087, 81223, 116712, 215342, 71028]"


In [139]:
### Translating all content ids into indexes.

#We need to create buckets for the playcount and duration. 
#Lets create buckets and a help function for the duration. 

n_duration_buckets = 8
def duration_to_bucket(duration):
    if duration <= 0:
        print("Null duration reached bucket function. ")
        return None
    elif duration < 90000: #not a song
        return 1
    elif duration < 140000: #short song
        return 2
    elif duration < 220000: #radio song
        return 3
    elif duration < 340000: #normal song
        return 4
    elif duration < 480000: #long song
        return 5
    elif duration < 720000: #really long
        return 6
    elif duration < 1200000: #super long
        return 7
    elif duration > 1200000: #mixtape/compilation
        return 8

n_playcount_buckets = 7
def playcount_to_bucket(playcount):
    if playcount <= 0 or playcount is None:
        print("Null playcount reached bucket function. ")
        return None
    elif playcount < 254: #0,4 percentile not popular
        return 1
    elif playcount < 881: #0,6 perc: known
        return 2
    elif playcount < 1560: #0,7 popular
        return 3
    elif playcount < 2808: #0,8 very popular
        return 4
    elif playcount < 5900: #0,9 hits
        return 5
    elif playcount < 10494: #0,95 super hits
        return 6
    elif playcount > 10494: # mega hits
        return 7


In [144]:
tracks_final['tags'].head()

content_to_index = {}
content_to_id = {}
content_counter = 0

#Lets translate the tags to indexes. 
for row in tracks_final['tags']:
    tags = row.strip('[ ]').split(', ')
    for tag in tags:
        if len(tag) > 0: 
            tag = "t"+tag
            if not(tag in content_to_index):
                content_to_index[tag] = content_counter
                content_to_id[content_counter] = tag
                content_counter += 1;
                
#Lets translate album into indexes
for album in tracks_final['album']:
    album = album.strip('[ ]')
    if album != None and len(album) > 0: #None should not be considered content
        album = "al"+album
        if not(album in content_to_index):
            content_to_index[album] = content_counter
            content_to_id[content_counter] = album
            content_counter += 1;

#Lets translate artist_id into indexes
for artist in tracks_final['artist_id']:
    artist = str(artist)
    artist = "ar"+artist
    if not(artist in content_to_index):
        content_to_index[artist] = content_counter
        content_to_id[content_counter] = artist
        content_counter += 1;
        

#Lets translate the duration buckets into indexes. 
for bucket in range(n_duration_buckets): 
    bucket = "d"+str(bucket)
    content_to_index[bucket] = content_counter
    content_to_id[content_counter] = bucket
    
    content_counter += 1

#Lets translate the playcount buckets into indexes. 
for playcount in range(n_playcount_buckets): 
    playcount = "p"+str(playcount)
    content_to_index[playcount] = content_counter
    content_to_id[content_counter] = playcount
    
    content_counter += 1

#Fun thing to try: can I add all duration/playcounts in one col, normalizing from 0-1? 

print(len(content_to_index))

77056


In [45]:
#If we translate each track_id to a track_index which will serve as matrix index, we can save a lot of time. 


#We need a way to get from track_id to index in O(1).
#Let's create a dictionary

track_to_id = {}
track_to_index = {}
counter = 0; #We will start at 1, reserving col 0 for indexes.  
for track_id in tracks_final['track_id']:
    track_id = int(track_id)
    track_to_index[track_id] = counter
    track_to_id[counter] = track_id
    counter += 1;
    
#and a way to get from playlist_id to index in O(1)


playlist_to_index = {}
playlist_to_id = {}
counter = 0; 
for playlist_id in playlists_final['playlist_id']:
    playlist_id = int(playlist_id)
    playlist_to_index[playlist_id] = counter
    playlist_to_id[counter] = playlist_id
    counter += 1;
    
print("We have {} playlists with {} unique tracks with {} unique content types. ".format(len(playlist_to_index), len(track_to_index), len(content_to_index)))


We have 57561 playlists with 100000 unique tracks with 77041 unique content types. 


In [46]:
#Now we can create an Item Content Matrix. 

#ICM_all = np.zeros((len(tracks_indexes), len(tags_indexes)), int)
#ICM_all = sps.coo_matrix((len(track_to_index), len(content_to_index)), int)
#print(ICM_all.shape)


(100000, 77041)


In [143]:
tracks_final[:10]

Unnamed: 0,track_id,artist_id,duration,playcount,album,tags
0,2972914,144,224000,49.0,[7],"[54087, 1757, 1718, 116712, 189631]"
1,2750239,246,157000,1.0,[8],"[189631, 3424, 177424, 46208, 205245]"
2,1550729,144,217000,554.0,[9],"[54087, 109806, 46869, 183258, 54337]"
3,2169950,144,207000,200.0,[9],"[54087, 70618, 207003, 109806, 116712]"
4,1903709,144,198000,5.0,[None],"[54087, 81223, 116712, 215342, 71028]"
5,2256817,144,218000,2.0,[9],"[54087, 109806, 189631, 49166, 116712]"
6,2561768,928,223000,249.0,[26],"[50764, 4425, 11056, 205245, 81223]"
7,474864,928,193000,73.0,[22],"[205245, 81223, 11056, 267, 3982]"
8,1378455,928,304000,73.0,[22],"[11056, 205245, 81223, 189631, 84597]"
9,1523190,928,206000,10.0,[22],"[205245, 11056, 81223, 4425, 189631]"


In [105]:
#So let's fill the ICM with our data.

def build_ICM():
    tracks_matrix = tracks_final.as_matrix()
    rows = np.array([], dtype = int)
    cols = np.array([], dtype = int)
    #val[i] = value of row[i] col[i]
    #val = []
    counter = 0
    starttime = time.time()
    lasttime = starttime
    for track in tracks_matrix: 
        track_id, artist_id, duration, playcount, album, tags = np.split(track, 6)

        #Get track index
        track_index = track_to_index[int(track_id[0])]


        #add artist
        artist_index = content_to_index["ar"+str(artist_id[0])]

        rows = np.append(rows, track_index)
        cols = np.append(cols, artist_index)

        #add album
        album = album[0].strip("[ ]")
        if album != None and len(album) > 0 and not album == ("None"):
            album_index = content_to_index["al"+album]

            rows = np.append(rows, track_index)
            cols = np.append(cols, album_index)

        #add tags
        tags = tags[0].strip('[ ]').split(', ')

        for tag in tags: 
            if len(tag) > 0:
                tag = "t"+tag
                tag_index = content_to_index[tag]

                rows = np.append(rows, track_index)
                cols = np.append(cols, tag_index)

        if counter%5000 == 0:
            print("Track %s of %s. %s ms per track." %(counter, tracks_matrix.shape[0], (time.time()-lasttime)/5000)  
            lasttime = time.time()-lasttime
        counter += 1

        #add duration
        duration = int(duration)
        if duration > 0:
            duration_bucket = duration_to_bucket(duration)
            duration_index = content_to_index["d"+duration_bucket]

            rows = np.append(rows, track_index)
            cols = np.append(cols, duration_index)

        #add playcount
        playcount = int(playcount)
        if playcount > 0:
            playcount_bucket = playcount_to_bucket(playcount)
            playcount_index = content_to_index["p"+playcount_bucket]

            rows = np.append(rows, track_index)
            cols = np.append(cols, playcount_index)

    #Implicit ratings: all ratings are 1.             
    val = np.ones(rows.shape, dtype = int)

    #Build ICM matrix. 
    ICM_all = sps.coo_matrix((val, (rows, cols)), dtype = int)
    
    return ICM_all
    print("Built ICM matrix with %s content values." %(val.shape[0]))

#Build new ICM
#ICM_all = build_ICM()
                  
#Get old ICM

Track 0 of 100000
Track 500 of 100000
Track 1000 of 100000
Track 1500 of 100000
Track 2000 of 100000
Track 2500 of 100000
Track 3000 of 100000
Track 3500 of 100000
Track 4000 of 100000
Track 4500 of 100000
Track 5000 of 100000
Track 5500 of 100000
Track 6000 of 100000
Track 6500 of 100000
Track 7000 of 100000
Track 7500 of 100000
Track 8000 of 100000
Track 8500 of 100000
Track 9000 of 100000
Track 9500 of 100000
Track 10000 of 100000
Track 10500 of 100000
Track 11000 of 100000
Track 11500 of 100000
Track 12000 of 100000
Track 12500 of 100000
Track 13000 of 100000
Track 13500 of 100000
Track 14000 of 100000
Track 14500 of 100000
Track 15000 of 100000
Track 15500 of 100000
Track 16000 of 100000
Track 16500 of 100000
Track 17000 of 100000
Track 17500 of 100000
Track 18000 of 100000
Track 18500 of 100000
Track 19000 of 100000
Track 19500 of 100000
Track 20000 of 100000
Track 20500 of 100000
Track 21000 of 100000
Track 21500 of 100000
Track 22000 of 100000
Track 22500 of 100000
Track 23000 

In [106]:
#Save the ICM

sps.save_npz("Saved Matrixes/ICM_all_coo", ICM_all)
print("Saved ICM!")

Saved ICM!


In [107]:
#Let's convert to csr. 
ICM_all = ICM_all.tocsr()
print("Converted")

Converted


In [None]:
#Lets build a URM matrix. Should be nofplaylists x nofitems

item_playlist_matrix = np.zeros([playlists_final.shape[0], tracks_final.shape[0]],int) 

interactions = train_final.as_matrix()
for row in interactions:
    #Lets get the info
    playlist_id = row[0]
    track_id = row[1]
    
    #Now lets get the proper indexes. 
    playlist_index = playlist_to_index[playlist_id]
    track_index = track_to_index[track_id]
    
    #And now lets add it to the matrix
    item_playlist_matrix[playlist_index][track_index] = 1
    

print(item_playlist_matrix.shape)
print("hej1")

In [116]:
def build_URM(train_test_split = 0.80):
    #Builds urm 
    
    numInteractions = train_final.shape[0]


    train_mask = np.random.choice([True,False], numInteractions, [train_test_split, 1-train_test_split])

    playlistList = train_final['playlist_id'].values
    itemList = train_final['track_id'].values

    #Translate ids
    playlistList_translated = np.zeros(playlistList.shape)
    itemList_translated = np.zeros(itemList.shape)
    ratingList = np.ones((playlistList.shape), int)
    
    for i in range(train_final.shape[0]):
        playlistList_translated[i] = playlist_to_index[playlistList[i]]
        itemList_translated[i] = track_to_index[itemList[i]]
    print("Translated ids to indexes.")
    
    #Build URM matrix. 
    URM_train = sps.coo_matrix((ratingList[train_mask], (playlistList_translated[train_mask], itemList_translated[train_mask])))
    URM_train = URM_train.tocsr()
    print("Built URM_train")
    
    
    #Build URM_test
    test_mask = np.logical_not(train_mask)
    URM_test = sps.coo_matrix((ratingList[test_mask], (playlistList[test_mask], itemList[test_mask])))
    URM_test = URM_test.tocsr()
    print("Built URM_test")
    
    return URM_train, URM_test

URM_train, URM_test = build_URM(1)

Translated ids to indexes.
Built URM_train
Built URM_test


In [149]:
#Evaluation functions

def precision(recommended_items, relevant_items):
    
    is_relevant = np.in1d(recommended_items, relevant_items, assume_unique=True)
    
    precision_score = np.sum(is_relevant, dtype=np.float32) / len(is_relevant)
    
    return precision_score

def recall(recommended_items, relevant_items):
    
    is_relevant = np.in1d(recommended_items, relevant_items, assume_unique=True)
    
    recall_score = np.sum(is_relevant, dtype=np.float32) / relevant_items.shape[0]
    
    return recall_score

def MAP(recommended_items, relevant_items):
   
    is_relevant = np.in1d(recommended_items, relevant_items, assume_unique=True)
    
    # Cumulative sum: precision at 1, at 2, at 3 ...
    p_at_k = is_relevant * np.cumsum(is_relevant, dtype=np.float32) / (1 + np.arange(is_relevant.shape[0]))
    
    map_score = np.sum(p_at_k) / np.min([relevant_items.shape[0], is_relevant.shape[0]])

    return map_score

def evaluate_algorithm(URM_test, recommender_object, recommendations, at=5):
    
    cumulative_precision = 0.0
    cumulative_recall = 0.0
    cumulative_MAP = 0.0
    
    num_eval = 0
    
    playlists = target_playlists['playlist_id']

    for i,user_id in  enumerate(playlists):
        
        if i % 500 == 0:
            print("User %d of %d" % (i, len(playlists)))

        relevant_items = URM_test[user_id].indices
        
        if len(relevant_items)>0:
            
            recommended_items = recommender_object.recommend(user_id, at=at)
            num_eval+=1

            cumulative_precision += precision(recommended_items, relevant_items)
            cumulative_recall += recall(recommended_items, relevant_items)
            cumulative_MAP += MAP(recommended_items, relevant_items)


    cumulative_precision /= num_eval
    cumulative_recall /= num_eval
    cumulative_MAP /= num_eval
    
    print("Recommender performance is: Precision = {:.4f}, Recall = {:.4f}, MAP = {:.4f}".format(
        cumulative_precision, cumulative_recall, cumulative_MAP))



In [None]:
class myRecommender(object):
    def __init__(self, URM, k=50, shrinkage=100, similarity='cosine'):
        self.dataset = URM
        self.k = k
        self.shrinkage = shrinkage
        self.similarity_name = similarity
        if similarity == 'cosine':
            self.distance = Cosine(shrinkage=self.shrinkage)
        elif similarity == 'pearson':
            self.distance = Pearson(shrinkage=self.shrinkage)
        elif similarity == 'adj-cosine':
            self.distance = AdjustedCosine(shrinkage=self.shrinkage)
        else:
            raise NotImplementedError('Distance {} not implemented'.format(similarity))


In [146]:
class BasicItemKNNRecommender(object):
    """ ItemKNN recommender with cosine similarity and no shrinkage"""

    def __init__(self, URM, k=50, shrinkage=100, similarity='cosine'):
        self.dataset = URM
        self.k = k
        self.shrinkage = shrinkage
        self.similarity_name = similarity
        if similarity == 'cosine':
            self.distance = Cosine(shrinkage=self.shrinkage)
        elif similarity == 'pearson':
            self.distance = Pearson(shrinkage=self.shrinkage)
        elif similarity == 'adj-cosine':
            self.distance = AdjustedCosine(shrinkage=self.shrinkage)
        else:
            raise NotImplementedError('Distance {} not implemented'.format(similarity))

    def __str__(self):
        return "ItemKNN(similarity={},k={},shrinkage={})".format(
            self.similarity_name, self.k, self.shrinkage)

    def fit(self, X):
        item_weights = self.distance.compute(X)
        
        item_weights = check_matrix(item_weights, 'csr') # nearly 10 times faster
        print("Converted to csr")
        
        # for each column, keep only the top-k scored items
        # THIS IS THE SLOW PART, FIND A BETTER SOLUTION        
        values, rows, cols = [], [], []
        nitems = self.dataset.shape[1]
        for i in range(nitems):
            if (i % 10000 == 0):
                print("Item %d of %d" % (i, nitems))
                
            this_item_weights = item_weights[i,:].toarray()[0]
            top_k_idx = np.argsort(this_item_weights) [-self.k:]
                       #This shit is wrong 
            values.extend(this_item_weights[top_k_idx])
            rows.extend(np.arange(nitems)[top_k_idx])
            cols.extend(np.ones(self.k) * i)
            
        self.W_sparse = sps.csc_matrix((values, (rows, cols)), shape=(nitems, nitems), dtype=np.float32)

    def recommend(self, user_id, at=None, exclude_seen=True):
        # compute the scores using the dot product
        user_profile = self.dataset[user_id]
        scores = user_profile.dot(self.W_sparse).toarray().ravel()

        # rank items
        ranking = scores.argsort()[::-1]
        if exclude_seen:
            ranking = self._filter_seen(user_id, ranking)
            
        export = [0,0,0,0,0]
        for i in range(5):
            t_id = track_to_id[ranking[i]]
            export[i] = t_id
            
        return export
    def _filter_seen(self, user_id, ranking):
        user_profile = self.dataset[user_id]
        seen = user_profile.indices
        unseen_mask = np.in1d(ranking, seen, assume_unique=True, invert=True)
        return ranking[unseen_mask]
    
print("done")

done


In [119]:
def check_matrix(X, format='csc', dtype=np.float32):
    if format == 'csc' and not isinstance(X, sps.csc_matrix):
        return X.tocsc().astype(dtype)
    elif format == 'csr' and not isinstance(X, sps.csr_matrix):
        return X.tocsr().astype(dtype)
    elif format == 'coo' and not isinstance(X, sps.coo_matrix):
        return X.tocoo().astype(dtype)
    elif format == 'dok' and not isinstance(X, sps.dok_matrix):
        return X.todok().astype(dtype)
    elif format == 'bsr' and not isinstance(X, sps.bsr_matrix):
        return X.tobsr().astype(dtype)
    elif format == 'dia' and not isinstance(X, sps.dia_matrix):
        return X.todia().astype(dtype)
    elif format == 'lil' and not isinstance(X, sps.lil_matrix):
        return X.tolil().astype(dtype)
    else:
        return X.astype(dtype)

In [120]:
import scipy
class ISimilarity(object):
    """Abstract interface for the similarity metrics"""

    def __init__(self, shrinkage=10):
        self.shrinkage = shrinkage

    def compute(self, X):
        pass


class Cosine(ISimilarity):
    def compute(self, X):
        # convert to csc matrix for faster column-wise operations
        X = check_matrix(X, 'csc', dtype=np.float32)

        # 1) normalize the columns in X
        # compute the column-wise norm
        # NOTE: this is slightly inefficient. We must copy X to compute the column norms.
        # A faster solution is to  normalize the matrix inplace with a Cython function.
        Xsq = X.copy()
        Xsq.data **= 2
        norm = np.sqrt(Xsq.sum(axis=0))
        norm = np.asarray(norm).ravel()
        norm += 1e-6
        # compute the number of non-zeros in each column
        # NOTE: this works only if X is instance of sparse.csc_matrix
        col_nnz = np.diff(X.indptr)
        # then normalize the values in each column
        X.data /= np.repeat(norm, col_nnz)
        print("Normalized")

        # 2) compute the cosine similarity using the dot-product
        dist = X * X.T
        print("Computed")
        
        # zero out diagonal values
        dist = dist - sps.dia_matrix((dist.diagonal()[scipy.newaxis, :], [0]), shape=dist.shape)
        print("Removed diagonal")
        
        # and apply the shrinkage
        if self.shrinkage > 0:
            dist = self.apply_shrinkage(X, dist)
            print("Applied shrinkage")    
        
        return dist

    def apply_shrinkage(self, X, dist):
        # create an "indicator" version of X (i.e. replace values in X with ones)
        X_ind = X.copy()
        X_ind.data = np.ones_like(X_ind.data)
        # compute the co-rated counts
        co_counts = X_ind * X_ind.T
        # remove the diagonal
        co_counts = co_counts - sps.dia_matrix((co_counts.diagonal()[scipy.newaxis, :], [0]), shape=co_counts.shape)
        # compute the shrinkage factor as co_counts_ij / (co_counts_ij + shrinkage)
        # then multiply dist with it
        co_counts_shrink = co_counts.copy()
        co_counts_shrink.data += self.shrinkage
        co_counts.data /= co_counts_shrink.data
        dist.data *= co_counts.data
        return dist


In [121]:
URM_train.shape

(57560, 100000)

In [None]:
#TODO: Can we create a "better" URM by matrix factorization? 
from lightfm import LightFM

model = LightFM(no_components=5)

model.fit(URM_train, epochs=20)

predictions = model.predict(target_playlists[0:10], target_tracks[0:1000])






In [None]:
print(predictions)

In [130]:
starttime = time.time()
rec = BasicItemKNNRecommender(URM=URM_train, shrinkage=0.0, k=50)
rec.fit(ICM_all)
print("Done in %s seconds" %(time.time()-starttime))

Normalized
Computed
Removed diagonal
Converted to csr
Item 0 of 100000
Item 10000 of 100000
Item 20000 of 100000
Item 30000 of 100000
Item 40000 of 100000
Item 50000 of 100000
Item 60000 of 100000
Item 70000 of 100000
Item 80000 of 100000
Item 90000 of 100000
Done in 683.4752779006958 seconds


In [26]:
num_tot_items = ICM_all.shape[0]

# let's count how many items have a certain feature
items_per_feature = (ICM_all > 0).sum(axis=0)

IDF = np.array(np.log(num_tot_items / items_per_feature))[0]

print(ICM_all.shape)
print(IDF.shape)

(100000, 31900)
(31900,)


  


In [34]:
ICM_idf = sps.csr_matrix(ICM_all, dtype=np.float64)
# compute the number of non-zeros in each col
# NOTE: this works only if X is instance of sparse.csc_matrix
col_nnz = np.diff(check_matrix(ICM_idf, 'csc').indptr)
print(col_nnz.shape)
print(ICM_idf.shape)
print(IDF.shape)
# then normalize the values in each col
ICM_idf.data *= np.repeat(IDF, col_nnz)

(31900,)
float64
float64


In [36]:
rec_idf = BasicItemKNNRecommender(URM=URM_train, shrinkage=0.0, k=50)
rec_idf.fit(ICM_idf)

Normalized
Computed
Removed diagonal
Converted to csr
Item 0 of 100000
Item 10000 of 100000
Item 20000 of 100000
Item 30000 of 100000
Item 40000 of 100000
Item 50000 of 100000
Item 60000 of 100000
Item 70000 of 100000
Item 80000 of 100000
Item 90000 of 100000


In [131]:
zeros = np.zeros((target_playlists.size, 6), dtype = int)
recommendations = pd.DataFrame(zeros)
recommendations.columns = ['playlist_id', 1, 2, 3, 4, 5]

counter = 0
starttime = time.time()
for playlist_id in target_playlists['playlist_id']:
    
    if counter % 1000 == 0: 
        print ("%s out of 10000 playlists, %s sec." %(counter, time.time()-starttime))
    
    
    playlist_id_translated = playlist_to_index[int(playlist_id)]
    #print(rec.recommend(playlist_id_translated, at=5))
    recommendations.iloc[counter, 1:6] = rec.recommend(playlist_id_translated, at=5)
    recommendations.iloc[counter, 0] = playlist_id
    counter += 1
    
#print(recommendations)
print("done")

0 out of 10000 playlists, 0.0018591880798339844 sec.
1000 out of 10000 playlists, 103.81256532669067 sec.
2000 out of 10000 playlists, 207.41671419143677 sec.
3000 out of 10000 playlists, 309.9324781894684 sec.
4000 out of 10000 playlists, 412.0452392101288 sec.
5000 out of 10000 playlists, 515.3428280353546 sec.
6000 out of 10000 playlists, 618.8582541942596 sec.
7000 out of 10000 playlists, 721.2632591724396 sec.
8000 out of 10000 playlists, 820.9152603149414 sec.
9000 out of 10000 playlists, 923.4950971603394 sec.
done


In [150]:
evaluate_algorithm(URM_test, rec)

User 0 of 10000


IndexError: index (10024884) out of range

In [132]:
def save_to_file():
    #Saves the recommendations dataframe to the .csv-file. 
    np.savetxt("output/recommendations_more_content.csv",recommendations, fmt = '%s,%s %s %s %s %s', header = "playlist_id,track_ids", newline = "\n")
    
    
def test():
    #Do something
    print("Result: ")
    pass


save_to_file()
print(recommendations.head)

<bound method NDFrame.head of       playlist_id        1        2        3        4        5
0        10024884  1637241  1432851  3789197   820484   435345
1        10624787  2969077  3719846  1629279  1980691   680692
2         4891851  2089117   301240  2724257  1371741   193077
3         4267369  2950102  1838583  1820820  1461310  3276967
4           65078   949178  1492640  3276761   631238  2863566
5        10637124  2340644  3846549  2582934   273502  3032626
6         3223162  2967703  3610791  1254269   681739  1526301
7         7541503   956454  1480755  4591425  1825510  1825990
8         6189367  1675280     8795  1510163  1729189  3205586
9         8459943  2222883   802763  2214075  2752335  1334909
10       10138804  1155458  2935861  3866159  1887656  2137341
11       10562075  4420433  3299694  1308886  1210562  3624009
12       10184821  1056611  2127102   377629  3781394  1868817
13        4189678  2664733   362133  1402667  2664942  2215412
14        6299524   19577