In [1]:
import numpy as np
import pandas as pd

import faiss
from cornac import Experiment
from cornac.eval_methods import RatioSplit
from cornac.models import BPR
from cornac.metrics import Precision, Recall

FM model is only supported on Linux.
Windows executable can be found at http://www.libfm.org.


In [2]:
# Turn off pandas' chained-assignment check for the rest of the notebook
# (None disables both the warning and the error mode).
pd.set_option("mode.chained_assignment", None)

In [3]:
# Load the raw playlist/track table; the path is relative to the notebook's
# working directory.
df = pd.read_csv(filepath_or_buffer="../data/spotify_playlists.csv")

In [4]:
# Build implicit-feedback triples (playlist_id, track_id, feedback=1).
# The frame is created in a single chain with .assign() instead of adding a
# column to a slice of `df`, so no chained assignment takes place and the
# result never aliases `df`.
playlist_df = (
    df[['playlist_id', 'track_id']]
    .drop_duplicates()      # a track listed twice in a playlist counts once
    .assign(feedback=1)     # constant positive signal for every pair
)

# Plain list of [playlist_id, track_id, feedback] rows, as passed to RatioSplit.
data = playlist_df.values.tolist()

In [5]:
%%time

# Fixed seed so the train/test split and the BPR initialisation are repeatable
# under Restart & Run All.
# NOTE(review): multi-threaded BPR training may still show small run-to-run
# differences - confirm if exact reproducibility is required.
SEED = 42

# Hold out 20% of the interactions for testing. Every feedback value is 1, so
# with rating_threshold=0.5 all held-out interactions count as positives.
rs = RatioSplit(data=data, test_size=0.2, rating_threshold=0.5, seed=SEED)

# BPR with 300 latent factors; `bpr` is reused by later cells for its
# item factors and its train_set id mapping.
bpr = BPR(k=300, seed=SEED)
metrics = [Precision(k=10), Recall(k=10)]

# Train and evaluate; results are also written under ../logs.
Experiment(eval_method=rs, models=[bpr], metrics=metrics,
           user_based=True, save_dir="../logs").run()


TEST:
...
    | Precision@10 | Recall@10 | Train (s) | Test (s)
--- + ------------ + --------- + --------- + --------
BPR |       0.0069 |    0.0083 |   33.0046 | 293.3275

CPU times: user 5min 21s, sys: 3.8 s, total: 5min 25s
Wall time: 5min 27s


In [6]:
# Exact (brute-force) nearest-neighbour index over the learned item factors.
# NOTE(review): faiss.IndexFlat uses its default metric here (L2 per the faiss
# docs) - confirm this is the intended notion of track similarity.
item_vectors = bpr.i_factors
n_factors = item_vectors.shape[1]

index = faiss.IndexFlat(n_factors)
index.add(item_vectors)

# Sanity check: number of vectors now stored in the index.
print(index.ntotal)

142543


### Validation

In [7]:
# One row per distinct (track_id, artist_name, track_name) combination.
tracks_df = df[['track_id', 'artist_name', 'track_name']].drop_duplicates()

# Raw track_id -> [artist_name, track_name].
# Built by zipping the three columns instead of DataFrame.iterrows(), which
# creates a Series per row and is very slow on a table of this size. Iteration
# order is unchanged, so if a track_id appears with several artist/title
# variants the last row still wins.
id2name = {
    track_id: [artist, title]
    for track_id, artist, title in zip(
        tracks_df['track_id'], tracks_df['artist_name'], tracks_df['track_name']
    )
}

# Model-internal item index -> [artist_name, track_name], using the training
# set's iid_map (raw track_id -> internal index).
idx2name = {idx: id2name[raw_id] for raw_id, idx in bpr.train_set.iid_map.items()}

In [8]:
def search_track_id(artist, track, tracks=None):
    """Return the ``track_id`` of the first row matching an artist/track query.

    Both queries are matched as literal, case-sensitive substrings.

    Parameters
    ----------
    artist : str
        Substring to look for in the ``artist_name`` column.
    track : str
        Substring to look for in the ``track_name`` column.
    tracks : pandas.DataFrame, optional
        Table with ``artist_name``, ``track_name`` and ``track_id`` columns.
        Defaults to the notebook-level ``df``.

    Returns
    -------
    The ``track_id`` value of the first matching row.

    Raises
    ------
    IndexError
        If no row matches both substrings.
    """
    if tracks is None:
        tracks = df

    # regex=False: artist/track names routinely contain regex metacharacters
    # ("HUMBLE.", "Closer (feat. ...)"), which would otherwise match the wrong
    # rows or raise a regex compilation error.
    # na=False: rows with a missing name simply never match instead of putting
    # NaN into the boolean mask.
    artist_hit = tracks['artist_name'].str.contains(artist, regex=False, na=False)
    track_hit = tracks['track_name'].str.contains(track, regex=False, na=False)

    matches = tracks.loc[artist_hit & track_hit, 'track_id']
    if matches.empty:
        # Same exception type as the bare .iloc[0] used to raise, but with a
        # message that names the failed query.
        raise IndexError(f"no track matching artist={artist!r}, track={track!r}")
    return matches.iloc[0]


def similar_tracks(artist, track, topn=10):
    """Return up to ``topn`` tracks whose BPR item factors are nearest to a track.

    The query track is resolved with ``search_track_id`` and its factor vector
    is searched against the notebook-level faiss ``index``.

    Parameters
    ----------
    artist : str
        Substring of the artist name of the query track.
    track : str
        Substring of the title of the query track.
    topn : int, default 10
        Maximum number of neighbours to return.

    Returns
    -------
    list
        ``[artist_name, track_name]`` entries from ``idx2name``, nearest first,
        never including the query track itself and never longer than ``topn``.

    Raises
    ------
    IndexError
        If no track matches the artist/track query.
    KeyError
        If the matched track is not part of the model's training set.
    """
    target_id = search_track_id(artist, track)

    item_index = bpr.train_set.iid_map
    if target_id not in item_index:
        # Tracks that only occur in the held-out split have no learned factors.
        raise KeyError(f"track {target_id!r} is not in the BPR training set")
    target_idx = item_index[target_id]

    # Ask for one extra neighbour: the query vector is normally its own
    # nearest neighbour and is dropped below.
    query = np.array([bpr.i_factors[target_idx]])
    _, idx = index.search(query, topn + 1)

    # Drop the query itself and faiss' -1 padding (returned when the index
    # holds fewer than topn + 1 vectors). Cap at topn because the query may be
    # missing from its own result list (e.g. duplicate vectors), in which case
    # nothing was filtered out.
    neighbours = [i for i in idx[0] if i != target_idx and i != -1]
    return [idx2name[s] for s in neighbours[:topn]]

In [9]:
similar_tracks("The xx", "Intro")

[['The Chainsmokers', 'Closer'],
 ['Migos', 'Bad and Boujee (feat. Lil Uzi Vert)'],
 ['Diljit Dosanjh', 'Do You Know'],
 ['Gülben Ergen', 'Şıkır Şıkır'],
 ['Post Malone', 'Congratulations'],
 ['Audio Adrenaline', 'Love Was Stronger'],
 ['Feder', 'Blind (feat. Emmi) - Radio Edit'],
 ['Khalid', 'Location'],
 ['Plastilina Mosh', 'Pervert Pop Song'],
 ['Justin Bieber', 'Sorry']]

In [10]:
similar_tracks("Daft Punk", "One More Time")

[['Kendrick Lamar', 'HUMBLE.'],
 ['Nathan Sykes', 'Give It Up'],
 ['Aminé', 'Caroline'],
 ['The Chainsmokers', 'Closer'],
 ['Lil Uzi Vert', 'XO TOUR Llif3'],
 ['R. Kelly', 'Ignition - Remix'],
 ['Childish Gambino', 'IV. sweatpants'],
 ['Diamond Rings', 'Day & Night'],
 ['Flume', 'Depth Charge'],
 ['Simon & Garfunkel', 'Sparrow']]

In [11]:
similar_tracks("Nirvana", "Come As You Are")

[['Kendrick Lamar', 'HUMBLE.'],
 ['X Ambassadors', 'Unsteady'],
 ['Chris Knight', "It Ain't Easy Being Me"],
 ['Drake', 'One Dance'],
 ['The Chainsmokers', 'Closer'],
 ['7eventh Time Down', 'God Is on the Move'],
 ['112', 'Just A Little While'],
 ['Sophie Hunger', 'Die Ganze Welt'],
 ['Logic', '1-800-273-8255'],
 ['Black Lips', 'Modern Art']]

In [12]:
similar_tracks("Beatles", "Yesterday")

[['Misery Index', 'You Lose'],
 ['Mike WiLL Made-It', 'Game For A Lame'],
 ['KYLE', 'iSpy (feat. Lil Yachty)'],
 ['Genesis', 'Ripples - 2007 - Remaster'],
 ['The Velvet Underground', 'Heroin'],
 ['Iggy Grande', 'Get Your Freak On - Trap Mix'],
 ['MYNGA', 'Back Home'],
 ['Nightmares On Wax', 'Hear in colour'],
 ['Mike Posner', 'I Took A Pill In Ibiza - Seeb Remix'],
 ['Monsters Of Folk', 'Termazcal']]

In [13]:
similar_tracks("Olive", "You're Not Alone")

[['The Chainsmokers', 'Closer'],
 ['Drake', 'One Dance'],
 ['Ana Tijoux', 'Somos Sur'],
 ['Florida Georgia Line', 'H.O.L.Y.'],
 ['Calvin Harris', 'This Is What You Came For'],
 ['Justin Timberlake', 'SexyBack'],
 ['Post Malone', 'Congratulations'],
 ['Kendrick Lamar', 'HUMBLE.'],
 ['Gryffin', 'Love In Ruins (feat. Sinead Harnett)'],
 ['Maxwell', 'Bad Habits - uncut']]