In [1]:
import pandas as pd
import numpy as np
import scipy.sparse as sparse
from sklearn.metrics import mean_squared_error
import os
import sys
import pickle
import itertools

sys.path.append('../Models')
sys.path.append('../')

import wrmf, wrmf_helpers
from settings import PATH_TO_SPARSE_MATRIX

# (1) Compute the WRMF factors

In [2]:
"""
NOTE: ONLY RUN THIS IF YOU WANT TO RECOMPUTE WRMF WITH NEW PARAMS.
OTHERWISE USE PRECOMPUTED FACTORIZATION (STORED LOCALLY/IN DRIVE)
"""
# The string above is the only expression in this cell, so Jupyter echoes it
# back as the cell output.
#
# The refit below is intentionally left commented out. Uncommenting it passes
# PATH_TO_SPARSE_MATRIX and the `params` dict to wrmf_helpers.get_fitted_wrmf
# and binds the result to `song_factors`.
# NOTE(review): the meaning of each key (number of latent factors,
# regularisation strength, iteration count, confidence scaling) is presumed
# from its name -- confirm against wrmf_helpers before tuning.
# params = {
#     'factors': 20,
#     'reg': 0.1,
#     'iters': 20,
#     'alpha': 15
# }
# song_factors = wrmf_helpers.get_fitted_wrmf(PATH_TO_SPARSE_MATRIX, params)

'\nNOTE: ONLY RUN THIS IF YOU WANT TO RECOMPUTE WRMF WITH NEW PARAMS.\nOTHERWISE USE PRECOMPUTED FACTORIZATION (STORED LOCALLY/IN DRIVE)\n'

# (2) Load the precomputed song and playlist factors

In [3]:
# Load the two precomputed factor matrices from pickle files that are expected
# to sit in the notebook's working directory.
# NOTE: pickle can execute arbitrary code while loading -- only unpickle files
# you generated yourself or otherwise trust.
with open('wrmf_song_factors.pickle', mode='rb') as f:
    song_factors = pickle.load(f)

with open('wrmf_playlist_factors.pickle', mode='rb') as f:
    playlist_factors = pickle.load(f)

# Sanity check: show both shapes, one per line.
print(song_factors.shape, playlist_factors.shape, sep='\n')

(1610661, 20)
(500000, 20)


# (4) Example: get the "n_similar" top songs for a hand-picked pair of seed songs

In [4]:
# Seed tracks for the demo: "It Wasn't Me" (by Shaggy) and "Ignition - Remix".
song_ids = [
    '1lzr43nnXAijIGYnCT8M8H',
    '5dNfHmqgr128gMY2tc5CeJ',
]

# Ask for the ten closest songs to the seeds in factor space.
# verbose=True presumably makes the helper print the results table itself
# (this is the only call in the notebook that produces one) -- confirm in
# wrmf_helpers.
top_matches = wrmf_helpers.get_top_similar_from_tracks(
    song_factors,
    song_ids,
    n_similar=10,
    verbose=True,
)


Recommended Songs for ["It Wasn't Me", 'Ignition - Remix']
------------------------------------------------------------
Track Name                                        Score
------------------------------------------------------------
Ignition - Remix                                  0.988348
It Wasn't Me                                      0.986976
Ride Wit Me                                       0.939032
Buy U a Drank (Shawty Snappin') (feat. Yung Joc)  0.910351
Remix to Ignition                                 0.905673
Playin' the Game                                  0.896503
Ms. Jackson                                       0.894765
Gold Digger                                       0.894054
It Wasn't Me                                      0.892494
Yeah!                                             0.889881


# (3) Example: get the "n_similar" top songs for seeds taken from an actual test playlist

In [5]:
# Directory holding the per-file playlist CSVs. The default is the original
# author's checkout, so behaviour is unchanged when nothing is configured;
# set the PLAYLIST_SONGS_DIR environment variable to point at your own copy
# instead of editing this cell.
songs_dir = os.environ.get(
    'PLAYLIST_SONGS_DIR',
    '/Users/mabasta/Desktop/CS109a/playlist-generation/data/Songs',
)
filename = os.path.join(songs_dir, 'songs501.csv')

# Metadata columns that the cells below never read; they only use `pid` and
# `track_uri`. drop() raises KeyError if any listed column is absent, which
# doubles as a cheap schema check on the file.
cols_ignore = ['pos', 'artist_name', 'artist_uri', 'track_name', 'album_uri', 'duration_ms', 'album_name']
df = pd.read_csv(filename).drop(columns=cols_ignore)

In [6]:
# Build one track_uri Series per playlist id, take the first playlist, and
# strip the 'spotify:track:' prefix so only the bare track ids remain.
sample_playlists = [tracks['track_uri'] for _pid, tracks in df.groupby('pid')]
test_playlist = sample_playlists[0].str.replace('spotify:track:', '')

# print out the playlist, one looked-up song name per track id
for tid in test_playlist:
    print(wrmf_helpers.get_song_name(tid))

My Girl (feat. T. Gatez)
New Religion
Broad Shoulders (feat. Chance the Rapper)
Free
She Moves In Her Own Way
Don't Take the Money
that one song
Lovely
Always Like This
Twice
Caroline
Never Seen Anything "Quite Like You"
Sunshine & Kool-Aid
No Such Thing
Heart's Content
Smoke and Mirrors
Best For Last
90210 (feat. G-Eazy)
Spaces
Milk & Honey
Don't Dream It's Over
Lover Come Back
A Dream Is A Wish Your Heart Makes


In [7]:
# get similar using song to song
# Seed with the first two tracks of the test playlist. `.iloc` makes the
# positional slice explicit, independent of the Series' index labels.
seed_ids = list(test_playlist.iloc[:2])
top_matches = wrmf_helpers.get_top_similar_from_tracks(
    song_factors,
    seed_ids,
    n_similar=20000,
    verbose=False
)

# Tracks of the test playlist that were recovered among the recommendations.
# intersection() accepts any iterable, so no intermediate set is needed.
overlaps = set(test_playlist).intersection(top_matches)
print(f"Found {len(overlaps)} overlaps:")

# Iterating a set of strings yields a different order in every interpreter
# session (string hashes are randomised), so walk the playlist in its own
# order instead: the listing is then reproducible. dict.fromkeys() removes
# repeated track ids while preserving order, so each hit is printed once.
for overlap in dict.fromkeys(test_playlist):
    if overlap in overlaps:
        print(wrmf_helpers.get_song_name(overlap))

Found 9 overlaps:
Twice
Smoke and Mirrors
that one song
Lovely
A Dream Is A Wish Your Heart Makes
My Girl (feat. T. Gatez)
New Religion
Milk & Honey
Caroline


In [8]:
# get similar using playlist to song
# Same seeds as the song-to-song cell (`seed_ids`), but this helper is also
# given the playlist factors and a cap on how many similar playlists to use.
top_matches = wrmf_helpers.get_top_similar_from_playlists(
    song_factors,
    playlist_factors,
    seed_ids,
    n_similar_songs=20000,
    n_similar_playlists=200
)

# Tracks of the test playlist that were recovered among the recommendations.
# intersection() accepts any iterable, so no intermediate set is needed.
overlaps = set(test_playlist).intersection(top_matches)
print(f"Found {len(overlaps)} overlaps:")

# Iterating a set of strings yields a different order in every interpreter
# session (string hashes are randomised), so walk the playlist in its own
# order instead: the listing is then reproducible. dict.fromkeys() removes
# repeated track ids while preserving order, so each hit is printed once.
for overlap in dict.fromkeys(test_playlist):
    if overlap in overlaps:
        print(wrmf_helpers.get_song_name(overlap))

Found 17 overlaps:
Twice
She Moves In Her Own Way
New Religion
No Such Thing
Broad Shoulders (feat. Chance the Rapper)
that one song
Smoke and Mirrors
Heart's Content
Always Like This
Don't Dream It's Over
Free
My Girl (feat. T. Gatez)
90210 (feat. G-Eazy)
Sunshine & Kool-Aid
Milk & Honey
Caroline
Lover Come Back


In [9]:
# get similar using ensemble playlist and song
# Same seeds and limits as the playlist-to-song cell, passed to the ensemble
# helper instead.
top_matches = wrmf_helpers.get_top_similar_from_ensemble(
    song_factors,
    playlist_factors,
    seed_ids,
    n_similar_songs=20000,
    n_similar_playlists=200
)

# Tracks of the test playlist that were recovered among the recommendations.
# intersection() accepts any iterable, so no intermediate set is needed.
overlaps = set(test_playlist).intersection(top_matches)
print(f"Found {len(overlaps)} overlaps:")

# Iterating a set of strings yields a different order in every interpreter
# session (string hashes are randomised), so walk the playlist in its own
# order instead: the listing is then reproducible. dict.fromkeys() removes
# repeated track ids while preserving order, so each hit is printed once.
for overlap in dict.fromkeys(test_playlist):
    if overlap in overlaps:
        print(wrmf_helpers.get_song_name(overlap))

Found 18 overlaps:
Twice
She Moves In Her Own Way
New Religion
No Such Thing
Broad Shoulders (feat. Chance the Rapper)
that one song
Smoke and Mirrors
Heart's Content
Always Like This
Don't Dream It's Over
Free
Never Seen Anything "Quite Like You"
My Girl (feat. T. Gatez)
90210 (feat. G-Eazy)
Sunshine & Kool-Aid
Milk & Honey
Caroline
Lover Come Back
