In [2]:
#data
import numpy as np
import numpy_indexed as npi
import pandas as pd
from scipy.spatial.distance import cdist
from sklearn.externals import joblib
from keras.models import load_model
#visualization
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
import plotly.graph_objs as go
#APIs
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
#API Keys
import api_keys
#Utils
from tqdm import tqdm_notebook as tqdm
from IPython.display import display

In [4]:
#get model, y-j transformer, std-scaler, PCA, and RNN model
model = load_model('cloud/model.h5')
std_scaler = joblib.load('scalers/std_scaler.pkl')
y_j = joblib.load('scalers/yeo_johnson.pkl')
pca = joblib.load('scalers/pca.pkl')

features = [
    'mode',
    'acousticness',
    'danceability',
    'energy',
    'instrumentalness',
    'liveness',
    'loudness',
    'speechiness',
    'valence',
] 

In [5]:
#Authenticate
sp = spotipy.Spotify(
    client_credentials_manager = SpotifyClientCredentials(
        client_id=api_keys.CLIENT_ID,
        client_secret=api_keys.CLIENT_SECRET
    )
) 

In [9]:
class Playlist():
    playlists = {}
    def __init__(self):
        self.name = input('Playlist Name:')
        Playlist.playlists[self.name] = self
        self.init_song_strings = []
        self.search_results = []
        self.recommended_track_ids = [] #list of track ids straight from spotify
        self.trax = [] #all tracks as dict
        self.df = None #this is where the data goes
        self.playlist = None
        
        #input your songs
        self.init_song_strings.append(input('Song 1: '))
        self.init_song_strings.append(input('Song 2: '))
        self.init_song_strings.append(input('Song 3: '))
        
        # DO EVERYTHING
        self.get_recommendations()
        self.get_features()
        self.transform()
        self.build_playlist() 
        self.show_playlist() 
        self.do_pca()
        
    def get_recommendations(self):
        print('Getting Recommendations...')
        for ss in self.init_song_strings:
            r = sp.search(ss,limit=1)['tracks']['items'][0]
            self.search_results.append({
                        'id':r['id'],
                        'artists':[i['name'] for i in r['artists']],
                        'name':r['name']
                })
        for id_ in tqdm(self.search_results):
            #append to self.recommended_track_ids
            results = sp.recommendations(seed_tracks = [id_['id']],limit=100)
            for r in results['tracks']:
                if r['id'] not in [i['id'] for i in self.recommended_track_ids]:
                    self.recommended_track_ids.append({
                        'id':r['id'],
                        'artists':[i['name'] for i in r['artists']],
                        'name':r['name']
                        }) #there may be duplicates
    
    def get_features(self):
        print('Getting Initial Song Features')
        for id_ in tqdm(self.search_results):
            dict_ = sp.audio_features(id_['id'])[0]
            dict_.update(id_)
            self.trax.append(dict_)
        print('Getting Recommended Song Features')
        for id_ in tqdm(self.recommended_track_ids):
            dict_ = sp.audio_features(id_['id'])[0]
            dict_.update(id_)
            self.trax.append(dict_)
    
    def transform(self):
        '''
        rehape and then apply yeo-johnson and standard scaler transformations - 
        these are already trained above and are global vars
        '''
        print('Applying Transformations...')

        columns = ['id','artists','name','tempo','time_signature','key',] + features
        self.df = pd.DataFrame(self.trax)[columns].dropna()
        self.df[features[1:]] = std_scaler.transform(y_j.transform(self.df[features[1:]]))
        self.playlist = self.df.iloc[0:3].copy()
        
    def rnn_predict(self):
        return model.predict(np.array(
            [np.array(
                self.playlist[features]
            )]
        ))[0,-1]
    
    @staticmethod
    def tempo_similarity(t1,t2):
        if t1 <= 0:
             return -1
        t2 *= (t2 > 0)
        return np.cos(2*np.pi*np.log2(t1/t2))
    
    @staticmethod
    def key_similarity(s1,s2):
        '''between -1 and 1, 1 being most similar
        uses the circle of fifths - optimized for broadcasting
        perfect fifth and same octave are given values of 1.
        s1 - a single song (row of dataframe)
        s2 - many songs (multiple rows of dataframe)
        '''
        #get relative major:
        #this is for each and is NOT OPTIMIZED >:(
        k1 = s1['key']
        k2 = s2['key']
        m1 = s1['mode']
        m2 = s2['mode']
        #broadcast dat b00lean
        k1 += 3*(m1==0)
        k2 += 3*(m2==0)
        #get remainder
        k1,k2 = np.remainder((k1,k2),12)
        #get the position
        circle_of_fifths = {0:0,7:1,2:2,9:3,4:4,11:5,6:6,1:7,8:8,3:9,10:10,5:11,}
        #return similarity
        #https://stackoverflow.com/questions/32243062/numpy-broadcast-through-dictionary
        diff = np.abs(
            circle_of_fifths[k1] - npi.remap(k2, list(circle_of_fifths.keys()), list(circle_of_fifths.values()))
        )
        diff = np.abs((diff>6)*12-diff)
        return 1 - ((diff == 0) + diff - 1)/2.5
    
    def argmin_song(self,songs):
        '''get the next song song which minimizes objective function,
        which itself is a function of uses key_similarity, tempo_similarity,
        and euclidian distance to the vector given by rnn_predict.
        alpha, beta, and gamma are tuning parameters.
        '''
        song = self.playlist.iloc[-1]
        
        alpha = 1 #how much to count distance
        beta = 1 #how much to count key similarity
        gamma = 1 #how much to count tempo similarity
        delta = 1.5 #scaler for RNN vector
        
        distance = cdist([self.rnn_predict()*delta],songs[features[1:]])[0]
        key_similarity = Playlist.key_similarity(song,songs)
        tempo_similarity = Playlist.tempo_similarity(song['tempo'],songs['tempo']).values
        
        
        
        return songs.reset_index().iloc[np.argmin(
            alpha*distance - beta*key_similarity - gamma*tempo_similarity
        )]
    
    def build_playlist(self):
        '''use self.argmin_song() to build the whole playlist'''
        print('Determining Best Song Sequence...')
        for i in tqdm(range(10)):
            songs = self.df[~self.df['id'].isin(self.playlist['id'].to_list())]
            self.playlist = self.playlist.append(self.argmin_song(songs), ignore_index = True)
    
    def show_playlist(self):
        'give the playlist showing (artist,song,id) in a dataframe'
        display(self.playlist[['artists','name','id']])
    
    def do_pca(self):
        'do the nice 3d graph based on global trained PCA'
        print('Visualizing...')
        x,y,z = pca.transform(self.playlist[features[1:]]).T
        fig = go.Figure(data=[go.Scatter3d(
            x=x,y=y,z=z,
            mode='lines+markers',
            text=self.playlist['artists'].apply(lambda x: ', '.join(x)) \
            + ' - ' + self.playlist['name'].astype(str),
            marker=dict(
                size=5,
                color=z,                # set color to an array/list of desired values
                colorscale='Viridis',   # choose a colorscale
                opacity=0.8,
            ),
            line=dict(
            color='#000000',
            width=1
        ) 
        )])
        fig.update_layout(
            margin=dict(l=0, r=0, b=0, t=0),
            scene={
                'xaxis_title':'PC0',
                'yaxis_title':'PC1',
                'zaxis_title':'PC2'
            }
        ),
        fig.show()

In [10]:
Playlist() 

Playlist Name:test delta
Song 1: kaytranada
Song 2: bonobo
Song 3: disclosure
Getting Recommendations...


HBox(children=(IntProgress(value=0, max=3), HTML(value='')))


Getting Initial Song Features


HBox(children=(IntProgress(value=0, max=3), HTML(value='')))


Getting Recommended Song Features


HBox(children=(IntProgress(value=0, max=296), HTML(value='')))


Applying Transformations...
Determining Best Song Sequence...


HBox(children=(IntProgress(value=0, max=10), HTML(value='')))




Unnamed: 0,artists,name,id
0,"[KAYTRANADA, Shay Lia]",CHANCES,439X8jGytErRiPnaoUJHju
1,[Bonobo],Linked,3rCtueI7qBN2kZBZnXuk5K
2,"[Khalid, Disclosure]",Talk,0rTV5WefWd1J3OwIheTzxM
3,[Ruel],Painkiller,1abFkY2jm6KDFMZ7RD9YJh
4,"[Moon Boots, Nic Hanson]",Keep The Faith,5d6C3XycqRLFAGLtyO3Rfx
5,"[Rex Orange County, Benny Sings]",Loving Is Easy,5EYi2rH4LYs6M21ZLOyQTx
6,[Queen Naija],Butterflies,4A58lXGBRCg2cumRXTRxHO
7,[Franc Moody],Dance Moves,1DriOnyA5tFc2bUdM6nglI
8,[Elder Island],Black Fur,3Ipgo2twyvBySMwsTzunlH
9,[LEISURE],On My Mind,2MIAO0Bqj54tY3uQ9wP7KG


Visualizing...


<__main__.Playlist at 0x13a3f2358>