# Import Libraries

In [30]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Spotify API Configuration with Spotipy

In [60]:
import spotipy
from spotipy.oauth2 import SpotifyOAuth
import cred

# OAuth scopes requested from Spotify, sent as one space-separated string.
scope = ' '.join((
    'user-library-read',
    'playlist-modify-public',
    'playlist-read-private',
    'playlist-modify-private',
    'user-top-read',
))

# Authenticated client used by every API call below; the credentials are read
# from the local `cred` module rather than being written into the notebook.
sp = spotipy.Spotify(
    auth_manager=SpotifyOAuth(
        scope=scope,
        redirect_uri=cred.redirect_url,
        client_secret=cred.client_SECRET,
        client_id=cred.client_ID,
    )
)

[None]


In [64]:
# Smoke test: request the audio features of a single track ID and print the raw
# response to see which fields the API returns.
audio_features = sp.audio_features(['11dFghVXANMlKmJXsNCbNl'])
print(audio_features)

[{'danceability': 0.696, 'energy': 0.905, 'key': 2, 'loudness': -2.743, 'mode': 1, 'speechiness': 0.103, 'acousticness': 0.011, 'instrumentalness': 0.000905, 'liveness': 0.302, 'valence': 0.625, 'tempo': 114.944, 'type': 'audio_features', 'id': '11dFghVXANMlKmJXsNCbNl', 'uri': 'spotify:track:11dFghVXANMlKmJXsNCbNl', 'track_href': 'https://api.spotify.com/v1/tracks/11dFghVXANMlKmJXsNCbNl', 'analysis_url': 'https://api.spotify.com/v1/audio-analysis/11dFghVXANMlKmJXsNCbNl', 'duration_ms': 207960, 'time_signature': 4}]


In [68]:
def analyze_playlist(creator, playlist_id):
    """
    Build a dataframe of track metadata and audio features for a playlist.

    Parameters
    ----------
    creator : str
        Playlist owner, passed through to ``sp.user_playlist_tracks``.
    playlist_id : str
        Spotify ID of the playlist to analyze.

    Returns
    -------
    pandas.DataFrame
        One row per usable track with the columns artist, album, track_name,
        track_id, followed by the audio-feature columns. An empty playlist
        yields an empty frame with the same columns.

    Notes
    -----
    Entries without a track object, track ID or album, and tracks for which
    the API returns no audio features, are skipped instead of raising.
    """
    playlist_features_list = ["artist","album","track_name", "track_id","danceability","energy","key","loudness","mode", "speechiness","acousticness","instrumentalness","liveness","valence","tempo", "duration_ms","time_signature"]
    # Everything after the four metadata columns is copied from the audio-features payload.
    audio_feature_names = playlist_features_list[4:]

    # The playlist endpoint is paginated: follow the "next" links so playlists
    # longer than a single page are not silently truncated.
    page = sp.user_playlist_tracks(creator, playlist_id)
    items = []
    while page:
        items.extend(page["items"])
        page = sp.next(page) if page.get("next") else None

    # Keep only entries that carry the fields read below.
    tracks = []
    for item in items:
        track = item.get("track")
        if track and track.get("id") and track.get("album"):
            tracks.append(track)

    # Request audio features in batches (one request per batch instead of one
    # per track) and collect plain dicts; the dataframe is built once at the
    # end rather than grown row by row with pd.concat.
    batch_size = 100
    rows = []
    for start in range(0, len(tracks), batch_size):
        batch = tracks[start:start + batch_size]
        features_batch = sp.audio_features([track["id"] for track in batch]) or []
        for track, audio_features in zip(batch, features_batch):
            if audio_features is None:
                # No audio features available for this track.
                continue
            row = {
                "artist": track["album"]["artists"][0]["name"],
                "album": track["album"]["name"],
                "track_name": track["name"],
                "track_id": track["id"],
            }
            for feature in audio_feature_names:
                row[feature] = audio_features[feature]
            rows.append(row)

    return pd.DataFrame(rows, columns=playlist_features_list)

### First-time Run

In [256]:
from pathlib import Path

# Fetch the playlist once from the Spotify API and cache it on disk.
# The cache is written under data/ because that is the path the
# "Existing CSV Run" cell reads it back from; the directory is created if needed.
vibes_to_reset = analyze_playlist('Matthew Lim','30cdY3LXb1nZBdTY05wEXj')
csv_path = Path("data") / "vibes_to_reset.csv"
csv_path.parent.mkdir(parents=True, exist_ok=True)
vibes_to_reset.to_csv(csv_path, index = False)
vibes_to_reset.head()

Unnamed: 0,artist,album,track_name,track_id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,Mac Ayres,Drive Slow,Easy,58dSdjfEYNSxte1aNVxuNf,0.722,0.282,5,-7.484,1,0.106,0.373,0.00557,0.147,0.217,91.367,314358,4
1,Daniel Caesar,Freudian,Best Part (feat. H.E.R.),1RMJOxR6GRPsBHL8qeC2ux,0.524,0.364,7,-10.209,1,0.0367,0.808,0.0,0.1,0.436,75.24,209831,4
2,Bruno Major,A Song For Every Moon,Easily,2k9N4caeCIJLOWwWwssrEM,0.772,0.256,7,-8.545,0,0.0481,0.491,0.00612,0.144,0.357,118.902,210240,3
3,keshi,skeletons,skeletons,2BuJrxYKhYky20dQqTlobO,0.719,0.265,2,-12.782,1,0.0754,0.296,0.0865,0.108,0.273,80.021,152480,4
4,Kendrick Lamar,DAMN.,LOVE. FEAT. ZACARI.,6PGoSes0D9eUDeeAafB2As,0.8,0.585,10,-7.343,1,0.0924,0.264,0.0,0.153,0.779,126.058,213400,4


### Existing CSV Run

In [555]:
# Load the playlist features from the saved CSV and preview the first rows.
vibes_to_reset = pd.read_csv('data/vibes_to_reset.csv')
vibes_to_reset.head()

Unnamed: 0,artist,album,track_name,track_id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,Mac Ayres,Drive Slow,Easy,58dSdjfEYNSxte1aNVxuNf,0.722,0.282,5,-7.484,1,0.106,0.373,0.00557,0.147,0.217,91.367,314358,4
1,Daniel Caesar,Freudian,Best Part (feat. H.E.R.),1RMJOxR6GRPsBHL8qeC2ux,0.524,0.364,7,-10.209,1,0.0367,0.808,0.0,0.1,0.436,75.24,209831,4
2,Bruno Major,A Song For Every Moon,Easily,2k9N4caeCIJLOWwWwssrEM,0.772,0.256,7,-8.545,0,0.0481,0.491,0.00612,0.144,0.357,118.902,210240,3
3,keshi,skeletons,skeletons,2BuJrxYKhYky20dQqTlobO,0.719,0.265,2,-12.782,1,0.0754,0.296,0.0865,0.108,0.273,80.021,152480,4
4,Kendrick Lamar,DAMN.,LOVE. FEAT. ZACARI.,6PGoSes0D9eUDeeAafB2As,0.8,0.585,10,-7.343,1,0.0924,0.264,0.0,0.153,0.779,126.058,213400,4


In [556]:
def ohe_prep(df, column, new_name):
    """
    One-hot encode a single dataframe column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the column to encode.
    column : str
        Name of the column to encode.
    new_name : str
        Prefix for the generated columns; each one is labelled
        ``"<new_name>|<value>"``.

    Returns
    -------
    pandas.DataFrame
        The indicator columns produced by ``pd.get_dummies`` for
        ``df[column]``, relabelled with the prefix and with the row index
        reset to 0..n-1.
    """
    encoded = pd.get_dummies(df[column])
    # Relabel every indicator column as "<prefix>|<original value>".
    encoded.columns = ["|".join((new_name, str(level))) for level in encoded.columns]
    return encoded.reset_index(drop=True)

In [557]:
from sklearn.preprocessing import MinMaxScaler

def generate_feature_set(df):
    """
    Build the per-track feature matrix used for playlist/song comparison.

    Parameters
    ----------
    df : pandas.DataFrame
        Playlist frame holding the metadata columns artist, album,
        track_name and track_id, the categorical columns key and mode, and
        the remaining audio-feature columns. The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        The audio-feature columns of ``df`` (values unchanged), followed by
        twelve ``key|0``..``key|11`` columns and the ``mode|0``/``mode|1``
        columns. One-hot entries are weighted 0.5; key or mode values that
        do not occur in ``df`` get an all-zero column.

    Raises
    ------
    KeyError
        If any of the dropped metadata/categorical columns is missing.
    """
    # One-hot encoding for the 'key' and 'mode' columns, weighted by 0.5.
    # The result is reindexed to a fixed column layout so the feature width
    # does not depend on which keys/modes happen to occur in this playlist;
    # calc_similarity compares against exactly this 12-key / 2-mode layout.
    key_columns = ["key|" + str(k) for k in range(12)]
    mode_columns = ["mode|0", "mode|1"]
    key_ohe = (ohe_prep(df, 'key','key') * 0.5).reindex(columns=key_columns, fill_value=0.0)
    mode_ohe = (ohe_prep(df, 'mode','mode') * 0.5).reindex(columns=mode_columns, fill_value=0.0)

    # Drop on a copy so the caller's dataframe is left intact, and reset the
    # index so rows line up with the (index-reset) one-hot frames in concat.
    audio = df.drop(columns=['artist','album','track_name','track_id','key','mode']).reset_index(drop=True)

    # NOTE: the audio features are returned unscaled, exactly as before. The
    # MinMax-scaled copy that used to be computed here was never part of the
    # returned frame, so that dead computation has been removed.
    final = pd.concat([audio,key_ohe,mode_ohe],axis=1)
    return final

In [558]:
# Build the per-track feature matrix and convert it to a NumPy array.
# NOTE(review): this rebinds `vibes_to_reset` from the loaded DataFrame to an ndarray,
# so the cell cannot be re-run without reloading the CSV first.
vibes_to_reset = generate_feature_set(vibes_to_reset).to_numpy()

In [559]:
def generate_playlist_vector(df):
    """
    Collapse a per-track feature matrix into a single playlist vector.

    The vector is the column-wise sum (``axis=0``) of ``df``, i.e. every
    feature is summed across all tracks.
    """
    playlist_vector = df.sum(axis=0)
    return playlist_vector

In [560]:
# Collapse the per-track feature matrix into one playlist vector (column-wise sum).
# NOTE(review): `vibes_to_reset` is rebound once more here, from the feature matrix to the vector.
vibes_to_reset = generate_playlist_vector(vibes_to_reset)

In [567]:
from sklearn.metrics.pairwise import cosine_similarity
def calc_similarity(playlist_vector, song_id):
    """
    Calculates the cosine similarity between a playlist vector and a song.

    Parameters
    ----------
    playlist_vector : array-like
        Playlist feature vector laid out in the order of ``key_features``
        below. Anything ``np.asarray`` accepts (ndarray, list, Series).
    song_id : str
        Spotify ID of the track to compare against the playlist.

    Returns
    -------
    numpy.ndarray
        Array of shape (1, 1) holding the cosine similarity.

    Raises
    ------
    ValueError
        If the API returns no audio features for ``song_id``.
    """
    key_features = ['danceability', 'energy', 'loudness', 'speechiness', 'acousticness',
       'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms',
       'time_signature', 'key|0', 'key|1', 'key|2', 'key|3', 'key|4', 'key|5',
       'key|6', 'key|7', 'key|8', 'key|9', 'key|10', 'key|11', 'mode|0',
       'mode|1']

    features = sp.audio_features(song_id)
    if not features or features[0] is None:
        raise ValueError("No audio features available for track {!r}".format(song_id))
    song_df = pd.DataFrame(features)

    # Weighted one-hot encoding of the song's own key and mode.
    key_ohe = ohe_prep(song_df, 'key','key') * 0.5
    mode_ohe = ohe_prep(song_df, 'mode','mode') * 0.5
    song_df = pd.concat([song_df,key_ohe,mode_ohe],axis=1)

    # Select exactly the playlist's feature layout: fields that are not
    # features (type, id, uri, ...) are discarded by the selection, and the
    # key/mode columns this song does not have are created and filled with 0.
    song_vector = song_df.reindex(columns=key_features).fillna(0).to_numpy().reshape(1,-1)

    # np.asarray lets callers pass a list or pandas Series as well as an ndarray.
    return cosine_similarity(np.asarray(playlist_vector).reshape(1,-1),song_vector)
# Compare the playlist vector with a single track, identified by its Spotify track ID.
# NOTE(review): the recorded result is exactly 1.0. The vectors are built from unscaled
# features, so large-magnitude columns such as duration_ms probably dominate the cosine — confirm.
calc_similarity(vibes_to_reset,'1ORJS3w16Z3bwNMzZpfULO')

#calc_similarity(vibes_to_reset,'0DiWol3AO6WpXZgp0goxAV')

array([[1.]])