In [1]:
import os
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
import seaborn as sns
%config InlineBackend.figure_format ='retina'
import random
from functools import reduce
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials
from spotipy import oauth2

In [2]:
# Spotify username and developer credentials.
# Prefer the environment variables that spotipy itself recognises so real
# secrets never have to be saved inside the notebook; the placeholders below
# are only used when a variable is not set (replace them if you do not use
# environment variables).
cid = os.environ.get('SPOTIPY_CLIENT_ID', 'XXXXXXX')
secret = os.environ.get('SPOTIPY_CLIENT_SECRET', 'XXXXXXXX')
redirect_uri = os.environ.get('SPOTIPY_REDIRECT_URI', 'http://localhost:7777/callback')
username = os.environ.get('SPOTIPY_CLIENT_USERNAME', 'XXXXXXXXX')

In [3]:
# Ask for a user token limited to the 'user-top-read' scope.
# When a token comes back, `sp` is the client every later cell uses to call the API.
scope = 'user-top-read'
token = util.prompt_for_user_token(
    username,
    scope,
    client_id=cid,
    client_secret=secret,
    redirect_uri=redirect_uri,
)

if not token:
    # No client is created in this case, so `sp` stays undefined.
    print("Can't get token for", username)
else:
    sp = spotipy.Spotify(auth=token)

In [4]:
# Getting the user's top tracks (up to 1000).
# The top-items endpoint accepts at most 50 items per request, so a single
# limit=1000 call is not valid; fetch page by page and accumulate the items.
TOP_TRACKS_WANTED = 1000
TOP_TRACKS_PAGE_SIZE = 50

# `results` keeps the shape of a normal response; only its 'items' list grows.
results = sp.current_user_top_tracks(limit=TOP_TRACKS_PAGE_SIZE, offset=0, time_range='short_term')
page = results
while page.get('next') and len(results['items']) < TOP_TRACKS_WANTED:
    page = sp.current_user_top_tracks(limit=TOP_TRACKS_PAGE_SIZE,
                                      offset=len(results['items']),
                                      time_range='short_term')
    if not page['items']:
        # Defensive stop: an empty page would otherwise repeat the same offset forever.
        break
    results['items'].extend(page['items'])

In [5]:
# Flatten the top-tracks payload into one list per field
track_name = [entry['name'] for entry in results['items']]
track_id = [entry['id'] for entry in results['items']]
artist = [entry["artists"][0]["name"] for entry in results['items']]  # first listed artist only
album = [entry["album"]["name"] for entry in results['items']]
duration = [entry["duration_ms"] for entry in results['items']]
popularity = [entry["popularity"] for entry in results['items']]

# Assemble the favourites table; keyword order fixes the column order
df_favourite = pd.DataFrame(dict(
    track_name=track_name,
    album=album,
    track_id=track_id,
    artist=artist,
    duration=duration,
    popularity=popularity,
))

df_favourite.head()

Unnamed: 0,track_name,album,track_id,artist,duration,popularity
0,Deep End Freestyle,Sleepy Hallow Presents: Sleepy For President,2AlYncTpVHKwHb55F9lF6O,Sleepy Hallow,115200,75
1,Glory Boy,Freewave 3,5LpnrXjrt0BOU0iOGH78UN,LUCKI,111048,40
2,Place,Whole Lotta Red,1Bg2CNZw6S4e9cGWPmi0uI,Playboi Carti,117239,69
3,Feels Like Death,Feels Like Death,03vMyCyCK7pVWjC1i1zur0,Levi Carter,237505,44
4,King Vamp,Whole Lotta Red,2iqHcRoOfLl1fXCf1bGO0J,Playboi Carti,186776,63


In [6]:
# Getting features for each song
def fetch_audio_features(sp, df):
    """Fetch the audio features of every track listed in ``df``.

    Parameters
    ----------
    sp : object
        Client exposing ``audio_features(track_ids)``, which returns one entry
        per requested id (a dict of features, or ``None``).
    df : pandas.DataFrame
        Must contain the columns ``'track_id'`` and ``'track_name'``; any other
        columns are ignored.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``'track_name'``, with ``'track_id'`` followed by one column
        per audio feature. A track for which the client returned ``None`` keeps
        its row, with NaN in every feature column.
    """
    feature_names = ['danceability', 'acousticness', 'energy', 'tempo',
                     'instrumentalness', 'loudness', 'liveness', 'duration_ms', 'key',
                     'valence', 'speechiness', 'mode']
    batch_size = 50

    # Positional 0..n-1 index: the axis=1 concat below aligns on index labels,
    # so a filtered or re-indexed input would otherwise pair tracks with the
    # wrong feature rows.
    playlist = df[['track_id', 'track_name']].reset_index(drop=True)

    # Make the API requests, batch_size ids at a time
    audio_features = []
    for start in range(0, playlist.shape[0], batch_size):
        batch_ids = playlist['track_id'].iloc[start:start + batch_size].tolist()
        audio_features += sp.audio_features(batch_ids)

    # One row of feature values per track; a None entry becomes a row of NaN
    # instead of raising TypeError on the key lookup.
    missing = float('nan')
    features_list = [
        [missing if features is None else features[name] for name in feature_names]
        for features in audio_features
    ]
    df_audio_features = pd.DataFrame(features_list, columns=feature_names)

    # Create the final df, using 'track_name' as index ('track_id' stays a column)
    df_playlist_audio_features = pd.concat([playlist, df_audio_features], axis=1)
    return df_playlist_audio_features.set_index('track_name')

In [7]:
# Audio features for the tracks collected in df_favourite (result is indexed by track name)
df_fav = fetch_audio_features(sp, df_favourite)
df_fav.head()

Unnamed: 0_level_0,track_id,danceability,acousticness,energy,tempo,instrumentalness,loudness,liveness,duration_ms,key,valence,speechiness,mode
track_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Deep End Freestyle,2AlYncTpVHKwHb55F9lF6O,0.868,0.703,0.479,133.928,0.0,-6.687,0.102,115200,4,0.776,0.445,0
Glory Boy,5LpnrXjrt0BOU0iOGH78UN,0.784,0.0966,0.519,129.953,0.0109,-9.868,0.0748,111048,1,0.599,0.096,1
Place,1Bg2CNZw6S4e9cGWPmi0uI,0.891,0.000674,0.628,131.06,0.0,-6.35,0.119,117240,1,0.04,0.105,1
Feels Like Death,03vMyCyCK7pVWjC1i1zur0,0.755,0.337,0.626,110.088,0.0,-6.247,0.137,237505,6,0.465,0.293,1
King Vamp,2iqHcRoOfLl1fXCf1bGO0J,0.691,0.0117,0.756,144.188,0.0,-5.638,0.257,186776,8,0.396,0.352,0


In [8]:
# Function to create a df of featured playlist
def featured_playlists(sp):
    """Return the featured playlists as a dataframe.

    Requests up to 50 featured playlists through ``sp.featured_playlists`` and
    keeps, for each one, its id, its name and the total number of tracks.

    Returns
    -------
    pandas.DataFrame
        Columns ``'id'``, ``'name'`` and ``'#tracks'``, one row per playlist.
    """
    # Single API request; the playlist entries sit under 'playlists' -> 'items'
    entries = sp.featured_playlists(limit=50)['playlists']['items']

    # Create the final df
    return pd.DataFrame({
        "id": [entry['id'] for entry in entries],
        "name": [entry['name'] for entry in entries],
        "#tracks": [entry['tracks']['total'] for entry in entries],
    })

In [9]:
# Build the featured-playlists table (columns: id, name, #tracks) and display it
df_playlists = featured_playlists(sp)
df_playlists

Unnamed: 0,id,name,#tracks
0,37i9dQZF1DXcBWIGoYBM5M,Today's Top Hits,50
1,37i9dQZF1DWYBO1MoTDhZI,Good Vibes,133
2,37i9dQZF1DXbYM3nMM0oPk,Mega Hit Mix,75
3,37i9dQZF1DX0MLFaUdXnjA,Chill Pop,109
4,37i9dQZF1DWTcqUzwhNmKv,Kickass Metal,60
5,37i9dQZF1DX6GwdWRQMQpq,Feelin' Myself,50
6,37i9dQZF1DWXRvPx3nttRN,Feel Good Acoustic,150
7,37i9dQZF1DX4dyzvuaRJ0n,mint,88
8,37i9dQZF1DXaz7CcPuNVXF,No Stress!,20
9,37i9dQZF1DXdPec7aLTmlC,Happy Hits!,100


In [10]:
# Getting the tracks in each playlist
def fetch_playlist_tracks(sp, playlistsid):
    """Return the id and name of every track in a playlist.

    Parameters
    ----------
    sp : object
        Client exposing ``playlist_tracks(playlist_id, fields, limit, offset,
        market)``, which returns a page dict with the keys ``'items'`` and
        ``'next'``.
    playlistsid : str
        Id of the playlist to read.

    Returns
    -------
    pandas.DataFrame
        Columns ``'track_id'`` and ``'track_name'``, one row per playlist entry
        that carries a track object.
    """
    page_size = 100
    offset = 0
    tracks = []

    # Make the API requests, one page at a time, until no next page is announced
    while True:
        content = sp.playlist_tracks(playlistsid, fields=None, limit=page_size,
                                     offset=offset, market=None)
        tracks += content['items']
        if content['next'] is None:
            break
        offset += page_size

    # An entry whose 'track' value is None has no id or name to read (the
    # lookups would raise TypeError), so such entries are left out.
    present = [item['track'] for item in tracks if item.get('track') is not None]

    # Create the final df
    df_playlists_tracks = pd.DataFrame({
        "track_id": [track['id'] for track in present],
        "track_name": [track['name'] for track in present],
    })
    return df_playlists_tracks

In [11]:
# Spot-check: first rows of the track listing for a single playlist id
fetch_playlist_tracks(sp,'37i9dQZF1DXaz7CcPuNVXF').head()

Unnamed: 0,track_id,track_name
0,7vFv0yFGMJW3qVXbAd9BK9,Your Body Is a Wonderland
1,24YvUQnuPL8gObCfSnAobH,"Hey, Soul Sister"
2,3rKYiySCDMUKTw5kGVVhaa,Marvin Gaye (feat. Meghan Trainor)
3,0ULAOsJR33XQzlZVZvjGpB,Don't Dream It's Over
4,0IktbUcnAGrvD03AWnz3Q8,Lucky


In [12]:
# Function to fetch the audio features from the songs of given playlist
def fetch_audio_features(sp, playlist_id):
    """Fetch the audio features of the songs of a given playlist.

    NOTE: this rebinds the name ``fetch_audio_features`` that an earlier cell
    of this notebook defines with a dataframe argument. A dataframe is accepted
    here as well so that the earlier call keeps working if it is re-run after
    this cell; unlike that earlier definition, the result has no ``'mode'``
    column.

    Parameters
    ----------
    sp : object
        Client exposing ``audio_features(track_ids)``, which returns one entry
        per requested id (a dict of features, or ``None``).
    playlist_id : str or pandas.DataFrame
        A playlist id, whose tracks are read with ``fetch_playlist_tracks``, or
        a dataframe that already has ``'track_id'`` and ``'track_name'``
        columns.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``'track_name'``, with ``'track_id'`` followed by one column
        per audio feature. Rows without a track id are dropped; a track for
        which the client returned ``None`` keeps its row with NaN features.
    """
    feature_names = ['danceability', 'acousticness', 'energy', 'tempo',
                     'instrumentalness', 'loudness', 'liveness', 'duration_ms', 'key',
                     'valence', 'speechiness']
    batch_size = 50

    if isinstance(playlist_id, pd.DataFrame):
        playlist = playlist_id[['track_id', 'track_name']]
    else:
        playlist = fetch_playlist_tracks(sp, playlist_id)

    # A row with no id cannot be looked up, so it is removed before the
    # request. The positional index is rebuilt because the axis=1 concat
    # below aligns on index labels.
    playlist = playlist.dropna(subset=['track_id']).reset_index(drop=True)

    # Make the API requests, batch_size ids at a time
    audio_features = []
    for start in range(0, playlist.shape[0], batch_size):
        batch_ids = playlist['track_id'].iloc[start:start + batch_size].tolist()
        audio_features += sp.audio_features(batch_ids)

    # One row of feature values per track; a None entry becomes a row of NaN
    # instead of raising TypeError on the key lookup.
    missing = float('nan')
    features_list = [
        [missing if features is None else features[name] for name in feature_names]
        for features in audio_features
    ]
    df_audio_features = pd.DataFrame(features_list, columns=feature_names)

    # Create the final df, using 'track_name' as index ('track_id' stays a column)
    df_playlist_audio_features = pd.concat([playlist, df_audio_features], axis=1)
    return df_playlist_audio_features.set_index('track_name')


In [13]:
# Build one dataframe of audio features per featured playlist.
# Each frame is bound to a notebook-level variable named df_<playlist id>
# (assigned through globals() rather than by exec-ing a string built from ids
# returned by the API) and is also kept in `playlist_frames`, keyed by id.
playlist_frames = {}
for playlist in df_playlists['id']:
    string_command = "df_{} = fetch_audio_features(sp, playlist)".format(playlist)
    print("Create {}".format(string_command))
    try:
        playlist_frames[playlist] = fetch_audio_features(sp, playlist)
    except Exception as err:
        # Best effort: one failing playlist must not stop the others, but the
        # reason is reported instead of being swallowed.
        print("playlist with id {} is not valid, skiping ".format(playlist))
        print("    reason: {!r}".format(err))
    else:
        globals()["df_{}".format(playlist)] = playlist_frames[playlist]


Create df_37i9dQZF1DXcBWIGoYBM5M = fetch_audio_features(sp, playlist)
Create df_37i9dQZF1DWYBO1MoTDhZI = fetch_audio_features(sp, playlist)
Create df_37i9dQZF1DXbYM3nMM0oPk = fetch_audio_features(sp, playlist)
Create df_37i9dQZF1DX0MLFaUdXnjA = fetch_audio_features(sp, playlist)
Create df_37i9dQZF1DWTcqUzwhNmKv = fetch_audio_features(sp, playlist)
Create df_37i9dQZF1DX6GwdWRQMQpq = fetch_audio_features(sp, playlist)
Create df_37i9dQZF1DWXRvPx3nttRN = fetch_audio_features(sp, playlist)
Create df_37i9dQZF1DX4dyzvuaRJ0n = fetch_audio_features(sp, playlist)
playlist with id 37i9dQZF1DX4dyzvuaRJ0n is not valid, skiping 
Create df_37i9dQZF1DXaz7CcPuNVXF = fetch_audio_features(sp, playlist)
Create df_37i9dQZF1DXdPec7aLTmlC = fetch_audio_features(sp, playlist)


In [14]:
# To understand the playlist, it would be useful to get the average value for each audio feature
def fetch_audio_features_mean(sp, playlist_id):
    """Return the mean of each audio feature over one playlist.

    Parameters
    ----------
    sp : object
        Client passed through to ``fetch_audio_features``.
    playlist_id : str
        Playlist to summarise; also used as the name of the result column.

    Returns
    -------
    pandas.DataFrame
        A single column named after ``playlist_id``, indexed by feature name.
    """
    Playlist = fetch_audio_features(sp, playlist_id)
    # numeric_only=True: the frame also holds the string 'track_id' column,
    # which recent pandas versions refuse to average instead of skipping it.
    return Playlist.mean(numeric_only=True).to_frame(name=playlist_id)

In [15]:
# Example: per-feature means for one playlist id, returned as a single-column dataframe
df = fetch_audio_features_mean(sp, '37i9dQZF1DX0MLFaUdXnjA')
df.head()

Unnamed: 0,37i9dQZF1DX0MLFaUdXnjA
danceability,0.605716
acousticness,0.459792
energy,0.465119
tempo,117.807
instrumentalness,0.005687


In [16]:
# Collect the per-playlist feature means: one single-column dataframe per
# featured playlist, in the order of df_playlists['id']
dataframes = []
for playlist_id in df_playlists['id']:
    try:
        dataframes.append(fetch_audio_features_mean(sp, playlist_id))
    except Exception as err:
        # Best effort: a playlist that cannot be fetched is left out, but the
        # omission is reported so the gap in `dataframes` is visible.
        print("skipping playlist {}: {!r}".format(playlist_id, err))