In [1]:
# Import Packages
import os
import random
import sys
import time

import numpy as np
import pandas as pd
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials

In [2]:
# ID and Password for accessing Spotify API.
# Read them from the SPOTIPY_CLIENT_ID / SPOTIPY_CLIENT_SECRET environment
# variables so real secrets never have to be saved inside the notebook; the
# literals are only the redacted placeholders used when a variable is unset.
client_id = os.environ.get("SPOTIPY_CLIENT_ID", "xxxxxxxxx")
client_secret = os.environ.get("SPOTIPY_CLIENT_SECRET", "xxxxxxxxxx")

# Setup the credentials
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)

# Make the connection; `sp` is the client object used by the cells below
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [3]:
# Fetch the first page of playlists for the user
playlists = sp.user_playlists('supriyadi.idn')

# One tuple per playlist: (name, track count, id, uri, href, public flag)
spotify_playlists = []

# Walk the paginated response until there is no further page
while playlists:
    spotify_playlists.extend(
        (
            entry['name'],
            entry['tracks']['total'],
            entry['id'],
            entry['uri'],
            entry['href'],
            entry['public'],
        )
        for entry in playlists['items']
    )

    # A falsy 'next' marks the last page; None ends the loop
    playlists = sp.next(playlists) if playlists['next'] else None

In [4]:
# Convert list into a dataframe.
# The frame is built straight from the tuples so every column keeps its own
# type. Going through np.array() first coerced the mixed str/int/bool fields
# to strings, which turned the track count into text such as '0' (truthy).
data = pd.DataFrame(spotify_playlists,
                    columns=['Name', 'No. of Tracks', 'ID', 'URI', 'HREF', 'Public'])
data.head()

Unnamed: 0,Name,No. of Tracks,ID,URI,HREF,Public
0,pop,365,45UoCqtVh2BR1HteUkmRje,spotify:user:supriyadi.idn:playlist:45UoCqtVh2...,https://api.spotify.com/v1/users/supriyadi.idn...,True


In [5]:
# Helper that gathers every track item of one playlist across all result pages
def get_playlist_tracks(username, playlist_id):
    """Return the 'items' of every page of a playlist's track listing.

    Parameters
    ----------
    username : passed through to ``sp.user_playlist_tracks``.
    playlist_id : passed through to ``sp.user_playlist_tracks``.

    Returns
    -------
    list
        The first page's ``'items'`` list, extended in place with the
        ``'items'`` of each following page.

    Uses the module-level ``sp`` client created earlier in the notebook.
    """
    page = sp.user_playlist_tracks(username, playlist_id)
    collected = page['items']
    while True:
        # A falsy 'next' means this was the last page
        if not page['next']:
            return collected
        page = sp.next(page)
        collected += page['items']

In [6]:
# Restrict the run to the first ten rows of the playlist table
Spotify_playlists = data.iloc[:10]

# In-memory cache, filled later with one entry per playlist id
playlist_tracks = {}

In [7]:
# Collect the track listing of every non-empty playlist in the subsample
for playlist, track_count in zip(Spotify_playlists["ID"], Spotify_playlists["No. of Tracks"]):
    # int() makes the emptiness test work whether the count is stored as a
    # number or as text (the string '0' would otherwise count as truthy).
    # Playlists already in the cache are not fetched again.
    if int(track_count) == 0 or playlist in playlist_tracks:
        continue

    try:
        playlist_tracks[playlist] = get_playlist_tracks('supriyadi.idn', playlist)
    except Exception as exc:
        # Best effort: report the failure and carry on with the next playlist.
        # `Exception` (not a bare except) lets Ctrl-C still stop the loop.
        sys.stderr.write("Skipping playlist %s: %r\n" % (playlist, exc))
        continue

    # Random pause between successful requests
    time.sleep(random.randint(1, 3))

In [8]:
# Flatten the playlist cache into (playlist id, track id) pairs
songs_playlist = []

for playlist, entries in playlist_tracks.items():
    for entry in entries:
        track = entry['track']
        # An entry whose track is None would crash on track['id'], and an
        # entry without an id cannot be used for the audio-feature lookup,
        # so both are left out.
        if track is None or track['id'] is None:
            continue
        songs_playlist.append((playlist, track['id']))

print("Number of Songs in Playlists: {}".format(len(songs_playlist)))

Number of Songs in Playlists: 365


In [9]:
# Track ids only (second element of each (playlist, track id) pair)
songs = [song_id for _, song_id in songs_playlist]

# Cache of audio-feature responses keyed by track id
audio_feat = {}

# Loop-index intervals at which the extraction loop pauses
limit_songs_small = 10
limit_songs_medium = 200

In [10]:
# Audio feature extraction - saves information in cache
for item, song in enumerate(songs):
    # Already cached (repeated id, or this cell was re-run): no API call needed
    if song in audio_feat:
        continue

    try:
        audio_feat[song] = sp.audio_features(song)
    except Exception as exc:
        # Best effort: report the failure and move on, leaving the song out of
        # the cache. `Exception` (not a bare except) lets Ctrl-C stop the loop.
        sys.stderr.write("\nSkipping audio features for %s: %r\n" % (song, exc))

    # Throttling: pause 0-1 s at every `limit_songs_small`-th index ...
    if item % limit_songs_small == 0:
        time.sleep(random.randint(0, 1))

    # ... and once more at every `limit_songs_medium`-th index
    if item % limit_songs_medium == 0:
        time.sleep(random.randint(0, 1))

    # Progress as a percentage of the list actually being iterated
    out = np.floor(item * 1. / len(songs) * 100)
    sys.stdout.write("\r%d%%" % out)
    sys.stdout.flush()

sys.stdout.write("\r%d%%" % 100)

100%

In [13]:
# Split the cached responses into one {song id: value} mapping per feature
acousticness = {}
danceability = {}
energy = {}
loudness = {}
speechiness = {}
tempo = {}
valence = {}

# (response key, destination mapping) pairs, filled in this order
feature_targets = (
    ('acousticness', acousticness),
    ('danceability', danceability),
    ('energy', energy),
    ('loudness', loudness),
    ('speechiness', speechiness),
    ('tempo', tempo),
    ('valence', valence),
)

for song, payload in audio_feat.items():
    try:
        details = payload[0]
        for key, target in feature_targets:
            target[song] = details[key]
    except TypeError:
        # Raised when the entry or its first element cannot be subscripted
        # (e.g. it is None); such songs are skipped
        continue

In [15]:
# One two-column frame per feature: 'song' (the mapping's keys) plus the feature values
acc_df = pd.Series(acousticness).to_frame('acousticness').reset_index().rename(columns={'index': 'song'})
dan_df = pd.Series(danceability).to_frame('dance').reset_index().rename(columns={'index': 'song'})
ene_df = pd.Series(energy).to_frame('energy').reset_index().rename(columns={'index': 'song'})
loud_df = pd.Series(loudness).to_frame('loudness').reset_index().rename(columns={'index': 'song'})
spee_df = pd.Series(speechiness).to_frame('speech').reset_index().rename(columns={'index': 'song'})
temp_df = pd.Series(tempo).to_frame('tempo').reset_index().rename(columns={'index': 'song'})
vale_df = pd.Series(valence).to_frame('valence').reset_index().rename(columns={'index': 'song'})

In [20]:
# Merge individual dataframes into one features dataframe
playlist_df = pd.DataFrame(songs_playlist, columns=['playlist', 'song'])

# Combine the per-feature frames on the song id. Joining by key rather than by
# row position keeps every value attached to its own song.
frame_V1 = [acc_df, dan_df, ene_df, loud_df, spee_df, temp_df, vale_df]
features = frame_V1[0]
for feature_frame in frame_V1[1:]:
    features = features.merge(feature_frame, on='song', how='outer')

# Attach the playlist to each song by id. A positional concat would pair the
# wrong playlist with a song as soon as the two frames differ in length or
# order (a song listed twice, or a song without features). The inner join
# keeps only songs present on both sides; columns are sorted alphabetically to
# keep the layout this cell produced before.
frame_V2 = [features, playlist_df]
features_df = (
    playlist_df
    .merge(features, on='song', how='inner')
    .dropna()
    .sort_index(axis=1)
)

features_df.head()

Unnamed: 0,acousticness,dance,energy,loudness,playlist,song,speech,tempo,valence
0,0.694,0.585,0.303,-10.058,45UoCqtVh2BR1HteUkmRje,05pKAafT85jeeNhZ6kq7HT,0.0398,136.703,0.142
1,0.142,0.773,0.525,-11.398,45UoCqtVh2BR1HteUkmRje,09pYzzyrc6MsJXuG3K1ruC,0.0385,119.024,0.616
2,0.0824,0.472,0.373,-9.54,45UoCqtVh2BR1HteUkmRje,09xf9RNDZweJOgQmPO34ey,0.028,149.804,0.337
3,0.697,0.547,0.39,-5.946,45UoCqtVh2BR1HteUkmRje,0AZU3CiFFyt5pO5BacwNQV,0.0271,128.728,0.477
4,0.544,0.643,0.569,-7.459,45UoCqtVh2BR1HteUkmRje,0B5KeB25moPkcQUnbDvj3t,0.0276,120.507,0.275


In [42]:
# Write the merged feature table (index included) to a comma-separated file
features_df.to_csv(path_or_buf='GetFeatures.csv', sep=',')