In [1]:
#Import Packages
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
import numpy as np
import time
import sys
import random

In [3]:
# accessing Spotify API
client_id = "xxxxxxx"
client_secret = "xxxxxxx"
# Setup the credentials
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)

# Make the connection
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [4]:
# Get playlist
playlists = sp.user_playlists('supriyadi.idn')

# Empty list to hold playlist information
spotify_playlists = []

# Loop to get data for each playlist
while playlists:
    
    for i, playlist in enumerate(playlists['items']):
        names = playlist['name']
        track_count = playlist['tracks']['total']
        ids = playlist['id']
        uri = playlist['uri']
        href = playlist['href']
        public = playlist['public']
        data_aggregation = names, track_count, ids, uri, href, public
        spotify_playlists.append(data_aggregation)
        
    if playlists['next']:
        playlists = sp.next(playlists)
    
    else:
        playlists = None

In [5]:
# Convert list into a dataframe
data = pd.DataFrame(np.array(spotify_playlists).reshape(len(spotify_playlists),6), 
                    columns=['Name', 'No. of Tracks', 'ID', 'URI', 'HREF', 'Public'])
data.head()

Unnamed: 0,Name,No. of Tracks,ID,URI,HREF,Public
0,pop,365,45UoCqtVh2BR1HteUkmRje,spotify:user:supriyadi.idn:playlist:45UoCqtVh2...,https://api.spotify.com/v1/users/supriyadi.idn...,True


In [6]:
# New function to get tracks in playlist
def get_playlist_tracks(username, playlist_id):
    results = sp.user_playlist_tracks(username, playlist_id)
    tracks = results['items']
    while results['next']:
        results = sp.next(results)
        tracks.extend(results['items'])
    return tracks

In [7]:
# Subsample of data to pull
Spotify_playlists = data.iloc[0:10]

# Create playlist cache in memory
playlist_tracks = dict()

In [8]:
# Collect audio features per track per playlist
for playlist in Spotify_playlists["ID"]:
    if Spotify_playlists.loc[Spotify_playlists['ID'] == playlist, 'No. of Tracks'].item():
        try:
            playlist_tracks[playlist] = get_playlist_tracks('supriyadi.idn', playlist)
            time.sleep(random.randint(1, 3))
        except:
            pass

In [9]:
songs_playlist = []

for item,playlist in enumerate(playlist_tracks):
    track_len = len(playlist_tracks[playlist])
    for song_item,song in enumerate(playlist_tracks[playlist]):
        songs_playlist.append((playlist,playlist_tracks[playlist][song_item]['track']['id']))
        
print("Number of Songs in Playlists: {}".format(len(songs_playlist)))

Number of Songs in Playlists: 365


In [10]:
# Create audio feature dictionary and set sleeping time thresholds
songs = [item[1] for item in songs_playlist]

audio_feat = dict()
limit_songs_small = 10
limit_songs_medium = 200

In [11]:
# Audio feature extraction - saves information in cache
for item,song in enumerate(songs):
    if song not in audio_feat:
        try:
            audio_feat[song] = sp.audio_features(song)
        except:
            pass

        if item % limit_songs_small == 0:
            time.sleep(random.randint(0, 1))

        if item % limit_songs_medium == 0:
            time.sleep(random.randint(0, 1))

        out = np.floor(item * 1. / len(songs_playlist) * 100)
        sys.stdout.write("\r%d%%" % out)
        sys.stdout.flush()

sys.stdout.write("\r%d%%" % 100)

100%

In [15]:
# Convert raw data into dictionaries
acousticness = dict()
danceability = dict()
duration_ms = dict()
energy = dict()
instrumentalness = dict()
key = dict()
liveness = dict()
loudness = dict()
mode = dict()
speechiness = dict()
tempo = dict()
time_signature = dict()
valence = dict()

for item,song in enumerate(audio_feat):
    try:
        acousticness[song] = audio_feat[song][0]['acousticness']
        danceability[song] = audio_feat[song][0]['danceability']
        duration_ms[song] = audio_feat[song][0]['duration_ms']
        energy[song] = audio_feat[song][0]['energy']
        instrumentalness[song] = audio_feat[song][0]['instrumentalness']
        key[song] = audio_feat[song][0]['key']
        liveness[song] = audio_feat[song][0]['liveness']
        loudness[song] = audio_feat[song][0]['loudness']
        mode[song] = audio_feat[song][0]['mode']
        speechiness[song] = audio_feat[song][0]['speechiness']
        tempo[song] = audio_feat[song][0]['tempo']
        time_signature[song] = audio_feat[song][0]['time_signature']
        valence[song] = audio_feat[song][0]['valence']
    except TypeError:
        pass

In [16]:
# Creation of audio feature dataframes from dictionaries
acc_df = pd.DataFrame(pd.Series(acousticness)).reset_index().rename(columns={'index': 'song', 0: 'acousticness'})
dan_df = pd.DataFrame(pd.Series(danceability)).reset_index().rename(columns={'index': 'song', 0: 'dance'})
dur_df = pd.DataFrame(pd.Series(duration_ms)).reset_index().rename(columns={'index': 'song', 0: 'duration'})
ene_df = pd.DataFrame(pd.Series(energy)).reset_index().rename(columns={'index': 'song', 0: 'energy'})
inst_df = pd.DataFrame(pd.Series(instrumentalness)).reset_index().rename(columns={'index': 'song', 0: 'instrumentalness'})
key_df = pd.DataFrame(pd.Series(key)).reset_index().rename(columns={'index': 'song', 0: 'key'})
live_df = pd.DataFrame(pd.Series(liveness)).reset_index().rename(columns={'index': 'song', 0: 'liveness'})
loud_df = pd.DataFrame(pd.Series(loudness)).reset_index().rename(columns={'index': 'song', 0: 'loudness'})
mode_df = pd.DataFrame(pd.Series(mode)).reset_index().rename(columns={'index': 'song', 0: 'mode'})
spee_df = pd.DataFrame(pd.Series(speechiness)).reset_index().rename(columns={'index': 'song', 0: 'speech'})
temp_df = pd.DataFrame(pd.Series(tempo)).reset_index().rename(columns={'index': 'song', 0: 'tempo'})
time_df = pd.DataFrame(pd.Series(time_signature)).reset_index().rename(columns={'index': 'song', 0: 'time'})
vale_df = pd.DataFrame(pd.Series(valence)).reset_index().rename(columns={'index': 'song', 0: 'valence'})

In [18]:
# Merge individual dataframes into one features dataframe
playlist_df = pd.DataFrame(songs_playlist,columns=['playlist','song'])

frame_V1 = [acc_df,dan_df,dur_df,ene_df,inst_df,key_df,live_df,loud_df,mode_df,spee_df,temp_df,time_df,vale_df]
features = pd.concat(frame_V1,axis=1).T.groupby(level=0).first().T

frame_V2 = [features,playlist_df]
features_df = pd.concat(frame_V2,axis=1).T.groupby(level=0).first().T.dropna()

features_df.tail()

Unnamed: 0,acousticness,dance,duration,energy,instrumentalness,key,liveness,loudness,mode,playlist,song,speech,tempo,time,valence
350,0.853,0.49,239466,0.335,1.77e-05,3,0.1,-9.048,1,45UoCqtVh2BR1HteUkmRje,7vZTvo2aJ6wHKCGjwjdWIf,0.0347,173.493,4,0.519
351,0.0653,0.421,202533,0.873,0.0,10,0.123,-4.343,1,45UoCqtVh2BR1HteUkmRje,7wMq5n8mYSKlQIGECKUgTX,0.0565,84.803,4,0.63
352,0.793,0.554,282040,0.266,0.0,11,0.11,-9.435,0,45UoCqtVh2BR1HteUkmRje,7wPlxhTeGIT9tm0pk03vwq,0.0319,103.902,4,0.302
353,0.0122,0.439,168000,0.891,0.0,0,0.547,-4.764,1,45UoCqtVh2BR1HteUkmRje,7yCPwWs66K8Ba5lFuU2bcx,0.057,148.599,4,0.695
354,0.89,0.585,187614,0.496,0.00282,8,0.122,-9.769,1,45UoCqtVh2BR1HteUkmRje,7zDGRvUdsgVWBfsYp6YffE,0.0257,98.548,4,0.242


In [None]:
# Save as csv file
features_df.to_csv('GetFeatures.csv', sep=',')