### Data Collection

The [Spotipy](https://spotipy.readthedocs.io/en/2.16.1/) library was used for data collection. No matter how many songs a Spotify playlist contains, a single call to `user_playlist_tracks()` only returns information on the first 100 songs&mdash;and, similarly, `audio_features()` accepts at most 100 track IDs per call. We currently have audio features for 300 songs from three playlists:

1. All Out 90s (100/150 songs)
2. All Out 00s (100/100 songs)
3. All Out 10s (100/150 songs)

In [1]:
import pandas as pd
import numpy as np
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

import config
from twython import Twython, TwythonError

# Spotify client: the ID and secret are read from the local `config` module,
# wrapped in a SpotifyClientCredentials manager and handed to spotipy.
credentials = SpotifyClientCredentials(
    client_id = config.client_id,
    client_secret = config.client_secret,
)
spotify = spotipy.Spotify(client_credentials_manager = credentials)

# NOTE(review): this Twython object is built from the very same ID/secret pair
# that is passed to SpotifyClientCredentials above, and nothing in the cells
# shown here reads `api_keys` -- confirm whether it is still needed.
api_keys = Twython(config.client_id, config.client_secret)

In [2]:
def get_track_ids(creator, playlist_id):
    """Collect artist, album, track name and track ID for a playlist's tracks.

    Calls ``spotify.user_playlist_tracks(creator, playlist_id)`` once and reads
    the ``"items"`` of that single response; no further pages are requested.

    Parameters
    ----------
    creator : str
        User name of the playlist owner, passed through to Spotipy.
    playlist_id : str
        ID of the playlist to read.

    Returns
    -------
    pandas.DataFrame
        One row per usable playlist item, with the columns ``artist``,
        ``album``, ``track_name`` and ``track_id``. The frame is empty (but
        still has those columns) when the response holds no usable items.
    """
    columns = ["artist", "album", "track_name", "track_id"]
    items = spotify.user_playlist_tracks(creator, playlist_id)["items"]

    # Gather plain dicts and build the frame once at the end: appending to a
    # DataFrame row by row copies the whole frame on every iteration, and
    # DataFrame.append no longer exists in pandas >= 2.0.
    records = []
    for item in items:
        track = item.get("track")
        if track is None:
            # An item without track data has nothing to extract; skip it
            # instead of failing on the lookups below.
            continue
        records.append({
            # First artist listed on the track's *album* entry.
            "artist": track["album"]["artists"][0]["name"],
            "album": track["album"]["name"],
            "track_name": track["name"],
            "track_id": track["id"],
        })

    return pd.DataFrame(records, columns = columns)

# Links of the three Spotify playlists to collect.
link_all_out_90s = "https://open.spotify.com/playlist/37i9dQZF1DXbTxeAdrVG2l"
link_all_out_00s = "https://open.spotify.com/playlist/37i9dQZF1DX4o1oenSJRJd"
link_all_out_10s = "https://open.spotify.com/playlist/37i9dQZF1DX5Ejj0EkURtP"
links = [link_all_out_90s,
         link_all_out_00s,
         link_all_out_10s]

# The playlist ID is the last path segment of each link. Reading it from the
# URL structure (rather than slicing off a fixed number of characters) keeps
# working when a link carries a "?..." query string or a trailing slash.
playlist_ids = [link.split("?")[0].rstrip("/").rsplit("/", 1)[-1] for link in links]
names = ["all_out_90s", "all_out_00s", "all_out_10s"]

#key: playlist, value: artists/albums/songs/track ids
dfs = {name: get_track_ids("spotify", playlist_id)
       for playlist_id, name in zip(playlist_ids, names)}

In [3]:
# Distinct track ids in first-seen order. `audio_features` below is keyed by
# track id, so a song that sits in several playlists only ever gets one entry;
# de-duplicating up front avoids requesting it more than once.
track_ids = list(dict.fromkeys(
    track_id
    for playlist_df in dfs.values()
    for track_id in playlist_df["track_id"]
))

# Request the features in batches instead of one HTTP call per song:
# audio_features() takes a list of ids (at most 100 per call) and returns one
# entry per requested id, in the same order.
batch_size = 100
audio_features = {} #key: track id, value: one-element list holding that track's audio-features dict
for start in range(0, len(track_ids), batch_size):
    batch = track_ids[start:start + batch_size]
    for track_id, features in zip(batch, spotify.audio_features(batch)):
        audio_features[track_id] = [features]

# NOTE(review): this list is not read anywhere in the cells shown here, and it
# rebinds `names`, which previously held the playlist names -- confirm whether
# a later cell relies on it.
names = ["danceability", "energy", "key", "loudness", "mode", "speechiness", "acousticness", "liveness",
         "valence", "tempo", "type", "id", "uri", "track_href", "analysis_url", "duration_ms", "time_signature"]

# The *values* of `audio_features` are one-element lists wrapping a dict.
# Build the frame in one go (DataFrame.append was removed in pandas 2.0 and
# copied the whole frame on every call); entries without features are left out.
audio_features_as_df = pd.DataFrame(
    [wrapped[0] for wrapped in audio_features.values() if wrapped[0] is not None]
)

In [4]:
irrelevant_info = ["track_id", "id", "analysis_url", "track_href", "type", "uri"]

# artists/albums/songs/track ids from all playlists, stacked into one frame
track_info = pd.concat(dfs, ignore_index = True)

# Attach the audio features by track id rather than by row position.
# `audio_features` is keyed by track id, so it holds one entry per *distinct*
# song; as soon as a song appears in more than one playlist the two frames no
# longer have the same number of rows and a positional, column-wise concat
# would pair songs with the wrong features. A left merge keeps every playlist
# row in its original order; `validate` raises if an id occurs more than once
# on the audio-features side.
data = track_info.merge(audio_features_as_df,
                        how = "left",
                        left_on = "track_id",
                        right_on = "id",
                        validate = "many_to_one")
data = data.drop(irrelevant_info, axis = "columns")
data

Unnamed: 0,artist,album,track_name,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence
0,Backstreet Boys,The Hits--Chapter One,I Want It That Way,0.216000,0.689,213600.0,0.702,0.000000,6.0,0.159,-5.642,0.0,0.0261,99.031,4.0,0.484
1,Natalie Imbruglia,Glorious: The Singles 97-07,Torn,0.074600,0.559,244813.0,0.928,0.000056,5.0,0.696,-3.040,1.0,0.0355,96.315,4.0,0.599
2,Cher,It's a Man's World,Walking in Memphis,0.587000,0.696,235493.0,0.679,0.000000,2.0,0.165,-7.363,1.0,0.0305,128.982,4.0,0.527
3,Roxette,It Must Have Been Love,"It Must Have Been Love - From the Film ""Pretty...",0.340000,0.520,258787.0,0.652,0.000055,5.0,0.256,-6.655,1.0,0.0274,80.609,4.0,0.722
4,Oasis,Stop The Clocks,Wonderwall,0.000807,0.409,258613.0,0.892,0.000000,2.0,0.207,-4.373,1.0,0.0336,174.426,4.0,0.651
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
295,P!nk,The Truth About Love,Just Give Me a Reason,0.346000,0.778,242733.0,0.547,0.000302,2.0,0.132,-7.273,1.0,0.0489,95.002,4.0,0.441
296,Marshmello,FRIENDS,FRIENDS,0.205000,0.626,202621.0,0.880,0.000000,9.0,0.128,-2.384,0.0,0.0504,95.079,4.0,0.534
297,Justin Bieber,Sorry,Sorry,0.076700,0.665,200787.0,0.759,0.000000,0.0,0.313,-3.694,0.0,0.0399,99.992,4.0,0.389
298,Martin Garrix,In the Name of Love,In the Name of Love,0.109000,0.501,195707.0,0.519,0.000000,4.0,0.454,-5.880,0.0,0.0409,133.990,4.0,0.168
