### Data Collection

The Spotipy library was used for data collection (https://spotipy.readthedocs.io/en/2.16.1/). No matter how many songs are contained in a Spotify playlist, the method $user\_playlist\_tracks()$ only returns information on the first 100 songs&mdash;as is the case with $audio\_features()$. We currently have audio features on 300 songs from three playlists:

1. All Out 90s (100/150 songs)
2. All Out 00s (100/100 songs)
3. All Out 10s (100/150 songs)

In [1]:
import pandas as pd
import numpy as np
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

import config
from twython import Twython, TwythonError

api_keys = Twython(config.client_id, config.client_secret)
credentials = SpotifyClientCredentials(client_id = config.client_id,
                                       client_secret = config.client_secret)
spotify = spotipy.Spotify(client_credentials_manager = credentials)

In [2]:
def get_track_ids(creator, playlist_id):
    df = pd.DataFrame(columns = ["artist", "album", "track_name", "track_id"])
    playlist = spotify.user_playlist_tracks(creator, playlist_id)["items"]
    
    for i in playlist:
        relevant_info = {}
        relevant_info["artist"] = i["track"]["album"]["artists"][0]["name"]
        relevant_info["album"] = i["track"]["album"]["name"]
        relevant_info["track_name"] = i["track"]["name"]
        relevant_info["track_id"] = i["track"]["id"]
        df = df.append(relevant_info, ignore_index = True)
        
    return df

link_all_out_90s = "https://open.spotify.com/playlist/37i9dQZF1DXbTxeAdrVG2l"
link_all_out_00s = "https://open.spotify.com/playlist/37i9dQZF1DX4o1oenSJRJd"
link_all_out_10s = "https://open.spotify.com/playlist/37i9dQZF1DX5Ejj0EkURtP"
links = [link_all_out_90s, 
         link_all_out_00s, 
         link_all_out_10s]

playlist_ids = [i[-22:] for i in links]
names = ["all_out_90s", "all_out_00s", "all_out_10s"]
dfs = {} #key: playlist, value: artists/albums/songs/track ids
for i, j in zip(playlist_ids, names):
    dfs[j] = get_track_ids("spotify", i)

In [3]:
audio_features = {} #key: track id, value: audio features
for i in dfs.values():
    for j in i["track_id"]:
        audio_features[j] = spotify.audio_features(j)
        
names = ["danceability", "energy", "key", "loudness", "mode", "speechiness", "acousticness", "liveness",
         "valence", "tempo", "type", "id", "uri", "track_href", "analysis_url", "duration_ms", "time_signature"]
audio_features_as_df = pd.DataFrame()

for i in audio_features.keys(): #the keys are dictionaries contained in lists
    audio_features_as_df = audio_features_as_df.append(audio_features[i][0], ignore_index = True)

In [4]:
irrelevant_info = ["track_id", "id", "analysis_url", "track_href", "type", "uri"]
data = pd.concat([pd.concat(dfs, ignore_index = True), #artists/albums/songs/track ids from all playlists
                  audio_features_as_df], #audio features
                 axis = "columns")
data = data.drop(irrelevant_info, axis = "columns")
data

Unnamed: 0,artist,album,track_name,acousticness,danceability,duration_ms,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence
0,Wet Wet Wet,End Of Part One (Their Greatest Hits),"Love Is All Around - From ""Four Weddings And A...",0.256000,0.428,237893.0,0.542,0.000000,10.0,0.169,-12.381,1.0,0.0304,169.896,4.0,0.566
1,Natalie Imbruglia,Glorious: The Singles 97-07,Torn,0.074600,0.558,244813.0,0.928,0.000054,5.0,0.696,-3.040,1.0,0.0356,96.313,4.0,0.600
2,4 Non Blondes,"Bigger, Better, Faster, More !",What's Up?,0.156000,0.566,295493.0,0.570,0.000000,11.0,0.118,-9.875,0.0,0.0285,134.537,4.0,0.491
3,Roxette,It Must Have Been Love,"It Must Have Been Love - From the Film ""Pretty...",0.340000,0.520,258787.0,0.652,0.000055,5.0,0.256,-6.655,1.0,0.0274,80.609,4.0,0.722
4,Oasis,Stop The Clocks,Wonderwall,0.000807,0.409,258613.0,0.892,0.000000,2.0,0.207,-4.373,1.0,0.0336,174.426,4.0,0.651
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
295,Ava Max,Sweet but Psycho,Sweet but Psycho,0.011800,0.513,210580.0,0.768,0.000019,4.0,0.294,-4.868,0.0,0.0587,84.264,4.0,0.235
296,Bruno Mars,Doo-Wops & Hooligans,Grenade,0.414000,0.668,181933.0,0.613,0.000000,4.0,0.102,-4.225,1.0,0.1020,99.852,4.0,0.498
297,Joel Adams,Please Don't Go,Please Don't Go,0.303000,0.377,290900.0,0.595,0.000000,3.0,0.095,-5.936,1.0,0.0489,144.464,4.0,0.264
298,Hailee Steinfeld,HAIZ,Starving,,,,,,,,,,,,,
