# Spotify Five Year Analysis Project: <br />All Album Track Table Creation Notebook

## Imports and Setup

### Imports

In [1]:
import json
import time
from tqdm import tqdm
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import creds

### Set display options

In [2]:
pd.set_option('display.max_columns', 500)

### Spotify Credentials

#### Load credentials

Loads the creds.py file, containing the following two lines for variables client_id and secret, which is gitignored for sharing. 

client_id = 'Your Client ID Here'<br />
secret = 'Your secret here'

In [3]:
%run -i 'creds.py'

#### Set credentials

In [4]:
client_credentials_manager = SpotifyClientCredentials(client_id=client_id,client_secret=secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

### Album List Creation

#### Import master chart table

In [5]:
master_chart_table = pd.read_csv('../data/mastercharttable.csv')
master_chart_table

Unnamed: 0,playlist_id,playlist_name,position,album_name,album_id,album_release_date,album_artists,album_artists_ids,album_artists_genres,album_artists_popularity,album_artists_followers,track_name,track_id,track_popularity,track_artists,track_artists_ids,track_artists_genres,track_artists_popularity,track_explicit,track_artists_followers,danceability,energy,key,speechiness,loudness,mode,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,3avCwQPH6DkhMTRsizon7N,Billboard 200 Top Albums 2021,1,Dangerous: The Double Album,1qW1C4kDOXnrly22daHbxz,2021-01-08,Morgan Wallen,['4oUHIQIBe0LHzYfvXNW4QM'],[['contemporary country']],[86],[2789002],More Than My Hometown,65mMCEOu5Ll1DBAfEUmerU,34,['Morgan Wallen'],['4oUHIQIBe0LHzYfvXNW4QM'],[['contemporary country']],[86],False,[2789002],0.621,0.868,6,0.0460,-5.478,1,0.617000,0.000000,0.1310,0.594,126.010,216573,4
1,3avCwQPH6DkhMTRsizon7N,Billboard 200 Top Albums 2021,2,SOUR,6s84u2TUpR3wdUv4NgKA2j,2021-05-21,['Olivia Rodrigo'],['1McMsnEElThX1knmY4oliG'],[['pop']],[91],[12849364],drivers license,5wANPM4fQCJwkGd4rN57mH,92,['Olivia Rodrigo'],['1McMsnEElThX1knmY4oliG'],[['pop']],[91],True,[12849364],0.561,0.431,10,0.0578,-8.810,1,0.768000,0.000014,0.1060,0.137,143.875,242013,4
2,3avCwQPH6DkhMTRsizon7N,Billboard 200 Top Albums 2021,3,Shoot For The Stars Aim For The Moon,7e7t0MCrNDcJZsPwUKjmOc,2020-07-03,['Pop Smoke'],['0eDvMgVFoNV3TpwtrVCoTj'],[['brooklyn drill']],[89],[8935893],What You Know Bout Love,1tkg4EHVoqnhR6iFEXb60y,85,['Pop Smoke'],['0eDvMgVFoNV3TpwtrVCoTj'],[['brooklyn drill']],[89],True,[8935893],0.709,0.548,10,0.3530,-8.493,1,0.650000,0.000002,0.1330,0.543,83.995,160000,4
3,3avCwQPH6DkhMTRsizon7N,Billboard 200 Top Albums 2021,4,evermore,2Xoteh7uEpea4TohMxjtaq,2020-12-11,['Taylor Swift'],['06HL4z0CvFAxyc27GXpf02'],[['pop']],[98],[48097228],willow,0lx2cLdOt3piJbcaXIV74f,82,['Taylor Swift'],['06HL4z0CvFAxyc27GXpf02'],[['pop']],[98],False,[48097228],0.392,0.574,7,0.1700,-9.195,1,0.833000,0.001790,0.1450,0.529,81.112,214707,4
4,3avCwQPH6DkhMTRsizon7N,Billboard 200 Top Albums 2021,5,Certified Lover Boy,3SpBlxme9WbeQdI9kx7KAV,2021-09-03,['Drake'],['3TVXtAsR1Inumwj472S9r4'],"[['canadian hip hop', 'canadian pop', 'hip hop...",[98],[60310746],Way 2 Sexy (with Future & Young Thug),0k1WUmIRnG3xU6fvvDVfRG,88,"['Drake', 'Future', 'Young Thug']",['3TVXtAsR1Inumwj472S9r4'],"[['canadian hip hop', 'canadian pop', 'hip hop...",[98],True,[60310746],0.803,0.597,11,0.1410,-6.035,0,0.000619,0.000005,0.3230,0.331,136.008,257605,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
988,1N9WQ0C6m7n1TVeLXdUoNb,Billboard 200 Top Albums 2017,195,+,0W5GGnapMz0VwemQvJDqa7,2011-09-09,['Ed Sheeran'],['6eUKZXaKkcviH0Ku9w2n3V'],"[['pop', 'uk pop']]",[96],[91177982],The A Team,1VdZ0vKfR5jneCmWIUAMxK,77,['Ed Sheeran'],['6eUKZXaKkcviH0Ku9w2n3V'],"[['pop', 'uk pop']]",[96],False,[91177982],0.642,0.289,9,0.0367,-9.918,1,0.669000,0.000000,0.1800,0.407,84.996,258373,4
989,1N9WQ0C6m7n1TVeLXdUoNb,Billboard 200 Top Albums 2017,196,Sing It Now: Songs Of Faith & Hope,3zSyGvLk5FcLhC3BtbKr9z,2017-02-03,['Reba McEntire'],['02rd0anEWfMtF7iMku9uor'],"[['contemporary country', 'country', 'country ...",[66],[1548058],Back To God,3W29VxuZ2AZNkPPJwVmD3a,34,['Reba McEntire'],['02rd0anEWfMtF7iMku9uor'],"[['contemporary country', 'country', 'country ...",[66],False,[1548058],0.501,0.674,6,0.0315,-5.666,1,0.039000,0.000003,0.0783,0.359,147.931,289840,4
990,1N9WQ0C6m7n1TVeLXdUoNb,Billboard 200 Top Albums 2017,197,BEYONCÉ [Platinum Edition],2UJwKSBUz6rtW4QLK74kQu,2014-11-24,['Beyoncé'],['6vWDO969PvNqNYHIOW5v0m'],"[['dance pop', 'pop', 'r&b']]",[86],[30095227],7/11,02M6vucOvmRfMxTXDUwRXu,72,['Beyoncé'],['6vWDO969PvNqNYHIOW5v0m'],"[['dance pop', 'pop', 'r&b']]",[86],False,[30095227],0.747,0.705,9,0.1260,-5.137,0,0.012800,0.000000,0.1260,0.560,136.024,213507,4
991,1N9WQ0C6m7n1TVeLXdUoNb,Billboard 200 Top Albums 2017,199,The Breaker,2aQOzEjLzPkffXDwREXdAh,2017-02-24,['Little Big Town'],['3CygdxquGHurS7f9LjNLkv'],"[['contemporary country', 'country', 'country ...",[69],[2325885],Better Man,23TxRN09aR1RB0G0tFoT0b,68,['Little Big Town'],['3CygdxquGHurS7f9LjNLkv'],"[['contemporary country', 'country', 'country ...",[69],False,[2325885],0.514,0.836,5,0.0381,-3.663,1,0.112000,0.000000,0.0878,0.501,143.888,263120,4


#### Create album id table

In [6]:
album_id_table = master_chart_table[['album_name','album_id']].copy()
album_id_table

Unnamed: 0,album_name,album_id
0,Dangerous: The Double Album,1qW1C4kDOXnrly22daHbxz
1,SOUR,6s84u2TUpR3wdUv4NgKA2j
2,Shoot For The Stars Aim For The Moon,7e7t0MCrNDcJZsPwUKjmOc
3,evermore,2Xoteh7uEpea4TohMxjtaq
4,Certified Lover Boy,3SpBlxme9WbeQdI9kx7KAV
...,...,...
988,+,0W5GGnapMz0VwemQvJDqa7
989,Sing It Now: Songs Of Faith & Hope,3zSyGvLk5FcLhC3BtbKr9z
990,BEYONCÉ [Platinum Edition],2UJwKSBUz6rtW4QLK74kQu
991,The Breaker,2aQOzEjLzPkffXDwREXdAh


#### Remove duplicates

In [7]:
album_ids_no_dupes = album_id_table.drop_duplicates(subset=['album_id'], keep='first')
album_ids_no_dupes

Unnamed: 0,album_name,album_id
0,Dangerous: The Double Album,1qW1C4kDOXnrly22daHbxz
1,SOUR,6s84u2TUpR3wdUv4NgKA2j
2,Shoot For The Stars Aim For The Moon,7e7t0MCrNDcJZsPwUKjmOc
3,evermore,2Xoteh7uEpea4TohMxjtaq
4,Certified Lover Boy,3SpBlxme9WbeQdI9kx7KAV
...,...,...
988,+,0W5GGnapMz0VwemQvJDqa7
989,Sing It Now: Songs Of Faith & Hope,3zSyGvLk5FcLhC3BtbKr9z
990,BEYONCÉ [Platinum Edition],2UJwKSBUz6rtW4QLK74kQu
991,The Breaker,2aQOzEjLzPkffXDwREXdAh


#### Create list of albums ids

In [8]:
album_id_list = album_ids_no_dupes['album_id'].tolist()
album_id_list

['1qW1C4kDOXnrly22daHbxz',
 '6s84u2TUpR3wdUv4NgKA2j',
 '7e7t0MCrNDcJZsPwUKjmOc',
 '2Xoteh7uEpea4TohMxjtaq',
 '3SpBlxme9WbeQdI9kx7KAV',
 '6RuwGwQcAureaOraKJNFQG',
 '0S9D5NIDp2YXhYwlvuJzqx',
 '3BSzygCIET0gzTTIs7iB3y',
 '7fJJK56U9fHixgO0HQkhtI',
 '1ynyQdPQiXdYJNQEDL1S3d',
 '6n9DKpOxwifT5hOXtgLZSL',
 '0xS0iOtxQRoJvfcFcJA5Gv',
 '4g1ZRSobMefqF6nelkgibi',
 '03NCvBIGqzLPhLoi4pDb3L',
 '5dGWwsZ9iB2Xc3UKR0gif2',
 '7xV2TzoaVc0ycW7fwBwAml',
 '2d9BCZeAAhiZWPpbX9aPCW',
 '7lQouCmPggSsg1972D3TiE',
 '1Gonl2Jwl4WeJM6qUomM8s',
 '1nAQbHeOWTfQzbOoFrvndW',
 '4FY0HCt6PEbOF1RqUbYVzq',
 '6tkjU4Umpo79wwkgPMV3nZ',
 '6a8nlV9V8kPUbTTCJNVSsh',
 '3DvW6xTKba1sol3kkj4JoB',
 '1kCHru7uhxBUdzkm4gzRQc',
 '5ffogo3K3fYibGWa93IzUe',
 '0S0KGZnfBGSIssfF54WSJh',
 '4JAvwK4APPArjIsOdGoJXX',
 '1lhNch5NkOONvFhRPh8qaj',
 '0wTx7qk1I85v3WSKyXWTNc',
 '5L5evi5tJPh8WaEFAQp7Tp',
 '0sOeI7pbAmIc8aDFyvkBUW',
 '4hDok0OAJd57SGIT8xuWJH',
 '0BwWUstDMUbgq2NYONRqlu',
 '5lJqux7orBlA1QzyiBGti1',
 '1IR2nlwX6YVTXXeu2qzoWO',
 '55WCJ7ddhO0SFk8DXaKWMx',
 

In [9]:
print(len(album_id_list))

560


#### Create list of all album tracks from album id list

In [10]:
albums_track_ids = []
#  Loop over album ids and get their track ids
for i in tqdm(range(len(album_id_list))):
    time.sleep(.5)
    album_items = sp.album_tracks(album_id_list[i])['items']
    for track in album_items:
        album_tracks = track['id']
        albums_track_ids.append(album_tracks)

100%|██████████| 560/560 [05:48<00:00,  1.61it/s]


In [11]:
print(len(albums_track_ids))

8996


## Functions for data extraction

### Supporting function definitions

#### Album artists names function

In [12]:
def get_all_album_artists_names(track_id):
    meta = sp.track(track_id)
    album_artist_list = []
    for item in (meta['album']['artists']):
        album_artist = item['name']
        album_artist_list.append(album_artist)
    return album_artist_list     

Test with track id '3fFBZvG777xoKyvcrBq7lc?si=f241a44d776b451a'

In [13]:
get_all_album_artists_names('3fFBZvG777xoKyvcrBq7lc?si=f241a44d776b451a')

['Future', 'Juice WRLD']

#### Album artists ids function

In [14]:
def get_all_album_artists_ids(track_id):
    meta = sp.track(track_id)
    album_artist_id_list = []
    for item in (meta['album']['artists']):
        album_artist_id = item['id']
        album_artist_id_list.append(album_artist_id)
    return album_artist_id_list     

Test with track id '3fFBZvG777xoKyvcrBq7lc?si=f241a44d776b451a'

In [15]:
get_all_album_artists_ids('3fFBZvG777xoKyvcrBq7lc?si=f241a44d776b451a')

['1RyvyyTE3xzB2ZywiAwp0i', '4MCBfE4596Uoi2O4DtmEMz']

#### Album artists genres function

In [16]:
def get_all_album_artists_genres(track_id):
    meta = sp.track(track_id)
    album_artist_genre_list = []
    for item in (meta['album']['artists']):
        album_artist_id = item['id']
        album_artist_genres = sp.artist(album_artist_id)['genres']
        album_artist_genre_list.append(album_artist_genres)
    return album_artist_genre_list     

Test with track id '3fFBZvG777xoKyvcrBq7lc?si=f241a44d776b451a'

In [17]:
get_all_album_artists_genres('3fFBZvG777xoKyvcrBq7lc?si=f241a44d776b451a')

[['atl hip hop', 'pop rap', 'rap', 'southern hip hop', 'trap'],
 ['chicago rap', 'melodic rap']]

#### Album artists popularity function

In [18]:
def get_all_album_artists_popularity(track_id):
    meta = sp.track(track_id)
    album_artist_popularity_list = []
    for item in (meta['album']['artists']):
        album_artist_id = item['id']
        album_artist_popularity = sp.artist(album_artist_id)['popularity']
        album_artist_popularity_list.append(album_artist_popularity)
    return album_artist_popularity_list     

Test with track id '3fFBZvG777xoKyvcrBq7lc?si=f241a44d776b451a'

In [19]:
get_all_album_artists_popularity('3fFBZvG777xoKyvcrBq7lc?si=f241a44d776b451a')

[91, 96]

#### Album artists followers function

In [20]:
def get_all_album_artists_followers(track_id):
    meta = sp.track(track_id)
    album_artist_followers_list = []
    for item in (meta['album']['artists']):
        album_artist_id = item['id']
        album_artist_followers = sp.artist(album_artist_id)['followers']['total']
        album_artist_followers_list.append(album_artist_followers)
    return album_artist_followers_list     

Test with track id '3fFBZvG777xoKyvcrBq7lc?si=f241a44d776b451a'

In [21]:
get_all_album_artists_followers('3fFBZvG777xoKyvcrBq7lc?si=f241a44d776b451a')

[11044152, 21889473]

#### Track artists names function

In [22]:
def get_all_track_artists_names(track_id):
    meta = sp.track(track_id)
    track_artist_list = []
    for item in (meta['artists']):
        track_artist = item['name']
        track_artist_list.append(track_artist)
    return track_artist_list     

Test with track id '3fFBZvG777xoKyvcrBq7lc?si=f241a44d776b451a'

In [23]:
get_all_track_artists_names('3fFBZvG777xoKyvcrBq7lc?si=f241a44d776b451a')

['Future', 'Juice WRLD', 'Young Thug']

#### Track artists ids function

In [24]:
def get_all_track_artists_ids(track_id):
    meta = sp.track(track_id)
    track_artist_id_list = []
    for item in (meta['artists']):
        track_artist = item['id']
        track_artist_id_list.append(track_artist)
    return track_artist_id_list     

Test with track id '3fFBZvG777xoKyvcrBq7lc?si=f241a44d776b451a'

In [25]:
get_all_track_artists_ids('3fFBZvG777xoKyvcrBq7lc?si=f241a44d776b451a')

['1RyvyyTE3xzB2ZywiAwp0i', '4MCBfE4596Uoi2O4DtmEMz', '50co4Is1HCEo8bhOyUWKpn']

#### Track artists genres function

In [26]:
def get_all_track_artists_genres(track_id):
    meta = sp.track(track_id)
    track_artist_genre_list = []
    for item in (meta['artists']):
        track_artist_id = item['id']
        track_artist_genres = sp.artist(track_artist_id)['genres']
        track_artist_genre_list.append(track_artist_genres)
    return track_artist_genre_list     

Test with track id '3fFBZvG777xoKyvcrBq7lc?si=f241a44d776b451a'

In [27]:
get_all_track_artists_genres('3fFBZvG777xoKyvcrBq7lc?si=f241a44d776b451a')

[['atl hip hop', 'pop rap', 'rap', 'southern hip hop', 'trap'],
 ['chicago rap', 'melodic rap'],
 ['atl hip hop',
  'atl trap',
  'gangster rap',
  'hip hop',
  'melodic rap',
  'rap',
  'trap']]

#### Track artists popularity function

In [28]:
def get_all_track_artists_popularity(track_id):
    meta = sp.track(track_id)
    track_artist_popularity_list = []
    for item in (meta['artists']):
        track_artist_id = item['id']
        track_artist_popularity = sp.artist(track_artist_id)['popularity']
        track_artist_popularity_list.append(track_artist_popularity)
    return track_artist_popularity_list     

Test with track id '3fFBZvG777xoKyvcrBq7lc?si=f241a44d776b451a'

In [29]:
get_all_track_artists_popularity('3fFBZvG777xoKyvcrBq7lc?si=f241a44d776b451a')

[91, 96, 90]

#### Track artists followers function

In [30]:
def get_all_track_artists_followers(track_id):
    meta = sp.track(track_id)
    track_artist_followers_list = []
    for item in (meta['artists']):
        track_artist_id = item['id']
        track_artist_followers = sp.artist(track_artist_id)['followers']['total']
        track_artist_followers_list.append(track_artist_followers)
    return track_artist_followers_list     

Test with track id '3fFBZvG777xoKyvcrBq7lc?si=f241a44d776b451a'

In [31]:
get_all_track_artists_followers('3fFBZvG777xoKyvcrBq7lc?si=f241a44d776b451a')

[11044152, 21889473, 6891556]

### Album track data return function

In [32]:
def get_all_album_tracks_more_than_100_songs(album_track_id_list):
    track_name = []
    track_id = []
    track_popularity = []
    track_explicit = []
    track_artists = []
    track_artists_ids = []
    track_artists_genres = []
    track_artists_popularity = []
    track_artists_followers = []
    album_name = []
    album_id = []

    for i in tqdm(range(len(album_track_id_list))):
        track = sp.track(album_track_id_list[i])
        this_id = track['id']
        if i == 0:
            track_name = track['name']
            track_id = this_id
            track_popularity = track['popularity']
            track_explicit = track['explicit']
            track_artists = get_all_track_artists_names(this_id)
            track_artists_ids = get_all_album_artists_ids(this_id)
            track_artists_genres = get_all_album_artists_genres(this_id)
            track_artists_popularity = get_all_album_artists_popularity(this_id)
            track_artists_followers = get_all_album_artists_followers(this_id)
            album_name = track['album']['name']
            album_id = track['album']['id']
            features = sp.audio_features(this_id)
            features_df = pd.DataFrame(data=features, columns=features[0].keys())
            features_df['track_name'] = [track_name]
            features_df['track_id'] = [track_id]
            features_df['track_popularity'] = [track_popularity]
            features_df['track_explicit'] = [track_explicit]
            features_df['track_artists'] = [track_artists]
            features_df['track_artists_ids'] = [track_artists_ids]
            features_df['track_artists_genres'] = [track_artists_genres]
            features_df['track_artists_popularity'] = [track_artists_popularity]
            features_df['track_artists_followers'] = [track_artists_followers]           
            features_df['album_name'] = [album_name]
            features_df['album_id'] = [album_id]
            features_df = features_df[['track_name', 'track_id', 'track_popularity', 'track_artists', 
                                       'track_artists_ids', 'track_artists_genres', 'track_artists_popularity', 
                                       'track_explicit', 'track_artists_followers', 
                                       'album_name', 'album_id', 'danceability', 'energy', 
                                       'key', 'speechiness', 'loudness', 'mode', 'acousticness', 'instrumentalness',
                                       'liveness', 'valence', 'tempo',
                                       'duration_ms', 'time_signature']]
            continue
        else:
            try:
                track_name = track['name']
                track_id = this_id
                track_popularity = track['popularity']
                track_explicit = track['explicit']
                track_artists = get_all_track_artists_names(this_id)
                track_artists_ids = get_all_album_artists_ids(this_id)
                track_artists_genres = get_all_album_artists_genres(this_id)
                track_artists_popularity = get_all_album_artists_popularity(this_id)
                track_artists_followers = get_all_album_artists_followers(this_id)
                album_name = track['album']['name']
                album_id = track['album']['id']
                features = sp.audio_features(this_id)
                new_row = {'track_name': [track_name],
                    'track_id': [track_id],
                    'track_popularity': [track_popularity],
                    'track_artists': [track_artists],
                    'track_artists_ids': [track_artists_ids],
                    'track_artists_genres': [track_artists_genres],
                    'track_artists_popularity': [track_artists_popularity],
                    'track_explicit': [track_explicit],
                    'track_artists_followers': [track_artists_followers],
                    'album_name': [album_name],
                    'album_id': [album_id],
                    'danceability':[features[0]['danceability']],
                    'energy':[features[0]['energy']],
                    'key':[features[0]['key']],
                    'speechiness':[features[0]['speechiness']],
                    'loudness':[features[0]['loudness']],
                    'mode':[features[0]['mode']],
                    'acousticness':[features[0]['acousticness']],
                    'instrumentalness':[features[0]['instrumentalness']],
                    'liveness':[features[0]['liveness']],
                    'valence':[features[0]['valence']],
                    'tempo':[features[0]['tempo']],
                    'duration_ms':[features[0]['duration_ms']],
                    'time_signature':[features[0]['time_signature']]
                }

                dfs = [features_df, pd.DataFrame(new_row)]
                features_df = pd.concat(dfs, ignore_index = True)
            except:
                continue
                
    return features_df

Create test set from albums list

In [33]:
albums_track_ids_test = albums_track_ids[0:15]
albums_track_ids_test

['4qeMbs55QlONyrE9YBMA93',
 '4xv9BDq764NSKG3geku9X2',
 '7cWMnSxmQfKFsIIoRWifbb',
 '6Mn2GFiNNadK0G2ZXRK1fd',
 '3lb2f27695Iuyd82khwU9k',
 '6o6jHTdTaqQilpLLY1q8AG',
 '5WP1LPI7kLIQqEVEZKhnoo',
 '6Vfl4qLFPMY73whXRef8Mk',
 '4kPLqNab6ZMGxbzsg423iX',
 '7fgKAJlw9WNLoBORPAwBoB',
 '2JsDkPZtdgs21JWfKIkIeH',
 '7MVGhr6Gfg0Wx959PN2LxL',
 '5jKqR3BfRMAUnE7gAIq1Fh',
 '23V03OIpI0BpblCZredJGe',
 '65mMCEOu5Ll1DBAfEUmerU']

Test with test set

In [34]:
testing_album_tracks = get_all_album_tracks_more_than_100_songs(albums_track_ids_test)

100%|██████████| 15/15 [00:14<00:00,  1.04it/s]


View test dataframe

In [35]:
testing_album_tracks

Unnamed: 0,track_name,track_id,track_popularity,track_artists,track_artists_ids,track_artists_genres,track_artists_popularity,track_explicit,track_artists_followers,album_name,album_id,danceability,energy,key,speechiness,loudness,mode,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,Sand In My Boots,4qeMbs55QlONyrE9YBMA93,42,[Morgan Wallen],[4oUHIQIBe0LHzYfvXNW4QM],[[contemporary country]],[86],False,[2789002],Dangerous: The Double Album,1qW1C4kDOXnrly22daHbxz,0.354,0.54,3,0.0323,-6.625,1,0.587,0.0,0.112,0.416,69.97,202133,4
1,Wasted On You,4xv9BDq764NSKG3geku9X2,42,[Morgan Wallen],[4oUHIQIBe0LHzYfvXNW4QM],[[contemporary country]],[86],False,[2789002],Dangerous: The Double Album,1qW1C4kDOXnrly22daHbxz,0.505,0.658,11,0.0318,-5.24,0,0.371,0.00136,0.121,0.255,196.002,178520,3
2,Somebody’s Problem,7cWMnSxmQfKFsIIoRWifbb,35,[Morgan Wallen],[4oUHIQIBe0LHzYfvXNW4QM],[[contemporary country]],[86],False,[2789002],Dangerous: The Double Album,1qW1C4kDOXnrly22daHbxz,0.658,0.539,5,0.0289,-7.674,1,0.665,0.0,0.123,0.622,136.959,161773,4
3,More Surprised Than Me,6Mn2GFiNNadK0G2ZXRK1fd,35,[Morgan Wallen],[4oUHIQIBe0LHzYfvXNW4QM],[[contemporary country]],[86],False,[2789002],Dangerous: The Double Album,1qW1C4kDOXnrly22daHbxz,0.488,0.748,6,0.0297,-6.049,1,0.244,0.0,0.124,0.431,84.938,157440,4
4,865,3lb2f27695Iuyd82khwU9k,38,[Morgan Wallen],[4oUHIQIBe0LHzYfvXNW4QM],[[contemporary country]],[86],False,[2789002],Dangerous: The Double Album,1qW1C4kDOXnrly22daHbxz,0.454,0.586,8,0.0253,-5.254,1,0.729,0.0,0.101,0.366,87.891,190680,4
5,Warning,6o6jHTdTaqQilpLLY1q8AG,37,[Morgan Wallen],[4oUHIQIBe0LHzYfvXNW4QM],[[contemporary country]],[86],False,[2789002],Dangerous: The Double Album,1qW1C4kDOXnrly22daHbxz,0.633,0.663,10,0.0294,-5.049,1,0.463,0.0,0.108,0.253,115.052,156747,4
6,Neon Eyes,5WP1LPI7kLIQqEVEZKhnoo,36,[Morgan Wallen],[4oUHIQIBe0LHzYfvXNW4QM],[[contemporary country]],[86],False,[2789002],Dangerous: The Double Album,1qW1C4kDOXnrly22daHbxz,0.592,0.847,11,0.0285,-5.232,0,0.0102,4e-05,0.158,0.652,126.01,226147,4
7,Outlaw (feat. Ben Burgess),6Vfl4qLFPMY73whXRef8Mk,34,"[Morgan Wallen, Ben Burgess]",[4oUHIQIBe0LHzYfvXNW4QM],[[contemporary country]],[86],False,[2789002],Dangerous: The Double Album,1qW1C4kDOXnrly22daHbxz,0.506,0.633,7,0.0275,-5.755,1,0.583,0.0,0.121,0.244,85.05,229747,4
8,Whiskey’d My Way,4kPLqNab6ZMGxbzsg423iX,36,[Morgan Wallen],[4oUHIQIBe0LHzYfvXNW4QM],[[contemporary country]],[86],False,[2789002],Dangerous: The Double Album,1qW1C4kDOXnrly22daHbxz,0.449,0.518,2,0.0271,-6.37,1,0.599,0.00249,0.0984,0.314,185.879,180787,3
9,Wonderin’ Bout The Wind,7fgKAJlw9WNLoBORPAwBoB,34,[Morgan Wallen],[4oUHIQIBe0LHzYfvXNW4QM],[[contemporary country]],[86],False,[2789002],Dangerous: The Double Album,1qW1C4kDOXnrly22daHbxz,0.481,0.714,6,0.0335,-5.624,1,0.25,0.0,0.133,0.384,90.984,182227,4


## Dataset creation

### Pull track data from Spotify API

In [36]:
# Commenting out pull code so it doens't get run accidentally-- it takes a while!
all_album_tracks = get_all_album_tracks_more_than_100_songs(albums_track_ids)

100%|██████████| 8996/8996 [2:39:18<00:00,  1.06s/it]  


View dataset, check output row count

In [37]:
all_album_tracks

Unnamed: 0,track_name,track_id,track_popularity,track_artists,track_artists_ids,track_artists_genres,track_artists_popularity,track_explicit,track_artists_followers,album_name,album_id,danceability,energy,key,speechiness,loudness,mode,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,Sand In My Boots,4qeMbs55QlONyrE9YBMA93,42,[Morgan Wallen],[4oUHIQIBe0LHzYfvXNW4QM],[[contemporary country]],[86],False,[2789002],Dangerous: The Double Album,1qW1C4kDOXnrly22daHbxz,0.354,0.540,3,0.0323,-6.625,1,0.58700,0.000000,0.1120,0.416,69.970,202133,4
1,Wasted On You,4xv9BDq764NSKG3geku9X2,42,[Morgan Wallen],[4oUHIQIBe0LHzYfvXNW4QM],[[contemporary country]],[86],False,[2789002],Dangerous: The Double Album,1qW1C4kDOXnrly22daHbxz,0.505,0.658,11,0.0318,-5.240,0,0.37100,0.001360,0.1210,0.255,196.002,178520,3
2,Somebody’s Problem,7cWMnSxmQfKFsIIoRWifbb,35,[Morgan Wallen],[4oUHIQIBe0LHzYfvXNW4QM],[[contemporary country]],[86],False,[2789002],Dangerous: The Double Album,1qW1C4kDOXnrly22daHbxz,0.658,0.539,5,0.0289,-7.674,1,0.66500,0.000000,0.1230,0.622,136.959,161773,4
3,More Surprised Than Me,6Mn2GFiNNadK0G2ZXRK1fd,35,[Morgan Wallen],[4oUHIQIBe0LHzYfvXNW4QM],[[contemporary country]],[86],False,[2789002],Dangerous: The Double Album,1qW1C4kDOXnrly22daHbxz,0.488,0.748,6,0.0297,-6.049,1,0.24400,0.000000,0.1240,0.431,84.938,157440,4
4,865,3lb2f27695Iuyd82khwU9k,38,[Morgan Wallen],[4oUHIQIBe0LHzYfvXNW4QM],[[contemporary country]],[86],False,[2789002],Dangerous: The Double Album,1qW1C4kDOXnrly22daHbxz,0.454,0.586,8,0.0253,-5.254,1,0.72900,0.000000,0.1010,0.366,87.891,190680,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8987,Big Love In A Small Town,1VwuDXLQdyTWvieBIPWkNt,36,[Lady A],[32WkQRZEVKSzVAAYqukAEA],"[[contemporary country, country, country dawn,...",[73],False,[3439689],Heart Break,4WZZDzGqjvw4bRwrp56U24,0.524,0.523,1,0.0268,-5.150,1,0.64500,0.000000,0.1010,0.313,144.012,226013,4
8988,The Stars,2gFkkpMs7rakuCCi9UepP0,37,[Lady A],[32WkQRZEVKSzVAAYqukAEA],"[[contemporary country, country, country dawn,...",[73],False,[3439689],Heart Break,4WZZDzGqjvw4bRwrp56U24,0.576,0.766,1,0.0304,-4.594,1,0.26300,0.000004,0.1020,0.407,143.029,202347,4
8989,Teenage Heart,7G2BnhS46nEiPucLNvE0G3,35,[Lady A],[32WkQRZEVKSzVAAYqukAEA],"[[contemporary country, country, country dawn,...",[73],False,[3439689],Heart Break,4WZZDzGqjvw4bRwrp56U24,0.572,0.840,2,0.0341,-6.227,1,0.00323,0.000013,0.5590,0.505,133.474,220227,4
8990,Home,32zF5eXrb8QrUu5qNdYmIO,35,[Lady A],[32WkQRZEVKSzVAAYqukAEA],"[[contemporary country, country, country dawn,...",[73],False,[3439689],Heart Break,4WZZDzGqjvw4bRwrp56U24,0.559,0.692,7,0.0385,-7.797,1,0.48600,0.000004,0.1250,0.567,89.954,177200,4


#### Find the missing values

Create list of output track ids

In [38]:
output_track_ids = all_album_tracks['track_id'].tolist()
output_track_ids
print(len(output_track_ids))

8992


Compare input list and output list

In [39]:
missing_tracks = [item for item in albums_track_ids if item not in output_track_ids]
missing_tracks

['2MIBAmYwiuGoKUlpq9B9sZ',
 '0giiXonRhODYy4J1iy4Lb3',
 '5VKJr18j5O5Bl4KeYrBH0K',
 '4f6PUDRYJI51UrZy0jDAxD']

Missing tracks are missing audio feature values at the time of dataset creation and so will not be appended as they represent a non-significant portion of the data. 

#### Export all chart album tracks table to csv

In [40]:
all_album_tracks.to_csv('../data/allchartalbumtracks.csv', index=False)