In [1]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd

In [2]:
# Load the Spotify API credentials from a local "key: value" text file.
# The keys read later in this notebook are 'cid' and 'csecret'.
secrets_dict = {}
with open("secrets.txt", "r") as secrets_file:  # context manager closes the handle
    for line in secrets_file:
        # Split on the first ':' only, so a value that itself contains ':' stays intact.
        key, separator, value = line.partition(':')
        if not separator:
            continue  # blank line, or a line without a key/value separator
        secrets_dict[key.strip()] = value.strip()

In [3]:
# Create the Spotify API client, authenticating with the client id/secret
# read from secrets_dict.
sp = spotipy.Spotify(
    auth_manager=SpotifyClientCredentials(
        client_id=secrets_dict['cid'],
        client_secret=secrets_dict['csecret'],
    )
)

## Extract songs from playlist

In [4]:
from random import randint
from time import sleep

def get_playlist_tracks(playlist_id):
    """Return every item of a playlist, following the API's pagination.

    Parameters
    ----------
    playlist_id : str
        ID of the playlist to read.

    Returns
    -------
    list
        The ``'items'`` entries of every result page, concatenated in page order.

    Notes
    -----
    Uses the notebook-level client ``sp``. Pauses a random 1-3 seconds after
    each follow-up page request.
    """
    results = sp.user_playlist_tracks("spotify", playlist_id)
    # Copy so that extending below never mutates the first page's own list.
    tracks = list(results['items'])
    # 'next' is None on the last page; compare by identity rather than equality.
    while results['next'] is not None:
        results = sp.next(results)
        # Extend in place instead of rebuilding the whole list for every page.
        tracks.extend(results['items'])
        sleep(randint(1, 3))
    return tracks

In [5]:
# Fetch every item of the target playlist (ID hard-coded here) and show how many came back.
all_tracks = get_playlist_tracks("6wLYS8gxCFYqZBMHsz16fV")
len(all_tracks)

5186

In [6]:
#all_tracks

In [7]:
#all_tracks[1]['track']

In [8]:
def get_info(track):
    """Return one ``(name, artist name, uri)`` tuple per artist of ``track``.

    ``track`` is a mapping with ``"name"``, ``"uri"`` and ``"artists"`` keys;
    each entry of ``"artists"`` is itself a mapping with a ``"name"`` key.
    A track with no artists yields an empty list.
    """
    rows = []
    for performer in track["artists"]:
        rows.append((track["name"], performer["name"], track["uri"]))
    return rows

In [9]:
get_info(all_tracks[0]['track'])

[('Run Away', 'dvsn', 'spotify:track:0xD824qQkuonQNHe2dSeI4')]

In [28]:
# Build one list of (song, artist, uri) tuples per playlist item; the nested
# lists are flattened in a later cell.
num_iter = len(all_tracks)
tracksinfo = [get_info(entry['track']) for entry in all_tracks]

In [11]:
#print(tracksinfo)

In [29]:
def flatten(input_list):
    """Concatenate the elements of every sub-sequence of ``input_list`` into one list.

    Only one level of nesting is removed.
    """
    merged = []
    for group in input_list:
        merged.extend(group)
    return merged

In [30]:
# Collapse the per-item lists into a single list of (song, artist, uri) tuples.
# NOTE(review): this rebinds its own input, so running the cell a second time
# would flatten the tuples themselves into individual strings.
tracksinfo = flatten(tracksinfo)

In [31]:
# Turn the (song, artist, uri) tuples into a table with one row per song/artist pair.
tracksinfo = pd.DataFrame(tracksinfo, columns = ['song', 'artist', 'uri'])

In [32]:
tracksinfo.head(10)

Unnamed: 0,song,artist,uri
0,Run Away,dvsn,spotify:track:0xD824qQkuonQNHe2dSeI4
1,L$D,A$AP Rocky,spotify:track:4S7YHmlWwfwArgd8LfSPud
2,Studio,ScHoolboy Q,spotify:track:29gsi1zZrZxdStACmTQB0Z
3,Studio,BJ The Chicago Kid,spotify:track:29gsi1zZrZxdStACmTQB0Z
4,OTW,Khalid,spotify:track:6Hgh47WXVKtXN5zGOu0hjI
5,OTW,6LACK,spotify:track:6Hgh47WXVKtXN5zGOu0hjI
6,OTW,Ty Dolla $ign,spotify:track:6Hgh47WXVKtXN5zGOu0hjI
7,Silence,Marshmello,spotify:track:7vGuf3Y35N4wmASOKLUVVU
8,Silence,Khalid,spotify:track:7vGuf3Y35N4wmASOKLUVVU
9,I Fall Apart,Post Malone,spotify:track:2mdxJLJHzFmiiYdTZ8JcyH


In [33]:
tracksinfo.shape

(8136, 3)

### Drop duplicates

In [34]:
# Shape of the rows whose URI occurs more than once (keep=False flags every occurrence).
tracksinfo.loc[tracksinfo['uri'].duplicated(keep=False)].shape

(5091, 3)

In [35]:
# Keep only the first row for each URI, so a track credited to several artists
# retains just the first artist listed. reset_index() without drop=True keeps
# the old index as an extra 'index' column, hence the 4 columns in the shape.
tracksinfo = tracksinfo.drop_duplicates(subset=['uri']).reset_index()
tracksinfo.shape

(5186, 4)

In [37]:
# Remove the helper 'index' column left behind by reset_index().
# errors='ignore' keeps this cell safe to re-run once the column is already gone.
tracksinfo = tracksinfo.drop(columns=['index'], errors='ignore')

### Add audio features

In [43]:
# NOTE(review): `playlist` is not referenced again in the visible notebook, and this
# repeats the first-page request already made inside get_playlist_tracks —
# confirm whether this cell is still needed.
playlist = sp.user_playlist_tracks("spotify", "6wLYS8gxCFYqZBMHsz16fV")

In [44]:
sp.audio_features(tracksinfo['uri'][0])

[{'danceability': 0.764,
  'energy': 0.33,
  'key': 2,
  'loudness': -12.801,
  'mode': 0,
  'speechiness': 0.0427,
  'acousticness': 0.1,
  'instrumentalness': 0.506,
  'liveness': 0.139,
  'valence': 0.0686,
  'tempo': 119.978,
  'type': 'audio_features',
  'id': '0xD824qQkuonQNHe2dSeI4',
  'uri': 'spotify:track:0xD824qQkuonQNHe2dSeI4',
  'track_href': 'https://api.spotify.com/v1/tracks/0xD824qQkuonQNHe2dSeI4',
  'analysis_url': 'https://api.spotify.com/v1/audio-analysis/0xD824qQkuonQNHe2dSeI4',
  'duration_ms': 140293,
  'time_signature': 4}]

In [45]:
tracksinfo['uri'].head()

0    spotify:track:0xD824qQkuonQNHe2dSeI4
1    spotify:track:4S7YHmlWwfwArgd8LfSPud
2    spotify:track:29gsi1zZrZxdStACmTQB0Z
3    spotify:track:6Hgh47WXVKtXN5zGOu0hjI
4    spotify:track:7vGuf3Y35N4wmASOKLUVVU
Name: uri, dtype: object

In [46]:
# Fetch audio features in batches instead of issuing one request (plus a
# 1-3 second pause) per track. sp.audio_features accepts a list of up to
# 100 tracks per call and returns one entry per requested track, in order.
AUDIO_FEATURES_BATCH_SIZE = 100
uris = tracksinfo['uri'].tolist()
features = []

for start in range(0, len(uris), AUDIO_FEATURES_BATCH_SIZE):
    batch = sp.audio_features(uris[start:start + AUDIO_FEATURES_BATCH_SIZE])
    # Preserve the original shape (one single-element list per track) so the
    # DataFrame built from `features` still has a single 'features' column.
    features.extend([track_features] for track_features in batch)
    sleep(randint(1, 3))

In [75]:
features

[[{'danceability': 0.764,
   'energy': 0.33,
   'key': 2,
   'loudness': -12.801,
   'mode': 0,
   'speechiness': 0.0427,
   'acousticness': 0.1,
   'instrumentalness': 0.506,
   'liveness': 0.139,
   'valence': 0.0686,
   'tempo': 119.978,
   'type': 'audio_features',
   'id': '0xD824qQkuonQNHe2dSeI4',
   'uri': 'spotify:track:0xD824qQkuonQNHe2dSeI4',
   'track_href': 'https://api.spotify.com/v1/tracks/0xD824qQkuonQNHe2dSeI4',
   'analysis_url': 'https://api.spotify.com/v1/audio-analysis/0xD824qQkuonQNHe2dSeI4',
   'duration_ms': 140293,
   'time_signature': 4}],
 [{'danceability': 0.598,
   'energy': 0.252,
   'key': 2,
   'loudness': -15.086,
   'mode': 1,
   'speechiness': 0.0644,
   'acousticness': 0.194,
   'instrumentalness': 0.133,
   'liveness': 0.0992,
   'valence': 0.0352,
   'tempo': 120.064,
   'type': 'audio_features',
   'id': '4S7YHmlWwfwArgd8LfSPud',
   'uri': 'spotify:track:4S7YHmlWwfwArgd8LfSPud',
   'track_href': 'https://api.spotify.com/v1/tracks/4S7YHmlWwfwArgd8Lf

In [50]:
# Each entry of `features` is a single-element list, so this yields one
# 'features' column holding the audio-feature dict of each track.
audio_feat = pd.DataFrame(features, columns = ['features'])

In [74]:
audio_feat.head()

Unnamed: 0,features
0,"{'danceability': 0.764, 'energy': 0.33, 'key':..."
1,"{'danceability': 0.598, 'energy': 0.252, 'key'..."
2,"{'danceability': 0.709, 'energy': 0.589, 'key'..."
3,"{'danceability': 0.652, 'energy': 0.678, 'key'..."
4,"{'danceability': 0.52, 'energy': 0.761, 'key':..."


In [78]:
# audio_features = pd.DataFrame(features)
# audio_features.head()

In [57]:
#tracksinfo

In [53]:
# Put track info and audio features side by side. Rows are matched on index
# label, so this relies on both frames sharing the same 0..n-1 index.
tracks = pd.concat([tracksinfo, audio_feat], axis=1)

In [55]:
tracks.head()

Unnamed: 0,song,artist,uri,features
0,Run Away,dvsn,spotify:track:0xD824qQkuonQNHe2dSeI4,"{'danceability': 0.764, 'energy': 0.33, 'key':..."
1,L$D,A$AP Rocky,spotify:track:4S7YHmlWwfwArgd8LfSPud,"{'danceability': 0.598, 'energy': 0.252, 'key'..."
2,Studio,ScHoolboy Q,spotify:track:29gsi1zZrZxdStACmTQB0Z,"{'danceability': 0.709, 'energy': 0.589, 'key'..."
3,OTW,Khalid,spotify:track:6Hgh47WXVKtXN5zGOu0hjI,"{'danceability': 0.652, 'energy': 0.678, 'key'..."
4,Silence,Marshmello,spotify:track:7vGuf3Y35N4wmASOKLUVVU,"{'danceability': 0.52, 'energy': 0.761, 'key':..."


In [59]:
tracks.shape

(5186, 4)

In [61]:
tracks

Unnamed: 0,song,artist,uri,features
0,Run Away,dvsn,spotify:track:0xD824qQkuonQNHe2dSeI4,"{'danceability': 0.764, 'energy': 0.33, 'key':..."
1,L$D,A$AP Rocky,spotify:track:4S7YHmlWwfwArgd8LfSPud,"{'danceability': 0.598, 'energy': 0.252, 'key'..."
2,Studio,ScHoolboy Q,spotify:track:29gsi1zZrZxdStACmTQB0Z,"{'danceability': 0.709, 'energy': 0.589, 'key'..."
3,OTW,Khalid,spotify:track:6Hgh47WXVKtXN5zGOu0hjI,"{'danceability': 0.652, 'energy': 0.678, 'key'..."
4,Silence,Marshmello,spotify:track:7vGuf3Y35N4wmASOKLUVVU,"{'danceability': 0.52, 'energy': 0.761, 'key':..."
...,...,...,...,...
5181,Talk,6LACK,spotify:track:5WLkwRe2pFxItoPNBSa40R,"{'danceability': 0.829, 'energy': 0.493, 'key'..."
5182,Us Against the World,Strandz,spotify:track:4TaS4giQQK01vKzBB40AEY,"{'danceability': 0.762, 'energy': 0.647, 'key'..."
5183,BESO,ROSALÍA,spotify:track:609E1JCInJncactoMmkDon,"{'danceability': 0.768, 'energy': 0.644, 'key'..."
5184,Rollercoaster (feat. J Balvin),Burna Boy,spotify:track:4pX4VTKooLaSRLkESPyR8N,"{'danceability': 0.774, 'energy': 0.687, 'key'..."


In [65]:
def flattendf(data, col_list):
    """Expand dict-valued columns into one column per dict key.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing the columns to expand.
    col_list : iterable of str
        Names of columns whose cells are dicts.

    Returns
    -------
    pandas.DataFrame
        A new frame in which each listed column is replaced by columns named
        ``"<column>_<key>"``, appended after the remaining columns.

    Notes
    -----
    Cells that are not dicts (for example ``None``) are treated as empty
    records and produce missing values. The expanded frame is built directly
    from the records, so each new column gets its own inferred dtype instead
    of every column becoming ``object``.
    """
    for column in col_list:
        # Non-dict cells contribute no keys, so their row is filled with NaN.
        records = [cell if isinstance(cell, dict) else {} for cell in data[column]]
        # Reuse the caller's index so the column-wise concat lines rows up exactly.
        flattened = pd.DataFrame(records, index=data.index)
        flattened.columns = [f"{column}_{colname}" for colname in flattened.columns]
        data = pd.concat([data.drop(column, axis=1), flattened], axis=1)
    return data

In [71]:
# NOTE(review): presumably meant to show every column of the widened frame;
# pandas documents None as "unlimited" for this option — confirm 0 is intended.
pd.set_option('display.max_columns', 0)
columns = ['features']

# Replace the dict-valued 'features' column with one column per audio feature.
# NOTE(review): re-running this cell raises a KeyError, because the first run
# removes the 'features' column from `tracks`.
tracks = flattendf(tracks, columns)
tracks.head(5)

Unnamed: 0,song,artist,uri,features_danceability,features_energy,features_key,features_loudness,features_mode,features_speechiness,features_acousticness,features_instrumentalness,features_liveness,features_valence,features_tempo,features_type,features_id,features_uri,features_track_href,features_analysis_url,features_duration_ms,features_time_signature
0,Run Away,dvsn,spotify:track:0xD824qQkuonQNHe2dSeI4,0.764,0.33,2,-12.801,0,0.0427,0.1,0.506,0.139,0.0686,119.978,audio_features,0xD824qQkuonQNHe2dSeI4,spotify:track:0xD824qQkuonQNHe2dSeI4,https://api.spotify.com/v1/tracks/0xD824qQkuon...,https://api.spotify.com/v1/audio-analysis/0xD8...,140293,4
1,L$D,A$AP Rocky,spotify:track:4S7YHmlWwfwArgd8LfSPud,0.598,0.252,2,-15.086,1,0.0644,0.194,0.133,0.0992,0.0352,120.064,audio_features,4S7YHmlWwfwArgd8LfSPud,spotify:track:4S7YHmlWwfwArgd8LfSPud,https://api.spotify.com/v1/tracks/4S7YHmlWwfwA...,https://api.spotify.com/v1/audio-analysis/4S7Y...,238307,4
2,Studio,ScHoolboy Q,spotify:track:29gsi1zZrZxdStACmTQB0Z,0.709,0.589,8,-7.776,0,0.0433,0.0507,0.0,0.0585,0.58,133.922,audio_features,29gsi1zZrZxdStACmTQB0Z,spotify:track:29gsi1zZrZxdStACmTQB0Z,https://api.spotify.com/v1/tracks/29gsi1zZrZxd...,https://api.spotify.com/v1/audio-analysis/29gs...,278067,3
3,OTW,Khalid,spotify:track:6Hgh47WXVKtXN5zGOu0hjI,0.652,0.678,2,-6.183,1,0.0541,0.183,3.3e-05,0.106,0.28,72.989,audio_features,6Hgh47WXVKtXN5zGOu0hjI,spotify:track:6Hgh47WXVKtXN5zGOu0hjI,https://api.spotify.com/v1/tracks/6Hgh47WXVKtX...,https://api.spotify.com/v1/audio-analysis/6Hgh...,263014,4
4,Silence,Marshmello,spotify:track:7vGuf3Y35N4wmASOKLUVVU,0.52,0.761,4,-3.093,1,0.0853,0.256,5e-06,0.17,0.286,141.971,audio_features,7vGuf3Y35N4wmASOKLUVVU,spotify:track:7vGuf3Y35N4wmASOKLUVVU,https://api.spotify.com/v1/tracks/7vGuf3Y35N4w...,https://api.spotify.com/v1/audio-analysis/7vGu...,180823,4


In [72]:
# Sanity check: the URI inside each audio-feature record matches the URI of
# the track row it was joined to, i.e. the two sources stayed aligned.
list(tracks['uri']) == list(tracks['features_uri'])

True

In [73]:
# Save the combined table to disk; index=False leaves the row index out of the file.
tracks.to_csv('spotifytracks.csv', index=False)