In [1]:
# Import the dependencies
import os
import time

import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

from config import CLIENT_ID, CLIENT_SECRET

In [2]:
# Build the credentials manager from the client id / secret kept in config.py
client_credentials_manager = SpotifyClientCredentials(
    client_id=CLIENT_ID,
    client_secret=CLIENT_SECRET,
)
# API client shared by the cells below
sp = spotipy.Spotify(
    client_credentials_manager=client_credentials_manager,
)

In [3]:
# Collect every track entry of a playlist (each entry carries properties such as name,
# artist and release date), given a Spotify user_name and playlist_id

# https://stackoverflow.com/questions/39086287/spotipy-how-to-read-more-than-100-tracks-from-a-playlist
def get_playlist_tracks(username, playlist_id, client=None):
    """Return the items of every result page of a playlist.

    The first page is requested with ``user_playlist_tracks``; further pages are
    fetched with ``next`` for as long as the current page has a truthy ``'next'`` entry.

    Args:
        username: User name forwarded to ``user_playlist_tracks``.
        playlist_id: Playlist id forwarded to ``user_playlist_tracks``.
        client: Optional object providing ``user_playlist_tracks(username, playlist_id)``
            and ``next(page)``. Defaults to the notebook-level ``sp`` client.

    Returns:
        A new list holding the ``'items'`` of all pages, in page order.
    """
    api = sp if client is None else client
    page = api.user_playlist_tracks(username, playlist_id)
    # Copy the first page's items so extending the result never mutates the response itself
    tracks = list(page['items'])
    while page['next']:
        page = api.next(page)
        tracks.extend(page['items'])
    return tracks

playlist = get_playlist_tracks(
    username="23ceahnegd3f2nr3pro2d3ef9",
    playlist_id="1dlbVCoN4U1JHZXYbe4SWn",
)

# Sanity check: number of playlist items that were retrieved
print(len(playlist))

10000


In [4]:
# Get the most important properties from the tracks for song analysis and store them in lists.
# Playlist items without a usable 'track' entry (missing or None) are filtered out once, up
# front, so that one such item cannot abort the extraction and all lists stay index-aligned.
# NOTE(review): presumably the API returns such entries for removed/unavailable tracks - confirm.
valid_tracks = [song['track'] for song in playlist if song.get('track')]

# The track id is the third ':'-separated field of the track URI
track_ids = [track['uri'].split(':')[2] for track in valid_tracks]
track_names = [track['name'] for track in valid_tracks]
# Only the first listed artist of every track is kept
artist_ids = [track['artists'][0]['id'] for track in valid_tracks]
artist_names = [track['artists'][0]['name'] for track in valid_tracks]
release_date = [track['album']['release_date'] for track in valid_tracks]
popularity = [track['popularity'] for track in valid_tracks]

# Data integrity check: every property list must describe the same number of tracks
assert len({
    len(values)
    for values in (track_ids, track_names, artist_ids, artist_names, release_date, popularity)
}) == 1, 'track property lists have different lengths'

In [5]:
# Get the audio features (danceability, liveness, etc.) of every track and store them in a list.
# audio_features receives exactly one entry per track id, in track_ids order, so that the
# cells below can pair features with tracks by position.
audio_features = []
# (track_id, error) pairs for the requests that raised
failed_imports = []

# Track the progress of the audio_features import
rows_imported = 0
start_row = 0
# get the start_time from time.time()
start_time = time.time()
mid_time = start_time

for rows_imported, track_id in enumerate(track_ids, start=1):
    # Get the track features and save them in the list
    try:
        audio_features.append(sp.audio_features(track_id))
    except Exception as error:
        # Skipping the append here would shift every later entry by one position relative
        # to track_ids. Store a one-element list holding None instead: the extraction cell
        # below indexes features[0] and already treats None as "no features available".
        audio_features.append([None])
        failed_imports.append((track_id, error))

    # Prints the status of the import and time elapsed
    if rows_imported % 1000 == 0:
        print(f'imported rows {start_row} to {rows_imported} in {time.time() - mid_time} seconds')
        mid_time = time.time()
        start_row += 1000

if failed_imports:
    print(f'{len(failed_imports)} track(s) raised an error and were stored without audio features')

# add elapsed time to final print out
print(f'Done. {time.time() - start_time} total seconds elapsed')

imported rows 0 to 1000 in 175.93363785743713 seconds
imported rows 1000 to 2000 in 99.23150897026062 seconds
imported rows 2000 to 3000 in 99.56110644340515 seconds
imported rows 3000 to 4000 in 99.15570950508118 seconds
imported rows 4000 to 5000 in 99.03066086769104 seconds
imported rows 5000 to 6000 in 99.2404375076294 seconds
imported rows 6000 to 7000 in 95.7204692363739 seconds
imported rows 7000 to 8000 in 99.18280386924744 seconds
imported rows 8000 to 9000 in 99.03689312934875 seconds
imported rows 9000 to 10000 in 99.93703770637512 seconds
Done. 1066.0302650928497 total seconds elapsed


In [6]:
# Extract each feature individually from audio_features as one list per feature, for all tracks.
# Every list gets exactly one value per audio_features entry; entries without usable
# features are recorded as 'NA' in every list.

feature_track_ids = []
danceability = []
energy = []
key = []
loudness = []
mode = []
speechiness = []
acousticness = []
instrumentalness = []
liveness = []
valence = []
tempo = []
duration_mins = []
time_signature = []

# Destination list for every feature key that is copied through unchanged
_feature_columns = {
    'id': feature_track_ids,
    'danceability': danceability,
    'energy': energy,
    'key': key,
    'loudness': loudness,
    'mode': mode,
    'speechiness': speechiness,
    'acousticness': acousticness,
    'instrumentalness': instrumentalness,
    'liveness': liveness,
    'valence': valence,
    'tempo': tempo,
    'time_signature': time_signature,
}

for features in audio_features:
    try:
        # Build the complete row BEFORE touching any list: appending value by value inside
        # the try block would leave some lists one element longer than the others whenever
        # a TypeError is raised part-way through (e.g. a None duration).
        track_features = features[0]
        row = {name: track_features[name] for name in _feature_columns}
        # duration is stored in minutes, rounded to two decimals
        minutes = round(int(track_features['duration_ms']) / 60000, 2)
    except TypeError:
        # features (or its first element) is None: no audio features for this track
        row = dict.fromkeys(_feature_columns, 'NA')
        minutes = 'NA'

    for name, column in _feature_columns.items():
        column.append(row[name])
    duration_mins.append(minutes)

# Data integrity check: one value per audio_features entry in every list
assert all(
    len(column) == len(audio_features)
    for column in (*_feature_columns.values(), duration_mins)
), 'feature lists are not aligned with audio_features'

In [31]:
# Combine the track properties and the audio features into one DataFrame,
# one column per list (all lists are expected to have the same length)
songs_description_df = pd.DataFrame(dict(
    feature_track_id=feature_track_ids,
    track_id=track_ids,
    track_names=track_names,
    artist_ids=artist_ids,
    artist_names=artist_names,
    release_date=release_date,
    popularity=popularity,
    danceability=danceability,
    energy=energy,
    key=key,
    loudness=loudness,
    mode=mode,
    speechiness=speechiness,
    acousticness=acousticness,
    instrumentalness=instrumentalness,
    liveness=liveness,
    valence=valence,
    tempo=tempo,
    duration_mins=duration_mins,
    time_signature=time_signature,
))

songs_description_df

Unnamed: 0,feature_track_id,track_id,track_names,artist_ids,artist_names,release_date,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_mins,time_signature
0,3SWqGa1J0M7hSBUDM0KePD,3SWqGa1J0M7hSBUDM0KePD,Legend,2q3GG88dVwuQPF4FmySr9I,The Score,2017-10-13,75,0.601,0.757,2,-3.341,1,0.028,0.0139,0.000088,0.0727,0.462,82.001,3.15,4
1,3z8h0TU7ReDPLIbEnYhWZb,3z8h0TU7ReDPLIbEnYhWZb,Bohemian Rhapsody,1dfeR4HaWDbWqFHLkxsg1d,Queen,2018-10-19,75,0.39,0.397,0,-9.963,0,0.0513,0.287,0,0.207,0.246,144.031,5.92,4
2,0quum0TSd0de8jjpILY2xi,0quum0TSd0de8jjpILY2xi,Brawl Stars (Menu Remix V5),43iexp0Suf11TFpvR4OdPh,Hatkuvi,2020-07-06,28,0.522,0.749,5,-9.181,0,0.0278,0.000391,0.872,0.14,0.274,119.974,2.74,4
3,1sWeSMifj6Z6kZyI6z3bRc,1sWeSMifj6Z6kZyI6z3bRc,Warriors,53XhwfbYqKCa1cC15pYq2q,Imagine Dragons,2015-05-18,75,0.365,0.851,4,-6.238,0,0.0645,0.0964,0.003,0.237,0.309,78.086,2.85,4
4,5CQ30WqJwcep0pYcV4AMNc,5CQ30WqJwcep0pYcV4AMNc,Stairway to Heaven - Remaster,36QJpDe2go2KgaRleHCDTp,Led Zeppelin,1971-11-08,81,0.338,0.34,9,-12.049,0,0.0339,0.58,0.0032,0.116,0.197,82.433,8.05,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,,0q5lnUuDhlogtYCOubNQhQ,The Rise and Fall of Bossanova Section IV,1TkMNnlmVdWmE2DPzKhZmr,P C III,2016-11-01,25,,,,,,,,,,,,,
9996,,6JyUeAB6lGSHH2UdIR643f,The Rise and Fall of Bossanova Section V,1TkMNnlmVdWmE2DPzKhZmr,P C III,2016-11-01,14,,,,,,,,,,,,,
9997,,0q5lnUuDhlogtYCOubNQhQ,The Rise and Fall of Bossanova Section IV,1TkMNnlmVdWmE2DPzKhZmr,P C III,2016-11-01,25,,,,,,,,,,,,,
9998,,0q5lnUuDhlogtYCOubNQhQ,The Rise and Fall of Bossanova Section IV,1TkMNnlmVdWmE2DPzKhZmr,P C III,2016-11-01,25,,,,,,,,,,,,,


In [32]:
# Index labels of the rows whose feature_track_id differs from their track_id,
# i.e. rows for which no matching audio features were stored
index_names = songs_description_df.index[
    songs_description_df['feature_track_id'].ne(songs_description_df['track_id'])
]

# Remove those rows so that only songs with both track properties and audio features remain
songs_description_df.drop(index=index_names, inplace=True)

# feature_track_id now duplicates track_id, so the column is dropped
songs_description_df.drop(columns='feature_track_id', inplace=True)

# Number of remaining rows, shown to check data integrity
songs_description_df.shape[0]

9592

In [33]:
# Convert the DataFrame columns to appropriate types
songs_description_df['release_date'] = pd.to_datetime(songs_description_df['release_date'])

# All of these columns are converted with pd.to_numeric; errors='raise' makes an
# unparsable value fail loudly instead of being coerced
numeric_columns = [
    'popularity',
    'danceability',
    'energy',
    'key',
    'loudness',
    'mode',
    'speechiness',
    'acousticness',
    'instrumentalness',
    'liveness',
    'valence',
    'tempo',
    'duration_mins',
    'time_signature',
]
for column_name in numeric_columns:
    songs_description_df[column_name] = pd.to_numeric(
        songs_description_df[column_name], errors='raise'
    )

songs_description_df.dtypes

track_id                    object
track_names                 object
artist_ids                  object
artist_names                object
release_date        datetime64[ns]
popularity                   int64
danceability               float64
energy                     float64
key                          int64
loudness                   float64
mode                         int64
speechiness                float64
acousticness               float64
instrumentalness           float64
liveness                   float64
valence                    float64
tempo                      float64
duration_mins              float64
time_signature               int64
dtype: object

In [35]:
# Remove duplicate tracks so that every track_id appears only once
# Number of distinct track ids before de-duplication
print(songs_description_df["track_id"].unique().size)
songs_description_df.drop_duplicates(subset="track_id", inplace=True)
# Row count afterwards; expected to equal the distinct count printed above
print(songs_description_df.shape[0])

2200
2200


In [37]:
# Display the DataFrame as it stands after the clean-up cells above
songs_description_df

Unnamed: 0,track_id,track_names,artist_ids,artist_names,release_date,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_mins,time_signature
0,3SWqGa1J0M7hSBUDM0KePD,Legend,2q3GG88dVwuQPF4FmySr9I,The Score,2017-10-13,75,0.601,0.757,2,-3.341,1,0.0280,0.013900,0.000088,0.0727,0.4620,82.001,3.15,4
1,3z8h0TU7ReDPLIbEnYhWZb,Bohemian Rhapsody,1dfeR4HaWDbWqFHLkxsg1d,Queen,2018-10-19,75,0.390,0.397,0,-9.963,0,0.0513,0.287000,0.000000,0.2070,0.2460,144.031,5.92,4
2,0quum0TSd0de8jjpILY2xi,Brawl Stars (Menu Remix V5),43iexp0Suf11TFpvR4OdPh,Hatkuvi,2020-07-06,28,0.522,0.749,5,-9.181,0,0.0278,0.000391,0.872000,0.1400,0.2740,119.974,2.74,4
3,1sWeSMifj6Z6kZyI6z3bRc,Warriors,53XhwfbYqKCa1cC15pYq2q,Imagine Dragons,2015-05-18,75,0.365,0.851,4,-6.238,0,0.0645,0.096400,0.003000,0.2370,0.3090,78.086,2.85,4
4,5CQ30WqJwcep0pYcV4AMNc,Stairway to Heaven - Remaster,36QJpDe2go2KgaRleHCDTp,Led Zeppelin,1971-11-08,81,0.338,0.340,9,-12.049,0,0.0339,0.580000,0.003200,0.1160,0.1970,82.433,8.05,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8073,4DaByeLWrk8Zwk9H9aepvc,Drama Queen - Radio Mix,2IN2VBkdXOBfoXJGrAUV1O,Deep Zone Project,2017-04-01,3,0.653,0.951,1,-3.020,0,0.0649,0.010300,0.000000,0.0595,0.8230,170.050,3.11,4
8074,0aTDDlw8ZAQM0IvgwsKvqE,Maski Dolu - Club Remix,2IN2VBkdXOBfoXJGrAUV1O,Deep Zone Project,2017-04-01,3,0.713,0.873,4,-6.366,0,0.1010,0.008860,0.026200,0.1080,0.4020,121.996,5.04,4
8075,3VftjjFNQUOCmf02vbjiqK,Jore Dos - Club Mix,2IN2VBkdXOBfoXJGrAUV1O,Deep Zone Project,2017-04-01,3,0.668,0.949,2,-7.156,0,0.0517,0.023000,0.240000,0.4920,0.8320,128.031,5.94,4
9898,6UGAiIPbXtrdor1PD7PzCJ,Didn't We Deserve a Look At You The Way You Re...,6I8R5MFTlez7rHCsH4cx0u,Shellac,1998-05-19,0,0.646,0.285,11,-13.824,0,0.1910,0.429000,0.835000,0.1120,0.3860,136.007,12.32,3


In [38]:
# Convert the dataframe to a csv (written without the index column)
# to_csv does not create missing folders, so make sure the output directory exists first
os.makedirs('Resources', exist_ok=True)
songs_description_df.to_csv('Resources/spotify_playlist_songs.csv', index = False)