# Extract Data from Spotify API

The Spotify API has a Python library called 'Spotipy'.

In [None]:
# Install/upgrade spotipy with the %pip magic so the package lands in the
# environment of the kernel that is running this notebook.
%pip install --upgrade spotipy

In [17]:
# Dependencies
import pandas as pd
import json
import requests
import time

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from config_spotify import client_id, client_secret

# from __future__ import print_function
# import sys

In [18]:
# Build an authenticated Spotipy client from the credentials stored in config_spotify
cid, secret = client_id, client_secret

client_credentials_manager = SpotifyClientCredentials(
    client_id=cid,
    client_secret=secret,
)
sp = spotipy.Spotify(
    client_credentials_manager=client_credentials_manager,
)

### Request the data using Spotipy based on Genres

In [None]:
# List of genres we will extract from Spotipy
# genre:pop
# genre:hip-hop
# genre:jazz
# genre:rock
# genre:k-pop
# genre:instrumental
# genre:asmr

In [24]:
# Search Spotify for tracks of one genre and collect basic metadata per track.
# The results are accumulated in parallel lists (one entry per track) that the
# following cells turn into DataFrames and use to look up audio features.
SEARCH_QUERY = 'genre:hip-hop'
PAGE_SIZE = 50             # tracks requested per search call
NUM_PAGES = 20             # 20 pages x 50 tracks = up to 1000 tracks
SEARCH_PAUSE_SECONDS = 5   # pause between consecutive search requests

artist_name = []
track_name = []
popularity = []
track_id = []
track_uri = []

for page in range(NUM_PAGES):
    # `offset` is the index of the first result to return (counted in items,
    # not pages). Stepping it by 1 would re-fetch 49 of the 50 tracks on every
    # request and fill the lists with duplicates, so advance by a full page.
    track_results = sp.search(
        q=SEARCH_QUERY,
        type='track',
        limit=PAGE_SIZE,
        offset=page * PAGE_SIZE,
    )
    items = track_results['tracks']['items']
    if not items:
        # No more results for this query; stop instead of issuing empty requests
        break

    for t in items:
        # Only the first listed artist is recorded for each track
        artist_name.append(t['artists'][0]['name'])
        track_name.append(t['name'])
        track_id.append(t['id'])
        popularity.append(t['popularity'])
        track_uri.append(t['uri'])
    time.sleep(SEARCH_PAUSE_SECONDS)

In [25]:
# Assemble the collected track metadata into a DataFrame and tag every row with its genre
track_df = pd.DataFrame(
    {
        'artist_name': artist_name,
        'track_name': track_name,
        'popularity': popularity,
        'track_id': track_id,
        'track_uri': track_uri,
    }
).assign(genre='hiphop')

# Quick sanity check: (rows, columns) followed by the first few rows
print(track_df.shape)
track_df.head()

(1000, 6)


Unnamed: 0,artist_name,track_name,popularity,track_id,track_uri,genre
0,Drake,Toosie Slide,99,127QTOFJsJQp5LbJbu3A1y,spotify:track:127QTOFJsJQp5LbJbu3A1y,hiphop
1,Jack Harlow,WHATS POPPIN,93,1jaTQ3nqY3oAAYyCTbIvnM,spotify:track:1jaTQ3nqY3oAAYyCTbIvnM,hiphop
2,Future,Life Is Good (feat. Drake),94,5yY9lUy8nbvjM1Uyo1Uqoc,spotify:track:5yY9lUy8nbvjM1Uyo1Uqoc,hiphop
3,DaBaby,ROCKSTAR (feat. Roddy Ricch),91,7ytR5pFWmSjzHJIeQkgog4,spotify:track:7ytR5pFWmSjzHJIeQkgog4,hiphop
4,NLE Choppa,Walk Em Down (feat. Roddy Ricch),90,4cSSL3YafYjM3yjgFO1vJg,spotify:track:4cSSL3YafYjM3yjgFO1vJg,hiphop


In [None]:
# Fetch the audio features for every collected track.
# The URIs are sent in batches instead of one request per track, which cuts the
# number of API calls (and pauses) from one per track to one per batch.
AUDIO_FEATURES_BATCH_SIZE = 100    # spotipy documents a maximum of 100 ids per call
AUDIO_FEATURES_PAUSE_SECONDS = 3   # pause between consecutive batch requests

danceability = []
energy = []
loudness = []
speechiness = []
acousticness = []
instrumentalness = []
liveness = []
valence = []
tempo = []
duration = []

# Maps each key of the audio-features response to the list that collects it
feature_lists = {
    "danceability": danceability,
    "energy": energy,
    "loudness": loudness,
    "speechiness": speechiness,
    "acousticness": acousticness,
    "instrumentalness": instrumentalness,
    "liveness": liveness,
    "valence": valence,
    "tempo": tempo,
    "duration_ms": duration,
}

for start in range(0, len(track_uri), AUDIO_FEATURES_BATCH_SIZE):
    uri_batch = track_uri[start:start + AUDIO_FEATURES_BATCH_SIZE]
    batch_features = sp.audio_features(uri_batch)

    for features in batch_features:
        # The response holds None for a track without audio features; store None
        # placeholders so every list stays aligned with `track_uri` instead of
        # failing with a TypeError on `None[...]`.
        for key, values in feature_lists.items():
            values.append(features[key] if features is not None else None)
    time.sleep(AUDIO_FEATURES_PAUSE_SECONDS)

## Add the extracted data from Spotipy to Data Frame

In [22]:
# Combine the track metadata and the per-track audio features into one DataFrame,
# then tag every row with its genre
track_features_df = pd.DataFrame(
    {
        # Track metadata gathered by the search request
        'artist_name': artist_name,
        'track_name': track_name,
        'popularity': popularity,
        'track_id': track_id,
        'track_uri': track_uri,
        # Audio features gathered per track
        'danceability': danceability,
        'energy': energy,
        'loudness': loudness,
        'speechiness': speechiness,
        'acousticness': acousticness,
        'instrumentalness': instrumentalness,
        'liveness': liveness,
        'valence': valence,
        'tempo': tempo,
        'duration': duration,
    }
).assign(genre='hiphop')

# Show (rows, columns) and then the frame itself
print(track_features_df.shape)
track_features_df

(1000, 16)


Unnamed: 0,artist_name,track_name,popularity,track_id,track_uri,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration,genre
0,The Weeknd,Blinding Lights,100,0VjIjW4GlUZAMYd2vXMi3b,spotify:track:0VjIjW4GlUZAMYd2vXMi3b,0.514,0.730,-5.934,0.0598,0.00146,0.000095,0.0897,0.334,171.005,200040,pop
1,Dua Lipa,Don't Start Now,97,6WrI0LAC5M1Rw2MnX2ZvEg,spotify:track:6WrI0LAC5M1Rw2MnX2ZvEg,0.794,0.793,-4.521,0.0842,0.01250,0.000000,0.0952,0.677,123.941,183290,pop
2,Doja Cat,Say So,97,3Dv1eDb0MEgF93GpLXlucZ,spotify:track:3Dv1eDb0MEgF93GpLXlucZ,0.787,0.673,-4.577,0.1580,0.25600,0.000004,0.0904,0.786,110.962,237893,pop
3,Arizona Zervas,ROXANNE,95,696DnlkuDOXcMAnKlTgXXK,spotify:track:696DnlkuDOXcMAnKlTgXXK,0.621,0.601,-5.616,0.1480,0.05220,0.000000,0.4600,0.457,116.735,163636,pop
4,BENEE,Supalonely,95,4nK5YrxbMGZstTLbvj6Gxw,spotify:track:4nK5YrxbMGZstTLbvj6Gxw,0.863,0.631,-4.689,0.0534,0.30500,0.000030,0.1230,0.817,128.977,223480,pop
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,Taylor Swift,You Need To Calm Down,84,6RRNNciQGZEXnqk8SQ9yv5,spotify:track:6RRNNciQGZEXnqk8SQ9yv5,0.771,0.671,-5.617,0.0553,0.00929,0.000000,0.0637,0.714,85.026,171360,pop
996,Bazzi,Paradise,82,0Rx0DJI556Ix5gBny6EWmn,spotify:track:0Rx0DJI556Ix5gBny6EWmn,0.844,0.644,-6.273,0.0479,0.08280,0.000000,0.1130,0.591,122.061,169038,pop
997,A Boogie Wit da Hoodie,"Numbers (feat. Roddy Ricch, Gunna and London O...",84,733c1CWmIGymoQXdp7Us88,spotify:track:733c1CWmIGymoQXdp7Us88,0.819,0.654,-6.665,0.1360,0.51700,0.000000,0.0996,0.455,133.503,188563,pop
998,Social House,Magic In The Hamptons (feat. Lil Yachty),80,2Yer0p7uB2lVBUAtANuuQp,spotify:track:2Yer0p7uB2lVBUAtANuuQp,0.769,0.479,-5.339,0.0385,0.66000,0.000000,0.0914,0.803,96.051,164640,pop


# Export Data to CSV

In [23]:
# Export the DataFrame to a CSV file named after the genre label(s) it contains.
# The filename used to be hard-coded to "pop", so running the notebook for any
# other genre silently overwrote the pop export with mislabelled data.
genre_labels = track_features_df["genre"].dropna().unique()
# Falls back to "unknown" when the frame has no genre label (e.g. it is empty)
genre_slug = "-".join(str(label) for label in genre_labels) or "unknown"
output_csv = "spotify_data_{}_v2.csv".format(genre_slug)

track_features_df.to_csv(output_csv, encoding="utf-8", index=False)

# For Testing Purposes (No Need to Run)

In [None]:
# Fetch the audio features of a single sample track and pretty-print the response
sample_features_uri = "spotify:track:285pBltuF7vW8TeWk8hdRR"
audio_features_test = sp.audio_features(sample_features_uri)
print(json.dumps(audio_features_test, indent=4))

In [None]:
# Fetch the full track record of a single sample track and pretty-print the response
sample_track_uri = "spotify:track:6WrI0LAC5M1Rw2MnX2ZvEg"
test_track_info = sp.track(sample_track_uri)
print(json.dumps(test_track_info, indent=4))

In [None]:
# Look up the first listed artist of the sample track held in `test_track_info`
first_artist_uri = test_track_info["artists"][0]["uri"]
test_artist_info = sp.artist(first_artist_uri)
test_artist_info

In [None]:
# Call the request to show track info for each songs/tracks and get the artist ids
# artist_uri = []

# for uri in track_uri:
#     track_info = sp.track(uri)
#     artist_uri.append(track_info["artists"][0]["uri"])

In [None]:
# Call the request to get artist genres for each artist ids

# genres = []

# for uri in artist_uri:
#     artist_info = sp.artist(uri)
# #     print(artist_info)
#     genres.append(artist_info["genres"])

# genres