# Extract Data from Spotify API

Spotify API has python library called 'Spotipy'

In [None]:
pip install spotipy --upgrade

In [17]:
# Dependencies
import pandas as pd
import json
import requests
import time

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from config_spotify import client_id, client_secret

# from __future__ import print_function
# import sys

In [18]:
# Credentials to access Spotify API using spotipy library
cid = client_id
secret = client_secret

client_credentials_manager = SpotifyClientCredentials(client_id= cid, client_secret= secret)
sp = spotipy.Spotify(client_credentials_manager= client_credentials_manager)

### Request the data using Spotipy based on Genres

In [None]:
# List of genres we will extract from Spotipy
# genre:pop
# genre:hip-hop
# genre:jazz
# genre:rock
# genre:k-pop
# genre:instrumental
# genre:asmr

In [39]:
# Call the request to get the songs/tracks data by year
artist_name = []
track_name = []
popularity = []
track_id = []
track_uri = []

for i in range(0,20):
    track_results = sp.search(q='genre:k-pop', type='track', limit=50, offset=i)
    
    for i, t in enumerate(track_results['tracks']['items']):
        artist_name.append(t['artists'][0]['name'])
        track_name.append(t['name'])
        track_id.append(t['id'])
        popularity.append(t['popularity'])
        track_uri.append(t['uri'])
    time.sleep(5)

In [40]:
# Add the collected songs/tracks to DataFrame

track_df = pd.DataFrame({
    'artist_name' : artist_name, 
    'track_name' : track_name, 
    'popularity' : popularity, 
    'track_id' : track_id, 
    'track_uri' : track_uri
})

track_df['genre'] = 'kpop'

print(track_df.shape)
track_df.head()

(1000, 6)


Unnamed: 0,artist_name,track_name,popularity,track_id,track_uri,genre
0,BTS,ON,85,2QyuXBcV1LJ2rq01KhreMF,spotify:track:2QyuXBcV1LJ2rq01KhreMF,kpop
1,BTS,Boy With Luv (feat. Halsey),84,5KawlOMHjWeUjQtnuRs22c,spotify:track:5KawlOMHjWeUjQtnuRs22c,kpop
2,BTS,Filter,82,0ono6UCNVZ1XqOm6j78Blu,spotify:track:0ono6UCNVZ1XqOm6j78Blu,kpop
3,BTS,My Time,81,4vTgx6h4seHvkuFh84JXYP,spotify:track:4vTgx6h4seHvkuFh84JXYP,kpop
4,BTS,ON (Feat. Sia),81,3IB5qOeMayvpOdHxYCL5tZ,spotify:track:3IB5qOeMayvpOdHxYCL5tZ,kpop


In [41]:
# Call the request to get shows audio features for each songs/tracks
danceability = []
energy = []
loudness = []
speechiness = []
acousticness = []
instrumentalness = []
liveness = []
valence = []
tempo = []
duration = []

for uri in track_uri:
    track_features = sp.audio_features(uri)
#     print(track_features)
    danceability.append(track_features[0]["danceability"])
    energy.append(track_features[0]["energy"])
    loudness.append(track_features[0]["loudness"])
    speechiness.append(track_features[0]["speechiness"])
    acousticness.append(track_features[0]["acousticness"])
    instrumentalness.append(track_features[0]["instrumentalness"])
    liveness.append(track_features[0]["liveness"])
    valence.append(track_features[0]["valence"])
    tempo.append(track_features[0]["tempo"])
    duration.append(track_features[0]["duration_ms"])
    time.sleep(3)

## Add the extracted data from Spotipy to Data Frame

In [42]:
# Add the collected songs/tracks and their features to DataFrame
track_features_df = pd.DataFrame({
    'artist_name' : artist_name, 
    'track_name' : track_name, 
    'popularity' : popularity, 
    'track_id' : track_id, 
    'track_uri' : track_uri,
    'danceability': danceability,
    'energy': energy,
    'loudness': loudness,
    'speechiness': speechiness,
    'acousticness': acousticness,
    'instrumentalness': instrumentalness,
    'liveness': liveness,
    'valence': valence,
    'tempo': tempo,
    'duration': duration
})

track_features_df['genre'] = 'kpop'

print(track_features_df.shape)
track_features_df

(1000, 16)


Unnamed: 0,artist_name,track_name,popularity,track_id,track_uri,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration,genre
0,BTS,ON,85,2QyuXBcV1LJ2rq01KhreMF,spotify:track:2QyuXBcV1LJ2rq01KhreMF,0.583,0.817,-5.146,0.0987,0.1180,0.0,0.3380,0.438,105.936,246381,kpop
1,BTS,Boy With Luv (feat. Halsey),84,5KawlOMHjWeUjQtnuRs22c,spotify:track:5KawlOMHjWeUjQtnuRs22c,0.645,0.862,-4.757,0.0965,0.0923,0.0,0.1920,0.798,119.991,229773,kpop
2,BTS,Filter,82,0ono6UCNVZ1XqOm6j78Blu,spotify:track:0ono6UCNVZ1XqOm6j78Blu,0.781,0.762,-5.188,0.0626,0.0222,0.0,0.1210,0.860,110.042,180221,kpop
3,BTS,My Time,81,4vTgx6h4seHvkuFh84JXYP,spotify:track:4vTgx6h4seHvkuFh84JXYP,0.674,0.640,-5.139,0.0339,0.1510,0.0,0.0925,0.664,99.908,234458,kpop
4,BTS,ON (Feat. Sia),81,3IB5qOeMayvpOdHxYCL5tZ,spotify:track:3IB5qOeMayvpOdHxYCL5tZ,0.591,0.848,-4.397,0.0828,0.1370,0.0,0.3720,0.386,105.922,246816,kpop
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,Jackson Wang,100 Ways,74,4mQMvcRtQou5yI8mjuJ1lf,spotify:track:4mQMvcRtQou5yI8mjuJ1lf,0.801,0.782,-7.081,0.2410,0.3180,0.0,0.2850,0.613,109.955,168627,kpop
996,GOT7,You Calling My Name,72,6f4wghlwYWGLqGEIKc6HDQ,spotify:track:6f4wghlwYWGLqGEIKc6HDQ,0.813,0.667,-4.061,0.0616,0.0917,0.0,0.0802,0.514,111.020,194708,kpop
997,BTS,Make It Right,71,1OqkHhYOcqfwYx7LqlAE8Y,spotify:track:1OqkHhYOcqfwYx7LqlAE8Y,0.638,0.703,-7.874,0.4440,0.0104,0.0,0.1320,0.261,105.766,226321,kpop
998,BTS,Dionysus,70,2SytGgja3g2R2N9lzV6n3E,spotify:track:2SytGgja3g2R2N9lzV6n3E,0.502,0.910,-2.769,0.1070,0.0400,0.0,0.3190,0.588,176.084,249215,kpop


# Export Data to CSV

In [43]:
# Export the DataFrame to csv file
track_features_df.to_csv("spotify_data_kpop_v2.csv", encoding="utf-8", index=False)

# For Testing Purpose (No need to run)

In [None]:
# To get audio features
audio_features_test = sp.audio_features("spotify:track:285pBltuF7vW8TeWk8hdRR")
print(json.dumps(audio_features_test, indent=4))

In [None]:
# To get track information
test_track_info = sp.track("spotify:track:6WrI0LAC5M1Rw2MnX2ZvEg")
print(json.dumps(test_track_info, indent=4))

In [None]:
# To get artist information
test_artist_info = sp.artist(test_track_info["artists"][0]["uri"])
test_artist_info

In [None]:
# Call the request to show track info for each songs/tracks and get the artist ids
# artist_uri = []

# for uri in track_uri:
#     track_info = sp.track(uri)
#     artist_uri.append(track_info["artists"][0]["uri"])

In [None]:
# Call the request to get artist genres for each artist ids

# genres = []

# for uri in artist_uri:
#     artist_info = sp.artist(uri)
# #     print(artist_info)
#     genres.append(artist_info[genres])

# genres