# Extract Data from Spotify API

The Spotify Web API has a Python client library called `spotipy`, which this notebook uses for all requests.

In [None]:
pip install spotipy --upgrade

In [1]:
# Dependencies
import pandas as pd
import json
import requests
import time

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from config_spotify import client_id, client_secret

# from __future__ import print_function
# import sys

In [2]:
# Build the spotipy client from the credentials imported from config_spotify.
cid, secret = client_id, client_secret

client_credentials_manager = SpotifyClientCredentials(
    client_id=cid,
    client_secret=secret,
)
sp = spotipy.Spotify(
    client_credentials_manager=client_credentials_manager,
)

### Request the data using Spotipy based on Genres

In [None]:
# Genre search queries we plan to extract from Spotify with spotipy
# genre:pop
# genre:hip-hop
# genre:jazz
# genre:rock
# genre:k-pop
# genre:instrumental
# genre:asmr

In [3]:
# Search Spotify for tracks of one genre, paging through the results.
#
# `offset` is the index of the first result to return, so consecutive pages
# must advance by PAGE_SIZE. Advancing by 1 (as this cell used to) re-fetches
# almost the same 50 tracks on every request and fills the lists with
# duplicates.
SEARCH_QUERY = 'genre:instrumental'
PAGE_SIZE = 50        # tracks requested per search call
N_PAGES = 20          # 20 pages x 50 tracks = up to 1000 tracks
SEARCH_PAUSE_S = 5    # pause between requests, in seconds

artist_name = []
track_name = []
popularity = []
track_id = []
track_uri = []

for page in range(N_PAGES):
    track_results = sp.search(
        q=SEARCH_QUERY,
        type='track',
        limit=PAGE_SIZE,
        offset=page * PAGE_SIZE,
    )
    items = track_results['tracks']['items']
    if not items:
        # The search ran out of results; further pages would be empty too.
        break

    for t in items:
        # Only the first listed artist of each track is recorded.
        artist_name.append(t['artists'][0]['name'])
        track_name.append(t['name'])
        track_id.append(t['id'])
        popularity.append(t['popularity'])
        track_uri.append(t['uri'])
    time.sleep(SEARCH_PAUSE_S)

In [4]:
# Assemble the search results into a table with one row per collected track.
track_columns = dict(
    artist_name=artist_name,
    track_name=track_name,
    popularity=popularity,
    track_id=track_id,
    track_uri=track_uri,
)

# Every row gets the same genre label as a constant column.
track_df = pd.DataFrame(track_columns).assign(genre='instrumental')

print(track_df.shape)
track_df.head()

(1000, 6)


Unnamed: 0,artist_name,track_name,popularity,track_id,track_uri,genre
0,Steve Mokwebe,It Ends with Us,68,6RMjZgsE9IcQZqubTzLtDs,spotify:track:6RMjZgsE9IcQZqubTzLtDs,instrumental
1,Rannar Sillard,Dream Voucher,68,2T6wyxLBkQ4Y2ZjTbpuYfr,spotify:track:2T6wyxLBkQ4Y2ZjTbpuYfr,instrumental
2,Vala Capon,Presto arriverà il sole,69,2jyJXuG0rIma11mOl4Fz7m,spotify:track:2jyJXuG0rIma11mOl4Fz7m,instrumental
3,Benette,Lily's Cradle,68,4UlarjdicLUPbdssOxWbYX,spotify:track:4UlarjdicLUPbdssOxWbYX,instrumental
4,Ever So Blue,Cessura,70,7uvey8m0ZfknE25sBVWoGY,spotify:track:7uvey8m0ZfknE25sBVWoGY,instrumental


In [None]:
# Fetch the audio features of every collected track.
#
# Tracks are sent in batches rather than one request per track:
# `sp.audio_features` accepts a list of track URIs (spotipy documents a
# maximum of 100 per call), so the number of requests -- and the time spent
# sleeping between them -- drops from one per track to one per batch.
FEATURE_BATCH_SIZE = 100
FEATURE_PAUSE_S = 7  # pause between requests, in seconds

danceability = []
energy = []
loudness = []
speechiness = []
acousticness = []
instrumentalness = []
liveness = []
valence = []
tempo = []
duration = []

# Response key -> list collecting that feature, kept in `track_uri` order.
feature_columns = {
    "danceability": danceability,
    "energy": energy,
    "loudness": loudness,
    "speechiness": speechiness,
    "acousticness": acousticness,
    "instrumentalness": instrumentalness,
    "liveness": liveness,
    "valence": valence,
    "tempo": tempo,
    "duration_ms": duration,
}

for start in range(0, len(track_uri), FEATURE_BATCH_SIZE):
    batch_uris = track_uri[start:start + FEATURE_BATCH_SIZE]
    track_features = sp.audio_features(batch_uris)

    for features in track_features:
        # An entry can be None when Spotify has no audio features for a
        # track; indexing it used to raise TypeError and abort the whole run.
        # Record None instead so every list stays aligned with `track_uri`.
        features = features or {}
        for key, column in feature_columns.items():
            column.append(features.get(key))
    time.sleep(FEATURE_PAUSE_S)

## Add the extracted data from Spotipy to Data Frame

In [None]:
# Combine the track metadata and the audio features into one table,
# one row per track; all lists are expected to share the same order.
metadata_columns = {
    'artist_name': artist_name,
    'track_name': track_name,
    'popularity': popularity,
    'track_id': track_id,
    'track_uri': track_uri,
}
audio_columns = {
    'danceability': danceability,
    'energy': energy,
    'loudness': loudness,
    'speechiness': speechiness,
    'acousticness': acousticness,
    'instrumentalness': instrumentalness,
    'liveness': liveness,
    'valence': valence,
    'tempo': tempo,
    'duration': duration,
}

# Metadata columns come first, then the features, then the constant genre label.
track_features_df = pd.DataFrame({**metadata_columns, **audio_columns}).assign(
    genre='instrumental'
)

print(track_features_df.shape)
track_features_df

# Export Data to CSV

In [None]:
# Write the combined table to disk as UTF-8 CSV, leaving out the row index.
track_features_df.to_csv(
    "spotify_data_instrumental.csv",
    index=False,
    encoding="utf-8",
)

# For Testing Purpose (No need to run)

In [None]:
# Manual check: request the audio features of one fixed track and
# pretty-print the raw response as indented JSON.
audio_features_test = sp.audio_features(
    "spotify:track:285pBltuF7vW8TeWk8hdRR"
)
audio_features_json = json.dumps(audio_features_test, indent=4)
print(audio_features_json)

In [None]:
# Manual check: request the full track record of one fixed track and
# pretty-print the raw response as indented JSON.
test_track_info = sp.track(
    "spotify:track:6WrI0LAC5M1Rw2MnX2ZvEg"
)
track_info_json = json.dumps(test_track_info, indent=4)
print(track_info_json)

In [None]:
# Manual check: look up the first artist listed on the test track
# (requires `test_track_info` from the track-information cell).
first_artist_uri = test_track_info["artists"][0]["uri"]
test_artist_info = sp.artist(first_artist_uri)
test_artist_info

In [None]:
# Call the request to show track info for each songs/tracks and get the artist ids
# artist_uri = []

# for uri in track_uri:
#     track_info = sp.track(uri)
#     artist_uri.append(track_info["artists"][0]["uri"])

In [None]:
# Call the request to get artist genres for each artist ids

# genres = []

# for uri in artist_uri:
#     artist_info = sp.artist(uri)
# #     print(artist_info)
#     genres.append(artist_info["genres"])

# genres