# Extract Data from Spotify API

The Spotify Web API has a Python client library called 'Spotipy'.

In [1]:
pip install spotipy --upgrade

Collecting spotipy
  Downloading https://files.pythonhosted.org/packages/b2/67/ea788ae3d7e3aad0b0ef72a4f5b6c24c1dbb3db83fd57efc221f86b4fe87/spotipy-2.12.0-py3-none-any.whl
Installing collected packages: spotipy
Successfully installed spotipy-2.12.0
Note: you may need to restart the kernel to use updated packages.


In [2]:
# Dependencies
import pandas as pd
import json
import requests
import time

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from config_spotify import client_id, client_secret

# from __future__ import print_function
# import sys

In [3]:
# Build an authenticated Spotipy client from the id/secret imported from config_spotify
cid, secret = client_id, client_secret

client_credentials_manager = SpotifyClientCredentials(
    client_id=cid,
    client_secret=secret,
)
sp = spotipy.Spotify(
    client_credentials_manager=client_credentials_manager,
)

### Request the data using Spotipy based on Genres

In [None]:
# List of genres we will extract from Spotipy
# genre:pop
# genre:hip-hop
# genre:jazz
# genre:rock
# genre:k-pop
# genre:instrumental
# genre:asmr

In [4]:
# Search Spotify for tracks of one genre, one page of results per request
GENRE_QUERY = 'genre:jazz'  # search query; see the genre list above for alternatives
PAGE_SIZE = 10              # tracks returned per request
N_PAGES = 1                 # number of result pages to fetch
PAUSE_SECONDS = 5           # wait between consecutive requests

artist_name = []
track_name = []
popularity = []
track_id = []
track_uri = []

for page in range(N_PAGES):
    # `offset` is the index of the first result, so step by PAGE_SIZE;
    # stepping by 1 would return overlapping pages.
    track_results = sp.search(
        q=GENRE_QUERY, type='track', limit=PAGE_SIZE, offset=page * PAGE_SIZE
    )

    for t in track_results['tracks']['items']:
        # Only the first credited artist of each track is recorded
        artist_name.append(t['artists'][0]['name'])
        track_name.append(t['name'])
        track_id.append(t['id'])
        popularity.append(t['popularity'])
        track_uri.append(t['uri'])

    # Pause only when another request follows
    if page < N_PAGES - 1:
        time.sleep(PAUSE_SECONDS)

In [7]:
# Assemble the collected track metadata into a DataFrame, one column per list
track_df = pd.DataFrame(dict(
    artist_name=artist_name,
    track_name=track_name,
    popularity=popularity,
    track_id=track_id,
    track_uri=track_uri,
))

# Report the frame's dimensions, then preview the first rows
print(track_df.shape)
track_df.head()

(10, 5)


Unnamed: 0,artist_name,track_name,popularity,track_id,track_uri
0,"Earth, Wind & Fire",September,82,7Cuk8jsPPoNYQWXK9XRFvG,spotify:track:7Cuk8jsPPoNYQWXK9XRFvG
1,Louis Armstrong,What A Wonderful World - Single Version,68,29U7stRjqHU6rMiS8BfaI9,spotify:track:29U7stRjqHU6rMiS8BfaI9
2,Leslie Odom Jr.,Alexander Hamilton,71,4TTV7EcfroSLWzXRY6gLv6,spotify:track:4TTV7EcfroSLWzXRY6gLv6
3,Etta James,At Last,75,4Hhv2vrOTy89HFRcjU3QOx,spotify:track:4Hhv2vrOTy89HFRcjU3QOx
4,"Grover Washington, Jr.",Just the Two of Us (feat. Bill Withers),71,1ko2lVN0vKGUl9zrU0qSlT,spotify:track:1ko2lVN0vKGUl9zrU0qSlT


In [5]:
# Fetch the audio features for every collected track
danceability = []
energy = []
loudness = []
speechiness = []
acousticness = []
instrumentalness = []
liveness = []
valence = []
tempo = []
duration = []

# Maps each key of an audio-features record to the list that collects it
feature_columns = {
    "danceability": danceability,
    "energy": energy,
    "loudness": loudness,
    "speechiness": speechiness,
    "acousticness": acousticness,
    "instrumentalness": instrumentalness,
    "liveness": liveness,
    "valence": valence,
    "tempo": tempo,
    "duration_ms": duration,
}

# audio_features accepts a list of tracks; Spotify documents a maximum of
# 100 ids per request, so send the URIs in batches instead of one by one.
BATCH_SIZE = 100

for start in range(0, len(track_uri), BATCH_SIZE):
    if start > 0:
        # Pause only between consecutive requests
        time.sleep(5)

    batch_features = sp.audio_features(track_uri[start:start + BATCH_SIZE])

    for features in batch_features:
        # Spotify returns null for a track with no audio features; store None
        # so every list stays aligned with track_uri instead of raising TypeError.
        for key, values in feature_columns.items():
            values.append(None if features is None else features[key])

## Add the extracted data from Spotipy to Data Frame

In [8]:
# Combine the track metadata and the audio features into one DataFrame
track_features_df = pd.DataFrame(dict(
    artist_name=artist_name,
    track_name=track_name,
    popularity=popularity,
    track_id=track_id,
    track_uri=track_uri,
    danceability=danceability,
    energy=energy,
    loudness=loudness,
    speechiness=speechiness,
    acousticness=acousticness,
    instrumentalness=instrumentalness,
    liveness=liveness,
    valence=valence,
    tempo=tempo,
    duration=duration,
))

# Report the frame's dimensions, then preview the first rows
print(track_features_df.shape)
track_features_df.head()

(10, 15)


Unnamed: 0,artist_name,track_name,popularity,track_id,track_uri,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration
0,"Earth, Wind & Fire",September,82,7Cuk8jsPPoNYQWXK9XRFvG,spotify:track:7Cuk8jsPPoNYQWXK9XRFvG,0.694,0.831,-7.288,0.0301,0.165,0.000892,0.25,0.98,125.901,215080
1,Louis Armstrong,What A Wonderful World - Single Version,68,29U7stRjqHU6rMiS8BfaI9,spotify:track:29U7stRjqHU6rMiS8BfaI9,0.271,0.165,-20.652,0.0351,0.729,2e-06,0.118,0.203,77.082,139227
2,Leslie Odom Jr.,Alexander Hamilton,71,4TTV7EcfroSLWzXRY6gLv6,spotify:track:4TTV7EcfroSLWzXRY6gLv6,0.609,0.435,-7.862,0.284,0.524,0.0,0.118,0.563,131.998,236738
3,Etta James,At Last,75,4Hhv2vrOTy89HFRcjU3QOx,spotify:track:4Hhv2vrOTy89HFRcjU3QOx,0.273,0.347,-8.631,0.0292,0.546,0.0137,0.334,0.328,87.411,179693
4,"Grover Washington, Jr.",Just the Two of Us (feat. Bill Withers),71,1ko2lVN0vKGUl9zrU0qSlT,spotify:track:1ko2lVN0vKGUl9zrU0qSlT,0.803,0.488,-9.303,0.0803,0.576,0.0609,0.0763,0.624,95.771,237106


# Export Data to CSV

In [9]:
# Write the combined track/feature table to a UTF-8 CSV file, leaving out the row index
track_features_df.to_csv(
    "spotify_data.csv",
    index=False,
    encoding="utf-8",
)

# For Testing Purposes (No Need to Run)

In [31]:
# Inspect the raw audio-features response for a single sample track
audio_features_test = sp.audio_features(
    tracks="spotify:track:285pBltuF7vW8TeWk8hdRR"
)
print(json.dumps(audio_features_test, indent=4))

[
    {
        "danceability": 0.511,
        "energy": 0.566,
        "key": 6,
        "loudness": -7.23,
        "mode": 0,
        "speechiness": 0.2,
        "acousticness": 0.349,
        "instrumentalness": 0,
        "liveness": 0.34,
        "valence": 0.218,
        "tempo": 83.903,
        "type": "audio_features",
        "id": "285pBltuF7vW8TeWk8hdRR",
        "uri": "spotify:track:285pBltuF7vW8TeWk8hdRR",
        "track_href": "https://api.spotify.com/v1/tracks/285pBltuF7vW8TeWk8hdRR",
        "analysis_url": "https://api.spotify.com/v1/audio-analysis/285pBltuF7vW8TeWk8hdRR",
        "duration_ms": 239836,
        "time_signature": 4
    }
]


In [32]:
# Inspect the raw track record for a single sample track
test_track_info = sp.track(
    track_id="spotify:track:6WrI0LAC5M1Rw2MnX2ZvEg"
)
print(json.dumps(test_track_info, indent=4))

{
    "album": {
        "album_type": "single",
        "artists": [
            {
                "external_urls": {
                    "spotify": "https://open.spotify.com/artist/6M2wZ9GZgrQXHCFfjv46we"
                },
                "href": "https://api.spotify.com/v1/artists/6M2wZ9GZgrQXHCFfjv46we",
                "id": "6M2wZ9GZgrQXHCFfjv46we",
                "name": "Dua Lipa",
                "type": "artist",
                "uri": "spotify:artist:6M2wZ9GZgrQXHCFfjv46we"
            }
        ],
        "available_markets": [
            "AD",
            "AE",
            "AR",
            "AU",
            "BE",
            "BG",
            "BH",
            "BO",
            "BR",
            "CA",
            "CL",
            "CO",
            "CR",
            "CY",
            "CZ",
            "DK",
            "DO",
            "DZ",
            "EC",
            "EE",
            "EG",
            "ES",
            "FI",
            "FR",
            "GB",
  

In [35]:
# Look up the artist record for the first artist listed on the sample track
test_artist_info = sp.artist(
    artist_id=test_track_info["artists"][0]["uri"]
)
test_artist_info

{'external_urls': {'spotify': 'https://open.spotify.com/artist/6M2wZ9GZgrQXHCFfjv46we'},
 'followers': {'href': None, 'total': 16475832},
 'genres': ['dance pop', 'pop', 'uk pop'],
 'href': 'https://api.spotify.com/v1/artists/6M2wZ9GZgrQXHCFfjv46we',
 'id': '6M2wZ9GZgrQXHCFfjv46we',
 'images': [{'height': 640,
   'url': 'https://i.scdn.co/image/330f9806621bc0fe67f5c06f2f1f8df53d011b4e',
   'width': 640},
  {'height': 320,
   'url': 'https://i.scdn.co/image/1d452370e002660986515bacbcaf28a9aea68d2e',
   'width': 320},
  {'height': 160,
   'url': 'https://i.scdn.co/image/44893ea0065579ff145176eab721794b7714640c',
   'width': 160}],
 'name': 'Dua Lipa',
 'popularity': 96,
 'type': 'artist',
 'uri': 'spotify:artist:6M2wZ9GZgrQXHCFfjv46we'}

In [None]:
# Call the request to show track info for each songs/tracks and get the artist ids
# artist_uri = []

# for uri in track_uri:
#     track_info = sp.track(uri)
#     artist_uri.append(track_info["artists"][0]["uri"])

In [None]:
# Call the request to get artist genres for each artist ids

# genres = []

# for uri in artist_uri:
#     artist_info = sp.artist(uri)
# #     print(artist_info)
#     genres.append(artist_info["genres"])

# genres