## Spotify - official API

In [1]:
import requests
import base64
import json
import csv
import os
from datetime import datetime

In [2]:
CLIENT_ID = "2347f12232864def931146701e2b2d8a"
CLIENT_SECRET = "a98e131897cb4c7b97d0d51564988f51"

# Example artist (Justin Bieber)
ARTIST_ID = "1uNFoZAHBGtllmzznpCI3s"

# Output folder
OUTPUT_DIR = "outputs/"
os.makedirs(OUTPUT_DIR, exist_ok=True)

### Client Credentials Token

In [3]:
def get_spotify_token():
    auth_str = f"{CLIENT_ID}:{CLIENT_SECRET}"
    b64 = base64.b64encode(auth_str.encode()).decode()

    headers = {"Authorization": f"Basic {b64}"}
    data = {"grant_type": "client_credentials"}

    r = requests.post("https://accounts.spotify.com/api/token", headers=headers, data=data)
    r.raise_for_status()
    return r.json()["access_token"]

token = get_spotify_token()
print("Token acquired successfully!")

Token acquired successfully!


### API Calls- Artist 

In [4]:
def fetch_artist(artist_id, token):
    url = f"https://api.spotify.com/v1/artists/{artist_id}"
    headers = {"Authorization": f"Bearer {token}"}
    r = requests.get(url, headers=headers)
    r.raise_for_status()
    return r.json()

### Get Artist Top Tracks

In [5]:
def fetch_top_tracks(artist_id, token, market="US"):
    url = f"https://api.spotify.com/v1/artists/{artist_id}/top-tracks?market={market}"
    headers = {"Authorization": f"Bearer {token}"}
    r = requests.get(url, headers=headers)
    r.raise_for_status()
    return r.json().get("tracks", [])

### Get Audio Features

In [6]:
def fetch_audio_features(track_id, token):
    url = f"https://api.spotify.com/v1/audio-features/{track_id}"
    headers = {"Authorization": f"Bearer {token}"}
    r = requests.get(url, headers=headers)
    r.raise_for_status()
    return r.json()

### Data Validation

In [8]:
def validate_artist(artist):
    required = ["id", "name", "genres", "popularity", "followers"]
    missing = [f for f in required if f not in artist]
    return missing


def validate_track(track):
    required = ["id", "name", "popularity", "duration_ms", "artists"]
    missing = [f for f in required if f not in track]
    return missing


def validate_audio_features(features):
    required = ["danceability", "energy", "valence", "tempo"]
    missing = [f for f in required if f not in features]
    return missing

### export to JSON/CSV

In [9]:
def save_json(data, filename):
    filepath = os.path.join(OUTPUT_DIR, filename)
    with open(filepath, "w") as f:
        json.dump(data, f, indent=2)
    print(f"[Saved] {filepath}")

In [18]:
def save_tracks_csv(tracks, filename):
    filepath = os.path.join(OUTPUT_DIR, filename)

    with open(filepath, mode="w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["track_id", "name", "popularity", "duration_ms"])

        for t in tracks:
            writer.writerow([
                t["id"],
                t["name"],
                t["popularity"],
                t["duration_ms"],
            ])

    print(f"[Saved] {filepath}")

### Main RUn

In [19]:
def run_spotify_data_pull():
    timestamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
    token = get_spotify_token()

    # Fetch artist
    artist = fetch_artist(ARTIST_ID, token)
    missing = validate_artist(artist)
    if missing:
        print(f"[Warning] Missing artist fields: {missing}")

    save_json(artist, f"spotify_artist_{timestamp}.json")

    # Fetch top tracks
    tracks = fetch_top_tracks(ARTIST_ID, token)
    cleaned_tracks = []

    for t in tracks:
        missing = validate_track(t)
        if missing:
            print(f"[Warning] Missing track fields: {missing} in track {t.get('id')}")

        cleaned_tracks.append(t)

    save_json(cleaned_tracks, f"spotify_tracks_{timestamp}.json")
    save_tracks_csv(cleaned_tracks, f"spotify_tracks_{timestamp}.csv")

    # Fetch audio features for each track
    audio_features_all = {}
    for t in cleaned_tracks:
        features = fetch_audio_features(t["id"], token)
        missing = validate_audio_features(features)
        if missing:
            print(f"[Warning] Missing audio features: {missing} for {t['id']}")

        audio_features_all[t["id"]] = features

    save_json(audio_features_all, f"spotify_audio_features_{timestamp}.json")

    print("\nðŸŽ‰ Spotify Data Pull Completed!\n")

In [20]:
### Run script

In [21]:
if __name__ == "__main__":
    run_spotify_data_pull()

  timestamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S")


[Saved] outputs/spotify_artist_20251113_043306.json
[Saved] outputs/spotify_tracks_20251113_043306.json
[Saved] outputs/spotify_tracks_20251113_043306.csv


HTTPError: 403 Client Error: Forbidden for url: https://api.spotify.com/v1/audio-features/5BZsQlgw21vDOAjoqkNgKb