## Spotify - official API

In [1]:
import requests
import base64
import json
import csv
import os
from datetime import datetime

In [2]:
# Spotify application credentials used for the Client Credentials flow.
# The SPOTIFY_CLIENT_ID / SPOTIFY_CLIENT_SECRET environment variables take
# precedence so real secrets can live outside the notebook; the literals are
# only the original fallback values.
# NOTE(review): these literals are committed in plain text -- rotate them in
# the Spotify developer dashboard and remove the fallbacks once the
# environment variables are in place.
CLIENT_ID = os.environ.get("SPOTIFY_CLIENT_ID", "2347f12232864def931146701e2b2d8a")
CLIENT_SECRET = os.environ.get("SPOTIFY_CLIENT_SECRET", "a98e131897cb4c7b97d0d51564988f51")

### Client Credentials Token

In [3]:
def get_spotify_token(timeout=10):
    """Request an access token using the Client Credentials flow.

    CLIENT_ID and CLIENT_SECRET are joined as ``id:secret``, base64-encoded
    and sent as an HTTP Basic ``Authorization`` header to the Spotify
    accounts token endpoint with ``grant_type=client_credentials``.

    Args:
        timeout: Seconds to wait for the token endpoint. Without a timeout
            ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The ``access_token`` value from the JSON response.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    auth_str = f"{CLIENT_ID}:{CLIENT_SECRET}"
    b64_auth = base64.b64encode(auth_str.encode()).decode()

    r = requests.post(
        "https://accounts.spotify.com/api/token",
        headers={"Authorization": f"Basic {b64_auth}"},
        data={"grant_type": "client_credentials"},
        timeout=timeout,
    )
    r.raise_for_status()
    return r.json()["access_token"]

### API Calls - Artist

In [4]:
def fetch_artist(artist_id, token, timeout=10):
    """Fetch one artist object from the Spotify Web API.

    Args:
        artist_id: Spotify ID of the artist, inserted into the request path.
        token: Access token sent as a ``Bearer`` authorization header.
        timeout: Seconds to wait for the API before giving up, so a stalled
            connection cannot hang the caller indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    url = f"https://api.spotify.com/v1/artists/{artist_id}"
    headers = {"Authorization": f"Bearer {token}"}
    r = requests.get(url, headers=headers, timeout=timeout)
    r.raise_for_status()
    return r.json()

### Get Artist Top Tracks

In [5]:
def fetch_artist_top_tracks(artist_id, token, market="US", timeout=10):
    """Fetch an artist's top tracks for one market.

    Args:
        artist_id: Spotify ID of the artist, inserted into the request path.
        token: Access token sent as a ``Bearer`` authorization header.
        market: Value of the ``market`` query parameter (defaults to "US").
        timeout: Seconds to wait for the API before giving up, so a stalled
            connection cannot hang the caller indefinitely.

    Returns:
        The value stored under ``"tracks"`` in the JSON response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
        KeyError: If the response body has no ``"tracks"`` key.
    """
    url = f"https://api.spotify.com/v1/artists/{artist_id}/top-tracks"
    headers = {"Authorization": f"Bearer {token}"}

    # Let requests build the query string so the market value is URL-encoded.
    r = requests.get(url, headers=headers, params={"market": market}, timeout=timeout)
    r.raise_for_status()
    return r.json()["tracks"]

### Get Track Information (name, popularity, album_id)

In [6]:
def fetch_track(track_id, token, timeout=10):
    """Fetch one track object from the Spotify Web API.

    Args:
        track_id: Spotify ID of the track, inserted into the request path.
        token: Access token sent as a ``Bearer`` authorization header.
        timeout: Seconds to wait for the API before giving up, so a stalled
            connection cannot hang the caller indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    url = f"https://api.spotify.com/v1/tracks/{track_id}"
    headers = {"Authorization": f"Bearer {token}"}

    r = requests.get(url, headers=headers, timeout=timeout)
    r.raise_for_status()
    return r.json()

### Get Album Info (album_name, release_date)

In [7]:
def fetch_album(album_id, token, timeout=10):
    """Fetch one album object from the Spotify Web API.

    Args:
        album_id: Spotify ID of the album, inserted into the request path.
        token: Access token sent as a ``Bearer`` authorization header.
        timeout: Seconds to wait for the API before giving up, so a stalled
            connection cannot hang the caller indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    url = f"https://api.spotify.com/v1/albums/{album_id}"
    headers = {"Authorization": f"Bearer {token}"}

    r = requests.get(url, headers=headers, timeout=timeout)
    r.raise_for_status()
    return r.json()

### Pull Spotify data for each entry in the US daily chart

In [12]:
import pandas as pd

def run_spotify_ingestion(chart_csv_path):
    """Enrich a chart CSV with artist, track and album data from Spotify.

    Reads the chart, fetches every distinct artist (plus that artist's top
    tracks) and every distinct track, then fetches each album referenced by
    the fetched tracks. The three result tables are written to
    ``outputs_spotify/`` as both CSV and JSON, with today's date (YYYYMMDD)
    in the file names.

    A failure while fetching a single artist, track or album is printed and
    skipped so one bad ID does not abort the whole run.

    Args:
        chart_csv_path: Path to a CSV containing the columns ``artist_id``,
            ``artist_name``, ``track_id`` and ``track_name``.

    Returns:
        None. Results are written to disk.

    Raises:
        requests.HTTPError: If the access token cannot be obtained.
        KeyError: If the chart CSV lacks one of the required columns.
    """
    artist_columns = ["artist_id", "artist_name", "popularity", "followers", "genres", "top_tracks"]
    track_columns = ["track_id", "track_name", "popularity", "album_id"]
    album_columns = ["album_id", "album_name", "release_date", "total_tracks"]
    output_dir = "outputs_spotify"

    token = get_spotify_token()
    df_chart = pd.read_csv(chart_csv_path)

    artist_rows = []
    track_rows = []
    album_rows = []

    print("Total artists:", df_chart['artist_id'].nunique())
    print("Total tracks:", df_chart['track_id'].nunique())

    # De-duplicate on the ID alone: the same ID can appear with different
    # name spellings, and de-duplicating on (id, name) pairs would fetch it
    # more than once. Rows without an ID cannot be looked up, so drop them.
    unique_artists = (
        df_chart[['artist_id', 'artist_name']]
        .dropna(subset=['artist_id'])
        .drop_duplicates(subset='artist_id')
    )
    unique_tracks = (
        df_chart[['track_id', 'track_name']]
        .dropna(subset=['track_id'])
        .drop_duplicates(subset='track_id')
    )

    # ---- Fetch artist info ----
    for artist_id, artist_name in unique_artists.itertuples(index=False, name=None):
        try:
            d = fetch_artist(artist_id, token)
            d_top = fetch_artist_top_tracks(artist_id, token)

            artist_rows.append({
                "artist_id": artist_id,
                "artist_name": d["name"],
                "popularity": d["popularity"],
                "followers": d["followers"]["total"],
                "genres": ", ".join(d["genres"]),
                "top_tracks": [t["id"] for t in d_top][:10]
            })
        except Exception as e:
            # Deliberate best-effort: report the failure and keep going.
            print(f"Artist error {artist_name}: {e}")

    # ---- Fetch track info ----
    for track_id, track_name in unique_tracks.itertuples(index=False, name=None):
        try:
            d = fetch_track(track_id, token)
            track_rows.append({
                "track_id": d["id"],
                "track_name": d["name"],
                "popularity": d["popularity"],
                "album_id": d["album"]["id"],
            })
        except Exception as e:
            print(f"Track error {track_name}: {e}")

    # ---- Fetch album info ----
    # Collect album IDs straight from the rows: building a DataFrame from an
    # empty list and indexing 'album_id' would raise KeyError when no track
    # could be fetched. dict.fromkeys keeps first-seen order.
    unique_album_ids = list(dict.fromkeys(
        row["album_id"] for row in track_rows if row["album_id"] is not None
    ))

    for album_id in unique_album_ids:
        try:
            d = fetch_album(album_id, token)
            album_rows.append({
                "album_id": d["id"],
                "album_name": d["name"],
                "release_date": d["release_date"],
                "total_tracks": d["total_tracks"],
            })
        except Exception as e:
            print(f"Album error {album_id}: {e}")

    # ---- Save ----
    today = datetime.now().strftime("%Y%m%d")

    # Explicit columns keep the header row even when a table ends up empty.
    df_artist = pd.DataFrame(artist_rows, columns=artist_columns)
    df_track = pd.DataFrame(track_rows, columns=track_columns)
    df_album = pd.DataFrame(album_rows, columns=album_columns)

    # to_csv / to_json do not create missing directories.
    os.makedirs(output_dir, exist_ok=True)

    df_artist.to_csv(f"{output_dir}/spotify_artists_{today}.csv", index=False)
    df_track.to_csv(f"{output_dir}/spotify_tracks_{today}.csv", index=False)
    df_album.to_csv(f"{output_dir}/spotify_albums_{today}.csv", index=False)

    df_artist.to_json(f"{output_dir}/spotify_artists_{today}.json", orient="records", indent=2)
    df_track.to_json(f"{output_dir}/spotify_tracks_{today}.json", orient="records", indent=2)
    df_album.to_json(f"{output_dir}/spotify_albums_{today}.json", orient="records", indent=2)

    print("üéâ DONE! All Spotify API data saved!")


# ===== RUN =====
# Input: the dated chart CSV under outputs_kworb/. The path has no
# placeholders, so it is a plain string rather than an f-string.
run_spotify_ingestion("outputs_kworb/spotify_us_daily_chart_with_ids_20251120.csv")

Total artists: 115
Total tracks: 200
üéâ DONE! All Spotify API data saved!
