In [1]:
import os
from dotenv import load_dotenv
load_dotenv()   # loads .env if present in working dir

import time
import math
import json
import joblib
from typing import List, Dict

import pandas as pd
import numpy as np
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Cell 1 - authenticate
# Create the shared Spotify client `sp` that the cells below pass around.
# No credentials are passed explicitly here; presumably SpotifyClientCredentials
# picks them up from the environment populated by load_dotenv() in the import
# cell -- TODO confirm against the spotipy docs / your .env file.
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials())
# Quick smoke test: request 10 new releases and print how many album items came
# back. An exception here means the client could not complete the call.
print("Auth OK? try new releases count:")
print(len(sp.new_releases(limit=10)['albums']['items']))


Auth OK? try new releases count:
10


In [3]:
# Cell 2 - fetch new releases (albums -> album tracks -> track metadata)
def fetch_new_release_track_ids(sp, country='US', limit_albums=50):
    """Collect per-track metadata for the albums returned by ``sp.new_releases``.

    For every album in the new-releases listing, all of its tracks are paged
    through with ``sp.album_tracks`` and one record per track is collected.

    Parameters
    ----------
    sp : object
        Client exposing ``new_releases(limit=..., country=...)`` and
        ``album_tracks(album_id, limit=..., offset=...)`` (e.g. the
        ``spotipy.Spotify`` instance created earlier in the notebook).
    country : str, default 'US'
        Passed through to ``sp.new_releases``.
    limit_albums : int, default 50
        Passed through to ``sp.new_releases`` as ``limit``.

    Returns
    -------
    pandas.DataFrame
        Columns ``track_id, track_name, artists, album_id, album_name,
        album_artists``; one row per distinct ``track_id`` (first occurrence
        kept), with a fresh 0..n-1 index. The columns are present even when no
        tracks were found, so downstream column access keeps working.
    """
    page_size = 50  # tracks requested per album_tracks call
    columns = [
        'track_id', 'track_name', 'artists',
        'album_id', 'album_name', 'album_artists',
    ]

    albums = sp.new_releases(limit=limit_albums, country=country)['albums']['items']
    track_meta = []
    for album in albums:
        album_id = album['id']
        album_name = album['name']
        album_artists = [a['name'] for a in album['artists']]

        # Page through the album's tracks until an empty or short page is seen.
        offset = 0
        while True:
            items = sp.album_tracks(album_id, limit=page_size, offset=offset)['items']
            if not items:
                break
            track_meta.extend(
                {
                    'track_id': t['id'],
                    'track_name': t['name'],
                    'artists': [a['name'] for a in t['artists']],
                    'album_id': album_id,
                    'album_name': album_name,
                    'album_artists': album_artists,
                }
                for t in items
                if t['id'] is not None  # sometimes a track id can be None
            )
            offset += len(items)
            if len(items) < page_size:
                break

    # Passing `columns` pins the schema so an empty result still has 'track_id'.
    return (
        pd.DataFrame(track_meta, columns=columns)
        .drop_duplicates(subset='track_id')
        .reset_index(drop=True)
    )

# Pull track metadata for the current US new releases (up to 50 albums),
# report how many distinct tracks came back, and preview the first rows.
tracks_df = fetch_new_release_track_ids(
    sp,
    country="US",
    limit_albums=50,
)
print("tracks:", tracks_df.shape[0])
tracks_df.head(5)


tracks: 358


Unnamed: 0,track_id,track_name,artists,album_id,album_name,album_artists
0,2nnC82ruXhKSZFfcwhKlsm,Big Fish,[Adekunle Gold],1CwSEtADXMks7OtJKKZh7J,Fuji,[Adekunle Gold]
1,4i3HyTc6HKCn2TgOHlzq9L,Don Corleone,[Adekunle Gold],1CwSEtADXMks7OtJKKZh7J,Fuji,[Adekunle Gold]
2,2biE57GQL3Vesuy8qCNj1z,Bobo,"[Adekunle Gold, Lojay, Shoday]",1CwSEtADXMks7OtJKKZh7J,Fuji,[Adekunle Gold]
3,2DFGCzrgvTNgXl9lelEpnS,Coco Money,[Adekunle Gold],1CwSEtADXMks7OtJKKZh7J,Fuji,[Adekunle Gold]
4,2RKX3sJgRcOs1iCpyX0Iv4,Believe,[Adekunle Gold],1CwSEtADXMks7OtJKKZh7J,Fuji,[Adekunle Gold]


In [4]:
def fetch_audio_features_for_tracks(sp, track_ids, batch_size=50):
    """Fetch audio-feature records for ``track_ids`` in batches.

    Each batch is requested from
    ``https://api.spotify.com/v1/audio-features?ids=...`` through the client's
    underlying session, with a bearer token obtained from ``sp.auth_manager``.

    Parameters
    ----------
    sp : object
        Client exposing ``_session.get(url, headers=...)`` and
        ``auth_manager.get_access_token(as_dict=False)``.
        NOTE(review): ``_session`` is a private attribute of the client; this
        may break on a client-library upgrade.
    track_ids : iterable of str
        Track ids to look up. Any iterable is accepted (list, pandas Series,
        generator); it is materialised into a list once.
    batch_size : int, default 50
        Number of ids sent per request.

    Returns
    -------
    list
        The non-None entries of every response's ``"audio_features"`` array,
        in request order. A response whose ``"audio_features"`` is missing or
        null contributes nothing.

    Raises
    ------
    Exception
        Whatever the request / JSON decoding raised (including the HTTP error
        from ``raise_for_status()``), re-raised unchanged after printing the
        offending batch.
    """
    track_ids = list(track_ids)  # supports generators / Series, not just lists
    total = len(track_ids)
    all_features = []

    for i in range(0, total, batch_size):
        batch = track_ids[i:i + batch_size]

        print(f"Batch {i//batch_size + 1}: {len(batch)} tracks")

        try:
            ids_param = ",".join(batch)
            data = sp._session.get(
                f"https://api.spotify.com/v1/audio-features?ids={ids_param}",
                headers={"Authorization": f"Bearer {sp.auth_manager.get_access_token(as_dict=False)}"}
            )

            if data.status_code != 200:
                print("GET failed:", data.status_code, data.text)
                data.raise_for_status()

            # `.get(key, [])` would still yield None for an explicit JSON null,
            # so normalise both "missing" and "null" to an empty list.
            batch_features = data.json().get("audio_features") or []
            all_features.extend(f for f in batch_features if f is not None)

        except Exception:
            print("Error in batch:", batch)
            raise  # bare raise keeps the original traceback intact

        # Throttle between requests; no need to wait after the final batch.
        if i + batch_size < total:
            time.sleep(0.1)  # avoid rate limits

    return all_features
