In [6]:
import os, tomllib
# Read the Spotify app credentials from the Streamlit secrets file and publish
# them as the SPOTIPY_* environment variables that auth_spotify() reads.
with open("../.streamlit/secrets.toml", "rb") as f:
    secrets = tomllib.load(f)

for env_var, secret_key in (
    ("SPOTIPY_CLIENT_ID", 'clientId'),
    ("SPOTIPY_CLIENT_SECRET", 'clientSecret'),
):
    os.environ[env_var] = secrets[secret_key]
os.environ["SPOTIPY_REDIRECT_URI"] = "https://open.spotify.com/"

In [10]:
# Space-separated OAuth scopes; handed to SpotifyOAuth(scope=...) in auth_spotify().
SCOPE = "playlist-read-private user-library-read"
import spotipy
from spotipy.oauth2 import SpotifyOAuth

def auth_spotify() -> spotipy.Spotify:
    """Create a Spotify client backed by a ``SpotifyOAuth`` auth manager.

    The client id, client secret and redirect URI are read from the
    ``SPOTIPY_CLIENT_ID``, ``SPOTIPY_CLIENT_SECRET`` and
    ``SPOTIPY_REDIRECT_URI`` environment variables; the requested scopes come
    from the module-level ``SCOPE`` constant.
    """
    oauth_manager = SpotifyOAuth(
        scope=SCOPE,
        client_id=os.environ.get("SPOTIPY_CLIENT_ID"),
        client_secret=os.environ.get("SPOTIPY_CLIENT_SECRET"),
        redirect_uri=os.environ.get("SPOTIPY_REDIRECT_URI"),
    )
    return spotipy.Spotify(auth_manager=oauth_manager)

# Shared client instance; later cells pass it to the fetch_* helpers.
sp = auth_spotify()

In [8]:
from typing import List

def fetch_playlist_tracks(sp: spotipy.Spotify, playlist_id: str) -> List[str]:
    """Collect the track IDs of a playlist, walking through every result page.

    Entries without a track object, or whose track has no ``id``, are skipped.
    """
    collected: List[str] = []
    page = sp.playlist_items(playlist_id, additional_types=["track"], limit=100)
    while True:
        for entry in page["items"]:
            track = entry.get("track")
            if track and track.get("id"):
                collected.append(track["id"])
        # A falsy "next" marks the last page.
        if not page["next"]:
            break
        page = sp.next(page)
    return collected

In [None]:
# Source 1: take the track IDs from a Spotify playlist.
# NOTE: the CSV cell further down also assigns `track_ids`, so whichever of the
# two cells ran last decides what the feature download uses.
playlist_id = "37i9dQZF1DXcBWIGoYBM5M" # Today's Top Hits
track_ids = fetch_playlist_tracks(sp, playlist_id)

In [17]:
# Or if you want to use your own data
import pandas as pd
# Listening history export; its first CSV column becomes the row index and its
# `id` column supplies the track IDs.
music = pd.read_csv("../data/streaming_history.csv", index_col=0)
track_ids = music["id"].to_list()

In [35]:
from sklearn.preprocessing import StandardScaler
def fetch_audio_features(
    sp: "spotipy.Spotify", track_ids: List[str], batch_size: int = 50
) -> pd.DataFrame:
    """Download the audio-feature vectors for the given track IDs.

    The IDs are sent to ``sp.audio_features`` in chunks of ``batch_size`` and
    the answers are combined into one frame indexed by track ``id``. Values are
    returned as delivered by the API; no scaling is applied.

    Args:
        sp: Spotify client exposing ``audio_features``.
        track_ids: Spotify track IDs to look up.
        batch_size: Number of IDs per request. spotipy documents a maximum of
            100 IDs per ``audio_features`` call.

    Returns:
        DataFrame indexed by ``id`` with the columns ``acousticness``,
        ``danceability``, ``energy``, ``instrumentalness``, ``liveness``,
        ``speechiness`` and ``valence``. Tracks for which the API returned no
        entry are left out, so the frame can have fewer rows than
        ``track_ids``. With nothing to return the frame is empty but keeps the
        same columns and index name.

    Raises:
        ValueError: If ``batch_size`` is not between 1 and 100.
    """
    feature_cols = [
        'acousticness', 'danceability', 'energy', 'instrumentalness',
        'liveness', 'speechiness', 'valence',
    ]
    if not 1 <= batch_size <= 100:
        raise ValueError("batch_size must be between 1 and 100")

    feats = []
    for start in range(0, len(track_ids), batch_size):
        batch = sp.audio_features(track_ids[start : start + batch_size])
        # The response holds None for tracks without features; a None entry
        # would make the DataFrame constructor below fail.
        feats.extend(entry for entry in (batch or []) if entry is not None)

    if not feats:
        # set_index("id") would raise KeyError on a frame without columns.
        return pd.DataFrame(
            columns=feature_cols, index=pd.Index([], name="id"), dtype=float
        )
    return pd.DataFrame(feats).set_index("id")[feature_cols]

# Download the feature table for the selected tracks (indexed by track id)
# and display it as the cell output.
df = fetch_audio_features(sp,track_ids)
df

Unnamed: 0_level_0,acousticness,danceability,energy,instrumentalness,liveness,speechiness,valence
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
52vA3CYKZqZVdQnzRrdZt6,0.88700,0.389,0.396,0.000000,0.0828,0.0332,0.585
5MtP6QGoOkA1L5ynZyQEiv,0.70000,0.608,0.322,0.002980,0.0724,0.0431,0.274
0VjIjW4GlUZAMYd2vXMi3b,0.00143,0.513,0.730,0.000095,0.0897,0.0598,0.334
2dR5WkrpwylTuT3jRWNufa,0.52500,0.670,0.365,0.000000,0.0575,0.0566,0.450
4IsHMzDbRE8q5Z4ALsQj3o,0.60000,0.791,0.329,0.863000,0.1930,0.0592,0.526
...,...,...,...,...,...,...,...
6EZiB3Cb8WI4GNADA5y9cF,0.58500,0.539,0.435,0.026100,0.1070,0.0845,0.228
7tFiyTwD0nx5a1eklYtX2J,0.28900,0.391,0.402,0.000000,0.2430,0.0539,0.228
40riOy7x9W7GXjyGp4pjAv,0.00574,0.579,0.508,0.000494,0.0575,0.0270,0.609
1zB4vmk8tFRmM9UULNzbLB,0.00672,0.604,0.822,0.134000,0.1470,0.0438,0.288


In [36]:
from sklearn.neighbors import NearestNeighbors

def train_knn(df: pd.DataFrame, n_neighbors: int = 10) -> NearestNeighbors:
    """Fit a cosine-metric ``NearestNeighbors`` model on the rows of ``df``.

    Args:
        df: Feature table; its values are used as the training matrix.
        n_neighbors: Default neighbour count stored on the model.

    Returns:
        The fitted ``NearestNeighbors`` instance.
    """
    knn = NearestNeighbors(n_neighbors=n_neighbors, metric="cosine")
    # fit() returns the estimator itself.
    return knn.fit(df.values)

# Number of recommendations wanted per seed track.
k = 5
# The model's default neighbour count is at least k + 1; recommend() passes its
# own n_neighbors at query time, so this only sets the fallback default.
model = train_knn(df, n_neighbors=max(10, k + 1))

In [37]:
def recommend(
    seed_id: str, df: pd.DataFrame, model: "NearestNeighbors", k: int = 5
) -> List[str]:
    """Return up to ``k`` track IDs closest to the seed, never the seed itself.

    Args:
        seed_id: Track ID to start from; must be present in ``df.index``.
        df: Feature table indexed by track ID. ``model`` is expected to have
            been fitted on this frame's rows in the same order, because the
            positions returned by ``kneighbors`` are mapped back through
            ``df.index``.
        model: Fitted nearest-neighbour model exposing ``kneighbors``.
        k: Maximum number of recommendations.

    Returns:
        Track IDs ordered from nearest to farthest. Fewer than ``k`` are
        returned when ``df`` does not hold enough other tracks.

    Raises:
        ValueError: If ``seed_id`` is not in ``df.index``.
    """
    if seed_id not in df.index:
        raise ValueError("Seed track not found in dataframe – is it in the playlist?")

    # Selecting with a list always yields a frame, even when the ID occurs
    # several times in the index; the first matching row is the query vector.
    seed_rows = df.loc[[seed_id]]
    seed_vector = seed_rows.to_numpy()[:1]

    # Ask for enough neighbours to still have k left once every row of the seed
    # is removed, but never more than the number of rows available.
    n_query = min(k + len(seed_rows), len(df))
    _, indices = model.kneighbors(seed_vector, n_neighbors=n_query)

    # Drop the seed by ID rather than by position: with tied distances the seed
    # is not guaranteed to come back first. dict.fromkeys removes repeated IDs
    # while keeping the nearest-first order.
    neighbour_ids = dict.fromkeys(df.index[indices.ravel()])
    return [track_id for track_id in neighbour_ids if track_id != seed_id][:k]


In [38]:
# Show the history row labelled 0; the recommendation cell below seeds from music.id[0].
music.loc[0]

name                                    The Times They Are A-Changin'
danceability                                                    0.389
energy                                                          0.396
key                                                                 7
loudness                                                       -7.999
mode                                                                1
speechiness                                                    0.0332
acousticness                                                    0.887
instrumentalness                                                  0.0
liveness                                                       0.0828
valence                                                         0.585
tempo                                                          171.86
type                                                   audio_features
id                                             52vA3CYKZqZVdQnzRrdZt6
uri                 

In [40]:
# Seed with the track stored under index label 0 and print the title of each
# recommended track, looked up by ID in the listening history.
seed_track_id = music["id"][0]
reco = recommend(seed_track_id, df, model, k=5)
for rec_id in reco:
    titles = music.loc[music["id"] == rec_id, "name"]
    print(titles.values[0])

La bohème
Angie
Take Five
The Wind
A Horse with No Name


In [43]:
import webbrowser
def open_spotify_track(track_id: str) -> None:
    """Hand the open.spotify.com page of ``track_id`` to ``webbrowser.open``."""
    webbrowser.open("https://open.spotify.com/track/{}".format(track_id))
# Open the first recommended track in the browser.
open_spotify_track(reco[0])