In [11]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import os
from dotenv import load_dotenv
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from matplotlib import pyplot as plt

load_dotenv("./.env")

client_id = os.environ.get("SPOTIFY_CLIENT_ID")
client_secret = os.environ.get("SPOTIFY_CLIENT_SECRET")

sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id, client_secret), requests_timeout=45)

In [12]:
def getClusterIds(df):
    # Normalize the data
    scaler = StandardScaler()
    X_normalized = scaler.fit_transform(df[["danceability", "energy", "loudness", "speechiness", "acousticness", "instrumentalness", "liveness", "valence", "tempo"]])
    # df_normalized = pd.DataFrame(X_normalized, columns=["danceability", "energy",  "loudness", "speechiness", "acousticness", "instrumentalness", "liveness", "valence", "tempo"])

    # Perform PCA
    pca = PCA(n_components=9)
    pca.fit(X_normalized)
    X_pca = pca.transform(X_normalized)

    # Predict clusters using KMeans
    kmeans = KMeans(n_clusters=50, n_init=10) # TODO: Find optimal number of clusters using an elbow graph
    kmeans.fit(X_pca)
    cluster_ids = kmeans.predict(X_pca)
    return cluster_ids

In [13]:
import pandas as pd

df2 = pd.read_csv('features_clustered.csv')

# Final UI:
# - Input: song name
inpt = input("Enter song name: ")
# - Search for song in Spotify
results = sp.search(q=inpt, limit=1)
# - Get song id
song_id = results["tracks"]["items"][0]["id"]
# - Get song features
inpt_features = sp.audio_features(song_id)
inpt_features

# # - Get cluster id
df2.loc[0] = inpt_features[0]
inpt_cluster = getClusterIds(df2)[0]
# - Get songs in cluster
songs_in_cluster = df2[df2["cluster"] == inpt_cluster]["id"].values.tolist()
song_names = sp.tracks(songs_in_cluster[0:10])["tracks"]
song_names_legible = [i["name"] + ' - ' + i["artists"][0]["name"] for i in song_names]
song_names_legible

['Andalusia - Hammock',
 'Tuistos Herz - Burzum',
 'Something Heavens - H.U.V.A. Network',
 'Slava Satan - Dark Funeral',
 'Glory Box - Portishead',
 'Dernhelm in Battle - Howard Shore',
 'Terminator - Main Title - Best Movie Soundtracks',
 'Galaxia - Ferry Corsten',
 'Tower Seven - Thievery Corporation',
 'Swords Crossed - Klaus Badelt']

In [14]:
# - Display input song
from IPython.display import IFrame
IFrame(src='https://open.spotify.com/embed/track/' + song_id, width=300, height=380)

In [15]:
# - Display first song in cluster
IFrame(src='https://open.spotify.com/embed/track/' + songs_in_cluster[0], width=300, height=380)