In [1]:
import json
import pathlib

import os

import numpy as np
import pandas as pd
import requests

os.chdir("/home/paolo/git/spotify-playlist-generator")

import torch
from src.utils.config import config as project_cfg
from src.db.queries.embeddings import get_closest_embedding, insert_embeddings
from src.db.schemas.song_embedding import SongEmbedding
from src.db.tables.embeddings import SongEmbedding as SongEmbeddingSQL
from src.model.inference.inference import get_song_embedding


* 'orm_mode' has been renamed to 'from_attributes'


## Database setup

In [2]:
from sqlalchemy import create_engine
import sqlalchemy

# Connection settings for the local PostgreSQL instance.
# NOTE(review): these look like local-development credentials; they should not
# be reused for any shared or production database.
config = {
    'user': 'spotify_playlist_generator',
    'password': 'spotify_playlist_generator',
    'host': 'localhost',
    'port': '5432',
    'database': 'spotify_playlist_generator',
}

# Build the URL from every configured field. The port was previously
# hard-coded in the string, so editing config['port'] had no effect.
DATABASE_URL: str = (
    f"postgresql+psycopg2://{config['user']}:{config['password']}"
    f"@{config['host']}:{config['port']}/{config['database']}"
)

# Create the SQLAlchemy engine (sessions are opened later, per query).
# pool_pre_ping=True is passed so pooled connections are checked before use.
engine: sqlalchemy.engine.Engine = create_engine(DATABASE_URL, pool_pre_ping=True)

## Load data

In [3]:
from typing import Generator

# Lazily walk the raw songs directory (recursively) for every MP3 file.
paths: Generator = pathlib.Path("/home/paolo/git/spotify-playlist-generator/data/raw/songs").rglob("*.mp3")

# One record per audio file: the file stem is used as the song id and the full
# path is kept as a pathlib.Path (not a str, hence the `object` value type).
# The paths are sorted because rglob yields them in filesystem order, which is
# not guaranteed to be stable between runs or machines.
tracks: list[dict[str, object]] = [
    {"song_id": audio_file.stem, "audio_path": audio_file}
    for audio_file in sorted(paths)
]

## Generate embeddings and save them to the database

In [4]:
# from tqdm import tqdm
# 
# embeddings: list[SongEmbedding] = []
# for track in tqdm(tracks):
#     
#     embeddings.append(get_song_embedding(track))
#     
# # embeddings: list[SongEmbedding] = [get_song_embedding(track) for track in tracks]

In [5]:
from sqlalchemy.orm import Session
# with Session(engine) as session:
#     insert_embeddings(session, embeddings)

## Get closest embeddings

In [6]:
from src.db.queries.embeddings import get_embeddings

# Open a short-lived session and load the stored embeddings via get_embeddings.
# Alternative kept for reference (nearest-neighbour lookup for freshly computed embeddings):
#   get_closest_embedding(session, embeddings, project_cfg["model"]["k"])
with Session(engine) as session:
    closest_embeddings = get_embeddings(session)


In [7]:
import numpy as np
# Convert each database row into a SongEmbedding schema object, turning the
# stored embedding into a plain Python list via .tolist().
tmp = [
    SongEmbedding(id=row.id, embedding=row.embedding.tolist())
    for row in closest_embeddings
]

# Stack the embeddings into a single array (one row per song) for clustering.
X = np.array([item.embedding for item in tmp])
X.shape

(1899, 128)

In [8]:
# from sklearn.cluster import DBSCAN
# 
# for min_samples in [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 50, 100]:
#     for eps in [0.001, 0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6]: #, 0.7, 0.8, 0.9, 1., 1.5, 2., 2.5, 3, 3.5, 4., 4.5, 5.]:
#         clustering_model: DBSCAN = DBSCAN(
#             eps=eps, 
#             min_samples=min_samples, 
#             metric='manhattan', 
#             metric_params=None, 
#             algorithm='auto', 
#             leaf_size=30, 
#             p=None, 
#             n_jobs=-1
#         )
#         results = clustering_model.fit_predict(X)
#         num_clusters = np.unique(results).shape[0]
#         if num_clusters > 1:
#             print(f"Epsilon: {eps} - Minimum samples: {min_samples} - num clusters: {len(np.unique(results))}")
#         # break

Epsilon: 0.001 - Minimum samples: 1 - num clusters: 1766
Epsilon: 0.01 - Minimum samples: 1 - num clusters: 1766
Epsilon: 0.1 - Minimum samples: 1 - num clusters: 568
Epsilon: 0.2 - Minimum samples: 1 - num clusters: 10
Epsilon: 0.3 - Minimum samples: 1 - num clusters: 3
Epsilon: 0.001 - Minimum samples: 2 - num clusters: 127
Epsilon: 0.01 - Minimum samples: 2 - num clusters: 127
Epsilon: 0.1 - Minimum samples: 2 - num clusters: 47
Epsilon: 0.2 - Minimum samples: 2 - num clusters: 5
Epsilon: 0.3 - Minimum samples: 2 - num clusters: 2
Epsilon: 0.001 - Minimum samples: 3 - num clusters: 6
Epsilon: 0.01 - Minimum samples: 3 - num clusters: 6
Epsilon: 0.1 - Minimum samples: 3 - num clusters: 5
Epsilon: 0.2 - Minimum samples: 3 - num clusters: 3
Epsilon: 0.3 - Minimum samples: 3 - num clusters: 2
Epsilon: 0.001 - Minimum samples: 4 - num clusters: 3
Epsilon: 0.01 - Minimum samples: 4 - num clusters: 3
Epsilon: 0.1 - Minimum samples: 4 - num clusters: 3
Epsilon: 0.2 - Minimum samples: 4 - nu

In [9]:
# DBSCAN was only imported inside a commented-out cell, so this cell raised a
# NameError on a fresh kernel; import it where it is used.
from sklearn.cluster import DBSCAN

# Density-based clustering of the song embeddings using cosine distance.
clustering_model: DBSCAN = DBSCAN(
    eps=0.2,
    min_samples=2,
    metric='cosine',
    metric_params=None,
    algorithm='auto',
    leaf_size=30,
    p=None,
    n_jobs=-1,
)
# One cluster label per row of X; scikit-learn labels noise points as -1.
results = clustering_model.fit_predict(X)

In [10]:
# Pair every cluster label with the id of the song it was computed for.
song_ids = np.array([song.id for song in tmp])
clustered = pd.DataFrame(zip(results, song_ids), columns=["cluster", "id"])

# Build the Spotify track URI from the song id.
clustered["spotify_uri"] = "spotify:track:" + clustered["id"]

# Keep only songs that were assigned to a cluster (label -1 is excluded).
df = clustered[clustered["cluster"] != -1]
df

Unnamed: 0,cluster,id,spotify_uri
0,0,54KFQB6N4pn926IUUYZGzK,spotify:track:54KFQB6N4pn926IUUYZGzK
1,0,6LtPIXlIzPOTF8vTecYjRe,spotify:track:6LtPIXlIzPOTF8vTecYjRe
2,0,1BLfQ6dPXmuDrFmbdfW7Jl,spotify:track:1BLfQ6dPXmuDrFmbdfW7Jl
3,0,1Rz8BPFzWYOItgmZxb7ZJY,spotify:track:1Rz8BPFzWYOItgmZxb7ZJY
4,0,4E6cwWJWZw2zWf7VFbH7wf,spotify:track:4E6cwWJWZw2zWf7VFbH7wf
...,...,...,...
1893,0,5ByAIlEEnxYdvpnezg7HTX,spotify:track:5ByAIlEEnxYdvpnezg7HTX
1894,0,0trHOzAhNpGCsGBEu7dOJo,spotify:track:0trHOzAhNpGCsGBEu7dOJo
1895,0,7G3lxTsMfSx4yarMkfgnTC,spotify:track:7G3lxTsMfSx4yarMkfgnTC
1896,0,7nYvUtkQMx1v80S2FH2s9J,spotify:track:7nYvUtkQMx1v80S2FH2s9J


In [11]:
# Number of songs in each cluster.
df.value_counts(subset="cluster")

cluster
0     1214
20       4
17       4
25       4
35       2
27       2
28       2
29       2
30       2
31       2
32       2
33       2
34       2
36       2
37       2
38       2
39       2
40       2
41       2
42       2
43       2
44       2
45       2
46       2
26       2
24       2
1        2
23       2
2        2
3        2
4        2
5        2
6        2
7        2
8        2
9        2
10       2
11       2
12       2
13       2
14       2
15       2
16       2
18       2
19       2
21       2
22       2
47       2
Name: count, dtype: int64

In [14]:
import spotipy
from spotipy.oauth2 import SpotifyOAuth

# Spotify application credentials must be supplied through the environment
# (e.g. exported in the shell or loaded from an untracked .env file) and never
# written into the notebook. Credentials that were committed here previously
# should be considered leaked and rotated in the Spotify developer dashboard.
_missing_spotify_vars = [
    name for name in ("SPOTIPY_CLIENT_ID", "SPOTIPY_CLIENT_SECRET") if not os.getenv(name)
]
if _missing_spotify_vars:
    raise RuntimeError(
        "Missing Spotify credentials; set the environment variable(s): "
        + ", ".join(_missing_spotify_vars)
    )

# Keep the original redirect URI as the default, but allow overriding it.
os.environ.setdefault("SPOTIPY_REDIRECT_URI", "http://localhost/callback/")

# The variables are read under the same SPOTIPY_* names they are set with; the
# earlier code looked up SPOTIFY_* names, which were never set, and therefore
# passed None for both client_id and client_secret.
sp = spotipy.Spotify(
    auth_manager=SpotifyOAuth(
        scope="playlist-modify-private playlist-read-private",
        redirect_uri=os.environ["SPOTIPY_REDIRECT_URI"],
        client_id=os.environ["SPOTIPY_CLIENT_ID"],
        client_secret=os.environ["SPOTIPY_CLIENT_SECRET"],
        show_dialog=False,
        cache_path="token.txt",  # OAuth token is cached in this file
    )
)
# Profile of the authenticated user; its "id" is used by later cells.
user = sp.current_user()

User authentication requires interaction with your web browser. Once you enter your credentials and give authorization, you will be redirected to a url.  Paste that url you were directed to to complete the authorization.
Using `localhost` as redirect URI without a port. Specify a port (e.g. `localhost:8080`) to allow automatic retrieval of authentication code instead of having to copy and paste the URL your browser is redirected to.
Opened https://accounts.spotify.com/authorize?client_id=bf621646332d4c9c82c6e6d1fd8a8352&response_type=code&redirect_uri=http%3A%2F%2Flocalhost%2Fcallback%2F&scope=playlist-modify-private+playlist-read-private in your browser
Opening in existing browser session.


In [13]:
# Upper bound on the number of tracks sent in a single add-items request
# (the Spotify Web API accepts at most 100 items per call).
max_songs_per_request: int = 100

# Create one private playlist per cluster and fill it with that cluster's songs.
for cluster in df.cluster.unique():
    songs_uri: list[str] = df[df.cluster == cluster].spotify_uri.tolist()
    playlist = sp.user_playlist_create(
        user=user.get("id"),
        name=f"Playlist {cluster}",
        public=False,
        description=f"Playlist using the songs in the cluster {cluster}",
    )
    playlist_id = playlist.get("id")

    # Step through the URIs in fixed-size slices. Iterating over start offsets
    # never produces an empty slice, whereas computing `len // size + 1` chunks
    # sent an empty request whenever the song count was an exact multiple of
    # the chunk size (e.g. exactly 100 songs).
    for start in range(0, len(songs_uri), max_songs_per_request):
        sp.playlist_add_items(
            playlist_id,
            items=songs_uri[start:start + max_songs_per_request],
        )


User authentication requires interaction with your web browser. Once you enter your credentials and give authorization, you will be redirected to a url.  Paste that url you were directed to to complete the authorization.
Using `localhost` as redirect URI without a port. Specify a port (e.g. `localhost:8080`) to allow automatic retrieval of authentication code instead of having to copy and paste the URL your browser is redirected to.
Opened https://accounts.spotify.com/authorize?client_id=bf621646332d4c9c82c6e6d1fd8a8352&response_type=code&redirect_uri=http%3A%2F%2Flocalhost%2Fcallback%2F&scope=playlist-modify-private+playlist-read-private in your browser
Opening in existing browser session.


SpotifyOauthError: error: invalid_request, error_description: code must be supplied

{'href': 'https://api.spotify.com/v1/users/paolo.s16/playlists?offset=0&limit=50',
 'items': [{'collaborative': False,
   'description': '',
   'external_urls': {'spotify': 'https://open.spotify.com/playlist/7MkjiFngObIiQ3xenYWRLq'},
   'href': 'https://api.spotify.com/v1/playlists/7MkjiFngObIiQ3xenYWRLq',
   'id': '7MkjiFngObIiQ3xenYWRLq',
   'images': [{'height': 640,
     'url': 'https://i.scdn.co/image/ab67616d0000b273c08d5fa5c0f1a834acef5100',
     'width': 640}],
   'name': 'Playlist 42',
   'owner': {'display_name': 'paolo_sofia',
    'external_urls': {'spotify': 'https://open.spotify.com/user/paolo.s16'},
    'href': 'https://api.spotify.com/v1/users/paolo.s16',
    'id': 'paolo.s16',
    'type': 'user',
    'uri': 'spotify:user:paolo.s16'},
   'primary_color': None,
   'public': False,
   'snapshot_id': 'MixmNDllMTM1YjhkNjFmNWZkYmEzZDg5NDkzM2E4MGI4ZTBhMGQ3MGZk',
   'tracks': {'href': 'https://api.spotify.com/v1/playlists/7MkjiFngObIiQ3xenYWRLq/tracks',
    'total': 2},
   'typ

In [16]:
# Fetch the playlists of the authenticated user.
# NOTE(review): the response appears to be paginated (its href shows
# offset=0&limit=50), so this presumably holds only the first page - confirm.
current_user_id = user.get("id")
playlists = sp.user_playlists(user=current_user_id)
playlists

{'href': 'https://api.spotify.com/v1/users/paolo.s16/playlists?offset=0&limit=50',
 'items': [{'collaborative': False,
   'description': 'Playlist using the songs in the cluster 42',
   'external_urls': {'spotify': 'https://open.spotify.com/playlist/7MkjiFngObIiQ3xenYWRLq'},
   'href': 'https://api.spotify.com/v1/playlists/7MkjiFngObIiQ3xenYWRLq',
   'id': '7MkjiFngObIiQ3xenYWRLq',
   'images': [{'height': 640,
     'url': 'https://i.scdn.co/image/ab67616d0000b273c08d5fa5c0f1a834acef5100',
     'width': 640}],
   'name': 'Playlist 42',
   'owner': {'display_name': 'paolo_sofia',
    'external_urls': {'spotify': 'https://open.spotify.com/user/paolo.s16'},
    'href': 'https://api.spotify.com/v1/users/paolo.s16',
    'id': 'paolo.s16',
    'type': 'user',
    'uri': 'spotify:user:paolo.s16'},
   'primary_color': None,
   'public': False,
   'snapshot_id': 'NCw5ZThlMWE1ZjgwY2MzOTAyZGNjZDY4MjA4Yzg5MGI4MjE3MzVjZDg2',
   'tracks': {'href': 'https://api.spotify.com/v1/playlists/7MkjiFngObIiQ3

In [23]:
# Unfollow (i.e. remove from the user's library) the playlists generated by
# this notebook. They are recognised by the exact description prefix used when
# they were created, rather than by the bare word "cluster", so unrelated
# playlists that merely mention "cluster" are not removed.
generated_description_prefix = "Playlist using the songs in the cluster"

for plist in playlists.get("items") or []:
    # The description may be missing or None; treat that as an empty string
    # instead of failing on the membership test.
    description = plist.get("description") or ""
    if description.startswith(generated_description_prefix):
        print("deleting playlist", plist.get("id"))
        sp.current_user_unfollow_playlist(plist.get("id"))
    

deleting playlist 7MkjiFngObIiQ3xenYWRLq
deleting playlist 0hTVvBgSfMIflNchZXDvrg
deleting playlist 3NosKXlOE1YszMn1SlcKIC
deleting playlist 18doYEUy8d08b6hliR2tFL
deleting playlist 0dfGnFG2Lg8HOrvOinXeFZ
deleting playlist 360LJceO5oBgDBn40TlXRv
deleting playlist 6Xk1FQmJwo8imlx8NdqaEr
deleting playlist 7a9Rd1VWVLls4C88fI3VGK
deleting playlist 0BVKVMCYbdC4dej71eTV57
deleting playlist 78sm2Vw7Qx9YIWD5MeyJIk
deleting playlist 3vXRRTkM6akx79kxJ9N1Rk
deleting playlist 2jVx2wP1cQbQGe6qyBhx4p
deleting playlist 0O4ooVUsHPtIyGSDjeWkBK
deleting playlist 0rdl0SEYPOCpHxECILVwSS
deleting playlist 0RsvGOZVKvZSNoLfvjorQ2
deleting playlist 7qbpvQ7abHCVCE2ik0eLBC
deleting playlist 6Se3AqnSm9C6C66fAFNZ4z
deleting playlist 4ywjMvpAMcnNGaWZrgezbg
deleting playlist 630VJ8E0eo8gs2AlS9XUrh
deleting playlist 2BROwMmNwg7t9ls6JwRRD0
deleting playlist 4V6f5yOkNWUhTlBDQr7bpL
deleting playlist 2QNSmSrZdTJ1Wlvr6DKHpR
deleting playlist 5XnJQlw059VqK4nBTdssfi
deleting playlist 2BPnzlWhCcYelIUFEkPmxU
deleting playlis