In [1]:
import ast
import os

import numpy as np
import openai
import pandas as pd
import spotipy
from sklearn.preprocessing import MinMaxScaler
from spotipy.oauth2 import SpotifyClientCredentials

In [2]:
# Spotify API credentials are read from the environment so that secrets are
# never stored in (or committed with) the notebook. Set SPOTIPY_CLIENT_ID and
# SPOTIPY_CLIENT_SECRET before starting the kernel.
# NOTE(review): credentials that were previously hardcoded in this cell have
# been exposed and should be rotated in the Spotify developer dashboard.
client_id = os.environ.get("SPOTIPY_CLIENT_ID")
client_secret = os.environ.get("SPOTIPY_CLIENT_SECRET")
if not client_id or not client_secret:
    # Fail early with an actionable message instead of a late auth error.
    raise RuntimeError(
        "Spotify credentials not found: set the SPOTIPY_CLIENT_ID and "
        "SPOTIPY_CLIENT_SECRET environment variables."
    )

# Shared Spotify client used by get_song_names_from_url.
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

# Resolve Spotify track URIs to human-readable "<song> by <artist>" labels
def get_song_names_from_url(song_uris):
    """Look up a display label for every Spotify track URI.

    Args:
        song_uris: Iterable of track URIs. The text after the last ":" of
            each URI is passed to ``sp.track`` as the track ID.

    Returns:
        A list with one string per input URI, in input order. Each entry is
        "<song name> by <first listed artist>", or an
        "Error retrieving song for URI: ..." message when the lookup for that
        URI raised an exception (the lookup is best-effort and never raises).
    """
    def _label_for(uri):
        try:
            # Keep only the ID part of the URI before querying Spotify.
            track = sp.track(uri.split(":")[-1])
            title = track['name']
            first_artist = track['artists'][0]['name']
            return f"{title} by {first_artist}"
        except Exception as e:
            return f"Error retrieving song for URI: {uri} ({e})"

    return [_label_for(uri) for uri in song_uris]

In [3]:
from openai import OpenAI

# The OpenAI API key is the first line of api_key.txt (surrounding whitespace
# and the trailing newline are stripped).
with open("api_key.txt", "r") as key_file:
    key = key_file.readline().strip()

# Shared OpenAI client used by get_prompt_features.
client = OpenAI(api_key=key)

In [4]:
data = pd.read_csv('../../datasets/seven_day_listening_history.csv')

In [5]:
# Order in which the LLM is asked to score the audio features; the returned
# list follows this order position by position.
_PROMPT_FEATURE_ORDER = (
    "acousticness", "danceability", "energy", "instrumentalness",
    "liveness", "loudness", "speechiness", "valence", "tempo",
)


def _parse_feature_scores(response_text, expected_count):
    """Safely parse the LLM reply into a list of ``expected_count`` floats.

    Raises:
        ValueError, SyntaxError or TypeError when the reply does not contain a
        literal list of exactly ``expected_count`` numeric values.
    """
    # The model sometimes wraps the list in prose or a code fence, so only the
    # outermost bracketed span is parsed.
    start = response_text.find("[")
    end = response_text.rfind("]")
    if start == -1 or end < start:
        raise ValueError("response does not contain a Python list")

    # literal_eval only accepts literals, so model output can never execute
    # code the way eval() would.
    parsed = ast.literal_eval(response_text[start:end + 1])
    if not isinstance(parsed, (list, tuple)) or len(parsed) != expected_count:
        raise ValueError(f"expected a list of {expected_count} values, got: {parsed!r}")
    return [float(value) for value in parsed]


def get_prompt_features(prompt):
    """Ask the LLM to translate a playlist prompt into audio-feature scores.

    Args:
        prompt: Free-text description of the desired playlist.

    Returns:
        A list of 9 floats, ordered as acousticness, danceability, energy,
        instrumentalness, liveness, loudness, speechiness, valence, tempo
        (the order requested in the prompt), or ``None`` when the request
        fails or the reply cannot be parsed into such a list. Errors are
        printed, never raised.
    """
    try:
        feature_list = ", ".join(_PROMPT_FEATURE_ORDER[:-1]) + ", and " + _PROMPT_FEATURE_ORDER[-1]
        chat_completion = client.chat.completions.create(
            model="gpt-3.5-turbo",  # Specify the model
            messages=[
                {
                    "role": "system",
                    "content": "You are a music expert.",
                },
                {
                    "role": "user",
                    "content": f"Given the playlist prompt: '{prompt}', assign values between 0 and 1 for these features: {feature_list}. Respond with only a list of values in Python list format.",
                }
            ],
            temperature=0.4
        )

        # Extract the content from the response
        response_message = chat_completion.choices[0].message.content.strip()
        print("Response from LLM:", response_message)  # Debugging step

        # Parse and validate the reply without executing it.
        return _parse_feature_scores(response_message, len(_PROMPT_FEATURE_ORDER))
    except Exception as e:
        print(f"Error fetching prompt features: {e}")
        return None

In [17]:
# Audio features used to compare songs with the prompt. The order must match
# the order in which get_prompt_features asks the LLM for values
# (..., liveness, loudness, speechiness, valence, tempo), because
# compute_distance compares the two vectors position by position.
features = [
    "acousticness", "danceability", "energy", "instrumentalness",
    "liveness", "loudness", "speechiness", "valence", "tempo"
]

# Rescale every feature column to [0, 1] so it is comparable with the 0-1
# scores requested from the LLM. Note: this overwrites the columns in `data`.
scaler = MinMaxScaler()
data[features] = scaler.fit_transform(data[features])
song_vectors = data[features].values

# Unit-normalise each song vector; rows that are all zeros keep a divisor of 1
# so they do not turn into NaN vectors.
vector_norms = np.linalg.norm(song_vectors, axis=1, keepdims=True)
vector_norms[vector_norms == 0] = 1.0
normalized_vectors = song_vectors / vector_norms

# Drop repeated songs within each user's history. The user_id is part of the
# key so that a song shared by several users is kept for each of them rather
# than surviving for only one user. As before, the last occurrence wins.
unique_indices = list({
    (uid, *vec): i
    for i, (uid, vec) in enumerate(zip(data["user_id"], normalized_vectors))
}.values())
data = data.iloc[unique_indices].reset_index(drop=True)

# Euclidean distance between one song and the prompt's feature vector
def compute_distance(song, prompt_features):
    """Return the Euclidean distance between a song and the prompt features.

    Args:
        song: Row-like object (e.g. a DataFrame row) indexable by the
            module-level ``features`` list.
        prompt_features: Sequence of target values, compared position by
            position with ``song[features]``.

    Returns:
        The L2 norm of the element-wise difference.
    """
    song_vector = song[features].values
    target_vector = np.array(prompt_features)
    difference = song_vector - target_vector
    return np.linalg.norm(difference)

# Main logic to generate a playlist
def generate_playlists(prompt, user_ids=None, num_songs=5):
    """Build a playlist of the best-matching songs for each user.

    The prompt is converted to a feature vector with ``get_prompt_features``;
    each user's rows in ``data`` are ranked by ``compute_distance`` to that
    vector and the closest ``num_songs`` are resolved to display names with
    ``get_song_names_from_url``.

    Args:
        prompt: Free-text description of the desired playlist.
        user_ids: Optional iterable of user IDs to generate playlists for.
            IDs that do not occur in ``data["user_id"]`` are ignored. When
            ``None``, every user in ``data`` gets a playlist.
        num_songs: Maximum number of songs per playlist.

    Returns:
        A dict mapping user ID to a list of song labels, or ``None`` when the
        prompt features could not be fetched or do not contain exactly one
        numeric value per entry in ``features``.
    """
    # Fetch features for the prompt
    prompt_features = get_prompt_features(prompt)
    if prompt_features is None:
        print("Failed to fetch prompt features.")
        return None

    # Reject malformed feature vectors up front: a wrong-length vector would
    # otherwise broadcast silently or crash inside the per-row distance call.
    try:
        prompt_vector = np.asarray(prompt_features, dtype=float)
    except (TypeError, ValueError):
        prompt_vector = None
    if prompt_vector is None or prompt_vector.shape != (len(features),):
        print(f"Prompt features must be {len(features)} numeric values, got: {prompt_features!r}")
        return None

    # Compute the set of known users once instead of once per requested ID.
    known_user_ids = data["user_id"].unique()
    if user_ids is None:
        user_ids = known_user_ids
    else:
        user_ids = [uid for uid in user_ids if uid in known_user_ids]

    all_playlists = {}  # To store playlists for each user

    for user_id in user_ids:
        # Work on a copy so the shared frame never gains a "distance" column.
        user_data = data[data["user_id"] == user_id].copy()

        # Calculate distances
        user_data["distance"] = user_data.apply(
            lambda row: compute_distance(row, prompt_vector), axis=1
        )

        # Smallest distance first; keep the top num_songs.
        recommended_songs = user_data.sort_values(by="distance").head(num_songs)
        recommended_uris = recommended_songs["uri"].tolist()

        # Store the playlist for the user
        all_playlists[user_id] = get_song_names_from_url(recommended_uris)

    return all_playlists

# Example run: a 20-song playlist per user for a disco-party prompt.
prompt = "dance songs to play at a Friday night disco party"
all_playlists = generate_playlists(prompt=prompt, num_songs=20)

Response from LLM: [0.2, 0.8, 0.9, 0.1, 0.5, 0.85, 0.1, 0.8, 0.9]


In [18]:
import json
def save_playlists_to_json(playlists, filename="playlists.json"):
    """Write the playlists mapping to ``filename`` as indented JSON.

    Args:
        playlists: Mapping of user ID to playlist. Keys are converted with
            ``str()`` because JSON object keys must be strings.
        filename: Path of the JSON file to create or overwrite.
    """
    serializable = {}
    for user_key, tracks in playlists.items():
        serializable[str(user_key)] = tracks

    with open(filename, "w") as out_file:
        json.dump(serializable, out_file, indent=4)

    print(f"Playlists saved to {filename}")

# generate_playlists returns None when the prompt features could not be
# fetched, so only write the file when playlists were actually produced.
if all_playlists is not None:
    save_playlists_to_json(all_playlists, filename="user_playlists.json")
else:
    print("No playlists were generated; nothing to save.")

Playlists saved to user_playlists.json
