In [48]:
import base64
import html
import os
import time

import ipywidgets as widgets
import pandas as pd
import requests
from dotenv import load_dotenv
from IPython.display import display, clear_output
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [49]:
# Spotify API credentials: load a local .env file (if any) into the process
# environment, then read both values from it. Either may be None when unset.
load_dotenv()
SPOTIFY_CLIENT_ID = os.environ.get("SPOTIFY_CLIENT_ID")
SPOTIFY_CLIENT_SECRET = os.environ.get("SPOTIFY_CLIENT_SECRET")

# Last token obtained and the time.monotonic() deadline after which it must be
# requested again; avoids one token round-trip per cover lookup.
_spotify_token_cache = {"token": None, "expires_at": 0.0}


def get_spotify_token():
    """Return a Spotify access token obtained with the client-credentials flow.

    A token that is still valid is served from an in-memory cache. Otherwise the
    client id/secret are sent as HTTP Basic credentials to the accounts endpoint.

    Returns:
        The access token string, or None when the credentials are missing, the
        request fails, or the response carries no ``access_token``. Failures are
        reported with ``print`` and never raised.
    """
    cached = _spotify_token_cache["token"]
    if cached and time.monotonic() < _spotify_token_cache["expires_at"]:
        return cached

    # Without this guard the literal text "None:None" would be sent as credentials.
    if not SPOTIFY_CLIENT_ID or not SPOTIFY_CLIENT_SECRET:
        print(
            "Error al obtener el token de Spotify:",
            "faltan las credenciales SPOTIFY_CLIENT_ID / SPOTIFY_CLIENT_SECRET",
        )
        return None

    url = "https://accounts.spotify.com/api/token"
    auth_string = f"{SPOTIFY_CLIENT_ID}:{SPOTIFY_CLIENT_SECRET}"
    auth_base64 = base64.b64encode(auth_string.encode("utf-8")).decode("utf-8")

    headers = {
        "Authorization": f"Basic {auth_base64}",
        "Content-Type": "application/x-www-form-urlencoded",
    }
    data = {"grant_type": "client_credentials"}

    try:
        # A timeout keeps a stalled connection from freezing the widget callback.
        response = requests.post(url, headers=headers, data=data, timeout=10)
        payload = response.json()
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers a body that is not valid JSON.
        print("Error al obtener el token de Spotify:", exc)
        return None

    token = payload.get("access_token") if isinstance(payload, dict) else None
    if not token:
        print("Error al obtener el token de Spotify:", payload)
        return None

    # Refresh a minute early so a token is never used right at its expiry.
    expires_in = payload.get("expires_in") or 0
    _spotify_token_cache["token"] = token
    _spotify_token_cache["expires_at"] = time.monotonic() + max(expires_in - 60, 0)
    return token

In [50]:
def get_album_cover(track_name, artist):
    """Look up a track through the Spotify search API and return its cover and link.

    Args:
        track_name: Track title used in the ``track:`` search filter.
        artist: Artist name used in the ``artist:`` search filter.

    Returns:
        A ``(cover_url, spotify_url)`` pair taken from the first search hit.
        Always a 2-tuple so callers can unpack it: ``(None, None)`` when no token
        is available, the request fails or nothing matches; ``cover_url`` alone
        is None when the matched album lists no images.
    """
    not_found = (None, None)

    token = get_spotify_token()
    if not token:
        return not_found

    url = "https://api.spotify.com/v1/search"
    headers = {"Authorization": f"Bearer {token}"}
    params = {"q": f"track:{track_name} artist:{artist}", "type": "track", "limit": 1}

    try:
        # A timeout keeps a stalled connection from freezing the widget callback.
        response = requests.get(url, headers=headers, params=params, timeout=10)
    except requests.RequestException as exc:
        print(f"Error al obtener datos de Spotify: {exc}")
        return not_found

    if response.status_code != 200:
        print(f"Error al obtener datos de Spotify: {response.status_code} - {response.text}")
        return not_found

    try:
        data = response.json()
    except ValueError as exc:
        print(f"Error al obtener datos de Spotify: {exc}")
        return not_found

    tracks = data.get("tracks") if isinstance(data, dict) else None
    items = (tracks or {}).get("items") or []
    if not items:
        print(f"No se encontró portada para: {track_name} - {artist}")
        return not_found

    track = items[0]
    # The images list can be empty; indexing it blindly would raise IndexError.
    images = (track.get("album") or {}).get("images") or []
    cover_url = images[0].get("url") if images else None
    spotify_url = (track.get("external_urls") or {}).get("spotify")

    if cover_url is None:
        print(f"No se encontró portada para: {track_name} - {artist}")
    return cover_url, spotify_url

In [51]:
# Load the Spotify dataset, drop rows with missing values and keep one row per
# (track_name, artists) pair. Rows are ordered by descending popularity before
# grouping, so `first()` keeps the most popular entry of each pair.
df = (
    pd.read_csv("spotify_114k_tracks.csv")
    .dropna()
    .sort_values(by="popularity", ascending=False)
    .groupby(["track_name", "artists"], as_index=False)
    .first()
)

# Express track length in seconds rather than milliseconds when the column exists.
if "duration_ms" in df.columns:
    df = df.assign(duration_s=df["duration_ms"] / 1000).drop(columns=["duration_ms"])

In [52]:
# Per-genre feature weights used by the similarity computation.
# Each key is matched as a substring of a track's genre label; each value maps an
# audio feature name to the factor that feature is multiplied by.
genre_weights = {
    "pop": {
        "danceability": 0.4,
        "energy": 0.3,
        "valence": 0.3,
    },
    "rock": {
        "energy": 0.5,
        "loudness": 0.3,
        "acousticness": -0.2,
    },
    "hip-hop": {
        "speechiness": 0.4,
        "energy": 0.3,
        "danceability": 0.3,
    },
    "r&b": {
        "danceability": 0.3,
        "energy": 0.2,
        "valence": 0.3,
        "acousticness": 0.2,
    },
    "electronic": {
        "energy": 0.5,
        "danceability": 0.3,
        "instrumentalness": 0.2,
    },
    "metal": {
        "energy": 0.6,
        "loudness": 0.4,
    },
    "jazz": {
        "acousticness": 0.5,
        "instrumentalness": 0.3,
        "energy": 0.2,
    },
    "classical": {
        "acousticness": 0.7,
        "instrumentalness": 0.3,
    },
    "reggae": {
        "danceability": 0.4,
        "energy": 0.3,
        "valence": 0.3,
    },
    "country": {
        "acousticness": 0.4,
        "danceability": 0.3,
        "energy": 0.3,
    },
}

def get_genre_weights(genre):
    """Return the feature weights for the first known genre contained in ``genre``.

    The lookup is case-insensitive and substring based: the keys of
    ``genre_weights`` are tried in definition order and the first one found
    inside the lower-cased label wins. A generic weight set is returned when
    ``genre`` is empty, not a string, or matches no key.
    """
    fallback = {"danceability": 0.3, "energy": 0.3, "valence": 0.2, "tempo": 0.2}

    if not (genre and isinstance(genre, str)):
        return fallback

    label = genre.lower()
    return next(
        (pesos for clave, pesos in genre_weights.items() if clave in label),
        fallback,
    )

In [53]:
# Audio features that take part in the similarity computation.
features = [
    "danceability",
    "energy",
    "loudness",
    "speechiness",
    "acousticness",
    "instrumentalness",
    "liveness",
    "valence",
    "tempo",
]

# Replace any missing feature value with that column's mean before scaling.
df[features] = df[features].fillna(df[features].mean())

# "Artist - Track" label used for searching and for display.
df["full_name"] = df["artists"] + " - " + df["track_name"]

# df keeps the raw values; df_scaled is a copy whose feature columns are
# replaced by their StandardScaler-transformed values.
scaler = StandardScaler()
df_scaled = df.copy()
df_scaled[features] = scaler.fit_transform(df[features])

In [54]:
# Session state shared by the widget callbacks.
# Name of the song the current recommendations are based on ("" until one is picked).
ultima_cancion_seleccionada: str = ""
# Lowest similarity (in percent) among the recommendations shown last.
ultimo_porcentaje: float = 100.0
# full_name values already recommended for the current song, excluded on refresh.
canciones_mostradas: set = set()

In [55]:
def _crear_tarjeta(row):
    """Build the widget card (cover, title, artist, similarity) for one recommended row."""
    # Tolerate a bare None as well as a (cover_url, spotify_url) pair.
    resultado = get_album_cover(row["track_name"], row["artists"])
    cover_url, spotify_url = resultado if resultado else (None, None)

    # Dataset text and remote URLs end up inside HTML markup, so escape them
    # instead of stripping quote characters from the title.
    titulo = html.escape(str(row["track_name"]))
    artista = html.escape(str(row["artists"]))
    similitud = round(row["similitud"] * 100, 2)

    if cover_url:
        imagen = f"<img src='{html.escape(cover_url)}' width='180' height='180'>"
    else:
        # Same footprint as a cover so the row of cards stays aligned.
        imagen = (
            "<div style='width: 180px; height: 180px; line-height: 180px; "
            "text-align: center; background: #eee;'>Sin portada</div>"
        )
    if spotify_url:
        imagen = f"<a href='{html.escape(spotify_url)}' target='_blank'>{imagen}</a>"

    song_info = widgets.HTML(
        f"<div style='text-align: center; font-weight: bold;'>{titulo}</div>"
        f"<div style='text-align: center;'>{artista}</div>"
        f"<div style='text-align: center;'> {similitud}%</div>"
    )
    return widgets.VBox(
        [widgets.HTML(imagen), song_info],
        layout=widgets.Layout(margin="10px"),
    )


# Produce song recommendations based on audio-feature similarity.
def recomendar_cancion(cancion_seleccionada, reset_similitud=True):
    """Show up to five songs of the same genre that are most similar to the selection.

    Args:
        cancion_seleccionada: "Artist - Track" label (or a fragment of it) of the
            base song. An exact case-insensitive match is preferred; otherwise the
            most popular song whose label contains the text is used.
        reset_similitud: When True, forget the songs already recommended so the
            list starts over; when False, skip the songs shown so far.

    Side effects:
        Writes the module globals ``ultima_cancion_seleccionada``,
        ``ultimo_porcentaje`` and ``canciones_mostradas`` and renders the result
        (or a message) into ``recommendations_box``.
    """
    global ultimo_porcentaje, canciones_mostradas, ultima_cancion_seleccionada

    ultima_cancion_seleccionada = cancion_seleccionada
    if reset_similitud:
        ultimo_porcentaje = 100.0
        canciones_mostradas = set()

    consulta = cancion_seleccionada.lower()
    nombres = df_scaled["full_name"].str.lower()
    # An exact match wins: a plain substring search could select a different,
    # more popular song whose label merely contains the clicked one.
    coincidencias = df_scaled[nombres == consulta]
    if coincidencias.empty:
        coincidencias = df_scaled[nombres.str.contains(consulta, regex=False, na=False)]

    if coincidencias.empty:
        with recommendations_box:
            clear_output(wait=True)
            print("Canción no encontrada en la base de datos.")
        return

    cancion = coincidencias.sort_values(by="popularity", ascending=False).head(1)
    nombre_base = cancion["full_name"].values[0]
    tiene_genero = "track_genre" in df_scaled.columns
    genero_principal = cancion["track_genre"].values[0] if tiene_genero else "Desconocido"
    weights = get_genre_weights(genero_principal)

    # Restrict candidates to the same genre only when that column really exists.
    if tiene_genero and genero_principal:
        candidatos = df_scaled[df_scaled["track_genre"] == genero_principal]
    else:
        candidatos = df_scaled
    # .copy() so the similarity column is added to an independent frame.
    candidatos = candidatos[
        (candidatos["full_name"] != nombre_base)
        & (~candidatos["full_name"].isin(canciones_mostradas))
    ].copy()

    if candidatos.empty:
        with recommendations_box:
            clear_output(wait=True)
            print("No hay más canciones similares disponibles.")
            ultimo_porcentaje = 100.0
            canciones_mostradas = set()
        return

    # NOTE(review): the weights scale the candidate rows only, and features that
    # are absent from `weights` keep a factor of 1.0, so the base song and the
    # candidates are not scaled alike. Confirm this is the intended metric.
    weighted_features = candidatos[features].copy()
    for feature, weight in weights.items():
        if feature in weighted_features.columns:
            weighted_features[feature] *= weight

    similitudes = cosine_similarity(
        cancion[features].values.reshape(1, -1),
        weighted_features.values,
    )
    candidatos["similitud"] = similitudes[0]

    recomendaciones = candidatos.sort_values(
        by=["similitud", "popularity"],
        ascending=[False, False],
    ).head(5)

    ultimo_porcentaje = recomendaciones["similitud"].min() * 100
    canciones_mostradas.update(recomendaciones["full_name"].tolist())

    with recommendations_box:
        clear_output(wait=True)
        print(f"\nRecomendaciones para: {cancion_seleccionada.strip()} ({genero_principal})\n")

        # Cards are built inside the output context so that messages printed by
        # the cover lookup appear in the recommendations area.
        items = [_crear_tarjeta(row) for _, row in recomendaciones.iterrows()]
        display(widgets.HBox(items, layout=widgets.Layout(justify_content="center")))

In [56]:
# Interactive widgets: a search box, an area for the matching songs and an area
# for the recommendations, plus a button that asks for another batch.
# The placeholder and button label are written with their intended characters
# (accented letters and the refresh emoji) instead of mis-decoded byte sequences.
search_box = widgets.Text(placeholder="Escribe artista o canción...")
results_box = widgets.Output()
recommendations_box = widgets.Output()

refresh_button = widgets.Button(
    description="🔄 Nuevas recomendaciones",
    button_style="info",
    layout=widgets.Layout(width="auto", margin="10px 0 10px 0"),
)
# Show search results while the user types.
def actualizar_resultados(cambio):
    """Refresh the list of matching songs for the text currently in the search box.

    Args:
        cambio: Change notification of the text widget; only its ``"new"`` value
            is read.

    A song matches when every whitespace-separated word of the query occurs in
    its lower-cased ``full_name``. Up to ten matches are shown as buttons in
    ``results_box``; clicking one requests recommendations for that song.
    """
    texto = cambio["new"].strip().lower()

    with results_box:
        clear_output(wait=True)

        # The threshold is on characters, so the hint says so.
        if len(texto) <= 1:
            print("Por favor, ingresa al menos dos caracteres para la búsqueda.")
            return

        # Vectorised substring tests instead of a Python-level apply per row;
        # na=False treats a missing name as a non-match.
        nombres = df["full_name"].str.lower()
        coincide = pd.Series(True, index=df.index)
        for palabra in texto.split():
            coincide &= nombres.str.contains(palabra, regex=False, na=False)
        resultados = df.loc[coincide, "full_name"].head(10)

        if resultados.empty:
            print("No se encontraron resultados.")
            return

        botones = []
        for nombre in resultados:
            boton = widgets.Button(description=nombre, layout=widgets.Layout(width="100%"))
            # Bind the name as a default argument so each button keeps its own song.
            boton.on_click(lambda _boton, descripcion=nombre: recomendar_cancion(descripcion))
            botones.append(boton)
        display(widgets.VBox(botones))

In [57]:
# Request another batch of recommendations for the song selected last.
def refrescar_recomendaciones(_):
    """Button callback: recommend again for the last selected song, if there is one.

    The ignored argument is the value the button passes to its click handler.
    Songs already shown are kept excluded (``reset_similitud=False``).
    """
    if not ultima_cancion_seleccionada:
        return
    recomendar_cancion(ultima_cancion_seleccionada, reset_similitud=False)

# Re-run the search whenever the value of the text box changes.
search_box.observe(actualizar_resultados, names="value")
# Clicking the button asks for more recommendations for the last selected song.
refresh_button.on_click(refrescar_recomendaciones)

In [58]:
# Render the interface: search box on top, matching songs below it, then the
# refresh button together with the recommendations area.
display(
    widgets.VBox(
        children=[
            search_box,
            results_box,
            widgets.VBox(children=[refresh_button, recommendations_box]),
        ]
    )
)

VBox(children=(Text(value='', placeholder='Escribe artista o canción...'), Output(), VBox(children=(Button(but…