In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity
import pickle

def predict_cluster_and_similar_songs(new_song_features, 
                                     clustered_data_path='clustering_results.csv',
                                     scaler_model=None, 
                                     kmeans_model=None, 
                                     top_n=5):
    """
    Predict the cluster of a new song and find the most similar songs in it.

    Args:
        new_song_features: mapping (e.g. dict) from feature name to value for
            the new song; it must contain every feature used for clustering.
        clustered_data_path: path to the CSV with the clustering results. The
            file is read with default ``pd.read_csv`` settings and must have
            the feature columns plus 'cluster', 'name' and 'artists'.
        scaler_model: fitted scaler exposing ``transform`` (required).
        kmeans_model: fitted clustering model exposing ``predict`` (required).
        top_n: number of similar songs to return.

    Returns:
        Tuple ``(predicted_cluster, most_similar)`` where ``most_similar`` is
        a DataFrame with columns 'name', 'artists' and 'similarity', sorted
        by descending cosine similarity. It is empty when the predicted
        cluster has no songs in the CSV.

    Raises:
        ValueError: if ``scaler_model`` or ``kmeans_model`` is None.
        KeyError: if ``new_song_features`` lacks any required feature.
    """

    # Fail fast: the None defaults only exist to keep the signature
    # keyword-friendly; without both models nothing below can work.
    if scaler_model is None or kmeans_model is None:
        raise ValueError(
            "scaler_model and kmeans_model are required: pass the fitted "
            "scaler and KMeans model (preferably as keyword arguments)"
        )

    # Features used for clustering, in the column order given to the models
    feature_cols = ['danceability', 'energy', 'key', 'loudness', 'mode',
                   'speechiness', 'acousticness', 'instrumentalness', 
                   'liveness', 'valence', 'tempo', 'duration_ms', 'time_signature']

    # Report every missing feature at once, before touching the filesystem
    missing = [col for col in feature_cols if col not in new_song_features]
    if missing:
        raise KeyError(f"new_song_features is missing required features: {missing}")

    # Load the clustered data
    df_clustered = pd.read_csv(clustered_data_path)

    # Build a single-row matrix for the new song
    new_features = np.array([[new_song_features[col] for col in feature_cols]])

    # Scale with the same scaler that is applied to the cluster songs below
    normalized_features = scaler_model.transform(new_features)

    # Predict the cluster
    predicted_cluster = kmeans_model.predict(normalized_features)[0]

    # Songs that belong to the same cluster (copy: a column is added below)
    cluster_songs = df_clustered[df_clustered['cluster'] == predicted_cluster].copy()

    # Nothing to compare against: return an empty, correctly shaped result
    # instead of handing a zero-row matrix to the scaler / similarity call.
    if cluster_songs.empty:
        cluster_songs['similarity'] = np.empty(0, dtype=float)
        return predicted_cluster, cluster_songs[['name', 'artists', 'similarity']]

    # Scale the cluster songs so both sides live in the same feature space
    cluster_features = cluster_songs[feature_cols].values
    cluster_features_normalized = scaler_model.transform(cluster_features)

    # Cosine similarity between the new song (one row) and each cluster song
    similarities = cosine_similarity(normalized_features, cluster_features_normalized)[0]
    cluster_songs['similarity'] = similarities

    # Keep the top_n most similar songs
    most_similar = cluster_songs.nlargest(top_n, 'similarity')

    return predicted_cluster, most_similar[['name', 'artists', 'similarity']]

In [2]:
import pandas as pd
import numpy as np

# Load the original CSV (semicolon-separated, comma as decimal mark)
df_original = pd.read_csv('tracks_features_clean.csv', sep=';', decimal=',')

# Pick one song; the fixed random_state keeps the selection reproducible
random_song = df_original.sample(n=1, random_state=42).iloc[0]

print(f"🎵 Canción seleccionada: '{random_song['name']}' - {random_song['artists']}")

# Audio features expected by predict_cluster_and_similar_songs
feature_names = ['danceability', 'energy', 'key', 'loudness', 'mode',
                 'speechiness', 'acousticness', 'instrumentalness',
                 'liveness', 'valence', 'tempo', 'duration_ms', 'time_signature']

# Extract the features of the selected song
nueva_cancion = {feature: random_song[feature] for feature in feature_names}

print("\n📊 Características de la canción:")
for key, value in nueva_cancion.items():
    print(f"  {key}: {value}")

# `scaler` and `best_result` are not defined in the cells visible here; they
# are expected to come from earlier cells (TODO confirm where they are
# created). Fail with an actionable message on a fresh kernel.
missing_names = [name for name in ('scaler', 'best_result') if name not in globals()]
if missing_names:
    raise NameError(
        f"Faltan variables de celdas anteriores: {missing_names}. "
        "Ejecuta primero las celdas que definen el scaler y el modelo KMeans."
    )

# Pass the models by keyword: positionally, `scaler` would bind to
# `clustered_data_path` and the KMeans model to `scaler_model`.
cluster, similares = predict_cluster_and_similar_songs(
    nueva_cancion,
    scaler_model=scaler,
    kmeans_model=best_result['kmeans_model'],
)
print(f"\n🎯 Cluster predicho: {cluster}")
print("\n🎵 Canciones más similares:")
print(similares)

🎵 Canción seleccionada: 'Smokin' Sticky Sticky' - ['Unk']

📊 Características de la canción:
  danceability: 0.623
  energy: 0.736
  key: 11
  loudness: -3.657
  mode: 0
  speechiness: 0.402
  acousticness: 0.0021
  instrumentalness: 0.0
  liveness: 0.0691
  valence: 0.422
  tempo: 87.988
  duration_ms: 380427
  time_signature: 4.0


NameError: name 'scaler' is not defined