In [20]:
!pip install essentia pandas numpy scikit-learn matplotlib openai --quiet

  DEPRECATION: Building 'essentia' using the legacy setup.py bdist_wheel mechanism, which will be removed in a future version. pip 25.3 will enforce this behaviour change. A possible replacement is to use the standardized build interface by setting the `--use-pep517` option, (possibly combined with `--no-build-isolation`), or adding a `pyproject.toml` file to the source tree of 'essentia'. Discussion can be found at https://github.com/pypa/pip/issues/6334
  error: subprocess-exited-with-error
  
  × python setup.py bdist_wheel did not run successfully.
  │ exit code: 1
  ╰─> [116 lines of output]
      C:\Python\Python311\Lib\site-packages\setuptools\__init__.py:92: _DeprecatedInstaller: setuptools.installer and fetch_build_eggs are deprecated.
      !!
      
              ********************************************************************************
              Requirements should be satisfied by a PEP 517 installer.
              If you are using pip, you can try `pip install --

In [24]:
import os
import json
import essentia
import essentia.standard as es
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt

ModuleNotFoundError: No module named 'essentia'

In [23]:
MUSIC_DIR = "../music"
DATA_DIR = "../data"
os.makedirs(DATA_DIR, exist_ok=True)
FEATURES_PATH = os.path.join(DATA_DIR, "features.csv")

## Extraer datos música

In [22]:
def extract_features(audio_path):
    loader = es.MonoLoader(filename=audio_path)
    audio = loader()

    # BPM / Tempo
    rhythm_extractor = es.RhythmExtractor2013(method="multifeature")
    bpm, _, _, _, _ = rhythm_extractor(audio)

    # Energy (RMS)
    energy = float(np.sqrt(np.mean(audio ** 2)))

    # Key detection
    key_extractor = es.KeyExtractor()
    key, scale, strength = key_extractor(audio)
    key_str = f"{key}_{scale}"

    # MFCCs
    w = es.Windowing(type='hann')
    spectrum = es.Spectrum()
    mfcc_extractor = es.MFCC()
    mfccs = []

    for frame in es.FrameGenerator(audio, frameSize=1024, hopSize=512, startFromZero=True):
        mfcc_bands, mfcc_coeffs = mfcc_extractor(spectrum(w(frame)))
        mfccs.append(mfcc_coeffs)
    
    mfccs = np.array(mfccs)
    mfcc_mean = np.mean(mfccs, axis=0)

    return {
        "filename": os.path.basename(audio_path),
        "bpm": float(bpm),
        "energy": float(energy),
        "key": key_str,
        **{f"mfcc_{i}": float(mfcc_mean[i]) for i in range(len(mfcc_mean))}
    }

# --- Iteración sobre la carpeta ./music
files = [os.path.join(MUSIC_DIR, f) for f in os.listdir(MUSIC_DIR)
         if f.lower().endswith((".mp3", ".wav", ".flac"))]

features = []
for path in files:
    try:
        feat = extract_features(path)
        features.append(feat)
        print(f"✅ Procesado: {os.path.basename(path)}")
    except Exception as e:
        print(f"⚠️ Error en {path}: {e}")

df = pd.DataFrame(features)
df.to_csv(FEATURES_PATH, index=False)
print(f"\nGuardado dataset en {FEATURES_PATH}")

⚠️ Error en ../music\01 - Jerry Ropero - CORAÇAO (20th Anniversary Mix).mp3: name 'es' is not defined
⚠️ Error en ../music\01 - Jesus Fernandez - Hablando Claro.mp3: name 'es' is not defined
⚠️ Error en ../music\Nolek - Ke Tu Kiere.mp3: name 'es' is not defined

Guardado dataset en ../data\features.csv


## Carga de datos

In [19]:
import pandas as pd
df = pd.read_csv("../data/features.csv")
df.head()


Unnamed: 0,filename,bpm,energy,key,mfcc_0,mfcc_1,mfcc_2,mfcc_3,mfcc_4,mfcc_5,mfcc_6,mfcc_7,mfcc_8,mfcc_9,mfcc_10,mfcc_11,mfcc_12
0,01 - Jerry Ropero - CORAÇAO (20th Anniversary ...,126.048018,0.222841,8,-92.70228,110.99503,-31.703508,29.143084,-18.384087,20.121151,-12.753054,11.408379,-7.688673,4.726678,-2.881854,3.209774,-5.442422
1,01 - Jesus Fernandez - Hablando Claro.mp3,126.048018,0.235792,5,-109.000626,91.72986,-17.078098,49.936417,-23.229513,30.18563,-11.992529,17.899054,-7.32099,6.663601,-2.54797,6.758866,1.152515
2,Nolek - Ke Tu Kiere.mp3,123.046875,0.258173,9,-136.61296,131.62978,-4.555398,46.95013,-7.90772,16.706938,-7.174628,4.223489,-5.036371,1.879665,-2.529577,0.814589,-2.604206


## Limpieza y preprocesado

In [12]:
df['bpm'] = df['bpm'].astype(float)
df = df.dropna()

ValueError: could not convert string to float: '[126.04801829]'

## Entrenamiento del modelo (ejemplo KNN o clustering)

In [None]:

feature_cols = [c for c in df.columns if c not in ["filename", "key"]]
X = df[feature_cols].values
scaler = StandardScaler()
Xs = scaler.fit_transform(X)

k = min(10, max(1, len(df)-1))
print(f"Entrenando modelo KNN con k={k}")
knn = NearestNeighbors(n_neighbors=k, metric="euclidean")
knn.fit(Xs)

def recommend(track_name, n=5):
    row = df[df["filename"] == track_name]
    if row.empty:
        print("Canción no encontrada.")
        return
    x = scaler.transform(row[feature_cols].values)
    distances, indices = knn.kneighbors(x, n_neighbors=n+1)
    idx = indices[0][1:]
    recs = df.iloc[idx][["filename", "bpm", "energy", "key"]]
    print(f"\n🎧 Recomendaciones para: {track_name}\n")
    display(recs)
    return recs

