In [1]:
import os
import numpy as np
import pandas as pd
import librosa
import tensorflow as tf
import tensorflow_hub as hub
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix




In [7]:
# -----------------------------
# CONFIGURATION
# -----------------------------
# Switches selecting which datasets contribute samples to the combined set.
USE_ESC50 = True
USE_URBANSOUND8K = True
# Dataset root folders, relative to the notebook's working directory.
# Built with os.path.join so the separator matches the running OS; a literal
# backslash would be treated as part of a single directory name on POSIX.
ESC50_PATH = os.path.join('datasets', 'ESC-50')
URBANSOUND_PATH = os.path.join('datasets', 'UrbanSound8K')

# YAMNet model obtained through tensorflow_hub. It is loaded once at
# configuration time and referenced as a module-level global afterwards.
YAMNET_MODEL = hub.load(
    'https://tfhub.dev/google/yamnet/1'
)

In [3]:
# -----------------------------
# EMBEDDING EXTRACTION FUNCTION
# -----------------------------
def extract_embedding(file_path):
    """Summarise one audio file as a single embedding vector.

    The file is loaded with librosa at a requested sample rate of 16000,
    converted to a float32 tensor and passed to YAMNET_MODEL. The model's
    second output is then averaged along axis 0.

    Args:
        file_path: Path of the audio file to load.

    Returns:
        NumPy array containing the mean of the embeddings over axis 0
        (presumably one row per analysis frame; confirm against the
        YAMNet documentation).
    """
    # librosa.load returns (samples, rate); only the samples are used.
    samples, _ = librosa.load(file_path, sr=16000)
    model_input = tf.convert_to_tensor(samples, dtype=tf.float32)
    # The model call is unpacked into three outputs; only the middle one is kept.
    _first, clip_embeddings, _last = YAMNET_MODEL(model_input)
    pooled = tf.reduce_mean(clip_embeddings, axis=0)
    return pooled.numpy()

In [4]:
# -----------------------------
# PROCESS ESC-50
# -----------------------------
def process_esc50():
    """Extract one embedding and one label per clip listed in ESC-50.

    Reads ``meta/esc50.csv`` under ESC50_PATH and calls extract_embedding on
    ``audio/<filename>`` for every row. A file whose extraction raises is
    reported on stdout and skipped, so one bad clip does not abort the run.

    Returns:
        tuple: ``(X, y)`` where ``X`` is a list of embeddings and ``y`` the
        list of matching ``category`` values. Both lists always have the
        same length.

    Raises:
        FileNotFoundError: If the metadata CSV does not exist.
        KeyError: If the CSV has no ``filename`` or ``category`` column.
    """
    print("\nProcesando ESC-50...")
    df = pd.read_csv(os.path.join(ESC50_PATH, 'meta', 'esc50.csv'))
    audio_dir = os.path.join(ESC50_PATH, 'audio')
    X, y = [], []
    # Pull the two needed columns before the loop: a schema problem then fails
    # once, immediately, instead of being swallowed per row by the handler below.
    for filename, category in zip(df['filename'], df['category']):
        file_path = os.path.join(audio_dir, filename)
        try:
            embedding = extract_embedding(file_path)
        except Exception as e:
            # Best-effort: report the failing file and continue with the rest.
            print(f"Error en {file_path}: {e}")
            continue
        # Append the pair together so X and y can never drift out of step.
        X.append(embedding)
        y.append(category)
    return X, y


In [5]:
# -----------------------------
# PROCESS URBANSOUND8K
# -----------------------------
def process_urbansound8k():
    """Extract one embedding and one label per clip listed in UrbanSound8K.

    Reads ``metadata/UrbanSound8K.csv`` under URBANSOUND_PATH and calls
    extract_embedding on ``audio/fold<fold>/<slice_file_name>`` for every
    row. A file whose extraction raises is reported on stdout and skipped.

    Returns:
        tuple: ``(X, y)`` where ``X`` is a list of embeddings and ``y`` the
        list of matching ``class`` values. Both lists always have the same
        length.

    Raises:
        FileNotFoundError: If the metadata CSV does not exist.
        KeyError: If the CSV has no ``fold``, ``slice_file_name`` or
            ``class`` column.
    """
    print("\nProcesando UrbanSound8K...")
    df = pd.read_csv(os.path.join(URBANSOUND_PATH, 'metadata', 'UrbanSound8K.csv'))
    audio_dir = os.path.join(URBANSOUND_PATH, 'audio')
    X, y = [], []
    # Pull the needed columns before the loop: a schema problem then fails
    # once, immediately, instead of being swallowed per row by the handler below.
    for fold_id, slice_name, label in zip(df['fold'], df['slice_file_name'], df['class']):
        file_path = os.path.join(audio_dir, f"fold{fold_id}", slice_name)
        try:
            embedding = extract_embedding(file_path)
        except Exception as e:
            # Best-effort: report the failing file and continue with the rest.
            print(f"Error en {file_path}: {e}")
            continue
        # Append the pair together so X and y can never drift out of step.
        X.append(embedding)
        y.append(label)
    return X, y

In [8]:
# -----------------------------
# COMBINE DATASETS AND TRAIN MODEL
# -----------------------------
# Start from empty lists on every run so re-executing this cell never
# duplicates samples from a previous execution.
all_X, all_y = [], []

if USE_ESC50:
    X_esc, y_esc = process_esc50()
    all_X.extend(X_esc)
    all_y.extend(y_esc)

if USE_URBANSOUND8K:
    X_us8k, y_us8k = process_urbansound8k()
    all_X.extend(X_us8k)
    all_y.extend(y_us8k)

# Check the combined set here, where the cause is obvious, rather than letting
# the training cell fail later with an unrelated message.
if len(all_X) != len(all_y):
    raise RuntimeError(
        f"Embeddings y etiquetas desalineados: {len(all_X)} vs {len(all_y)}"
    )
if not all_X:
    raise RuntimeError(
        "No se carg\u00f3 ninguna muestra; revise USE_ESC50 / USE_URBANSOUND8K "
        "y las rutas de los datasets."
    )




Procesando ESC-50...

Procesando UrbanSound8K...


FileNotFoundError: [Errno 2] No such file or directory: 'datasets\\UrbanSound8K\\metadata\\UrbanSound8K.csv'

In [None]:
print("\nEntrenando clasificador...")
# Hold out 20% of the samples for evaluation, stratified on the labels, with
# a fixed seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    all_X,
    all_y,
    test_size=0.2,
    stratify=all_y,
    random_state=42,
)

# Two hidden layers (256 then 128 units), at most 300 iterations, seeded.
clf = MLPClassifier(
    hidden_layer_sizes=(256, 128),
    max_iter=300,
    random_state=42,
)
clf.fit(X_train, y_train)

In [None]:
# -----------------------------
# EVALUATION
# -----------------------------
# Predict on the held-out split, then print each metric under its heading.
y_pred = clf.predict(X_test)

for heading, metric in (
    ("\nEvaluaci\u00f3n del modelo:", classification_report),
    ("\nMatriz de confusi\u00f3n:", confusion_matrix),
):
    print(heading)
    print(metric(y_test, y_pred))