In [13]:
import sounddevice as sd
import numpy as np
import queue
import time
from keras.api.models import load_model
import librosa
from sklearn.preprocessing import FunctionTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler

In [34]:
# Index of the capture device to use. In the listing printed below, index 2 is
# the MME "Mezcla estéreo" input; device indices are machine-specific.
INPUT_DEVICE_INDEX = 2

# Show every audio device PortAudio can see, then select the default one.
print(sd.query_devices())
sd.default.device = INPUT_DEVICE_INDEX

*  0 Microsoft Sound Mapper - Input, MME (2 in, 0 out)
   1 Microphone Array (Realtek(R) Au, MME (2 in, 0 out)
   2 Mezcla estéreo (Realtek(R) Audi, MME (2 in, 0 out)
   3 Microsoft Sound Mapper - Output, MME (0 in, 2 out)
   4 Speaker / Headphone (Realtek(R), MME (0 in, 2 out)
   5 Primary Sound Capture Driver, Windows DirectSound (2 in, 0 out)
   6 Microphone Array (Realtek(R) Audio), Windows DirectSound (2 in, 0 out)
   7 Mezcla estéreo (Realtek(R) Audio), Windows DirectSound (2 in, 0 out)
   8 Primary Sound Driver, Windows DirectSound (0 in, 2 out)
   9 Speaker / Headphone (Realtek(R) Audio), Windows DirectSound (0 in, 2 out)
  10 Speaker / Headphone (Realtek(R) Audio), Windows WASAPI (0 in, 2 out)
  11 Mezcla estéreo (Realtek(R) Audio), Windows WASAPI (2 in, 0 out)
  12 Microphone Array (Realtek(R) Audio), Windows WASAPI (2 in, 0 out)
  13 Auriculares con micrófono (@System32\drivers\bthhfenum.sys,#2;%1 Hands-Free%0
;(Redmi Buds 6 Lite)), Windows WDM-KS (0 in, 1 out)
  14 Auricula

In [9]:
# Recording parameters
SAMPLE_RATE = 16000                        # Hz
CHANNELS = 1
DURATION = 10                              # seconds
REQUIRED_FRAMES = DURATION * SAMPLE_RATE   # samples per recorded interval

# Queue that stores incoming audio chunks
audio_queue = queue.Queue()

# Load the trained model (used for prediction inside grabar_intervalos)
MODEL_PATH = "models/cnn_chroma_0.1107.keras"
model = load_model(MODEL_PATH)


In [16]:
# Helper functions
def normalize(X):
    """Standardize X to zero mean and unit variance using its global statistics.

    The mean and standard deviation are computed over every element of X
    (no axis), so the whole array is shifted and scaled by two scalars.

    Parameters
    ----------
    X : array-like
        Numeric data of any shape.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as X. If X is constant (standard deviation
        of exactly 0, e.g. features of a silent clip) the centered values,
        which are all zeros, are returned instead of dividing 0 by 0.
    """
    centered = X - np.mean(X)
    std = np.std(X)
    if std == 0:
        # Dividing here would fill the result with NaN and poison every
        # downstream step; the centered array is already all zeros.
        return centered
    return centered / std

def sub_extra_column(X):
    """Return X without its last entry along the third axis.

    The first two axes are kept whole; only axis 2 is trimmed by one.
    """
    everything = slice(None)
    all_but_last = slice(None, -1)
    return X[everything, everything, all_but_last]

def transpose(X):
    """Swap the second and third axes of a 3-D array, keeping axis 0 first."""
    axis_order = (0, 2, 1)
    return X.transpose(axis_order)

# Pipeline definition: each step is built separately, then assembled in order.
_normalize_step = FunctionTransformer(normalize, validate=False)          # standardization
_scale_step = MinMaxScaler(feature_range=(0, 1))                          # scaling to [0, 1]
_sub_column_step = FunctionTransformer(sub_extra_column, validate=False)  # drop last column
_transpose_step = FunctionTransformer(transpose, validate=False)          # swap axes 1 and 2

preprocessing_pipeline = Pipeline(steps=[
    ('normalize', _normalize_step),
    ('scale', _scale_step),
    ('sub_column', _sub_column_step),
    ('transpose', _transpose_step),
])

# Apply the pipeline
_ENCODER_CACHE = {}


def _get_encoder(path='models/encoders/encoder_chroma.keras'):
    """Return the Keras encoder stored at `path`, loading it from disk only once."""
    if path not in _ENCODER_CACHE:
        _ENCODER_CACHE[path] = load_model(path)
    return _ENCODER_CACHE[path]


def process_and_encode(X):
    """Run the preprocessing steps on X one by one and encode the result.

    The steps of `preprocessing_pipeline` are applied individually (rather
    than through the pipeline itself) because MinMaxScaler only accepts 2-D
    input: X is flattened to (n_samples, -1) for scaling and then restored
    to its original shape.

    Parameters
    ----------
    X : numpy.ndarray
        3-D array; the first axis is treated as the sample axis.

    Returns
    -------
    The output of the encoder's ``predict`` on the preprocessed batch, which
    is given a trailing axis of size 1 before prediction.
    """
    steps = preprocessing_pipeline.named_steps

    X = steps['normalize'].transform(X)

    # Flatten for MinMaxScaler, then restore the original shape.
    # NOTE(review): the scaler is re-fitted on every call. With a single
    # sample each feature has min == max, so the scaled output is all zeros.
    # This should probably be `transform` with a scaler fitted on the
    # training data -- confirm and persist that scaler alongside the models.
    flat = X.reshape(X.shape[0], -1)
    X = steps['scale'].fit_transform(flat).reshape(X.shape)

    X = steps['sub_column'].transform(X)
    X = steps['transpose'].transform(X)

    # Encoder prediction. The encoder is cached so it is not re-read from
    # disk on every call (this function runs inside an endless loop).
    X = X.reshape(X.shape[0], X.shape[1], X.shape[2], 1)
    XC = _get_encoder().predict(X)
    return XC

In [36]:
def audio_callback(indata, frames, time_info, status):
    """Stream callback: report status flags, then enqueue a copy of the block.

    `frames` and `time_info` are accepted to satisfy the callback signature
    but are not used. The incoming buffer is copied before being stored in
    `audio_queue`.
    """
    has_flags = bool(status)
    if has_flags:
        print("Status: {}".format(status), flush=True)
    snapshot = indata.copy()
    audio_queue.put(snapshot)

def grabar_intervalos():
    """Record DURATION-second intervals forever, extract chroma and run the model.

    Each iteration records REQUIRED_FRAMES samples with ``sd.rec``, computes
    chroma features with librosa, passes them through ``process_and_encode``
    and prints the prediction of ``model``.

    Recording relies on ``sd.rec`` alone. No separate ``sd.InputStream`` is
    opened: a callback stream feeding ``audio_queue`` with no consumer would
    grow the queue for as long as the loop runs, and would hold a second
    stream on the same device.

    The loop never returns normally; interrupt the kernel to stop it. Any
    in-flight recording is stopped on the way out and the exception
    propagates to the caller.
    """
    print("Grabando en bucle...")
    try:
        while True:
            audio = sd.rec(REQUIRED_FRAMES, samplerate=SAMPLE_RATE,
                           channels=CHANNELS, dtype='float32')
            # sd.wait() blocks until the recording is done and returns the
            # callback flags if a buffer over-/underrun happened.
            status = sd.wait()
            if status:
                print(f"Status: {status}", flush=True)
            audio = audio.flatten()

            # Extract chroma features using librosa's default n_fft/hop_length
            # (roughly 313 frames for a 10 s clip at 16 kHz).
            chroma = librosa.feature.chroma_stft(
                y=audio,
                sr=SAMPLE_RATE,
            )
            print(f"Chroma extraída: {chroma.shape}")

            # Add a leading batch axis: (12, T) -> (1, 12, T)
            chroma = chroma[np.newaxis, ...]
            print(f"Chroma reshaped: {chroma.shape}")

            chroma = process_and_encode(chroma)
            print(chroma.shape)

            # Predict with the model; a trailing axis is appended first.
            x = chroma[..., np.newaxis]
            resultado = model.predict(x)
            print(f"Predicción del modelo: {resultado}\n")

            # Short pause before the next interval
            time.sleep(0.1)
    finally:
        # Release the device if the loop is interrupted mid-recording.
        sd.stop()

In [None]:
if __name__ == "__main__":
    # Build the message from DURATION so it cannot drift from the configured
    # interval length (identical output while DURATION is 10).
    print(f"Iniciando grabación de intervalos de {DURATION} s...")
    grabar_intervalos()