In [1]:
import sounddevice as sd
from scipy.io.wavfile import write
import numpy as np
import parselmouth
from tensorflow.keras.models import load_model
from sincnet_tensorflow import SincConv1D, LayerNorm
import os

Definir las capas personalizadas (SincConv1D y LayerNorm con `get_config`) que se necesitan para cargar el modelo basado en SincNet

In [2]:
class SincConv1DWithConfig(SincConv1D):
    """SincConv1D subclass whose ``get_config`` also reports its own hyperparameters."""

    def get_config(self):
        """Return the parent's config dict extended with this layer's settings."""
        # Start from whatever the parent class reports.
        layer_config = super().get_config()

        # Add the SincConv1D-specific values read from the instance.
        sinc_settings = {
            "N_filt": self.N_filt,
            "Filt_dim": self.Filt_dim,
            "fs": self.fs,
            "stride": self.stride,
            "padding": self.padding,
        }
        layer_config.update(sinc_settings)
        return layer_config
    
class LayerNormWithConfig(LayerNorm):
    """LayerNorm subclass that exposes the parent's config without changes."""

    def get_config(self):
        """Return the config dict produced by the parent class, unmodified."""
        return super().get_config()

Grabación de audio y predicción

In [39]:
# Maximum clip duration in seconds, and the same duration expressed as a
# sample count at a 48,000 Hz sampling rate (any fractional sample is dropped).
max_duration = 18.058979166666667
max_length = int(48_000 * max_duration)

def generate_unique_filename(directory, base_filename):
    """Return a ``.wav`` path inside ``directory`` that does not exist yet.

    The first candidate is ``<base_filename>.wav``. If that path exists,
    ``<base_filename>_1.wav``, ``<base_filename>_2.wav``, ... are tried in
    order until a free one is found. Nothing is created on disk.

    Args:
        directory: Folder in which the file name must be unique.
        base_filename: File stem, without extension.

    Returns:
        The first candidate path for which ``os.path.exists`` is false.
    """
    candidate = os.path.join(directory, base_filename + ".wav")
    suffix = 0

    while os.path.exists(candidate):
        suffix += 1
        candidate = os.path.join(directory, f"{base_filename}_{suffix}.wav")

    return candidate

def record_audio(duration, sample_rate=48000, output_dir="recordings", base_filename="output"):
    """Record single-channel audio with sounddevice and save it as a WAV file.

    Args:
        duration: Recording length in seconds.
        sample_rate: Sampling rate in Hz, used both for recording and for the
            saved file.
        output_dir: Folder that receives the file; created when missing.
        base_filename: File stem. A numeric suffix is appended when a file
            with that name already exists in ``output_dir``.

    Returns:
        Path of the WAV file that was written.
    """
    # exist_ok=True creates the folder without a separate existence check, so
    # there is no gap between "check" and "create", and an unusable path is
    # reported before any recording starts.
    os.makedirs(output_dir, exist_ok=True)

    # Pick a name that does not overwrite an earlier recording.
    file_path = generate_unique_filename(output_dir, base_filename)

    print(f"Recording for {duration} seconds...")
    n_frames = int(duration * sample_rate)
    audio = sd.rec(n_frames, samplerate=sample_rate, channels=1)  # one channel (mono)
    sd.wait()  # block until the recording has finished

    write(file_path, sample_rate, audio)
    print(f"Recording finished. Saved as {file_path}.")

    return file_path


def process_audio(file_path, sample_rate=48000):
    """Load an audio file with Parselmouth and return its samples as a 1-D array.

    Args:
        file_path: Path of the audio file to read.
        sample_rate: Sampling rate in Hz that the returned samples should
            have. The sound is resampled when the file's own rate differs.
            Pass ``None`` to keep the file's native rate.

    Returns:
        A flat NumPy array with the mono samples.
    """
    snd = parselmouth.Sound(file_path)

    # Mix multi-channel files down to one channel first: flattening a
    # (channels, samples) array directly would append the channels one after
    # another instead of producing a mono signal.
    if snd.n_channels > 1:
        snd = snd.convert_to_mono()

    # Honour the requested rate; files already at that rate are left untouched.
    if sample_rate is not None and snd.sampling_frequency != sample_rate:
        snd = snd.resample(float(sample_rate))

    # With a single channel, flattening yields the 1-D sample sequence.
    return snd.values.flatten()


def predict(file_path):
    """Run the module-level ``model`` on one audio file.

    Args:
        file_path: Path of the audio file to load via ``process_audio``.

    Returns:
        A ``(predictions, audio)`` tuple: the value returned by
        ``model.predict`` and the sample array that was fed to it.
    """
    samples = process_audio(file_path)

    # Wrap the clip in a list so the model receives a batch with one entry.
    batch = np.array([samples])

    return model.predict(batch), samples

# Load the saved model. The custom layer classes are handed over through
# custom_objects under the same names they are registered with here.
model = load_model(
    'ProsodyNet.h5',
    custom_objects={
        'SincConv1DWithConfig': SincConv1DWithConfig,
        'LayerNormWithConfig': LayerNormWithConfig,
    },
)

# Record a clip and run the model on it. The duration comes from max_duration
# rather than a repeated literal, so the recording length cannot drift away
# from the configured maximum.
audio_path = record_audio(max_duration)
predictions, audio = predict(audio_path)


Recording for 18.058979166666667 seconds...
Recording finished. Saved as recordings\output_10.wav.


Devolver diagnosis

In [None]:
# Decision threshold for each output column of the model, in column order.
thresholds = [0.7, 0.5, 0.6, 0.45, 0.5, 0.7, 0.4, 0.7, 0.45]

# `predictions` is expected to hold the predicted probabilities with shape
# [num_samples, 9]. Fail loudly if columns and thresholds ever disagree instead
# of thresholding only part of the columns or hitting an IndexError mid-loop.
assert predictions.shape[1] == len(thresholds), "one threshold is required per output column"

# Compare every column with its own threshold in a single broadcast operation.
# The thresholds are cast to the predictions' dtype, and the result keeps that
# dtype: 1 where the value reaches the threshold, 0 otherwise.
threshold_row = np.asarray(thresholds, dtype=predictions.dtype)
y_pred_adjusted = (predictions >= threshold_row).astype(predictions.dtype)

print(y_pred_adjusted[0])

Intento Matías

In [8]:
# Display the raw array returned by model.predict for the current recording.
predictions

array([[7.8686368e-01, 2.0065904e-04, 9.9980760e-01, 9.9892271e-01,
        9.9848175e-01, 9.9997497e-01, 9.9990797e-01, 5.2611411e-02,
        9.7351021e-01]], dtype=float32)

In [5]:
# Decision threshold for each output column of the model, in column order.
thresholds = [0.7, 0.5, 0.6, 0.45, 0.5, 0.7, 0.4, 0.7, 0.45]

# `predictions` is expected to hold the predicted probabilities with shape
# [num_samples, 9]. Fail loudly if columns and thresholds ever disagree instead
# of thresholding only part of the columns or hitting an IndexError mid-loop.
assert predictions.shape[1] == len(thresholds), "one threshold is required per output column"

# Compare every column with its own threshold in a single broadcast operation.
# The thresholds are cast to the predictions' dtype, and the result keeps that
# dtype: 1 where the value reaches the threshold, 0 otherwise.
threshold_row = np.asarray(thresholds, dtype=predictions.dtype)
y_pred_adjusted = (predictions >= threshold_row).astype(predictions.dtype)

In [7]:
# Print the thresholded 0/1 labels of the first sample.
print(y_pred_adjusted[0])

[1. 0. 1. 1. 1. 1. 1. 0. 1.]


Intento con Google Traductor

In [32]:
# Display the raw array returned by model.predict for the current recording.
predictions

array([[3.4164721e-01, 4.0280819e-04, 9.8963487e-01, 9.8712474e-01,
        9.7500205e-01, 9.9935776e-01, 9.9600351e-01, 2.8764874e-02,
        8.7363249e-01]], dtype=float32)

In [37]:
# `predictions` is expected to hold the predicted probabilities with shape
# [num_samples, 9]; `thresholds` is the per-column list defined in an earlier
# cell. Fail loudly if the two ever disagree instead of thresholding only part
# of the columns or hitting an IndexError mid-loop.
assert predictions.shape[1] == len(thresholds), "one threshold is required per output column"

# Compare every column with its own threshold in a single broadcast operation.
# The thresholds are cast to the predictions' dtype, and the result keeps that
# dtype: 1 where the value reaches the threshold, 0 otherwise.
threshold_row = np.asarray(thresholds, dtype=predictions.dtype)
y_pred_adjusted = (predictions >= threshold_row).astype(predictions.dtype)

In [34]:
# Print the thresholded 0/1 labels of the first sample.
print(y_pred_adjusted[0])

[0. 0. 1. 1. 1. 1. 1. 0. 1.]


In [41]:
import pandas as pd

# Load the label table. The columns named "1" through "9" are the ones counted
# below; presumably they hold binary 0/1 labels — TODO confirm against the data.
df = pd.read_csv('data.csv')

# Names of the columns to summarise.
label_columns = [str(i) for i in range(1, 10)]

# Start every label at zero so a column missing from the CSV is still reported.
counts = {column: {'0': 0, '1': 0} for column in label_columns}

# Look the expected columns up by name instead of parsing every header. A
# header such as "01" passes a digit/range test but has no entry in `counts`,
# and a non-string header has no string methods; neither can break this loop.
for column in label_columns:
    if column in df.columns:
        counts[column]['0'] = int((df[column] == 0).sum())
        counts[column]['1'] = int((df[column] == 1).sum())

# Report the number of 0s and 1s per label column.
for col, count in counts.items():
    print(f"Columna {col}: 0s = {count['0']}, 1s = {count['1']}")


Columna 1: 0s = 1218, 1s = 652
Columna 2: 0s = 1807, 1s = 63
Columna 3: 0s = 1156, 1s = 714
Columna 4: 0s = 1128, 1s = 742
Columna 5: 0s = 1264, 1s = 606
Columna 6: 0s = 1029, 1s = 841
Columna 7: 0s = 1358, 1s = 512
Columna 8: 0s = 1761, 1s = 109
Columna 9: 0s = 1477, 1s = 393
