# Audio Study

In [1]:
import sys
from pathlib import Path
import os

# Absolute path of the folder that contains the project modules:
# two levels above the current working directory.
root_dir = Path.cwd().resolve().parent.parent

# Make that folder importable. The membership check keeps the cell idempotent,
# so re-running it does not pile up duplicate entries in sys.path.
if str(root_dir) not in sys.path:
    sys.path.append(str(root_dir))

In [4]:
from config.variables import indiv_sample_path, audio_path, text_path

# 0. Speech extraction

First, we extract the speech from the audio files using a strong pretrained model for this task. Note that this step is not one of the main focuses of the project; for that reason, unlike in the other phases, we will not experiment with different models here.

In [9]:
import speech_recognition as sr
from langdetect import detect
import subprocess


def convert_to_wav(mp3_file, output_folder):
    """Convert an audio file to a mono, 16 kHz WAV file using ffmpeg.

    Args:
        mp3_file: Path of the input audio file.
        output_folder: Directory in which the WAV file is written. The output
            keeps the input's base name, with a ``.wav`` extension.

    Returns:
        The path of the generated WAV file.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable cannot be launched.
        RuntimeError: If ffmpeg exits with a non-zero status.
    """
    wav_file = os.path.splitext(os.path.basename(mp3_file))[0] + '.wav'
    wav_path = os.path.join(output_folder, wav_file)

    # -y overwrites an existing output file, so re-running the conversion is
    # repeatable instead of prompting for confirmation or aborting.
    command = ['ffmpeg', '-y', '-i', mp3_file, '-ac', '1', '-ar', '16000', wav_path]
    try:
        result = subprocess.run(
            command,
            stdin=subprocess.DEVNULL,  # never block waiting for console input
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except FileNotFoundError as exc:
        raise FileNotFoundError(
            "ffmpeg executable not found; install ffmpeg and make sure it is on PATH"
        ) from exc

    # Fail loudly here rather than returning a path to a missing or stale file.
    if result.returncode != 0:
        details = result.stderr.decode(errors='replace').strip()
        raise RuntimeError(
            f"ffmpeg failed to convert {mp3_file} (exit code {result.returncode}): {details}"
        )
    return wav_path

def transcribe_audio(audio_file):
    """Transcribe an audio file with Google Speech Recognition.

    The language passed to the recognizer is obtained from
    ``detect_language(audio_file)``.

    Args:
        audio_file: Path to an audio file readable by ``sr.AudioFile``.

    Returns:
        The recognized text, or an empty string when the audio could not be
        understood or the request to the recognition service failed (a message
        is printed in both cases).
    """
    engine = sr.Recognizer()
    with sr.AudioFile(audio_file) as wav_source:
        # Load the whole file content as audio data
        recording = engine.record(wav_source)
        try:
            spoken_language = detect_language(audio_file)
            transcript = engine.recognize_google(recording, language=spoken_language)
        except sr.UnknownValueError:
            print("No se pudo entender el audio")
            transcript = ""
        except sr.RequestError as err:
            print(f"Error en la solicitud a Google Speech Recognition API: {err}")
            transcript = ""
    return transcript

def detect_language(audio_file):
    """Guess the language spoken in an audio file.

    The audio is transcribed once with Google Speech Recognition (without an
    explicit language, i.e. with the recognizer's default) and the resulting
    text is passed to ``detect``.

    Args:
        audio_file: Path to an audio file readable by ``sr.AudioFile``.

    Returns:
        The language code returned by ``detect`` for the transcript, or
        ``"en-US"`` when transcription or detection fails.
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_file) as source:
        audio_data = recognizer.record(source)

    try:
        # Ask for the plain transcript string. With show_all=True the recognizer
        # returns the raw API response instead of text, which detect() cannot
        # process, so the fallback below was always taken.
        transcript = recognizer.recognize_google(audio_data)
        return detect(transcript)
    except Exception:
        # Best effort: fall back to a default language on any failure, while
        # still letting KeyboardInterrupt / SystemExit propagate.
        # "en-US" replaces the previous "en-EN", which is not a valid locale tag.
        return "en-US"

In [11]:
# Folder that contains the audio files
audio_folder = os.path.join(root_dir, audio_path)

# Folder where the transcribed text files are saved
output_folder = os.path.join(root_dir, text_path)

# Create the output folder if it does not exist yet
os.makedirs(output_folder, exist_ok=True)

# Walk through the audio files in the folder
for filename in os.listdir(audio_folder):
    if filename.endswith('.mp3'):  # Adjust the extension to match your audio files
        audio_file = os.path.join(audio_folder, filename)
        # The intermediate WAV file is written next to the source audio
        wav_file = convert_to_wav(audio_file, audio_folder)
        text = transcribe_audio(wav_file)

        output_file = os.path.splitext(filename)[0] + '.txt'
        output_path = os.path.join(output_folder, output_file)
        # Explicit UTF-8 so the transcript is written the same way on every
        # platform, regardless of the system's default encoding.
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(text)
        print(f"Transcripción del archivo {filename} guardada en {output_file}")

FileNotFoundError: [WinError 2] El sistema no puede encontrar el archivo especificado

## 1. Data Exploration and Visualisation

In [4]:
import IPython.display as ipd

# Build the path of the individual sample and render an audio player for it
sample_audio_file = os.path.join(root_dir, indiv_sample_path)
ipd.Audio(sample_audio_file)