In [None]:
import os
import numpy as np
from scipy.io import wavfile
from vosk import Model
import vowel_functions
import pandas as pd
import matplotlib.pyplot as plt
import re

def get_subject_id(filename):
    """Extract the subject identifier embedded in *filename*.

    The identifier is the first run of uppercase ASCII letters and/or digits
    that is directly enclosed by two hyphens (for example ``AB12`` in
    ``20230101-AB12-text.wav``).

    Args:
        filename: File name (or any string) to search.

    Returns:
        The matched identifier as a string, or ``None`` when the name contains
        no such hyphen-delimited token.
    """
    found = re.search(r'-([A-Z0-9]+)-', filename)
    if not found:
        return None
    return found.group(1)

def process_audio_file(audio_path, output_folder, model):
    """Extract the vowels of one recording and export them per subject.

    Steps, in order:
      1. Read the WAV file with ``wavfile.read``.
      2. Derive the subject ID (via ``get_subject_id``) and the recording
         date (the part of the file name before the first ``-``).
      3. Run ``vowel_functions.rec_vosk`` on the file and annotate every
         recognized word with ``vowel_functions.checkVowels``.
      4. Call ``vowel_functions.extract_vowels`` and hand the result to
         ``vowel_functions.groupedframes_to_files``, writing into
         ``<output_folder>/<subject_id>``.

    Any step that fails is reported on stdout and the file is skipped, so a
    single bad recording does not abort a batch run.

    Args:
        audio_path: Path to the WAV file to process.
        output_folder: Root output folder; a sub-folder named after the
            subject ID is created inside it when needed.
        model: Speech-recognition model forwarded to
            ``vowel_functions.rec_vosk``.

    Returns:
        None.
    """
    file_name = os.path.basename(audio_path)

    # Load audio file. ``Exception`` (not a bare ``except``) keeps the
    # best-effort behavior while letting Ctrl-C / SystemExit propagate.
    try:
        sample_rate, audio = wavfile.read(audio_path)
    except Exception as exc:
        print(f"Failed to read {audio_path}: {exc}")
        return

    # Resolve the subject ID before the expensive recognition step: without
    # it there is no output folder to write to, and joining a path with
    # ``None`` would raise ``TypeError``.
    subject_id = get_subject_id(file_name)
    if subject_id is None:
        print(f"Could not determine subject ID for {audio_path}")
        return
    recording_date = file_name.split('-')[0]

    # Run speech recognition
    words = vowel_functions.rec_vosk(audio_path, model, print_summary=False)

    # Get all vowels for each word
    for word in words:
        word["vowels"] = vowel_functions.checkVowels(
            word["word"], vowel_functions.VOWELS_SV
        )

    # Number of samples in 0.03 s of audio; passed on to extract_vowels
    # (presumably as the analysis frame length -- confirm in vowel_functions).
    frame_length = int(0.03 * sample_rate)
    try:
        grouped_frames = vowel_functions.extract_vowels(
            words, audio, sample_rate, frame_length, print_info=False
        )
    except Exception as exc:
        print(f"Failed to extract vowels from {audio_path}: {exc}")
        return

    # Create output folder for the subject
    subject_output_folder = os.path.join(output_folder, subject_id)
    os.makedirs(subject_output_folder, exist_ok=True)

    # Export files, named "<recording date>_<original file stem>".
    export_name = f"{recording_date}_{os.path.splitext(file_name)[0]}"
    vowel_functions.groupedframes_to_files(
        grouped_frames, sample_rate, export_name,
        subject_output_folder, metadata={"origin_file": audio_path}
    )

def main(
    input_folder="/Volumes/One Touch/kandidat/convertedFolder",
    output_folder="/Volumes/One Touch/kandidat/output",
    model_path="models/vosk-model-small-sv-rhasspy-0.15/",
):
    """Process every matching WAV file in *input_folder*.

    A file is processed when its name ends with ``.wav`` and contains the
    substring ``text``. Files are handled in sorted name order so repeated
    runs are reproducible (``os.listdir`` itself returns entries in
    arbitrary order).

    Args:
        input_folder: Folder containing the recordings to process.
        output_folder: Root folder for the exported results; created when
            missing.
        model_path: Path of the Vosk model directory to load.

    Returns:
        None.
    """
    # The model is loaded once and shared by all files. (The former
    # ``"model" not in locals()`` guard was always true inside this function.)
    model = Model(model_path)

    os.makedirs(output_folder, exist_ok=True)

    for filename in sorted(os.listdir(input_folder)):
        if not (filename.endswith(".wav") and 'text' in filename):
            continue
        audio_path = os.path.join(input_folder, filename)
        print(f"Processing {filename}...")
        process_audio_file(audio_path, output_folder, model)
        print(f"Finished processing {filename}")

# Run the batch job only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()