In [12]:
import os
import csv

import parselmouth

import textgrids

import numpy as np


In [13]:
# Number of formant tracks (F1..Fn) read from every analysed sound.
n_formants = 5


def extract_formants_and_intensity_from_sound(snd):
    """Sample formant and intensity values of ``snd`` on the formant time grid.

    Calls ``snd.to_formant_burg()`` and ``snd.to_intensity()`` with their
    default arguments, then queries both objects at every time returned by
    the formant object's ``t_grid()``.

    Parameters
    ----------
    snd
        Object exposing ``to_formant_burg()`` and ``to_intensity()``
        (the notebook passes a ``parselmouth.Sound``).

    Returns
    -------
    tuple
        ``n_formants`` lists (formant 1 first), followed by the list of
        intensity values, followed by the time grid itself. All lists have
        one entry per grid time.

    Notes
    -----
    NOTE(review): ``t_grid()`` may return frame *edges* rather than frame
    centres (``ts()``) -- confirm this is the intended sampling.
    """
    formant_track = snd.to_formant_burg()
    intensity_track = snd.to_intensity()
    times = formant_track.t_grid()

    # Formant numbers are 1-based in get_value_at_time.
    per_formant = [
        [formant_track.get_value_at_time(number, t) for t in times]
        for number in range(1, n_formants + 1)
    ]
    intensity_values = [intensity_track.get_value(t) for t in times]

    return (*per_formant, intensity_values, times)

In [15]:

# Output location: one CSV per recording is written into this directory.
formants_dir = "all_formants_observed_formants_first_5_formants"

# Input layout: <base_folder>/<wav_folder> and <base_folder>/<annotations_folder>
# are joined with the record name further down in the notebook.
base_folder = "words_annotations"
wav_folder = "wav"
annotations_folder = "annotations"

# Create the output directory up front; a pre-existing one is not an error.
os.makedirs(formants_dir, exist_ok=True)




In [16]:
# Word map: one "<record id>,<transcription>" pair per line.
transcription_file = "words_annotations/word_map.txt"
# The transcriptions contain accented characters, so decode explicitly as UTF-8
# instead of relying on the platform's default locale encoding.
with open(transcription_file, encoding="utf-8") as af:
    # Lines keep their trailing newline; the consumer strips it.
    annotations = af.readlines()
annotations

['27356,Arete\n',
 '27358,Reloj\n',
 '27361,Cadena\n',
 '27364,Gafas\n',
 '27365,Anillo\n',
 '27387,Pollo\n',
 '36677,Papa\n',
 '32540,Yuca\n',
 '33582,Tomate\n',
 '31873,Zanahoria\n',
 '36438,Cebolla\n',
 '35472,Cilantro\n',
 '29029,Pepino\n',
 '32621,Habichuela\n',
 '36271,Te amo\n',
 '36358,Me gusta\n',
 '31500,Quiero\n',
 '35159,Feliz\n',
 '34100,Triste\n',
 '28102,Miedo\n',
 '28728,Odio\n',
 '32467,Culpa\n',
 '33981,Enojo\n',
 '35165,Aburrido\n',
 '35616,Celos\n',
 '36265,Amor\n',
 '34617,Agua\n',
 '33307,Café\n',
 '27436,Jugo\n',
 '29420,Leche\n',
 '31317,Avena\n',
 '34014,Chocolate\n',
 '29809,Limonada\n',
 '30373,Bombero\n',
 '32784,Medico\n',
 '30538,Profesor\n',
 '28699,Estudiante\n',
 '32438,Interpretador\n',
 '28066,Conductor\n',
 '36798,Enfermero\n',
 '30380,Cantante\n',
 '32791,Carpintero\n',
 '29617,Chef\n',
 '33954,Fotógrafo\n',
 '31077,Músico\n',
 '28487,Odontólogo\n',
 '31485,Peluquero\n',
 '36245,Periodista\n',
 '33962,Piloto\n',
 '36249,Policía\n',
 '28492,Pintor\n'

In [19]:
# Export one CSV per annotated recording: a row per analysis time with the
# formant values, the intensity and the phoneme label active at that time.

# Column layout of every CSV; it does not depend on the recording.
field_names = ["t", *[f"f_{i+1}" for i in range(n_formants)], "i", "phoneme"]

for annotation in annotations:
    # Each entry is "<record id>,<transcription>". Blank lines are skipped and
    # only the first comma separates the two fields.
    line = annotation.rstrip("\r\n")
    if not line.strip():
        continue
    record_name, separator, transcription = line.partition(",")
    if not separator:
        print(f"Skipping malformed annotation line: {line!r}")
        continue

    record_path = os.path.join(base_folder, wav_folder, f"{record_name}.wav")
    formants_file_path = os.path.join(formants_dir, f"{record_name}-{transcription}.csv")
    text_grid_file = os.path.join(base_folder, annotations_folder, f"{record_name}.TextGrid")
    if os.path.exists(record_path):

        try:
            current_text_grid = textgrids.TextGrid(text_grid_file)
            print("TRANSCRIPTION:", transcription)
            # The tier holding the phoneme intervals is looked up by the
            # lower-cased word; a missing tier raises KeyError (handled below).
            intervals = current_text_grid[transcription.lower()]
            snd = parselmouth.Sound(record_path)
            # Run the analysis BEFORE opening the output file so that a failed
            # extraction does not leave an empty CSV behind.
            tracks = extract_formants_and_intensity_from_sound(snd)

            # newline="" is required by the csv module so the dialect's line
            # terminator is written untranslated; UTF-8 keeps accented labels
            # portable across platforms.
            with open(formants_file_path, "w", newline="", encoding="utf-8") as tf:
                writer = csv.DictWriter(tf, field_names, dialect='unix')
                writer.writeheader()
                for *formants, intensity, t in zip(*tracks):
                    row = {
                        "t": t,
                        "i": intensity,
                    }
                    for interval in intervals:
                        # Half-open test: a time lying exactly on the boundary
                        # shared by two intervals belongs to the later one
                        # instead of being left unlabeled.
                        if interval.xmin <= t < interval.xmax:
                            row["phoneme"] = interval.text.strip()
                            break
                    # When no interval matches, "phoneme" is left out and
                    # DictWriter writes an empty field for it.
                    row.update({f"f_{i+1}": value for i, value in enumerate(formants)})
                    writer.writerow(row)
        except KeyError:
            print(f"Annotation {transcription} not found")
        except FileNotFoundError:
            print(f"Annotation in {text_grid_file} does not exists")
    else:
        print(f"Record file {record_path} does not exists")
    # Progress line, printed for every entry whether or not it was exported.
    print(record_name, transcription)

TRANSCRIPTION: Arete
27356 Arete
TRANSCRIPTION: Reloj
27358 Reloj
TRANSCRIPTION: Cadena
27361 Cadena
TRANSCRIPTION: Gafas
27364 Gafas
TRANSCRIPTION: Anillo
27365 Anillo
TRANSCRIPTION: Pollo
27387 Pollo
Annotation in words_annotations/annotations/36677.TextGrid does not exists
36677 Papa
Annotation in words_annotations/annotations/32540.TextGrid does not exists
32540 Yuca
Annotation in words_annotations/annotations/33582.TextGrid does not exists
33582 Tomate
Annotation in words_annotations/annotations/31873.TextGrid does not exists
31873 Zanahoria
Annotation in words_annotations/annotations/36438.TextGrid does not exists
36438 Cebolla
Annotation in words_annotations/annotations/35472.TextGrid does not exists
35472 Cilantro
TRANSCRIPTION: Pepino
Annotation Pepino not found
29029 Pepino
Annotation in words_annotations/annotations/32621.TextGrid does not exists
32621 Habichuela
Annotation in words_annotations/annotations/36271.TextGrid does not exists
36271 Te amo
Annotation in words_annot