In [26]:
import os
import textdistance
import pickle
from sklearn import tree
import parselmouth
import numpy as np
import pandas as pd

In [18]:
file_name = "all_spanish_letters_recordings/aphasia_letters.txt"
with open(file_name) as f:
    all_recordings = f.readlines()


In [46]:
model = pickle.load(open("consonant_types_with_sil.pickle", "rb"))

In [74]:
n_formants = 5

def extract_formants_and_intensity_from_sound(snd):
    formants = snd.to_formant_burg()
    intensity = snd.to_intensity()
    formant_list = [list() for _ in range(n_formants)]
    intensity_list = list()
    for t in formants.t_grid():
        for f_n in range(n_formants):
            formant_list[f_n].append(formants.get_value_at_time(f_n + 1,t))
        intensity_list.append(intensity.get_value(t))

    return *formant_list, intensity_list, formants.t_grid()

remap = {
    'nasal': 'n',
    'sil': '',
    'e': 'e',
    'o': 'o',
    'vibrant': 'R',
    'u': 'u',
    'plosive': 'p',
    'a': 'a',
    'voiceless': 'r',
    'i': 'i',
    'approximant': 'j',
    'voiced': '',
    'fricative': 'f'
}
def regroup(string):
    current_string = string[0] if len(string) > 0 else ""
    grouped_string = current_string
    for letter in string:
        if letter != current_string:
            grouped_string += letter
            current_string = letter
    return grouped_string

def translate(counter_array, threshold=8):
    return ''.join([remap.get(phoneme, '') for phoneme, count in counter_array if count > threshold])

def compress(array):
    compressed_array = list()
    counter = 0
    current_element = array[0]
    for element in array:
        counter += 1
        if element != current_element:
            compressed_array.append((current_element, counter))
            current_element = element
            counter = 0
    return compressed_array

def recognize(record_path):

    snd = parselmouth.Sound(record_path)
    formants_plus_intensity = list()
    for *values, t in zip(*extract_formants_and_intensity_from_sound(snd)):
        formants_plus_intensity.append(values)

    formant_names = [f"f_{i+1}" for i in range(5)]
    column_names =  formant_names + ["i"]
    data_frame = pd.DataFrame(formants_plus_intensity, columns=column_names)
    data_frame[formant_names] = data_frame[formant_names].fillna(data_frame[column_names].mean())
    data_frame[["i"]] = data_frame[["i"]].fillna(value=-300)
    # data_frame[column_names] = data_frame[column_names].fillna(data_frame[column_names].mean())
    # data_frame[["i"]] = data_frame[["i"]].fillna(value=-300)
    return regroup(translate(compress(model.predict(data_frame))))

In [75]:
base_folder = "all_spanish_letters_recordings/aphasia_wav"
output = list()
for record in all_recordings:
    try:
        record_name, transcription = record.replace("mp4", "wav").replace("\n", "").split(",")
        record_path = os.path.join(base_folder, record_name)
        recognizer = recognize(record_path,)
        current_line = f"{transcription}, {recognizer}\n"
        output.append(current_line)
        print(current_line)
    except parselmouth.PraatError:
        print("File not found", record_path)

File not found all_spanish_letters_recordings/aphasia_wav/aphasia/24410.wav
a, a

a, a

a, a

a, af

a, a

a, fa

a, a

a, a

a, 

a, a

a, a

a, a

be, fne

be, e

be, e

be, eo

be, no

be, e

be, e

be, eo

be, je

be, a

be, e

ce, fe

ce, e

ce, e

ce, e

ce, o

ce, e

ce, e

ce, en

ce, f

ce, a

ce, fe

de, fjf

de, e

de, e

de, e

de, oa

de, je

de, 

de, eo

de, je

de, e

de, a

de, je

e, fef

e, 

e, e

e, oa

e, e

e, e

e, jo

e, 

e, e

e, ae

e, j

efe, ef

efe, e

efe, 

efe, ofo

efe, e

efe, e

efe, eje

efe, j

efe, fa

efe, je

ge, e

ge, 

ge, e

ge, a

ge, e

ge, e

ge, je

ge, e

ge, a

ge, ej

hache, af

hache, e

hache, a

hache, afo

hache, af

hache, a

hache, afen

hache, 

hache, a

hache, aej

i, ej

i, e

i, e

i, o

i, e

i, e

i, jn

i, e

i, a

i, jie

jota, oa

jota, oa

jota, oa

jota, aoa

jota, o

jota, o

jota, oan

jota, o

jota, a

jota, oe

ka, a

ka, a

ka, a

ka, a

ka, a

ka, a

ka, an

ka, eo

ka, a

ka, a

ele, ejn

ele, je

ele, ej

el

In [76]:
output_folder_name = "decision_trees_folder_results"
os.makedirs(output_folder_name, exist_ok=True)
output_base_name = "consonant_types.txt"
with open(os.path.join(output_folder_name, output_base_name), "w+") as output_file:
    output_file.writelines(output)


In [77]:
total_real_phones = 0
total_distance = 0
for line in output:
    transcription, ipa_transcription = line.replace("\n", "").replace(" ", "").split(",")
    total_real_phones += len(transcription)
    total_distance += len(transcription) + textdistance.levenshtein(transcription, ipa_transcription)

total_real_phones/total_distance

0.5617673579801623

Todos los fonemas: 0.5617673579801623

In [78]:
vocales = "aeiou"

total_real_phones = 0
total_distance = 0
for line in output:
    transcription, ipa_transcription = line.replace("\n", "").replace(" ", "").split(",")
    if transcription in vocales:
        total_real_phones += len(transcription)
        total_distance += len(transcription) + textdistance.levenshtein(transcription, ipa_transcription)

total_real_phones/total_distance

0.5888888888888889

Solo vocales: 0.5888888888888889



In [79]:
vocales_extremas = "aiu"

total_real_phones = 0
total_distance = 0
for line in output:
    transcription, ipa_transcription = line.replace("\n", "").replace(" ", "").split(",")
    if transcription in vocales_extremas:
        total_real_phones += len(transcription)
        total_distance += len(transcription) + textdistance.levenshtein(transcription, ipa_transcription)

total_real_phones/total_distance

0.5517241379310345

Vocales extremas: 0.5517241379310345


In [73]:
regroup("aaaabbaaaassa")

'abasa'