In [1]:
import os
from io import StringIO

import numpy as np
import pandas as pd
import parselmouth
from parselmouth import praat

from promdetect.prep import find_syllable_nuclei, process_annotations

In [2]:
# Root of the thesis project tree. Defaults to the original author's checkout;
# set the MA_THESIS_ROOT environment variable to run the notebook elsewhere.
# os.path.join(..., "") guarantees a trailing separator, because the cells
# below build their paths by plain string concatenation onto root_dir.
root_dir = os.path.join(
    os.environ.get("MA_THESIS_ROOT", "/home/lukas/Dokumente/Uni/ma_thesis/"), ""
)

# Manual nucleus annotation, read as a Praat object so that it can be queried
# through praat.call in later cells.
man_nuclei_file = root_dir + "eval/nucleus_eval/dlf-nachrichten-200703271100_ANNOT.TextGrid"
man_nuclei = parselmouth.Data.read(man_nuclei_file)

In [3]:
# Audio file sharing its stem with the manual annotation; the detector's
# output is stored as auto_nuclei and iterated point by point further down.
wav_file = "".join([root_dir, "eval/nucleus_eval/dlf-nachrichten-200703271100.wav"])
auto_nuclei = find_syllable_nuclei.get_nucleus_points(wav_file)


In [4]:
# Phone-level annotation for the same recording. The reader object is only
# needed to obtain the annotation data, so it is not kept under its own name.
phone_labels = "".join([root_dir, "eval/nucleus_eval/dlf-nachrichten-200703271100.phones"])
phone_times = process_annotations.AnnotationReader(phone_labels).get_annotation_data()

In [5]:
# Filter the phone annotation down to the rows used as reference nuclei.
# NOTE(review): filter_labels is not visible here; judging by the summary
# printed below, the result is the vowel phones excluding schwa -- confirm.
vowel_times = find_syllable_nuclei.filter_labels(
    phone_times,
    "phones",
)

In [6]:
# Sizes of the three nucleus lists that the evaluation below compares.
# Manual nuclei: intervals on tier 1 whose label is exactly "1".
num_man_nuclei = praat.call(
    man_nuclei, "Count intervals where", 1, "is equal to", "1"
)
num_auto_nuclei = len(auto_nuclei)
num_vowel_phones = len(vowel_times)

# One print call with a newline separator emits the same three lines.
print(
    "Number of manually annotated nuclei: {}".format(num_man_nuclei),
    "Number of automatically identified nuclei: {}".format(num_auto_nuclei),
    "Number of annotated vowel phones (excluding schwa): {}".format(num_vowel_phones),
    sep="\n",
)

Number of manually annotated nuclei: 1077
Number of automatically identified nuclei: 1573
Number of annotated vowel phones (excluding schwa): 791


In [7]:
# Export the manual annotation as tab-separated text and parse it into a
# DataFrame; later cells rely on its "tmin" and "tmax" columns.
# NOTE(review): the "List" arguments presumably mean: no line numbers,
# 6 time decimals, no tier names, no empty intervals -- confirm against the
# Praat manual.
man_nuclei_times = pd.read_csv(
    StringIO(praat.call(man_nuclei, "List", "no", 6, "no", "no")),
    sep="\t",
)

In [8]:
# For every manually annotated interval, record an automatically detected
# nucleus that falls inside it (both bounds inclusive).
# The column is reset first: this keeps the cell idempotent when it is re-run
# with different detector output, and guarantees that "auto" exists even when
# auto_nuclei is empty, so the later notnull() count cannot fail.
man_nuclei_times["auto"] = np.nan
for point in auto_nuclei:
    inside = (man_nuclei_times["tmin"] <= point) & (man_nuclei_times["tmax"] >= point)
    # If several points fall into the same interval, the last one iterated wins.
    man_nuclei_times.loc[inside, "auto"] = point

In [9]:
# For every annotated vowel, record an automatically detected nucleus that
# falls between its "start_est" and "end" times (both bounds inclusive).
# The column is reset first: this keeps the cell idempotent when it is re-run
# with different detector output, and guarantees that "auto" exists even when
# auto_nuclei is empty, so the later notnull() count cannot fail.
vowel_times["auto"] = np.nan
for point in auto_nuclei:
    inside = (vowel_times["start_est"] <= point) & (vowel_times["end"] >= point)
    # If several points fall into the same vowel, the last one iterated wins.
    vowel_times.loc[inside, "auto"] = point

In [10]:
# Detector vs. annotated vowels. A match is a vowel row that received an
# "auto" value, so each vowel is counted at most once.
annotation_matches = vowel_times["auto"].notna().sum()
ann_match_recall = annotation_matches / num_vowel_phones  # share of vowels hit
ann_match_precision = annotation_matches / num_auto_nuclei  # relative to all detected points

print(
    "Matches between automatically detected nuclei and DIRNDL-annotated vowels: {0} of {1}".format(annotation_matches, num_vowel_phones),
    "Recall: {0:.3f}".format(ann_match_recall),
    "Precision: {0:.3f}".format(ann_match_precision),
    sep="\n",
)

Matches between automatically detected nuclei and DIRNDL-annotated vowels: 676 of 791
Recall: 0.855
Precision: 0.430


In [11]:
# Detector vs. manual annotation. A match is a manual interval that received
# an "auto" value, so each interval is counted at most once.
manual_matches = man_nuclei_times["auto"].notna().sum()
man_match_recall = manual_matches / num_man_nuclei  # share of manual nuclei hit
man_match_precision = manual_matches / num_auto_nuclei  # relative to all detected points

print(
    "Matches between automatically detected nuclei and manually annotated nuclei: {0} of {1}".format(manual_matches, num_man_nuclei),
    "Recall: {0:.3f}".format(man_match_recall),
    "Precision: {0:.3f}".format(man_match_precision),
    sep="\n",
)

Matches between automatically detected nuclei and manually annotated nuclei: 968 of 1077
Recall: 0.899
Precision: 0.615


In [12]:
# Detected nucleus times that landed inside an annotated vowel: the non-null
# entries of the "auto" column, with the original row index kept.
ann_match_nuclei = vowel_times["auto"].dropna()


In [13]:
# Mark manual intervals that contain a detected nucleus which also matched an
# annotated vowel, i.e. agreement between all three lists (bounds inclusive).
# The column is reset first: this keeps the cell idempotent on re-runs and
# guarantees that "all_match" exists even when ann_match_nuclei is empty, so
# the summary cell that reads it cannot fail.
man_nuclei_times["all_match"] = np.nan
for point in ann_match_nuclei:
    inside = (man_nuclei_times["tmin"] <= point) & (man_nuclei_times["tmax"] >= point)
    # If several points fall into the same interval, the last one iterated wins.
    man_nuclei_times.loc[inside, "all_match"] = point

In [14]:
# Share of manual-annotation rows that were matched by all three lists.
# NOTE(review): the denominator is the number of rows in man_nuclei_times,
# whereas the recall for the manual annotation divides by num_man_nuclei
# (intervals labelled "1") -- confirm that the two counts agree.
print(
    "Matches between all three lists, divided by amount of manual labels: {0:.3f}".format(
        man_nuclei_times["all_match"].notna().sum() / len(man_nuclei_times)
    )
)

Matches between all three lists, divided by amount of manual labels: 0.601
