forked from danijel3/AlignmentMeasures
-
Notifications
You must be signed in to change notification settings - Fork 0
/
AlignMeasure.py
171 lines (134 loc) · 5.71 KB
/
AlignMeasure.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
#!/usr/bin/python
# -*- coding: utf-8 -*-
import argparse
import math
from textgrid import TextGrid
# based on the paper: An Improved Speech Segmentation Quality Measure: the R-value
# by Okko Johannes Räsänen, Unto Kalervo Laine, and Toomas Altosaar
# link: http://legacy.spa.aalto.fi/research/stt/papers/r_value.pdf
class Segment:
    """A labelled segment described by its text, start time and duration."""

    def __init__(self, text, start, dur):
        # Values are stored exactly as given; the end time is never stored,
        # it is derived as start + dur whenever it is needed.
        self.text, self.start, self.dur = text, start, dur

    def __str__(self):
        """Render the segment as ["text": start - end]."""
        end = self.start + self.dur
        return '["{}": {} - {}]'.format(self.text, self.start, end)
def read_ctm(file):
    """Read a CTM file and return its entries as a list of Segment objects.

    Every data line must contain at least five whitespace-separated fields.
    The third field is parsed as the start time, the fourth as the duration
    and the fifth is used as the segment text (the first two fields are
    ignored). Fields may be separated by any run of spaces or tabs.

    Blank lines and lines starting with ';;' (CTM comments) are skipped.

    :param file: path of the CTM file to read
    :return: list of Segment objects in file order
    :raises IndexError: if a data line has fewer than five fields
    :raises ValueError: if the start or duration field is not a number
    """
    ret = []
    with open(file, 'r') as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith(';;'):
                continue
            # split() with no argument collapses repeated spaces and tabs,
            # unlike split(' ') which yields empty tokens and shifts indices.
            tok = line.split()
            ret.append(Segment(tok[4], float(tok[2]), float(tok[3])))
    return ret
def read_textgrid(file, tier):
    """Load one tier of a TextGrid file as a list of Segment objects.

    :param file: path of the TextGrid file
    :param tier: index of the tier to read
    :return: one Segment per interval of the tier, built from the interval's
             mark, minTime and duration()
    :raises IOError: if the file has no tier at that index, or the selected
                     tier has no 'intervals' attribute
    """
    grid = TextGrid()
    grid.read(file)

    if len(grid.tiers) <= tier:
        raise IOError('Texgrid file ' + file + ' doesn\'t have enough tiers to get tier: ' + str(tier))

    chosen = grid.tiers[tier]
    if not hasattr(chosen, 'intervals'):
        raise IOError('The selected tier: ' + str(tier) + ' is not and IntervalTier in file: ' + file)

    return [Segment(iv.mark, iv.minTime, iv.duration()) for iv in chosen.intervals]
class Boundary:
    """A named boundary at a point in time with a search region around it.

    The search region [reg_beg, reg_end] is initialised symmetrically as
    time -/+ search_reg. count_hits() recomputes (and may shrink) the region
    of reference boundaries before using it.
    """

    def __init__(self, time, name, search_reg=0.02):
        """
        :param time: position of the boundary
        :param name: label of the boundary
        :param search_reg: half-width of the search region (default 0.02)
        """
        self.time = time
        self.name = name
        self.reg_beg = time - search_reg
        # The region ends *after* the boundary; subtracting here would
        # collapse it to the single point reg_beg.
        self.reg_end = time + search_reg

    def __str__(self):
        """Render the boundary as <"name": time>."""
        return '<"{}": {}>'.format(self.name, self.time)
def count_hits(ref_bound, hyp_bound, search_reg=0.02):
    """Count hypothesis boundaries that hit a reference boundary.

    A hit is a (reference, hypothesis) pair with equal names where the
    hypothesis time lies inside the reference boundary's search region
    (both ends inclusive).

    Side effect: reg_beg / reg_end of every element of ref_bound are
    overwritten.

    :param ref_bound: sequence of reference boundaries, in time order
                      (objects with time and name attributes)
    :param hyp_bound: iterable of hypothesis boundaries
                      (objects with time and name attributes)
    :param search_reg: half-width of the search region (default 0.02,
                       i.e. the +/-20 ms of the quoted definition)
    :return: number of hits
    """
    # Search regions of a fixed size (+/- search_reg) are placed around each
    # reference boundary.
    for b in ref_bound:
        b.reg_beg = b.time - search_reg
        b.reg_end = b.time + search_reg

    # If the regions of two adjacent reference boundaries overlap (the
    # boundaries are closer than 2 * search_reg to each other), the regions
    # are asymmetrically shrunk so that the space between the two boundaries
    # is divided into two equal-width halves.
    for cur, nxt in zip(ref_bound, ref_bound[1:]):
        if cur.reg_end > nxt.reg_beg:
            # Parenthesised sum: the midpoint, not cur.time + nxt.time / 2.
            mid = (cur.time + nxt.time) / 2.0
            cur.reg_end = mid
            nxt.reg_beg = mid

    # "a boundary is considered to be correctly detected if the hypothesis and
    # the manual transcription are within 20 ms of each other"
    hit = 0
    for b in ref_bound:
        for b2 in hyp_bound:
            if b.reg_beg <= b2.time <= b.reg_end and b.name == b2.name:
                hit += 1
    return hit
def seg2boundary(segments):
    """Convert a sequence of segments into the list of boundaries around them.

    For N segments, N + 1 boundaries are produced: one at the start of the
    first segment, one between every pair of neighbouring segments (placed at
    the end of the earlier one) and one at the end of the last segment. Each
    boundary is named '<previous text>_<next text>', with '#' standing in for
    the missing neighbour at either end.

    :param segments: sequence of objects with text, start and dur attributes
    :return: list of Boundary objects; empty if segments is empty
    :raises AssertionError: if a segment starts more than 0.01 away from the
                            end of the segment before it
    """
    # No segments means no boundaries (indexing segments[0] would fail).
    if not segments:
        return []

    first = segments[0]
    ret = [Boundary(first.start, '#' + '_' + first.text)]

    for prev, nxt in zip(segments, segments[1:]):
        time = prev.start + prev.dur
        # Neighbouring segments are expected to be contiguous.
        assert abs(time - nxt.start) <= 0.01, '{} - {}'.format(time, nxt.start)
        ret.append(Boundary(time, prev.text + '_' + nxt.text))

    last = segments[-1]
    ret.append(Boundary(last.start + last.dur, last.text + '_' + '#'))
    return ret
def debug(lst):
    """Print every element of lst on its own line (debugging aid).

    :param lst: iterable of printable objects
    """
    for el in lst:
        # Single-argument call form: same output as the Python 2 print
        # statement, and also valid syntax on Python 3.
        print(el)
def _load_segmentation(path, tier, role):
    """Read a segmentation file, choosing the reader by the file's extension.

    :param path: file to read; '.ctm' or '.TextGrid'
    :param tier: tier index, used only for TextGrid files
    :param role: 'ref' or 'hyp', used only in the error message
    :raises IOError: if the extension is neither '.ctm' nor '.TextGrid'
    """
    if path.endswith('.ctm'):
        return read_ctm(path)
    if path.endswith('.TextGrid'):
        return read_textgrid(path, tier)
    raise IOError('Unknown extension for ' + role + ' file: ' + path)


def _compute_measures(hit_count, ref_count, hyp_count):
    """Compute the accuracy measures from the three boundary counts.

    :return: dict with keys 'hr' and 'os' (percentages), 'prc', 'rcl',
             'f_meas' (fractions), 'r1', 'r2' and 'R' (R-value, fraction)
    :raises ValueError: if ref_count or hyp_count is not positive
    """
    if ref_count <= 0 or hyp_count <= 0:
        raise ValueError('Cannot compute measures without boundaries '
                         '(ref: {}, hyp: {})'.format(ref_count, hyp_count))

    hr = (hit_count / float(ref_count)) * 100.0
    over_seg = ((hyp_count / float(ref_count)) - 1) * 100.0

    prc = hit_count / float(hyp_count)
    rcl = hit_count / float(ref_count)
    # With no hits both precision and recall are 0; define F as 0 instead of
    # dividing by zero.
    f_meas = (2 * prc * rcl) / (prc + rcl) if (prc + rcl) > 0 else 0.0

    r1 = math.sqrt((100.0 - hr) ** 2 + over_seg ** 2)
    r2 = (-over_seg + hr - 100.0) / math.sqrt(2.0)
    r_value = 1 - ((math.fabs(r1) + math.fabs(r2)) / 200.0)

    return {'hr': hr, 'os': over_seg, 'prc': prc, 'rcl': rcl,
            'f_meas': f_meas, 'r1': r1, 'r2': r2, 'R': r_value}


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Calcualte various alignemnt accuracy measures.')
    parser.add_argument('ref', help='reference segmentation (CTM or TextGrid)')
    parser.add_argument('hyp', help='studied segmentation (CTM or TextGrid)')
    parser.add_argument('--ref-tier', '-rt', dest='reftier', type=int, default=0,
                        help='for TextGrid, use which tier for reference (default:0)')
    parser.add_argument('--hyp-tier', '-ht', dest='hyptier', type=int, default=0,
                        help='for TextGrid, use which tier for hypothesis (default:0)')

    args = parser.parse_args()

    # Each file is dispatched on its OWN extension, so the reference and the
    # hypothesis may be in different formats.
    ref_seg = _load_segmentation(args.ref, args.reftier, 'ref')
    hyp_seg = _load_segmentation(args.hyp, args.hyptier, 'hyp')

    ref_bound = seg2boundary(ref_seg)
    hyp_bound = seg2boundary(hyp_seg)

    hit_count = count_hits(ref_bound, hyp_bound)
    hyp_count = len(hyp_bound)
    ref_count = len(ref_bound)

    m = _compute_measures(hit_count, ref_count, hyp_count)

    # Single-argument print calls behave the same on Python 2 and Python 3.
    print('Number of boundaries in reference segmentation: {}'.format(ref_count))
    print('Number of boundaries in studied segmentation: {}'.format(hyp_count))
    print('Number of hits: {}'.format(hit_count))
    print('Hit rate (higher=>better_: {:%}'.format(m['hr'] / 100.0))
    print('Over-segmentation rate (closer-zero=>better): {}'.format(m['os']))
    print('Precision (higher=>better): {:%}'.format(m['prc']))
    print('Recall (higher=>better): {:%}'.format(m['rcl']))
    print('F-measure (higher=>better): {:%}'.format(m['f_meas']))
    print('r1 (closer-zero=>better): {}'.format(m['r1']))
    print('r2 (closer-zero=>better): {}'.format(m['r2']))
    print('R-value (higher=>better): {:%}'.format(m['R']))