-
Notifications
You must be signed in to change notification settings - Fork 0
/
test.py
60 lines (55 loc) · 1.88 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
from fcd.fcd import fcdet, fast_ccm
from sys import argv
from glob import glob
from itertools import product
from lingpy import *
from lingpy.evaluate.acd import *
import tqdm
def column_mean(table, index, digits):
    """Return the rounded mean of one column of *table*.

    Parameters
    ----------
    table : list of sequences
        Non-empty list of rows; every row must hold a number at *index*.
    index : int
        Position of the value to average within each row.
    digits : int
        Number of decimal places passed to ``round``.

    Raises
    ------
    ZeroDivisionError
        If *table* is empty (callers are expected to guard against this).
    """
    return round(sum(row[index] for row in table) / len(table), digits)


# Grid search over fcdet parameters, run only when the script is invoked
# with the literal argument "training".  For every parameter combination
# each training wordlist is clustered with fcdet (result column 'autocog')
# and compared against the 'cogid' column with bcubes; the per-file scores
# are averaged and printed as one row per combination.
if 'training' in argv:
    ngrams = [3, 4, 5]
    excludes = ['V_+', 'V_+T']
    ngram_gaps = [True, False]
    allngrams = [True, False]
    cuts = [1]
    models = ['sca', 'dolgo', 'asjp']
    gaps = [True, False]

    # The list of training files does not depend on the parameters, so it
    # is collected once instead of once per combination.
    files = glob('data/training/*.csv')
    if not files:
        # Without this guard the averaging below would fail with an opaque
        # ZeroDivisionError on the first combination.
        raise SystemExit(
            'No training files found matching data/training/*.csv')

    # Highest mean of the third bcubes score seen so far.
    best = 0.0
    for ngram, exclude, ngram_gap, cut, model, gap, allngram in product(
            ngrams, excludes, ngram_gaps, cuts, models, gaps, allngrams):
        table = []
        for f in files:
            # A fresh Wordlist is loaded for every combination; fcdet is
            # handed the wordlist and the target column name 'autocog'.
            wl = Wordlist(f)
            if 'tokens' not in wl.header:
                wl.add_entries('tokens', 'ipa', ipa2tokens)
            fcdet(
                wl,
                exclude=exclude,
                ngrams=ngram,
                ngram_gaps=ngram_gap,
                cut=cut,
                model=model,
                gaps=gap,
                all_ngrams=allngram,
                ref='autocog'
            )
            # NOTE(review): the three values are presumably B-cubed
            # precision, recall and F-score -- confirm against LingPy.
            p, r, fs = bcubes(wl, 'cogid', 'autocog', pprint=False)
            # f[:-4] drops the four-character '.csv' suffix.
            table.append([f[:-4], round(p, 2), round(r, 2), round(fs, 4)])

        mean_fs = column_mean(table, 3, 4)
        # '*' flags a combination that strictly beats every earlier one.
        if mean_fs > best:
            best = mean_fs
            star = '*'
        else:
            star = ' '
        print('{0:5} | {1} | {2:6} | {3:6} | {4:6} | {5:6} | {6:6} | {7:.2f} | {8:.2f} | {9:.4f} {10}'.format(
            exclude,
            ngram,
            str(ngram_gap),
            cut,
            model,
            str(gap),
            str(allngram),
            column_mean(table, 1, 2),
            column_mean(table, 2, 2),
            mean_fs,
            star,
        ))