-
Notifications
You must be signed in to change notification settings - Fork 1.4k
/
Copy pathconfusion_sets.txt
76 lines (74 loc) · 11.3 KB
/
confusion_sets.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# See the English 'confusion_sets.txt' for a description of the file format.
bon -> bond; 100 # p=1.000 r=0.190 f0.5=0.541 347+21 2020-03-17 f0.1=0.96 FA/10k=0.000
bond -> bon; 10000 # p=1.000 r=0.790 f0.5=0.949 21+347 2020-03-17 f0.1=1.00 FA/10k=0.000
#ce -> se; 100 # p=0.997 r=0.933 f0.5=0.984 337+690 2020-03-17 f0.1=1.00 FA/10k=0.179 (commented due to estimated false alarms rate > 0.025)
#se -> ce; 100 # p=1.000 r=0.760 f0.5=0.940 690+337 2020-03-17 f0.1=1.00 FA/10k=0.000 commented due to false positives
#cent -> sans; 10000 # p=0.997 r=0.800 f0.5=0.951 242+466 2020-03-17 f0.1=0.99 FA/10k=0.031 (commented due to estimated false alarms rate > 0.025)
#sans -> cent; 1000000 # p=1.000 r=0.463 f0.5=0.812 466+242 2020-03-17 f0.1=0.99 FA/10k=0.000 commented due to false positives
#cette -> sept; 1000 # p=1.000 r=0.624 f0.5=0.892 360+404 2020-03-17 f0.1=0.99 FA/10k=0.000 commented due to false positives
#sept -> cette; 1000000 # p=1.000 r=0.908 f0.5=0.980 404+360 2020-03-17 f0.1=1.00 FA/10k=0.000 commented due to false positives
dans -> dent; 10000 # p=1.000 r=0.656 f0.5=0.905 217+352 2020-03-17 f0.1=0.99 FA/10k=0.000
#dent -> dans; 10000000 # p=0.986 r=0.641 f0.5=0.890 352+217 2020-03-17 f0.1=0.98 FA/10k=0.837 (commented due to precision < 0.99)
don -> donc; 1000 # p=1.000 r=0.763 f0.5=0.941 60+636 2020-03-17 f0.1=1.00 FA/10k=0.000
donc -> don; 1000 # p=1.000 r=0.650 f0.5=0.903 636+60 2020-03-17 f0.1=0.99 FA/10k=0.000
#donc -> dont; 1000 # p=0.962 r=0.260 f0.5=0.625 636+780 2020-03-17 f0.1=0.94 FA/10k=0.572 (commented due to precision < 0.99)
#dont -> donc; 10000000 # p=0.985 r=0.506 f0.5=0.828 780+636 2020-03-17 f0.1=0.98 FA/10k=0.226 (commented due to precision < 0.99)
#il -> ils; 10 # p=1.000 r=0.581 f0.5=0.874 69+136 2020-03-17 f0.1=0.99 FA/10k=0.000
#ils -> il; 100000 # p=1.000 r=0.406 f0.5=0.773 136+69 2020-03-17 f0.1=0.99 FA/10k=0.000
#mais -> mai; 10000 # p=1.000 r=0.762 f0.5=0.941 336+404 2020-03-17 f0.1=1.00 FA/10k=0.000
moi -> mois; 1000 # p=1.000 r=0.689 f0.5=0.917 513+592 2020-03-17 f0.1=1.00 FA/10k=0.000
mois -> moi; 10000 # p=0.996 r=0.493 f0.5=0.827 592+513 2020-03-17 f0.1=0.99 FA/10k=0.017
#nom -> non; 100000 # p=0.965 r=0.407 f0.5=0.757 305+273 2020-03-17 f0.1=0.95 FA/10k=0.323 (commented due to precision < 0.99)
notre -> nôtre; 10000000 # p=1.000 r=0.767 f0.5=0.943 184+519 2020-03-17 f0.1=1.00 FA/10k=0.000
#nôtre -> notre; 1000000 # p=0.967 r=0.788 f0.5=0.925 519+184 2020-03-17 f0.1=0.96 FA/10k=0.153 (commented due to precision < 0.99)
pain -> pin; 10 # p=1.000 r=0.276 f0.5=0.656 407+228 2020-03-17 f0.1=0.97 FA/10k=0.000
#pin -> pain; 100000 # p=0.986 r=0.351 f0.5=0.724 228+407 2020-03-17 f0.1=0.97 FA/10k=0.004 (commented due to precision < 0.99)
#paire -> père; 10000000 # p=0.976 r=0.653 f0.5=0.888 741+570 2020-03-17 f0.1=0.97 FA/10k=0.033 (commented due to precision < 0.99)
père -> paire; 100 # p=1.000 r=0.796 f0.5=0.951 570+741 2020-03-17 f0.1=1.00 FA/10k=0.000
peau -> pot; 10 # p=0.996 r=0.658 f0.5=0.903 486+392 2020-03-17 f0.1=0.99 FA/10k=0.001
pot -> peau; 100000 # p=1.000 r=0.831 f0.5=0.961 392+486 2020-03-17 f0.1=1.00 FA/10k=0.000
#pris -> prix; 10000 # p=0.959 r=0.338 f0.5=0.701 350+139 2020-03-17 f0.1=0.94 FA/10k=0.152 (commented due to precision < 0.99)
prix -> pris; 10 # p=1.000 r=0.880 f0.5=0.973 139+350 2020-03-17 f0.1=1.00 FA/10k=0.000
#quand -> quant; 10 # p=1.000 r=0.937 f0.5=0.987 303+474 2020-03-17 f0.1=1.00 FA/10k=0.000 commented out due to false positive
#quant -> quand; 100000 # p=0.988 r=0.785 f0.5=0.939 474+303 2020-03-17 f0.1=0.99 FA/10k=0.038 (commented due to precision < 0.99)
#sais -> sait; 10 # p=0.998 r=0.773 f0.5=0.943 359+797 2020-03-17 f0.1=1.00 FA/10k=0.003 commented due to false positives
#sait -> sais; 1000 # p=1.000 r=0.471 f0.5=0.816 797+359 2020-03-17 f0.1=0.99 FA/10k=0.000 commented due to false positives
tante -> tente; 1000 # p=0.997 r=0.758 f0.5=0.938 352+890 2020-03-17 f0.1=0.99 FA/10k=0.001
tente -> tante; 1000 # p=1.000 r=0.210 f0.5=0.571 890+352 2020-03-17 f0.1=0.96 FA/10k=0.000
toi -> toit; 10 # p=1.000 r=0.826 f0.5=0.959 497+728 2020-03-17 f0.1=1.00 FA/10k=0.000
#toit -> toi; 1000000 # p=0.985 r=0.400 f0.5=0.762 728+497 2020-03-17 f0.1=0.97 FA/10k=0.008 (commented due to precision < 0.99)
#trait -> très; 100000 # p=0.992 r=0.851 f0.5=0.960 567+596 2020-03-17 f0.1=0.99 FA/10k=0.061 (commented due to estimated false alarms rate > 0.025)
très -> trait; 10000 # p=1.000 r=0.519 f0.5=0.843 596+567 2020-03-17 f0.1=0.99 FA/10k=0.000
vain -> vin; 1000 # p=0.990 r=0.785 f0.5=0.941 810+506 2020-03-17 f0.1=0.99 FA/10k=0.005
vin -> vain; 100 # p=0.999 r=0.951 f0.5=0.989 506+810 2020-03-17 f0.1=1.00 FA/10k=0.001
vain -> vingt; 100000 # p=1.000 r=0.429 f0.5=0.790 810+445 2020-03-17 f0.1=0.99 FA/10k=0.000
vingt -> vain; 10 # p=1.000 r=0.969 f0.5=0.994 445+810 2020-03-17 f0.1=1.00 FA/10k=0.000
vin -> vingt; 10000 # p=0.995 r=0.432 f0.5=0.789 503+444 2020-03-17 f0.1=0.98 FA/10k=0.006
vingt -> vin; 100 # p=0.995 r=0.805 f0.5=0.950 444+503 2020-03-17 f0.1=0.99 FA/10k=0.006
#ver -> verre; 10000 # p=0.968 r=0.602 f0.5=0.863 439+708 2020-03-17 f0.1=0.96 FA/10k=0.009 (commented due to precision < 0.99)
verre -> vers; 1000000 # p=1.000 r=0.705 f0.5=0.923 706+474 2020-03-17 f0.1=1.00 FA/10k=0.000
#ver -> vers; 1000000 # p=0.979 r=0.694 f0.5=0.905 438+481 2020-03-17 f0.1=0.98 FA/10k=0.080 (commented due to precision < 0.99)
#ver -> vert; 1000 # p=0.971 r=0.556 f0.5=0.845 438+666 2020-03-17 f0.1=0.96 FA/10k=0.007 (commented due to precision < 0.99)
#vert -> verre; 100 # p=0.991 r=0.745 f0.5=0.929 664+707 2020-03-17 f0.1=0.99 FA/10k=0.004
vert -> verre; 10000; # p=0.993, r=0.420, f0.5=0.780, 664+707, 3grams, 2020-03-17
#verre -> vers; 1000000 # p=1.000 r=0.705 f0.5=0.923 706+474 2020-03-17 f0.1=1.00 FA/10k=0.000 (commented out: exact duplicate of the active 'verre -> vers' rule earlier in this file)
#vers -> verre; 100 # p=1.000 r=0.677 f0.5=0.913 474+706 2020-03-17 f0.1=1.00 FA/10k=0.000
vers -> verre; 10000; # p=1.000, r=0.380, f0.5=0.754, 474+706, 3grams, 2020-03-17
#vers -> vert; 100 # p=1.000 r=0.306 f0.5=0.688 474+661 2020-03-17 f0.1=0.98 FA/10k=0.000
#vert -> vers; 10000000 # p=0.991 r=0.667 f0.5=0.903 661+474 2020-03-17 f0.1=0.99 FA/10k=0.036 (commented due to estimated false alarms rate > 0.025)
verre -> vert; 100 # p=0.990 r=0.574 f0.5=0.864 707+664 2020-03-17 f0.1=0.98 FA/10k=0.004
#votre -> vôtre; 10 # p=0.992 r=0.939 f0.5=0.981 325+872 2020-03-17 f0.1=0.99 FA/10k=0.010 commented due to false positives
# The sets below are commented out; their precision may still not be high enough:
#an; en; 1000000 # p=0.997, r=0.803, 1000+529, 3grams, 2016-03-29
#à; a; 10000000 # p=0.999, r=0.768, 1000+1000, 3grams, 2016-03-29
#ces; ses; 100000 # p=0.986, r=0.218, 909+990, 3grams, 2016-03-29
#cor; corps; 10000000 # p=0.997, r=0.790, 21+437, 3grams, 2016-03-30
#cours; court; 10000000 # p=0.995, r=0.630, 866+301, 3grams, 2016-03-30
#maire; mer; 10000000 # p=0.998, r=0.692, 212+528, 3grams, 2016-03-30
#maire; mère; 10000000 # p=0.998, r=0.694, 212+994, 3grams, 2016-03-30
#mer; mère; 10000000 # p=0.999, r=0.518, 528+992, 3grams, 2016-03-30
#par; part; 10000000 # p=0.997, r=0.798, 978+843, 3grams, 2016-03-30
#parti; partie; 10000000 # p=0.999, r=0.694, 995+983, 3grams, 2016-03-30
#saint;sen; 10000000 # p=0.998, r=0.333, 997+249, 3grams, 2016-03-30
#sur; sûr; 10000000 # p=0.999, r=0.741, 999+469, 3grams, 2016-03-30
#tant; temps; 10000000 # p=0.999, r=0.587, 587+1000, 3grams, 2016-03-30