.. metrics.doctest
.. Copyright (C) 2001-2012 NLTK Project
.. For license information, see LICENSE.TXT
=======
Metrics
=======
The `nltk.metrics` package provides a range of *evaluation measures*
that can be used across a wide variety of NLP tasks.
>>> from nltk.metrics import *
>>> from nltk.test.doctest_utils import *
------------------
Standard IR Scores
------------------
We can use standard scores from information retrieval to test the
performance of taggers, chunkers, etc.
>>> reference = 'DET NN VB DET JJ NN NN IN DET NN'.split()
>>> test = 'DET VB VB DET NN NN NN IN DET NN'.split()
>>> float_equal(accuracy(reference, test), 0.8)
True
The following measures apply to sets:
>>> reference_set = set(reference)
>>> test_set = set(test)
>>> precision(reference_set, test_set)
1.0
>>> float_equal(recall(reference_set, test_set), 0.8)
True
>>> float_equal(f_measure(reference_set, test_set), 0.88888888888)
True
Measuring the likelihood of the data, given probability distributions:
>>> from nltk import FreqDist, MLEProbDist
>>> pdist1 = MLEProbDist(FreqDist("aldjfalskfjaldsf"))
>>> pdist2 = MLEProbDist(FreqDist("aldjfalssjjlldss"))
>>> float_equal(log_likelihood(['a', 'd'], [pdist1, pdist2]), -2.707518749639422)
True
----------------
Distance Metrics
----------------
String edit distance (Levenshtein):
>>> edit_distance("rain", "shine")
3
Other distance measures:
>>> s1 = set([1,2,3,4])
>>> s2 = set([3,4,5])
>>> binary_distance(s1, s2)
1.0
>>> float_equal(jaccard_distance(s1, s2), 0.6)
True
>>> float_equal(masi_distance(s1, s2), 0.5)
True
----------------------
Miscellaneous Measures
----------------------
Spearman rank correlation works with two dictionaries mapping keys to ranks. Both dictionaries must have the same set of keys.
>>> spearman_correlation({'e':1, 't':2, 'a':3}, {'e':1, 'a':2, 't':3})
0.5
Windowdiff uses a sliding window to compare two segmentations of the same input (e.g. tokenizations, chunkings).
Segmentations are represented as strings of zeros and ones, where a one marks a boundary; the third argument is the window size.
>>> s1 = "00000010000000001000000"
>>> s2 = "00000001000000010000000"
>>> s3 = "00010000000000000001000"
>>> windowdiff(s1, s1, 3)
0
>>> windowdiff(s1, s2, 3)
4
>>> windowdiff(s2, s3, 3)
16
----------------
Confusion Matrix
----------------
>>> reference = 'This is the reference data.  Testing 123.  aoaeoeoe'
>>> test = 'Thos iz_the rifirenci data.  Testeng 123.  aoaeoeoe'
>>> print ConfusionMatrix(reference, test)
  |   . 1 2 3 T _ a c d e f g h i n o r s t z |
--+-------------------------------------------+
  |<8>. . . . . 1 . . . . . . . . . . . . . . |
. | .<2>. . . . . . . . . . . . . . . . . . . |
1 | . .<1>. . . . . . . . . . . . . . . . . . |
2 | . . .<1>. . . . . . . . . . . . . . . . . |
3 | . . . .<1>. . . . . . . . . . . . . . . . |
T | . . . . .<2>. . . . . . . . . . . . . . . |
_ | . . . . . .<.>. . . . . . . . . . . . . . |
a | . . . . . . .<4>. . . . . . . . . . . . . |
c | . . . . . . . .<1>. . . . . . . . . . . . |
d | . . . . . . . . .<1>. . . . . . . . . . . |
e | . . . . . . . . . .<6>. . . 3 . . . . . . |
f | . . . . . . . . . . .<1>. . . . . . . . . |
g | . . . . . . . . . . . .<1>. . . . . . . . |
h | . . . . . . . . . . . . .<2>. . . . . . . |
i | . . . . . . . . . . 1 . . .<1>. 1 . . . . |
n | . . . . . . . . . . . . . . .<2>. . . . . |
o | . . . . . . . . . . . . . . . .<3>. . . . |
r | . . . . . . . . . . . . . . . . .<2>. . . |
s | . . . . . . . . . . . . . . . . . .<2>. 1 |
t | . . . . . . . . . . . . . . . . . . .<3>. |
z | . . . . . . . . . . . . . . . . . . . .<.>|
--+-------------------------------------------+
(row = reference; col = test)
<BLANKLINE>
>>> cm = ConfusionMatrix(reference, test)
>>> print cm.pp(sort_by_count=True)
  |   e a i o s t . T h n r 1 2 3 c d f g _ z |
--+-------------------------------------------+
  |<8>. . . . . . . . . . . . . . . . . . 1 . |
e | .<6>. 3 . . . . . . . . . . . . . . . . . |
a | . .<4>. . . . . . . . . . . . . . . . . . |
i | . 1 .<1>1 . . . . . . . . . . . . . . . . |
o | . . . .<3>. . . . . . . . . . . . . . . . |
s | . . . . .<2>. . . . . . . . . . . . . . 1 |
t | . . . . . .<3>. . . . . . . . . . . . . . |
. | . . . . . . .<2>. . . . . . . . . . . . . |
T | . . . . . . . .<2>. . . . . . . . . . . . |
h | . . . . . . . . .<2>. . . . . . . . . . . |
n | . . . . . . . . . .<2>. . . . . . . . . . |
r | . . . . . . . . . . .<2>. . . . . . . . . |
1 | . . . . . . . . . . . .<1>. . . . . . . . |
2 | . . . . . . . . . . . . .<1>. . . . . . . |
3 | . . . . . . . . . . . . . .<1>. . . . . . |
c | . . . . . . . . . . . . . . .<1>. . . . . |
d | . . . . . . . . . . . . . . . .<1>. . . . |
f | . . . . . . . . . . . . . . . . .<1>. . . |
g | . . . . . . . . . . . . . . . . . .<1>. . |
_ | . . . . . . . . . . . . . . . . . . .<.>. |
z | . . . . . . . . . . . . . . . . . . . .<.>|
--+-------------------------------------------+
(row = reference; col = test)
<BLANKLINE>
>>> print cm.pp(sort_by_count=True, truncate=10)
  |   e a i o s t . T h |
--+---------------------+
  |<8>. . . . . . . . . |
e | .<6>. 3 . . . . . . |
a | . .<4>. . . . . . . |
i | . 1 .<1>1 . . . . . |
o | . . . .<3>. . . . . |
s | . . . . .<2>. . . . |
t | . . . . . .<3>. . . |
. | . . . . . . .<2>. . |
T | . . . . . . . .<2>. |
h | . . . . . . . . .<2>|
--+---------------------+
(row = reference; col = test)
<BLANKLINE>
>>> print cm.pp(sort_by_count=True, truncate=10, values_in_chart=False)
   |                   1 |
   | 1 2 3 4 5 6 7 8 9 0 |
---+---------------------+
 1 |<8>. . . . . . . . . |
 2 | .<6>. 3 . . . . . . |
 3 | . .<4>. . . . . . . |
 4 | . 1 .<1>1 . . . . . |
 5 | . . . .<3>. . . . . |
 6 | . . . . .<2>. . . . |
 7 | . . . . . .<3>. . . |
 8 | . . . . . . .<2>. . |
 9 | . . . . . . . .<2>. |
10 | . . . . . . . . .<2>|
---+---------------------+
(row = reference; col = test)
Value key:
     1:
     2: e
     3: a
     4: i
     5: o
     6: s
     7: t
     8: .
     9: T
    10: h
<BLANKLINE>