-
Notifications
You must be signed in to change notification settings - Fork 0
/
semdis_eval.py
238 lines (210 loc) · 9.95 KB
/
semdis_eval.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
This module computes the measures defined by the Semdis 2014
lexical substitution task for French
http://www.irit.fr/semdis2014
"""
__author__ = "Tim Van de Cruys, Ludovic Tanguy"
__version__ = "0.3"
import re
import sys
import argparse
import warnings
class SemdisEvaluation(object):
    """Scorer for the Semdis 2014 lexical substitution task.

    The gold standard and the normalization mapping are loaded once, when
    the object is created; a submission file is then scored with
    ``evaluate``.

    All input files are read as UTF-8.  Only constructs that behave the same
    under Python 2.7 and Python 3 are used (binary reads + explicit decode,
    single-argument ``print(...)``, ``in`` instead of ``has_key``).
    """

    # Number of guesses expected per item and taken into account by "oot".
    MAX_GUESSES = 10

    def __init__(self, goldFile, mappingFile='mapping.txt'):
        """Load the gold standard and the normalization mapping.

        goldFile    -- path of the gold standard file
        mappingFile -- path of the normalization mapping file; defaults to
                       'mapping.txt' in the current working directory
        """
        self.goldFile = goldFile
        self.goldDict, self.sumAnswerDict = self.__parseGoldStandard()
        self.normalizationMapping = self.__parseNormalizationMapping(mappingFile)

    def __parseNormalizationMapping(self, mappingFile):
        """Read the mapping rules ``proposed form(lexelt.p)-->normalized form``.

        Returns a dictionary of dictionaries:
        ``{lexeltId: {proposedForm: normalizedForm}}``.  Lines that do not
        match the rule syntax (e.g. comment lines) are ignored.
        """
        normalizationMapping = {}
        # regex for mapping rules
        mappingPattern = re.compile(r'^([\w ]+)\((\w+\.[a-z])\)-->([\w ]+)$', re.UNICODE)
        with open(mappingFile, 'rb') as handle:
            for rawLine in handle:
                line = rawLine.decode('utf8').rstrip()
                mappingMatch = mappingPattern.match(line)
                if mappingMatch is None:
                    # ignore comment lines
                    continue
                proposedForm, lexeltId, normalizedForm = mappingMatch.groups()
                normalizationMapping.setdefault(lexeltId, {})[proposedForm] = normalizedForm
        return normalizationMapping

    def __parseGoldStandard(self):
        """Read the gold standard file named by ``self.goldFile``.

        Each line has the form ``lexelt.p id :: answer freq;answer freq;...``.
        Returns ``(goldDict, sumAnswerDict)`` where ``goldDict`` maps
        ``(lexelt, id)`` to ``{answer: frequency}`` and ``sumAnswerDict``
        maps ``(lexelt, id)`` to the sum of all answer frequencies.

        Blank lines are skipped; any other line that does not follow the
        format raises ValueError.
        """
        goldDict = {}
        sumAnswerDict = {}
        # define some useful regular expressions
        splitPattern1 = re.compile(r' :: ')
        idPattern = re.compile(r'(\w+\.[a-z]) ([0-9]+)', re.UNICODE)
        answerPattern = re.compile(r'(.*) ([0-9]+)', re.UNICODE)
        with open(self.goldFile, 'rb') as handle:
            for lineNumber, rawLine in enumerate(handle, 1):
                # also strip off possible ";" at end of line
                line = rawLine.decode('utf8').strip().rstrip(';')
                if not line:
                    continue
                parts = splitPattern1.split(line)
                idMatch = idPattern.match(parts[0])
                if len(parts) != 2 or idMatch is None:
                    raise ValueError('Wrong gold file format (line %d)' % lineNumber)
                # (word, numberid) tuple as key
                keyTuple = (idMatch.group(1), int(idMatch.group(2)))
                answers = {}
                sumAnswers = 0
                # multiword expressions are left as they are: everything
                # before the final " <number>" is the answer
                for answer in parts[1].split(';'):
                    answerMatch = answerPattern.match(answer.strip())
                    if answerMatch is None:
                        raise ValueError('Wrong gold file format (line %d)' % lineNumber)
                    frequency = int(answerMatch.group(2))
                    answers[answerMatch.group(1)] = frequency
                    sumAnswers += frequency
                goldDict[keyTuple] = answers
                sumAnswerDict[keyTuple] = sumAnswers
        return goldDict, sumAnswerDict

    def __parseTestFile(self, testFile, normalize=True):
        """Read a submission file and return ``{(lexelt, id): [guess, ...]}``.

        Each line has the form ``lexelt.p id :: guess;guess;...``; a line
        that ends with ``::`` denotes an item without any guess.  Blank
        lines are skipped, every other malformed line raises
        ValueError('Wrong file format').

        When ``normalize`` is true, guesses of lexelts that have mapping
        rules are replaced by their normalized form and duplicates created
        that way are removed (first occurrence kept).
        """
        testDict = {}
        splitPattern1 = re.compile(r' :: ')
        idPattern = re.compile(r'(\w+\.[a-z]) ([0-9]+)', re.UNICODE)
        with open(testFile, 'rb') as handle:
            for rawLine in handle:
                line = rawLine.decode('utf8').strip().rstrip(';')
                if not line:
                    continue
                parts = splitPattern1.split(line)
                if len(parts) == 2:
                    idSource = parts[0]
                    testAnswerList = [answer.strip() for answer in parts[1].split(';')]
                elif line.endswith('::'):
                    # empty answer list: the trailing " :: " lost its final
                    # space when the line was stripped
                    idSource = line
                    testAnswerList = []
                else:
                    raise ValueError('Wrong file format')
                idMatch = idPattern.match(idSource)
                if idMatch is None:
                    raise ValueError('Wrong file format')
                # (word, numberid) tuple as key
                keyTuple = (idMatch.group(1), int(idMatch.group(2)))

                # when normalize flag is on, answers are mapped to their
                # normalized form
                if normalize and keyTuple[0] in self.normalizationMapping:
                    mapping = self.normalizationMapping[keyTuple[0]]
                    # normalizing might cause duplicates; remove them but
                    # preserve the order of the list.
                    # NOTE(review): the de-duplication is scoped to the
                    # normalized case so that the duplicate warning below
                    # stays reachable -- confirm against the upstream file.
                    seen = set()
                    deduplicated = []
                    for answer in testAnswerList:
                        answer = mapping.get(answer, answer)
                        if answer not in seen:
                            seen.add(answer)
                            deduplicated.append(answer)
                    testAnswerList = deduplicated

                if len(testAnswerList) != len(set(testAnswerList)):
                    warnings.warn('Some items contain duplicates, which is not allowed')
                if len(testAnswerList) != self.MAX_GUESSES:
                    warnings.warn('Some items do not contain %d guesses - or wrong file format; '
                                  'trying to proceed anyway' % self.MAX_GUESSES)
                testDict[keyTuple] = testAnswerList
        return testDict

    def evaluate(self, testFile, metric, normalize):
        """Score ``testFile`` and print the result on standard output.

        testFile  -- path of the submission file
        metric    -- one of 'all', 'best', 'oot', 'detail'
        normalize -- apply the normalization mapping to the guesses

        Raises NotImplementedError for any other ``metric`` and ValueError
        when the submission file is malformed.
        """
        testDict = self.__parseTestFile(testFile, normalize)
        print('\nEvaluating %d instances..\n' % len(testDict))
        if metric == 'all':
            self.__best(testDict)
            self.__oot(testDict)
        elif metric == 'best':
            self.__best(testDict)
        elif metric == 'oot':
            self.__oot(testDict)
        elif metric == 'detail':
            # the file name is handed over explicitly instead of being read
            # from a module-level variable
            self.__detail(testDict, testFile)
        else:
            raise NotImplementedError('Unsupported evaluation method')

    @staticmethod
    def __mean(total, count):
        """Return total / count as a float, or 0.0 when there are no items."""
        if not count:
            return 0.0
        return total / float(count)

    def __itemBestScore(self, key, valueList):
        """Gold frequency of the first guess divided by the frequency sum.

        Returns 0.0 for an empty guess list or a first guess that is not in
        the gold standard.
        """
        if not valueList:
            return 0.0
        bestAnswer = valueList[0]
        if bestAnswer not in self.goldDict[key]:
            return 0.0
        return self.goldDict[key][bestAnswer] / float(self.sumAnswerDict[key])

    def __itemOotScore(self, key, valueList):
        """Summed gold frequency of the first MAX_GUESSES guesses divided by
        the frequency sum of the item; 0.0 for an empty guess list."""
        if not valueList:
            return 0.0
        if len(valueList) > self.MAX_GUESSES:
            warnings.warn('Some items contain more than %d guesses - only taking '
                          'first %d into account' % (self.MAX_GUESSES, self.MAX_GUESSES))
            valueList = valueList[:self.MAX_GUESSES]
        gold = self.goldDict[key]
        allScore = sum(gold.get(answer, 0) for answer in valueList)
        return allScore / float(self.sumAnswerDict[key])

    def __best(self, testDict):
        """Print the mean "best" score over all items of ``testDict``."""
        sumNumerator = 0.0
        for key, valueList in testDict.items():
            sumNumerator += self.__itemBestScore(key, valueList)
        print('Best score: %s' % self.__mean(sumNumerator, len(testDict)))

    def __oot(self, testDict):
        """Print the mean "out of ten" score over all items of ``testDict``."""
        sumNumerator = 0.0
        for key, valueList in testDict.items():
            sumNumerator += self.__itemOotScore(key, valueList)
        # two spaces after the colon: same text the Python 2 print statement
        # produced for ('OOT score: ', value)
        print('OOT score:  %s' % self.__mean(sumNumerator, len(testDict)))

    def __detail(self, testDict, testFile=''):
        """Print one tab-separated line per item, sorted by key.

        Columns: ID, lexelt, number of proposed words, score for the best
        word, oot score.  The base name of ``testFile`` (directories and
        everything from the first dot removed) is used as submission name
        in the column headers.
        """
        name = re.sub(r'^.*/', '', testFile)
        name = re.sub(r'\..*?$', '', name)
        # The " \t" separators reproduce the layout written by the original
        # Python 2 print statement (a space before each tab string).
        print('ID\tlexelt\t%s-Nreponses \t%s-Best \t%s-OOT' % (name, name, name))
        for key, valueList in sorted(testDict.items()):
            lexelt = key[0]
            if not isinstance(lexelt, str):
                # Python 2: unicode object -> UTF-8 byte string for output
                lexelt = lexelt.encode('utf8')
            bestScore = self.__itemBestScore(key, valueList)
            ootScore = self.__itemOotScore(key, valueList)
            print('%s \t%s \t%s \t%s \t%s'
                  % (key[1], lexelt, len(valueList), bestScore, ootScore))
if __name__ == "__main__":
    # Command-line entry point: read the options, load the gold standard and
    # score the submission with the requested measure.
    description = ('This script calculates '
                   'the evaluation measures for the Semdis 2014 lexical substitution task for French '
                   '- http://www.irit.fr/semdis2014/')
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument(
        '-g', '--goldfile',
        required=True,
        help='Gold standard file',
    )
    parser.add_argument(
        '-t', '--testfile',
        required=True,
        help='File with test results',
    )
    parser.add_argument(
        '-m', '--measure',
        required=False,
        default='all',
        help='One of "best", "oot", "all", "detail" (default = all)',
    )
    # "store_false": the attribute is True unless -nn is given, so it can be
    # handed to evaluate() directly as the "normalize" flag.
    parser.add_argument(
        '-nn', '--nonormalize',
        required=False,
        action="store_false",
        help='Do not apply normalization mapping',
    )
    # Kept under the module-level name `args`: other code in this file may
    # read it.
    args = parser.parse_args()
    evaluator = SemdisEvaluation(args.goldfile)
    evaluator.evaluate(args.testfile, args.measure, args.nonormalize)