__author__ = 'hcaesar'

import numpy as np
import datetime
import time
from cocostuffhelper import cocoSegmentationToSegmentationMap

class COCOStuffeval:
    # Interface for evaluating stuff segmentations against a ground-truth.
    #
    # The usage for COCOStuffeval is as follows:
    #  cocoGt=..., cocoRes=...            # load dataset and results
    #  E = COCOStuffeval(cocoGt, cocoRes) # initialize COCOStuffeval object
    #  E.params.imgIds = ...              # set parameters as desired
    #  E.evaluate()                       # run per image evaluation
    #  E.summarize()                      # display summary metrics of results
    # For example usage see pycocostuffEvalDemo.py.
    #
    # Note: Our evaluation has to take place on all classes. If we removed one
    # class from evaluation, it would not be clear what should happen to pixels
    # for which a method outputs that class (but where the ground-truth has a
    # different class).
    #
    # The evaluation parameters are as follows (defaults in brackets):
    #  imgIds     - [all] N img ids to use for evaluation
    #
    # evaluate(): evaluates segmentations on each image and
    # stores the results in the dictionary 'eval' with fields:
    #  params     - parameters used for evaluation
    #  date       - date evaluation was performed
    #  confusion  - confusion matrix used for the final metrics
    #
    # summarize(): computes and prints the evaluation metrics.
    # Results are printed to stdout and stored in:
    #  stats      - a numpy array of the evaluation metrics (mean IOU etc.)
    #  statsClass - a dict that stores per-class results in ious and maccs
    #
    # See also coco, mask, pycocostuffDemo, pycocostuffEvalDemo
    #
    # Microsoft COCO Toolbox.      version 2.0
    # Data, paper, and tutorials available at: http://mscoco.org/
    # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
    # Licensed under the Simplified BSD License [see coco/license.txt]
    def __init__(self, cocoGt, cocoRes, stuffStartId=92, stuffEndId=182, addOther=True):
        '''
        Initialize COCOStuffeval using COCO APIs for gt and dt
        :param cocoGt: COCO object with ground truth annotations
        :param cocoRes: COCO object with detection results
        :param stuffStartId: id of the first stuff class
        :param stuffEndId: id of the last stuff class
        :param addOther: whether to use an 'other' class that subsumes all thing classes
        :return: None
        '''
        self.cocoGt = cocoGt              # Ground truth COCO API
        self.cocoRes = cocoRes            # Result COCO API
        self.stuffStartId = stuffStartId  # Id of the first stuff class
        self.stuffEndId = stuffEndId      # Id of the last stuff class
        self.addOther = addOther          # Whether to add a class that subsumes all thing classes
        self.eval = {}                    # Accumulated evaluation results
        self.confusion = []               # Confusion matrix that all metrics are computed on
        self.stats = []                   # Result summarization
        self.statsClass = {}              # Per-class results
        self.params = Params()            # Evaluation parameters
        self.params.imgIds = sorted(cocoGt.getImgIds())  # By default we use all images from the GT file
        self.catIds = range(stuffStartId, stuffEndId+addOther+1)  # Take into account all stuff
                                                                  # classes and one 'other' class

    def evaluate(self):
        '''
        Run per image evaluation on given images and store results in self.confusion.
        Note that this can take up to several hours.
        :return: None
        '''
        # Reset eval and print message
        tic = time.time()
        imgIds = self.params.imgIds
        print('Evaluating stuff segmentation on %d images and %d classes...' \
              % (len(imgIds), len(self.catIds)))

        # Check that all images in params occur in GT and results
        gtImgIds = sorted(set(self.cocoGt.getImgIds()))
        resImgIds = sorted(set([a['image_id'] for a in self.cocoRes.anns.values()]))
        missingInGt = [p for p in imgIds if p not in gtImgIds]
        missingInRes = [p for p in imgIds if p not in resImgIds]
        if len(missingInGt) > 0:
            raise Exception('Error: Some images specified in imgIds do not occur in the GT: %s' % missingInGt)
        if len(missingInRes) > 0:
            raise Exception('Error: %d evaluation images not found in the result!' % len(missingInRes))

        # Create confusion matrix
        labelCount = max([c for c in self.cocoGt.cats])
        confusion = np.zeros((labelCount, labelCount))
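        # Rows index the ground-truth label and columns the predicted label;
        # label l is stored at index l-1, since class labels start at 1.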
        for i, imgId in enumerate(imgIds):
            if i+1 == 1 or i+1 == len(imgIds) or (i+1) % 10 == 0:
                print('Evaluating image %d of %d: %d' % (i+1, len(imgIds), imgId))
            confusion = self._accumulateConfusion(self.cocoGt, self.cocoRes, confusion, imgId)
        self.confusion = confusion

        # Set eval struct to be used later
        self.eval = {
            'params': self.params,
            'date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            'confusion': self.confusion
        }
        toc = time.time()
        print('DONE (t={:0.2f}s).'.format(toc-tic))

    def _accumulateConfusion(self, cocoGt, cocoRes, confusion, imgId):
        '''
        Accumulate the pixels of the current image in the specified confusion matrix.
        Note: For simplicity we do not map the labels to range [0, L-1],
        but keep the original indices when indexing 'confusion'.
        :param cocoGt: COCO object with ground truth annotations
        :param cocoRes: COCO object with detection results
        :param confusion: confusion matrix that will be modified
        :param imgId: id of the current image
        :return: confusion (modified confusion matrix)
        '''
        # Combine all annotations of this image in labelMapGt and labelMapRes
        labelMapGt = cocoSegmentationToSegmentationMap(cocoGt, imgId, includeCrowd=False)
        labelMapRes = cocoSegmentationToSegmentationMap(cocoRes, imgId, includeCrowd=False)

        # Check that the result has only valid labels
        invalidLabels = [l for l in np.unique(labelMapRes) if l not in self.catIds]
        if len(invalidLabels) > 0:
            raise Exception('Error: Invalid classes predicted in the result file: %s. Please insert only labels in the range [%d, %d]!'
                            % (str(invalidLabels), min(self.catIds), max(self.catIds)))

        # Filter labels that are not in catIds (includes the 0 label)
        valid = np.reshape(np.in1d(labelMapGt, self.catIds), labelMapGt.shape)
        validGt = labelMapGt[valid].astype(int)
        validRes = labelMapRes[valid].astype(int)
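        # Only pixels whose ground-truth label is a stuff (or 'other') class are
        # counted; predictions at all other pixels (e.g. the unlabeled 0 label)
        # are ignored.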
        # Gather annotations in confusion matrix
        #for g, d in zip(validGt, validRes):
        #    confusion[g-1, d-1] += 1

        # Much faster version using np.unique
        n = confusion.shape[0] + 1  # Arbitrary number > labelCount
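        # Encode each (gt, pred) pixel pair as the single integer gt*n + pred.
        # Since 0 <= pred < n this encoding is unique, so np.unique can count
        # identical pairs in one pass instead of looping over every pixel.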
        map_for_count = validGt * n + validRes
        vals, cnts = np.unique(map_for_count, return_counts=True)
        for v, c in zip(vals, cnts):
            g = v // n
            d = v % n
            confusion[g - 1, d - 1] += c
        return confusion

    def summarize(self):
        '''
        Compute and display the metrics for leaf nodes and super categories.
        :return: tuple of (general) stats and (per-class) statsClass
        '''
        # Check if evaluate was run and then compute performance metrics
        if not self.eval:
            raise Exception('Error: Please run evaluate() first!')

        # Compute confusion matrix for supercategories
        confusion = self.confusion
        confusionSup = self._getSupCatConfusion(confusion)

        # Compute performance
        [miou, fwiou, macc, pacc, ious, maccs] = self._computeMetrics(confusion)
        [miouSup, fwiouSup, maccSup, paccSup, iousSup, maccsSup] = self._computeMetrics(confusionSup)

        # Store metrics
        stats = np.zeros((8,))
        stats[0] = self._printSummary('Mean IOU', 'leaves', miou)
        stats[1] = self._printSummary('FW IOU', 'leaves', fwiou)
        stats[2] = self._printSummary('Mean accuracy', 'leaves', macc)
        stats[3] = self._printSummary('Pixel accuracy', 'leaves', pacc)
        stats[4] = self._printSummary('Mean IOU', 'supercats', miouSup)
        stats[5] = self._printSummary('FW IOU', 'supercats', fwiouSup)
        stats[6] = self._printSummary('Mean accuracy', 'supercats', maccSup)
        stats[7] = self._printSummary('Pixel accuracy', 'supercats', paccSup)

        # Store statsClass
        statsClass = {
            'ious': ious,
            'maccs': maccs,
            'iousSup': iousSup,
            'maccsSup': maccsSup
        }
        self.stats, self.statsClass = stats, statsClass
        return stats, statsClass

    def _getSupCatConfusion(self, confusion):
        '''
        Maps the leaf category confusion matrix to a super category confusion matrix.
        :param confusion: leaf category confusion matrix
        :return: confusionSup (super category confusion matrix)
        '''
        # Retrieve supercategory mapping
        supCats = [c['supercategory'] for c in self.cocoGt.cats.values()]
        supCatsUn = sorted(set(supCats))
        keys = supCatsUn
        vals = range(0, len(supCatsUn))
        supCatMap = dict(zip(keys, vals))
        supCatIds = [supCatMap[s] for s in supCats]
        supCatCount = len(supCatsUn)

        # Compute confusion matrix for supercategories
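        # Each supercategory cell sums the block of leaf-level entries whose rows
        # and columns belong to the two supercategories; the leaf indices are
        # offset by stuffStartId-1 because class labels start at stuffStartId.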
        confusionSup = np.zeros((supCatCount, supCatCount))
        for supCatIdA in range(0, supCatCount):
            for supCatIdB in range(0, supCatCount):
                curLeavesA = np.where([s == supCatIdA for s in supCatIds])[0] + self.stuffStartId - 1
                curLeavesB = np.where([s == supCatIdB for s in supCatIds])[0] + self.stuffStartId - 1
                confusionLeaves = confusion[curLeavesA, :]
                confusionLeaves = confusionLeaves[:, curLeavesB]
                confusionSup[supCatIdA, supCatIdB] = confusionLeaves.sum()
        assert confusionSup.sum() == confusion.sum()
        return confusionSup

    def _computeMetrics(self, confusion):
        '''
        Compute evaluation metrics given a confusion matrix.
        :param confusion: any confusion matrix
        :return: tuple (miou, fwiou, macc, pacc, ious, maccs)
        '''
        # Init
        labelCount = confusion.shape[0]
        ious = np.zeros((labelCount))
        maccs = np.zeros((labelCount))
        ious[:] = np.nan
        maccs[:] = np.nan

        # Get true positives, positive predictions and positive ground-truth
        total = confusion.sum()
        if total <= 0:
            raise Exception('Error: Confusion matrix is empty!')
        tp = np.diagonal(confusion)
        posPred = confusion.sum(axis=0)
        posGt = confusion.sum(axis=1)

        # Check which classes have elements
        valid = posGt > 0
        iousValid = np.logical_and(valid, posGt + posPred - tp > 0)
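        # Classes without any ground-truth pixels keep their NaN entries and are
        # excluded from the means computed below.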
        # Compute per-class results and frequencies
        ious[iousValid] = np.divide(tp[iousValid], posGt[iousValid] + posPred[iousValid] - tp[iousValid])
        maccs[valid] = np.divide(tp[valid], posGt[valid])
        freqs = np.divide(posGt, total)

        # Compute evaluation metrics
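        # miou:  mean over valid classes of IOU = tp / (posGt + posPred - tp)
        # fwiou: per-class IOU weighted by the class' share of ground-truth pixels
        # macc:  mean over valid classes of accuracy = tp / posGt
        # pacc:  fraction of all pixels that are labeled correctly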
        miou = np.mean(ious[iousValid])
        fwiou = np.sum(np.multiply(ious[iousValid], freqs[iousValid]))
        macc = np.mean(maccs[valid])
        pacc = tp.sum() / total
        return miou, fwiou, macc, pacc, ious, maccs

    def _printSummary(self, titleStr, classStr, val):
        '''
        Prints the current metric title, class type and value.
        :param titleStr: a string that represents the name of the metric
        :param classStr: the type of classes the metric was performed on (leaves/supercategories)
        :param val: the value of the metric
        :return: val (the metric value, passed through for convenient storing)
        '''
        iStr = ' {:<14} @[ classes={:>8s} ] = {:0.4f}'
        print(iStr.format(titleStr, classStr, val))
        return val

class Params:
    '''
    Params for coco stuff evaluation api
    '''
    def __init__(self):
        self.imgIds = []
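
if __name__ == '__main__':
    # Minimal usage sketch. The annotation and result file paths below are
    # hypothetical placeholders; see pycocostuffEvalDemo.py for the official demo.
    from pycocotools.coco import COCO
    cocoGt = COCO('annotations/stuff_val2017.json')             # ground-truth annotations
    cocoRes = cocoGt.loadRes('results/stuff_val2017_res.json')  # stuff segmentation results
    E = COCOStuffeval(cocoGt, cocoRes)
    E.evaluate()                       # accumulate the per-image confusion matrix
    stats, statsClass = E.summarize()  # print and return the summary metrics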