In [75]:
import csv
from matplotlib import pyplot as plt
import pandas as pd
from copy import copy

In [145]:
results_file = "../data/uk-hack-results/result.list"
imglist_file = "../data/uk-hack-results/Image_Annotation_Training_Data_Set.csv"

# Annotation sheet: derive the "<Image ID>.jpg" file name for every row, then
# keep only the rows belonging to the 'subject-type' task.
images = (
    pd.read_csv(imglist_file)
    .assign(path=lambda frame: frame['Image ID'].map(lambda x: "%d.jpg" % x))
    .query("Task=='subject-type'")
)

# Classifier output; later cells read a 'path' column from it and treat the
# remaining columns as per-class scores.
results = pd.read_csv(results_file)

In [146]:
# Flatten both frames into lists of per-row dicts (column name -> value) so
# the following cells can work with plain Python structures.
res = results.to_dict('records')
imgs = images.to_dict('records')

In [191]:
def get_tops(r, topk=9):
    """Return ``(path, best)`` for one result row.

    ``r`` is a dict holding a ``'path'`` entry plus one entry per class.
    ``best`` is the list of the ``topk`` ``(class, value)`` pairs with the
    largest values, in descending order.  ``r`` itself is left untouched.
    """
    path = r['path']
    # Everything except the file name is treated as a class score.
    scored = [(name, value) for name, value in r.items() if name != 'path']
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return (path, scored[:topk])
# image path -> its top-scoring (class, score) pairs
tops_d = dict(get_tops(row) for row in res)
# image path -> annotated label
imgs_d = dict((row['path'], row['Label']) for row in imgs)

In [192]:
# Compare the labelled images against the images we have classifier output for.
# The messages are built with % formatting so they print as plain text on both
# Python 2 and Python 3 (a multi-argument print(...) shows a tuple on Python 2).
# The missing paths are sorted so the output is stable between runs.
print("Missing results: %s" % sorted(set(imgs_d) - set(tops_d)))  # there were JPEG parse errors from tensorflow
print("Missing Labels: %d" % len(set(tops_d) - set(imgs_d)))  # Some are missing labels

dataset = set(imgs_d) & set(tops_d)  # intersection
print("Total labelled items for which we have results : %d" % len(dataset))

('Missing results:', set(['2720.jpg', '2715.jpg', '2648.jpg', '2759.jpg', '2685.jpg']))
('Missing Labels:', 233)
('Total labelled items for which we have results :', 1192)


In [193]:
from collections import Counter
# Number of labelled images per annotated label (all labelled images,
# including those without classifier results).
Counter(imgs_d.values())

Counter({'ammunition': 69,
         'conversion-parts': 3,
         'frame-lower-receiver': 7,
         'handgun': 392,
         'other': 173,
         'rifle': 272,
         'shotgun': 275,
         'silencer': 4,
         'skipped': 2})

In [194]:
# Keys are here http://image-net.org/challenges/LSVRC/2014/browse-synsets
# A list comprehension is used instead of filter() so the matching column
# names are displayed as a list on Python 3 too (where filter() is lazy).
[k for k in results.columns if 'gun' in k]

['revolver, six-gun, six-shooter', 'assault rifle, assault gun']

In [224]:
# ImageNet class name -> ground-truth label used in the annotation sheet.
m = {
    'revolver, six-gun, six-shooter': 'handgun',
    'assault rifle, assault gun': 'rifle',
    'rifle': 'rifle',
}
# Reverse view: ground-truth label -> set of ImageNet class names that count
# as a hit for it.  Derived from `m` so the two mappings cannot drift apart.
revm = {
    truth: set(name for name in m if m[name] == truth)
    for truth in set(m.values())
}

In [234]:
from collections import defaultdict as ddict

gun_classes = set(revm)  # ground-truth labels that have ImageNet aliases
def quantify(topk, debug=False, items=None, labels=None, predictions=None,
             aliases=None):
    """Tally top-k hits and misses per ground-truth gun class.

    For every image, the ``topk`` best predicted class names are compared
    with the aliases of the image's ground-truth label.  A 'shotgun' label is
    counted as 'rifle'; any label without aliases is counted as 'other' and
    only contributes to that row's 'total'.

    Parameters
    ----------
    topk : int
        How many of the stored predictions to consider per image.  Values
        larger than the number of stored predictions have no extra effect.
    debug : bool
        When true, every miss is written to
        ``../data/uk-hack-results/error-analysis-top<topk>.txt``.
    items : iterable of image paths, optional
        Images to evaluate.  Defaults to the notebook-level ``dataset``.
    labels : dict, optional
        Image path -> ground-truth label.  Defaults to ``imgs_d``.
    predictions : dict, optional
        Image path -> list of ``(class name, score)`` pairs, best first.
        Defaults to ``tops_d``.
    aliases : dict, optional
        Ground-truth label -> set of predicted class names that count as
        correct.  Defaults to ``revm``.

    Returns
    -------
    defaultdict
        ``table[label][counter]`` with counters 'correct', 'error', 'total'.
    """
    # Fall back to the notebook globals so existing quantify(k) calls behave
    # exactly as before; explicit arguments make the function usable (and
    # testable) without that hidden state.
    items = dataset if items is None else items
    labels = imgs_d if labels is None else labels
    predictions = tops_d if predictions is None else predictions
    aliases = revm if aliases is None else aliases
    known_classes = set(aliases)

    table = ddict(lambda: ddict(int))
    misses = []
    # Sorted so the debug file has the same order on every run.
    for img in sorted(items):
        truth = labels[img]
        preds = set(pair[0] for pair in predictions[img][:topk])
        if truth == 'shotgun':
            truth = 'rifle' # there is no explicit distinction between shotgun and rifle in ImageNet
        if truth not in known_classes:
            truth = 'other'
        elif aliases[truth] & preds: # intersection of truth aliases and top predictions
            table[truth]['correct'] += 1
        else:
            table[truth]['error'] += 1
            if debug:
                misses.append((img, truth, '-->', preds))
        table[truth]['total'] += 1

    if debug:
        # Context manager guarantees the file is closed even if a write fails.
        with open('../data/uk-hack-results/error-analysis-top%d.txt' % topk, 'w') as f:
            for miss in misses:
                f.write("%s\n" % str(miss))
    return table

# Print the hit/miss table for several cut-offs.  Only the top-7 run asks
# quantify to write its error-analysis file.
for k in (1, 3, 5, 7):
    heading = "Top %d" % k
    if k != 1:
        heading = "\n" + heading  # blank line between consecutive tables
    print(heading)
    print(pd.DataFrame(data=quantify(k, k == 7)).transpose())


Top 1
         correct  error  total
handgun    202.0  190.0  392.0
other        NaN    NaN  255.0
rifle      438.0  107.0  545.0

Top 3
         correct  error  total
handgun    276.0  116.0  392.0
other        NaN    NaN  255.0
rifle      498.0   47.0  545.0

Top 5
         correct  error  total
handgun    309.0   83.0  392.0
other        NaN    NaN  255.0
rifle      505.0   40.0  545.0

Top 7
         correct  error  total
handgun    317.0   75.0  392.0
other        NaN    NaN  255.0
rifle      514.0   31.0  545.0


In [226]:
# Top-9 table, kept in `tab` and displayed as the cell result.
# NOTE(review): the saved output of this cell has a lower execution count than
# the cell defining quantify, and its rifle row does not add up to its total;
# re-run after the quantify cell to refresh it.
tab = pd.DataFrame(quantify(9)).T
tab

Unnamed: 0,correct,error,total
handgun,330.0,62.0,392.0
other,,,255.0
rifle,262.0,8.0,545.0
