In [37]:
import os
import pandas as pd
import numpy as np
import json

In [32]:
# Relative path of the annotation directory, followed by the annotation
# file names for the train / validation / test sets.
ann_dir = "../../../Data/annotation"
train_fname, val_fname, test_fname = (
    'i2l_trainset_annotation.json',
    'i2l_valset_annotation.json',
    'i2l_testset_annotation.json',
)

In [5]:
def writeann(file, out_path=None):
    """Flatten an annotation JSON into one row per (image name, category) pair.

    The JSON file must hold a mapping whose values are dicts with a
    ``'name'`` entry and a ``'category'`` entry (a list of category strings).
    Every category of an image becomes its own row, so an image with several
    categories appears several times.

    Summary statistics (largest number of categories on a single image, number
    of distinct categories, and the set of categories) are printed.

    Parameters
    ----------
    file : str
        Path of the annotation JSON file to read.
    out_path : str, optional
        When given, the resulting frame is also written to this path as CSV
        (without the index). Nothing is written when it is ``None``.

    Returns
    -------
    pandas.DataFrame
        Frame with the columns ``'name'`` and ``'label'``.
    """
    columns = ['name', 'label']
    names = []
    labels = []
    maxl = 0

    # Only the parsing needs the file handle; close it before processing.
    with open(file, 'r') as f:
        ann = json.load(f)

    for img_dict in ann.values():
        img_name = img_dict['name']
        cat_list = img_dict['category']
        maxl = max(maxl, len(cat_list))
        # One output row per category attached to this image.
        names.extend([img_name] * len(cat_list))
        labels.extend(cat_list)

    data = pd.DataFrame(list(zip(names, labels)), columns=columns)

    # `labels` already holds every category occurrence, so the distinct
    # categories can be derived from it without a second list.
    categories = set(labels)
    print('Maximum categories per image', maxl)
    print('Total number of categories', len(categories))
    print('All categories', categories)

    if out_path is not None:
        data.to_csv(out_path, index=False)
    return data

In [9]:
# Build the full path of the training annotation file. Joining on the base
# name keeps this cell idempotent: re-running it after `train_fname` already
# holds the joined path does not prepend `ann_dir` a second time.
train_fname = os.path.join(ann_dir, os.path.basename(train_fname))
train_data = writeann(train_fname)

Maximum categories per image 9
Total number of categories 79
All categories {'frisbee', 'microwave', 'chair', 'orange', 'cup', 'keyboard', 'scissors', 'vase', 'hot dog', 'cell phone', 'stop sign', 'car', 'tv', 'laptop', 'skis', 'bottle', 'sink', 'broccoli', 'fork', 'book', 'horse', 'sports ball', 'tie', 'skateboard', 'truck', 'toothbrush', 'sandwich', 'cake', 'bowl', 'bicycle', 'dog', 'spoon', 'bird', 'apple', 'tennis racket', 'bear', 'suitcase', 'dining table', 'parking meter', 'couch', 'elephant', 'backpack', 'banana', 'remote', 'umbrella', 'sheep', 'clock', 'giraffe', 'wine glass', 'knife', 'bus', 'baseball bat', 'handbag', 'bed', 'cat', 'baseball glove', 'traffic light', 'kite', 'carrot', 'fire hydrant', 'hair drier', 'train', 'potted plant', 'teddy bear', 'donut', 'toaster', 'pizza', 'bench', 'surfboard', 'toilet', 'mouse', 'motorcycle', 'airplane', 'cow', 'zebra', 'refrigerator', 'snowboard', 'boat', 'oven'}


In [11]:
# NOTE(review): this CSV is presumably the (name, label) table produced by
# `writeann`; no live code in the visible cells writes it, so it must already
# exist on disk — confirm before a fresh Restart & Run All.
train_data = pd.read_csv('../Data/train_ann.csv')

# Checking training examples per-class
train_groups = train_data.groupby(['label'])
count_df = train_groups.agg('count')

# Compute the per-label frequencies once and reuse them for all three stats.
label_freq = train_data['label'].value_counts()
print('Max Freq category:', label_freq.idxmax())
print('Min Freq category:', label_freq.idxmin())
print('Mean Freq:', label_freq.mean())
print(count_df)


Max Freq category: chair
Min Freq category: bear
Mean Freq: 599.620253164557
                name
label               
airplane         141
apple            290
backpack        1394
banana           293
baseball bat     590
baseball glove   710
bear              44
bed              197
bench            925
bicycle          615
bird             397
boat             275
book            1164
bottle          2190
bowl            1220
broccoli         207
bus              400
cake             263
car             2139
carrot           247
cat              277
cell phone      1277
chair           2243
clock           1145
couch            568
cow               99
cup             2241
dining table     762
dog              583
donut            135
...              ...
pizza            127
potted plant     957
refrigerator     570
remote           646
sandwich         200
scissors         191
sheep             56
sink            1073
skateboard       463
skis             364
snowboard        248

In [17]:
def writeann_multilabel(file, out_path=None, sep=" "):
    """Build one row per image with all of its categories joined into one label.

    The JSON file must hold a mapping whose values are dicts with a
    ``'name'`` entry and a ``'category'`` entry (a list of category strings).

    Parameters
    ----------
    file : str
        Path of the annotation JSON file to read.
    out_path : str, optional
        When given, the resulting frame is also written to this path as CSV
        (without the index). Nothing is written when it is ``None``.
    sep : str, optional
        Separator placed between the categories of one image. The default
        (a single space) keeps the historical format, but it is ambiguous when
        a category name itself contains a space; pass a separator that cannot
        occur in a category name if the label must be split again later.

    Returns
    -------
    pandas.DataFrame
        Frame with the columns ``'name'`` and ``'label'``, one row per image.
    """
    columns = ['name', 'label']

    # Only the parsing needs the file handle; close it before processing.
    with open(file, 'r') as f:
        ann = json.load(f)

    rows = [
        (img_dict['name'], sep.join(img_dict['category']))
        for img_dict in ann.values()
    ]
    data = pd.DataFrame(rows, columns=columns)

    if out_path is not None:
        data.to_csv(out_path, index=False)
    return data

In [18]:
# Reset the annotation settings in this cell so it can be re-run on its own,
# then build the multilabel table for the training annotations.
ann_dir = "../../../Data/annotation"
val_fname = 'i2l_valset_annotation.json'
test_fname = 'i2l_testset_annotation.json'
train_fname = os.path.join(ann_dir, 'i2l_trainset_annotation.json')
train_data = writeann_multilabel(train_fname)

                    name                   label
0  000000016164_left.png                     dog
1  000000471175_left.png                     dog
2  000000026654_left.png                     dog
3  000000158497_left.png  spoon refrigerator dog
4  000000390348_left.png                     dog
5  000000369190_left.png                     dog
6  000000151988_left.png              bottle dog
7  000000307993_left.png                book dog
8  000000007125_left.png                     dog
9  000000346965_left.png                     dog


In [54]:
def writevalann(file, out_path=None):
    """Convert a candidate-style annotation JSON into a flat table.

    The JSON file must hold a mapping whose values are dicts with the entries
    ``'name'``, ``'correct_candidate'`` and ``'wrong_candidate'``. The first
    correct candidate becomes the label and the first four wrong candidates
    become the columns ``WC1`` .. ``WC4``.

    Parameters
    ----------
    file : str
        Path of the annotation JSON file to read.
    out_path : str, optional
        When given, the resulting frame is also written to this path as CSV
        (without the index). Nothing is written when it is ``None``.

    Returns
    -------
    pandas.DataFrame
        Frame with the columns ``name, label, WC1, WC2, WC3, WC4``.

    Raises
    ------
    IndexError
        If an entry has no correct candidate or fewer than four wrong ones.
    """
    columns = ['name', 'label', 'WC1', 'WC2', 'WC3', 'WC4']

    # Only the parsing needs the file handle; close it before processing.
    with open(file, 'r') as f:
        ann = json.load(f)

    rows = []
    for img_dict in ann.values():
        correct_cand = img_dict['correct_candidate']  # only one element always
        wrong_cands = img_dict['wrong_candidate']  # four element always
        rows.append((
            img_dict['name'],
            correct_cand[0],
            wrong_cands[0],
            wrong_cands[1],
            wrong_cands[2],
            wrong_cands[3],
        ))

    data = pd.DataFrame(rows, columns=columns)

    if out_path is not None:
        data.to_csv(out_path, index=False)
    return data

In [55]:
# Reset the annotation settings in this cell so it can be re-run on its own,
# then build the table for the validation annotations.
ann_dir = "../../../Data/annotation"
train_fname = 'i2l_trainset_annotation.json'
test_fname = 'i2l_testset_annotation.json'
val_fname = os.path.join(ann_dir, 'i2l_valset_annotation.json')
val_data = writevalann(val_fname)


                    name          label       WC1         WC2            WC3  \
0  000000116358_left.png    sports ball     sheep       apple       backpack   
1  000000050179_left.png  tennis racket  airplane    elephant            tie   
2  000000523571_left.png   dining table       car     frisbee     cell phone   
3  000000531798_left.png           bowl      cake  wine glass     teddy bear   
4  000000531798_left.png         banana    toilet         cup  parking meter   

     WC4  
0  mouse  
1   kite  
2  pizza  
3  clock  
4   vase  


In [56]:
def writetestann(file, out_path='../Data/test_ann.csv'):
    """Convert a candidate-style annotation JSON into a flat table and save it.

    The JSON file must hold a mapping whose values are dicts with the entries
    ``'name'``, ``'correct_candidate'`` and ``'wrong_candidate'``. The first
    correct candidate becomes the label and the first four wrong candidates
    become the columns ``WC1`` .. ``WC4``.

    Parameters
    ----------
    file : str
        Path of the annotation JSON file to read.
    out_path : str, optional
        Path of the CSV file to write (without the index). Defaults to the
        location this function has always written to. Pass ``None`` to skip
        writing.

    Returns
    -------
    pandas.DataFrame
        Frame with the columns ``name, label, WC1, WC2, WC3, WC4``.

    Raises
    ------
    IndexError
        If an entry has no correct candidate or fewer than four wrong ones.
    """
    columns = ['name', 'label', 'WC1', 'WC2', 'WC3', 'WC4']

    # Only the parsing needs the file handle; close it before processing.
    with open(file, 'r') as f:
        ann = json.load(f)

    rows = []
    for img_dict in ann.values():
        correct_cand = img_dict['correct_candidate']  # only one element always
        wrong_cands = img_dict['wrong_candidate']  # four element always
        rows.append((
            img_dict['name'],
            correct_cand[0],
            wrong_cands[0],
            wrong_cands[1],
            wrong_cands[2],
            wrong_cands[3],
        ))

    data = pd.DataFrame(rows, columns=columns)

    if out_path is not None:
        data.to_csv(out_path, index=False)
    return data

In [57]:
# Reset the annotation settings in this cell so it can be re-run on its own,
# then build the table for the test annotations.
ann_dir = "../../../Data/annotation"
train_fname = 'i2l_trainset_annotation.json'
val_fname = 'i2l_valset_annotation.json'
test_fname = os.path.join(ann_dir, 'i2l_testset_annotation.json')
test_data = writetestann(test_fname)

