In [1]:
%matplotlib inline
from pycocotools.coco import COCO
import numpy as np
import skimage.io as io
import matplotlib.pyplot as plt
import pylab
pylab.rcParams['figure.figsize'] = (8.0, 10.0)

In [2]:
from bokeh.io import push_notebook, show, output_notebook
from bokeh.plotting import figure, output_file
from bokeh.models import ColumnDataSource
from bokeh.palettes import all_palettes
output_notebook()

In [3]:
tools_list = "pan," \
             "hover," \
             "box_select," \
             "lasso_select," \
             "box_zoom, " \
             "wheel_zoom," \
             "reset," \
             "save," \
             "help"

In [4]:
def show_simple_bar(title, x_axis_label, y_axis_label, x_range, y_range, xaxis_orientation = None, need_show = True, need_return = True):
    
    # source build
    source = ColumnDataSource(data=dict(X=x_range, Y=y_range, color=all_palettes['Paired'][len(x_range)]))

    # Create the blank plot
    p = figure(title = title,
               x_axis_label = x_axis_label, 
               y_axis_label = y_axis_label,
              x_range=x_range, y_range=(0, max(y_range) + 1),
               tools=tools_list, tooltips="@%s: @% s"  % ('X', 'Y'),
              plot_width=500, plot_height=300)

    p.vbar(x='X', top='Y', width=0.9, alpha = 0.4,color='color', source=source) #, legend=x, )
    if xaxis_orientation is not None:
        p.xaxis.major_label_orientation = math.pi/4

    p.xgrid.grid_line_color = None
    p.y_range.start = 0
    # p.legend.orientation = legend_ori
    # p.legend.location = "top_center"

    if need_show:
        # output_notebook()
        show(p)
    if need_return:
        return p
    
def show_simple_horizontal_bar(title, x_axis_label, y_axis_label, x_range, y_range, need_show = True, need_return = True):
    # output_notebook()
    
    # source build
    source = ColumnDataSource(data=dict(X=x_range, Y=y_range, color=all_palettes['Paired'][len(x_range)]))

    # Create the blank plot
    p = figure(title = title,
               y_axis_label = x_axis_label, 
               x_axis_label = y_axis_label,
              y_range=x_range, x_range=(0, max(y_range) + 1),
               tools=tools_list, tooltips="@%s: @% s"  % ('X', 'Y'),
              plot_width=500, plot_height=300)

    p.hbar(y='X', right='Y', height=0.9, alpha = 0.4,color='color', source=source) #, legend=x, )

    p.ygrid.grid_line_color = None
    p.x_range.start = 0
    # p.legend.orientation = legend_ori
    # p.legend.location = "top_center"

    if need_show:
        # output_notebook()
        show(p)
    if need_return:
        return p

In [5]:
dataDir='.'
dataType='train2017'
annFile='{}/annotations/instances_{}.json'.format(dataDir,dataType)
output_file_name = "output/coco_matrix_{}.html".format(dataType)
output_title="coco_matrix_{}".format(dataType)

In [6]:
# initialize COCO api for instance annotations
coco=COCO(annFile)

loading annotations into memory...
Done (t=24.23s)
creating index...
index created!


In [7]:
# display COCO categories and supercategories
cats = coco.loadCats(coco.getCatIds())
nms=[cat['name'] for cat in cats]
print('COCO categories: \n{}\n'.format(' '.join(nms)))

nms = set([cat['supercategory'] for cat in cats])
print('COCO supercategories: \n{}'.format(' '.join(nms)))

COCO categories: 
person bicycle car motorcycle airplane bus train truck boat traffic light fire hydrant stop sign parking meter bench bird cat dog horse sheep cow elephant bear zebra giraffe backpack umbrella handbag tie suitcase frisbee skis snowboard sports ball kite baseball bat baseball glove skateboard surfboard tennis racket bottle wine glass cup fork knife spoon bowl banana apple sandwich orange broccoli carrot hot dog pizza donut cake chair couch potted plant bed dining table toilet tv laptop mouse remote keyboard cell phone microwave oven toaster sink refrigerator book clock vase scissors teddy bear hair drier toothbrush

COCO supercategories: 
kitchen food electronic indoor furniture vehicle animal accessory appliance outdoor sports person


In [8]:
cats

[{'supercategory': 'person', 'id': 1, 'name': 'person'},
 {'supercategory': 'vehicle', 'id': 2, 'name': 'bicycle'},
 {'supercategory': 'vehicle', 'id': 3, 'name': 'car'},
 {'supercategory': 'vehicle', 'id': 4, 'name': 'motorcycle'},
 {'supercategory': 'vehicle', 'id': 5, 'name': 'airplane'},
 {'supercategory': 'vehicle', 'id': 6, 'name': 'bus'},
 {'supercategory': 'vehicle', 'id': 7, 'name': 'train'},
 {'supercategory': 'vehicle', 'id': 8, 'name': 'truck'},
 {'supercategory': 'vehicle', 'id': 9, 'name': 'boat'},
 {'supercategory': 'outdoor', 'id': 10, 'name': 'traffic light'},
 {'supercategory': 'outdoor', 'id': 11, 'name': 'fire hydrant'},
 {'supercategory': 'outdoor', 'id': 13, 'name': 'stop sign'},
 {'supercategory': 'outdoor', 'id': 14, 'name': 'parking meter'},
 {'supercategory': 'outdoor', 'id': 15, 'name': 'bench'},
 {'supercategory': 'animal', 'id': 16, 'name': 'bird'},
 {'supercategory': 'animal', 'id': 17, 'name': 'cat'},
 {'supercategory': 'animal', 'id': 18, 'name': 'dog'},

In [9]:
# get all images containing given categories, select one at random
catIds = coco.getCatIds(catNms=['person','dog','skateboard']);
catIds

[1, 18, 41]

In [10]:
imgIds = coco.getImgIds(catIds=catIds );
imgIds

[379520,
 55809,
 46978,
 438915,
 500100,
 209028,
 89350,
 341623,
 306440,
 76937,
 372874,
 22796,
 282768,
 360595,
 366484,
 410005,
 172310,
 449560,
 557721,
 22427,
 365340,
 363549,
 354721,
 47909,
 125351,
 312233,
 28842,
 304173,
 241837,
 560561,
 88754,
 429236,
 536244,
 380724,
 55478,
 23737,
 377278,
 411583,
 476736,
 461632,
 463044,
 569158,
 102599,
 231240,
 296782,
 48595,
 558036,
 518613,
 441470,
 493020,
 297692,
 162396,
 547421,
 176229,
 392166,
 251368,
 255209,
 353644,
 155117,
 551795,
 556149,
 45175,
 568187,
 158718,
 575615]

In [11]:
img = coco.loadImgs(imgIds[np.random.randint(0,len(imgIds))])[0]
img

{'license': 1,
 'file_name': '000000551795.jpg',
 'coco_url': 'http://images.cocodataset.org/train2017/000000551795.jpg',
 'height': 640,
 'width': 433,
 'date_captured': '2013-11-18 09:32:01',
 'flickr_url': 'http://farm5.staticflickr.com/4039/4540176064_5e360a2d6c_z.jpg',
 'id': 551795}

In [12]:
annIds = coco.getAnnIds(imgIds=img['id'], catIds=catIds, iscrowd=None)
annIds

[10151, 474443, 492006, 536836, 537320, 1861410, 2030638]

In [13]:
anns = coco.loadAnns(annIds)
# anns

In [14]:
# coco.showAnns(anns)
p = figure(plot_width=500, plot_height=500)

max_dimension = 500
color_list = ['black', 'red', 'green']
cat_list = [1, 18, 41]

polygons_xs = []
polygons_ys = []
color = []
for ann in anns:
    if 'segmentation' in ann:
        if type(ann['segmentation']) == list:
            # polygon
            for seg in ann['segmentation']:                
                poly = np.array(seg).reshape((int(len(seg)/2), 2))
                poly_x = max_dimension - poly[:,0] / img['width'] * max_dimension
                poly_y = max_dimension - poly[:,1] / img['height'] * max_dimension
                polygons_xs.append(list(poly_x))
                polygons_ys.append(list(poly_y))
                color.append(color_list[cat_list.index(ann['category_id'])])
p.patches(polygons_xs, polygons_ys, color=color, alpha=0.1, line_width=0)

show(p)

In [15]:
# load and display image
# I = io.imread('%s/images/%s/%s'%(dataDir,dataType,img['file_name']))
# use url to load image

# I = io.imread(img['coco_url'])
# plt.axis('off')
# plt.imshow(I)
# plt.show()

In [16]:
for super_cat in nms:
    catIds = coco.getCatIds(supNms=super_cat);
    print(len(catIds))

7
10
6
7
6
8
10
5
5
5
10
1


In [17]:
super_cat_total_count_list = []
for super_cat in nms:
    catIds = coco.getCatIds(supNms=super_cat);
    super_cat_total_count = 0
    for catId in catIds:
        imgIds = coco.getImgIds(catIds=catId);
        super_cat_total_count += len(imgIds)
    super_cat_total_count_list.append(super_cat_total_count)
super_cat_total_count_list

[38744,
 20351,
 19955,
 17867,
 40521,
 38683,
 25739,
 22549,
 11679,
 13859,
 28934,
 64115]

In [18]:
# Super Cats - bar graph (bokeh)

super_cat_bar_graph = show_simple_horizontal_bar(title = 'Super Categories',
                                                 x_axis_label = "Super Categories",
                                                 y_axis_label = "Number",
                                                 x_range = list(nms),
                                                 y_range = super_cat_total_count_list
                                                )

In [19]:
from bokeh.models import HoverTool, Panel, Tabs
import random

tab_list = list()

for super_cat in nms:
    cat_name_list = []
    cat_count_list = []
    for cat in cats:
        if cat['supercategory'] == super_cat:
            imgIds = coco.getImgIds(catIds=cat['id']);
            cat_name_list.append(cat['name'])
            cat_count_list.append(len(imgIds))
    if len(cat_name_list) == 1:
        cat_name_list.append(' ')
        cat_count_list.append(0)
        cat_name_list.append('  ')
        cat_count_list.append(0)
        # print(cat_name_list)
        # print(cat_count_list)
    # print(super_cat)
    p = show_simple_horizontal_bar(title = super_cat,
                                   x_axis_label = "Sub Categories",
                                   y_axis_label = "Number",
                                   x_range = cat_name_list,
                                   y_range = cat_count_list,
                                   need_show = False
                                  )
    tab_list.append(Panel(child=p, title=super_cat))

tabs = Tabs(tabs=tab_list)
show(tabs)

In [20]:
sup_cats = list(set([cat['supercategory'] for cat in cats]))

names = [cat['name'] for cat in cats]

N = len(cats)
counts = np.zeros((N, N))
id_source = 0
for cat_source in cats:    
    id_target = 0
    for cat_target in cats:
        catIds = coco.getCatIds(catNms=[cat_source['name'], cat_target['name']]);
        imgIds = coco.getImgIds(catIds=catIds );        
        counts[id_source, id_target] = len(imgIds)
        counts[id_target, id_source] = len(imgIds)
        id_target += 1
    id_source += 1
# print(np.mean(np.true_divide(counts.sum(1),(counts!=0).sum(1))))

colormap = all_palettes['Paired'][len(sup_cats)]

xname = []
yname = []
xsuper = []
ysuper = []
color = []
alpha = []
for i, cat1 in enumerate(cats):
    for j, cat2 in enumerate(cats):
        xname.append(cat1['name'])
        yname.append(cat2['name'])
        xsuper.append(cat1['supercategory'])
        ysuper.append(cat2['supercategory'])

        alpha.append(min(0.5 * counts[i,j]/np.mean(np.true_divide(counts.sum(1),(counts!=0).sum(1))), 0.9) + 0.1)

        if cat1['supercategory'] == cat2['supercategory']:
            color.append(colormap[sup_cats.index(cat1['supercategory'])])
        else:
            color.append('lightgrey')

data=dict(
    xname=xname,
    yname=yname,
    xsuper=xsuper,
    ysuper=ysuper,
    colors=color,
    alphas=alpha,
    count=counts.flatten(),
)

matrix_p = figure(title="coco_matrix",
           x_axis_location="above", tools=tools_list,
           x_range=list(reversed(names)), y_range=names,
           tooltips = [('supercategory', '@ysuper, @xsuper'), ('names', '@yname, @xname'), ('count', '@count')])

matrix_p.plot_width = 1000
matrix_p.plot_height = 1000
matrix_p.grid.grid_line_color = None
matrix_p.axis.axis_line_color = None
matrix_p.axis.major_tick_line_color = None
matrix_p.axis.major_label_text_font_size = "9pt"
matrix_p.axis.major_label_standoff = 0
matrix_p.xaxis.major_label_orientation = np.pi/3

matrix_p.rect('xname', 'yname', 0.9, 0.9, source=data,
       color='colors', alpha='alphas', line_color=None,
       hover_line_color='black', hover_color='colors')


show(matrix_p)

In [21]:
heat_grid_list = list()
heat_tab_list = list()
for cat in cats[:100]:
    polygons_xs = []
    polygons_ys = []
    heat_p = figure(plot_width=500, plot_height=500, title=cat['name'])
    max_dimension = 500    
    imgIds = coco.getImgIds(catIds=cat['id']);
    
    for imgId in imgIds[:200]:
        img = coco.loadImgs(imgId)[0]
        annIds = coco.getAnnIds(imgIds=imgId);
        # print(annIds)
        anns = coco.loadAnns(annIds)
        # print(anns)
        for ann in anns:
            if 'segmentation' in ann:
                if type(ann['segmentation']) == list and ann['category_id'] == cat['id']:
                    # polygon
                    for seg in ann['segmentation']:
                        poly = np.array(seg).reshape((int(len(seg)/2), 2))
                        poly_x = max_dimension - poly[:,0] / img['width'] * max_dimension
                        poly_y = max_dimension - poly[:,1] / img['height'] * max_dimension
                        polygons_xs.append(list(poly_x))
                        polygons_ys.append(list(poly_y))  
    heat_p.patches(polygons_xs, polygons_ys, color='black', alpha=1/len(imgIds[:200]), line_width=0)   
    heat_tab_list.append(Panel(child=heat_p, title=cat['name']))
    heat_grid_list.append(heat_p)

heat_tabs = Tabs(tabs=heat_tab_list)
# show(heat_tabs)

In [22]:
# initialize COCO api for caption annotations
annFile = '{}/annotations/captions_{}.json'.format(dataDir,dataType)
coco_caps=COCO(annFile)

loading annotations into memory...
Done (t=1.90s)
creating index...
index created!


In [23]:
imgIds = coco.getImgIds(imgIds = [324158])
annIds = coco_caps.getAnnIds(imgIds=imgIds);
anns = coco_caps.loadAnns(annIds)
anns
# coco_caps.showAnns(anns)

[]

In [24]:
import string
for ann in anns:
    cap = ann['caption']
    res = cap.translate(str.maketrans('', '', string.punctuation))
    print(res)

In [25]:
stopwords = ['ourselves', 'hers', 'between', 'yourself', 'but', 'again', 'there', 'about', 'once', 'during', 'out', 'very', 'having', 'with', 'they', 'own', 'an', 'be', 'some', 'for', 'do', 'its', 'yours', 'such', 'into', 'of', 'most', 'itself', 'other', 'off', 'is', 's', 'am', 'or', 'who', 'as', 'from', 'him', 'each', 'the', 'themselves', 'until', 'below', 'are', 'we', 'these', 'your', 'his', 'through', 'don', 'nor', 'me', 'were', 'her', 'more', 'himself', 'this', 'down', 'should', 'our', 'their', 'while', 'above', 'both', 'up', 'to', 'ours', 'had', 'she', 'all', 'no', 'when', 'at', 'any', 'before', 'them', 'same', 'and', 'been', 'have', 'in', 'will', 'on', 'does', 'yourselves', 'then', 'that', 'because', 'what', 'over', 'why', 'so', 'can', 'did', 'not', 'now', 'under', 'he', 'you', 'herself', 'has', 'just', 'where', 'too', 'only', 'myself', 'which', 'those', 'i', 'after', 'few', 'whom', 't', 'being', 'if', 'theirs', 'my', 'against', 'a', 'by', 'doing', 'it', 'how', 'further', 'was', 'here', 'than']
print(stopwords)

['ourselves', 'hers', 'between', 'yourself', 'but', 'again', 'there', 'about', 'once', 'during', 'out', 'very', 'having', 'with', 'they', 'own', 'an', 'be', 'some', 'for', 'do', 'its', 'yours', 'such', 'into', 'of', 'most', 'itself', 'other', 'off', 'is', 's', 'am', 'or', 'who', 'as', 'from', 'him', 'each', 'the', 'themselves', 'until', 'below', 'are', 'we', 'these', 'your', 'his', 'through', 'don', 'nor', 'me', 'were', 'her', 'more', 'himself', 'this', 'down', 'should', 'our', 'their', 'while', 'above', 'both', 'up', 'to', 'ours', 'had', 'she', 'all', 'no', 'when', 'at', 'any', 'before', 'them', 'same', 'and', 'been', 'have', 'in', 'will', 'on', 'does', 'yourselves', 'then', 'that', 'because', 'what', 'over', 'why', 'so', 'can', 'did', 'not', 'now', 'under', 'he', 'you', 'herself', 'has', 'just', 'where', 'too', 'only', 'myself', 'which', 'those', 'i', 'after', 'few', 'whom', 't', 'being', 'if', 'theirs', 'my', 'against', 'a', 'by', 'doing', 'it', 'how', 'further', 'was', 'here', 'tha

In [26]:
import string

cap_tab_list = list()

super_cat_word_freq_dict = {}

for super_cat in nms:
    word_list = {}
    for cat in cats:
        if cat['supercategory'] == super_cat:
            imgIds = coco.getImgIds(catIds=cat['id']);
            for imgId in imgIds:
                annIds = coco_caps.getAnnIds(imgIds=imgId);
                anns = coco_caps.loadAnns(annIds)
                # print(anns)
                for ann in anns:
                    cap = ann['caption'].strip().lower()
                    cap_no_punc = cap.translate(str.maketrans('', '', string.punctuation))
                    clean_cap_list = [word for word in cap_no_punc.split() if word not in stopwords]
                    # print(clean_cap_list)
                    for word in clean_cap_list:
                        if word in word_list:
                            word_list[word] = word_list[word] + 1
                        else:
                            word_list[word] = 1
    # print(word_list)
    sorted_pairs = [(k, word_list[k]) for k in sorted(word_list, key=word_list.get, reverse=True)]
    word_key_list = [word[0] for word in sorted_pairs]
    word_freq_list = [word[1] for word in sorted_pairs]
    super_cat_word_freq_dict[super_cat] = (word_key_list, word_freq_list)
    print(word_key_list[:20])
    print(word_freq_list[:20])
    cap_p = show_simple_horizontal_bar(title = super_cat,
                                   x_axis_label = "word",
                                   y_axis_label = "word frequency",
                                   x_range = word_key_list[:10],
                                   y_range = word_freq_list[:10],
                                   need_show = False
                                  )
    cap_tab_list.append(Panel(child=cap_p, title=super_cat))

cap_tabs = Tabs(tabs=cap_tab_list)
show(cap_tabs)

['table', 'sitting', 'plate', 'food', 'kitchen', 'man', 'people', 'woman', 'two', 'pizza', 'white', 'next', 'cake', 'room', 'group', 'bowl', 'eating', 'top', 'wine', 'standing']
[48974, 29375, 26387, 24855, 23969, 19371, 17602, 16180, 15691, 15428, 13362, 11643, 11100, 9229, 9186, 8426, 8379, 8284, 8116, 7636]
['plate', 'table', 'pizza', 'food', 'sitting', 'cake', 'man', 'two', 'vegetables', 'white', 'sandwich', 'woman', 'bananas', 'fruit', 'next', 'eating', 'holding', 'bowl', 'top', 'broccoli']
[19327, 18801, 13275, 12098, 11650, 8876, 7679, 7140, 7028, 6533, 6335, 5961, 5653, 5634, 5568, 5296, 5193, 5163, 4778, 4763]
['computer', 'desk', 'sitting', 'laptop', 'man', 'room', 'table', 'two', 'phone', 'woman', 'living', 'people', 'keyboard', 'next', 'holding', 'top', 'playing', 'cell', 'cat', 'game']
[20797, 19078, 17786, 17682, 14545, 14249, 10905, 10425, 9270, 8614, 7890, 7639, 7243, 7032, 6405, 6242, 6142, 5972, 5825, 5365]
['room', 'sitting', 'clock', 'table', 'man', 'living', 'two',

In [27]:
cap_pos_tab_list = list()

for super_cat in nms:
    word_key_list = super_cat_word_freq_dict[super_cat][0]
    word_freq_list = super_cat_word_freq_dict[super_cat][1]
    
    sup_cats = list(set([cat['supercategory'] for cat in cats]))

    words_count = 30
    sentence_length = 20

    counts = np.zeros((words_count, sentence_length))
    for cat in cats:
        if cat['supercategory'] == super_cat:
            imgIds = coco.getImgIds(catIds=cat['id']);
            for imgId in imgIds:
                annIds = coco_caps.getAnnIds(imgIds=imgId);
                anns = coco_caps.loadAnns(annIds)
                for ann in anns:
                    cap = ann['caption'].strip().lower()
                    cap_no_punc = cap.translate(str.maketrans('', '', string.punctuation))
                    cap_word_list = cap_no_punc.split()
                    for i in range(len(cap_word_list)):
                        if cap_word_list[i] in word_key_list[:words_count]:
                            index = word_key_list[:words_count].index(cap_word_list[i])
                            if i < sentence_length:
                                counts[index, i] = counts[index, i] + 1

    # colormap = all_palettes['Paired'][words_count]

    xname = []
    yname = []
    color = []
    alpha = []
    for i in range(words_count):
        for j in range(sentence_length):
            yname.append(word_key_list[i])
            xname.append(str(j+1))
            alpha.append(0.5 * min(counts[i,j]/np.amax(counts, axis = 1)[i], 0.9) + 0.1)

            color.append('lightgrey')

    data=dict(
        xname=xname,
        yname=yname,
        colors=color,
        alphas=alpha,
        count=counts.flatten(),
    )

    cap_pos_p = figure(title="coco_matrix",
               x_axis_location="above", tools=tools_list,
               x_range=[str(index + 1) for index in range(sentence_length)], y_range=word_key_list[:words_count],
               tooltips = [('names', '@yname, @xname'), ('count', '@count')])

    cap_pos_p.plot_width = 500
    cap_pos_p.plot_height = 1000
    cap_pos_p.grid.grid_line_color = None
    cap_pos_p.axis.axis_line_color = None
    cap_pos_p.axis.major_tick_line_color = None
    cap_pos_p.axis.major_label_text_font_size = "9pt"
    cap_pos_p.axis.major_label_standoff = 0
    cap_pos_p.xaxis.major_label_orientation = np.pi/3

    cap_pos_p.circle(x='xname', y='yname', radius='alphas', source=data,
           color='colors', alpha='alphas', line_color=None,
           hover_line_color='black', hover_color='colors')
    
    cap_pos_tab_list.append(Panel(child=cap_pos_p, title=super_cat))

cap_pos_tabs = Tabs(tabs=cap_pos_tab_list)
show(cap_pos_tabs)


In [28]:
from bokeh.layouts import gridplot, column
from bokeh.io import save

output_column = column([super_cat_bar_graph, 
                        tabs, 
                        matrix_p, 
                        heat_tabs, 
                        gridplot(heat_grid_list, ncols=8, plot_width=200, plot_height=200), 
                        cap_tabs, 
                        cap_pos_tabs
                       ])
show(output_column)
save(output_column, filename=output_file_name, title=output_title)

from IPython.display import clear_output
clear_output()