# Small Experiment Data Analysis

In [1]:
import json
from six.moves import cPickle
import os
import statistics

from IPython.core.display import HTML 
from IPython.core.display import Image, display

import matplotlib
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline  

In [13]:
"""
Load Coco data
"""
COCO_PATH = 'data/coco/'
COCO_ANNO_PATH = COCO_PATH + 'annotations/'

# Parse the train2014 caption annotation file into a plain dict.
with open(COCO_ANNO_PATH + 'captions_train2014.json') as f:
    coco_captions = json.load(f)
# print len(coco_captions)
# with open(COCO_ANNO_PATH + 'captions_val2014.json') as f:
#     coco_captions =  dict(coco_captions.items() + json.load(f).items())
# print len(coco_captions)

# Reverse index keyed by integer image id. Each value is a dict holding the
# image info under 'image' and, once a caption annotation has been seen for
# that image, the raw annotation records under 'annotation'.
img_captions = {}
for img_info in coco_captions['images']:
    mid = int(img_info['id'])
    img_captions.setdefault(mid, {})['image'] = img_info

for cap_info in coco_captions['annotations']:
    mid = int(cap_info['image_id'])
    if 'annotation' not in img_captions[mid]:
        # NOTE(review): 'captions' is created here but nothing in this cell
        # ever appends to it -- confirm whether it is still needed.
        img_captions[mid].update(annotation=[], captions=[])
    img_captions[mid]['annotation'].append(cap_info)


In [16]:
# Defaults consumed by get_expr_summary():
#   IN_DIR / IN_FILE_NAME   -> pickle holding the image ids of the experiment
#   OUT_DIR / OUT_FILE_NAME -> suffix used for "scores_<name>.pkl" and
#                              "vis_<name>.json"
IN_DIR, OUT_DIR = 'input', 'output'
IN_FILE_NAME = 'high_coexist_img_ids.pkl'
OUT_FILE_NAME = 'highexist_gaussian'

def get_stat(data):
    """Print summary statistics for a sequence of numeric scores.

    Prints the element count, mean, number of non-zero entries, the median
    (plus high/low medians), max, min and the sample standard deviation.

    Args:
        data: a sized, iterable collection of numbers (e.g. a list of scores).

    Returns:
        None. Nothing is printed when ``data`` is empty.
    """
    if len(data) == 0:
        return

    # Count with a generator instead of len(filter(...)): filter() returns a
    # lazy iterator on Python 3, which has no len().
    nonzero = sum(1 for x in data if x != 0)
    # statistics.stdev() raises StatisticsError for fewer than two values;
    # the sample standard deviation is undefined there, so report NaN.
    std = statistics.stdev(data) if len(data) > 1 else float('nan')

    print("")
    print("Total:\t%d;\t\tMean:\t\t%f;\tNonezero:\t%d" % (len(data), statistics.mean(data), nonzero))
    print("Median:\t%f;\tMedian(H):\t%f;\tMedian(L):\t%f;" % (statistics.median(data), statistics.median_high(data), statistics.median_low(data)))
    print("Max:\t%f;\tMin:\t\t%f;\tStd:\t\t%f" % (max(data), min(data), std))
    print("\n")
    
def look_up_image(title, idx, in_ids, vis, scores):
    """Print the captions for one experiment entry and display its image.

    Args:
        title:  label printed next to the entry index.
        idx:    position of the entry in ``in_ids`` and ``scores``.
        in_ids: indexable collection of image ids; ``in_ids[idx]`` is
                converted with int() to look the image up in the module-level
                ``img_captions`` index.
        vis:    indexable collection of dicts with a 'caption' key; entry
                ``2*idx + 1`` is printed as the original caption and entry
                ``2*idx`` as the ablation caption.
        scores: indexable collection of scores, parallel to ``in_ids``.
    """
    print("[%d]%s\n" % (idx, title))
    print("\tScore:%s;\tImageId:%s\n" % (scores[idx], in_ids[idx]))

    original_caption = vis[idx * 2 + 1]['caption']
    ablation_caption = vis[idx * 2]['caption']
    print("\tOriginal Caption:\n\t\t%s;\n\tAblation Caption:\n\t\t%s;" % (original_caption, ablation_caption))

    print("\tAnnotated Captions:\n")
    record = img_captions[int(in_ids[idx])]
    # Number the annotated captions from 1 for readability.
    for number, note in enumerate(record['annotation'], 1):
        print("\t\t%d. %s\n" % (number, note['caption']))
    display(Image(url=record['image']['coco_url']))

def get_expr_summary(in_file=IN_FILE_NAME, out_file=OUT_FILE_NAME, in_path=IN_DIR, out_path=OUT_DIR, num=5):
    """Print score statistics for one experiment and show its lowest-scoring images.

    Args:
        in_file:  name of the pickle (inside ``in_path``) holding the image ids.
        out_file: experiment name; the scores are read from
                  ``scores_<out_file>.pkl`` and the caption records from
                  ``vis_<out_file>.json``, both inside ``out_path``.
        in_path:  directory containing ``in_file``.
        out_path: directory containing the score and vis files.
        num:      how many of the lowest-scoring entries to display.

    Every displayed entry is titled "Zero Image", whatever its actual score.
    """
    # NOTE: unpickling can execute arbitrary code -- only load trusted files.
    # Pickles are opened in binary mode and every handle is closed via `with`.
    with open(os.path.join(in_path, in_file), 'rb') as fh:
        in_ids = cPickle.load(fh)
    with open(os.path.join(out_path, "scores_%s.pkl" % out_file), 'rb') as fh:
        scores = cPickle.load(fh)
    with open(os.path.join(out_path, 'vis_%s.json' % out_file)) as fh:
        vis = json.load(fh)

    print("Statistics for scores of gaussian filtered ablations:")
    get_stat(scores)

    # Indices ordered by ascending score; sorted() is stable, so entries with
    # equal scores keep their original relative order.
    lowest = sorted(range(len(scores)), key=lambda i: scores[i])[:num]
    for idx in lowest:
        look_up_image("Zero Image", idx, in_ids, vis, scores)

# Summarise the experiment identified by the default input/output file names.
get_expr_summary(
    in_file=IN_FILE_NAME,
    out_file=OUT_FILE_NAME,
)

Statistics for scores of gaussian filtered ablations:

Total:	1013;		Mean:		0.837452;	Nonezero:	1008
Median:	1.000000;	Median(H):	1.000000;	Median(L):	1.000000;
Max:	1.000000;	Min:		0.000000;	Std:		0.276086


[107]Zero Image

	Score:0.0;	ImageId:340096

	Original Caption:
		a stop sign is on the corner of a street;
	Ablation Caption:
		a person holding a glass of wine in a room;
	Annotated Captions:

		1. A stop sign covered in lights at a street intersection.

		2. A stop sign that is decorated with Christmas lights.

		3. a decorated lite up stop sign at the corner

		4. car comes to the curve to stop at the stop sign

		5. A stop sign covered in a white lights. 



[427]Zero Image

	Score:0.0;	ImageId:110251

	Original Caption:
		a street sign on a street corner;
	Ablation Caption:
		a park bench sitting in the middle of a park;
	Annotated Captions:

		1. A bench sitting on top of a park near a play ground.

		2. a fenced in park on a city street

		3. a sign showing that there kids playing in the field

		4. a sign on a pole near a park 

		5. A playground with a sign saying caution live children playing.



[550]Zero Image

	Score:0.0;	ImageId:289515

	Original Caption:
		a street sign on the side of a street;
	Ablation Caption:
		a car parked in front of a building;
	Annotated Captions:

		1. A sidewalk and doughnut shop with a bike leaning against the building

		2. A long shot of a city sidewalk on a sunny day 

		3. A sign for a doughnut shop on a sidewalk by a street.

		4. Donut shop sign on an empty street next to a tree

		5. A building with a sign that says Donuts above the door. 



[584]Zero Image

	Score:0.0;	ImageId:461464

	Original Caption:
		a clock on the side of a building;
	Ablation Caption:
		a train station with a train passing by;
	Annotated Captions:

		1. A train station has a foreign sign and a clock.

		2. View from the train at Pistoia station in Italy

		3. A clock on a train platform during day time.

		4. a clock attached to a pole at a train station

		5. A street sign and a clock sitting by a parked train.



[877]Zero Image

	Score:0.0;	ImageId:199678

	Original Caption:
		a bunch of different types of electronic devices;
	Ablation Caption:
		a pair of scissors sitting on top of a table;
	Annotated Captions:

		1. A stack of cash sitting under a cell phone.

		2. USA 20 dollars totaling $120, held down by a cell phone with Coca cola cans nearby. 

		3. A bunch of twentys underneath a celll phone

		4. Six twenty dollar bills are underneath a phone.

		5. A number of twenty dollar bills with a phone



In [49]:
# NOTE(review): B_OUT_FILE and B_TMP_DIR are not defined in any cell visible
# in this notebook -- they must be set before this cell can run on a fresh
# kernel. The recorded run failed with a JSON decoding error; json.load on
# vis.json is the only JSON call here, so check that file.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
# Pickles are opened in binary mode and every handle is closed via `with`.
with open(os.path.join(IN_DIR, B_OUT_FILE), 'rb') as f:
    blkin = cPickle.load(f)
with open(os.path.join(B_TMP_DIR, 'vis.json')) as f:
    blkvis = json.load(f)
with open(os.path.join(OUT_DIR, B_OUT_FILE), 'rb') as f:
    glkscores = cPickle.load(f)

ValueError: No JSON object could be decoded

In [None]:
# List the current working directory. The path is passed explicitly because
# os.listdir() requires its argument on Python 2, which this notebook targets.
os.listdir('.')