## Comprehensive Experiments

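Run the ablation experiments on a larger dataset (COCO 2014 training captions with COCO-Text annotations) using the basic ablation methods (Gaussian filtering and blackout).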

In [12]:
# Imports
# Record the notebook's working directory (IPython %pwd magic) as a plain string;
# later cells build the data and toolkit paths from it.
CWD = %pwd
CWD = str(CWD)

import json
from six.moves import cPickle
import os
import sys
import statistics

from IPython.core.display import HTML 
from IPython.core.display import Image, display, display_pretty

import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import pipeline


%matplotlib inline  


CWD

'/Users/Grendel/Desktop/ML/textmatters'

In [2]:
# Data and toolkit locations, all resolved relative to the notebook's working directory.
COCO_PATH = os.path.join(CWD,'data','coco')
COCO_ANNO_PATH = os.path.join(COCO_PATH, 'annotations')
COCO_TEXT_PATH = os.path.join(CWD, 'coco-text')

# Make the coco_text module importable; the guard keeps re-runs of this cell
# from stacking duplicate entries on sys.path.
if COCO_TEXT_PATH not in sys.path:
    sys.path.insert(0, COCO_TEXT_PATH)
import coco_text

# `ct` is the COCO_Text object queried by later cells (getAnnIds / loadAnns).
# The module keeps its own name so `ct` is never bound to two different kinds of object.
ct = coco_text.COCO_Text(os.path.join(COCO_PATH, 'COCO_Text.json'))

loading annotations into memory...
0:00:02.588224
creating index...
index created!


In [3]:
# Load Coco data (Train): the caption annotations for the 2014 training split.
captions_path = os.path.join(COCO_ANNO_PATH, 'captions_train2014.json')
with open(captions_path) as f:
    coco_captions = json.load(f)

# Reverse index keyed by integer image id:
#   img_captions[id]['image']      -> the image info record
#   img_captions[id]['annotation'] -> list of caption records for that image
# img_ids keeps the ids in the order they appear in the file.
img_ids = []
img_captions = {}
for img_info in coco_captions['images']:
    mid = int(img_info['id'])
    img_ids.append(mid)
    img_captions.setdefault(mid, {})['image'] = img_info

for cap_info in coco_captions['annotations']:
    mid = int(cap_info['image_id'])
    img_captions[mid].setdefault('annotation', []).append(cap_info)



In [4]:
# Split the image ids into three groups:
#     1. no_texts:     images with no text instances
#     2. rel_texts:    images with a relevant COCO-Text instance
#                      (a word of the instance also appears in a caption)
#     3. no_rel_texts: images with text instances, none of them relevant

def _normalized_words(tokens):
    """Return the set of upper-cased tokens with surrounding periods stripped.

    Tokens that become empty after stripping (e.g. a standalone "." in a caption,
    or the '' produced by splitting an empty or double-spaced string on ' ')
    are dropped, so two empty tokens can never count as a word match.
    """
    words = set()
    for token in tokens:
        word = token.strip('.').upper()
        if word:
            words.add(word)
    return words

no_texts     = []
rel_texts    = []
no_rel_texts = []
for mid in img_ids:
    # keep only the text annotations that carry a transcription
    anns = [ann for ann in ct.loadAnns(ct.getAnnIds(imgIds=mid)) if 'utf8_string' in ann]
    if not anns:
        no_texts.append(mid)
        continue

    # words of the text instances
    text_words = _normalized_words(w for ann in anns for w in ann['utf8_string'].split(' '))

    # words of the captions; an image without caption records has no caption words
    caps = [note['caption'] for note in img_captions[mid].get('annotation', [])]
    caps_words = _normalized_words(' '.join(caps).split())

    if text_words & caps_words:
        rel_texts.append(mid)
    else:
        no_rel_texts.append(mid)

# Persist each group; pickle files are opened in binary mode as the pickle docs require.
for pkl_name, ids in (('no_texts_img_ids.pkl', no_texts),
                      ('rel_texts_img_ids.pkl', rel_texts),
                      ('no_rel_texts_img_ids.pkl', no_rel_texts)):
    with open(os.path.join(CWD, 'input', pkl_name), 'wb') as f:
        cPickle.dump(ids, f)

print("No Texts:\t%d;\t\tRelevant Text:\t%d\t\tNo relevant text:\t%d"%(len(no_texts), len(rel_texts), len(no_rel_texts)))

No Texts:	61247;		Relevant Text:	5556		No relevant text:	15980


In [37]:
# Defaults for the summary helpers: directories holding the input id pickles and
# the pipeline outputs, plus the default input file / output tag.
IN_DIR, OUT_DIR = 'input', 'output'
IN_FILE_NAME, OUT_FILE_NAME = 'high_coexist_img_ids.pkl', 'highexist_gaussian'

def get_stat(data):
    """Print summary statistics for a sequence of numeric scores.

    Prints the count, mean, number of non-zero entries, the median
    (plain / high / low), the maximum, the minimum and the sample standard
    deviation. Prints nothing and returns None when `data` is empty.

    Args:
        data: a sized sequence of numbers.
    """
    if len(data) == 0:
        return
    # counted with a generator so this also works where filter() returns an iterator
    nonzero = sum(1 for x in data if x != 0)
    print("")
    print("Total:\t%d;\t\tMean:\t\t%f;\tNonezero:\t%d"%(len(data), statistics.mean(data), nonzero))
    print("Median:\t%f;\tMedian(H):\t%f;\tMedian(L):\t%f;"%(statistics.median(data), statistics.median_high(data), statistics.median_low(data)))
    try:
        std = statistics.stdev(data)
    except statistics.StatisticsError:
        # stdev needs at least two data points; report 0 for a single value
        std = 0
    print("Max:\t%f;\tMin:\t\t%f;\tStd:\t\t%f"%(max(data), min(data), std))
    print("\n")
    
def look_up_image(title, idx, in_ids, vis, scores, out_file):
    """Print the captions for one experiment entry and show its two images.

    Args:
        title: heading printed after the index.
        idx: position of the entry in `in_ids` and `scores`.
        in_ids: sequence of image ids; `in_ids[idx]` must convert to int.
        vis: list of caption records; the record at 2*idx is printed as the
            ablation caption and the one at 2*idx+1 as the original caption.
        scores: sequence of scores, indexed by `idx`.
        out_file: output tag; the ablated image is looked up under "tmp_<out_file>".

    Reads the module-level `img_captions` index for the annotated captions and
    the image URL.
    """
    print("[%d]%s\n"%(idx,title))
    print("\tScore:%s;\tImageId:%s\n"%(scores[idx], in_ids[idx]))
    print("\tOriginal Caption:\n\t\t%s;\n\tAblation Caption:\n\t\t%s;"%(vis[idx*2+1]['caption'],vis[idx*2]['caption']))
    print("\tAnnotated Captions:\n")
    img_id = int(in_ids[idx])
    for i, note in enumerate(img_captions[img_id]['annotation']):
        # format the caption directly: str() on a unicode caption raises
        # UnicodeEncodeError for non-ASCII text under Python 2
        print("\t\t%d. %s\n"%(i+1, note['caption'].strip()))

    # Display images side by side: http://permalink.gmane.org/gmane.comp.python.ipython.devel/11073
    ablated_path = os.path.join("tmp_%s"%out_file, "%s_%s_ablt.jpg"%(str(idx).zfill(16),str(img_id)))
    s = """<table>
        <tr>
        <th><img src="%s" style="max-width:400px" /></th>
        <th><img src="%s" style="max-width:400px" /></th>
        </tr></table>"""%(img_captions[img_id]['image']['coco_url'], ablated_path)
    display(HTML(s))

def get_expr_summary(in_file=IN_FILE_NAME, out_file=OUT_FILE_NAME, in_path=IN_DIR, out_path=OUT_DIR, num=3):
    """Print score statistics for one experiment and show its lowest-scoring images.

    Args:
        in_file: pickle of image ids, read from `in_path`.
        out_file: output tag; "scores_<out_file>.pkl" and "vis_<out_file>.json"
            are read from `out_path`.
        in_path: directory containing `in_file`.
        out_path: directory containing the scores and vis files.
        num: how many of the lowest-scoring entries to display.

    Note that the defaults are bound when the function is defined, so later
    reassignment of IN_FILE_NAME / OUT_FILE_NAME does not change them.
    """
    # NOTE: unpickling can execute arbitrary code; only point this at trusted files.
    with open(os.path.join(in_path, in_file), 'rb') as f:
        in_ids = cPickle.load(f)
    with open(os.path.join(out_path, "scores_%s.pkl"%out_file), 'rb') as f:
        scores = cPickle.load(f)
    with open(os.path.join(out_path, 'vis_%s.json'%out_file)) as f:
        vis = json.load(f)
    get_stat(scores)

    # Rank only indices that exist in both lists: look_up_image indexes in_ids
    # with the same idx, so a score without a matching id would raise IndexError.
    usable = min(len(scores), len(in_ids))
    # sorted() is stable, so ties keep their original index order
    ranked = sorted(range(usable), key=lambda i: scores[i])
    for idx in ranked[:num]:
        look_up_image("Iamges with lowest scores:",idx, in_ids, vis, scores, out_file)


In [38]:
# Summarise the "gaussian" run over the images whose text also appears in a caption.
IN_FILE_NAME, OUT_FILE_NAME = "rel_texts_img_ids.pkl", "rel_texts_gaussian"
# Disabled pipeline invocation (presumably what generated these results; confirm before re-enabling):
# pipeline.run(amode="gaussian", input_file=IN_FILE_NAME, output_file=OUT_FILE_NAME)
get_expr_summary(out_file=OUT_FILE_NAME, in_file=IN_FILE_NAME)



Total:	5600;		Mean:		0.076921;	Nonezero:	2820
Median:	0.066667;	Median(H):	0.066667;	Median(L):	0.066667;
Max:	1.000000;	Min:		0.000000;	Std:		0.119841


[0]Iamges with lowest scores:

	Score:0.0;	ImageId:222016

	Original Caption:
		a man riding a motorcycle with a woman on the back;
	Ablation Caption:
		a picture of a person holding a cell phone;
	Annotated Captions:

		1. a big red telephone booth that a man is standing in

		2. a person standing inside of a phone booth

		3. this is an image of a man in a phone booth.

		4. A man is standing in a red phone booth.

		5. A man using a phone in a phone booth.



Unnamed: 0,Unnamed: 1


[2]Iamges with lowest scores:

	Score:0.0;	ImageId:257350

	Original Caption:
		a close up of an orange and a banana;
	Ablation Caption:
		a bus is parked in a parking lot;
	Annotated Captions:

		1. a group of people riding bikes stopped in front of a building

		2. A group of people on bicy les in front of a church.

		3. Bike riders on the corner outside of a church.

		4. Several children on bicycles outside a white church.

		5. Several people on bikes in front of a building.



Unnamed: 0,Unnamed: 1


[3]Iamges with lowest scores:

	Score:0.0;	ImageId:311914

	Original Caption:
		a lunch box with a variety of food;
	Ablation Caption:
		a man standing in front of a truck;
	Annotated Captions:

		1. A school bus parked with it's stop sign closed.

		2. A stop sign is on the side of a school bus.

		3. a bus sits stopped with a sign on the side of it

		4. Side of a school bus showing a stop sign.

		5. A view of a stop sign, on the side of a bus.



Unnamed: 0,Unnamed: 1


In [None]:
# Summarise the "blackout" run over the images whose text also appears in a caption.
IN_FILE_NAME, OUT_FILE_NAME = "rel_texts_img_ids.pkl", "rel_texts_blackout"
# Disabled pipeline invocation (presumably what generates these results; confirm before re-enabling):
# pipeline.run(amode="blackout", input_file=IN_FILE_NAME, output_file=OUT_FILE_NAME)
get_expr_summary(out_file=OUT_FILE_NAME, in_file=IN_FILE_NAME)

In [None]:
# Summarise the "blackout" run over the images whose text never appears in a caption.
IN_FILE_NAME, OUT_FILE_NAME = "no_rel_texts_img_ids.pkl", "no_rel_texts_blackout"
# Disabled pipeline invocation (presumably what generates these results; confirm before re-enabling):
# pipeline.run(amode="blackout", input_file=IN_FILE_NAME, output_file=OUT_FILE_NAME)
get_expr_summary(out_file=OUT_FILE_NAME, in_file=IN_FILE_NAME)

In [39]:
IN_FILE_NAME  = "no_rel_texts_img_ids.pkl"
# NOTE(review): the disabled pipeline call below uses amode="gaussian" while the
# output tag says "blackout" (the same tag as the previous cell). Re-enabling it
# as written would store gaussian results under the blackout name; confirm which
# experiment this cell is meant to summarise.
OUT_FILE_NAME = "no_rel_texts_blackout"
# pipeline.run(amode="gaussian", input_file=IN_FILE_NAME, output_file=OUT_FILE_NAME)
get_expr_summary(in_file=IN_FILE_NAME, out_file=OUT_FILE_NAME)


# Reload the same experiment into module-level names for ad-hoc inspection.
# Files are opened via `with` so the handles are closed; pickles are read in binary mode.
with open(os.path.join(IN_DIR, IN_FILE_NAME), 'rb') as f:
    in_ids = cPickle.load(f)
with open(os.path.join(OUT_DIR, "scores_%s.pkl"%OUT_FILE_NAME), 'rb') as f:
    scores = cPickle.load(f)
with open(os.path.join(OUT_DIR, 'vis_%s.json'%OUT_FILE_NAME)) as f:
    vis = json.load(f)

# First 29 image ids of this group; reused by the "poking" cell.
selected_ids = in_ids[:29]
selected_ids


Total:	16000;		Mean:		0.056330;	Nonezero:	6704
Median:	0.000000;	Median(H):	0.000000;	Median(L):	0.000000;
Max:	1.000000;	Min:		0.000000;	Std:		0.094949


[0]Iamges with lowest scores:

	Score:0.0;	ImageId:392136

	Original Caption:
		a man with a hat and a hat on a clock;
	Ablation Caption:
		a group of people standing next to each other;
	Annotated Captions:

		1. A large bus and some people on the street.

		2. Several people are standing on the sidewalk as a bus goes by.

		3. Bus rushing by a group of people walking in a city.

		4. A double-decker bus moving down the street as people stand waiting.

		5. A group of people standing next to a yellow and blue double decker bus.



Unnamed: 0,Unnamed: 1


[2]Iamges with lowest scores:

	Score:0.0;	ImageId:71631

	Original Caption:
		a man and a woman standing next to each other;
	Ablation Caption:
		a truck is parked in a parking lot;
	Annotated Captions:

		1. Dining room table set for a casual meal, with flowers.

		2. A red table topped with four white place mats.

		3. there is a dining room table with a red cloth and a vase with roses

		4. a table with a red tablecloth and white placemats

		5. A small dinning table with all red napkins and a red table cloth .



Unnamed: 0,Unnamed: 1


[4]Iamges with lowest scores:

	Score:0.0;	ImageId:279108

	Original Caption:
		a desk with a laptop and a keyboard;
	Ablation Caption:
		a bus is driving down the street in a city;
	Annotated Captions:

		1. A woman feeding a man food from a spoon.

		2. A woman offering a man a taste of something in front of other people.

		3. A woman feeds a man a bite of food.

		4. a woman is feeding something to a man

		5. A woman spoon feeding an old man



Unnamed: 0,Unnamed: 1


[392136,
 37015,
 71631,
 491269,
 279108,
 438422,
 485894,
 299411,
 239811,
 287541,
 540162,
 357684,
 576757,
 98760,
 77806,
 280980,
 62604,
 217306,
 374114,
 560459,
 448698,
 70868,
 513541,
 239728,
 474882,
 483008,
 424102,
 77375,
 212091]

In [34]:
# Small "poking" run: re-run the blackout ablation on a handful of images.
# Depends on `selected_ids` from the no_rel_texts cell, so that cell must run first.
IN_FILE_NAME  = 'poking.pkl'
OUT_FILE_NAME = 'poking_blackout'

# NOTE(review): 392136 is prepended although the `selected_ids` shown in this
# notebook already starts with it, so that image is processed twice. Confirm this is intended.
pipeline.makePickle(IN_FILE_NAME, [392136] + selected_ids)
pipeline.run(amode='blackout', input_file=IN_FILE_NAME, output_file=OUT_FILE_NAME, tmp_path='tmp_%s'%OUT_FILE_NAME, batch_size=10)
get_expr_summary(in_file=IN_FILE_NAME, out_file=OUT_FILE_NAME)

# Reload the run into module-level names for inspection in the following cells.
# Files are opened via `with` so the handles are closed; pickles are read in binary mode.
with open(os.path.join(IN_DIR, IN_FILE_NAME), 'rb') as f:
    in_ids = cPickle.load(f)
with open(os.path.join(OUT_DIR, "scores_%s.pkl"%OUT_FILE_NAME), 'rb') as f:
    scores = cPickle.load(f)
with open(os.path.join(OUT_DIR, 'vis_%s.json'%OUT_FILE_NAME)) as f:
    vis = json.load(f)


Cleaning up
loading annotations into memory...
0:00:04.272789
creating index...
index created!
Ablating image 1/30
Ablating image 2/30
Ablating image 3/30
Ablating image 4/30
Ablating image 5/30
Ablating image 6/30
Ablating image 7/30
Ablating image 8/30
Ablating image 9/30
Ablating image 10/30
Ablating image 11/30
Ablating image 12/30
Ablating image 13/30
Ablating image 14/30
Ablating image 15/30
Ablating image 16/30
Ablating image 17/30
Ablating image 18/30
Ablating image 19/30
Ablating image 20/30
Ablating image 21/30
Ablating image 22/30
Ablating image 23/30
Ablating image 24/30
Ablating image 25/30
Ablating image 26/30
Ablating image 27/30
Ablating image 28/30
Ablating image 29/30
Ablating image 30/30
DataLoaderRaw loading images from folder: 	/Users/Grendel/Desktop/ML/textmatters/tmp_poking_blackout	

listing all images in directory /Users/Grendel/Desktop/ML/textmatters/tmp_poking_blackout	

DataLoaderRaw found 60 images	

constructing clones inside the LanguageModel	

cp "/Users

Unnamed: 0,Unnamed: 1


[20]Iamges with lowest scores:

	Score:0.111111111111;	ImageId:560459

	Original Caption:
		an airplane is parked on the runway at the airport;
	Ablation Caption:
		a plane sitting on the tarmac at an airport;
	Annotated Captions:

		1. A ""LAN" Brand airplane at an airport near the sea.

		2. A jetliner taking off from an airport runway.

		3. There is a plane taxiing on the ruwnay

		4. A large passenger jet on an airport runway near the coast.

		5. A passenger jet that is on a runway.



Unnamed: 0,Unnamed: 1


[6]Iamges with lowest scores:

	Score:0.2;	ImageId:438422

	Original Caption:
		a woman is holding a glass of wine;
	Ablation Caption:
		a man and a woman standing next to each other;
	Annotated Captions:

		1. A woman giving a taste test to a man.

		2. A woman feeds a sample of her dish to a man in front of onlookers.

		3. Man is fed a spoonful of food by a woman in front of other people.

		4. Several people observing a woman feeding a man food inside a restaurant kitchen

		5. A woman is giving a taste of her food to a man



Unnamed: 0,Unnamed: 1


In [35]:
# Show the caption records currently bound to `vis` (whichever cell loaded it last).
vis

[{u'caption': u'a group of people standing in front of a bus',
  u'image_id': u'1'},
 {u'caption': u'a group of people standing in front of a bus',
  u'image_id': u'2'},
 {u'caption': u'a group of people standing in front of a bus',
  u'image_id': u'3'},
 {u'caption': u'a group of people standing in front of a bus',
  u'image_id': u'4'},
 {u'caption': u'a group of people standing around a kitchen',
  u'image_id': u'5'},
 {u'caption': u'a group of people standing around a kitchen',
  u'image_id': u'6'},
 {u'caption': u'a dining room table and chairs in a kitchen',
  u'image_id': u'7'},
 {u'caption': u'a dining room table and chairs in a kitchen',
  u'image_id': u'8'},
 {u'caption': u'a kitchen with a refrigerator and a stove', u'image_id': u'9'},
 {u'caption': u'a kitchen with a refrigerator and a stove',
  u'image_id': u'10'},
 {u'caption': u'a woman and a man standing in a kitchen', u'image_id': u'11'},
 {u'caption': u'a woman and a man standing in a kitchen', u'image_id': u'12'},
 {u

In [36]:
# Inspect the raw vis records behind one entry of the rel_texts gaussian run.
IN_FILE_NAME  = "rel_texts_img_ids.pkl"
OUT_FILE_NAME = "rel_texts_gaussian"
# Files are opened via `with` so the handles are closed; pickles are read in binary mode.
with open(os.path.join(IN_DIR, IN_FILE_NAME), 'rb') as f:
    in_ids = cPickle.load(f)
with open(os.path.join(OUT_DIR, "scores_%s.pkl"%OUT_FILE_NAME), 'rb') as f:
    scores = cPickle.load(f)
with open(os.path.join(OUT_DIR, 'vis_%s.json'%OUT_FILE_NAME)) as f:
    vis = json.load(f)
# Only the last expression of a cell is displayed, so print the sizes explicitly.
print("vis: %d; scores: %d; in_ids: %d" % (len(vis), len(scores), len(in_ids)))
# The two vis records for entry `start`, at positions 2*start and 2*start+1.
start = 3
vis[start*2:start*2+2]

[{u'caption': u'a man standing in front of a truck', u'image_id': u'7'},
 {u'caption': u'a lunch box with a variety of food', u'image_id': u'8'}]