## MTurk I/O for Image Dataset

The query of interest is: **The outlined boxes depict a person riding a bike** 

We want to generate the input CSV file for MTurk and parse the output CSV file for results, using `MTurkHelper` from babble with adjustments for the candidate type.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
%matplotlib inline

import os
import sys
sys.path.append('/dfs/scratch0/paroma/coco')
sys.path.append('/dfs/scratch0/paroma/coco/PythonAPI/')

from pycocotools.coco import COCO
import skimage.io as io
import pylab
import cv2

import pandas as pd


%load_ext autoreload
%autoreload 2

## Generate Input CSV
Load the necessary data for this task and write out variables in the form:

`<img class="img-responsive center-block" src="http://paroma.github.io/turk_images/train_IDX_PIDX_BIDX.png" />`

In [None]:
# Directory that holds the saved .npy arrays for this task.
vg_folder = '/dfs/scratch0/paroma/visual_genome/'
# Per-split arrays that are later passed to generate_anns as COCO image ids.
train_mscoco = np.load(vg_folder+'train_mscoco.npy')
val_mscoco = np.load(vg_folder+'val_mscoco.npy')

# Per-split label arrays; entries equal to 1 are the ones counted as "Positive".
train_ground = np.load(vg_folder+'train_ground.npy')
val_ground = np.load(vg_folder+'val_ground.npy')

In [None]:
# Sizes of the train / val splits and how many entries of each label array equal 1.
# A single-argument print(...) runs unchanged on Python 2 and Python 3, and the
# format strings reproduce the spacing of the original comma-separated print.
print("Train Set Size:  %d  Positive:  %d" % (np.shape(train_mscoco)[0], np.count_nonzero(train_ground == 1)))
print("Val Set Size:  %d  Positive:  %d" % (np.shape(val_mscoco)[0], np.count_nonzero(val_ground == 1)))

In [None]:
def generate_anns(filename, coco_ids):
    """Collect the 'person' / 'bicycle' annotations for each requested image id.

    Args:
        filename: Path of the annotation file handed to ``COCO``.
        coco_ids: Iterable of image ids; one output entry is produced per id,
            in the same order.

    Returns:
        A list with one element per id in ``coco_ids``. Each element is a list
        of dicts holding only the ``'category_id'`` and ``'bbox'`` values of
        the annotations loaded for that id (an empty list when none are
        returned).
    """
    coco = COCO(filename)
    # Category ids are resolved once; they do not depend on the image.
    cat_ids = coco.getCatIds(catNms=['person', 'bicycle'])

    set_anns = []
    for set_id in coco_ids:
        ann_ids = coco.getAnnIds(imgIds=set_id, catIds=cat_ids, iscrowd=None)
        # Keep only the two fields used downstream; iterate the annotations
        # directly instead of indexing through xrange (Python-2-only).
        set_anns.append([
            {'category_id': ann['category_id'], 'bbox': ann['bbox']}
            for ann in coco.loadAnns(ann_ids)
        ])

    return set_anns

In [None]:
from pycocotools.coco import COCO
# Directory holding the COCO annotation JSON files.
coco_path = '/dfs/scratch0/paroma/coco/annotations/'

# Per-image person/bicycle annotation lists for the training ids.
train_anns = generate_anns(coco_path+'instances_train2014.json', train_mscoco)
# The equivalent call for the validation ids is left disabled.
#val_anns = generate_anns(coco_path+'instances_val2014.json', val_mscoco)

## MTurk Helper for Input CSV

We want to make "fake" candidates for the visual task, since it doesn't fit the usual sense of the word "candidate". They will just be a list of the image names that are saved (how convenient!).

## MTurk Helper for Output CSV

In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys

sys.path.append(os.environ['SNORKELHOME'] + '/tutorials/babble/data/')
from mturk_processing import MTurkHelper

In [2]:
# Path of the MTurk output CSV that is handed to helper.postprocess_visual.
output_csv_path = './mturk_visual_out.csv'

In [3]:
from mturk_processing import MTurkHelper

# Resolve the saved training annotations relative to SNORKELHOME (the same
# root already used to locate mturk_processing) instead of one user's
# absolute home-directory path, so the cell runs on any checkout.
helper = MTurkHelper(candidates=[], labels=[], num_hits=25, domain='vg',
                     anns_path=os.path.join(os.environ['SNORKELHOME'],
                                            'tutorials', 'babble', 'data',
                                            'image', 'train_anns.npy'))
# Parse the MTurk output CSV into explanations; no real candidates are passed.
explanations = helper.postprocess_visual(output_csv_path, candidates=[], verbose=False)

Num HITs unique: 25
Num HITs total: 75
Unanimous: 65
Majority: 24
Split: 0
Bad: 11


In [4]:
# Preview the first ten explanations as the cell output.
explanations[:10]

[Explanation("True, The top of Box Y is inside Box X."),
 Explanation("True, Box Y is directly below box X"),
 Explanation("True, Box X mostly overlaps Box Y."),
 Explanation("True, Box X and Y line up perfectly."),
 Explanation("True, Box X is centered and slightly above Box Y."),
 Explanation("True, Box x is an appropriate size over this size of box y."),
 Explanation("False, Box Y is much bigger than box X."),
 Explanation("False, The bottom of box X is touching the top of box Y at the top left corner of box Y."),
 Explanation("False, Box X doesn't overlap Box Y."),
 Explanation("False, Box Y is much bigger than it needs to be.")]

In [5]:
# Single-argument print(...) works on both Python 2 and Python 3; the format
# string keeps the double space produced by the original comma-separated print.
print("Number of Valid Explanations:  %d" % len(explanations))

Number of Valid Explanations:  243


In [6]:
# from snorkel.contrib.babble import Explanation
# from snorkel.contrib.babble.image import BBox
# A = BBox({'bbox': (100, 100, 100, 100), 'category_id': 1}, None)
# B = BBox({'bbox': (150, 150, 100, 100), 'category_id': 2}, None)
# a_and_b = (A, B)

# explanations = [
#     Explanation(
#         condition="the top of box y is below the top of box x",
#         label=True,
#         candidate=a_and_b,
#         semantics=None),
# ]

In [7]:
from snorkel.contrib.babble import Babbler
# NOTE(review): user_lists is not used in any of the visible cells — confirm whether it is still needed.
user_lists = {}
# Build a Babbler in image mode over the explanations parsed from the MTurk output.
babbler = Babbler(mode='image', explanations=explanations)

Created grammar with 225 rules


In [8]:
# Run the Babbler over its explanations; `lfs` presumably holds the resulting
# labeling functions — TODO confirm against Babbler.generate_lfs.
lfs = babbler.generate_lfs()

47 parses created from 33 out of 243 explanation(s)


In [None]:
# Show whatever babbler.get_explanations() returns as the cell output.
babbler.get_explanations()

In [None]:
from snorkel.contrib.babble import sem_to_str

# Print the sem_to_str rendering of the semantics of every parse held by the Babbler.
for parse in babbler.parses:
    print(sem_to_str(parse.semantics))
# Disabled alternative that prints the semantics object directly:
#     print(parse.semantics)