In [1]:
# Notebook setup: enable automatic module reloading and inline matplotlib
# figures, then open the Snorkel session that the later cells use for their
# database queries.
%load_ext autoreload
%autoreload 2
%matplotlib inline
import os

# To use a database other than SQLite, uncomment the line below.
# A non-SQLite database is necessary for parallel execution, among other things.
# NOTE(review): presumably this must be set before `snorkel` is imported,
# which is why it sits above the import -- confirm.
# os.environ['SNORKELDB'] = 'postgres://localhost:5432/babble_test_bike'

from snorkel import SnorkelSession
session = SnorkelSession()

In [2]:
import numpy as np

# Locate the annotation files shipped with the bike tutorial under $SNORKELHOME.
# Alternative location used previously:
# anns_folder = '/dfs/scratch0/paroma/coco/annotations/'
#
# The trailing '' component keeps a trailing path separator on `anns_folder`,
# so code that appends a file name by plain string concatenation still works.
anns_folder = os.path.join(
    os.environ['SNORKELHOME'], 'tutorials', 'babble', 'bike', 'data', '')
train_path = os.path.join(anns_folder, 'train_anns.npy')
val_path = os.path.join(anns_folder, 'val_anns.npy')

# allow_pickle=True is passed explicitly: NumPy >= 1.16.3 defaults to
# allow_pickle=False and raises ValueError when a .npy file holds pickled
# Python objects (presumably the case here, given the .tolist() conversion --
# confirm). Unpickling executes arbitrary code, so only load trusted files.
train_anns = np.load(train_path, allow_pickle=True).tolist()
val_anns = np.load(val_path, allow_pickle=True).tolist()

In [3]:
from snorkel.models import candidate_subclass

# Define the candidate class used throughout the notebook. Each Biker
# candidate is built from two arguments, named 'person' and 'bike'.
biker_argument_names = ['person', 'bike']
Biker = candidate_subclass('Biker', biker_argument_names)

In [4]:
from snorkel.parser import ImageCorpusExtractor, CocoPreprocessor

corpus_extractor = ImageCorpusExtractor(candidate_class=Biker)

coco_preprocessor = CocoPreprocessor(train_path, source=0)
%time corpus_extractor.apply(coco_preprocessor)

coco_preprocessor = CocoPreprocessor(val_path, source=1)
%time corpus_extractor.apply(coco_preprocessor, clear=False)

for split in [0, 1]:
    num_candidates = session.query(Biker).filter(Biker.split == split).count()
    print("Split {} candidates: {}".format(split, num_candidates))

Clearing existing...
Running UDF...
CPU times: user 14.1 s, sys: 26.7 ms, total: 14.1 s
Wall time: 14.2 s
Running UDF...
CPU times: user 1.44 s, sys: 4.89 ms, total: 1.45 s
Wall time: 1.45 s
Split 0 candidates: 2406
Split 1 candidates: 1037


In [5]:
# Load every Biker candidate (all splits) and print each one's stable id.
# NOTE(review): this prints one line per candidate, i.e. several thousand
# lines for the splits extracted above -- consider printing only a sample.
candidates = session.query(Biker).all()
stable_ids = [candidate.get_stable_id() for candidate in candidates]
for stable_id in stable_ids:
    print(stable_id)

0:0::bbox:10~~0:0::bbox:4
0:0::bbox:13~~0:0::bbox:5
0:0::bbox:14~~0:0::bbox:4
0:0::bbox:14~~0:0::bbox:17
0:0::bbox:14~~0:0::bbox:18
0:0::bbox:15~~0:0::bbox:21
0:0::bbox:22~~0:0::bbox:4
0:0::bbox:22~~0:0::bbox:17
0:0::bbox:27~~0:0::bbox:4
0:0::bbox:27~~0:0::bbox:5
0:0::bbox:27~~0:0::bbox:17
0:0::bbox:27~~0:0::bbox:18
0:0::bbox:27~~0:0::bbox:19
0:0::bbox:27~~0:0::bbox:20
0:0::bbox:27~~0:0::bbox:21
0:0::bbox:27~~0:0::bbox:23
0:0::bbox:27~~0:0::bbox:25
0:1::bbox:4~~0:1::bbox:0
0:2::bbox:0~~0:2::bbox:1
0:3::bbox:3~~0:3::bbox:15
0:3::bbox:4~~0:3::bbox:15
0:3::bbox:12~~0:3::bbox:1
0:3::bbox:12~~0:3::bbox:15
0:3::bbox:13~~0:3::bbox:15
0:3::bbox:14~~0:3::bbox:15
0:3::bbox:18~~0:3::bbox:15
0:3::bbox:19~~0:3::bbox:0
0:3::bbox:19~~0:3::bbox:1
0:3::bbox:19~~0:3::bbox:2
0:3::bbox:19~~0:3::bbox:15
0:3::bbox:19~~0:3::bbox:16
0:3::bbox:19~~0:3::bbox:17
0:4::bbox:1~~0:4::bbox:0
0:5::bbox:1~~0:5::bbox:0
0:6::bbox:2~~0:6::bbox:0
0:7::bbox:1~~0:7::bbox:2
0:9::bbox:1~~0:9::bbox:0
0:9::bbox:1~~0:9::bbox:3
0:

In [6]:
# Load the gold labels, a mapping from a candidate hash string to its label.
# The mapping is stored as a pickled Python object inside the .npy file, so
# allow_pickle=True is required: NumPy >= 1.16.3 defaults to allow_pickle=False
# and raises ValueError otherwise. Unpickling executes arbitrary code, so only
# load trusted files.
labels_path = os.path.join(anns_folder, 'labels_by_candidate.npy')
labels_by_candidate = np.load(labels_path, allow_pickle=True).tolist()

In [7]:
from snorkel.models import StableLabel
from snorkel.db_helpers import reload_annotator_labels

# Store the gold labels as StableLabel rows (skipping any that already exist
# for this annotator), then rebuild the annotator labels for split 1.
candidate_class = Biker
annotator_name = 'gold'

# Maps the set name embedded in each candidate hash onto the `source` id that
# was used when the corresponding annotation file was preprocessed.
SOURCE_BY_SET_NAME = {'train': 0, 'val': 1}

# Fetch the stable ids already stored for this annotator once, instead of
# issuing a separate COUNT query for every label inside the loop.
existing_stable_ids = {
    row[0]
    for row in session.query(StableLabel.context_stable_ids)
                      .filter(StableLabel.annotator_name == annotator_name)
}

for candidate_hash, raw_label in labels_by_candidate.items():
    # Hash layout: '<set_name>:<image_idx>:<bbox1_idx>:<bbox2_idx>'.
    set_name, image_idx, bbox1_idx, bbox2_idx = candidate_hash.split(':')
    source = SOURCE_BY_SET_NAME[set_name]

    # Rebuild the candidate's stable id: the two bounding-box ids joined by '~~'.
    stable_id_1 = "{}:{}::bbox:{}".format(source, image_idx, bbox1_idx)
    stable_id_2 = "{}:{}::bbox:{}".format(source, image_idx, bbox2_idx)
    context_stable_ids = "~~".join([stable_id_1, stable_id_2])

    if context_stable_ids in existing_stable_ids:
        continue

    # Truthy labels are stored as +1, falsy labels as -1.
    session.add(StableLabel(
        context_stable_ids=context_stable_ids,
        annotator_name=annotator_name,
        value=1 if raw_label else -1))
    existing_stable_ids.add(context_stable_ids)

session.commit()
reload_annotator_labels(session, candidate_class, annotator_name, split=1, filter_label_split=False)

AnnotatorLabels created: 906


In [8]:
# Count the stable labels stored for the current annotator.
annotator_label_query = session.query(StableLabel).filter(
    StableLabel.annotator_name == annotator_name)
stable_labels = annotator_label_query.all()
len(stable_labels)

906

In [9]:
# Fetch all Biker candidates in split 1 and show the first as a sanity check.
# NOTE(review): `num_candidates` is bound to the same list of candidates, not
# to a count -- this looks like a leftover; confirm nothing relies on it.
split_one_query = session.query(Biker).filter(Biker.split == 1)
split_one_candidates = split_one_query.all()
candidates = split_one_candidates
num_candidates = split_one_candidates
print(candidates[0])

Biker(Bbox(val:0:3:person:(248.88, 305.52, 438.54, 455.78)), Bbox(val:0:1:bike:(303.56, 358.9, 455.17, 533.18)))


In [10]:
from snorkel.annotations import load_gold_labels

# Load the 'gold' annotator's labels for split 1 and display the result
# (the bare name on the last line makes the notebook render it).
dev_split = 1
L_gold_dev = load_gold_labels(session, annotator_name='gold', split=dev_split)
L_gold_dev

<1037x1 sparse matrix of type '<type 'numpy.int64'>'
	with 906 stored elements in Compressed Sparse Row format>