In [1]:
# Enable IPython's autoreload extension in mode 2 so that imported modules are
# reloaded before each cell runs; edits to library code are then picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os

# Task this notebook run targets. It must be one of the keys of `db_dict`;
# later cells branch on this value.
DOMAIN = 'bike'

# Connection string of the PostgreSQL database used for each domain.
db_dict = dict(
    test='postgres://localhost:5432/babble_model_unittest',
    spouse='postgres://localhost:5432/babble_model_spouse',
    bike='postgres://localhost:5432/babble_model_bike',
    cdr='postgres://localhost:5432/babble_model_cdr',
)

# Export the selected database URL through the SNORKELDB environment variable.
# NOTE(review): presumably snorkel reads this when it is imported in the next
# cell, so this cell has to run first -- confirm.
os.environ['SNORKELDB'] = db_dict[DOMAIN]

In [3]:
from snorkel import SnorkelSession
# Create the Snorkel session object that later cells hand to the domain model
# and use for database queries.
# NOTE(review): presumably bound to the database named by SNORKELDB -- confirm.
session = SnorkelSession()

In [4]:
from snorkel.contrib.babble.models import configuration

# `config` is an alias of the imported `configuration` object, not a copy, so
# the overrides below are written into that shared object.
config = configuration

# Run-specific overrides, applied in order through plain item assignment.
for option, setting in (
    ('domain', DOMAIN),
    ('parallelism', 1),
    ('majority_vote', True),
):
    config[option] = setting

In [5]:
from snorkel.models import candidate_subclass

# Build the domain-specific model and load the crowd-sourced explanations.
#
# Every supported branch defines the same set of names, because later cells
# use them regardless of DOMAIN:
#   candidate_class -- the candidate subclass for the domain
#   sm              -- the domain model driving parse/extract/label/...
#   explanations    -- result of post-processing the MTurk output CSV
#   user_lists      -- passed to sm.babble(...); empty for both domains
#
# Raises Exception for any DOMAIN that has no branch here.

# Imported before branching because both domain branches use it.
from tutorials.babble.data import MTurkHelper

if DOMAIN == 'spouse':
    from tutorials.babble.spouse import SpouseModel
    Spouse = candidate_subclass('Spouse', ['person1', 'person2'])
    candidate_class = Spouse
    sm = SpouseModel(session, Spouse, config)

    output_csv_path = (os.environ['SNORKELHOME'] +
                       '/tutorials/babble/data/spouse/mturk_spouse_01_out.csv')
    # The spouse explanations are linked against candidates already stored in
    # the database, so they are queried first.
    candidates = session.query(Spouse).all()
    helper = MTurkHelper(candidates, num_hits=25)
    explanations = helper.postprocess(output_csv_path, candidates=candidates, verbose=False)
    user_lists = {}
elif DOMAIN == 'bike':
    from tutorials.babble.bike import BikeModel
    Biker = candidate_subclass('Biker', ['person', 'bike'])
    candidate_class = Biker
    sm = BikeModel(session, Biker, config)

    output_csv_path = (os.environ['SNORKELHOME'] +
                       '/tutorials/babble/data/bike/VisualGenome_all_out.csv')
    # No candidates are passed here: this cell runs before the parse stage.
    helper = MTurkHelper(candidates=[], labels=[], num_hits=40, domain='vg', workers_per_hit=3)
    explanations = helper.postprocess_visual(output_csv_path, candidates=[], verbose=True)
    # Same empty default as the spouse branch; the sm.babble(...) cell reads
    # this name unconditionally.
    user_lists = {}
else:
    raise Exception('Invalid domain: {}'.format(DOMAIN))

In [6]:
# Alternative annotations folder, kept for reference (absolute path, disabled):
# anns_folder = '/dfs/scratch0/paroma/coco/annotations/'
# Annotations folder inside the SNORKELHOME checkout.
# NOTE(review): this path is bike-specific even though DOMAIN is selectable;
# confirm what sm.parse expects for other domains.
anns_folder = os.environ['SNORKELHOME'] + '/tutorials/babble/bike/data/'
# Run the model's parse stage on that folder and report how long it took.
%time sm.parse(anns_folder)

Clearing existing...
Running UDF...
Running UDF...
CPU times: user 6.02 s, sys: 582 ms, total: 6.61 s
Wall time: 14.9 s


In [9]:
# Run (and time) the model's extract stage. In the recorded run below it only
# reports per-split candidate counts, since extraction was already performed
# during the parse stage.
%time sm.extract()

Extraction was performed during parse stage.
Candidates [Split 0]: 2406
Candidates [Split 1]: 1037
Candidates [Split 2]: 0
CPU times: user 10.3 ms, sys: 3.25 ms, total: 13.6 ms
Wall time: 41.8 ms


In [10]:
# Run (and time) the model's gold-label loading stage; the recorded output
# reports how many AnnotatorLabels were created.
%time sm.load_gold()

AnnotatorLabels created: 906
CPU times: user 5.41 s, sys: 171 ms, total: 5.59 s
Wall time: 7.05 s


In [None]:
# Pass the explanations loaded in the domain-setup cell to the model's babble
# stage and time it.
# NOTE(review): `user_lists` must also have been defined by the domain-setup
# cell for the selected DOMAIN.
%time sm.babble(explanations, user_lists=user_lists)

In [None]:
# Optional step, currently disabled. Uncomment to call sm.use_intro_lfs().
# NOTE(review): presumably an alternative to the babble step above -- confirm.
# sm.use_intro_lfs()

In [None]:
# Run (and time) the model's label stage.
%time sm.label()

In [None]:
# Run (and time) the model's supervise stage.
%time sm.supervise()

In [None]:
# Run (and time) the model's classify stage, the last step in this notebook.
%time sm.classify()