In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and code edits take effect without a
# kernel restart.
%load_ext autoreload
%autoreload 2

## Setup

In [2]:
# Notebook-level settings. The cells below read these keys directly and also
# pass the whole dict through merge_configs().
config = {
    # Task and database selection
    "domain": "spouse",
    "postgres": True,
    "parallelism": 1,
    "db_name": "babble_spouse_demo",
    "debug": False,
    # Splits handed to the babbler
    "babbler_candidate_split": 1,
    "babbler_label_split": 1,
    # Model search sizes and supervision mode
    "disc_model_search_space": 1,
    "gen_model_search_space": 1,
    "supervision": "majority_vote",
}

In [3]:
# Assemble the database URL from `config` and export it as $SNORKELDB.
# DB_NAME, DB_TYPE and DB_ADDR are left as notebook globals.
# NOTE: $SNORKELDB must be set before any snorkel imports
import os

# Fallback name: 'babble_<domain>', with a '_debug' suffix in debug mode.
default_db_name = 'babble_' + config['domain']
if config.get('debug', False):
    default_db_name += '_debug'
DB_NAME = config.get('db_name', default_db_name)

# A truthy 'postgres' entry selects postgres; otherwise a sqlite '.db' file.
DB_TYPE = 'postgres' if config.get('postgres') else 'sqlite'
if DB_TYPE == 'sqlite':
    DB_NAME += '.db'

# The host part stays empty unless an explicit port was configured.
DB_ADDR = ""
if 'db_port' in config:
    DB_ADDR = "localhost:{0}".format(config['db_port'])

os.environ['SNORKELDB'] = '{0}://{1}/{2}'.format(DB_TYPE, DB_ADDR, DB_NAME)
print("$SNORKELDB = {0}".format(os.environ['SNORKELDB']))

$SNORKELDB = postgres:///babble_spouse_demo


In [4]:
# snorkel is imported only here, after $SNORKELDB has been exported by the
# previous cell (that cell notes the variable must be set before any snorkel
# import).
from snorkel import SnorkelSession
session = SnorkelSession()

# Resolve config conflicts (nb_config > local_config > global_config)
# merge_configs prints one "Overwriting ..." line per overridden setting and
# its result replaces the notebook-level `config`.
from snorkel.contrib.babble.pipelines import merge_configs
config = merge_configs(config)

# Debug mode overrides several settings (document cap, search-space sizes,
# epoch counts). Assumes the merged config contains 'gen_params_default' and
# 'disc_params_default' dicts — TODO confirm against merge_configs.
if config['debug']:
    print("NOTE: --debug=True: modifying parameters...")
    config['max_docs'] = 100
    config['gen_model_search_space'] = 2
    config['disc_model_search_space'] = 2
    config['gen_params_default']['epochs'] = 25
    config['disc_params_default']['n_epochs'] = 5

Overwriting domain=None to domain=spouse
Overwriting babbler_candidate_split=0 to babbler_candidate_split=1
Overwriting init_class_prior=0 to init_class_prior=-1.15
Overwriting reg_param=0.1 to reg_param=0.5
Overwriting decay=0.95 to decay=0.99
Overwriting disc_model_search_space=10 to disc_model_search_space=1
Overwriting postgres=False to postgres=True
Overwriting supervision=generative to supervision=majority_vote
Overwriting babbler_label_split=0 to babbler_label_split=1
Overwriting gen_model_search_space=10 to gen_model_search_space=1


In [5]:
# Define the Spouse candidate class (two arguments: person1, person2) and
# build the pipeline object that the later label/supervise/classify cells use.
from snorkel.models import candidate_subclass
# NOTE(review): ExplanationIO is not used by any cell visible in this notebook.
from snorkel.contrib.babble import ExplanationIO
from tutorials.babble.spouse import SpousePipeline

Spouse = candidate_subclass('Spouse', ['person1', 'person2'])
# Alias for Spouse; no visible cell reads it — presumably kept for code shared
# with other domains (TODO confirm).
candidate_class = Spouse
pipe = SpousePipeline(session, Spouse, config)

## Parse, Extract, Load

In [6]:
# Disabled in this run — presumably the database was already populated by an
# earlier execution (TODO confirm). Uncomment to run and time the parse stage.
# %time pipe.parse()

In [7]:
# Disabled in this run — presumably candidates were already extracted by an
# earlier execution (TODO confirm). Uncomment to run and time the extract stage.
# %time pipe.extract()

In [8]:
# Disabled in this run — presumably gold labels were already loaded by an
# earlier execution (TODO confirm). Uncomment to run and time the load stage.
# %time pipe.load_gold()

In [9]:
# Load the tutorial's pre-written explanations and user lists.
from tutorials.babble.spouse.spouse_examples import get_explanations, get_user_lists

# All Spouse candidates in split 0 are passed to get_explanations, whose
# recorded output shows the explanations being linked to those candidates.
candidates = session.query(Spouse).filter(Spouse.split == 0).all()
spouse_explanations = get_explanations(candidates)
spouse_user_lists = get_user_lists()

Building list of target candidate ids...
Collected 11 unique target candidate ids from 11 explanations.
Gathering desired candidates...
Found 11/11 desired candidates
Linking explanations to candidates...
Linked 11/11 explanations


## Now the real work begins: writing explanations with BabbleStream

In [10]:
# Create the BabbleStream used by the following cells to fetch candidates
# (bs.next), try out explanations (bs.apply) and keep them (bs.commit).
from snorkel.contrib.babble import BabbleStream
bs = BabbleStream(session, strategy='linear', candidate_class=Spouse)

In [11]:
# Seed the stream with the tutorial explanations and user lists. The recorded
# output shows them being parsed, filtered and added to the stream's set.
bs.preload(explanations=spouse_explanations, user_lists=spouse_user_lists)

Created grammar with 494 rules
11 explanation(s) out of 11 were parseable.
24 parse(s) generated from 11 explanation(s).
20 parse(s) remain (4 parse(s) removed by DuplicateSemanticsFilter).
13 parse(s) remain (7 parse(s) removed by ConsistencyFilter).
Applying labeling functions to split 1

12 parse(s) remain (1 parse(s) removed by UniformSignatureFilter: (1 None, 0 All)).
9 parse(s) remain (3 parse(s) removed by DuplicateSignatureFilter).
Added 9 parse(s) to set. (Total # parses = 9)
Added 9 explanation(s) to set. (Total # explanations = 9)


In [12]:
# Display the stream's current label matrix (a sparse matrix in the recorded
# output; its column count matches the number of parses in the set).
bs.get_label_matrix()

<2456x9 sparse matrix of type '<type 'numpy.float64'>'
	with 7156 stored elements in Compressed Sparse Row format>

In [13]:
# Left for reference and not executed: the same explanations and user lists
# passed to Babbler instead of BabbleStream, then applied to split 1.
# from snorkel.contrib.babble import Babbler
# bb = Babbler('text', Spouse, spouse_explanations, user_lists=spouse_user_lists)
# bb.apply(split=1)

In [14]:
# Pull the next candidate from the stream; `c` is the candidate attached to
# the explanations written in the cells below.
c = bs.next()

In [15]:
# Render the current candidate `c` in the sentence viewer, one per page.
from snorkel.viewer import SentenceNgramViewer
sv = SentenceNgramViewer([c], session, n_per_page=1, height=150)
sv

<IPython.core.display.Javascript object>

In [16]:
# Write a natural-language explanation for candidate `c`: a label plus the
# condition that justifies it.
from snorkel.contrib.babble import Explanation
label = False
condition = "'the' is immediately to the left of arg 1"
explanation = Explanation(condition, label, candidate=c, name='')

In [17]:
# Echo the explanation's repr to check the label and condition it carries.
explanation

Explanation("False, 'the' is immediately to the left of arg 1")

In [18]:
# Try the explanation out (timed). apply() returns three values which are
# indexed in parallel below: parses, confusion matrices and stats.
# The result is not kept in the stream until bs.commit() is called.
%time parse_list, conf_matrix_list, stats_list = bs.apply(explanation)

1 explanation(s) out of 1 were parseable.
1 parse(s) generated from 1 explanation(s).
1 parse(s) remain (0 parse(s) removed by DuplicateSemanticsFilter).
1 parse(s) remain (0 parse(s) removed by ConsistencyFilter).
Applying labeling functions to split 1

1 parse(s) remain (0 parse(s) removed by UniformSignatureFilter: (0 None, 0 All)).
1 parse(s) remain (0 parse(s) removed by DuplicateSignatureFilter).
CPU times: user 230 ms, sys: 10.4 ms, total: 240 ms
Wall time: 238 ms


In [19]:
# Report the stats of the first entry returned by the last bs.apply() call.
first_stats = stats_list[0]
print("Accuracy: {}".format(first_stats.accuracy))
print("Class Coverage: {}".format(first_stats.class_coverage))

Accuracy: 0.983870967742
Class Coverage: 0.0261603375527


In [48]:
# Browse up to ten candidates from the `tn` bucket of the first confusion
# matrix, three per page.
# NOTE(review): the variable is named error_set but is filled from `tn`; if
# `tn` means true negatives these are correctly labeled candidates, not
# errors — confirm which bucket was intended.
from snorkel.viewer import SentenceNgramViewer
error_set = list(conf_matrix_list[0].tn)
sv = SentenceNgramViewer(error_set[:10], session, n_per_page=3, height=300)
sv

<IPython.core.display.Javascript object>

In [21]:
# Keep the result of the preceding bs.apply() call. No indices are passed
# here; the recorded output shows the single pending parse being added.
# Presumably commit() with no arguments keeps every pending parse — TODO
# confirm against BabbleStream.commit.
bs.commit()

Added 1 parse(s) to set. (Total # parses = 10)
Added 1 explanation(s) to set. (Total # explanations = 10)


In [22]:
# Fetch and display the label matrix again now that another parse has been
# committed (one more column than before in the recorded output).
# NOTE(review): despite the name L_train, the recorded row count (2456)
# matches split 1 rather than split 0 — confirm which split this covers.
L_train = bs.get_label_matrix()
L_train

<2456x10 sparse matrix of type '<type 'numpy.float64'>'
	with 7222 stored elements in COOrdinate format>

### Add another explanation

In [23]:
# Second explanation, attached to the same candidate `c` fetched earlier;
# `label`, `condition` and `explanation` are rebound.
from snorkel.contrib.babble import Explanation
label = False
condition = "'where' is within two words to the right of arg 1"
explanation = Explanation(condition, label, candidate=c, name='')

In [24]:
# Try out the second explanation (timed). This rebinds parse_list,
# conf_matrix_list and stats_list, replacing the first explanation's results.
%time parse_list, conf_matrix_list, stats_list = bs.apply(explanation)

1 explanation(s) out of 1 were parseable.
2 parse(s) generated from 1 explanation(s).
2 parse(s) remain (0 parse(s) removed by DuplicateSemanticsFilter).
2 parse(s) remain (0 parse(s) removed by ConsistencyFilter).
Applying labeling functions to split 1

2 parse(s) remain (0 parse(s) removed by UniformSignatureFilter: (0 None, 0 All)).
1 parse(s) remain (1 parse(s) removed by DuplicateSignatureFilter).
CPU times: user 356 ms, sys: 22.2 ms, total: 379 ms
Wall time: 370 ms


In [25]:
# Report the stats of the first entry returned by the last bs.apply() call.
first_stats = stats_list[0]
print("Accuracy: {}".format(first_stats.accuracy))
print("Class Coverage: {}".format(first_stats.class_coverage))

Accuracy: 1.0
Class Coverage: 0.00084388185654


In [26]:
# Keep the parse produced by the preceding bs.apply() call; the recorded
# output shows one parse and one explanation being added to the set.
bs.commit()

Added 1 parse(s) to set. (Total # parses = 11)
Added 1 explanation(s) to set. (Total # explanations = 11)


In [27]:
# Inspect the first parse from the last bs.apply() call; its `semantics`
# attribute displays as a nested tuple expression.
parse = parse_list[0]
parse.semantics

('.root',
 ('.label',
  ('.bool', False),
  ('.call',
   ('.in',
    ('.extract_text',
     ('.right',
      ('.arg', ('.int', 1)),
      ('.string', '.eq'),
      ('.int', 2),
      ('.string', 'words')))),
   ('.string', 'where'))))

In [28]:
# Render the parse's semantics as a readable string via the grammar's
# translate(); the recorded output is a "return -1 if ... else 0" expression.
bs.semparser.grammar.translate(parse.semantics)

"return -1 if call(in text(right(arg1,'.eq',2,'words')), 'where') else 0"

In [30]:
# Hand the function of every parse held by the stream to the pipeline as its
# labeling functions, then run the pipeline's labeling stage.
# The comprehension variable is deliberately not named `parse`: this notebook
# runs on Python 2 (see the "<type ...>" reprs in the outputs), where
# list-comprehension variables leak into the enclosing namespace and would
# silently overwrite the `parse` object inspected in the cells above.
pipe.lfs = [committed_parse.function for committed_parse in bs.parses]
pipe.label()

Clearing existing...
Running UDF...


Labeled split 0: (23490,10) sparse (nnz = 66893)
Clearing existing...
Running UDF...


Labeled split 1: (2456,10) sparse (nnz = 7158)
                            j  Coverage  Overlaps  Conflicts   TP    FP  FN  \
LF_identical_args_0         0  0.048860  0.048453   0.047231    0     0   0   
LF_spouse_between_0         1  0.964984  0.940554   0.919788  174  2112   0   
LF_too_far_apart_0          2  0.556596  0.556596   0.556596    0     0  75   
Explanation0_0              3  0.000814  0.000814   0.000814    0     0   0   
LF_no_spouse_in_sentence_0  4  0.822068  0.795195   0.792752    0     0  59   
LF_married_after_3          5  0.009772  0.009772   0.008143   10    13   0   
LF_other_between_0          6  0.026873  0.026873   0.026873    0     0   3   
LF_family_to_left_0         7  0.077769  0.077362   0.076140    0     0   7   
LF_spouse_to_left_0         8  0.051303  0.051303   0.030945   79    45   0   
LF_third_wheel_0            9  0.35545

In [31]:
# Run and time the pipeline's supervision stage (config['supervision'] is set
# to 'majority_vote' at the top of this notebook). The recorded output lists
# the train/dev/test label matrices it uses.
%time pipe.supervise()

Using L_train: <23490x10 sparse matrix of type '<type 'numpy.int64'>'
	with 66893 stored elements in Compressed Sparse Row format>
Using L_dev: <2456x10 sparse matrix of type '<type 'numpy.int64'>'
	with 7158 stored elements in Compressed Sparse Row format>
Using L_test: <1820x10 sparse matrix of type '<type 'numpy.int64'>'
	with 5075 stored elements in Compressed Sparse Row format>


In [32]:
# Run and time the pipeline's classification stage. In the recorded output
# this trains the reRNN model and prints Coverage/F1/Precision/Recall for the
# discriminative ("Disc") and generative ("Gen") models.
%time pipe.classify()

Skipping grid search.


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


[reRNN] Training model
[reRNN] n_train=7144  #epochs=20  batch size=128
[reRNN] Epoch 0 (13.84s)	Average loss=0.462903	Dev F1=38.84
[reRNN] Epoch 1 (29.60s)	Average loss=0.275553	Dev F1=40.29
[reRNN] Epoch 2 (45.35s)	Average loss=0.235557	Dev F1=45.19
[reRNN] Epoch 3 (60.93s)	Average loss=0.182595	Dev F1=46.93
[reRNN] Epoch 4 (76.37s)	Average loss=0.168066	Dev F1=45.85
[reRNN] Epoch 5 (91.52s)	Average loss=0.147579	Dev F1=48.70
[reRNN] Epoch 6 (106.80s)	Average loss=0.123285	Dev F1=48.36
[reRNN] Epoch 7 (122.52s)	Average loss=0.103012	Dev F1=42.68
[reRNN] Epoch 8 (137.75s)	Average loss=0.090690	Dev F1=47.14
[reRNN] Epoch 9 (153.15s)	Average loss=0.073195	Dev F1=49.13
[reRNN] Epoch 10 (168.15s)	Average loss=0.054659	Dev F1=44.08
[reRNN] Epoch 11 (183.23s)	Average loss=0.044302	Dev F1=47.70
[reRNN] Epoch 12 (198.13s)	Average loss=0.026818	Dev F1=44.65
[reRNN] Epoch 13 (212.92s)	Average loss=0.024365	Dev F1=45.23
[reRNN] Epoch 14 (228.07s)	Average loss=0.021320	Dev F1=47.89
[reRNN] Epoch 



[reRNN] Model saved as <discriminative_spouse>
### [7.2] Evaluate generative model (opt_b=0.9)
### Done in 0.2s.

### [7.3] Evaluate discriminative model (opt_b=0.9)




### Done in 5.8s.

      Coverage  F1 Score  Precision    Recall
Disc       1.0  0.458716   0.423729  0.500000
Gen        1.0  0.505747   0.444444  0.586667


In [55]:
# Load and display the gold labels for split 0 (the training split).
# load_gold_labels is imported here because no earlier cell in this notebook
# brings it into scope; without the import the cell raises NameError on a
# fresh kernel (Restart & Run All).
from snorkel.annotations import load_gold_labels

L_gold_train = load_gold_labels(session, annotator_name='gold', split=0)
L_gold_train

<23490x1 sparse matrix of type '<type 'numpy.int64'>'
	with 1201 stored elements in Compressed Sparse Row format>

In [53]:
# Load and display the gold labels for split 1 (the dev split), stored under
# the annotator name 'gold'.
L_gold_dev = load_gold_labels(session, annotator_name='gold', split=1)
L_gold_dev

<2456x1 sparse matrix of type '<type 'numpy.int64'>'
	with 2370 stored elements in Compressed Sparse Row format>

In [56]:
# Load and display the gold labels for split 2 (the test split), stored under
# the annotator name 'gold'.
L_gold_test = load_gold_labels(session, annotator_name='gold', split=2)
L_gold_test

<1820x1 sparse matrix of type '<type 'numpy.int64'>'
	with 1747 stored elements in Compressed Sparse Row format>