# Environment Setup

In [1]:
# Enable IPython's autoreload extension; mode 2 reloads all imported modules
# before each cell runs, so edits to the project source are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
# Notebook-level settings. They are merged with the other config layers in a
# later cell via merge_configs(config).
config = dict(
    domain='spouse',
    postgres=False,
    parallelism=1,
    # db_name='babble_spouse_demo',  # uncomment to override the derived DB name
    babbler_candidate_split=1,
    babbler_label_split=1,
    supervision='majority_vote',
    gen_model_search_space=1,
    disc_model_search_space=1,
)

In [3]:
# Derive the database connection string from `config` and export it as $SNORKELDB.
# NOTE: $SNORKELDB must be set before any snorkel imports
import os

# Default name is 'babble_<domain>', plus a '_debug' suffix when config['debug'] is truthy.
debug_suffix = '_debug' if config.get('debug', False) else ''
default_db_name = 'babble_' + config['domain'] + debug_suffix
DB_NAME = config.get('db_name', default_db_name)

use_postgres = bool(config.get('postgres'))
DB_TYPE = 'postgres' if use_postgres else 'sqlite'
if not use_postgres:
    # The sqlite name gets a '.db' suffix appended.
    DB_NAME += '.db'

# The host part of the URL stays empty unless a port is configured.
DB_ADDR = "localhost:{0}".format(config['db_port']) if 'db_port' in config else ""

snorkel_db_url = '{0}://{1}/{2}'.format(DB_TYPE, DB_ADDR, DB_NAME)
os.environ['SNORKELDB'] = snorkel_db_url
print("$SNORKELDB = {0}".format(os.environ['SNORKELDB']))

$SNORKELDB = sqlite:///babble_spouse.db


In [4]:
# Create the Snorkel session used by the later cells. $SNORKELDB was exported in
# the previous cell because it has to be set before any snorkel import.
from snorkel import SnorkelSession
session = SnorkelSession()

# Resolve config conflicts (nb_config > local_config > global_config)
# `config` is rebound to the merged result, so later cells see the merged values.
from snorkel.contrib.babble.pipelines import merge_configs
config = merge_configs(config)

Overwriting domain=None to domain=spouse
Overwriting disc_model_search_space=10 to disc_model_search_space=1
Overwriting babbler_label_split=0 to babbler_label_split=1
Overwriting init_class_prior=0 to init_class_prior=-1.15
Overwriting reg_param=0.1 to reg_param=0.5
Overwriting decay=0.95 to decay=0.99
Overwriting babbler_candidate_split=0 to babbler_candidate_split=1
Overwriting supervision=generative to supervision=majority_vote
Overwriting gen_model_search_space=10 to gen_model_search_space=1


In [5]:
from snorkel.models import candidate_subclass
from tutorials.babble.spouse import SpousePipeline

# Candidate class named 'Spouse' with two arguments, person1 and person2.
Spouse = candidate_subclass('Spouse', ['person1', 'person2'])
# Alias for Spouse; the visible cells pass Spouse directly and never read this name.
candidate_class = Spouse
# Pipeline bound to this session, the Spouse class and the merged config.
pipe = SpousePipeline(session, Spouse, config)

### Initialization

In [6]:
# %time pipe.parse()

In [7]:
# %time pipe.extract()

In [8]:
# %time pipe.load_gold()

### Pre-load Explanations (10) + User Lists (4)

In [9]:
from snorkel.contrib.babble import BabbleStream
# Stream of Spouse candidates that drives the demo cells (bs.next, bs.apply, bs.commit).
# NOTE(review): the fixed seed presumably makes the shuffled order repeatable - confirm against BabbleStream.
bs = BabbleStream(session, candidate_class=Spouse, balanced=True, shuffled=True, seed=1234)

In [10]:
from tutorials.babble.spouse.spouse_examples import get_explanations, get_user_lists

# All Spouse candidates with split == 0 are handed to get_explanations, which
# (per its log output) links the example explanations to their target candidates.
candidates = session.query(Spouse).filter(Spouse.split == 0).all()
spouse_explanations = get_explanations(candidates)
# Alternative: start the demo without any pre-loaded explanations.
# spouse_explanations = []
spouse_user_lists = get_user_lists()

Building list of target candidate ids...
Collected 10 unique target candidate ids from 10 explanations.
Gathering desired candidates...
Found 10/10 desired candidates
Linking explanations to candidates...
Linked 10/10 explanations


In [11]:
# Seed the stream with the example explanations and user lists loaded in the previous cell.
bs.preload(explanations=spouse_explanations, user_lists=spouse_user_lists)

Created grammar with 499 rules
Flushing all parses from previous explanation set.
All previously uncommitted parses have been flushed.
10 explanation(s) out of 10 were parseable.
29 parse(s) generated from 10 explanation(s).
19 parse(s) remain (10 parse(s) removed by DuplicateSemanticsFilter).
12 parse(s) remain (7 parse(s) removed by ConsistencyFilter).
Applying labeling functions to split 1

12 parse(s) remain (0 parse(s) removed by UniformSignatureFilter: (0 None, 0 All)).
10 parse(s) remain (2 parse(s) removed by DuplicateSignatureFilter).
Added 10 parse(s) to set. (Total # parses = 10)


# Start Demo:

### View user_lists

In [None]:
# Print every user list by alias; lists with more than 10 entries are cut to
# their first 10 and marked with a trailing "...".
user_lists = bs.user_lists
for alias, values in user_lists.items():
    needs_truncation = len(values) > 10
    if needs_truncation:
        print("{}:\n {}...\n".format(alias, list(values)[:10]))
    else:
        print("{}:\n {}\n".format(alias, values))

Optionally add another user_list.

In [None]:
# Fill in both values to register an extra user list with the stream.
ALIAS = None    # e.g., 'marriage_words'
VALUES = []    # e.g., ['engaged', 'betrothed', 'proposed']

# A falsy ALIAS (None or '') skips the call, leaving the existing user lists as they are.
if ALIAS:
    bs.add_user_lists({ALIAS: VALUES})

### View a candidate

In [None]:
# Pull the next candidate from the stream; `c` is also the candidate the
# Explanation is attached to in a later cell.
c = bs.next()
from snorkel.viewer import SentenceNgramViewer
# Viewer height is twice the word count of the candidate's parent, but never below 80.
sv = SentenceNgramViewer([c], session, n_per_page=1, height=max(len(c.get_parent().words)*2, 80))
# Bare expression so the notebook renders the viewer as the cell output.
sv

### Give an explanation

(See MTurk instructions for examples)

In [None]:
# User input for the Explanation built in the next cell: the label to assign and
# the natural-language condition that justifies it.
LABEL = True
CONDITION = "there are no people between the arg 1 and arg 2 and 'husband' is immediately to the left of arg 2"
# Alternative example conditions (uncomment one to try it instead):
# CONDITION = "'announcing' is less than six words to the left of arg 2"
# CONDITION = "'syndrome' occurs to within three words to the right of arg 2"

With the user input and the current candidate, we make an Explanation object.

In [None]:
from snorkel.contrib.babble import Explanation
# Bundle the condition, the label and the candidate currently bound to `c`.
explanation = Explanation(CONDITION, LABEL, candidate=c)

### Parse and view labeling stats

In [None]:
# Apply the explanation and report how long the call took. parse_list,
# conf_matrix_list and stats_list are indexed together by PARSE_IDX in a later cell.
%time parse_list, filtered_parses, conf_matrix_list, stats_list = bs.apply(explanation)

In [None]:
# Inspect the parses returned as `filtered_parses` by the bs.apply call in the previous cell.
bs.filtered_analysis(filtered_parses)

In [None]:
# Choose which parse of the current explanation to inspect.
PARSE_IDX = 0
parse = parse_list[PARSE_IDX]
conf_matrix = conf_matrix_list[PARSE_IDX]
stats = stats_list[PARSE_IDX]

# Show the grammar's translation of the parse semantics, then its labeling stats.
parse_text = bs.semparser.grammar.translate(parse.semantics)
print("Parse {}:\n{}\n".format(PARSE_IDX, parse_text))
for stat_name in ('accuracy', 'class_coverage', 'coverage'):
    print(getattr(stats, stat_name))

### View labeled candidates

Select the subset of labeled candidates you would like to view.

In [None]:
SUBSET = 'correct' # Must be one of ['correct', 'incorrect', 'abstained']

# Read the subset from `conf_matrix`, the confusion matrix already selected via
# PARSE_IDX in the stats cell, rather than always from parse 0. This keeps the
# candidates shown here in step with the parse whose stats were just printed.
subset = getattr(conf_matrix, SUBSET)
def candidate_generator(subset):
    """Lazily yield the items of `subset` one at a time, in its iteration order."""
    remaining = iter(subset)
    for candidate in remaining:
        yield candidate
# Generator over the chosen subset; the next cell advances it by one candidate per run.
subset_generator = candidate_generator(subset)

In [None]:
from snorkel.viewer import SentenceNgramViewer
# Advance to the next candidate of the selected subset. The builtin next() works on
# Python 2.6+ and Python 3, whereas the generator method .next() exists only on Python 2.
# Raises StopIteration once every candidate of the subset has been shown.
# NOTE: this rebinds `c`; re-run the "View a candidate" cell before writing the next explanation.
c = next(subset_generator)
# Viewer height is twice the word count of the candidate's parent, but never below 80.
sv = SentenceNgramViewer([c], session, n_per_page=3, height=max(len(c.get_parent().words)*2, 80))
# Bare expression so the notebook renders the viewer as the cell output.
sv

### Commit parses

If you are satisfied with the given parses, commit them.

In [None]:
# Commit the parses from the latest bs.apply call.
# NOTE(review): presumably this adds them to the stream's running parse set - confirm against BabbleStream.commit.
bs.commit()

### View global stats

In [None]:
# Bare expression: the notebook renders the returned global coverage as the cell output.
bs.get_global_coverage()

In [None]:
# Bare expression: the notebook renders the returned labeling-function stats as the cell output.
bs.get_lf_stats()

In [None]:
# Bare expression: the notebook renders the parses returned by the stream as the cell output.
bs.get_parses()

In [None]:
# Bare expression: the notebook renders the labeling functions returned by the stream as the cell output.
bs.get_lfs()

In [None]:
# Bare expression: the notebook renders the explanations returned by the stream as the cell output.
bs.get_explanations()

## Repeat (go back to "Start Demo")

When you have entered all of the explanations you would like to, uncomment and run these final cells.

In [None]:
# %time pipe.supervise()

In [None]:
# %time pipe.classify()