In [1]:
import logging
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
from fonduer import Meta
from fonduer.candidates import CandidateExtractor, MentionExtractor, MentionNgrams
from fonduer.candidates.models import (
    Candidate,
    Mention,
    candidate_subclass,
    mention_subclass,
)

from fonduer.features import Featurizer
from fonduer.learning import SparseLogisticRegression
from fonduer.parser.models import Document, Figure, Paragraph, Section, Sentence
from fonduer.supervision import Labeler
from metal import analysis
from metal.label_model import LabelModel

from hack.transistors.transistor_lfs import (
    TRUE,
    ce_v_max_lfs,
    polarity_lfs,
    stg_temp_max_lfs,
    stg_temp_min_lfs,
)
from hack.transistors.transistor_matchers import get_matcher
from hack.transistors.transistor_spaces import (
    MentionNgramsPart,
    MentionNgramsTemp,
    MentionNgramsVolt,
)
from hack.transistors.transistor_throttlers import (
    ce_v_max_filter,
    polarity_filter,
    stg_temp_filter,
)
from hack.transistors.transistor_utils import (
    Score,
    entity_level_scores,
    load_transistor_labels,
)
from hack.utils import parse_dataset

In [2]:
# Expose only GPU 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Configure logging for Hack: every record goes both to "transistors.log" in
# the current working directory and to a stream handler.
log_path = os.path.join(os.path.abspath(''), "transistors.log")
log_handlers = [logging.FileHandler(log_path), logging.StreamHandler()]
logging.basicConfig(
    format="[%(asctime)s][%(levelname)s] %(name)s:%(lineno)s - %(message)s",
    level=logging.INFO,
    handlers=log_handlers,
)
logger = logging.getLogger(__name__)

In [3]:
def parsing(session, first_time=True, parallel=1, max_docs=float("inf")):
    """Parse the dataset through ``parse_dataset`` and log corpus statistics.

    Args:
        session: Database session; handed to ``parse_dataset`` and used for
            the row-count queries logged afterwards.
        first_time: Forwarded unchanged to ``parse_dataset``.
        parallel: Forwarded unchanged to ``parse_dataset``.
        max_docs: Forwarded unchanged to ``parse_dataset``.

    Returns:
        The ``(docs, train_docs, dev_docs, test_docs)`` tuple produced by
        ``parse_dataset``.
    """
    # A notebook has no __file__, so the current working directory is used
    # in place of os.path.dirname(__file__).
    dirname = os.path.abspath('')

    logger.debug("Starting parsing...")
    docs, train_docs, dev_docs, test_docs = parse_dataset(
        session, dirname, first_time=first_time, parallel=parallel, max_docs=max_docs
    )
    logger.debug("Done")

    # Size of each document split.
    for split_name, split_docs in (
        ("train", train_docs),
        ("dev", dev_docs),
        ("test", test_docs),
    ):
        logger.info(f"# of {split_name} Documents: {len(split_docs)}")

    # Row counts of the parsed data-model tables.
    for label, model in (
        ("Documents", Document),
        ("Sections", Section),
        ("Paragraphs", Paragraph),
        ("Sentences", Sentence),
        ("Figures", Figure),
    ):
        logger.info(f"{label}: {session.query(model).count()}")

    return docs, train_docs, dev_docs, test_docs

In [4]:
def mention_extraction(
    session,
    docs,
    first_time=True,
    part=True,
    stg_temp_min=True,
    stg_temp_max=True,
    polarity=True,
    ce_v_max=True,
    parallel=1,
):
    """Define the mention classes and, on a first run, extract mentions.

    All five mention subclasses are always created and returned; the boolean
    flags only select which of them are handed to the ``MentionExtractor``.

    Args:
        session: Database session used by the extractor and the count queries.
        docs: Documents the extractor is applied to.
        first_time: When truthy, run the extractor; otherwise only log counts.
        part, stg_temp_min, stg_temp_max, polarity, ce_v_max: Enable the
            corresponding mention type in the extractor.
        parallel: Passed to the extractor as ``parallelism``.

    Returns:
        ``(Part, StgTempMin, StgTempMax, Polarity, CeVMax)`` mention classes.
    """
    # One row per mention type: (enabled, label, class, matcher, ngram space).
    specs = [
        (
            part,
            "Part",
            mention_subclass("Part"),
            get_matcher("part"),
            MentionNgramsPart(parts_by_doc=None, n_max=3),
        ),
        (
            stg_temp_min,
            "StgTempMin",
            mention_subclass("StgTempMin"),
            get_matcher("stg_temp_min"),
            MentionNgramsTemp(n_max=2),
        ),
        (
            stg_temp_max,
            "StgTempMax",
            mention_subclass("StgTempMax"),
            get_matcher("stg_temp_max"),
            MentionNgramsTemp(n_max=2),
        ),
        (
            polarity,
            "Polarity",
            mention_subclass("Polarity"),
            get_matcher("polarity"),
            MentionNgrams(n_max=1),
        ),
        (
            ce_v_max,
            "CeVMax",
            mention_subclass("CeVMax"),
            get_matcher("ce_v_max"),
            MentionNgramsVolt(n_max=1),
        ),
    ]

    # Only do those that are incrementally enabled
    active = [spec for spec in specs if spec[0]]
    mentions = [cls for _, _, cls, _, _ in active]
    ngrams = [space for _, _, _, _, space in active]
    matchers = [matcher for _, _, _, matcher, _ in active]

    mention_extractor = MentionExtractor(session, mentions, ngrams, matchers)

    if first_time:
        mention_extractor.apply(docs, parallelism=parallel)

    # Counts are reported for every mention type, enabled or not.
    logger.info(f"Total Mentions: {session.query(Mention).count()}")
    for _, label, cls, _, _ in specs:
        logger.info(f"Total {label}: {session.query(cls).count()}")

    return tuple(cls for _, _, cls, _, _ in specs)

In [5]:
def candidate_extraction(
    session,
    Part,
    StgTempMin,
    StgTempMax,
    Polarity,
    CeVMax,
    train_docs,
    dev_docs,
    test_docs,
    stg_temp_min=True,
    stg_temp_max=True,
    polarity=True,
    ce_v_max=True,
    first_time=True,
    parallel=1,
):
    """Define the candidate classes and, on a first run, extract candidates.

    All four candidate subclasses are always created and returned; the
    boolean flags only select which of them (with their throttlers) are
    handed to the ``CandidateExtractor``.

    Args:
        session: Database session used by the extractor and count queries.
        Part, StgTempMin, StgTempMax, Polarity, CeVMax: Mention classes the
            candidate classes are built from.
        train_docs, dev_docs, test_docs: Documents extracted into splits
            0, 1 and 2 respectively.
        stg_temp_min, stg_temp_max, polarity, ce_v_max: Enable the
            corresponding relation in the extractor.
        first_time: When truthy, run extraction; otherwise only fetch.
        parallel: Passed to the extractor as ``parallelism``.

    Returns:
        ``(PartStgTempMin, PartStgTempMax, PartPolarity, PartCeVMax,
        train_cands, dev_cands, test_cands)``.
    """
    # One row per relation: (enabled, candidate class, throttler).
    relations = [
        (
            stg_temp_min,
            candidate_subclass("PartStgTempMin", [Part, StgTempMin]),
            stg_temp_filter,
        ),
        (
            stg_temp_max,
            candidate_subclass("PartStgTempMax", [Part, StgTempMax]),
            stg_temp_filter,
        ),
        (
            polarity,
            candidate_subclass("PartPolarity", [Part, Polarity]),
            polarity_filter,
        ),
        (
            ce_v_max,
            candidate_subclass("PartCeVMax", [Part, CeVMax]),
            ce_v_max_filter,
        ),
    ]

    cands = [cls for enabled, cls, _ in relations if enabled]
    throttlers = [throttler for enabled, _, throttler in relations if enabled]

    candidate_extractor = CandidateExtractor(session, cands, throttlers=throttlers)

    if first_time:
        # Split ids follow the order train=0, dev=1, test=2.
        for split, split_docs in enumerate((train_docs, dev_docs, test_docs)):
            candidate_extractor.apply(split_docs, split=split, parallelism=parallel)
            num_cands = (
                session.query(Candidate).filter(Candidate.split == split).count()
            )
            logger.info(f"Candidates in split={split}: {num_cands}")

    train_cands, dev_cands, test_cands = (
        candidate_extractor.get_candidates(split=split) for split in range(3)
    )

    # Only the first enabled relation's candidate list is counted here.
    for split_name, split_cands in (
        ("train", train_cands),
        ("dev", dev_cands),
        ("test", test_cands),
    ):
        logger.info(f"Total {split_name} candidate: {len(split_cands[0])}")

    return (
        *(cls for _, cls, _ in relations),
        train_cands,
        dev_cands,
        test_cands,
    )

In [6]:
def featurization(
    session,
    train_cands,
    dev_cands,
    test_cands,
    PartStgTempMin,
    PartStgTempMax,
    PartPolarity,
    PartCeVMax,
    stg_temp_min=True,
    stg_temp_max=True,
    polarity=True,
    ce_v_max=True,
    first_time=True,
    parallel=1,
):
    """Featurize the candidates and return the feature matrices per split.

    On a first run the featurizer is applied to splits 0, 1 and 2, the
    feature matrices are fetched and cached as ``F_train.pkl``,
    ``F_dev.pkl`` and ``F_test.pkl`` in the current working directory. On
    later runs the matrices are read back from those cache files instead.

    Args:
        session: Database session for the ``Featurizer``.
        train_cands, dev_cands, test_cands: Candidate lists whose feature
            matrices are requested.
        PartStgTempMin, PartStgTempMax, PartPolarity, PartCeVMax: Candidate
            classes; each is used only if its flag below is truthy.
        stg_temp_min, stg_temp_max, polarity, ce_v_max: Enable the
            corresponding candidate class.
        first_time: Featurize and write the cache when truthy; otherwise
            load the cache.
        parallel: Passed to the featurizer as ``parallelism``.

    Returns:
        ``(F_train, F_dev, F_test)``; each is indexed by the position of the
        enabled candidate class.
    """
    # A notebook has no __file__, so the current working directory is used
    # in place of os.path.dirname(__file__).
    dirname = os.path.abspath('')
    cache_names = ("F_train.pkl", "F_dev.pkl", "F_test.pkl")

    cands = []
    if stg_temp_min:
        cands.append(PartStgTempMin)

    if stg_temp_max:
        cands.append(PartStgTempMax)

    if polarity:
        cands.append(PartPolarity)

    if ce_v_max:
        cands.append(PartCeVMax)

    featurizer = Featurizer(session, cands)
    if first_time:
        logger.info("Starting featurizer...")
        featurizer.apply(split=0, train=True, parallelism=parallel)
        featurizer.apply(split=1, parallelism=parallel)
        featurizer.apply(split=2, parallelism=parallel)
        logger.info("Done")

    logger.info("Getting feature matrices...")
    # Serialize feature matrices on first run
    if first_time:
        F_train = featurizer.get_feature_matrices(train_cands)
        F_dev = featurizer.get_feature_matrices(dev_cands)
        F_test = featurizer.get_feature_matrices(test_cands)
        # Context managers make sure each cache file is flushed and closed.
        for cache_name, matrices in zip(cache_names, (F_train, F_dev, F_test)):
            with open(os.path.join(dirname, cache_name), "wb") as cache_file:
                pickle.dump(matrices, cache_file)
    else:
        # NOTE(review): pickle.load can execute arbitrary code; only load
        # cache files that were written by this notebook.
        cached = []
        for cache_name in cache_names:
            with open(os.path.join(dirname, cache_name), "rb") as cache_file:
                cached.append(pickle.load(cache_file))
        F_train, F_dev, F_test = cached
    logger.info("Done.")

    for i, cand in enumerate(cands):
        logger.info(f"{cand} Train shape: {F_train[i].shape}")
        logger.info(f"{cand} Test shape: {F_test[i].shape}")
        logger.info(f"{cand} Dev shape: {F_dev[i].shape}")

    return F_train, F_dev, F_test

In [7]:
def load_labels(session, relation, cand, first_time=True):
    """Load gold labels for one relation; a no-op unless ``first_time``.

    Args:
        session: Database session passed to ``load_transistor_labels``.
        relation: Object whose ``.value`` names the relation to load.
        cand: Candidate class the gold labels are loaded for.
        first_time: When falsy, nothing is loaded or logged.
    """
    if not first_time:
        return

    relation_name = relation.value
    logger.info(f"Loading gold labels for {relation_name}")
    load_transistor_labels(session, [cand], [relation.value], annotator_name="gold")

In [8]:
def labeling(session, cands, cand_classes, lfs, split=1, train=False, first_time=True, parallel=1):
    """Apply labeling functions (on a first run) and fetch the label matrices.

    Args:
        session: Database session for the ``Labeler``.
        cands: Candidates whose label and gold matrices are fetched.
        cand_classes: Candidate classes the ``Labeler`` is built for.
        lfs: Labeling functions forwarded to ``Labeler.apply``; the caller
            chooses them (no per-relation lookup happens here).
        split: Split id the labeling functions are applied to.
        train: Forwarded to ``Labeler.apply``.
        first_time: When truthy, apply the labeling functions before fetching.
        parallel: Passed to the labeler as ``parallelism``.

    Returns:
        ``(L_mat, L_gold)`` as returned by ``get_label_matrices`` and
        ``get_gold_labels`` (annotator ``"gold"``).
    """
    labeler = Labeler(session, cand_classes)

    if first_time:
        logger.info("Applying LFs...")
        labeler.apply(split=split, lfs=lfs, train=train, parallelism=parallel)
        logger.info("Done...")

    logger.info("Getting label matrices...")
    L_mat = labeler.get_label_matrices(cands)
    L_gold = labeler.get_gold_labels(cands, annotator="gold")
    logger.info("Done.")

    # Only the first matrix of each collection is reported.
    for tag, matrices in (("L_mat", L_mat), ("L_gold", L_gold)):
        logger.info(f"{tag} shape: {matrices[0].shape}")

    return L_mat, L_gold

In [9]:
def generative_model(L_train, n_epochs=500, print_every=100):
    """Train a ``LabelModel(k=2)`` on ``L_train`` and return its marginals.

    Args:
        L_train: Label matrix the model is trained on and then scored against.
        n_epochs: Forwarded to ``LabelModel.train_model``.
        print_every: Forwarded to ``LabelModel.train_model``.

    Returns:
        The result of ``predict_proba(L_train)`` on the trained model.
    """
    label_model = LabelModel(k=2)

    logger.info("Training generative model...")
    label_model.train_model(L_train, n_epochs=n_epochs, print_every=print_every)
    logger.info("Done.")

    # A histogram of the TRUE-class marginals used to be plotted and saved
    # here; that plotting is currently disabled.
    return label_model.predict_proba(L_train)

In [10]:
def discriminative_model(
    train_cands, F_train, marginals, n_epochs=50, lr=0.001, host_device="GPU"
):
    """Train a ``SparseLogisticRegression`` on the candidates' features.

    Args:
        train_cands: Training candidates.
        F_train: Feature matrix paired with ``train_cands``.
        marginals: Training targets (the generative model's marginals).
        n_epochs: Number of training epochs.
        lr: Learning rate.
        host_device: Device forwarded to ``train``. Defaults to ``"GPU"``,
            the value that used to be hard-coded, so existing callers are
            unaffected; pass another value to train elsewhere.

    Returns:
        The trained ``SparseLogisticRegression`` instance.
    """
    disc_model = SparseLogisticRegression()

    logger.info("Training discriminative model...")
    disc_model.train(
        (train_cands, F_train),
        marginals,
        n_epochs=n_epochs,
        lr=lr,
        host_device=host_device,
    )
    logger.info("Done.")

    return disc_model

In [29]:
def scoring(relation, disc_model, test_cands, test_docs, F_test, parts_by_doc, num=100):
    """Sweep the decision threshold ``b`` and report the best entity-level F1.

    For each of ``num`` evenly spaced thresholds in ``[0, 1]`` the candidates
    whose TRUE-class marginal exceeds ``b`` are scored with
    ``entity_level_scores``; the result with the strictly highest F1 is kept.

    Args:
        relation: Passed to ``entity_level_scores`` as ``attribute``.
        disc_model: Trained model exposing ``marginals((cands, features))``.
        test_cands: Candidates to score; indexed by position.
        test_docs: Passed to ``entity_level_scores`` as ``corpus``.
        F_test: Feature matrix paired with ``test_cands``.
        parts_by_doc: Passed through to ``entity_level_scores``.
        num: Number of thresholds to try.

    Returns:
        ``(best_result, best_b)``. If no threshold yields an F1 above 0, the
        initial empty ``Score`` and ``best_b == 0`` are returned.
    """
    logger.info("Calculating the best F1 score and threshold (b)...")

    # Iterate over a range of `b` values in order to find the b with the
    # highest F1 score. We are using cardinality==2. See fonduer/classifier.py.
    Y_prob = disc_model.marginals((test_cands, F_test))

    # TRUE-class probability per candidate. This does not depend on `b`, so
    # it is extracted once instead of once per threshold.
    true_prob = np.array([p[TRUE - 1] for p in Y_prob])

    # Keep the full result for the best b: (prec, rec, f1, TP, FP, FN).
    best_result = Score(0, 0, 0, [], [], [])
    best_b = 0
    for b in np.linspace(0, 1, num=num):
        true_idx = np.flatnonzero(true_prob > b)
        if true_idx.size == 0:
            # Thresholds only increase, so once nothing is predicted TRUE no
            # later threshold can predict anything either.
            logger.debug(f"No candidates predicted TRUE at b = {b}, stopping.")
            break

        try:
            true_pred = [test_cands[i] for i in true_idx]
            result = entity_level_scores(
                true_pred,
                attribute=relation,
                corpus=test_docs,
                parts_by_doc=parts_by_doc,
            )
        except Exception as e:
            # Still best-effort (the sweep stops and the best result so far is
            # reported), but the failure is now visible at the configured
            # log level instead of being hidden at DEBUG.
            logger.warning(
                f"Scoring failed at b = {b}: {e!r}; stopping the threshold search.",
                exc_info=True,
            )
            break

        logger.info(f"b = {b}, f1 = {result.f1}")
        if result.f1 > best_result.f1:
            best_result = result
            best_b = b

    logger.info("===================================================")
    logger.info(f"Scoring on Entity-Level Gold Data with b={best_b}")
    logger.info("===================================================")
    logger.info(f"Corpus Precision {best_result.prec:.3f}")
    logger.info(f"Corpus Recall    {best_result.rec:.3f}")
    logger.info(f"Corpus F1        {best_result.f1:.3f}")
    logger.info("---------------------------------------------------")
    logger.info(
        f"TP: {len(best_result.TP)} "
        f"| FP: {len(best_result.FP)} "
        f"| FN: {len(best_result.FN)}"
    )
    logger.info("===================================================\n")
    return best_result, best_b

In [12]:
# Run configuration shared by the cells below.
# `parallel` is the degree of parallelism passed to each pipeline stage; a
# machine-derived alternative is len(os.sched_getaffinity(0)) // 4.
# See https://docs.python.org/3/library/os.html#os.cpu_count
parallel = 16
component = "transistors"
first_time = True
max_docs = 500
conn_string = f"postgresql:///{component}_all_relations"

# Blank lines and a ruler set this run apart from earlier ones in the log.
for banner_line in ("\n\n", "=" * 80):
    logger.info(banner_line)
logger.info(f"Small transistors with parallel: {parallel}, max_docs: {max_docs}")

[2019-03-07 21:37:13,767][INFO] __main__:7 - 


[2019-03-07 21:37:13,771][INFO] __main__:9 - Small transistors with parallel: 16, max_docs: 500


In [13]:
# Open a database session for this run and parse the corpus.
# The notebook-wide `first_time` flag (set in the configuration cell) is
# passed through instead of a hard-coded True, so changing that one flag
# controls this stage as well.
session = Meta.init(conn_string).Session()
docs, train_docs, dev_docs, test_docs = parsing(
    session, first_time=first_time, parallel=parallel, max_docs=max_docs
)

[2019-03-07 21:37:13,916][INFO] fonduer.meta:86 - Connecting user:None to None:None/transistors_all_relations
[2019-03-07 21:37:13,921][INFO] fonduer.meta:110 - Initializing the storage schema
[2019-03-07 21:37:14,401][INFO] hack.utils.utils:41 - Parsing dev...
[2019-03-07 21:37:28,486][INFO] fonduer.utils.udf:57 - Running UDF...


HBox(children=(IntProgress(value=0, max=120), HTML(value='')))

[2019-03-07 21:40:43,912][INFO] hack.utils.utils:41 - Parsing test...





[2019-03-07 21:40:53,659][INFO] fonduer.utils.udf:57 - Running UDF...


HBox(children=(IntProgress(value=0, max=75), HTML(value='')))

[2019-03-07 21:43:04,249][INFO] hack.utils.utils:41 - Parsing train...





[2019-03-07 21:44:19,789][INFO] fonduer.utils.udf:57 - Running UDF...


HBox(children=(IntProgress(value=0, max=500), HTML(value='')))




[2019-03-07 22:00:06,349][INFO] __main__:10 - # of train Documents: 500
[2019-03-07 22:00:06,352][INFO] __main__:11 - # of dev Documents: 120
[2019-03-07 22:00:06,353][INFO] __main__:12 - # of test Documents: 75
[2019-03-07 22:00:06,364][INFO] __main__:14 - Documents: 695
[2019-03-07 22:00:06,374][INFO] __main__:15 - Sections: 695
[2019-03-07 22:00:07,367][INFO] __main__:16 - Paragraphs: 392426
[2019-03-07 22:00:08,550][INFO] __main__:17 - Sentences: 406080
[2019-03-07 22:00:09,715][INFO] __main__:18 - Figures: 19463


In [14]:
# Extract mentions for every mention type (all type flags keep their
# defaults). The notebook-wide `first_time` flag from the configuration cell
# is passed through instead of a hard-coded True.
Part, StgTempMin, StgTempMax, Polarity, CeVMax = mention_extraction(
    session, docs, first_time=first_time, parallel=parallel
)

[2019-03-07 22:00:10,214][INFO] fonduer.candidates.mentions:460 - Clearing table: part
[2019-03-07 22:00:10,259][INFO] fonduer.candidates.mentions:460 - Clearing table: stg_temp_min
[2019-03-07 22:00:10,264][INFO] fonduer.candidates.mentions:460 - Clearing table: stg_temp_max
[2019-03-07 22:00:10,269][INFO] fonduer.candidates.mentions:460 - Clearing table: polarity
[2019-03-07 22:00:10,274][INFO] fonduer.candidates.mentions:460 - Clearing table: ce_v_max
[2019-03-07 22:00:10,277][INFO] fonduer.utils.udf:57 - Running UDF...


HBox(children=(IntProgress(value=0, max=695), HTML(value='')))

[2019-03-07 22:03:05,270][INFO] __main__:67 - Total Mentions: 26609
[2019-03-07 22:03:05,289][INFO] __main__:68 - Total Part: 13308
[2019-03-07 22:03:05,300][INFO] __main__:69 - Total StgTempMin: 2025
[2019-03-07 22:03:05,313][INFO] __main__:70 - Total StgTempMax: 7691
[2019-03-07 22:03:05,323][INFO] __main__:71 - Total Polarity: 2624
[2019-03-07 22:03:05,332][INFO] __main__:72 - Total CeVMax: 961





In [15]:
# Extract candidates for all four relations into the train/dev/test splits.
# The notebook-wide `first_time` flag from the configuration cell is passed
# through instead of a hard-coded True.
(
    PartStgTempMin,
    PartStgTempMax,
    PartPolarity,
    PartCeVMax,
    train_cands,
    dev_cands,
    test_cands,
) = candidate_extraction(
    session,
    Part,
    StgTempMin,
    StgTempMax,
    Polarity,
    CeVMax,
    train_docs,
    dev_docs,
    test_docs,
    first_time=first_time,
    parallel=parallel,
)

[2019-03-07 22:03:05,446][INFO] fonduer.candidates.candidates:125 - Clearing table part_stg_temp_min (split 0)
[2019-03-07 22:03:05,510][INFO] fonduer.candidates.candidates:125 - Clearing table part_stg_temp_max (split 0)
[2019-03-07 22:03:05,513][INFO] fonduer.candidates.candidates:125 - Clearing table part_polarity (split 0)
[2019-03-07 22:03:05,516][INFO] fonduer.candidates.candidates:125 - Clearing table part_ce_v_max (split 0)
[2019-03-07 22:03:05,519][INFO] fonduer.utils.udf:57 - Running UDF...


HBox(children=(IntProgress(value=0, max=500), HTML(value='')))

[2019-03-07 22:10:38,116][INFO] __main__:54 - Candidates in split=0: 212493
[2019-03-07 22:10:38,117][INFO] fonduer.candidates.candidates:125 - Clearing table part_stg_temp_min (split 1)
[2019-03-07 22:10:38,122][INFO] fonduer.candidates.candidates:125 - Clearing table part_stg_temp_max (split 1)
[2019-03-07 22:10:38,130][INFO] fonduer.candidates.candidates:125 - Clearing table part_polarity (split 1)
[2019-03-07 22:10:38,136][INFO] fonduer.candidates.candidates:125 - Clearing table part_ce_v_max (split 1)
[2019-03-07 22:10:38,141][INFO] fonduer.utils.udf:57 - Running UDF...





HBox(children=(IntProgress(value=0, max=120), HTML(value='')))

[2019-03-07 22:11:46,118][INFO] __main__:54 - Candidates in split=1: 53942
[2019-03-07 22:11:46,120][INFO] fonduer.candidates.candidates:125 - Clearing table part_stg_temp_min (split 2)
[2019-03-07 22:11:46,125][INFO] fonduer.candidates.candidates:125 - Clearing table part_stg_temp_max (split 2)
[2019-03-07 22:11:46,130][INFO] fonduer.candidates.candidates:125 - Clearing table part_polarity (split 2)
[2019-03-07 22:11:46,136][INFO] fonduer.candidates.candidates:125 - Clearing table part_ce_v_max (split 2)
[2019-03-07 22:11:46,140][INFO] fonduer.utils.udf:57 - Running UDF...





HBox(children=(IntProgress(value=0, max=75), HTML(value='')))

[2019-03-07 22:11:58,493][INFO] __main__:54 - Candidates in split=2: 5210





[2019-03-07 22:12:07,740][INFO] __main__:60 - Total train candidate: 10829
[2019-03-07 22:12:07,742][INFO] __main__:61 - Total dev candidate: 7272
[2019-03-07 22:12:07,743][INFO] __main__:62 - Total test candidate: 566


In [19]:
from hack.transistors.transistor_utils import (
    Score,
    entity_level_scores,
    entity_to_candidates,
)

from pprint import pformat

In [20]:
import pickle
# Load the `parts_by_doc` object that the scoring cells pass to
# entity_level_scores() and scoring(). The path is relative, so it is
# resolved against the current working directory.
# NOTE(review): its structure is not visible here; presumably keyed by
# document. Confirm against entity_level_scores.
pickle_file = 'data/parts_by_doc_new.pkl'
# NOTE(review): pickle.load can execute arbitrary code; only use a trusted file.
with open(pickle_file, 'rb') as f:
    parts_by_doc = pickle.load(f)

In [21]:
# First, check total recall: score every extracted candidate of each relation
# against the entity-level gold data, for the dev and the test split.
# NOTE(review): the recorded run of this cell stopped with KeyError: 'BC550'
# on the first relation. Presumably a part missing from `parts_by_doc`;
# verify before relying on this cell.
relation_names = ['stg_temp_min', 'stg_temp_max', 'polarity', 'ce_v_max']

for i, name in enumerate(relation_names):
    logger.info(name)
    for split_label, split_cands, split_docs in (
        ("Dev", dev_cands, dev_docs),
        ("Test", test_cands, test_docs),
    ):
        result = entity_level_scores(
            split_cands[i], corpus=split_docs, attribute=name, parts_by_doc=parts_by_doc
        )
        logger.info(f"Gain Total {split_label} Recall: {result.rec:.3f}")
        # logger.info(f"\n{pformat(result.FN)}")


[2019-03-07 22:15:09,290][INFO] __main__:4 - stg_temp_min


HBox(children=(IntProgress(value=0, max=7272), HTML(value='')))

KeyError: 'BC550'

In [22]:
# Featurize all four relations (or reload the cached matrices when
# `first_time` is falsy).
F_train, F_dev, F_test = featurization(
    session,
    train_cands, dev_cands, test_cands,
    PartStgTempMin, PartStgTempMax, PartPolarity, PartCeVMax,
    first_time=first_time,
    parallel=parallel,
)

[2019-03-07 22:16:11,959][INFO] __main__:34 - Starting featurizer...
[2019-03-07 22:16:21,269][INFO] fonduer.features.featurizer:190 - Clearing Features (split 0)
[2019-03-07 22:16:21,279][INFO] fonduer.utils.udf:57 - Running UDF...


HBox(children=(IntProgress(value=0, max=457), HTML(value='')))

[2019-03-07 23:43:04,366][INFO] fonduer.features.featurizer:190 - Clearing Features (split 1)
[2019-03-07 23:43:05,018][INFO] fonduer.utils.udf:57 - Running UDF...


HBox(children=(IntProgress(value=0, max=114), HTML(value='')))

[2019-03-07 23:45:09,093][INFO] fonduer.features.featurizer:190 - Clearing Features (split 2)
[2019-03-07 23:45:09,133][INFO] fonduer.utils.udf:57 - Running UDF...


HBox(children=(IntProgress(value=0, max=75), HTML(value='')))

[2019-03-07 23:45:47,999][INFO] __main__:38 - Done
[2019-03-07 23:45:48,002][INFO] __main__:40 - Getting feature matrices...
[2019-03-07 23:56:08,506][INFO] __main__:53 - Done.
[2019-03-07 23:56:08,509][INFO] __main__:56 - <class 'fonduer.candidates.models.candidate.PartStgTempMin'> Train shape: (10829, 55127)
[2019-03-07 23:56:08,511][INFO] __main__:57 - <class 'fonduer.candidates.models.candidate.PartStgTempMin'> Test shape: (566, 55127)
[2019-03-07 23:56:08,512][INFO] __main__:58 - <class 'fonduer.candidates.models.candidate.PartStgTempMin'> Dev shape: (7272, 55127)
[2019-03-07 23:56:08,513][INFO] __main__:56 - <class 'fonduer.candidates.models.candidate.PartStgTempMax'> Train shape: (110738, 55127)
[2019-03-07 23:56:08,514][INFO] __main__:57 - <class 'fonduer.candidates.models.candidate.PartStgTempMax'> Test shape: (1848, 55127)
[2019-03-07 23:56:08,516][INFO] __main__:58 - <class 'fonduer.candidates.models.candidate.PartStgTempMax'> Dev shape: (27473, 55127)
[2019-03-07 23:56:08,5

In [25]:
# load_labels(session, relation, PartCeVMax, first_time=first_time)
# Apply the labeling functions to the training split (split 0). The two lists
# are parallel: one LF list per candidate class, in the same order. The
# notebook-wide `first_time` flag from the configuration cell is passed
# through instead of a hard-coded True.
logger.info("Labeling training data...")
L_train, L_gold_train = labeling(
    session,
    train_cands,
    [PartStgTempMin, PartStgTempMax, PartPolarity, PartCeVMax],
    [stg_temp_min_lfs, stg_temp_max_lfs, polarity_lfs, ce_v_max_lfs],
    split=0,
    train=True,
    parallel=parallel,
    first_time=first_time,
)
logger.info("Done.")

[2019-03-08 00:58:09,036][INFO] __main__:2 - Labeling training data...
[2019-03-08 00:58:09,039][INFO] __main__:17 - Applying LFs...
[2019-03-08 00:58:15,015][INFO] fonduer.supervision.labeler:219 - Clearing Labels (split 0)
[2019-03-08 00:58:16,688][INFO] fonduer.utils.udf:57 - Running UDF...


HBox(children=(IntProgress(value=0, max=457), HTML(value='')))

[2019-03-08 01:32:21,876][INFO] __main__:19 - Done...
[2019-03-08 01:32:21,893][INFO] __main__:21 - Getting label matrices...
[2019-03-08 01:40:27,461][INFO] __main__:24 - Done.
[2019-03-08 01:40:27,463][INFO] __main__:25 - L_mat shape: (10829, 44)
[2019-03-08 01:40:27,465][INFO] __main__:26 - L_gold shape: (10829, 1)
[2019-03-08 01:40:27,467][INFO] __main__:9 - Done.


In [31]:
# Train and evaluate a single relation: 'stg_temp_min', which sits at index 0
# of the per-relation candidate, feature and label lists.
relation = 'stg_temp_min'

marginals = generative_model(L_train[0])
disc_model1 = discriminative_model(train_cands[0], F_train[0], marginals, n_epochs=100)

best_result, best_b = scoring(
    relation,
    disc_model1,
    test_cands[0],
    test_docs,
    F_test[0],
    parts_by_doc,
    num=100,
)

[2019-03-08 02:15:50,564][INFO] __main__:4 - Training generative model...


Computing O...
Estimating \mu...
[E:0]	Train Loss: 3.392
[E:100]	Train Loss: 0.003
[E:200]	Train Loss: 0.003
[E:300]	Train Loss: 0.003
[E:400]	Train Loss: 0.003


[2019-03-08 02:15:53,068][INFO] __main__:6 - Done.
[2019-03-08 02:15:53,095][INFO] __main__:4 - Training discriminative model...
[2019-03-08 02:15:53,099][INFO] fonduer.learning.classifier:142 - Loading default parameters for Sparse Logistic Regression
[2019-03-08 02:15:53,130][INFO] fonduer.learning.classifier:193 - Using GPU...
[2019-03-08 02:15:53,132][INFO] fonduer.learning.classifier:195 - Settings: {'n_epochs': 100, 'lr': 0.001, 'batch_size': 256, 'shuffle': True, 'seed': 1234, 'host_device': 'GPU', 'bias': False, 'input_dim': 55128}
[2019-03-08 02:15:53,144][INFO] fonduer.learning.classifier:213 - [SparseLogisticRegression] Training model
[2019-03-08 02:15:53,146][INFO] fonduer.learning.classifier:215 - [SparseLogisticRegression] n_train=7389 #epochs=100 batch size=256


[E:499]	Train Loss: 0.003
Finished Training


[2019-03-08 02:15:54,406][INFO] fonduer.learning.classifier:262 - [SparseLogisticRegression] Epoch 1 (1.26s)	Average loss=0.223570
[2019-03-08 02:15:58,994][INFO] fonduer.learning.classifier:262 - [SparseLogisticRegression] Epoch 5 (5.85s)	Average loss=0.061675
[2019-03-08 02:16:04,189][INFO] fonduer.learning.classifier:262 - [SparseLogisticRegression] Epoch 10 (11.04s)	Average loss=0.056004
[2019-03-08 02:16:09,478][INFO] fonduer.learning.classifier:262 - [SparseLogisticRegression] Epoch 15 (16.33s)	Average loss=0.054554
[2019-03-08 02:16:14,778][INFO] fonduer.learning.classifier:262 - [SparseLogisticRegression] Epoch 20 (21.63s)	Average loss=0.054032
[2019-03-08 02:16:19,877][INFO] fonduer.learning.classifier:262 - [SparseLogisticRegression] Epoch 25 (26.73s)	Average loss=0.053799
[2019-03-08 02:16:25,413][INFO] fonduer.learning.classifier:262 - [SparseLogisticRegression] Epoch 30 (32.27s)	Average loss=0.053634
[2019-03-08 02:16:30,928][INFO] fonduer.learning.classifier:262 - [Sparse

HBox(children=(IntProgress(value=0, max=566), HTML(value='')))

[2019-03-08 02:17:44,838][INFO] __main__:26 - b = 0.0, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=566), HTML(value='')))

[2019-03-08 02:17:44,983][INFO] __main__:26 - b = 0.010101010101010102, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=566), HTML(value='')))

[2019-03-08 02:17:45,126][INFO] __main__:26 - b = 0.020202020202020204, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=566), HTML(value='')))

[2019-03-08 02:17:45,269][INFO] __main__:26 - b = 0.030303030303030304, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=566), HTML(value='')))

[2019-03-08 02:17:45,411][INFO] __main__:26 - b = 0.04040404040404041, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=566), HTML(value='')))

[2019-03-08 02:17:45,555][INFO] __main__:26 - b = 0.05050505050505051, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=566), HTML(value='')))

[2019-03-08 02:17:45,697][INFO] __main__:26 - b = 0.06060606060606061, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=566), HTML(value='')))

[2019-03-08 02:17:45,839][INFO] __main__:26 - b = 0.07070707070707072, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=566), HTML(value='')))

[2019-03-08 02:17:45,982][INFO] __main__:26 - b = 0.08080808080808081, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=566), HTML(value='')))

[2019-03-08 02:17:46,123][INFO] __main__:26 - b = 0.09090909090909091, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=566), HTML(value='')))

[2019-03-08 02:17:46,265][INFO] __main__:26 - b = 0.10101010101010102, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=566), HTML(value='')))

[2019-03-08 02:17:46,407][INFO] __main__:26 - b = 0.11111111111111112, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=566), HTML(value='')))

[2019-03-08 02:17:46,549][INFO] __main__:26 - b = 0.12121212121212122, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=566), HTML(value='')))

[2019-03-08 02:17:46,695][INFO] __main__:26 - b = 0.13131313131313133, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=566), HTML(value='')))

[2019-03-08 02:17:46,837][INFO] __main__:26 - b = 0.14141414141414144, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=565), HTML(value='')))

[2019-03-08 02:17:46,980][INFO] __main__:26 - b = 0.15151515151515152, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=565), HTML(value='')))

[2019-03-08 02:17:47,123][INFO] __main__:26 - b = 0.16161616161616163, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=564), HTML(value='')))

[2019-03-08 02:17:47,265][INFO] __main__:26 - b = 0.17171717171717174, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=564), HTML(value='')))

[2019-03-08 02:17:47,407][INFO] __main__:26 - b = 0.18181818181818182, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=564), HTML(value='')))

[2019-03-08 02:17:47,553][INFO] __main__:26 - b = 0.19191919191919193, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=563), HTML(value='')))

[2019-03-08 02:17:47,695][INFO] __main__:26 - b = 0.20202020202020204, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=563), HTML(value='')))

[2019-03-08 02:17:47,838][INFO] __main__:26 - b = 0.21212121212121213, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=562), HTML(value='')))

[2019-03-08 02:17:47,980][INFO] __main__:26 - b = 0.22222222222222224, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=562), HTML(value='')))

[2019-03-08 02:17:48,123][INFO] __main__:26 - b = 0.23232323232323235, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=561), HTML(value='')))

[2019-03-08 02:17:48,266][INFO] __main__:26 - b = 0.24242424242424243, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=559), HTML(value='')))

[2019-03-08 02:17:48,410][INFO] __main__:26 - b = 0.25252525252525254, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=559), HTML(value='')))

[2019-03-08 02:17:48,555][INFO] __main__:26 - b = 0.26262626262626265, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=559), HTML(value='')))

[2019-03-08 02:17:48,697][INFO] __main__:26 - b = 0.27272727272727276, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=559), HTML(value='')))

[2019-03-08 02:17:48,842][INFO] __main__:26 - b = 0.2828282828282829, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=558), HTML(value='')))

[2019-03-08 02:17:48,994][INFO] __main__:26 - b = 0.29292929292929293, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=558), HTML(value='')))

[2019-03-08 02:17:49,138][INFO] __main__:26 - b = 0.30303030303030304, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=558), HTML(value='')))

[2019-03-08 02:17:49,288][INFO] __main__:26 - b = 0.31313131313131315, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=558), HTML(value='')))

[2019-03-08 02:17:49,433][INFO] __main__:26 - b = 0.32323232323232326, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=558), HTML(value='')))

[2019-03-08 02:17:49,576][INFO] __main__:26 - b = 0.33333333333333337, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=557), HTML(value='')))

[2019-03-08 02:17:49,719][INFO] __main__:26 - b = 0.3434343434343435, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=555), HTML(value='')))

[2019-03-08 02:17:49,863][INFO] __main__:26 - b = 0.3535353535353536, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=554), HTML(value='')))

[2019-03-08 02:17:50,004][INFO] __main__:26 - b = 0.36363636363636365, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=553), HTML(value='')))

[2019-03-08 02:17:50,148][INFO] __main__:26 - b = 0.37373737373737376, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=552), HTML(value='')))

[2019-03-08 02:17:50,293][INFO] __main__:26 - b = 0.38383838383838387, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=551), HTML(value='')))

[2019-03-08 02:17:50,436][INFO] __main__:26 - b = 0.393939393939394, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=548), HTML(value='')))

[2019-03-08 02:17:50,577][INFO] __main__:26 - b = 0.4040404040404041, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=546), HTML(value='')))

[2019-03-08 02:17:50,720][INFO] __main__:26 - b = 0.4141414141414142, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=545), HTML(value='')))

[2019-03-08 02:17:50,862][INFO] __main__:26 - b = 0.42424242424242425, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=543), HTML(value='')))

[2019-03-08 02:17:51,004][INFO] __main__:26 - b = 0.43434343434343436, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=541), HTML(value='')))

[2019-03-08 02:17:51,145][INFO] __main__:26 - b = 0.4444444444444445, f1 = 0.6617647058823528


HBox(children=(IntProgress(value=0, max=539), HTML(value='')))

[2019-03-08 02:17:51,286][INFO] __main__:26 - b = 0.4545454545454546, f1 = 0.6666666666666667


HBox(children=(IntProgress(value=0, max=539), HTML(value='')))

[2019-03-08 02:17:51,428][INFO] __main__:26 - b = 0.4646464646464647, f1 = 0.6666666666666667


HBox(children=(IntProgress(value=0, max=538), HTML(value='')))

[2019-03-08 02:17:51,571][INFO] __main__:26 - b = 0.4747474747474748, f1 = 0.6666666666666667


HBox(children=(IntProgress(value=0, max=538), HTML(value='')))

[2019-03-08 02:17:51,714][INFO] __main__:26 - b = 0.48484848484848486, f1 = 0.6666666666666667


HBox(children=(IntProgress(value=0, max=538), HTML(value='')))

[2019-03-08 02:17:51,856][INFO] __main__:26 - b = 0.494949494949495, f1 = 0.6666666666666667


HBox(children=(IntProgress(value=0, max=538), HTML(value='')))

[2019-03-08 02:17:51,998][INFO] __main__:26 - b = 0.5050505050505051, f1 = 0.6666666666666667


HBox(children=(IntProgress(value=0, max=537), HTML(value='')))

[2019-03-08 02:17:52,140][INFO] __main__:26 - b = 0.5151515151515152, f1 = 0.6666666666666667


HBox(children=(IntProgress(value=0, max=534), HTML(value='')))

[2019-03-08 02:17:52,282][INFO] __main__:26 - b = 0.5252525252525253, f1 = 0.6666666666666667


HBox(children=(IntProgress(value=0, max=533), HTML(value='')))

[2019-03-08 02:17:52,425][INFO] __main__:26 - b = 0.5353535353535354, f1 = 0.6666666666666667


HBox(children=(IntProgress(value=0, max=532), HTML(value='')))

[2019-03-08 02:17:52,566][INFO] __main__:26 - b = 0.5454545454545455, f1 = 0.6666666666666667


HBox(children=(IntProgress(value=0, max=532), HTML(value='')))

[2019-03-08 02:17:52,711][INFO] __main__:26 - b = 0.5555555555555556, f1 = 0.6666666666666667


HBox(children=(IntProgress(value=0, max=531), HTML(value='')))

[2019-03-08 02:17:52,854][INFO] __main__:26 - b = 0.5656565656565657, f1 = 0.6666666666666667


HBox(children=(IntProgress(value=0, max=530), HTML(value='')))

[2019-03-08 02:17:52,996][INFO] __main__:26 - b = 0.5757575757575758, f1 = 0.6666666666666667


HBox(children=(IntProgress(value=0, max=529), HTML(value='')))

[2019-03-08 02:17:53,137][INFO] __main__:26 - b = 0.5858585858585859, f1 = 0.6691449814126395


HBox(children=(IntProgress(value=0, max=528), HTML(value='')))

[2019-03-08 02:17:53,281][INFO] __main__:26 - b = 0.595959595959596, f1 = 0.6691449814126395


HBox(children=(IntProgress(value=0, max=526), HTML(value='')))

[2019-03-08 02:17:53,423][INFO] __main__:26 - b = 0.6060606060606061, f1 = 0.6691449814126395


HBox(children=(IntProgress(value=0, max=522), HTML(value='')))

[2019-03-08 02:17:53,564][INFO] __main__:26 - b = 0.6161616161616162, f1 = 0.6691449814126395


HBox(children=(IntProgress(value=0, max=511), HTML(value='')))

[2019-03-08 02:17:53,705][INFO] __main__:26 - b = 0.6262626262626263, f1 = 0.6691449814126395


HBox(children=(IntProgress(value=0, max=505), HTML(value='')))

[2019-03-08 02:17:53,845][INFO] __main__:26 - b = 0.6363636363636365, f1 = 0.6716417910447762


HBox(children=(IntProgress(value=0, max=497), HTML(value='')))

[2019-03-08 02:17:53,987][INFO] __main__:26 - b = 0.6464646464646465, f1 = 0.6716417910447762


HBox(children=(IntProgress(value=0, max=494), HTML(value='')))

[2019-03-08 02:17:54,128][INFO] __main__:26 - b = 0.6565656565656566, f1 = 0.6716417910447762


HBox(children=(IntProgress(value=0, max=488), HTML(value='')))

[2019-03-08 02:17:54,268][INFO] __main__:26 - b = 0.6666666666666667, f1 = 0.6716417910447762


HBox(children=(IntProgress(value=0, max=486), HTML(value='')))

[2019-03-08 02:17:54,409][INFO] __main__:26 - b = 0.6767676767676768, f1 = 0.6716417910447762


HBox(children=(IntProgress(value=0, max=481), HTML(value='')))

[2019-03-08 02:17:54,551][INFO] __main__:26 - b = 0.686868686868687, f1 = 0.6741573033707866


HBox(children=(IntProgress(value=0, max=474), HTML(value='')))

[2019-03-08 02:17:54,691][INFO] __main__:26 - b = 0.696969696969697, f1 = 0.6844106463878327


HBox(children=(IntProgress(value=0, max=470), HTML(value='')))

[2019-03-08 02:17:54,834][INFO] __main__:26 - b = 0.7070707070707072, f1 = 0.6844106463878327


HBox(children=(IntProgress(value=0, max=465), HTML(value='')))

[2019-03-08 02:17:54,974][INFO] __main__:26 - b = 0.7171717171717172, f1 = 0.6844106463878327


HBox(children=(IntProgress(value=0, max=462), HTML(value='')))

[2019-03-08 02:17:55,115][INFO] __main__:26 - b = 0.7272727272727273, f1 = 0.6844106463878327


HBox(children=(IntProgress(value=0, max=457), HTML(value='')))

[2019-03-08 02:17:55,256][INFO] __main__:26 - b = 0.7373737373737375, f1 = 0.6844106463878327


HBox(children=(IntProgress(value=0, max=455), HTML(value='')))

[2019-03-08 02:17:55,395][INFO] __main__:26 - b = 0.7474747474747475, f1 = 0.6844106463878327


HBox(children=(IntProgress(value=0, max=449), HTML(value='')))

[2019-03-08 02:17:55,537][INFO] __main__:26 - b = 0.7575757575757577, f1 = 0.6844106463878327


HBox(children=(IntProgress(value=0, max=441), HTML(value='')))

[2019-03-08 02:17:55,677][INFO] __main__:26 - b = 0.7676767676767677, f1 = 0.6844106463878327


HBox(children=(IntProgress(value=0, max=434), HTML(value='')))

[2019-03-08 02:17:55,818][INFO] __main__:26 - b = 0.7777777777777778, f1 = 0.6844106463878327


HBox(children=(IntProgress(value=0, max=423), HTML(value='')))

[2019-03-08 02:17:55,958][INFO] __main__:26 - b = 0.787878787878788, f1 = 0.6844106463878327


HBox(children=(IntProgress(value=0, max=390), HTML(value='')))

[2019-03-08 02:17:56,096][INFO] __main__:26 - b = 0.797979797979798, f1 = 0.6844106463878327


HBox(children=(IntProgress(value=0, max=383), HTML(value='')))

[2019-03-08 02:17:56,234][INFO] __main__:26 - b = 0.8080808080808082, f1 = 0.6844106463878327


HBox(children=(IntProgress(value=0, max=376), HTML(value='')))

[2019-03-08 02:17:56,373][INFO] __main__:26 - b = 0.8181818181818182, f1 = 0.6844106463878327


HBox(children=(IntProgress(value=0, max=373), HTML(value='')))

[2019-03-08 02:17:56,511][INFO] __main__:26 - b = 0.8282828282828284, f1 = 0.6896551724137931


HBox(children=(IntProgress(value=0, max=370), HTML(value='')))

[2019-03-08 02:17:56,650][INFO] __main__:26 - b = 0.8383838383838385, f1 = 0.703125


HBox(children=(IntProgress(value=0, max=366), HTML(value='')))

[2019-03-08 02:17:56,789][INFO] __main__:26 - b = 0.8484848484848485, f1 = 0.703125


HBox(children=(IntProgress(value=0, max=360), HTML(value='')))

[2019-03-08 02:17:56,925][INFO] __main__:26 - b = 0.8585858585858587, f1 = 0.703125


HBox(children=(IntProgress(value=0, max=347), HTML(value='')))

[2019-03-08 02:17:57,062][INFO] __main__:26 - b = 0.8686868686868687, f1 = 0.703125


HBox(children=(IntProgress(value=0, max=337), HTML(value='')))

[2019-03-08 02:17:57,199][INFO] __main__:26 - b = 0.8787878787878789, f1 = 0.6980392156862745


HBox(children=(IntProgress(value=0, max=330), HTML(value='')))

[2019-03-08 02:17:57,336][INFO] __main__:26 - b = 0.888888888888889, f1 = 0.6980392156862745


HBox(children=(IntProgress(value=0, max=321), HTML(value='')))

[2019-03-08 02:17:57,473][INFO] __main__:26 - b = 0.8989898989898991, f1 = 0.6980392156862745


HBox(children=(IntProgress(value=0, max=310), HTML(value='')))

[2019-03-08 02:17:57,610][INFO] __main__:26 - b = 0.9090909090909092, f1 = 0.7035573122529645


HBox(children=(IntProgress(value=0, max=284), HTML(value='')))

[2019-03-08 02:17:57,746][INFO] __main__:26 - b = 0.9191919191919192, f1 = 0.7235772357723577


HBox(children=(IntProgress(value=0, max=277), HTML(value='')))

[2019-03-08 02:17:57,883][INFO] __main__:26 - b = 0.9292929292929294, f1 = 0.7235772357723577


HBox(children=(IntProgress(value=0, max=276), HTML(value='')))

[2019-03-08 02:17:58,018][INFO] __main__:26 - b = 0.9393939393939394, f1 = 0.7235772357723577


HBox(children=(IntProgress(value=0, max=276), HTML(value='')))

[2019-03-08 02:17:58,154][INFO] __main__:26 - b = 0.9494949494949496, f1 = 0.7235772357723577


HBox(children=(IntProgress(value=0, max=276), HTML(value='')))

[2019-03-08 02:17:58,291][INFO] __main__:26 - b = 0.9595959595959597, f1 = 0.7235772357723577


HBox(children=(IntProgress(value=0, max=269), HTML(value='')))

[2019-03-08 02:17:58,428][INFO] __main__:26 - b = 0.9696969696969697, f1 = 0.7235772357723577


HBox(children=(IntProgress(value=0, max=252), HTML(value='')))

[2019-03-08 02:17:58,582][INFO] __main__:26 - b = 0.9797979797979799, f1 = 0.7235772357723577


HBox(children=(IntProgress(value=0, max=236), HTML(value='')))

[2019-03-08 02:17:58,716][INFO] __main__:26 - b = 0.98989898989899, f1 = 0.71900826446281
[2019-03-08 02:17:58,721][INFO] __main__:35 - Scoring on Entity-Level Gold Data with b=0.9191919191919192
[2019-03-08 02:17:58,722][INFO] __main__:37 - Corpus Precision 0.957
[2019-03-08 02:17:58,723][INFO] __main__:38 - Corpus Recall    0.582
[2019-03-08 02:17:58,723][INFO] __main__:39 - Corpus F1        0.724
[2019-03-08 02:17:58,724][INFO] __main__:40 - ---------------------------------------------------
[2019-03-08 02:17:58,725][INFO] __main__:42 - TP: 89 | FP: 4 | FN: 64



In [32]:
# Train and score the 'stg_temp_max' relation. This cell pairs that relation
# with index 1 of L_train, train_cands, F_train, test_cands and F_test.

# Fit the generative model on this relation's L_train entry; the value it
# returns is handed to discriminative_model below as `marginals`.
marginals = generative_model(L_train[1])

# Fit the discriminative model for 100 epochs on the matching candidates and
# features.
# NOTE(review): the name `disc_model1` is reassigned by the 'polarity' cell as
# well, so it only ever refers to the most recently executed relation.
disc_model1 = discriminative_model(
    train_cands[1],
    F_train[1],
    marginals,
    n_epochs=100,
)

relation = "stg_temp_max"

# Score on the test split; scoring hands back a (best_result, best_b) pair.
# NOTE(review): `num` presumably sets how many threshold values `b` are
# swept -- confirm against the definition of scoring.
best_result, best_b = scoring(
    relation,
    disc_model1,
    test_cands[1],
    test_docs,
    F_test[1],
    parts_by_doc,
    num=100,
)

[2019-03-08 02:17:58,737][INFO] __main__:4 - Training generative model...


Computing O...
Estimating \mu...
[E:0]	Train Loss: 0.833
[E:100]	Train Loss: 0.043
[E:200]	Train Loss: 0.042
[E:300]	Train Loss: 0.042
[E:400]	Train Loss: 0.042


[2019-03-08 02:18:01,416][INFO] __main__:6 - Done.


[E:499]	Train Loss: 0.042
Finished Training


[2019-03-08 02:18:01,689][INFO] __main__:4 - Training discriminative model...
[2019-03-08 02:18:01,696][INFO] fonduer.learning.classifier:142 - Loading default parameters for Sparse Logistic Regression
[2019-03-08 02:18:04,227][INFO] fonduer.learning.classifier:193 - Using GPU...
[2019-03-08 02:18:04,229][INFO] fonduer.learning.classifier:195 - Settings: {'n_epochs': 100, 'lr': 0.001, 'batch_size': 256, 'shuffle': True, 'seed': 1234, 'host_device': 'GPU', 'bias': False, 'input_dim': 55128}
[2019-03-08 02:18:04,240][INFO] fonduer.learning.classifier:213 - [SparseLogisticRegression] Training model
[2019-03-08 02:18:04,241][INFO] fonduer.learning.classifier:215 - [SparseLogisticRegression] n_train=110738 #epochs=100 batch size=256
[2019-03-08 02:18:18,279][INFO] fonduer.learning.classifier:262 - [SparseLogisticRegression] Epoch 1 (14.04s)	Average loss=0.483407
[2019-03-08 02:19:12,453][INFO] fonduer.learning.classifier:262 - [SparseLogisticRegression] Epoch 5 (68.21s)	Average loss=0.47617

HBox(children=(IntProgress(value=0, max=1848), HTML(value='')))

[2019-03-08 02:40:42,116][INFO] __main__:26 - b = 0.0, f1 = 0.4832214765100672


HBox(children=(IntProgress(value=0, max=1792), HTML(value='')))

[2019-03-08 02:40:42,294][INFO] __main__:26 - b = 0.010101010101010102, f1 = 0.48758465011286684


HBox(children=(IntProgress(value=0, max=1792), HTML(value='')))

[2019-03-08 02:40:42,469][INFO] __main__:26 - b = 0.020202020202020204, f1 = 0.48758465011286684


HBox(children=(IntProgress(value=0, max=1780), HTML(value='')))

[2019-03-08 02:40:42,644][INFO] __main__:26 - b = 0.030303030303030304, f1 = 0.48758465011286684


HBox(children=(IntProgress(value=0, max=1772), HTML(value='')))

[2019-03-08 02:40:42,817][INFO] __main__:26 - b = 0.04040404040404041, f1 = 0.48758465011286684


HBox(children=(IntProgress(value=0, max=1747), HTML(value='')))

[2019-03-08 02:40:42,989][INFO] __main__:26 - b = 0.05050505050505051, f1 = 0.48758465011286684


HBox(children=(IntProgress(value=0, max=1717), HTML(value='')))

[2019-03-08 02:40:43,161][INFO] __main__:26 - b = 0.06060606060606061, f1 = 0.48758465011286684


HBox(children=(IntProgress(value=0, max=1700), HTML(value='')))

[2019-03-08 02:40:43,335][INFO] __main__:26 - b = 0.07070707070707072, f1 = 0.4897959183673469


HBox(children=(IntProgress(value=0, max=1685), HTML(value='')))

[2019-03-08 02:40:43,508][INFO] __main__:26 - b = 0.08080808080808081, f1 = 0.4920273348519362


HBox(children=(IntProgress(value=0, max=1674), HTML(value='')))

[2019-03-08 02:40:43,680][INFO] __main__:26 - b = 0.09090909090909091, f1 = 0.494279176201373


HBox(children=(IntProgress(value=0, max=1663), HTML(value='')))

[2019-03-08 02:40:43,852][INFO] __main__:26 - b = 0.10101010101010102, f1 = 0.4988452655889145


HBox(children=(IntProgress(value=0, max=1648), HTML(value='')))

[2019-03-08 02:40:44,024][INFO] __main__:26 - b = 0.11111111111111112, f1 = 0.5034965034965035


HBox(children=(IntProgress(value=0, max=1638), HTML(value='')))

[2019-03-08 02:40:44,195][INFO] __main__:26 - b = 0.12121212121212122, f1 = 0.5094339622641508


HBox(children=(IntProgress(value=0, max=1628), HTML(value='')))

[2019-03-08 02:40:44,366][INFO] __main__:26 - b = 0.13131313131313133, f1 = 0.513064133016627


HBox(children=(IntProgress(value=0, max=1624), HTML(value='')))

[2019-03-08 02:40:44,536][INFO] __main__:26 - b = 0.14141414141414144, f1 = 0.513064133016627


HBox(children=(IntProgress(value=0, max=1614), HTML(value='')))

[2019-03-08 02:40:44,705][INFO] __main__:26 - b = 0.15151515151515152, f1 = 0.513064133016627


HBox(children=(IntProgress(value=0, max=1601), HTML(value='')))

[2019-03-08 02:40:44,877][INFO] __main__:26 - b = 0.16161616161616163, f1 = 0.513064133016627


HBox(children=(IntProgress(value=0, max=1583), HTML(value='')))

[2019-03-08 02:40:45,047][INFO] __main__:26 - b = 0.17171717171717174, f1 = 0.513064133016627


HBox(children=(IntProgress(value=0, max=1558), HTML(value='')))

[2019-03-08 02:40:45,217][INFO] __main__:26 - b = 0.18181818181818182, f1 = 0.513064133016627


HBox(children=(IntProgress(value=0, max=1513), HTML(value='')))

[2019-03-08 02:40:45,385][INFO] __main__:26 - b = 0.19191919191919193, f1 = 0.513064133016627


HBox(children=(IntProgress(value=0, max=1460), HTML(value='')))

[2019-03-08 02:40:45,552][INFO] __main__:26 - b = 0.20202020202020204, f1 = 0.5230024213075061


HBox(children=(IntProgress(value=0, max=1419), HTML(value='')))

[2019-03-08 02:40:45,717][INFO] __main__:26 - b = 0.21212121212121213, f1 = 0.5255474452554745


HBox(children=(IntProgress(value=0, max=1371), HTML(value='')))

[2019-03-08 02:40:45,881][INFO] __main__:26 - b = 0.22222222222222224, f1 = 0.5281173594132029


HBox(children=(IntProgress(value=0, max=1344), HTML(value='')))

[2019-03-08 02:40:46,044][INFO] __main__:26 - b = 0.23232323232323235, f1 = 0.5320197044334976


HBox(children=(IntProgress(value=0, max=1301), HTML(value='')))

[2019-03-08 02:40:46,207][INFO] __main__:26 - b = 0.24242424242424243, f1 = 0.5320197044334976


HBox(children=(IntProgress(value=0, max=1239), HTML(value='')))

[2019-03-08 02:40:46,368][INFO] __main__:26 - b = 0.25252525252525254, f1 = 0.5373134328358209


HBox(children=(IntProgress(value=0, max=1185), HTML(value='')))

[2019-03-08 02:40:46,528][INFO] __main__:26 - b = 0.26262626262626265, f1 = 0.5482233502538071


HBox(children=(IntProgress(value=0, max=1134), HTML(value='')))

[2019-03-08 02:40:46,690][INFO] __main__:26 - b = 0.27272727272727276, f1 = 0.5669291338582677


HBox(children=(IntProgress(value=0, max=1085), HTML(value='')))

[2019-03-08 02:40:46,848][INFO] __main__:26 - b = 0.2828282828282829, f1 = 0.576


HBox(children=(IntProgress(value=0, max=1042), HTML(value='')))

[2019-03-08 02:40:47,005][INFO] __main__:26 - b = 0.29292929292929293, f1 = 0.5901639344262295


HBox(children=(IntProgress(value=0, max=1004), HTML(value='')))

[2019-03-08 02:40:47,162][INFO] __main__:26 - b = 0.30303030303030304, f1 = 0.5950413223140496


HBox(children=(IntProgress(value=0, max=974), HTML(value='')))

[2019-03-08 02:40:47,317][INFO] __main__:26 - b = 0.31313131313131315, f1 = 0.6016713091922006


HBox(children=(IntProgress(value=0, max=935), HTML(value='')))

[2019-03-08 02:40:47,471][INFO] __main__:26 - b = 0.32323232323232326, f1 = 0.6153846153846154


HBox(children=(IntProgress(value=0, max=891), HTML(value='')))

[2019-03-08 02:40:47,625][INFO] __main__:26 - b = 0.33333333333333337, f1 = 0.6334310850439883


HBox(children=(IntProgress(value=0, max=856), HTML(value='')))

[2019-03-08 02:40:47,778][INFO] __main__:26 - b = 0.3434343434343435, f1 = 0.6506024096385543


HBox(children=(IntProgress(value=0, max=822), HTML(value='')))

[2019-03-08 02:40:47,929][INFO] __main__:26 - b = 0.3535353535353536, f1 = 0.6417445482866043


HBox(children=(IntProgress(value=0, max=794), HTML(value='')))

[2019-03-08 02:40:48,081][INFO] __main__:26 - b = 0.36363636363636365, f1 = 0.6417445482866043


HBox(children=(IntProgress(value=0, max=772), HTML(value='')))

[2019-03-08 02:40:48,233][INFO] __main__:26 - b = 0.37373737373737376, f1 = 0.64375


HBox(children=(IntProgress(value=0, max=752), HTML(value='')))

[2019-03-08 02:40:48,384][INFO] __main__:26 - b = 0.38383838383838387, f1 = 0.64375


HBox(children=(IntProgress(value=0, max=736), HTML(value='')))

[2019-03-08 02:40:48,534][INFO] __main__:26 - b = 0.393939393939394, f1 = 0.6477987421383647


HBox(children=(IntProgress(value=0, max=714), HTML(value='')))

[2019-03-08 02:40:48,684][INFO] __main__:26 - b = 0.4040404040404041, f1 = 0.6688311688311688


HBox(children=(IntProgress(value=0, max=680), HTML(value='')))

[2019-03-08 02:40:48,834][INFO] __main__:26 - b = 0.4141414141414142, f1 = 0.6754098360655737


HBox(children=(IntProgress(value=0, max=656), HTML(value='')))

[2019-03-08 02:40:48,982][INFO] __main__:26 - b = 0.42424242424242425, f1 = 0.6898954703832753


HBox(children=(IntProgress(value=0, max=636), HTML(value='')))

[2019-03-08 02:40:49,133][INFO] __main__:26 - b = 0.43434343434343436, f1 = 0.7037037037037037


HBox(children=(IntProgress(value=0, max=617), HTML(value='')))

[2019-03-08 02:40:49,281][INFO] __main__:26 - b = 0.4444444444444445, f1 = 0.7169811320754716


HBox(children=(IntProgress(value=0, max=611), HTML(value='')))

[2019-03-08 02:40:49,428][INFO] __main__:26 - b = 0.4545454545454546, f1 = 0.7196969696969697


HBox(children=(IntProgress(value=0, max=603), HTML(value='')))

[2019-03-08 02:40:49,577][INFO] __main__:26 - b = 0.4646464646464647, f1 = 0.7251908396946565


HBox(children=(IntProgress(value=0, max=596), HTML(value='')))

[2019-03-08 02:40:49,724][INFO] __main__:26 - b = 0.4747474747474748, f1 = 0.7251908396946565


HBox(children=(IntProgress(value=0, max=589), HTML(value='')))

[2019-03-08 02:40:49,872][INFO] __main__:26 - b = 0.48484848484848486, f1 = 0.7279693486590038


HBox(children=(IntProgress(value=0, max=581), HTML(value='')))

[2019-03-08 02:40:50,019][INFO] __main__:26 - b = 0.494949494949495, f1 = 0.7335907335907336


HBox(children=(IntProgress(value=0, max=578), HTML(value='')))

[2019-03-08 02:40:50,166][INFO] __main__:26 - b = 0.5050505050505051, f1 = 0.7364341085271319


HBox(children=(IntProgress(value=0, max=576), HTML(value='')))

[2019-03-08 02:40:50,314][INFO] __main__:26 - b = 0.5151515151515152, f1 = 0.7364341085271319


HBox(children=(IntProgress(value=0, max=565), HTML(value='')))

[2019-03-08 02:40:50,460][INFO] __main__:26 - b = 0.5252525252525253, f1 = 0.7364341085271319


HBox(children=(IntProgress(value=0, max=559), HTML(value='')))

[2019-03-08 02:40:50,605][INFO] __main__:26 - b = 0.5353535353535354, f1 = 0.7364341085271319


HBox(children=(IntProgress(value=0, max=548), HTML(value='')))

[2019-03-08 02:40:50,751][INFO] __main__:26 - b = 0.5454545454545455, f1 = 0.7392996108949415


HBox(children=(IntProgress(value=0, max=547), HTML(value='')))

[2019-03-08 02:40:50,897][INFO] __main__:26 - b = 0.5555555555555556, f1 = 0.7392996108949415


HBox(children=(IntProgress(value=0, max=547), HTML(value='')))

[2019-03-08 02:40:51,042][INFO] __main__:26 - b = 0.5656565656565657, f1 = 0.7392996108949415


HBox(children=(IntProgress(value=0, max=543), HTML(value='')))

[2019-03-08 02:40:51,188][INFO] __main__:26 - b = 0.5757575757575758, f1 = 0.7392996108949415


HBox(children=(IntProgress(value=0, max=538), HTML(value='')))

[2019-03-08 02:40:51,334][INFO] __main__:26 - b = 0.5858585858585859, f1 = 0.7392996108949415


HBox(children=(IntProgress(value=0, max=530), HTML(value='')))

[2019-03-08 02:40:51,480][INFO] __main__:26 - b = 0.595959595959596, f1 = 0.7392996108949415


HBox(children=(IntProgress(value=0, max=526), HTML(value='')))

[2019-03-08 02:40:51,627][INFO] __main__:26 - b = 0.6060606060606061, f1 = 0.7392996108949415


HBox(children=(IntProgress(value=0, max=521), HTML(value='')))

[2019-03-08 02:40:51,773][INFO] __main__:26 - b = 0.6161616161616162, f1 = 0.7421875


HBox(children=(IntProgress(value=0, max=521), HTML(value='')))

[2019-03-08 02:40:51,918][INFO] __main__:26 - b = 0.6262626262626263, f1 = 0.7421875


HBox(children=(IntProgress(value=0, max=521), HTML(value='')))

[2019-03-08 02:40:52,064][INFO] __main__:26 - b = 0.6363636363636365, f1 = 0.7421875


HBox(children=(IntProgress(value=0, max=520), HTML(value='')))

[2019-03-08 02:40:52,209][INFO] __main__:26 - b = 0.6464646464646465, f1 = 0.7421875


HBox(children=(IntProgress(value=0, max=520), HTML(value='')))

[2019-03-08 02:40:52,356][INFO] __main__:26 - b = 0.6565656565656566, f1 = 0.7421875


HBox(children=(IntProgress(value=0, max=520), HTML(value='')))

[2019-03-08 02:40:52,501][INFO] __main__:26 - b = 0.6666666666666667, f1 = 0.7421875


HBox(children=(IntProgress(value=0, max=520), HTML(value='')))

[2019-03-08 02:40:52,647][INFO] __main__:26 - b = 0.6767676767676768, f1 = 0.7421875


HBox(children=(IntProgress(value=0, max=520), HTML(value='')))

[2019-03-08 02:40:52,794][INFO] __main__:26 - b = 0.686868686868687, f1 = 0.7421875


HBox(children=(IntProgress(value=0, max=520), HTML(value='')))

[2019-03-08 02:40:52,940][INFO] __main__:26 - b = 0.696969696969697, f1 = 0.7421875


HBox(children=(IntProgress(value=0, max=520), HTML(value='')))

[2019-03-08 02:40:53,085][INFO] __main__:26 - b = 0.7070707070707072, f1 = 0.7421875


HBox(children=(IntProgress(value=0, max=518), HTML(value='')))

[2019-03-08 02:40:53,230][INFO] __main__:26 - b = 0.7171717171717172, f1 = 0.7421875


HBox(children=(IntProgress(value=0, max=516), HTML(value='')))

[2019-03-08 02:40:53,376][INFO] __main__:26 - b = 0.7272727272727273, f1 = 0.7421875


HBox(children=(IntProgress(value=0, max=515), HTML(value='')))

[2019-03-08 02:40:53,520][INFO] __main__:26 - b = 0.7373737373737375, f1 = 0.7421875


HBox(children=(IntProgress(value=0, max=513), HTML(value='')))

[2019-03-08 02:40:53,666][INFO] __main__:26 - b = 0.7474747474747475, f1 = 0.7421875


HBox(children=(IntProgress(value=0, max=511), HTML(value='')))

[2019-03-08 02:40:53,814][INFO] __main__:26 - b = 0.7575757575757577, f1 = 0.7421875


HBox(children=(IntProgress(value=0, max=507), HTML(value='')))

[2019-03-08 02:40:53,962][INFO] __main__:26 - b = 0.7676767676767677, f1 = 0.7421875


HBox(children=(IntProgress(value=0, max=506), HTML(value='')))

[2019-03-08 02:40:54,109][INFO] __main__:26 - b = 0.7777777777777778, f1 = 0.7421875


HBox(children=(IntProgress(value=0, max=506), HTML(value='')))

[2019-03-08 02:40:54,255][INFO] __main__:26 - b = 0.787878787878788, f1 = 0.7421875


HBox(children=(IntProgress(value=0, max=505), HTML(value='')))

[2019-03-08 02:40:54,401][INFO] __main__:26 - b = 0.797979797979798, f1 = 0.7421875


HBox(children=(IntProgress(value=0, max=505), HTML(value='')))

[2019-03-08 02:40:54,547][INFO] __main__:26 - b = 0.8080808080808082, f1 = 0.7421875


HBox(children=(IntProgress(value=0, max=503), HTML(value='')))

[2019-03-08 02:40:54,693][INFO] __main__:26 - b = 0.8181818181818182, f1 = 0.7421875


HBox(children=(IntProgress(value=0, max=502), HTML(value='')))

[2019-03-08 02:40:54,838][INFO] __main__:26 - b = 0.8282828282828284, f1 = 0.7421875


HBox(children=(IntProgress(value=0, max=500), HTML(value='')))

[2019-03-08 02:40:54,985][INFO] __main__:26 - b = 0.8383838383838385, f1 = 0.7421875


HBox(children=(IntProgress(value=0, max=497), HTML(value='')))

[2019-03-08 02:40:55,131][INFO] __main__:26 - b = 0.8484848484848485, f1 = 0.7421875


HBox(children=(IntProgress(value=0, max=496), HTML(value='')))

[2019-03-08 02:40:55,278][INFO] __main__:26 - b = 0.8585858585858587, f1 = 0.7421875


HBox(children=(IntProgress(value=0, max=496), HTML(value='')))

[2019-03-08 02:40:55,427][INFO] __main__:26 - b = 0.8686868686868687, f1 = 0.7421875


HBox(children=(IntProgress(value=0, max=496), HTML(value='')))

[2019-03-08 02:40:55,573][INFO] __main__:26 - b = 0.8787878787878789, f1 = 0.7421875


HBox(children=(IntProgress(value=0, max=496), HTML(value='')))

[2019-03-08 02:40:55,720][INFO] __main__:26 - b = 0.888888888888889, f1 = 0.7421875


HBox(children=(IntProgress(value=0, max=496), HTML(value='')))

[2019-03-08 02:40:55,867][INFO] __main__:26 - b = 0.8989898989898991, f1 = 0.7421875


HBox(children=(IntProgress(value=0, max=496), HTML(value='')))

[2019-03-08 02:40:56,013][INFO] __main__:26 - b = 0.9090909090909092, f1 = 0.7421875


HBox(children=(IntProgress(value=0, max=496), HTML(value='')))

[2019-03-08 02:40:56,159][INFO] __main__:26 - b = 0.9191919191919192, f1 = 0.7421875


HBox(children=(IntProgress(value=0, max=482), HTML(value='')))

[2019-03-08 02:40:56,305][INFO] __main__:26 - b = 0.9292929292929294, f1 = 0.7450980392156862


HBox(children=(IntProgress(value=0, max=473), HTML(value='')))

[2019-03-08 02:40:56,452][INFO] __main__:26 - b = 0.9393939393939394, f1 = 0.7450980392156862


HBox(children=(IntProgress(value=0, max=460), HTML(value='')))

[2019-03-08 02:40:56,598][INFO] __main__:26 - b = 0.9494949494949496, f1 = 0.75098814229249


HBox(children=(IntProgress(value=0, max=442), HTML(value='')))

[2019-03-08 02:40:56,744][INFO] __main__:26 - b = 0.9595959595959597, f1 = 0.7410358565737052


HBox(children=(IntProgress(value=0, max=434), HTML(value='')))

[2019-03-08 02:40:56,890][INFO] __main__:26 - b = 0.9696969696969697, f1 = 0.736


HBox(children=(IntProgress(value=0, max=425), HTML(value='')))

[2019-03-08 02:40:57,034][INFO] __main__:26 - b = 0.9797979797979799, f1 = 0.7309236947791165


HBox(children=(IntProgress(value=0, max=420), HTML(value='')))

[2019-03-08 02:40:57,178][INFO] __main__:26 - b = 0.98989898989899, f1 = 0.7154471544715448
[2019-03-08 02:40:57,189][INFO] __main__:35 - Scoring on Entity-Level Gold Data with b=0.9494949494949496
[2019-03-08 02:40:57,190][INFO] __main__:37 - Corpus Precision 0.979
[2019-03-08 02:40:57,191][INFO] __main__:38 - Corpus Recall    0.609
[2019-03-08 02:40:57,191][INFO] __main__:39 - Corpus F1        0.751
[2019-03-08 02:40:57,192][INFO] __main__:40 - ---------------------------------------------------
[2019-03-08 02:40:57,193][INFO] __main__:42 - TP: 95 | FP: 2 | FN: 61



In [33]:
# Train and score the 'polarity' relation. This cell pairs that relation with
# index 2 of L_train, train_cands, F_train, test_cands and F_test.

# Fit the generative model on this relation's L_train entry; the value it
# returns is handed to discriminative_model below as `marginals`.
marginals = generative_model(L_train[2])

# Fit the discriminative model for 100 epochs on the matching candidates and
# features.
# NOTE(review): the name `disc_model1` is also assigned by the 'stg_temp_max'
# cell, so after this cell runs it refers to the polarity model only.
disc_model1 = discriminative_model(
    train_cands[2],
    F_train[2],
    marginals,
    n_epochs=100,
)

relation = "polarity"

# Score on the test split; scoring hands back a (best_result, best_b) pair.
# NOTE(review): `num` presumably sets how many threshold values `b` are
# swept -- confirm against the definition of scoring.
best_result, best_b = scoring(
    relation,
    disc_model1,
    test_cands[2],
    test_docs,
    F_test[2],
    parts_by_doc,
    num=100,
)

[2019-03-08 02:40:57,203][INFO] __main__:4 - Training generative model...


Computing O...
Estimating \mu...
[E:0]	Train Loss: 0.505
[E:100]	Train Loss: 0.054
[E:200]	Train Loss: 0.045
[E:300]	Train Loss: 0.045
[E:400]	Train Loss: 0.045


[2019-03-08 02:40:59,712][INFO] __main__:6 - Done.
[2019-03-08 02:40:59,816][INFO] __main__:4 - Training discriminative model...
[2019-03-08 02:40:59,821][INFO] fonduer.learning.classifier:142 - Loading default parameters for Sparse Logistic Regression


[E:499]	Train Loss: 0.045
Finished Training


[2019-03-08 02:41:00,030][INFO] fonduer.learning.classifier:193 - Using GPU...
[2019-03-08 02:41:00,031][INFO] fonduer.learning.classifier:195 - Settings: {'n_epochs': 100, 'lr': 0.001, 'batch_size': 256, 'shuffle': True, 'seed': 1234, 'host_device': 'GPU', 'bias': False, 'input_dim': 55128}
[2019-03-08 02:41:00,043][INFO] fonduer.learning.classifier:213 - [SparseLogisticRegression] Training model
[2019-03-08 02:41:00,044][INFO] fonduer.learning.classifier:215 - [SparseLogisticRegression] n_train=68199 #epochs=100 batch size=256
[2019-03-08 02:41:09,090][INFO] fonduer.learning.classifier:262 - [SparseLogisticRegression] Epoch 1 (9.05s)	Average loss=0.219603
[2019-03-08 02:41:37,532][INFO] fonduer.learning.classifier:262 - [SparseLogisticRegression] Epoch 5 (37.49s)	Average loss=0.186457
[2019-03-08 02:42:12,537][INFO] fonduer.learning.classifier:262 - [SparseLogisticRegression] Epoch 10 (72.49s)	Average loss=0.184800
[2019-03-08 02:42:47,965][INFO] fonduer.learning.classifier:262 - [Sp

HBox(children=(IntProgress(value=0, max=2007), HTML(value='')))

[2019-03-08 02:52:46,978][INFO] __main__:26 - b = 0.0, f1 = 0.8867924528301887


HBox(children=(IntProgress(value=0, max=1726), HTML(value='')))

[2019-03-08 02:52:47,161][INFO] __main__:26 - b = 0.010101010101010102, f1 = 0.879746835443038


HBox(children=(IntProgress(value=0, max=1526), HTML(value='')))

[2019-03-08 02:52:47,336][INFO] __main__:26 - b = 0.020202020202020204, f1 = 0.8761904761904761


HBox(children=(IntProgress(value=0, max=1402), HTML(value='')))

[2019-03-08 02:52:47,508][INFO] __main__:26 - b = 0.030303030303030304, f1 = 0.8690095846645367


HBox(children=(IntProgress(value=0, max=1326), HTML(value='')))

[2019-03-08 02:52:47,679][INFO] __main__:26 - b = 0.04040404040404041, f1 = 0.8580645161290323


HBox(children=(IntProgress(value=0, max=1274), HTML(value='')))

[2019-03-08 02:52:47,850][INFO] __main__:26 - b = 0.05050505050505051, f1 = 0.854368932038835


HBox(children=(IntProgress(value=0, max=1244), HTML(value='')))

[2019-03-08 02:52:48,017][INFO] __main__:26 - b = 0.06060606060606061, f1 = 0.8506493506493505


HBox(children=(IntProgress(value=0, max=1209), HTML(value='')))

[2019-03-08 02:52:48,184][INFO] __main__:26 - b = 0.07070707070707072, f1 = 0.8506493506493505


HBox(children=(IntProgress(value=0, max=1183), HTML(value='')))

[2019-03-08 02:52:48,350][INFO] __main__:26 - b = 0.08080808080808081, f1 = 0.8534201954397393


HBox(children=(IntProgress(value=0, max=1158), HTML(value='')))

[2019-03-08 02:52:48,515][INFO] __main__:26 - b = 0.09090909090909091, f1 = 0.8562091503267973


HBox(children=(IntProgress(value=0, max=1125), HTML(value='')))

[2019-03-08 02:52:48,678][INFO] __main__:26 - b = 0.10101010101010102, f1 = 0.8361204013377928


HBox(children=(IntProgress(value=0, max=1106), HTML(value='')))

[2019-03-08 02:52:48,841][INFO] __main__:26 - b = 0.11111111111111112, f1 = 0.8361204013377928


HBox(children=(IntProgress(value=0, max=1077), HTML(value='')))

[2019-03-08 02:52:49,003][INFO] __main__:26 - b = 0.12121212121212122, f1 = 0.8322147651006712


HBox(children=(IntProgress(value=0, max=1056), HTML(value='')))

[2019-03-08 02:52:49,163][INFO] __main__:26 - b = 0.13131313131313133, f1 = 0.8322147651006712


HBox(children=(IntProgress(value=0, max=1023), HTML(value='')))

[2019-03-08 02:52:49,328][INFO] __main__:26 - b = 0.14141414141414144, f1 = 0.8282828282828283


HBox(children=(IntProgress(value=0, max=981), HTML(value='')))

[2019-03-08 02:52:49,489][INFO] __main__:26 - b = 0.15151515151515152, f1 = 0.8163265306122448


HBox(children=(IntProgress(value=0, max=953), HTML(value='')))

[2019-03-08 02:52:49,649][INFO] __main__:26 - b = 0.16161616161616163, f1 = 0.8109965635738831


HBox(children=(IntProgress(value=0, max=915), HTML(value='')))

[2019-03-08 02:52:49,814][INFO] __main__:26 - b = 0.17171717171717174, f1 = 0.7859649122807018


HBox(children=(IntProgress(value=0, max=894), HTML(value='')))

[2019-03-08 02:52:49,972][INFO] __main__:26 - b = 0.18181818181818182, f1 = 0.7816901408450704


HBox(children=(IntProgress(value=0, max=873), HTML(value='')))

[2019-03-08 02:52:50,128][INFO] __main__:26 - b = 0.19191919191919193, f1 = 0.7773851590106007


HBox(children=(IntProgress(value=0, max=852), HTML(value='')))

[2019-03-08 02:52:50,285][INFO] __main__:26 - b = 0.20202020202020204, f1 = 0.7642857142857142


HBox(children=(IntProgress(value=0, max=834), HTML(value='')))

[2019-03-08 02:52:50,441][INFO] __main__:26 - b = 0.21212121212121213, f1 = 0.7598566308243728


HBox(children=(IntProgress(value=0, max=810), HTML(value='')))

[2019-03-08 02:52:50,596][INFO] __main__:26 - b = 0.22222222222222224, f1 = 0.7553956834532373


HBox(children=(IntProgress(value=0, max=786), HTML(value='')))

[2019-03-08 02:52:50,751][INFO] __main__:26 - b = 0.23232323232323235, f1 = 0.7553956834532373


HBox(children=(IntProgress(value=0, max=764), HTML(value='')))

[2019-03-08 02:52:50,907][INFO] __main__:26 - b = 0.24242424242424243, f1 = 0.7372262773722628


HBox(children=(IntProgress(value=0, max=739), HTML(value='')))

[2019-03-08 02:52:51,062][INFO] __main__:26 - b = 0.25252525252525254, f1 = 0.7326007326007326


HBox(children=(IntProgress(value=0, max=711), HTML(value='')))

[2019-03-08 02:52:51,215][INFO] __main__:26 - b = 0.26262626262626265, f1 = 0.7279411764705883


HBox(children=(IntProgress(value=0, max=695), HTML(value='')))

[2019-03-08 02:52:51,367][INFO] __main__:26 - b = 0.27272727272727276, f1 = 0.7279411764705883


HBox(children=(IntProgress(value=0, max=679), HTML(value='')))

[2019-03-08 02:52:51,519][INFO] __main__:26 - b = 0.2828282828282829, f1 = 0.7185185185185184


HBox(children=(IntProgress(value=0, max=656), HTML(value='')))

[2019-03-08 02:52:51,671][INFO] __main__:26 - b = 0.29292929292929293, f1 = 0.7185185185185184


HBox(children=(IntProgress(value=0, max=626), HTML(value='')))

[2019-03-08 02:52:51,821][INFO] __main__:26 - b = 0.30303030303030304, f1 = 0.7137546468401488


HBox(children=(IntProgress(value=0, max=611), HTML(value='')))

[2019-03-08 02:52:51,973][INFO] __main__:26 - b = 0.31313131313131315, f1 = 0.7137546468401488


HBox(children=(IntProgress(value=0, max=591), HTML(value='')))

[2019-03-08 02:52:52,123][INFO] __main__:26 - b = 0.32323232323232326, f1 = 0.6992481203007518


HBox(children=(IntProgress(value=0, max=574), HTML(value='')))

[2019-03-08 02:52:52,280][INFO] __main__:26 - b = 0.33333333333333337, f1 = 0.6743295019157088


HBox(children=(IntProgress(value=0, max=567), HTML(value='')))

[2019-03-08 02:52:52,475][INFO] __main__:26 - b = 0.3434343434343435, f1 = 0.6743295019157088


HBox(children=(IntProgress(value=0, max=550), HTML(value='')))

[2019-03-08 02:52:52,672][INFO] __main__:26 - b = 0.3535353535353536, f1 = 0.6666666666666666


HBox(children=(IntProgress(value=0, max=536), HTML(value='')))

[2019-03-08 02:52:52,842][INFO] __main__:26 - b = 0.36363636363636365, f1 = 0.6666666666666666


HBox(children=(IntProgress(value=0, max=524), HTML(value='')))

[2019-03-08 02:52:53,013][INFO] __main__:26 - b = 0.37373737373737376, f1 = 0.6509803921568627


HBox(children=(IntProgress(value=0, max=508), HTML(value='')))

[2019-03-08 02:52:53,183][INFO] __main__:26 - b = 0.38383838383838387, f1 = 0.6509803921568627


HBox(children=(IntProgress(value=0, max=488), HTML(value='')))

[2019-03-08 02:52:53,352][INFO] __main__:26 - b = 0.393939393939394, f1 = 0.6403162055335969


HBox(children=(IntProgress(value=0, max=476), HTML(value='')))

[2019-03-08 02:52:53,520][INFO] __main__:26 - b = 0.4040404040404041, f1 = 0.6428571428571428


HBox(children=(IntProgress(value=0, max=459), HTML(value='')))

[2019-03-08 02:52:53,689][INFO] __main__:26 - b = 0.4141414141414142, f1 = 0.632


HBox(children=(IntProgress(value=0, max=446), HTML(value='')))

[2019-03-08 02:52:53,857][INFO] __main__:26 - b = 0.42424242424242425, f1 = 0.632


HBox(children=(IntProgress(value=0, max=433), HTML(value='')))

[2019-03-08 02:52:54,025][INFO] __main__:26 - b = 0.43434343434343436, f1 = 0.632


HBox(children=(IntProgress(value=0, max=423), HTML(value='')))

[2019-03-08 02:52:54,192][INFO] __main__:26 - b = 0.4444444444444445, f1 = 0.632


HBox(children=(IntProgress(value=0, max=402), HTML(value='')))

[2019-03-08 02:52:54,359][INFO] __main__:26 - b = 0.4545454545454546, f1 = 0.6290322580645161


HBox(children=(IntProgress(value=0, max=393), HTML(value='')))

[2019-03-08 02:52:54,526][INFO] __main__:26 - b = 0.4646464646464647, f1 = 0.6290322580645161


HBox(children=(IntProgress(value=0, max=381), HTML(value='')))

[2019-03-08 02:52:54,694][INFO] __main__:26 - b = 0.4747474747474748, f1 = 0.631578947368421


HBox(children=(IntProgress(value=0, max=366), HTML(value='')))

[2019-03-08 02:52:54,862][INFO] __main__:26 - b = 0.48484848484848486, f1 = 0.631578947368421


HBox(children=(IntProgress(value=0, max=355), HTML(value='')))

[2019-03-08 02:52:55,031][INFO] __main__:26 - b = 0.494949494949495, f1 = 0.631578947368421


HBox(children=(IntProgress(value=0, max=344), HTML(value='')))

[2019-03-08 02:52:55,197][INFO] __main__:26 - b = 0.5050505050505051, f1 = 0.6260162601626016


HBox(children=(IntProgress(value=0, max=335), HTML(value='')))

[2019-03-08 02:52:55,362][INFO] __main__:26 - b = 0.5151515151515152, f1 = 0.6260162601626016


HBox(children=(IntProgress(value=0, max=328), HTML(value='')))

[2019-03-08 02:52:55,527][INFO] __main__:26 - b = 0.5252525252525253, f1 = 0.6390041493775933


HBox(children=(IntProgress(value=0, max=322), HTML(value='')))

[2019-03-08 02:52:55,691][INFO] __main__:26 - b = 0.5353535353535354, f1 = 0.6390041493775933


HBox(children=(IntProgress(value=0, max=314), HTML(value='')))

[2019-03-08 02:52:55,855][INFO] __main__:26 - b = 0.5454545454545455, f1 = 0.6390041493775933


HBox(children=(IntProgress(value=0, max=309), HTML(value='')))

[2019-03-08 02:52:56,020][INFO] __main__:26 - b = 0.5555555555555556, f1 = 0.6390041493775933


HBox(children=(IntProgress(value=0, max=295), HTML(value='')))

[2019-03-08 02:52:56,183][INFO] __main__:26 - b = 0.5656565656565657, f1 = 0.6333333333333333


HBox(children=(IntProgress(value=0, max=287), HTML(value='')))

[2019-03-08 02:52:56,346][INFO] __main__:26 - b = 0.5757575757575758, f1 = 0.6333333333333333


HBox(children=(IntProgress(value=0, max=277), HTML(value='')))

[2019-03-08 02:52:56,510][INFO] __main__:26 - b = 0.5858585858585859, f1 = 0.6333333333333333


HBox(children=(IntProgress(value=0, max=267), HTML(value='')))

[2019-03-08 02:52:56,672][INFO] __main__:26 - b = 0.595959595959596, f1 = 0.6333333333333333


HBox(children=(IntProgress(value=0, max=262), HTML(value='')))

[2019-03-08 02:52:56,835][INFO] __main__:26 - b = 0.6060606060606061, f1 = 0.6333333333333333


HBox(children=(IntProgress(value=0, max=260), HTML(value='')))

[2019-03-08 02:52:56,998][INFO] __main__:26 - b = 0.6161616161616162, f1 = 0.6333333333333333


HBox(children=(IntProgress(value=0, max=252), HTML(value='')))

[2019-03-08 02:52:57,161][INFO] __main__:26 - b = 0.6262626262626263, f1 = 0.6218487394957983


HBox(children=(IntProgress(value=0, max=246), HTML(value='')))

[2019-03-08 02:52:57,323][INFO] __main__:26 - b = 0.6363636363636365, f1 = 0.6160337552742615


HBox(children=(IntProgress(value=0, max=245), HTML(value='')))

[2019-03-08 02:52:57,485][INFO] __main__:26 - b = 0.6464646464646465, f1 = 0.6101694915254238


HBox(children=(IntProgress(value=0, max=241), HTML(value='')))

[2019-03-08 02:52:57,647][INFO] __main__:26 - b = 0.6565656565656566, f1 = 0.6101694915254238


HBox(children=(IntProgress(value=0, max=239), HTML(value='')))

[2019-03-08 02:52:57,809][INFO] __main__:26 - b = 0.6666666666666667, f1 = 0.6101694915254238


HBox(children=(IntProgress(value=0, max=234), HTML(value='')))

[2019-03-08 02:52:57,970][INFO] __main__:26 - b = 0.6767676767676768, f1 = 0.6042553191489363


HBox(children=(IntProgress(value=0, max=229), HTML(value='')))

[2019-03-08 02:52:58,132][INFO] __main__:26 - b = 0.686868686868687, f1 = 0.5982905982905983


HBox(children=(IntProgress(value=0, max=224), HTML(value='')))

[2019-03-08 02:52:58,293][INFO] __main__:26 - b = 0.696969696969697, f1 = 0.6008583690987124


HBox(children=(IntProgress(value=0, max=222), HTML(value='')))

[2019-03-08 02:52:58,454][INFO] __main__:26 - b = 0.7070707070707072, f1 = 0.603448275862069


HBox(children=(IntProgress(value=0, max=219), HTML(value='')))

[2019-03-08 02:52:58,615][INFO] __main__:26 - b = 0.7171717171717172, f1 = 0.603448275862069


HBox(children=(IntProgress(value=0, max=208), HTML(value='')))

[2019-03-08 02:52:58,778][INFO] __main__:26 - b = 0.7272727272727273, f1 = 0.5851528384279476


HBox(children=(IntProgress(value=0, max=201), HTML(value='')))

[2019-03-08 02:52:58,940][INFO] __main__:26 - b = 0.7373737373737375, f1 = 0.5789473684210527


HBox(children=(IntProgress(value=0, max=194), HTML(value='')))

[2019-03-08 02:52:59,102][INFO] __main__:26 - b = 0.7474747474747475, f1 = 0.5688888888888889


HBox(children=(IntProgress(value=0, max=188), HTML(value='')))

[2019-03-08 02:52:59,265][INFO] __main__:26 - b = 0.7575757575757577, f1 = 0.5688888888888889


HBox(children=(IntProgress(value=0, max=182), HTML(value='')))

[2019-03-08 02:52:59,436][INFO] __main__:26 - b = 0.7676767676767677, f1 = 0.5688888888888889


HBox(children=(IntProgress(value=0, max=176), HTML(value='')))

[2019-03-08 02:52:59,605][INFO] __main__:26 - b = 0.7777777777777778, f1 = 0.5688888888888889


HBox(children=(IntProgress(value=0, max=171), HTML(value='')))

[2019-03-08 02:52:59,768][INFO] __main__:26 - b = 0.787878787878788, f1 = 0.5429864253393665


HBox(children=(IntProgress(value=0, max=161), HTML(value='')))

[2019-03-08 02:52:59,932][INFO] __main__:26 - b = 0.797979797979798, f1 = 0.5296803652968036


HBox(children=(IntProgress(value=0, max=145), HTML(value='')))

[2019-03-08 02:53:00,092][INFO] __main__:26 - b = 0.8080808080808082, f1 = 0.5092592592592592


HBox(children=(IntProgress(value=0, max=136), HTML(value='')))

[2019-03-08 02:53:00,251][INFO] __main__:26 - b = 0.8181818181818182, f1 = 0.4953271028037383


HBox(children=(IntProgress(value=0, max=125), HTML(value='')))

[2019-03-08 02:53:00,422][INFO] __main__:26 - b = 0.8282828282828284, f1 = 0.47393364928909953


HBox(children=(IntProgress(value=0, max=118), HTML(value='')))

[2019-03-08 02:53:00,581][INFO] __main__:26 - b = 0.8383838383838385, f1 = 0.4593301435406699


HBox(children=(IntProgress(value=0, max=111), HTML(value='')))

[2019-03-08 02:53:00,740][INFO] __main__:26 - b = 0.8484848484848485, f1 = 0.44444444444444436


HBox(children=(IntProgress(value=0, max=103), HTML(value='')))

[2019-03-08 02:53:00,897][INFO] __main__:26 - b = 0.8585858585858587, f1 = 0.3980099502487562


HBox(children=(IntProgress(value=0, max=94), HTML(value='')))

[2019-03-08 02:53:01,055][INFO] __main__:26 - b = 0.8686868686868687, f1 = 0.35714285714285715


HBox(children=(IntProgress(value=0, max=81), HTML(value='')))

[2019-03-08 02:53:01,213][INFO] __main__:26 - b = 0.8787878787878789, f1 = 0.35051546391752575


HBox(children=(IntProgress(value=0, max=65), HTML(value='')))

[2019-03-08 02:53:01,370][INFO] __main__:26 - b = 0.888888888888889, f1 = 0.2622950819672131


HBox(children=(IntProgress(value=0, max=56), HTML(value='')))

[2019-03-08 02:53:01,527][INFO] __main__:26 - b = 0.8989898989898991, f1 = 0.24309392265193366


HBox(children=(IntProgress(value=0, max=44), HTML(value='')))

[2019-03-08 02:53:01,684][INFO] __main__:26 - b = 0.9090909090909092, f1 = 0.19318181818181815


HBox(children=(IntProgress(value=0, max=43), HTML(value='')))

[2019-03-08 02:53:01,841][INFO] __main__:26 - b = 0.9191919191919192, f1 = 0.1954022988505747


HBox(children=(IntProgress(value=0, max=35), HTML(value='')))

[2019-03-08 02:53:01,997][INFO] __main__:26 - b = 0.9292929292929294, f1 = 0.15294117647058825


HBox(children=(IntProgress(value=0, max=30), HTML(value='')))

[2019-03-08 02:53:02,153][INFO] __main__:26 - b = 0.9393939393939394, f1 = 0.15294117647058825


HBox(children=(IntProgress(value=0, max=22), HTML(value='')))

[2019-03-08 02:53:02,309][INFO] __main__:26 - b = 0.9494949494949496, f1 = 0.11976047904191617


HBox(children=(IntProgress(value=0, max=16), HTML(value='')))

[2019-03-08 02:53:02,465][INFO] __main__:26 - b = 0.9595959595959597, f1 = 0.10909090909090909


HBox(children=(IntProgress(value=0, max=7), HTML(value='')))

[2019-03-08 02:53:02,623][INFO] __main__:26 - b = 0.9696969696969697, f1 = 0.012903225806451615


HBox(children=(IntProgress(value=0, max=3), HTML(value='')))

[2019-03-08 02:53:02,778][INFO] __main__:26 - b = 0.9797979797979799, f1 = 0.012903225806451615
[2019-03-08 02:53:02,791][INFO] __main__:35 - Scoring on Entity-Level Gold Data with b=0.0
[2019-03-08 02:53:02,793][INFO] __main__:37 - Corpus Precision 0.855
[2019-03-08 02:53:02,794][INFO] __main__:38 - Corpus Recall    0.922
[2019-03-08 02:53:02,794][INFO] __main__:39 - Corpus F1        0.887
[2019-03-08 02:53:02,795][INFO] __main__:40 - ---------------------------------------------------
[2019-03-08 02:53:02,796][INFO] __main__:42 - TP: 141 | FP: 24 | FN: 12



In [28]:
# Two-stage training for one relation's candidate set (index 3 of the
# per-relation lists): first fit the generative label model on the training
# label matrix, then train the discriminative model on the training
# candidates/features using the generative model's output as its targets.
# NOTE(review): index 3 presumably selects the `ce_v_max` relation -- the
# scoring cell further down pairs `test_cands[3]`/`F_test[3]` with
# relation 'ce_v_max' -- confirm against the candidate-extraction order and
# consider a named constant instead of the bare 3.
# NOTE(review): in the saved outputs this cell ran at 02:10 while the output
# printed just above it is stamped 02:52-02:53, i.e. cells were executed out of
# order. Verify the notebook with Restart Kernel -> Run All.
marginals = generative_model(L_train[3])
# n_epochs=100 overrides the helper's default epoch count (the saved log shows
# the classifier settings with 'n_epochs': 100).
disc_model1 = discriminative_model(train_cands[3], F_train[3], marginals, n_epochs=100)

[2019-03-08 02:10:31,873][INFO] __main__:4 - Training generative model...


Computing O...
Estimating \mu...
[E:0]	Train Loss: 27.843
[E:100]	Train Loss: 0.097
[E:200]	Train Loss: 0.092
[E:300]	Train Loss: 0.091
[E:400]	Train Loss: 0.091


[2019-03-08 02:10:34,322][INFO] __main__:6 - Done.
[2019-03-08 02:10:34,363][INFO] __main__:4 - Training discriminative model...
[2019-03-08 02:10:34,366][INFO] fonduer.learning.classifier:142 - Loading default parameters for Sparse Logistic Regression
[2019-03-08 02:10:34,431][INFO] fonduer.learning.classifier:193 - Using GPU...
[2019-03-08 02:10:34,432][INFO] fonduer.learning.classifier:195 - Settings: {'n_epochs': 100, 'lr': 0.001, 'batch_size': 256, 'shuffle': True, 'seed': 1234, 'host_device': 'GPU', 'bias': False, 'input_dim': 55128}
[2019-03-08 02:10:34,449][INFO] fonduer.learning.classifier:213 - [SparseLogisticRegression] Training model
[2019-03-08 02:10:34,450][INFO] fonduer.learning.classifier:215 - [SparseLogisticRegression] n_train=17506 #epochs=100 batch size=256


[E:499]	Train Loss: 0.091
Finished Training


[2019-03-08 02:10:36,489][INFO] fonduer.learning.classifier:262 - [SparseLogisticRegression] Epoch 1 (2.04s)	Average loss=0.385586
[2019-03-08 02:10:43,719][INFO] fonduer.learning.classifier:262 - [SparseLogisticRegression] Epoch 5 (9.27s)	Average loss=0.301332
[2019-03-08 02:10:52,574][INFO] fonduer.learning.classifier:262 - [SparseLogisticRegression] Epoch 10 (18.12s)	Average loss=0.297307
[2019-03-08 02:11:01,369][INFO] fonduer.learning.classifier:262 - [SparseLogisticRegression] Epoch 15 (26.92s)	Average loss=0.295298
[2019-03-08 02:11:10,872][INFO] fonduer.learning.classifier:262 - [SparseLogisticRegression] Epoch 20 (36.42s)	Average loss=0.295058
[2019-03-08 02:11:20,253][INFO] fonduer.learning.classifier:262 - [SparseLogisticRegression] Epoch 25 (45.80s)	Average loss=0.294835
[2019-03-08 02:11:30,416][INFO] fonduer.learning.classifier:262 - [SparseLogisticRegression] Epoch 30 (55.97s)	Average loss=0.294493
[2019-03-08 02:11:39,480][INFO] fonduer.learning.classifier:262 - [Sparse

In [30]:
# Evaluate the discriminative model trained above (`disc_model1`) on the test
# split for the `ce_v_max` relation, using the test candidates/features at the
# same index (3) that was used for training.
relation = 'ce_v_max'
# `scoring` returns the best entity-level result and the threshold `b` that
# produced it. NOTE(review): `num=100` appears to be the number of thresholds
# swept -- the saved output logs b = 0.0, 0.0101..., up to 0.9899 -- confirm
# against the `scoring` definition earlier in the notebook.
best_result, best_b = scoring(
    relation, disc_model1, test_cands[3], test_docs, F_test[3], parts_by_doc, num=100
)

[2019-03-08 02:14:55,355][INFO] __main__:2 - Calculating the best F1 score and threshold (b)...


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:00,396][INFO] __main__:26 - b = 0.0, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:00,576][INFO] __main__:26 - b = 0.010101010101010102, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:00,811][INFO] __main__:26 - b = 0.020202020202020204, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:00,991][INFO] __main__:26 - b = 0.030303030303030304, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:01,161][INFO] __main__:26 - b = 0.04040404040404041, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:01,333][INFO] __main__:26 - b = 0.05050505050505051, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:01,504][INFO] __main__:26 - b = 0.06060606060606061, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:01,739][INFO] __main__:26 - b = 0.07070707070707072, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:01,946][INFO] __main__:26 - b = 0.08080808080808081, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:02,122][INFO] __main__:26 - b = 0.09090909090909091, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:02,293][INFO] __main__:26 - b = 0.10101010101010102, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:02,466][INFO] __main__:26 - b = 0.11111111111111112, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:02,637][INFO] __main__:26 - b = 0.12121212121212122, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:02,808][INFO] __main__:26 - b = 0.13131313131313133, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:02,981][INFO] __main__:26 - b = 0.14141414141414144, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:03,160][INFO] __main__:26 - b = 0.15151515151515152, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:03,332][INFO] __main__:26 - b = 0.16161616161616163, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:03,503][INFO] __main__:26 - b = 0.17171717171717174, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:03,675][INFO] __main__:26 - b = 0.18181818181818182, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:03,847][INFO] __main__:26 - b = 0.19191919191919193, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:04,018][INFO] __main__:26 - b = 0.20202020202020204, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:04,189][INFO] __main__:26 - b = 0.21212121212121213, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:04,362][INFO] __main__:26 - b = 0.22222222222222224, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:04,533][INFO] __main__:26 - b = 0.23232323232323235, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:04,704][INFO] __main__:26 - b = 0.24242424242424243, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:04,876][INFO] __main__:26 - b = 0.25252525252525254, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:05,048][INFO] __main__:26 - b = 0.26262626262626265, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:05,218][INFO] __main__:26 - b = 0.27272727272727276, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:05,389][INFO] __main__:26 - b = 0.2828282828282829, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:05,563][INFO] __main__:26 - b = 0.29292929292929293, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:05,778][INFO] __main__:26 - b = 0.30303030303030304, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:05,949][INFO] __main__:26 - b = 0.31313131313131315, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:06,121][INFO] __main__:26 - b = 0.32323232323232326, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:06,293][INFO] __main__:26 - b = 0.33333333333333337, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:06,465][INFO] __main__:26 - b = 0.3434343434343435, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:06,636][INFO] __main__:26 - b = 0.3535353535353536, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:06,808][INFO] __main__:26 - b = 0.36363636363636365, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:06,979][INFO] __main__:26 - b = 0.37373737373737376, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:07,151][INFO] __main__:26 - b = 0.38383838383838387, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:07,324][INFO] __main__:26 - b = 0.393939393939394, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:07,495][INFO] __main__:26 - b = 0.4040404040404041, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:07,669][INFO] __main__:26 - b = 0.4141414141414142, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:07,840][INFO] __main__:26 - b = 0.42424242424242425, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:08,014][INFO] __main__:26 - b = 0.43434343434343436, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:08,189][INFO] __main__:26 - b = 0.4444444444444445, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:08,362][INFO] __main__:26 - b = 0.4545454545454546, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:08,539][INFO] __main__:26 - b = 0.4646464646464647, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:08,713][INFO] __main__:26 - b = 0.4747474747474748, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:08,887][INFO] __main__:26 - b = 0.48484848484848486, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:09,078][INFO] __main__:26 - b = 0.494949494949495, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:09,252][INFO] __main__:26 - b = 0.5050505050505051, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:09,431][INFO] __main__:26 - b = 0.5151515151515152, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:09,602][INFO] __main__:26 - b = 0.5252525252525253, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:09,773][INFO] __main__:26 - b = 0.5353535353535354, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:09,944][INFO] __main__:26 - b = 0.5454545454545455, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:10,115][INFO] __main__:26 - b = 0.5555555555555556, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:10,286][INFO] __main__:26 - b = 0.5656565656565657, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:10,456][INFO] __main__:26 - b = 0.5757575757575758, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:10,627][INFO] __main__:26 - b = 0.5858585858585859, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:10,798][INFO] __main__:26 - b = 0.595959595959596, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:10,969][INFO] __main__:26 - b = 0.6060606060606061, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:11,141][INFO] __main__:26 - b = 0.6161616161616162, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:11,311][INFO] __main__:26 - b = 0.6262626262626263, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:11,482][INFO] __main__:26 - b = 0.6363636363636365, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:11,654][INFO] __main__:26 - b = 0.6464646464646465, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:11,826][INFO] __main__:26 - b = 0.6565656565656566, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:11,997][INFO] __main__:26 - b = 0.6666666666666667, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:12,168][INFO] __main__:26 - b = 0.6767676767676768, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:12,343][INFO] __main__:26 - b = 0.686868686868687, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:12,514][INFO] __main__:26 - b = 0.696969696969697, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:12,685][INFO] __main__:26 - b = 0.7070707070707072, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:12,857][INFO] __main__:26 - b = 0.7171717171717172, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:13,028][INFO] __main__:26 - b = 0.7272727272727273, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:13,199][INFO] __main__:26 - b = 0.7373737373737375, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:13,371][INFO] __main__:26 - b = 0.7474747474747475, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:13,542][INFO] __main__:26 - b = 0.7575757575757577, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:13,713][INFO] __main__:26 - b = 0.7676767676767677, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:13,885][INFO] __main__:26 - b = 0.7777777777777778, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:14,057][INFO] __main__:26 - b = 0.787878787878788, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:14,228][INFO] __main__:26 - b = 0.797979797979798, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:14,400][INFO] __main__:26 - b = 0.8080808080808082, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:14,572][INFO] __main__:26 - b = 0.8181818181818182, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:14,755][INFO] __main__:26 - b = 0.8282828282828284, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:14,928][INFO] __main__:26 - b = 0.8383838383838385, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:15,100][INFO] __main__:26 - b = 0.8484848484848485, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:15,274][INFO] __main__:26 - b = 0.8585858585858587, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=789), HTML(value='')))

[2019-03-08 02:15:15,447][INFO] __main__:26 - b = 0.8686868686868687, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=788), HTML(value='')))

[2019-03-08 02:15:15,645][INFO] __main__:26 - b = 0.8787878787878789, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=788), HTML(value='')))

[2019-03-08 02:15:15,817][INFO] __main__:26 - b = 0.888888888888889, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=788), HTML(value='')))

[2019-03-08 02:15:15,990][INFO] __main__:26 - b = 0.8989898989898991, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=788), HTML(value='')))

[2019-03-08 02:15:16,161][INFO] __main__:26 - b = 0.9090909090909092, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=788), HTML(value='')))

[2019-03-08 02:15:16,332][INFO] __main__:26 - b = 0.9191919191919192, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=785), HTML(value='')))

[2019-03-08 02:15:16,503][INFO] __main__:26 - b = 0.9292929292929294, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=783), HTML(value='')))

[2019-03-08 02:15:16,674][INFO] __main__:26 - b = 0.9393939393939394, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=778), HTML(value='')))

[2019-03-08 02:15:16,846][INFO] __main__:26 - b = 0.9494949494949496, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=769), HTML(value='')))

[2019-03-08 02:15:17,016][INFO] __main__:26 - b = 0.9595959595959597, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=755), HTML(value='')))

[2019-03-08 02:15:17,188][INFO] __main__:26 - b = 0.9696969696969697, f1 = 0.8178694158075602


HBox(children=(IntProgress(value=0, max=715), HTML(value='')))

[2019-03-08 02:15:17,358][INFO] __main__:26 - b = 0.9797979797979799, f1 = 0.8055555555555556


HBox(children=(IntProgress(value=0, max=623), HTML(value='')))

[2019-03-08 02:15:17,525][INFO] __main__:26 - b = 0.98989898989899, f1 = 0.7045454545454545
[2019-03-08 02:15:17,531][INFO] __main__:35 - Scoring on Entity-Level Gold Data with b=0.0
[2019-03-08 02:15:17,532][INFO] __main__:37 - Corpus Precision 0.875
[2019-03-08 02:15:17,533][INFO] __main__:38 - Corpus Recall    0.768
[2019-03-08 02:15:17,534][INFO] __main__:39 - Corpus F1        0.818
[2019-03-08 02:15:17,535][INFO] __main__:40 - ---------------------------------------------------
[2019-03-08 02:15:17,535][INFO] __main__:42 - TP: 119 | FP: 17 | FN: 36

