# CB_V_MAX

## Setup

In [None]:
# If necessary: start over by deleting the existing database file.
import os
# Only remove the file when it is present, so running this cell on a fresh
# checkout (no snorkel.db yet) does not raise OSError.
if os.path.exists('snorkel.db'):
    os.remove('snorkel.db')

In [None]:
# IPython magics: enable the autoreload extension (mode 2) and inline plots.
%load_ext autoreload
%autoreload 2
%matplotlib inline

import os
import sys
# Put the tables-tutorial directory on the import path (later cells import
# hardware_utils; presumably it lives there).
sys.path.append(os.environ['SNORKELHOME'] + '/tutorials/tables/')

## Parsing

In [None]:
from snorkel import SnorkelSession
# Session object handed to the parsing, splitting and query calls below.
session = SnorkelSession()

### Define Parser

In [None]:
import os
from snorkel.parser import CorpusParser, HTMLParser, OmniParser
from snorkel.utils import get_ORM_instance
from snorkel.queries import split_corpus

# Both renderings of the hardware100 document set live under the same
# data directory inside the Snorkel checkout.
hardware_dir = os.environ['SNORKELHOME'] + '/tutorials/tables/data/hardware/'
html_path = hardware_dir + 'hardware100_html/'
pdf_path = hardware_dir + 'hardware100_pdf/'

# Documents are read from the HTML directory; the context parser is also
# given the PDF directory and the session.
doc_parser = HTMLParser(path=html_path)
context_parser = OmniParser(pdf_path=pdf_path, session=session)

# Corpus parser capped at 100 documents.
cp = CorpusParser(
    doc_parser,
    context_parser,
    max_docs=100)

### Run Parser

In [None]:
# Parse the documents into a corpus named 'Hardware' (timed by IPython).
%time corpus = cp.parse_corpus(name='Hardware', session=session)

# Persist the parsed corpus.
session.add(corpus)
session.commit()

### Split Corpus

In [None]:
from snorkel.models import Corpus

# Look up the 'Hardware' corpus and split it 0.8 / 0.2 into training and
# development portions (no test portion) with a fixed seed. Later cells look
# the results up as 'Hardware Training' and 'Hardware Development'.
corpus = get_ORM_instance(Corpus, session, 'Hardware')
split_corpus(session, corpus, train=0.8, development=0.2, test=0, seed=9)

In [None]:
# If necessary
import os
# Snapshot the database as 'snorkel.db corpus' via a shell copy; the
# backslash escapes the space for the shell.
os.system('cp snorkel.db snorkel.db\ corpus');

## Extraction

In [1]:
# If necessary:
import os
# Replace the working database with the 'snorkel.db corpus' snapshot.
os.remove('snorkel.db');
os.system('cp snorkel.db\ corpus snorkel.db');

# IPython magics: enable the autoreload extension (mode 2) and inline plots.
%load_ext autoreload
%autoreload 2
%matplotlib inline

import sys
# Put the tables-tutorial directory on the import path.
sys.path.append(os.environ['SNORKELHOME'] + '/tutorials/tables/')

from snorkel import SnorkelSession
# New session, created after the database file has been swapped.
session = SnorkelSession()

In [2]:
from snorkel.models import candidate_subclass

# Candidate type with two arguments, 'part' and 'temp'.
# NOTE(review): later cells declare Part_Voltage with ['part', 'voltage'] and
# the labeling functions read c.voltage — confirm which candidate class this
# notebook is meant to use.
Part_Temp = candidate_subclass('Part_Temp', ['part','temp'])

### Define Matchers

In [3]:
from snorkel.matchers import RegexMatchSpan, Union

eeca_rgx = ur'([b]{1}[abcdefklnpqruyz]{1}[\swxyz]?[0-9]{3,5}[\s]?[A-Z]{0,2}[\/]?[A-Z]{0,2}[0-9]?[A-Z]?([(\-|\u2010|\u2011|\u2012|\u2013|\u2014|\u2212)][A-Z0-9]{1,7})?([(\-|\u2010|\u2011|\u2012|\u2013|\u2014|\u2212)][A-Z0-9]{1,2})?)'
eeca_matcher = RegexMatchSpan(rgx=eeca_rgx, longest_match_only=True)
jedec_rgx = '([123]N\d{3,4}[A-Z]{0,5}[0-9]?[A-Z]?)'
jedec_matcher = RegexMatchSpan(rgx=jedec_rgx, longest_match_only=True)
jis_rgx = '(2S[abcdefghjkmqrstvz]{1}[\d]{2,4})'
jis_matcher = RegexMatchSpan(rgx=jis_rgx, longest_match_only=True)
others_rgx = '((NSVBC|SMBT|MJ|MJE|MPS|MRF|RCA|TIP|ZTX|ZT|TIS|TIPL|DTC|MMBT|PZT){1}[\d]{2,4}[A-Z]{0,3}([-][A-Z0-9]{0,6})?([-][A-Z0-9]{0,1})?)'
others_matcher = RegexMatchSpan(rgx=others_rgx, longest_match_only=True)
# parts_rgx = '|'.join([eeca_rgx, jedec_rgx, jis_rgx, others_rgx])
parts_matcher = Union(eeca_matcher, jedec_matcher, jis_matcher, others_matcher)

#NOTE: This is super specific.
temp_matcher = RegexMatchSpan(rgx=r'-[5-7][05]', longest_match_only=False)

In [4]:
# import re
# part_finder = re.compile(parts_rgx, re.I)
# print any([part_finder.match(x) for x in ['blue', 'red', 'black', 'green']])
# print any([part_finder.match(x) for x in ['blue', 'red', 'BC546A', 'green']])

### Define ContextSpaces

In [5]:
from hardware_utils import get_gold_dict, get_first_pass_dict, OmniNgramsPart, OmniNgramsTemp, merge_two_dicts
from collections import defaultdict
from snorkel.candidates import OmniNgrams
from snorkel.utils import get_ORM_instance
from snorkel.models import Corpus

# Suffix tokens (presumably part-number suffixes, going by the name);
# matched case-sensitively against single-token n-grams.
eeca_suffix = '^(A|B|C|-16|-25|-40|16|25|40)$'
suffix_matcher = RegexMatchSpan(rgx=eeca_suffix, ignore_case=False)
suffix_ngrams = OmniNgrams(n_max=1)
part_ngrams = OmniNgramsPart(n_max=5)

dev_corpus = get_ORM_instance(Corpus, session, "Hardware Development")
train_corpus = get_ORM_instance(Corpus, session, "Hardware Training")

# Both first passes use the same matchers and n-gram spaces.
first_pass_kwargs = dict(
    parts_matcher=parts_matcher,
    part_ngrams=part_ngrams,
    suffix_matcher=suffix_matcher,
    suffix_ngrams=suffix_ngrams,
)
parts_dev, s_dev, p_dev = get_first_pass_dict(
    dev_corpus.documents, **first_pass_kwargs)
parts_train, s_train, p_train = get_first_pass_dict(
    train_corpus.documents, **first_pass_kwargs)

# Rebuild the part n-gram space, this time handing it the first-pass parts
# dictionaries of both splits merged together.
parts_by_doc = merge_two_dicts(parts_dev, parts_train)
part_ngrams = OmniNgramsPart(parts_by_doc=parts_by_doc, n_max=5)

# TODO: This is missing the current represented as an Amp rather than a milliamp
# NOTE(review): the TODO above talks about currents while this n-gram space
# is for temperatures — looks carried over from another notebook; confirm.
temp_ngrams = OmniNgramsTemp(n_max=3)



### Define Throttler

In [6]:
from snorkel.lf_helpers import *

def part_throttler((part_span, attr_span)):
    """
    Keep only those candidates where both spans are on the same page and
    either aligned in the same table (visually or structurally) or the part is global.
    """
    # TODO: Write this in an easier to tweak way. No reason to try and
    # cram it all into a single return statement.
    return(
        same_page((part_span, attr_span)) and
        (part_span.parent.table is None or
        (same_row((part_span, attr_span)) or is_horz_aligned((part_span, attr_span)))))


### Run CandidateExtractor

In [7]:
from snorkel.models import Corpus
from snorkel.candidates import CandidateExtractor
from snorkel.utils import get_ORM_instance


# Extractor for Part_Temp candidates: one n-gram space and one matcher per
# argument (part first, temperature second), filtered by part_throttler.
# NOTE(review): the saved output of this cell ends in
# "AttributeError: 'NoneType' object has no attribute 'page'" — presumably
# raised during throttling; confirm before relying on later cells.
ce = CandidateExtractor(Part_Temp, 
                        [part_ngrams, temp_ngrams], 
                        [parts_matcher, temp_matcher],
                        throttler=part_throttler)

# Extract one candidate set per split, named '<corpus name> Candidates'.
for corpus_name in ['Hardware Training', 'Hardware Development']:
    corpus = get_ORM_instance(Corpus, session, corpus_name)
    print "Extracting Candidates from %s" % corpus
    %time candidates = ce.extract(\
        corpus.documents, corpus_name + ' Candidates', session)
    session.add(candidates)
    print "%s contains %d Candidates" % (candidates, len(candidates))
# Single commit once both candidate sets have been added.
session.commit()

Extracting Candidates from Corpus (Hardware Training)

AttributeError: 'NoneType' object has no attribute 'page'

### Assess Recall

In [None]:
from hardware_utils import entity_level_total_recall, most_common_document, get_gold_dict
from snorkel.utils import get_ORM_instance
from snorkel.models import Candidate, Corpus

all_candidates = session.query(Candidate).all()
gold_file = os.environ['SNORKELHOME'] + '/tutorials/tables/data/hardware/hardware_gold.csv'

corpus = get_ORM_instance(Corpus, session, 'Hardware')
(tp, fp, fn) = entity_level_total_recall(
    all_candidates, gold_file, 'cb_v_max', corpus=corpus, relation=True, integerize=True)
print len(tp)
print len(fp)
print len(fn)

In [None]:
from pprint import pprint
# Pretty-print the first ten false negatives in sorted order.
fns = list(fn)
first_ten_fns = sorted(fns)[0:10]
pprint(first_ten_fns)

In [None]:
# corpus = get_ORM_instance(Corpus, session, 'Hardware Training')
# for document in corpus.documents:
# #     print document.name
#     if document.name == 'MOTOS03160-1':
#         doc = document
# print doc

In [None]:
# for c in all_candidates:
#     if c.part.get_span()=='BC183' and c.part.parent.document.name=='MOTOS03160-1':
#         print c

In [None]:
# for phrase in doc.phrases:
#     if 'BC183' in phrase.words:
#         print phrase

In [None]:
# If necessary
import os
# Snapshot the database as 'snorkel.db candidates' via a shell copy.
os.system('cp snorkel.db snorkel.db\ candidates');

## Gold Labels

In [None]:
# If necessary
# import os
# os.remove('snorkel.db');
# os.system('cp snorkel.db\ candidates snorkel.db');

# %load_ext autoreload
# %autoreload 2
# %matplotlib inline

# import sys
# sys.path.append(os.environ['SNORKELHOME'] + '/tutorials/tables/')

# from snorkel import SnorkelSession
# session = SnorkelSession()

# from snorkel.models import candidate_subclass
# Part_Voltage = candidate_subclass('Part_Voltage', ['part','voltage'])

In [None]:
import os
from snorkel.models import CandidateSet
from hardware_utils import load_hardware_labels

gold_file = os.environ['SNORKELHOME'] + '/tutorials/tables/data/hardware/hardware_gold.csv'
# For each split: load the stored candidate set, load gold labels for the
# 'cb_v_max' attribute, then report how many candidates are in the gold set.
for set_name in ['Training', 'Development']:
    candidate_set_name = 'Hardware %s Candidates' % set_name
    candidates = session.query(CandidateSet).filter(
        CandidateSet.name == candidate_set_name).one()
    label_set_name = 'Hardware %s Candidates -- Gold' % set_name
    annotation_key_name = 'Hardware %s Labels -- Gold' % set_name
    %time gold_candidates, annotation_key = load_hardware_labels(session,\
                           label_set_name, \
                           annotation_key_name, \
                           candidates, \
                           gold_file, \
                           attrib='cb_v_max')
    # The name queried here equals label_set_name built above.
    candidates_gold = session.query(CandidateSet).filter(
        CandidateSet.name == candidate_set_name + ' -- Gold').one()
    print "%d/%d Candidates in %s have positive Labels" % (
        len(candidates_gold), len(candidates), candidates)

In [None]:
# If necessary
import os
# Snapshot the database as 'snorkel.db labels' via a shell copy.
os.system('cp snorkel.db snorkel.db\ labels');

## Features

In [None]:
# If necessary:
import os
# Replace the working database with the 'snorkel.db labels' snapshot.
os.remove('snorkel.db');
os.system('cp snorkel.db\ labels snorkel.db');

# IPython magics: enable the autoreload extension (mode 2) and inline plots.
%load_ext autoreload
%autoreload 2
%matplotlib inline

import sys
# Put the tables-tutorial directory on the import path.
sys.path.append(os.environ['SNORKELHOME'] + '/tutorials/tables/')

from snorkel import SnorkelSession
# New session, created after the database file has been swapped.
session = SnorkelSession()

from snorkel.models import candidate_subclass
# NOTE(review): the extraction cell declares Part_Temp(['part', 'temp']);
# confirm that this Part_Voltage declaration matches the stored candidates.
Part_Voltage = candidate_subclass('Part_Voltage', ['part','voltage'])

### Extract Features

In [None]:
from snorkel.models import CandidateSet
from snorkel.fast_annotations import FeatureManager
from snorkel.utils import get_ORM_instance

# Stored candidate sets for the two splits.
train = get_ORM_instance(CandidateSet, session, 'Hardware Training Candidates')
dev   = get_ORM_instance(CandidateSet, session, 'Hardware Development Candidates')

# Create features for the training set under the key 'Train Features', then
# update with the development set under the same key with
# expand_key_set=False.
feature_manager = FeatureManager()
%time F_train = feature_manager.create(session, train, 'Train Features')
%time F_dev = feature_manager.update(session, dev, 'Train Features', expand_key_set=False)

In [None]:
# If necessary:
import os
# Snapshot the database as 'snorkel.db featurized' via a shell copy.
os.system('cp snorkel.db snorkel.db\ featurized');

## LFs

In [None]:
# If necessary
import os
# Replace the working database with the 'snorkel.db featurized' snapshot.
os.remove('snorkel.db');
os.system('cp snorkel.db\ featurized snorkel.db');

# IPython magics: enable the autoreload extension (mode 2) and inline plots.
%load_ext autoreload
%autoreload 2
%matplotlib inline

import sys
# Put the tables-tutorial directory on the import path.
sys.path.append(os.environ['SNORKELHOME'] + '/tutorials/tables/')

from snorkel import SnorkelSession
# New session, created after the database file has been swapped.
session = SnorkelSession()

from snorkel.models import candidate_subclass
# NOTE(review): the extraction cell declares Part_Temp(['part', 'temp']);
# confirm that this Part_Voltage declaration matches the stored candidates.
Part_Voltage = candidate_subclass('Part_Voltage', ['part','voltage'])
from snorkel.models import CandidateSet
# Stored candidate sets for the two splits; .one() requires exactly one
# match per name.
train = session.query(CandidateSet).filter(
    CandidateSet.name == 'Hardware Training Candidates').one()
dev = session.query(CandidateSet).filter(
    CandidateSet.name == 'Hardware Development Candidates').one()

### Define LFs

In [None]:
from snorkel.fast_annotations import LabelManager
from snorkel.lf_helpers import *
# Manager later used to apply the labeling functions to the training set.
label_manager = LabelManager()

# Registry of labeling functions; each definition below appends itself.
LFs = []

###################################################################
# BOTH
###################################################################

# cb_words_individual = set(['collector', 'base', 'voltage'])
# cb_words_together = set(['collector-base', 'voltage'])
# Spellings of "collector base" looked for in the voltage's row n-grams.
cb_words = set(['collector base', 'collector-base', 'collector - base'])


def LF_cb_keywords_all(c):
    """Vote +1 if a cb_words phrase overlaps the voltage's row n-grams, else -1."""
    row_ngrams = get_row_ngrams(c.voltage, spread=[0,3], n_max=3)
    if overlap(cb_words, row_ngrams):
        return 1
    return -1
LFs.append(LF_cb_keywords_all)

###################################################################
# POSITIVE
###################################################################
    
# Keywords treated as positive evidence. # 'value', 'rating'
pos_keys = set(['cbo', 'vcbo'])


def LF_pos_keywords_in_row(c):
    """Vote +1 if a pos_keys keyword overlaps the voltage's row n-grams; abstain (0) otherwise."""
    row_ngrams = get_row_ngrams(c.voltage, spread=[0,3])
    if overlap(pos_keys, row_ngrams):
        return 1
    return 0
LFs.append(LF_pos_keywords_in_row)

def LF_pos_keywords_horz(c):
    """Vote +1 if a pos_keys keyword overlaps the n-grams horizontally aligned with the voltage; abstain (0) otherwise."""
    aligned_ngrams = get_horz_aligned_ngrams(c.voltage)
    if overlap(pos_keys, aligned_ngrams):
        return 1
    return 0
LFs.append(LF_pos_keywords_horz)

###################################################################
# NEGATIVE
###################################################################

import re

# Part-number patterns used to spot text that looks like a part.
# Raw strings keep the regex escapes (\s, \d, \/) exactly as written.
eeca_rgx = r'([b]{1}[abcdefklnpqruyz]{1}[\swxyz]?[0-9]{3,5}[\s]?[A-Z\/]{0,5}[0-9]?[A-Z]?([-][A-Z0-9]{1,7})?([-][A-Z0-9]{1,2})?)'
jedec_rgx = r'([123]N\d{3,4}[A-Z]{0,5}[0-9]?[A-Z]?)'
jis_rgx = r'(2S[abcdefghjkmqrstvz]{1}[\d]{2,4})'
others_rgx = r'((NSVBC|SMBT|MJ|MJE|MPS|MRF|RCA|TIP|ZTX|ZT|TIS|TIPL|DTC|MMBT|PZT){1}[\d]{2,4}[A-Z]{0,3}([-][A-Z0-9]{0,6})?([-][A-Z0-9]{0,1})?)'
parts_rgx = '|'.join([eeca_rgx, jedec_rgx, jis_rgx, others_rgx])
# The patterns mix lower-case classes ([b], [abcdef...]) with upper-case ones
# ([A-Z], N, 2S), so a case-sensitive compile matches neither 'BC546A' nor
# '2n3904'. Compile case-insensitively, as the commented-out part_finder
# draft earlier in this notebook does.
part_sniffer = re.compile(parts_rgx, re.I)
def LF_cheating_with_another_part(c):
    """
    Vote -1 when some n-gram horizontally aligned with the candidate matches
    part_sniffer while the candidate itself is not horizontally aligned;
    abstain (0) otherwise.
    """
    # Stop at the first aligned n-gram that matches the part patterns.
    another_part_seen = False
    for ngram in get_horz_aligned_ngrams(c):
        if part_sniffer.match(ngram):
            another_part_seen = True
            break

    if another_part_seen and not is_horz_aligned(c):
        return -1
    return 0
LFs.append(LF_cheating_with_another_part)

def LF_same_table_must_align(c):
    """
    Vote -1 when both the part's row and the voltage's row contain a
    cb_words phrase yet the candidate is not horizontally aligned;
    abstain (0) otherwise.
    """
    # The part-side n-grams must be read from c.part. Reading c.voltage for
    # both lists made the two overlap checks identical.
    part_row_ngrams = list(get_row_ngrams(c.part, spread=[0,3], n_max=2))
    volt_row_ngrams = list(get_row_ngrams(c.voltage, spread=[0,3], n_max=2))
    # NOTE(review): with n_max=2 the three-token phrase 'collector - base'
    # presumably cannot appear among these n-grams — confirm the intent.
    return -1 if (overlap(part_row_ngrams, cb_words) and
                  overlap(volt_row_ngrams, cb_words) and
                  not is_horz_aligned(c)) else 0
LFs.append(LF_same_table_must_align)

# A good LF, but made obsolete by the throttling condition
def LF_not_horz_aligned(c):
    """Vote -1 when the candidate's spans share a table but are not horizontally aligned; abstain (0) otherwise."""
    if same_table(c) and not is_horz_aligned(c):
        return -1
    return 0
LFs.append(LF_not_horz_aligned)

def LF_voltage_not_in_table(c):
    """Vote -1 when the voltage's parent has no table; abstain (0) otherwise."""
    if c.voltage.parent.table is None:
        return -1
    return 0
LFs.append(LF_voltage_not_in_table)

def LF_low_table_num(c):
    """
    Vote -1 when the voltage sits in a table whose position is greater
    than 2; abstain (0) otherwise, including when there is no table.
    """
    table = c.voltage.parent.table
    if table and table.position > 2:
        return -1
    return 0
LFs.append(LF_low_table_num)

# Row keywords treated as negative evidence.
neg_keys = set(['continuous', 'emitter', 'cut-off', 'gain'])


def LF_specific_neg_row_keywords(c):
    """Vote -1 if a neg_keys keyword overlaps the voltage's row n-grams; abstain (0) otherwise."""
    row_ngrams = get_row_ngrams(c.voltage)
    if overlap(neg_keys, row_ngrams):
        return -1
    return 0
LFs.append(LF_specific_neg_row_keywords)

def LF_equals_in_row(c):
    """Vote -1 if '=' overlaps the voltage's row n-grams; abstain (0) otherwise."""
    row_ngrams = get_row_ngrams(c.voltage)
    if overlap('=', row_ngrams):
        return -1
    return 0
LFs.append(LF_equals_in_row)

def LF_i_in_row(c):
    """Vote -1 if 'i' overlaps the voltage's row n-grams; abstain (0) otherwise."""
    row_ngrams = get_row_ngrams(c.voltage)
    if overlap('i', row_ngrams):
        return -1
    return 0
LFs.append(LF_i_in_row)

def LF_too_many_numbers_row(c):
    """Vote -1 when at least four 'number' NER tags occur in the voltage's row; abstain (0) otherwise."""
    row_tags = list(get_row_ngrams(c.voltage, attrib="ner_tags"))
    number_tag_count = row_tags.count('number')
    if number_tag_count >= 4:
        return -1
    return 0
LFs.append(LF_too_many_numbers_row)

In [None]:
from snorkel.models import CandidateSet
from snorkel.utils import get_ORM_instance
from snorkel.utils import ProgressBar

# train = get_ORM_instance(CandidateSet, session, 'Hardware Training Candidates')
dev   = get_ORM_instance(CandidateSet, session, 'Hardware Development Candidates')

# Buckets for the heuristic run below. Only tp and tn are filled by the
# loop in this cell; fp and fn stay empty here.
tp = set()
fp = set()
fn = set()
tn = set()
# Progress bar sized to the development candidate set.
pb = ProgressBar(len(dev))

def heuristic(c):
    """
    Rule-based verdict for a candidate: True only when LF_cb_keywords_all
    votes +1 and none of the listed negative LFs casts a (non-zero) vote.
    """
    if LF_cb_keywords_all(c) != 1:
        return False

    # Checked in this order; evaluation stops at the first LF that votes.
    vetoing_lfs = (
        LF_cheating_with_another_part,
        LF_not_horz_aligned,
        LF_voltage_not_in_table,
        LF_same_table_must_align,
        LF_specific_neg_row_keywords,
        LF_equals_in_row,
        LF_i_in_row,
        LF_too_many_numbers_row,
    )
    return not any(veto(c) for veto in vetoing_lfs)

# from snorkel.utils import get_ORM_instance
# from snorkel.models import Candidate
# all_candidates = session.query(Candidate).all()

# Sort every development candidate into tp (heuristic accepts) or tn
# (heuristic rejects), updating the progress bar as we go.
for i, c in enumerate(dev):
    pb.bar(i)
    bucket = tp if heuristic(c) else tn
    bucket.add(c)
pb.close()

In [None]:
print len(dev)
print len(tp) + len(tn)
print len(tp)
print len(tn)

### Apply LFs

In [None]:
# Apply all registered labeling functions to the training candidates and
# store the result under the key 'LF Labels' (timed by IPython).
%time L_train = label_manager.create(session, train, 'LF Labels', f=LFs)
# Bare expression: displayed as the cell's output.
L_train

### Assess LF accuracy

In [None]:
# Gold candidate set for the training split; .one() requires exactly one match.
train_gold = session.query(CandidateSet).filter(
    CandidateSet.name == 'Hardware Training Candidates -- Gold').one()
# Labeling-function statistics computed against the gold set.
%time L_train.lf_stats(train_gold)

In [None]:
# If necessary
import os
# Snapshot the database as 'snorkel.db features' via a shell copy. This is a
# different file from the earlier 'snorkel.db featurized' snapshot.
os.system('cp snorkel.db snorkel.db\ features');

## Learn and Evaluate

In [None]:
# If necessary:
import os
# Replace the working database with the 'snorkel.db features' snapshot.
os.remove('snorkel.db');
os.system('cp snorkel.db\ features snorkel.db');

# IPython magics: enable the autoreload extension (mode 2) and inline plots.
%load_ext autoreload
%autoreload 2
%matplotlib inline

import sys
# Put the tables-tutorial directory on the import path.
sys.path.append(os.environ['SNORKELHOME'] + '/tutorials/tables/')

from snorkel import SnorkelSession
# New session, created after the database file has been swapped.
session = SnorkelSession()

from snorkel.models import candidate_subclass
# NOTE(review): the extraction cell declares Part_Temp(['part', 'temp']);
# confirm that this Part_Voltage declaration matches the stored candidates.
Part_Voltage = candidate_subclass('Part_Voltage', ['part','voltage'])

In [None]:
from snorkel.fast_annotations import FeatureManager, LabelManager
from snorkel.models import CandidateSet
# Stored candidate sets for the two splits; .one() requires exactly one
# match per name.
train = session.query(CandidateSet).filter(
    CandidateSet.name == 'Hardware Training Candidates').one()
dev = session.query(CandidateSet).filter(
    CandidateSet.name == 'Hardware Development Candidates').one()

# Load the features stored under 'Train Features' for both splits.
feature_manager = FeatureManager()
%time F_train = feature_manager.load(session, train, 'Train Features')
%time F_dev = feature_manager.load(session, dev, 'Train Features')

# Load the labeling-function outputs stored under 'LF Labels'.
label_manager = LabelManager()
%time L_train = label_manager.load(session, train, 'LF Labels')

In [None]:
from snorkel.learning import NaiveBayes

# Train the generative model on the labeling-function outputs, save its
# parameters, and compute marginals for the training candidates.
gen_model = NaiveBayes()
gen_model.train(L_train, n_iter=100000, rate=1e-6)
%time gen_model.save(session, 'Generative Params')
train_marginals = gen_model.marginals(L_train)

In [None]:
from snorkel.learning import LogReg

# Train the discriminative model on the training features, using the
# generative model's marginals as targets, then save its parameters.
disc_model = LogReg()
disc_model.train(F_train, train_marginals, n_iter=2000, rate=1e-4)
%time disc_model.save(session, "Discriminative Params")

In [None]:
# Gold candidate sets for both splits; .one() requires exactly one match.
train_gold = session.query(CandidateSet).filter(
    CandidateSet.name == 'Hardware Training Candidates -- Gold').one()

dev_gold = session.query(CandidateSet).filter(
    CandidateSet.name == 'Hardware Development Candidates -- Gold').one()

from snorkel.models import CandidateSet
# NOTE(review): this LabelManager comes from snorkel.annotations, whereas the
# other cells import it from snorkel.fast_annotations, and it rebinds
# label_manager — confirm the switch is intentional.
from snorkel.annotations import LabelManager
label_manager = LabelManager()
# Gold labels for the development candidates.
L_dev = label_manager.load(session, dev, 'Hardware Development Labels -- Gold')

# Candidate-level scoring of the discriminative model on the development set.
tp, fp, tn, fn = disc_model.score(F_dev, L_dev, dev_gold)

In [None]:
from snorkel.models import Corpus
from snorkel.utils import get_ORM_instance
from hardware_utils import entity_level_f1
import os

dev_corpus = get_ORM_instance(Corpus, session, 'Hardware Development')

gold_file = os.environ['SNORKELHOME'] + '/tutorials/tables/data/hardware/hardware_gold.csv'
# Second lookup of the same corpus by name; `corpus` is not used by the
# entity_level_f1 call below (only by commented-out cells further down).
corpus = session.query(Corpus).filter(Corpus.name == 'Hardware Development').one()
# Entity-level evaluation of the candidate-level buckets against the gold
# file for the 'cb_v_max' attribute.
(TP, FP, FN) = entity_level_f1(tp, fp, tn, fn, gold_file, dev_corpus, 'cb_v_max')

In [None]:
from pprint import pprint
# Sorted lists of the entity-level false negatives, false positives and
# true positives (built in that order).
FN_list, FP_list, TP_list = [sorted(bucket) for bucket in (FN, FP, TP)]
# pprint(FN_list[:])
# Show the first ten false negatives.
pprint(FN_list[0:10])
# pprint(TP_list[:10])

In [None]:
# docs = set()
# for doc in corpus.documents:
#     docs.add(doc.name.upper())
# pprint(docs)

In [None]:
# import itertools
# docs = set()
# for f in itertools.chain.from_iterable([tp, tn, fp, fn]):
#     docs.add(f.part.parent.document.name.upper())
# #     if f.part.parent.document.name.upper() == 'AUKCS04635-1':
# #         print f
# pprint(docs)

In [None]:
from hardware_utils import entity_to_candidates, part_error_analysis

# disc_model.get_feature_weights(F_dev)

# Inspect the first entity-level false negative: find the development
# candidates that correspond to it and dump each labeling function's vote.
entity = FN_list[0]
matches = entity_to_candidates(entity, dev)
print "Entity: (%d matches)" % len(matches)
print entity

# `stop` is a manual debugging switch; every assignment in this cell sets it
# to False, so neither `break` below ever fires as written.
stop = False
for i, c in enumerate(matches):
    part_error_analysis(c)
    # (LF name, vote) pairs for this candidate.
    results = []
    for lf in LFs:
        name = lf.__name__
        result = lf(c)
        results.append((name, result))
#         if name == 'LF_cb_keywords_all' and result == -1:
#             print name
#             ngrams = list(get_row_ngrams(c.voltage, spread=[0,3], n_max=3))
#             print ngrams
#             print overlap(ngrams, cb_words)
#             stop = False
#         if name == 'LF_cheating_with_another_part' and result == -1:
#             print name
#             horz_ngrams = list(get_horz_aligned_ngrams(c))
#             print horz_ngrams
#             print [part_sniffer.match(x) for x in horz_ngrams]
#             stop = False
        # When LF_i_in_row votes -1, print the row n-grams for inspection.
        # Note these are fetched with spread=[0,3], n_max=3, which differs
        # from the arguments LF_i_in_row itself passes.
        if name == 'LF_i_in_row' and result == -1:
            print name
            ngrams = list(get_row_ngrams(c.voltage, spread=[0,3], n_max=3))
            print ngrams
            stop = False
        if stop: break
    print "MATCH %d:" % i
    print heuristic(c)
    pprint(results)
    print "--------------------------------------------------------------------------------"
    if stop: break
    
#     if heuristic(candidate):
#         print "\nCandidate:"
# #         print candidate
#         print part_error_analysis(candidate)
#     print heuristic(candidate)
#     print LF_voltage_not_in_table(candidate)
#         print candidate.voltage.parent.table
#     print "\nScore:"
#     print disc_model.get_candidate_score(candidate, F_dev)

#     print "\nFeatures:"
#     pprint(disc_model.get_candidate_feature_weights(candidate, F_dev))

In [None]:
# Show the collector-base keyword set used by LF_cb_keywords_all.
print cb_words

In [None]:
# Text of the parent of the candidate's first argument.
# NOTE(review): `candidate` is not assigned anywhere in the visible notebook
# (the loops above bind `c`) — confirm before running.
candidate[0].parent.text

In [None]:
# Span text of the candidate's first argument.
# NOTE(review): `candidate` is not assigned anywhere in the visible notebook
# (the loops above bind `c`) — confirm before running.
candidate[0].get_span()

In [None]:
# Collect (LF name, vote) pairs for a single candidate.
# list.append accepts exactly one argument, so each pair is built as a
# tuple, the same shape the error-analysis loop uses for its results.
# NOTE(review): `candidate` is not assigned anywhere in the visible notebook
# (the loops above bind `c`) — confirm before running.
results = [(lf.__name__, lf(candidate)) for lf in LFs]

In [None]:
# Shapes of the training and development feature matrices.
print F_train.shape
print F_dev.shape

In [None]:
import os
# Snapshot the database as 'snorkel.db final' via a shell copy.
os.system('cp snorkel.db snorkel.db\ final');

In [None]:
# def LF_first_row(c):
#     if c.voltage.parent.row_num == 0:
#         return -1
#     else:
#         return 0
# LFs.append(LF_first_row)
    
# def LF_not_ce_relevant(c):
#     ce_keywords = set(['collector', 'emitter', 'collector-emitter'])
#     ngrams = set(get_aligned_ngrams(c.voltage))
#     if not set_any_in_set(ce_keywords, ngrams):
#         return -1
#     else:
#         return 1
# LFs.append(LF_not_ce_relevant)

# def LF_negative_keywords(c):
#     row_neg_keys = set(['ambient',
#                     'small-signal',
#                     'cut-off',
#                     'na',
#                     'ma',
#                     'cex',
#                     'resistance',
#                     'power',
#                     'junction',
#                     'dissipation', 
#                     'breakdown',
#                     'current',
#                     'ceo',
#                     'vceo',
#                     'peak',
#                     '=',
#                     'f',
#                     'p',
#                     'base',
#                     'mw',
#                     'ebo',
#                     'vebo',
#                     'i c',
#                     'total',
#                     'device',
#                     'c',
#                     'mhz',
#                     'temperature',
#                     'saturation',
#                     'operating',
#                     'storage',
#                     'bandwidth',
#                     'derate',
#                     'above',
#                     'product',
#                     'figure',
#                     'conditions',
#                     'current gain',
#                     'saturation',
#                     'min',
#                     'min.',
#                     'typ',
#                     'typ.',
#                     'max',
#                     'max.',
#                     'gain',
#                     'p',
#                     'thermal',
#                     'test'])
#     row_ngrams = set(get_row_ngrams(c.voltage))
#     col_ngrams = set(get_col_ngrams(c.voltage))
#     col_neg_keys = set(['conditions', 
#                         'condition', 
#                         'parameter', 
#                         'min',
#                         'min.',
#                         'typ',
#                         'typ.',
#                         'max',
#                         'max.',
#                         'test'])
#     if set_any_in_set(row_neg_keys, row_ngrams):
#         return -1
#     if set_any_in_set(col_neg_keys, col_ngrams):
#         return -1
#     return 0
# LFs.append(LF_negative_keywords)
    
# def LF_negative_keywords_in_col(c):
#     neg_keys = set(['conditions',
#                     'condition',
#                     'parameter',
#                     'test'])
#     ngrams = set(get_col_ngrams(c.voltage))
#     if set_any_in_set(neg_keys, ngrams):
#         return -1
#     else:
#         return 0

# LFs.append(LF_negative_keywords_in_col)

# def LF_negative_keywords_in_part_aligned(c):
#     ngrams = set(get_aligned_ngrams(c.part))
#     return -1 if (
#         'gain'          in ngrams or
#         'small-signal'  in ngrams or
#         'small'         in ngrams or
#         'cbo'         in ngrams or
#         'collector-emitter' in ngrams or
#         'value'         in ngrams or
#         'thermal'       in ngrams) else 0
# LFs.append(LF_negative_keywords_in_part_aligned)

# def LF_negative_keywords(c):
#     ngrams = set(get_aligned_ngrams(c.voltage))
#     return -1 if (
#         'collector-base'    in ngrams or
#         'cut-off'           in ngrams or
#         '='                 in ngrams or
#         'gain'              in ngrams or
#         'h fe'              in ngrams or
#         'typ.'              in ngrams or
#         'typ'               in ngrams or
#         'min'               in ngrams or
#         'min.'              in ngrams or
#         'saturation'        in ngrams or
#         'mhz'               in ngrams or
#         'gain'              in ngrams or
#         'obo'               in ngrams or
#         'c obo'             in ngrams) else 0
# LFs.append(LF_negative_keywords)
