Having built a semantic matcher model by following the Keras guide, I use this notebook to implement it as it would sit in the finished product. A claim and a series of facts (related to the claim) are passed through the model.

A verdict is then formed based on the output of the model from the facts and the claim.

---

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import transformers

import os

In [2]:
# Name of the dataset the model was trained on; the model path is derived from it.
dataset_name = "semantic"
# Any "/" in the dataset name is replaced with "_" before building the path.
saved_model_path = f"./{dataset_name.replace('/', '_')}_bert"

# Default batch size and token length used when encoding sentence pairs.
batch_size, max_length = 32, 128
# Class names, looked up by the index of the model's highest-scoring output.
labels = ["contradiction", "entailment", "neutral"]

In [3]:
# Load the trained semantic matcher saved at `saved_model_path`.
model = tf.keras.models.load_model(saved_model_path)

In [4]:
class BertSemanticDataGenerator(tf.keras.utils.Sequence):
    """Generates batches of BERT-encoded sentence pairs.

    Args:
        sentence_pairs: Array of premise and hypothesis input sentences.
        labels: Array of labels. May be None when `include_targets=False`.
        batch_size: Integer batch size.
        shuffle: boolean, whether to shuffle the data.
        include_targets: boolean, whether to include the labels.

    Returns:
        Tuples `([input_ids, attention_mask, token_type_ids], labels)`
        (or just `[input_ids, attention_mask, token_type_ids]`
         if `include_targets=False`)
    """

    # Tokenizer shared by all instances. A generator is built for every
    # similarity check, so loading the tokenizer once avoids repeating that
    # work on each call.
    _shared_tokenizer = None

    def __init__(
        self,
        sentence_pairs,
        labels,
        batch_size=batch_size,
        shuffle=True,
        include_targets=True,
    ):
        super().__init__()
        self.sentence_pairs = sentence_pairs
        self.labels = labels
        self.shuffle = shuffle
        self.batch_size = batch_size
        self.include_targets = include_targets
        # Load our BERT Tokenizer to encode the text.
        # We use the bert-base-uncased pretrained model.
        if BertSemanticDataGenerator._shared_tokenizer is None:
            BertSemanticDataGenerator._shared_tokenizer = (
                transformers.BertTokenizer.from_pretrained(
                    "bert-base-uncased", do_lower_case=True
                )
            )
        self.tokenizer = BertSemanticDataGenerator._shared_tokenizer
        self.indexes = np.arange(len(self.sentence_pairs))
        self.on_epoch_end()

    def __len__(self):
        # Denotes the number of batches per epoch. Rounded up so that a
        # trailing partial batch is served instead of being silently dropped.
        return int(np.ceil(len(self.sentence_pairs) / self.batch_size))

    def __getitem__(self, idx):
        # Retrieves the batch of index.
        indexes = self.indexes[idx * self.batch_size : (idx + 1) * self.batch_size]
        sentence_pairs = self.sentence_pairs[indexes]

        # With BERT tokenizer's batch_encode_plus batch of both the sentences are
        # encoded together and separated by [SEP] token.
        # `padding="max_length"` replaces the deprecated `pad_to_max_length=True`.
        encoded = self.tokenizer.batch_encode_plus(
            sentence_pairs.tolist(),
            add_special_tokens=True,
            truncation=True,
            max_length=max_length,
            return_attention_mask=True,
            return_token_type_ids=True,
            padding="max_length",
            return_tensors="tf",
        )

        # Convert batch of encoded features to numpy array.
        input_ids = np.array(encoded["input_ids"], dtype="int32")
        attention_masks = np.array(encoded["attention_mask"], dtype="int32")
        token_type_ids = np.array(encoded["token_type_ids"], dtype="int32")

        # Set to true if data generator is used for training/validation.
        if self.include_targets:
            labels = np.array(self.labels[indexes], dtype="int32")
            return [input_ids, attention_masks, token_type_ids], labels
        else:
            return [input_ids, attention_masks, token_type_ids]

    def on_epoch_end(self):
        # Shuffle indexes after each epoch if shuffle is set to True.
        # A fixed seed is used, so the sequence of shuffles is reproducible.
        if self.shuffle:
            np.random.RandomState(42).shuffle(self.indexes)

def check_similarTest(sentence1, sentence2):
    """Classify one sentence pair and return a human-readable result.

    Args:
        sentence1: First sentence of the pair (converted with `str`).
        sentence2: Second sentence of the pair (converted with `str`).

    Returns:
        Tuple `(pred, proba2, proba)`: the name of the highest-scoring class
        from `labels`, that class's score formatted as a percentage string,
        and the full score vector returned by the model for this pair.
    """
    sentence_pairs = np.array([[str(sentence1), str(sentence2)]])
    test_data = BertSemanticDataGenerator(
        sentence_pairs, labels=None, batch_size=1, shuffle=False, include_targets=False,
    )

    # A single pair is predicted, so take the first (only) row of the output.
    proba = model.predict(test_data[0])[0]
    idx = np.argmax(proba)
    # The model score is a fraction (e.g. 0.81), so scale it before appending
    # "%"; previously 0.81 was rendered as " 0.81%".
    proba2 = f"{proba[idx] * 100:.2f}%"
    pred = labels[idx]
    return pred, proba2, proba

In [6]:
def check_similarity(sentence1, sentence2):
    """Score one sentence pair with the semantic matcher.

    Args:
        sentence1: First sentence of the pair (converted with `str`).
        sentence2: Second sentence of the pair (converted with `str`).

    Returns:
        Tuple `(idx, score)`: the index of the highest-scoring class in the
        model output, and that class's score.
    """
    pair = np.array([[str(sentence1), str(sentence2)]])
    generator = BertSemanticDataGenerator(
        pair,
        labels=None,
        batch_size=1,
        shuffle=False,
        include_targets=False,
    )

    # Only one pair is submitted, so the first output row is the result.
    scores = model.predict(generator[0])[0]
    best = np.argmax(scores)
    return best, scores[best]

In [7]:
# sentence1: a logical counter to sentence2, presented as a fact.
sentence1 = "The government does not perpetrate terrorism."
# sentence2: a real excerpt from Infowars.
sentence2 = "All terrorism that we've looked at from the World Trade Center of Oklahoma City to Waco has been government actions."
# Returns (class index, score) for the pair.
check_similarity(sentence1, sentence2)



(0, 0.8072129)

In [8]:
# Testing a direct quotation as the fact.
# The claim should always relate to the originator of the quote, because
# their name is the knowledge-base identifier.
sentence1 = "I am orange"
sentence2 = "Trump never said 'I am orange'"
# Returns (class index, score) for the pair.
check_similarity(sentence1, sentence2)

(0, 0.83527046)

# old version for lists

def fact_check(claim, facts):
    """Return the verdict of the knowledge-base entry the model is most sure about.

    Every entry is compared against the claim with `check_similarity`; the
    entry with the highest score decides the verdict.

    Args:
        claim: The claim text to check.
        facts: Either a mapping or pandas Series of `statement -> verdict`,
            or an iterable of `(statement, verdict)` pairs such as a list of
            two-item lists. A falsy verdict marks the statement as untrue.
            DataFrames are not handled by this version.

    Returns:
        Tuple `(verdict, certainty)`: the class index chosen for the claim
        and the highest score seen. `(0, 0)` if no entry scored above zero.
    """
    # `iteritems()` was removed from pandas in 2.0 and never existed on lists
    # or Python 3 dicts, so choose the iteration API from the input itself.
    pairs = facts.items() if hasattr(facts, "items") else facts

    # current highest certainty
    highest_similarity = 0
    # the verdict decided during analysis
    current_verdict = 0
    # For statements recorded as untrue, classes 0 and 1 swap; any other
    # class passes through unchanged.
    inverted = {0: 1, 1: 0}

    for fact, verdict in pairs:
        label, similarity = check_similarity(fact, claim)

        if similarity > highest_similarity:
            highest_similarity = similarity
            # inversion for non-facts in kb
            current_verdict = label if verdict else inverted.get(label, label)

    return current_verdict, highest_similarity

In [32]:
def fact_check(claim, facts):
    """Return the verdict of the knowledge-base entry the model is most sure about.

    Every entry is compared against the claim with `check_similarity`; the
    entry with the highest score decides the verdict. Each analysis result,
    and each statement that becomes the new best match, is printed.

    Args:
        claim: The claim text to check.
        facts: Either a pandas DataFrame with `statement` and `verdict`
            columns, or an iterable of `(statement, verdict)` pairs such as a
            list of two-item lists. A falsy verdict marks the statement as
            untrue.
            NOTE(review): only truthiness is tested, so a verdict column
            holding strings such as "False" would count as true — confirm
            the column dtype of the knowledge base.

    Returns:
        Tuple `(verdict, certainty)`: the class index chosen for the claim
        and the highest score seen. `(0, 0)` if no entry scored above zero.
    """
    if isinstance(facts, pd.DataFrame):
        # Iterate the column values directly; label lookups such as
        # facts['statement'][row] break when index labels are duplicated.
        pairs = zip(facts["statement"], facts["verdict"])
    else:
        pairs = facts

    # current highest certainty
    highest_similarity = 0
    # the verdict decided during analysis
    current_verdict = 0
    # For statements recorded as untrue, classes 0 and 1 swap; any other
    # class passes through unchanged.
    inverted = {0: 1, 1: 0}

    for statement, is_true in pairs:
        analysis = check_similarity(statement, claim)
        print(analysis)
        label, similarity = analysis

        if similarity > highest_similarity:
            highest_similarity = similarity
            # inversion for non-facts in kb
            current_verdict = label if is_true else inverted.get(label, label)

            print(statement)

    return current_verdict, highest_similarity

---

This is the test using a very small knowledgebase of facts about Hydroxychloroquine from the NHS website.
The results from this test seem extremely promising.

The semantic matcher is incapable of inference across the given facts, so it won't join them together; it just picks the one with the highest certainty.

In [85]:
# Each entry pairs a statement with a truth flag: True means the statement is
# true, False means it is not. The flags are used later to form the final verdict.
test_kb = [
    [statement, True]
    for statement in (
        "Hydroxychloroquine is a type of medicine called a disease-modifying anti-rheumatic drug.",
        "Hydroxychloroquine is used to treat inflammatory conditions like arthritis or lupus.",
        "Hydroxychloroquine is used to treat some skin conditions like sarcoidosis.",
        "Hydroxychloroquine can affect your eyes.",
        "Hydroxychloroquine can make you more likely to get infections.",
        "Hydroxychloroquine is only available on prescription.",
    )
]

In [74]:
# First check: a claim that negates the "can affect your eyes" entry in test_kb.
result = fact_check('Yeah that hydroxchloroquine stuff. I\'ve heard it doesn\'t affect your eyes.', test_kb)
print(labels[result[0]], result[1])
# Prints the verdict label and its certainty.

contradiction 0.8416484


In [91]:
# Claim that conflicts with the "only available on prescription" entry.
result = fact_check('I got some without a prescription.', test_kb)
print(labels[result[0]], result[1])

contradiction 0.8643546


In [94]:
# Claim that conflicts with the "more likely to get infections" entry.
result = fact_check('It stops you getting infections.', test_kb)
print(labels[result[0]], result[1])
# Not a great result: the recorded verdict is neutral rather than contradiction.

neutral 0.6620801


In [88]:
# Claim using the abbreviation "HCQ" and touching two separate entries
# (sarcoidosis and arthritis).
result = fact_check('HCQ is no good for sarcoidosis or arthritis.', test_kb)
print(labels[result[0]], result[1])

contradiction 0.6823682


In [79]:
# Claim that refers to the drug only as "it" and conflicts with the lupus entry.
result = fact_check('They don\'t use it for lupus, it\'s no good for that.', test_kb)
print(labels[result[0]], result[1])

contradiction 0.87413996


In [87]:
# Obviously a joke claim, but nothing in the knowledge base says it is false.
result = fact_check('Hydroxychloroquine is a good soup ingredient', test_kb)
print(labels[result[0]], result[1])

neutral 0.8050079


---
Below are tests using knowledge bases built from fact-checked datasets.
Facts are batched in as verdicts take a while to generate.

Current test plan:
    
    Batch size = 30
    
    Runs if under 80% certainty = 3
    
Potential test plan:

    If certainty under 65%
        Run until batches exhausted

In [10]:
# File name of the knowledge base for the subject under test.
subject = "{}.csv".format("Obama")

# Knowledge bases are read from the "factbase" folder under the current directory.
kbPath = os.path.join(os.path.abspath(""), "factbase", subject)

# Facts checked per cycle, and the maximum number of cycles to run.
fact_batch_size, cycles = 30, 3

In [11]:
# Load the subject's knowledge base and show its columns and size.
kb = pd.read_csv(kbPath)
print(kb.columns)
print(kb.shape)

Index(['statement', 'verdict'], dtype='object')
(1627, 2)


In [25]:
def batch_check(claim, facts):
    """Check a claim against the knowledge base one batch of facts at a time.

    Batches of `fact_batch_size` rows are passed to `fact_check` until the
    best certainty reaches 0.8, `cycles` batches have been run, or the
    knowledge base is exhausted. The cycle number is printed after each batch.

    Args:
        claim: The claim text to check.
        facts: Knowledge base supporting `len()` and `.iloc` slicing
            (a pandas DataFrame in this notebook).

    Returns:
        The `(verdict, certainty)` tuple with the highest certainty across
        the batches that were run, or `(0, 0)` if no batch improved on it.
    """
    # start index of the next batch to check
    next_start = 0
    runs = 0
    total_facts = len(facts)

    best = (0, 0)

    # Only proceeds with further cycles if similarity is <80%. The bounds
    # check stops the loop from running no-op cycles on empty slices once
    # every fact has been checked.
    while best[1] < 0.8 and runs < cycles and next_start < total_facts:
        result = fact_check(claim, facts.iloc[next_start:next_start + fact_batch_size])

        if result[1] > best[1]:
            best = result

        runs += 1
        next_start += fact_batch_size

        print("cycle: ", runs)
    return best

In [36]:
# Check a single claim against the loaded knowledge base, batch by batch.
claim = "Obama did not allow Iran to produce nuclear weapons."

print(batch_check(claim, kb))

(0, 0.615222)
Says President Obama’s deal "allows Iran to produce a nuclear weapon.
(0, 0.9954383)
Given how expansive our program already was," expanding Medicaid in New Jersey due to Obamacare "was a relatively small expansion.
(0, 0.96810424)
(0, 0.9624356)
(0, 0.9250239)
(0, 0.9781985)
(0, 0.942493)
(0, 0.91812474)
(0, 0.9724557)
(0, 0.9913728)
(0, 0.98982775)
(0, 0.93472266)
(0, 0.9712233)
(0, 0.9856263)
(0, 0.99031675)
(0, 0.9834531)
(0, 0.96479136)
(0, 0.9908504)
(0, 0.97857785)
(0, 0.9720282)
(0, 0.97691965)
(0, 0.97931755)
(0, 0.9809281)
(0, 0.98718643)
(0, 0.9936605)
(0, 0.98643094)
(0, 0.9850835)
(0, 0.9905227)
(0, 0.94265336)
(0, 0.99163485)
cycle:  1
(0, 0.9954383)


In [27]:
# Sanity check of the end index used for the first batch slice.
print(0+fact_batch_size)

30


Given the output I'm getting here, it's pretty obvious the current approach doesn't work, so I decided to move to a keyword-centric solution.