!pip install spacy
!python -m spacy download en_core_web_sm
!pip install wikipedia

!pip install torchaudio

In [18]:
import spacy
# Load the `en_core_web_sm` spaCy pipeline once at notebook level;
# preprocess_claim() reuses this shared `nlp` object on every call.
nlp = spacy.load('en_core_web_sm')

def preprocess_claim(claim):
    """Run the spaCy pipeline over a claim and pull out tokens and entities.

    Args:
        claim: The claim text to analyse.

    Returns:
        A ``(tokens, entities)`` tuple where ``tokens`` is a list of the
        lower-cased token texts that are neither stop words nor punctuation,
        and ``entities`` is a list of ``(entity_text, entity_label)`` pairs
        taken from the document's named entities.
    """
    parsed = nlp(claim)

    # Keep only content-bearing tokens, normalised to lower case.
    tokens = []
    for tok in parsed:
        if tok.is_stop or tok.is_punct:
            continue
        tokens.append(tok.text.lower())

    # Pair every named entity with its label.
    entities = []
    for span in parsed.ents:
        entities.append((span.text, span.label_))

    return tokens, entities


In [52]:
import wikipedia

def search_wikipedia(query, sentences=4):
    """Fetch a short Wikipedia summary to use as evidence for a query.

    Args:
        query: Title or search term to look up on Wikipedia.
        sentences: Number of leading summary sentences to request
            (defaults to 4, the value this notebook has always used).

    Returns:
        The summary text on success. Lookup failures are reported as plain
        strings rather than raised: a message starting with
        "Multiple options found for " when the query is ambiguous, or
        "No relevant page found on Wikipedia." when no page matches.
        Callers should check for these messages before treating the result
        as evidence.
    """
    try:
        # Ask Wikipedia for the first `sentences` sentences of the summary.
        return wikipedia.summary(query, sentences=sentences)
    except wikipedia.exceptions.DisambiguationError as e:
        return f"Multiple options found for {query}: {e.options}"
    except wikipedia.exceptions.PageError:
        return "No relevant page found on Wikipedia."


In [20]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
# Load the 'facebook/bart-large-mnli' sequence-classification checkpoint and its
# matching tokenizer once; verify_claim() uses both as notebook-level globals.
nli_model = AutoModelForSequenceClassification.from_pretrained('facebook/bart-large-mnli')
tokenizer = AutoTokenizer.from_pretrained('facebook/bart-large-mnli')

In [43]:
def verify_claim(claim, evidence, device=None):
    """Score how strongly `evidence` supports `claim` with the NLI model.

    The evidence is encoded as the first sequence and the claim as the second,
    then classified by the notebook-level `nli_model`.

    Args:
        claim: The statement to verify.
        evidence: Text the claim is checked against.
        device: Optional ``torch.device`` (or device string) to run on. When
            omitted, Apple's MPS backend is used if available, otherwise the CPU.

    Returns:
        A 1-element tensor holding the probability of "entailment" after
        renormalising over the entailment and contradiction logits only.
    """
    # Local import: the notebook only imports torch in its final cell, so this
    # keeps the function usable on a fresh top-to-bottom run.
    import torch

    if device is None:
        device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

    # `truncation='only_first'` trims only the evidence when the pair is too
    # long for the model; the legacy `truncation_strategy` keyword is not the
    # documented way to request this.
    input_ids = tokenizer.encode(evidence, claim, return_tensors='pt',
                                 truncation='only_first')
    nli_model.to(device)

    # The input must live on the same device as the model; no gradients are
    # needed for inference.
    with torch.no_grad():
        logits = nli_model(input_ids.to(device))[0]

    # we throw away "neutral" (dim 1) and take the probability of
    # "entailment" (2) as the probability of the label being true
    entail_contradiction_logits = logits[:, [0, 2]]
    probs = entail_contradiction_logits.softmax(dim=1)
    prob_label_is_true = probs[:, 1]
    print(prob_label_is_true)
    return prob_label_is_true


In [41]:
def respond_to_claim(claim, evidence, threshold=0.5, margin=0.0):
    """Turn the NLI score for a claim into a human-readable verdict.

    Args:
        claim: The statement to verify.
        evidence: Text the claim is checked against.
        threshold: Score above which the claim is reported as true
            (defaults to 0.5).
        margin: Half-width of an optional "not confident" band around
            `threshold`. Scores strictly closer than `margin` to the threshold
            yield "Could not verify the claim.". The default of 0.0 disables
            the band, so every score maps to "True" or "False".

    Returns:
        "True", "False", or "Could not verify the claim.".
    """
    # verify_claim returns a single-element score; collapse it to a plain float.
    prob_true = float(verify_claim(claim, evidence))

    # Previously this outcome sat in an `else` branch that could never run.
    if abs(prob_true - threshold) < margin:
        return "Could not verify the claim."

    return "True" if prob_true > threshold else "False"

In [23]:
def fact_check(claim):
    """Fact-check a claim against the Wikipedia summary of its first entity.

    Steps: extract tokens and named entities from the claim, look up the first
    entity on Wikipedia, then ask the NLI model whether the retrieved summary
    supports the claim. Intermediate results are printed along the way.

    Args:
        claim: The statement to fact-check.

    Returns:
        The verdict string from respond_to_claim, or
        "Could not find any entities to search for evidence." when the claim
        has no named entities, or "Could not verify the claim." when the
        Wikipedia lookup produced no usable evidence.
    """
    # Step 1: Preprocess the claim to extract entities
    tokens, entities = preprocess_claim(claim)
    print(f"Tokens: {tokens}")
    print(f"Entities: {entities}")

    if not entities:
        return "Could not find any entities to search for evidence."

    # Step 2: Search Wikipedia for the first extracted entity
    entity_to_search = entities[0][0]
    print(f"Searching Wikipedia for: {entity_to_search}")

    evidence = search_wikipedia(entity_to_search)
    print(f"Evidence found: {evidence}")

    # search_wikipedia reports lookup failures as ordinary strings. Feeding
    # those messages to the NLI model would yield a meaningless verdict, so
    # stop here instead.
    lookup_failure_markers = (
        "Multiple options found for ",
        "No relevant page found on Wikipedia.",
    )
    if not evidence or evidence.startswith(lookup_failure_markers):
        response = "Could not verify the claim."
        print(response)
        return response

    # Step 3 & 4: Fact-check and generate the response
    response = respond_to_claim(claim, evidence)
    print(response)
    return response

In [55]:
if __name__ == "__main__":
    # Demo claim to fact-check; edit this string to try a different statement.
    claim = "Friend had 9 seasons."
    
    # Run the full pipeline (entity extraction -> Wikipedia lookup -> NLI verdict)
    # and show the verdict next to the claim.
    result = fact_check(claim)
    print(f"Claim: {claim} -> Result: {result}")

Tokens: ['friend', '9', 'seasons']
Entities: [('Friend', 'ORG'), ('9 seasons', 'DATE')]
Searching Wikipedia for: Friend
Evidence found: Friends is an American television sitcom created by David Crane and Marta Kauffman, which aired on NBC from September 22, 1994, to May 6, 2004, lasting ten seasons. With an ensemble cast starring Jennifer Aniston, Courteney Cox, Lisa Kudrow, Matt LeBlanc, Matthew Perry and David Schwimmer, the show revolves around six friends in their 20s and early 30s who live in Manhattan, New York City. The original executive producers were Kevin S. Bright, Kauffman, and Crane.
Kauffman and Crane began developing Friends under the working title Insomnia Cafe between November and December 1993.
tensor([0.1681], device='mps:0', grad_fn=<SelectBackward0>)
False
Claim: Friend had 9 seasons. -> Result: False


In [38]:
import torch
# Pick the compute device: the MPS backend when torch reports it available,
# otherwise the CPU.
# NOTE(review): this cell sits at the end of the notebook; any earlier cell that
# relies on the global `device` needs it executed first on a fresh kernel.
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
