In [None]:
!pip install -q wikipedia nltk

In [None]:
import wikipedia

def fetch_wiki(topic):
    """Return English Wikipedia summary text for ``topic``.

    The full page summary is returned when ``topic`` resolves to exactly one
    page. When the title is ambiguous, a three-sentence summary of the first
    disambiguation option is returned instead.

    Args:
        topic: Page title to look up (used verbatim, without auto-suggest).

    Returns:
        The summary text, or the sentinel string ``"No information found"``
        when the lookup (or the disambiguation fallback) fails for any reason.
    """
    wikipedia.set_lang("en")
    try:
        return wikipedia.page(topic, auto_suggest=False).summary
    except wikipedia.DisambiguationError as e:
        fallback_titles = e.options
    except Exception:
        # Catch Exception rather than everything, so Ctrl-C / SystemExit still
        # propagate instead of being reported as "No information found".
        return "No information found"

    if not fallback_titles:
        return "No information found"

    # The fallback lookup can fail too (missing page, another disambiguation,
    # network error); an exception raised here would otherwise escape.
    try:
        # auto_suggest=False keeps the option title verbatim, matching the
        # primary lookup above.
        return wikipedia.summary(fallback_titles[0], sentences=3, auto_suggest=False)
    except Exception:
        return "No information found"


# Smoke test: look up a well-known title and show the text that comes back.
cat_summary = fetch_wiki("Cat")
print(cat_summary)

In [None]:
import nltk

# Download the NLTK "punkt" and "punkt_tab" resources, in that order.
for resource in ("punkt", "punkt_tab"):
    nltk.download(resource)


In [None]:
import nltk
# The "punkt" resource is already downloaded by the preceding cell, so the
# duplicate nltk.download("punkt") call is not repeated here.
from nltk.tokenize import sent_tokenize

def split_into_claims(answer):
    """Split ``answer`` into sentences with NLTK's ``sent_tokenize``.

    Each returned sentence is treated as one claim by the rest of the notebook.
    """
    sentences = sent_tokenize(answer)
    return sentences

In [None]:
# Two-sentence sample answer used to check the sentence splitter.
answer = "Alan Turing invented the computer in 1936. He was British."
claims = split_into_claims(answer=answer)
print(claims)

In [None]:
# Put one claim next to the raw Wikipedia summary that will serve as evidence.
claim = "Alan Turing invented the computer in 1936."
evidence = fetch_wiki("Alan Turing")

# sep="\n" prints each heading on its own line, followed by its text.
print("CLAIM:", claim, sep="\n")
print("\nEVIDENCE:", evidence, sep="\n")

In [None]:
!pip install -q sentence-transformers

In [None]:
from sentence_transformers import SentenceTransformer

# Checkpoint name for the sentence-embedding model shared by the cells below.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
model = SentenceTransformer(EMBEDDING_MODEL_NAME)

In [None]:
def get_embedding(text):
    """Encode ``text`` with the notebook-level ``model`` and return the result of ``model.encode``."""
    embedding = model.encode(text)
    return embedding

In [None]:
from numpy import dot
from numpy.linalg import norm

def similarity(a, b):
    """Return the cosine similarity of vectors ``a`` and ``b``.

    Args:
        a: First vector (anything ``numpy.dot`` / ``numpy.linalg.norm`` accept).
        b: Second vector, same length as ``a``.

    Returns:
        ``dot(a, b) / (norm(a) * norm(b))``, or ``0.0`` when either vector has
        zero length, since the cosine is undefined there.
    """
    denominator = norm(a) * norm(b)
    # A zero vector would make this 0/0, i.e. NaN plus a RuntimeWarning.
    if denominator == 0:
        return 0.0
    return dot(a, b) / denominator


In [None]:
# Three probe sentences; the prints below compare s1 with s2 and s1 with s3.
s1 = "Alan Turing was a pioneer of computer science."
s2 = "Alan Turing invented the computer."
s3 = "Bananas are yellow fruits."

# Embed the sentences in order.
e1, e2, e3 = (get_embedding(sentence) for sentence in (s1, s2, s3))

print("Similar meaning:", similarity(e1, e2))
print("Different meaning:", similarity(e1, e3))


In [None]:
!pip install -q faiss-cpu


In [None]:
def chunk_text(text, chunk_size=2):
    """Group the sentences of ``text`` into chunks of ``chunk_size`` sentences.

    Sentences are delimited by ``". "``. Every sentence keeps its closing
    period, so each chunk is well-formed text.

    Args:
        text: Text to split. ``None`` and empty/whitespace-only text give ``[]``.
        chunk_size: Number of sentences per chunk; must be at least 1.

    Returns:
        A list of non-empty chunk strings, in document order.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")
    if not text or not text.strip():
        return []

    pieces = text.split(". ")
    # str.split consumes the ". " separator, so every piece except the last
    # has lost its period; put it back before regrouping.
    sentences = [piece + "." for piece in pieces[:-1]] + [pieces[-1]]

    chunks = []
    for start in range(0, len(sentences), chunk_size):
        chunk = " ".join(sentences[start:start + chunk_size])
        if chunk.strip():
            chunks.append(chunk)
    return chunks


In [None]:
import faiss
import numpy as np

class VectorSearch:
    """Nearest-neighbour lookup over a fixed list of texts.

    Texts are embedded with the notebook-level ``model`` and stored in a FAISS
    ``IndexFlatL2`` index.

    Attributes set by ``__init__``:
        texts: Private copy of the indexed texts.
        embeddings: float32 array holding one embedding row per text.
        index: The FAISS index, or ``None`` when there are no texts.
    """

    def __init__(self, texts):
        """Embed ``texts`` and build the index.

        Args:
            texts: Iterable of strings to index. May be empty, in which case
                every search returns ``[]``.
        """
        # Copy so later mutation of the caller's list cannot drift out of sync
        # with the index rows.
        self.texts = list(texts)
        if not self.texts:
            self.embeddings = np.empty((0, 0), dtype="float32")
            self.index = None
            return

        # Hand the index a C-contiguous float32 matrix.
        self.embeddings = np.ascontiguousarray(model.encode(self.texts), dtype="float32")
        dim = self.embeddings.shape[1]
        self.index = faiss.IndexFlatL2(dim)
        self.index.add(self.embeddings)

    def search(self, query, k=1):
        """Return up to ``k`` indexed texts closest to ``query``, best first.

        Args:
            query: Query string.
            k: Maximum number of results.

        Returns:
            A list of at most ``min(k, number of texts)`` texts; ``[]`` when the
            index is empty or ``k`` is smaller than 1.
        """
        # Never ask for more neighbours than exist: missing slots come back as
        # -1, which would index the LAST text instead of failing.
        k = min(k, len(self.texts))
        if self.index is None or k < 1:
            return []
        query_emb = np.ascontiguousarray(model.encode([query]), dtype="float32")
        _, indices = self.index.search(query_emb, k)
        return [self.texts[i] for i in indices[0] if i >= 0]


In [None]:
# Index the Alan Turing summary chunk by chunk.
wiki_text = fetch_wiki("Alan Turing")
chunks = chunk_text(wiki_text)
search_engine = VectorSearch(chunks)

# Retrieve the single chunk closest to the claim.
claim = "Alan Turing invented the computer in 1936."
result = search_engine.search(claim)

# sep="\n" prints each heading on its own line, followed by its text.
print("CLAIM:", claim, sep="\n")
print("\nBEST EVIDENCE FOUND:", result[0], sep="\n")


In [None]:
!pip install -q transformers torch


In [None]:
from transformers import pipeline

# Checkpoint name for the text-classification pipeline built below.
NLI_MODEL_NAME = "facebook/bart-large-mnli"
nli_model = pipeline(task="text-classification", model=NLI_MODEL_NAME)



In [None]:
def verify_claim_with_evidence(claim, evidence):
    """Classify how ``evidence`` relates to ``claim`` with the NLI pipeline.

    Args:
        claim: Statement to verify (the NLI hypothesis).
        evidence: Reference text to verify it against (the NLI premise).

    Returns:
        A ``(label, score)`` tuple taken from the pipeline's top prediction.
    """
    # Pass the pair as text/text_pair so the tokenizer inserts the separator
    # tokens itself instead of relying on a hand-written "</s></s>" string.
    # truncation="only_first" shortens only the evidence when the pair is too
    # long for the model, so the claim is never cut off.
    prediction = nli_model(
        {"text": evidence, "text_pair": claim},
        truncation="only_first",
    )
    # The pipeline may return the top prediction bare or wrapped in a list,
    # depending on input type and library version; unwrap either form.
    while isinstance(prediction, (list, tuple)):
        prediction = prediction[0]
    return prediction["label"], prediction["score"]


In [None]:
# Check one claim against the whole Alan Turing summary.
evidence = fetch_wiki("Alan Turing")
claim = "Alan Turing invented the computer in 1936."

label, confidence = verify_claim_with_evidence(claim=claim, evidence=evidence)
print(label, confidence)


In [None]:
# Second claim, checked against the same `evidence` fetched in the cell above.
claim = "Alan Turing was a British mathematician."

label, confidence = verify_claim_with_evidence(claim=claim, evidence=evidence)
print(label, confidence)

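ENTAILMENT → good (the evidence supports the claim)

NEUTRAL → suspicious (the evidence neither supports nor contradicts the claim)

CONTRADICTION → bad (the evidence contradicts the claim; likely a hallucination)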


Hallucination score scale:

0 = very grounded

1 = very hallucinated

In [None]:
def hallucination_score(results):
    """Average per-claim penalty, from 0 (grounded) to 1 (hallucinated).

    Each claim contributes ``weight * confidence``. The weight is 0 for
    "entailment", 0.5 for "neutral" and 1.0 for "contradiction", matched
    case-insensitively. Any other label contributes 0.

    Args:
        results: Iterable of ``(label, confidence)`` pairs, one per claim.

    Returns:
        The mean penalty, capped at 1. An empty ``results`` gives ``0.0``
        (no claims, nothing to penalise) instead of dividing by zero.
    """
    results = list(results)
    if not results:
        return 0.0

    penalty_weights = {"entailment": 0.0, "neutral": 0.5, "contradiction": 1.0}
    total = sum(
        penalty_weights.get(label.lower(), 0.0) * confidence
        for label, confidence in results
    )
    return min(total / len(results), 1)



In [None]:
def verdict_from_score(score):
    """Translate a hallucination score into a verdict label.

    Scores below 0.3 are "Grounded", scores below 0.6 are
    "Partially Grounded", and everything else is "Hallucinated".
    """
    # (exclusive upper bound, verdict) pairs, checked in ascending order.
    bands = (
        (0.3, "Grounded"),
        (0.6, "Partially Grounded"),
    )
    for upper_bound, verdict in bands:
        if score < upper_bound:
            return verdict
    return "Hallucinated"


In [None]:
def normalize_claim(claim, subject):
    """Replace a leading pronoun in ``claim`` with ``subject``.

    Only the first word is inspected, and only the exact forms
    "He"/"She"/"They" (capitalised or lower-case) are replaced.

    Args:
        claim: Sentence to normalise.
        subject: Text substituted for the leading pronoun.

    Returns:
        The rewritten claim when it starts with one of those pronouns,
        otherwise ``claim`` unchanged. Empty and whitespace-only claims are
        returned unchanged too.
    """
    pronouns = {"He", "She", "They", "he", "she", "they"}
    words = claim.split()
    # An empty or whitespace-only claim has no first word to inspect.
    if not words:
        return claim
    if words[0] in pronouns:
        return subject + " " + " ".join(words[1:])
    return claim


In [None]:
def check_answer(answer, subject):
    """Fact-check ``answer`` against the Wikipedia summary of ``subject``.

    The answer is split into claims. For each claim, the closest chunk of the
    summary is retrieved and the claim is classified against that chunk. The
    per-claim results are then folded into one score and verdict.

    Args:
        answer: Text to verify.
        subject: Wikipedia topic the answer is about. It is also substituted
            for a leading pronoun in each claim.

    Returns:
        A ``(verdict, score, claim_results)`` tuple. ``claim_results`` holds one
        dict per claim with the keys ``"claim"``, ``"label"``, ``"confidence"``
        and ``"evidence"``. ``("Unknown", 1.0, [])`` is returned when nothing
        can be checked: the answer has no claims, no reference text was found,
        or the reference text produced no chunks.
    """
    # Drop blank claims up front; they cannot be verified.
    claims = [claim for claim in split_into_claims(answer) if claim.strip()]
    if not claims:
        # Returning before the fetch also avoids scoring an empty claim list.
        return "Unknown", 1.0, []

    wiki_text = fetch_wiki(subject)
    if wiki_text == "No information found":
        return "Unknown", 1.0, []

    chunks = chunk_text(wiki_text)
    if not chunks:
        # An empty summary leaves nothing to index or search.
        return "Unknown", 1.0, []
    search_engine = VectorSearch(chunks)

    # Optional hook: applied only if some cell defined `normalize_nationality`
    # in this notebook's global namespace.
    nationality_hook = globals().get("normalize_nationality")

    claim_results = []
    for claim in claims:
        clean_claim = normalize_claim(claim, subject)
        if nationality_hook is not None:
            clean_claim = nationality_hook(clean_claim)

        evidence = search_engine.search(clean_claim)[0]
        label, confidence = verify_claim_with_evidence(clean_claim, evidence)

        # Report the claim as originally written, not the normalised form.
        claim_results.append({
            "claim": claim,
            "label": label,
            "confidence": confidence,
            "evidence": evidence
        })

    score = hallucination_score(
        [(c["label"], c["confidence"]) for c in claim_results]
    )
    verdict = verdict_from_score(score)

    return verdict, score, claim_results


In [None]:
# Interactive entry point: read an answer and its subject, then report the result.
answer = input("Paste the AI-generated answer:\n")
subject = input("\nWhat is this answer about? (subject):\n")

verdict, score, details = check_answer(answer, subject)

print(f"\nFINAL VERDICT: {verdict}")
print(f"HALLUCINATION SCORE: {round(score, 2)}")

# One two-line entry per claim: the claim text, then its label and confidence.
print("\nCLAIM-BY-CLAIM RESULTS:")
for d in details:
    print(f"- {d['claim']}")
    print(f"  → {d['label']} ({d['confidence']:.2f})")
