# Fact-Checking AI Agent Experiment

This notebook implements an AI agent that extracts claims from a PDF and verifies them against a set of web sources using a RAG (Retrieval-Augmented Generation) approach.

In [2]:
import sys
import os
import re
from langchain_community.document_loaders import PyPDFLoader, WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Resolve the project root (the parent of the notebook's working directory)
# and register it on sys.path so the project-local packages can be imported.
notebook_dir = os.getcwd()
project_root = os.path.abspath(os.path.join(notebook_dir, os.pardir))
if project_root not in sys.path:
    sys.path.append(project_root)

# Project-local imports: these only resolve after the sys.path tweak above.
from utils.model_loader import ModelLoader
from data.sources import sources

print(
    f"Project Root: {project_root}",
    f"Loaded {len(sources)} sources from data/sources.py",
    sep="\n",
)

USER_AGENT environment variable not set, consider setting it to identify your requests.


Project Root: /Users/suriyaa/Desktop/check-mate
Loaded 10 sources from data/sources.py


In [10]:
# Initialize Models
# ModelLoader comes from the project's utils.model_loader module. The two
# objects built here are reused by later cells: `llm` drives claim extraction
# and claim verification, `embeddings` is used to build the FAISS index.
loader = ModelLoader()
llm = loader.load_llm()
embeddings = loader.load_embeddings()
print("LLM and Embeddings loaded successfully.")

{"timestamp": "2026-01-16T15:06:44.909695Z", "level": "info", "event": "Running in LOCAL mode: .env loaded"}
{"timestamp": "2026-01-16T15:06:44.910480Z", "level": "info", "event": "Loaded GROQ_API_KEY from individual env var"}
{"timestamp": "2026-01-16T15:06:44.910889Z", "level": "info", "event": "Loaded GOOGLE_API_KEY from individual env var"}
{"keys": {"GROQ_API_KEY": "gsk_9p...", "GOOGLE_API_KEY": "AIzaSy..."}, "timestamp": "2026-01-16T15:06:44.911395Z", "level": "info", "event": "API keys loaded"}
{"config_keys": ["faiss_db", "embedding_model", "retriever", "llm"], "timestamp": "2026-01-16T15:06:44.914310Z", "level": "info", "event": "YAML config loaded"}
{"provider": "google", "model": "gemini-2.0-flash", "timestamp": "2026-01-16T15:06:44.914770Z", "level": "info", "event": "Loading LLM"}
{"model": "models/text-embedding-004", "timestamp": "2026-01-16T15:06:44.950730Z", "level": "info", "event": "Loading embedding model"}


LLM and Embeddings loaded successfully.


In [11]:
# Load PDF Content
# Reads every page of the input PDF and concatenates the page texts into
# `pdf_text`, which the claim-extraction cell consumes.
pdf_path = os.path.join(project_root, "data", "Donald Trump’s Second Term.pdf")

# Fail fast when the file is missing. Merely printing a message would leave
# `pdf_text` undefined (NameError in the next cell) or, worse, silently reuse
# a stale value left in the kernel by an earlier run.
if not os.path.isfile(pdf_path):
    raise FileNotFoundError(f"Error: PDF not found at {pdf_path}")

print(f"Loading PDF from: {pdf_path}")
pdf_loader = PyPDFLoader(pdf_path)
pdf_docs = pdf_loader.load()
# One Document per page; join pages with newlines to get a single text blob.
pdf_text = "\n".join(doc.page_content for doc in pdf_docs)
print(f"PDF Text Length: {len(pdf_text)} characters")

Loading PDF from: /Users/suriyaa/Desktop/check-mate/data/Donald Trump’s Second Term.pdf
PDF Text Length: 2733 characters


In [12]:
# Extract Claims from PDF
# Asks the LLM for a numbered list of verifiable claims found in `pdf_text`,
# then parses that free-form answer into the `claims` list of plain strings.
print("Extracting key claims from the PDF...")

claim_extraction_prompt = PromptTemplate.from_template(
    """
    You are an expert fact-checker. Extract key claims from the following text that are specific, verifiable, and relevant to the topic of "Donald Trump's Second Term" foreign policy or actions.
    Focus on assertions of fact rather than opinions.
    Return the claims as a numbered list.
    
    Text:
    {text}
    
    Key Claims:
    """
)

claim_chain = claim_extraction_prompt | llm | StrOutputParser()
# Invoke with the full PDF text (assuming it fits in context, otherwise we'd chunk)
claims_raw = claim_chain.invoke({"text": pdf_text})

print("--- Raw Extracted Claims ---")
print(claims_raw)


def _parse_claims(raw):
    """Return the list items found in an LLM answer, without their markers.

    A line counts as a list item only when it starts with exactly one marker
    ("1.", "2)", "-", "*", "•") followed by whitespace. Only that marker is
    removed, so a claim that itself begins with a number (e.g. "66
    organizations ...") keeps it, and ordinary sentences that merely start
    with a digit (e.g. "2026 was ...") are not mistaken for list items.

    Args:
        raw: The raw text returned by the claim-extraction chain.

    Returns:
        A list of claim strings in the order they appear in `raw`.
    """
    list_item = re.compile(r'^(?:\d+[.)]|[-*•])\s+(.+)')
    parsed = []
    for line in raw.splitlines():
        match = list_item.match(line.strip())
        if match:
            parsed.append(match.group(1))
    return parsed


# Parse and clean claims
claims = _parse_claims(claims_raw)

print(f"\nExtracted {len(claims)} individual claims.")

AFC is enabled with max remote calls: 10.


Extracting key claims from the PDF...


HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent "HTTP/1.1 200 OK"


--- Raw Extracted Claims ---
Here are the key, verifiable claims extracted from the text, focusing on Donald Trump's second term foreign policy:

1.  In January 2026, the Trump administration formally announced the withdrawal of the United States from 66 international organizations, conventions, and treaties.

Extracted 1 individual claims.


In [13]:
# RAG setup: fetch the reference web pages, chunk them, and index the chunks
# in a FAISS vector store that the verification step queries.
print("Loading web sources...")
web_loader = WebBaseLoader(sources)
web_docs = web_loader.load()
print(f"Loaded {len(web_docs)} web documents.")

print("Splitting documents and creating Vector Store...")
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
web_chunks = text_splitter.split_documents(web_docs)

# Embed every chunk and expose the index as a top-3 retriever.
vectorstore = FAISS.from_documents(web_chunks, embeddings)
retriever = vectorstore.as_retriever(search_kwargs=dict(k=3))
print("Vector Store ready.")

Loading web sources...
Loaded 10 web documents.
Splitting documents and creating Vector Store...


HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/text-embedding-004:batchEmbedContents "HTTP/1.1 200 OK"
HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/text-embedding-004:batchEmbedContents "HTTP/1.1 200 OK"
HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/text-embedding-004:batchEmbedContents "HTTP/1.1 200 OK"
HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/text-embedding-004:batchEmbedContents "HTTP/1.1 200 OK"
HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/text-embedding-004:batchEmbedContents "HTTP/1.1 200 OK"
HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/text-embedding-004:batchEmbedContents "HTTP/1.1 200 OK"
HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/text-embedding-004:batchEmbedContents "HTTP/1.1 200 OK"
HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/text-emb

Vector Store ready.


In [14]:
# Fact-Check Claims
# For each extracted claim: retrieve the most similar web chunks, hand them to
# the LLM together with the claim, and collect the verdict text in `results`.
print("Starting verification process...\n")

fact_check_prompt = PromptTemplate.from_template(
    """
    You are a strict fact-checker. Verify the following claim based ONLY on the provided context retrieved from reliable sources.
    
    Claim: {claim}
    
    Context:
    {context}
    
    Determine if the claim is SUPPORTED, CONTRADICTED, or NOT_MENTIONED by the context.
    Provide a brief explanation citing specific parts of the context.
    
    Output Format:
    **Verdict**: [SUPPORTED / CONTRADICTED / NOT_MENTIONED]
    **Explanation**: [Your explanation here]
    """
)


def _format_docs(docs):
    """Render retrieved documents as plain text for the prompt.

    Without this step the prompt's {context} slot receives the Python repr of
    a list of Document objects (metadata dicts, escaped newlines), which is
    noisy and wastes tokens. Each chunk is emitted as its text preceded by
    the source recorded in its metadata, so the model can cite it.

    Args:
        docs: Iterable of retrieved documents exposing `page_content` and
            a `metadata` mapping.

    Returns:
        A single string with the chunks separated by blank lines.
    """
    return "\n\n".join(
        f"[Source: {doc.metadata.get('source', 'unknown')}]\n{doc.page_content}"
        for doc in docs
    )


# The claim string is both the retrieval query and the {claim} prompt value.
verification_chain = (
    {"context": retriever | _format_docs, "claim": RunnablePassthrough()}
    | fact_check_prompt
    | llm
    | StrOutputParser()
)

results = []

for i, claim in enumerate(claims):
    print(f"Checking Claim {i+1}/{len(claims)}")
    print(f"Claim: {claim}")

    try:
        result = verification_chain.invoke(claim)
        print(result)
        results.append({"claim": claim, "verification": result})
    except Exception as e:
        # Best effort: report the failure and continue with the next claim.
        # Failed claims are not added to `results`.
        print(f"Error verifying claim: {e}")

    print("-" * 50)

print("Verification Complete.")

Starting verification process...

Checking Claim 1/1
Claim: In January 2026, the Trump administration formally announced the withdrawal of the United States from 66 international organizations, conventions, and treaties.


HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/text-embedding-004:batchEmbedContents "HTTP/1.1 200 OK"
AFC is enabled with max remote calls: 10.
HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent "HTTP/1.1 200 OK"


**Verdict**: SUPPORTED
**Explanation**: The provided context directly supports the claim. The documents state that on January 7, 2026, President Trump announced the withdrawal of the United States from 66 international organizations, conventions, and treaties. Specifically, one document mentions "President Donald Trump, citing 'the authority vested in me as President by the Constitution and the laws of the United States of America,' withdrew the United States 'from International Organizations, Conventions, and Treaties that Are Contrary to the Interests of the United States.'" Another document mentions "On January 7, 2026, through Executive Order 14199, President Trump announced the withdrawal of the United States from 66 international organizations (35 non-United Nations and 31 UN."
--------------------------------------------------
Verification Complete.


In [10]:
from model.models import PromptLoader
from prompt.prompt_library import PROMPT_REGISTRY

In [None]:
# Look up the prompt registered under the FETCH_KEY_CLAIMS key; as the cell's
# last (bare) expression, the resulting PromptTemplate is shown as its output.
PROMPT_REGISTRY[PromptLoader.FETCH_KEY_CLAIMS.value]

PromptTemplate(input_variables=['text'], input_types={}, partial_variables={}, template='\n    You are an expert fact-checker. Extract key claims from the following text that are specific, verifiable, and relevant to the topic of "Donald Trump\'s Second Term" foreign policy or actions.\n    Focus on assertions of fact rather than opinions.\n    Return the claims as a numbered list.\n    \n    Text:\n    {text}\n    \n    Key Claims:\n    ')