## Setup Code (Mostly Duplicate)

In [1]:
# !pip install langchain sentence_transformers chromadb

Collecting langchain
  Obtaining dependency information for langchain from https://files.pythonhosted.org/packages/ed/3e/93045d37eba24e0b5eb05312e30cd9e12805ea5f1ae9ba51ec8a7d2f5372/langchain-0.1.16-py3-none-any.whl.metadata
  Downloading langchain-0.1.16-py3-none-any.whl.metadata (13 kB)
Collecting chromadb
  Obtaining dependency information for chromadb from https://files.pythonhosted.org/packages/a4/e1/ce276f553811bd6c684cfe5f637a33ae6444750746f974a8f73d5dc92004/chromadb-0.5.0-py3-none-any.whl.metadata
  Downloading chromadb-0.5.0-py3-none-any.whl.metadata (7.3 kB)
Collecting SQLAlchemy<3,>=1.4 (from langchain)
  Obtaining dependency information for SQLAlchemy<3,>=1.4 from https://files.pythonhosted.org/packages/ea/3e/95278ef021d3b8bed98bcc5f10faf27e4c4bc0a63a6e0bd98529b1ba8d2e/SQLAlchemy-2.0.29-cp311-cp311-macosx_11_0_arm64.whl.metadata
  Downloading SQLAlchemy-2.0.29-cp311-cp311-macosx_11_0_arm64.whl.metadata (9.6 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain)
  Obta

In [2]:
from IPython.display import clear_output
import numpy as np
import os

import json
import requests
import pprint
import re
from bs4 import BeautifulSoup

from langchain.docstore.document import Document
from langchain.vectorstores import Chroma
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
import torch

from collections import Counter

In [5]:
# NOTE(review): '...' looks like a redacted placeholder rather than a real key -- confirm
# and supply the real value before running. This assignment overwrites any SERPER_API_KEY
# already present in the process environment; fetch_search_results() reads the key back
# from os.environ when it builds the request headers.
os.environ["SERPER_API_KEY"] = '...'

In [6]:
## Web Querying & Scraping
import json
import requests
import pprint
import re
from bs4 import BeautifulSoup

# Make sure we don't scrape from known fact checking websites.
# Entries are bare host names in the form returned by extract_website_name().
# PolitiFact is served from politifact.com; the original 'politifact.org' entry is
# kept for backward compatibility but on its own never matched the real site.
SOURCE_BLACKLIST = ['politifact.org', 'politifact.com', 'factcheck.org']

def extract_website_name(url):
    """
    Extracts the bare host name of the first http(s) URL found in a string.

    The host is lower-cased, and any userinfo, port, path, query or fragment is
    discarded. Only a *leading* "www." is removed, so hosts that merely contain
    that substring (e.g. "awww.example.com") are returned intact.

    Args:
        url (str): A URL, or free text containing one.

    Returns:
        str or None: The host name (e.g. "nytimes.com"), an empty string if the
        URL has no host part, or None if no http(s) URL is found or it is malformed.
    """
    from urllib.parse import urlsplit  # local import keeps this cell self-contained

    match = re.search(r'(?P<url>https?://[^\s]+)', url)
    if not match:
        return None

    try:
        # .hostname already lower-cases and strips userinfo / port
        host = urlsplit(match.group('url')).hostname or ''
    except ValueError:
        # urlsplit rejects malformed authorities (e.g. an unbalanced IPv6 bracket)
        return None

    if host.startswith('www.'):
        host = host[len('www.'):]
    return host

def scrape_text_from_website(url):
    """
    Downloads a web page and returns its visible text as a single line.

    The page is fetched with a 5 second timeout. <script> and <style> elements
    are removed before the text is extracted, and every run of whitespace is
    collapsed to one space.

    Args:
        url (str): Address of the page to download.

    Returns:
        str or None: The cleaned page text, or None when the response status is
        not 200 or any exception occurs (a message is printed in both cases).
    """
    try:
        page = requests.get(url, timeout=5)

        # Guard clause: anything other than HTTP 200 counts as a failed retrieval
        if page.status_code != 200:
            print(f"Failed to retrieve content from the URL: {url}")
            return None

        parsed = BeautifulSoup(page.content, 'html.parser')

        # Drop elements whose contents are code / styling rather than readable text
        for tag in parsed(["script", "style"]):
            tag.decompose()

        # Pull out the remaining text and normalise whitespace in one step
        return re.sub(r'\s+', ' ', parsed.get_text()).strip()
    except Exception as e:
        print(f"Error during website scraping: {e}")
        return None

def fetch_search_results(question, scrape_website=False, timeout=10):
    """
    Fetches organic web search results for a question from the Serper search API.

    Results whose host is on SOURCE_BLACKLIST (or is a subdomain of a blacklisted
    host) are dropped so that existing fact-checks are not used as evidence.

    Args:
        question (str): The question to search for.
        scrape_website (bool, optional): Whether to download each result page and use
            its full text. When False, or when scraping fails, the search snippet is
            used as the text instead. Defaults to False.
        timeout (float, optional): Seconds to wait for the search API before giving
            up. Defaults to 10.

    Returns:
        list: One dict per kept result with the keys "title", "source",
        "date_published", "relevant_excerpt", "text", "search_position" and "url".
        An empty list is returned (and a message printed) if the request fails.
    """
    api_key = os.environ.get("SERPER_API_KEY")
    if not api_key:
        # Fail with a clear message instead of an opaque HTTP error from the API
        print("Failed to fetch information: SERPER_API_KEY is not set")
        return []

    headers = {
        "X-API-KEY": api_key,
        "Content-Type": "application/json",
    }

    payload = json.dumps({"q": question})
    try:
        # Without a timeout, requests can block forever on a stalled connection
        response = requests.post(
            "https://google.serper.dev/search",
            headers=headers,
            data=payload,
            timeout=timeout,
        )
        # Surface HTTP errors (bad key, quota, ...) rather than parsing an error body
        response.raise_for_status()
        result = response.json()

        # Extract the organic search results and transform them into our desired format
        results = []
        for item in result.get('organic', []):
            link = item.get('link', '')
            snippet = item.get('snippet', '')

            # ALSO while iterating through the results, remove any websites on our source blacklist
            source = extract_website_name(link)
            if source and any(
                source == blocked or source.endswith('.' + blocked)
                for blocked in SOURCE_BLACKLIST
            ):
                continue

            website_text = scrape_text_from_website(link) if scrape_website else snippet
            if not website_text:  # if we failed to scrape the website, use the snippet
                website_text = snippet

            results.append({
                "title": item.get('title', ''),
                "source": source,
                "date_published": item.get('date', ''),
                "relevant_excerpt": snippet,
                "text": website_text,
                "search_position": item.get('position', -1),
                "url": link,
            })
        return results

    except Exception as e:
        # Deliberate best-effort: the interactive pipeline continues with no results
        print(f"Failed to fetch information: {e}")
        return []

# Example usage: run one live search with page scraping enabled and show the
# resulting list of result dicts as the cell output.
question = "What is the estimated cost of the Green New Deal according to its proponents?"
search_results = fetch_search_results(question, scrape_website=True)
search_results

Failed to retrieve content from the URL: https://www.nytimes.com/2019/02/21/climate/green-new-deal-questions-answers.html
Failed to retrieve content from the URL: https://thehill.com/opinion/finance/427873-costs-of-the-green-new-deal/


[{'title': "Why it's hard to put a price tag on plans like the Green New Deal",
  'source': 'marketplace.org',
  'date_published': 'Oct 8, 2020',
  'relevant_excerpt': 'But during the presidential debates last week, President Donald Trump claimed that the Green New Deal would cost $100 trillion — a figure ...',
  'text': "What will the Green New Deal cost? - Marketplace Skip to content Listen Newsletters Shows Marketplace Marketplace Morning Report Marketplace Tech Make Me Smart This is Uncomfortable The Uncertain Hour How We Survive Financially Inclined Million Bazillion Marketplace Minute® Corner Office from Marketplace Marketplace Logo Newsletters Shows Marketplace Marketplace Morning Report Marketplace Tech Make Me Smart This is Uncomfortable The Uncertain Hour How We Survive Financially Inclined Million Bazillion Marketplace Minute® Corner Office from Marketplace Facebook Instagram Youtube TikTok Donate Search Menu Menu Search Explore Latest Stories Our Shows Collections Smart Spe

In [7]:
## Retrieval Augmented Generation (RAG) Retriever
from langchain.docstore.document import Document
from langchain.vectorstores import Chroma
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
import torch

# Embedding model used by the retriever to score sentence similarity
BATCH_SIZE = 32

# Choose the torch backend in order of preference: CUDA, then Apple MPS, then CPU
if torch.cuda.is_available():
    device = 'cuda'
elif torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'

retriever_model_id = 'sentence-transformers/all-MiniLM-L6-v2'
retriever_model = HuggingFaceEmbeddings(
    model_name=retriever_model_id,
    model_kwargs=dict(device=device),
    encode_kwargs=dict(device=device, batch_size=BATCH_SIZE),
)

def retrieve_relevant_documents_using_rag(search_results, content_key, question, chunk_size=512, chunk_overlap=128, top_k=10):
    """
    Takes in search results and a query question, processes and splits the documents,
    and retrieves relevant documents using a RAG approach.

    The input dictionaries are not modified. Each call indexes its chunks in its own
    temporary Chroma collection, which is deleted before returning, so chunks from
    earlier calls cannot show up in later retrievals.

    Args:
        search_results (list of dict): A list of dictionaries containing web-scraped data.
        content_key (str): The key in the dictionary containing the text content.
            All other keys are attached to the document as metadata.
        question (str): The query question for retrieving relevant documents.
        chunk_size (int): The maximum size of the text chunks.
        chunk_overlap (int): The overlap between consecutive text chunks.
        top_k (int): The number of relevant documents to retrieve.

    Returns:
        list: A list of relevant document chunks (empty if there is no text to index).
    """
    import uuid  # local import: only needed to name the per-call collection

    # Create LangChain documents from search results
    documents = []
    for result in search_results:
        page_content = result.get(content_key)
        if not page_content:
            continue  # nothing to index for this result
        # Build the metadata from a copy so the caller's dict keeps its content key.
        # None values are dropped: Chroma only accepts str/int/float/bool metadata.
        metadata = {
            key: value
            for key, value in result.items()
            if key != content_key and value is not None
        }
        documents.append(Document(page_content=page_content, metadata=metadata))

    if not documents:
        return []

    # Split documents into smaller chunks (if needed, based on document size)
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    split_documents = text_splitter.split_documents(documents)
    if not split_documents:
        return []

    # Index the chunks in a uniquely named collection; the default collection name
    # is shared by every call made in the same process.
    db = Chroma.from_documents(
        documents=split_documents,
        embedding=retriever_model,
        collection_name=f"rag-{uuid.uuid4().hex}",
    )

    try:
        # Retrieve the most relevant chunks for the given question
        return db.max_marginal_relevance_search(question, k=top_k)
    finally:
        # The returned documents are already materialised; free the temporary index
        db.delete_collection()

# # Example usage
# question = "What is the estimated cost of the Green New Deal according to its proponents?"
# relevant_docs = retrieve_relevant_documents_using_rag(search_results, 'text', question)
# relevant_docs

Downloading modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

Downloading tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

Downloading vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading 1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [8]:
## Generate Fact Score Label for Statement (Statement Classification)

def generate_fact_score_label(verdicts):
    """
    Generates a fact score label based on the verdicts provided. The fact score label can be one of the following:
    - Unverifiable: The number of unverifiable atomic claims is greater than or equal to
      both the number of true and the number of false atomic claims (this includes an
      empty list of verdicts).
    - Half True: As many atomic claims are true as are false.
    - True: All atomic claims are true.
    - Mostly True: More atomic claims are true than false, but not all of them are true.
    - Pants on Fire: All atomic claims are false.
    - Mostly False: More atomic claims are false than true, but not all of them are false.

    Verdicts are compared case-insensitively with surrounding whitespace ignored. Any
    verdict that is not "True" or "False" is counted as unverifiable. The input list is
    not modified.

    Args:
        verdicts (list): A list of verdicts (True/False/Unverifiable) for each atomic claim within a statement.

    Returns:
        str: The fact score label.
    """
    # Normalise so pasted verdicts such as "true " or "FALSE" are still recognised
    normalized = [str(verdict).strip().lower() for verdict in verdicts]
    counts = Counter(normalized)
    n_true = counts['true']
    n_false = counts['false']
    n_unverified = len(normalized) - n_true - n_false

    # Unverifiable wins outright and also wins every tie it is part of
    if n_unverified >= max(n_true, n_false):
        return "Unverifiable"

    if n_true == n_false:
        return "Half True"

    if n_true > n_false:
        return "True" if n_true == len(normalized) else "Mostly True"

    return "Pants on Fire" if n_false == len(normalized) else "Mostly False"

## MAIN CODE BLOCK

In [9]:
# Accumulators filled by the interactive fact-checking cell below and pickled by the
# last cell. Each run of that cell appends one entry to every list, so index i of all
# three refers to the same statement. Re-running THIS cell resets them to empty.
fact_scores = []   # one fact score label (str) per statement
statements = []    # the raw statement text entered by the user
output_dict = []   # despite the name, a list: one list of per-claim dicts per statement

In [33]:
from pandas.io.clipboard import clipboard_get, clipboard_set

# Interactive Fact-Checking Pipeline for LLMs on Web UI (ChatGPT, Claude, etc.)
# Just run this cell and follow the instructions on what to input here/what to paste into the LLM prompt box

# Redefine prompt templates (if you want to modify them for different LMs)
# NOTE: These also include hard-coded few shot examples which is a limitation for now
# Prompt that asks the LLM to split a statement into atomic claims, answered as a
# single Python list. It is filled with str.format(statement=...), so any literal
# curly braces added to the text must be doubled. The prompt deliberately ends with
# an opening `["` to nudge the model into continuing the list.
claim_atomization_template = """
You are a helpful assistant. Your task is to break down a set of statements given after <<<>>> into a minimal number of atomic claims.
These atomic claims need to be comprehensible independently (e.g., replace ambiguous pronouns with nouns).
You will only respond with the atomic claims in the format of a single, one-dimensional Python list. Do not provide any explanations or notes.

###
Here are some examples:
Statements: The Green New Deal proposed by Rep. Alexandria Ocasio-Cortez aims to achieve net-zero greenhouse gas emissions and create millions of green jobs.
Atomic Claims: ['The Green New Deal was proposed by Rep. Alexandria Ocasio-Cortez.', 'The Green New Deal aims to achieve net-zero greenhouse gas emissions.', 'The Green New Deal aims to create millions of green jobs.']
Statements: The Inflation Reduction Act, signed into law by President Biden in August 2022, is the largest investment in climate change mitigation in U.S. history.
Atomic Claims: ['The Inflation Reduction Act was signed into law by President Biden in August 2022.', 'The Inflation Reduction Act is the largest investment in climate change mitigation in U.S. history.']
Statements: President Trump's policies led to the highest inflation rate in over 40 years, resulting in economic hardship for millions of American families.
Atomic Claims: ["President Trump's policies led to the highest inflation rate in over 40 years.", "The high inflation rate caused by President Trump's policies resulted in economic hardship for millions of American families."]
###

<<<
Statements: {statement}
>>>
Atomic Claims: ["""

question_generation_template = """
You are a helpful assistant. Your task is to provide a set of unique, independent questions to search on the web to verify the claim given after <<<>>>.
Be as specific and concise as possible. Try to minimize the number of questions while ensuring that the claim can be verified.
You will only respond with the atomic claims in the format of a single, one-dimensional Python list. Do not provide any explanations or notes.

###
Here are some examples:
Claim: The high inflation rate caused by President Trump's policies resulted in economic hardship for millions of American families.
Questions: ["How did the high inflation rate under President Trump's policies affect American families?", "What economic challenges did millions of American families face due to the inflation rate under President Trump's policies?", 'What are the consequences of economic hardship caused by inflation under President Trump?']
Claim: President Trump's policies led to the highest inflation rate in over 40 years.
Questions: ["What impact did President Trump's policies have on the inflation rate?", "How does the inflation rate during President Trump's term compare to previous years?", "What is the significance of the inflation rate under President Trump's policies?"]
###

<<<
Claim: {claim}
>>>
Questions: ["""

# Prompt that asks the LLM to answer one question using only the retrieved document
# excerpts, followed by a "Sources:" section. Filled with
# str.format(documents=..., question=...), so any literal curly braces added to the
# text must be doubled.
# Fix: removed a stray, unmatched double quote at the end of the citation instruction.
answer_synthesis_template = """
You are a helpful assistant. Your task is to synthesize the documents (along with their source metadata) provided below to answer the question given after <<<>>>.
Only use the documents below to answer the question. In a separate section below your answer titled "Sources:", cite the relevant documents you used to answer the question.
If you cannot answer the question given the relevant documents, just say that you don't have enough information to answer the question. Do not make up an answer or sources.

Here are the relevant documents:
{documents}

<<<
Question: {question}
>>>
Answer: """

# Prompt that asks the LLM for a verdict ("True", "False" or "Unverifiable") on one
# atomic claim, given the question/answer pairs collected for it. Filled with
# str.format(claim=..., questions_and_answers=...), so any literal curly braces added
# to the text must be doubled.
claim_classification_template = """
You are a logical reasoning assistant. Given the original claim, a set of questions to help verify the claim, and their answers, reason step-by-step to come to a verdict on whether the claim is true or false. Think step-by-step about your reasoning process.
Return the verdict after "Verdict:" and provide a clear explanation after "Reasoning:"
For the verdict, only classify the claim as "True", "False", or "Unverifiable".

Claim: {claim}

{questions_and_answers}

Verdict: """

# Utility function to highlight prompt text to paste (i.e. into ChatGPT prompt box)
def paste_to_prompt_box(prompt):
    """
    Clears the cell output, copies the prompt to the system clipboard and prints it
    between two separator lines so it can be pasted into an LLM chat box.

    Copying to the clipboard is best-effort: if it fails (for example on a machine
    without a clipboard mechanism), a note is printed and the prompt is still shown
    so that it can be copied by hand.

    Args:
        prompt (str): The fully formatted prompt text.
    """
    clear_output()
    try:
        clipboard_set(prompt)
    except Exception as e:
        # Do not abort the interactive pipeline just because auto-copy is unavailable
        print(f"(Could not copy the prompt to the clipboard automatically: {e})")
    print(f"""\nPASTE THE FOLLOWING INTO THE LLM PROMPT BOX:\n{'*'*120}\n{prompt}\n{'*'*120}""")


# Utility function to keep asking the user for input until they provide a valid formatted input
def input_until_valid(input_msg, error_msg, tries=3):
    """
    Asks the user for a Python literal (e.g. a list of strings) until it parses.

    Args:
        input_msg (str): Prompt shown to the user.
        error_msg (str): Hint printed after an input that could not be parsed.
        tries (int): Maximum number of attempts. Defaults to 3.

    Returns:
        The parsed Python value, or None if every attempt failed.
    """
    import ast  # local import keeps this cell self-contained

    for _ in range(tries):
        # Read outside the try block so Ctrl-C / EOF propagate instead of being retried
        raw_input = input(input_msg)
        try:
            # SECURITY: the text is pasted LLM output, i.e. untrusted. literal_eval
            # only accepts Python literals, whereas eval() would execute arbitrary code.
            return ast.literal_eval(raw_input.strip())
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            print(f"\nERROR PARSING INPUT: {raw_input} \n{error_msg}\n")
    return None

# First, prompt user for a statement to fact-check
statement = input("Enter a statement: ")

# Tell user to manually prompt the LLM to generate atomic claims
paste_to_prompt_box(claim_atomization_template.format(statement=statement).strip())

# Prompt user to now paste the atomic claims generated by LLM
atomic_claims = input_until_valid(
    "Now, paste the atomic claims generated as a valid Python list: ",
    "Please make sure the input is a valid Python list."
)
if atomic_claims is None:
    # input_until_valid returns None once its retries are used up
    raise ValueError("No valid list of atomic claims was provided; re-run this cell to try again.")

verdicts = []
output = []  # One dict per atomic claim: claim, questions/answers and verdict
for claim in atomic_claims:
    claim_output = {}
    claim_output['claim'] = claim
    # Tell user to manually prompt the LLM to generate questions
    paste_to_prompt_box(question_generation_template.format(claim=claim).strip())

    # Prompt user to now paste the questions generated by LLM
    questions = input_until_valid(
        "Paste the questions generated by the LLM here as a valid Python list: ",
        "Please make sure the input is a valid Python list."
    )
    if questions is None:
        raise ValueError("No valid list of questions was provided; re-run this cell to try again.")

    claim_output['qa-pairs'] = {}
    claim_output['qa-pairs']['questions'] = questions

    answers = []
    for question in questions:
        # Get search results and use RAG to retrieve relevant documents (excerpts of the web pages)
        search_results = fetch_search_results(question)
        relevant_docs = retrieve_relevant_documents_using_rag(search_results, 'text', question)

        # DUPLICATE CODE TO GENERATE PROMPT FOR ANSWER GENERATION
        # Format the relevant documents for the prompt
        documents_text = ""
        for doc in relevant_docs:
            documents_text += f"Title: {doc.metadata.get('title', '')}\n"
            documents_text += f"URL: {doc.metadata.get('url', '')}\n"
            documents_text += f"Text: {doc.page_content.strip()}\n"
            documents_text += f"Date Published: {doc.metadata.get('date_published', '')}\n\n"

        # Fill in the prompt template with the relevant documents and the question
        prompt = answer_synthesis_template.format(documents=documents_text.strip(), question=question).strip()

        # Tell user to manually prompt the LLM to generate an answer
        paste_to_prompt_box(prompt)

        # Prompt user to now paste the answer generated by LLM
        answer = input("Paste the answer generated by the LLM here (ONLY THE ANSWER, NO SOURCES HERE): ")
        answers.append(answer)
    claim_output['qa-pairs']['answers'] = answers
    # Combine the questions and answers into a single string for the prompt
    questions_and_answers = ""
    for q, a in zip(questions, answers):
        questions_and_answers += f"Question: {q}\n"
        questions_and_answers += f"Answer: {a}\n\n"

    # Tell user to manually prompt the LLM to generate a verdict
    paste_to_prompt_box(claim_classification_template.format(claim=claim, questions_and_answers=questions_and_answers.strip()).strip())

    # Prompt user to now paste the verdict generated by LLM.
    # Stray whitespace from the paste is stripped so the verdict compares cleanly.
    verdict = input("Paste the verdict generated by the LLM here (True/False, NO REASONING HERE): ").strip()
    verdicts.append(verdict)
    claim_output['verdict'] = verdict
    output.append(claim_output)

fact_score_label = generate_fact_score_label(verdicts)

# Record the statement together with its results only once the whole run has
# succeeded, so the three parallel lists stay index-aligned if a run is aborted.
statements.append(statement)
output_dict.append(output)
fact_scores.append(fact_score_label)
print("*"*120)
print(f"Statement: {statement}")
print(f"Fact Score Label: {fact_score_label}")
print("*"*120)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)



PASTE THE FOLLOWING INTO THE LLM PROMPT BOX:
************************************************************************************************************************
You are a logical reasoning assistant. Given the original claim, a set of questions to help verify the claim, and their answers, reason step-by-step to come to a verdict on whether the claim is true or false. Think step-by-step about your reasoning process.
Return the verdict after "Verdict:" and provide a clear explanation after "Reasoning:"
For the verdict, only classify the claim as "True" or "False".

Claim: The abortion ban in Arizona fails to protect women in cases of rape or incest.

Question: Does the new Arizona abortion ban include exceptions that allow abortion in cases of rape or incest?
Answer: Based on the consistent information provided across multiple documents, the revived 1864 territorial abortion ban in Arizona does not include any exceptions that would allow for abortion in cases of rape or incest. The

In [36]:
import pickle 
import os
# Directory the result pickles are written to. '.' keeps the original location (the
# current working directory); change it to redirect the output.
# The previous os.path.join(...) call here discarded its result and had no effect.
OUTPUT_DIR = '.'

for filename, obj in (
    ("fact_scores.pkl", fact_scores),
    ("statements.pkl", statements),
    ("output_dict.pkl", output_dict),
):
    # The context manager flushes and closes each file even if pickling fails
    with open(os.path.join(OUTPUT_DIR, filename), 'wb') as f:
        pickle.dump(obj, f)