In [4]:
%reload_ext autoreload
%autoreload 2
from termcolor import colored
import dotenv
import sys
import dspy
import os

from tqdm.auto import tqdm

sys.path.append('../pipeline_v2/')
import main 
dotenv.load_dotenv('../.env')

from utils import print_header

import pandas as pd

def print_final_result(statement, verdict, confidence, reasoning, gold_verdict=None):
    """Print a colour-coded summary of one fact-check run.

    Emits a banner line followed by one level-1 header per field, using
    `print_header` for layout and `colored` for the field values.

    Args:
        statement: The statement that was fact-checked.
        verdict: Overall verdict produced for the statement.
        confidence: Overall confidence; converted with `str()` before colouring.
        reasoning: Overall reasoning text.
        gold_verdict: Optional reference verdict; its line is printed only
            when the value is truthy.
    """
    print("\nFinal Fact-Check Result:")

    # Collect every header first so they are all emitted by a single loop.
    header_lines = [
        f"Statement: {colored(statement, 'white')}",
        f"Overall Verdict: {colored(verdict, 'green')}",
        f"Overall Confidence: {colored(str(confidence), 'yellow')}",
        f"Overall Reasoning: {colored(reasoning, 'cyan')}",
    ]
    if gold_verdict:
        header_lines.append(f"Gold Verdict: {colored(gold_verdict, 'green')}")

    for line in header_lines:
        print_header(line, level=1)

In [14]:
### Load data
# Resume from the pickled results when they exist; otherwise start from the
# raw pilot-claims CSV, trimmed to the columns this notebook uses.
if not os.path.exists('results_v2.pkl'):
    df = (
        pd.read_csv('../data/[FINAL] Pilot - Pilot Claims copy.csv')
        # Drop unneeded columns
        .drop(columns=[
            'Assignee',
            'questions to verify the statement',
            'Gold Label',
            'GPT-4-Label',
            'Claude3-Sonnet-Label',
            'mistral_fs_results',
            'mistral_verdicts',
            'mistral_fs_label',
            'GPT3.5(Claude problem)',
        ])
        # Reformat dates as e.g. "April 02, 2024"
        .assign(
            statement_date=lambda frame: pd.to_datetime(frame['statement_date']).dt.strftime("%B %d, %Y")
        )
    )
else:
    df = pd.read_pickle('results_v2.pkl')

df

Unnamed: 0,verdict,statement_originator,statement,statement_date,context,factchecker,factcheck_date,factcheck_analysis_link,gemini-1.5-pro_results
0,FALSE,Instagram posts,“The National Guard in the HISTORY of its life...,"April 02, 2024",Social Media,Politifact,4/8/2024,https://www.politifact.com/factchecks/2024/apr...,"[{'verdict': 'MOSTLY FALSE', 'confidence': 0.8..."
1,PANTS ON FIRE,ROBERT F. Kennedy Jr.,"""On Jan. 6, 2021, U.S. Capitol 'protestors car...","April 05, 2024",Written Copy on Website,Politifact,04/05/2024,,"[{'verdict': 'FALSE', 'confidence': 1.0, 'reas..."
2,FALSE,Threads Post,"""Not even one rocket (from Iran) hit Israel.""","April 14, 2024",Social Media,Politifact,4/15/2024,https://www.politifact.com/factchecks/2024/apr...,"[{'verdict': 'FALSE', 'confidence': 0.9, 'reas..."
3,FALSE,Instagram Post,"""326,000 migrants were flown to Florida with t...","April 04, 2024",Social Media,Politifact,4/12/2024,https://www.politifact.com/factchecks/2024/apr...,
4,FALSE,Donald Trump,"""Crime is down in Venezuela by 67% because the...","April 02, 2024",Speech,Politifact,4/10/2024,https://www.politifact.com/factchecks/2024/apr...,
...,...,...,...,...,...,...,...,...,...
78,FALSE,Nicole Shanahan,"""I will be the the youngest vice president in ...","March 26, 2024",Speech,factcheck.org,3/27/2024,https://factcheck.org/2024/03/factchecking-rfk...,
79,FALSE,Nicole Shanahan,"""Pharmaceutical medicine” was one of “three ma...","March 26, 2024",Speech,factcheck.org,3/27/2024,factcheck.org/2024/03/factchecking-rfk-jr-s-v-...,
80,FALSE,Donald Trump,"""This year, the typical family’s tax bill is t...","April 15, 2024",Truth Social,factcheck.org,4/17/2024,https://www.factcheck.org/2024/04/trumps-unfou...,
81,MOSTLY FALSE,Robert F. Kennedy Jr.,“Those policies that both of them engineered t...,"March 26, 2024",Speech,factcheck.org,3/27/2024,https://factcheck.org/2024/03/factchecking-rfk...,


In [6]:
# Pipeline-wide overrides applied to the imported `main` module
main.VERBOSE = False # Print intermediate results
# main.VERDICTS=["Supported", "Refuted", "Not Enough Evidence", "Conflicting Evidence/Cherry-picking"]

# Language model for DSPy: the API key is read from the environment
# (populated by dotenv above) and caching is turned off via cache=False.
gemini_api_key = os.getenv('GOOGLE_GEMINI_API_KEY')
lm = dspy.LM('gemini/gemini-1.5-pro', api_key=gemini_api_key, cache=False)
# Alternative local models served through Ollama:
# lm = dspy.LM('ollama_chat/mistral', api_base='http://localhost:11434', api_key='', cache=False)
# lm = dspy.LM('ollama_chat/llama3.1:8b', api_base='http://localhost:11434', api_key='', cache=False)
# lm = dspy.LM('ollama_chat/deepseek-r1:7b', api_base='http://localhost:11434', api_key='', cache=False)
dspy.settings.configure(lm=lm)

# Fact-checking pipeline backed by DuckDuckGo search
search_provider = main.SearchProvider(provider="duckduckgo")
pipeline = main.FactCheckPipeline(
    search_provider=search_provider,
    model_name=lm,
    embedding_model=main.EMBEDDING_MODEL,
    retriever_k=2,
)

In [7]:
# Run the fact-checking pipeline `num_trials` times per statement and store the
# per-trial outputs as a list of dicts in the `<model>_results` column.
# The cell is resumable: rows that already hold enough trials are skipped and
# partially completed rows are topped up rather than started over.
model = 'gemini-1.5-pro'
num_trials = 3
results_col = f'{model}_results'

# If column doesn't exist, create it
if results_col not in df.columns: df[results_col] = None
# Object dtype so each cell can hold a Python list
df[results_col] = df[results_col].astype(object)

for index in tqdm(df.index):
    # Anything that is not a list/tuple (None, NaN, ...) counts as "no results yet";
    # calling len() on a NaN placeholder would raise TypeError.
    stored = df.at[index, results_col]
    results = list(stored) if isinstance(stored, (list, tuple)) else []

    # Skip rows that already have at least the requested number of trials
    if len(results) >= num_trials:
        continue

    # Row fields do not change between trials, so read them once per row
    # (label-based lookup, consistent with the label-based writes below).
    row = df.loc[index]
    statement = row['statement']
    statement_originator = row['statement_originator']
    statement_date = row['statement_date']
    gold_verdict = row['verdict']

    # Only run the trials that are still missing, so a partially filled row
    # ends up with exactly `num_trials` entries instead of growing on every re-run.
    for _ in tqdm(range(len(results), num_trials), leave=False):
        verdict, confidence, reasoning, claims = pipeline.fact_check(
            statement=statement,
            context=f"Statement Originator: {statement_originator}, Date Claim Was Made: {statement_date}"
        )
        results.append({
            'verdict': verdict,
            'confidence': confidence,
            'reasoning': reasoning,
            'claims': claims
        })

        # Update the dataframe after every trial so an interrupted run
        # (e.g. KeyboardInterrupt) keeps the trials that already finished.
        df.at[index, results_col] = results

        print_final_result(statement, verdict, confidence, reasoning, gold_verdict)

  0%|          | 0/83 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]


[A
[A
[A
[A
[A
[A
[A
[A


Final Fact-Check Result:
[36m   Statement: [97m“The National Guard in the HISTORY of its life, gets called in AFTER a disaster, not BEFORE something happens.”[0m [0m
[36m   Overall Verdict: [32mMOSTLY FALSE[0m [0m
[36m   Overall Confidence: [33m0.8[0m [0m
[36m   Overall Reasoning: [36mThe claim states the National Guard is *only* called in after a disaster. While the provided sources don't give specific examples of pre-disaster deployments, they do mention the National Guard's role in pre-positioning resources, suggesting they prepare and potentially deploy *before* disasters. This contradicts the claim's absolute assertion. The lack of a clear definition of "disaster" further complicates the issue.[0m [0m
[36m   Gold Verdict: [32mFALSE[0m [0m



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A


Final Fact-Check Result:
[36m   Statement: [97m“The National Guard in the HISTORY of its life, gets called in AFTER a disaster, not BEFORE something happens.”[0m [0m
[36m   Overall Verdict: [32mMOSTLY TRUE[0m [0m
[36m   Overall Confidence: [33m0.7[0m [0m
[36m   Overall Reasoning: [36mThe claim is mostly true because it correctly states the National Guard is deployed after disasters. However, it's too strong to say they are *never* deployed beforehand.  While the provided context lacks examples of pre-disaster deployments, this doesn't definitively rule them out.[0m [0m
[36m   Gold Verdict: [32mFALSE[0m [0m



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A


Final Fact-Check Result:
[36m   Statement: [97m“The National Guard in the HISTORY of its life, gets called in AFTER a disaster, not BEFORE something happens.”[0m [0m
[36m   Overall Verdict: [32mMOSTLY TRUE[0m [0m
[36m   Overall Confidence: [33m0.7[0m [0m
[36m   Overall Reasoning: [36mThe provided information primarily discusses the National Guard's role in disaster response *after* an event. While the claim aligns with this general understanding, the information doesn't explicitly rule out the possibility of preemptive deployments in specific circumstances. The absence of examples or explicit statements about preemptive personnel deployment prevents a definitive confirmation of the claim.[0m [0m
[36m   Gold Verdict: [32mFALSE[0m [0m


  0%|          | 0/3 [00:00<?, ?it/s]


[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A


Final Fact-Check Result:
[36m   Statement: [97m"On Jan. 6, 2021, U.S. Capitol 'protestors carried no weapons.' "[0m [0m
[36m   Overall Verdict: [32mFALSE[0m [0m
[36m   Overall Confidence: [33m1.0[0m [0m
[36m   Overall Reasoning: [36mKennedy's claim that January 6 protestors carried no weapons is false.  While the FBI testified that no firearms were confiscated *at the scene*, evidence shows that other weapons were present, including blunt objects, knives, and chemical irritants.  Some individuals also brought firearms onto the Capitol grounds or stored them nearby, leading to weapons charges.[0m [0m
[36m   Gold Verdict: [32mPANTS ON FIRE[0m [0m



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A


Final Fact-Check Result:
[36m   Statement: [97m"On Jan. 6, 2021, U.S. Capitol 'protestors carried no weapons.' "[0m [0m
[36m   Overall Verdict: [32mFALSE[0m [0m
[36m   Overall Confidence: [33m1.0[0m [0m
[36m   Overall Reasoning: [36mMultiple sources confirm that various weapons were confiscated from protesters at the U.S. Capitol on January 6, 2021. These included firearms, blunt objects, a spear, and chemical irritants. Several protesters also faced weapons charges. This directly contradicts the claim that protesters carried no weapons.[0m [0m
[36m   Gold Verdict: [32mPANTS ON FIRE[0m [0m



[A
[A
[A
[A
[A
[A
[A
[A


Final Fact-Check Result:
[36m   Statement: [97m"On Jan. 6, 2021, U.S. Capitol 'protestors carried no weapons.' "[0m [0m
[36m   Overall Verdict: [32mFALSE[0m [0m
[36m   Overall Confidence: [33m1.0[0m [0m
[36m   Overall Reasoning: [36mRobert F. Kennedy Jr.'s claim is refuted by substantial evidence documenting the presence of various weapons at the U.S. Capitol on January 6, 2021.  Multiple news outlets and court proceedings have confirmed the presence and use of weapons such as blunt objects, chemical irritants, and firearms during the attack.[0m [0m
[36m   Gold Verdict: [32mPANTS ON FIRE[0m [0m


  0%|          | 0/3 [00:00<?, ?it/s]


[A
[A
[A
[A
[A
[A


Final Fact-Check Result:
[36m   Statement: [97m"Not even one rocket (from Iran) hit Israel."[0m [0m
[36m   Overall Verdict: [32mFALSE[0m [0m
[36m   Overall Confidence: [33m0.9[0m [0m
[36m   Overall Reasoning: [36mNews reports and other sources indicate that Iran launched a significant attack against Israel overnight on April 13-14, 2024, which included over 120 ballistic missiles launched directly from Iran.  The claim that a Threads post on April 14, 2024, stated that no rockets from Iran hit Israel is not supported by evidence and appears to be fabricated.[0m [0m
[36m   Gold Verdict: [32mFALSE[0m [0m



[A
[A
[A
[A
[A
[A
[A
[A


Final Fact-Check Result:
[36m   Statement: [97m"Not even one rocket (from Iran) hit Israel."[0m [0m
[36m   Overall Verdict: [32mFALSE[0m [0m
[36m   Overall Confidence: [33m0.95[0m [0m
[36m   Overall Reasoning: [36mMultiple credible news sources reported a substantial attack by Iran on Israel on April 14, 2024, involving hundreds of missiles and drones. This contradicts the claim that no rockets hit Israel.[0m [0m
[36m   Gold Verdict: [32mFALSE[0m [0m



[A
[A
[A
[A


Final Fact-Check Result:
[36m   Statement: [97m"Not even one rocket (from Iran) hit Israel."[0m [0m
[36m   Overall Verdict: [32mFALSE[0m [0m
[36m   Overall Confidence: [33m1.0[0m [0m
[36m   Overall Reasoning: [36mMultiple news sources confirm that Iran launched missiles and drones at Israel on April 14, 2024, contradicting the claim that no rockets hit Israel.[0m [0m
[36m   Gold Verdict: [32mFALSE[0m [0m


  0%|          | 0/3 [00:00<?, ?it/s]


[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

KeyboardInterrupt: 

In [11]:
# Persist the DataFrame to 'results_v2.pkl', the same file the load-data cell
# reads back when it exists, so a later session can resume from it.
df.to_pickle('results_v2.pkl')