# Performance Evaluation

## Helper Functions

In [1]:
# import necessary packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
from tqdm.notebook import tqdm
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score

In [2]:
# Verdict classes; a label's position in this list is its integer class id.
class_labels = [
    "unverifiable",
    "false",
    "mostly false",
    "half true",
    "mostly true",
    "true",
]
# Reverse (id -> label) and forward (label -> id) lookups built from the list.
index_2_class = dict(enumerate(class_labels))
class_2_index = {name: idx for idx, name in index_2_class.items()}
# Integer ids in class order: [0, 1, 2, 3, 4, 5]
num_labels = list(class_2_index.values())

In [3]:
def generate_cm(y_true, y_pred, labels=None):
    '''
    Compute the confusion matrix of gold vs. predicted class ids.

    Parameters
    ----------
    y_true : array-like
        Gold class ids.
    y_pred : array-like
        Predicted class ids, same length as y_true.
    labels : list, optional
        Class ids that fix the row/column order of the matrix. Defaults to the
        module-level num_labels, the same label set generate_metrics uses, so
        the matrix keeps one row/column per class even when a class is absent
        from both y_true and y_pred.

    Returns
    -------
    ndarray of shape (len(labels), len(labels))
        Entry [i, j] counts the samples whose gold id is labels[i] and whose
        predicted id is labels[j].
    '''
    if labels is None:
        labels = num_labels
    # Without an explicit label list sklearn only keeps the ids it sees in the
    # data, which silently shrinks the matrix and breaks alignment with
    # class_labels.
    return confusion_matrix(y_true, y_pred, labels=labels)

def generate_metrics(y_true, y_pred, average='weighted'):
    '''
    Compute precision, recall and F1 over the classes in num_labels.

    Parameters
    ----------
    y_true : array-like
        Gold class ids.
    y_pred : array-like
        Predicted class ids, same length as y_true.
    average : str or None, optional
        Averaging mode forwarded to the sklearn scorers. The default
        'weighted' reproduces the original behaviour. Pass None to get one
        genuine score per class.

    Returns
    -------
    pandas.DataFrame
        Columns 'Precision', 'Recall', 'F1', indexed by class_labels.
        With a scalar average ('weighted', 'macro', 'micro') every row holds
        the SAME aggregate value: the scalar is broadcast over the index, so
        the rows are not per-class scores. With average=None each row holds
        the score of the corresponding class.
    '''
    scorers = {'Precision': precision_score, 'Recall': recall_score, 'F1': f1_score}
    scores = {
        column: scorer(y_true, y_pred, average=average, labels=num_labels)
        for column, scorer in scorers.items()
    }
    return pd.DataFrame(scores, index=class_labels)

## Label Generation

In [55]:
## Read the pilot claims CSV and pull out the claim text and gold verdict columns
df = pd.read_csv('../data/[FINAL] Pilot - Pilot Claims copy.csv')
statements, gold = (df[column].to_list() for column in ('statement', 'verdict'))

### Gemini

In [28]:
%reload_ext autoreload
%autoreload 2
import dotenv
import sys
import dspy
import os
sys.path.append('../pipeline_v2/')
import main 
dotenv.load_dotenv('../.env')

# Initialize search provider
main.NUM_SEARCH_RESULTS = 10 # Number of search results to retrieve
main.SCRAPE_TIMEOUT = 5 # Timeout for scraping a webpage (in seconds)
search_provider = main.SearchProvider(provider="duckduckgo")

# Initialize DSPy
# lm = dspy.LM('gemini/gemini-1.5-flash', api_key=os.getenv('GOOGLE_GEMINI_API_KEY'))
lm = dspy.LM('ollama_chat/mistral', api_base='http://localhost:11434', api_key='')
dspy.settings.configure(lm=lm)

# Initialize pipeline
embedding_model = "sentence-transformers/all-MiniLM-L6-v2"
main.VERBOSE = False # Print intermediate results
main.INTERACTIVE = False # Allow the user to provide feedback
main.USE_BM25 = True # Use BM25 for retrieval (in addition to cosine similarity)
main.BM25_WEIGHT = 0.5 # Weight for BM25 in the hybrid retrieval

pipeline = main.FactCheckPipeline(
    search_provider=search_provider,
    model_name=lm,
    embedding_model=embedding_model,
    retriever_k=2
)

# Example statement to fact-check
# statement = """And then there's the reality of the Trump economy, 
# where wages adjusted for inflation were rising. The wage gap between 
# rich and poor was shrinking. The savings rate for black Americans was 
# the highest in the history of our country."""

# statement = """The US economy is in a recession now in 2024."""
import time

MAX_ATTEMPTS = 5        # tries per statement before recording a failure
RETRY_BASE_DELAY = 2.0  # seconds to wait after the first failure; doubled after each later one

# One entry per statement, in order: a (verdict, confidence, reasoning, claims)
# tuple on success, or the statement's integer index as a failure placeholder.
results = []
for index, statement in enumerate(tqdm(statements)):
    verdict = None
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            verdict, confidence, reasoning, claims = pipeline.fact_check(statement)
            break
        except Exception as e:
            print(f"Error {e}: retrying for statement {index}, attempt {attempt}")
            # Back off before the next try: retrying immediately against an
            # overloaded or unreachable backend just burns the remaining attempts.
            if attempt < MAX_ATTEMPTS:
                time.sleep(RETRY_BASE_DELAY * 2 ** (attempt - 1))

    if verdict is None:
        results.append(index)
    else:
        results.append((verdict, confidence, reasoning, claims))
    # Checkpoint after every statement so an interrupted run keeps its progress.
    with open('results_v2.pkl', 'wb') as f:
        pickle.dump(results, f)

In [None]:
### REGENERATE RESULTS
# Re-run the pipeline only for statements whose stored entry is an integer
# failure placeholder, and write the new outcome back at the SAME position so
# results stays aligned one-to-one with statements.
import time

MAX_ATTEMPTS = 5        # tries per statement before leaving the placeholder
RETRY_BASE_DELAY = 2.0  # seconds to wait after the first failure; doubled after each later one

# NOTE(review): this reads results.pkl but checkpoints to results_v2.pkl --
# confirm the two different file names are intentional.
# NOTE: pickle.load can execute arbitrary code; only load files written by this notebook.
with open('results.pkl', 'rb') as f:
    results = pickle.load(f)
# statement = """The US economy is in a recession now in 2024."""
for index, statement in enumerate(tqdm(statements)):
    has_entry = index < len(results)
    if has_entry and not isinstance(results[index], int):
        continue  # already holds a (verdict, confidence, reasoning, claims) tuple
    verdict = None
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            verdict, confidence, reasoning, claims = pipeline.fact_check(statement)
            break
        except Exception as e:
            print(f"Error {e}: retrying for statement {index}, attempt {attempt}")
            # Back off before the next try instead of retrying immediately.
            if attempt < MAX_ATTEMPTS:
                time.sleep(RETRY_BASE_DELAY * 2 ** (attempt - 1))

    entry = index if verdict is None else (verdict, confidence, reasoning, claims)
    if has_entry:
        # Replace the placeholder in place; appending here would push the list
        # past len(statements) and leave the placeholder behind.
        results[index] = entry
    else:
        # The loaded list was shorter than statements: extend it in order.
        results.append(entry)
    with open('results_v2.pkl', 'wb') as f:
        pickle.dump(results, f)

  0%|          | 0/83 [00:00<?, ?it/s]

[36m         Query: [33mNational Guard Hurricane Katrina deployment dates[0m [0m


Processing sources: 100%|██████████| 10/10 [00:00<00:00, 25206.15it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[36m         Query: [33mHurricane Katrina timeline National Guard response[0m [0m


Processing sources: 100%|██████████| 10/10 [00:00<00:00, 95325.09it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[36m         Query: [33mNational Guard deployment 9/11 date[0m [0m


Processing sources: 100%|██████████| 10/10 [00:00<00:00, 117817.53it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[36m         Query: [33m9/11 timeline National Guard response[0m [0m


Processing sources: 100%|██████████| 10/10 [00:00<00:00, 110086.72it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[36m         Query: [33mNational Guard California wildfires 2020 deployment dates[0m [0m


Processing sources: 100%|██████████| 10/10 [00:00<00:00, 123361.88it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[36m         Query: [33mCalifornia wildfires 2020 timeline National Guard response[0m [0m


Processing sources: 100%|██████████| 10/10 [00:00<00:00, 92589.49it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[36m         Query: [33mJanuary 6th Capitol attack weapons seized[0m [0m


Processing sources: 100%|██████████| 10/10 [00:00<00:00, 96866.14it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[36m         Query: [33mweapons found on January 6th rioters[0m [0m


Processing sources: 100%|██████████| 10/10 [00:00<00:00, 45051.60it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[36m         Query: [33mimprovised weapons January 6th Capitol riot[0m [0m


Processing sources: 100%|██████████| 10/10 [00:00<00:00, 7691.74it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[36m         Query: [33mJanuary 6th Capitol attack improvised explosive devices[0m [0m


Processing sources: 100%|██████████| 10/10 [00:00<00:00, 126334.46it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Error litellm.InternalServerError: litellm.InternalServerError: VertexAIException - {
  "error": {
    "code": 503,
    "message": "The model is overloaded. Please try again later.",
    "status": "UNAVAILABLE"
  }
}
: retrying for statement 1, attempt 1
[36m         Query: [33mJanuary 6th Capitol attack weapons seized[0m [0m


Processing sources: 100%|██████████| 10/10 [00:00<00:00, 109798.53it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[36m         Query: [33mweapons found on January 6th rioters[0m [0m


Processing sources: 100%|██████████| 10/10 [00:00<00:00, 78692.38it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Error litellm.InternalServerError: litellm.InternalServerError: VertexAIException - {
  "error": {
    "code": 503,
    "message": "The model is overloaded. Please try again later.",
    "status": "UNAVAILABLE"
  }
}
: retrying for statement 1, attempt 2
[36m         Query: [33mJanuary 6th Capitol attack weapons seized[0m [0m


Processing sources: 100%|██████████| 10/10 [00:00<00:00, 96866.14it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[36m         Query: [33mweapons found on January 6th rioters[0m [0m


Processing sources: 100%|██████████| 10/10 [00:00<00:00, 126334.46it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Error litellm.InternalServerError: litellm.InternalServerError: VertexAIException - {
  "error": {
    "code": 503,
    "message": "The model is overloaded. Please try again later.",
    "status": "UNAVAILABLE"
  }
}
: retrying for statement 1, attempt 3
[36m         Query: [33mJanuary 6th Capitol attack weapons seized[0m [0m


Processing sources: 100%|██████████| 10/10 [00:00<00:00, 112447.83it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[36m         Query: [33mweapons found on January 6th rioters[0m [0m


Processing sources: 100%|██████████| 10/10 [00:00<00:00, 105120.40it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[36m         Query: [33mimprovised weapons January 6th Capitol riot[0m [0m


Processing sources: 100%|██████████| 10/10 [00:00<00:00, 104077.02it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[36m         Query: [33mJanuary 6th Capitol attack improvised explosive devices[0m [0m


Processing sources: 100%|██████████| 10/10 [00:00<00:00, 124830.48it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [27]:
# Number of entries that are still integer failure placeholders.
sum(1 for entry in results if type(entry) is int)

55

In [65]:
%reload_ext autoreload
%autoreload 2
import dotenv
import sys
import dspy
import os
sys.path.append('../pipeline_v2/')
import main 
dotenv.load_dotenv('../.env')

# Initialize search provider
main.NUM_SEARCH_RESULTS = 10 # Number of search results to retrieve
main.SCRAPE_TIMEOUT = 5 # Timeout for scraping a webpage (in seconds)
search_provider = main.SearchProvider(provider="duckduckgo")

# Initialize DSPy
# lm = dspy.LM('gemini/gemini-1.5-flash', api_key=os.getenv('GOOGLE_GEMINI_API_KEY'))
lm = dspy.LM('ollama_chat/mistral', api_base='http://localhost:11434', api_key='')
dspy.settings.configure(lm=lm)

# Initialize pipeline
embedding_model = "sentence-transformers/all-MiniLM-L6-v2"
main.VERBOSE = False # Print intermediate results
main.INTERACTIVE = False # Allow the user to provide feedback
main.USE_BM25 = True # Use BM25 for retrieval (in addition to cosine similarity)
main.BM25_WEIGHT = 0.5 # Weight for BM25 in the hybrid retrieval

pipeline = main.FactCheckPipeline(
    search_provider=search_provider,
    model_name=lm,
    embedding_model=embedding_model,
    retriever_k=2
)

# Example statement to fact-check
# statement = """And then there's the reality of the Trump economy, 
# where wages adjusted for inflation were rising. The wage gap between 
# rich and poor was shrinking. The savings rate for black Americans was 
# the highest in the history of our country."""

In [66]:
# Run the pipeline on one hand-written claim.
statement = (
    "The National Guard in the HISTORY of its life, gets called in AFTER a disaster, "
    "not BEFORE something happens."
)
(verdict, confidence, reasoning, claims) = pipeline.fact_check(statement)

                                                          

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

                                                          

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

KeyboardInterrupt: 

### Mistral

In [39]:
%reload_ext autoreload
%autoreload 2
import dotenv
import sys
import dspy
import os
sys.path.append('../pipeline_v2/')
import main 
dotenv.load_dotenv('../.env')

# Initialize search provider
main.NUM_SEARCH_RESULTS = 10 # Number of search results to retrieve
main.SCRAPE_TIMEOUT = 5 # Timeout for scraping a webpage (in seconds)
search_provider = main.SearchProvider(provider="duckduckgo")

# Initialize DSPy
# lm = dspy.LM('gemini/gemini-1.5-flash', api_key=os.getenv('GOOGLE_GEMINI_API_KEY'))
lm = dspy.LM('ollama_chat/mistral', api_base='http://localhost:11434', api_key='')
dspy.settings.configure(lm=lm)

# Initialize pipeline
embedding_model = "sentence-transformers/all-MiniLM-L6-v2"
main.VERBOSE = False # Print intermediate results
main.INTERACTIVE = False # Allow the user to provide feedback
main.USE_BM25 = True # Use BM25 for retrieval (in addition to cosine similarity)
main.BM25_WEIGHT = 0.5 # Weight for BM25 in the hybrid retrieval

pipeline = main.FactCheckPipeline(
    search_provider=search_provider,
    model_name=lm,
    embedding_model=embedding_model,
    retriever_k=2
)

# Example statement to fact-check
# statement = """And then there's the reality of the Trump economy, 
# where wages adjusted for inflation were rising. The wage gap between 
# rich and poor was shrinking. The savings rate for black Americans was 
# the highest in the history of our country."""

import time

MAX_ATTEMPTS = 5        # tries per statement before recording a failure
RETRY_BASE_DELAY = 2.0  # seconds to wait after the first failure; doubled after each later one

# Resume from the previous checkpoint when there is one; start empty on a first run.
# NOTE: pickle.load can execute arbitrary code; only load files written by this notebook.
try:
    with open('mistral_results.pkl', 'rb') as f:
        mistral_results = pickle.load(f)
except FileNotFoundError:
    mistral_results = []

# statement = """The US economy is in a recession now in 2024."""
for index, statement in enumerate(tqdm(statements)):
    has_entry = index < len(mistral_results)
    # Skip statements that already have a verdict tuple. Integer entries are
    # failure placeholders and get another try; indices past the end of the
    # checkpoint have not been processed yet.
    if has_entry and not isinstance(mistral_results[index], int):
        continue
    verdict = None
    for attempt in range(1, MAX_ATTEMPTS + 1):
        try:
            verdict, confidence, reasoning, claims = pipeline.fact_check(statement)
            break
        except Exception as e:
            print(f"Error {e}: retrying for statement {index}, attempt {attempt}")
            # Back off before the next try instead of retrying immediately.
            if attempt < MAX_ATTEMPTS:
                time.sleep(RETRY_BASE_DELAY * 2 ** (attempt - 1))

    entry = index if verdict is None else (verdict, confidence, reasoning, claims)
    if has_entry:
        # Overwrite the placeholder so position i always describes statement i.
        mistral_results[index] = entry
    else:
        mistral_results.append(entry)
    # Checkpoint after every statement so an interrupted run keeps its progress.
    with open('mistral_results.pkl', 'wb') as f:
        pickle.dump(mistral_results, f)

  0%|          | 0/83 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 76818.75it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 185588.67it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 140748.46it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 100102.72it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 75166.74it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 44858.87it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 58826.14it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 203606.99it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 80659.69it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 138884.24it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 80659.69it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 212908.83it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 49286.77it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 73584.28it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 81920.00it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 199728.76it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 76260.07it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 137068.76it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 75846.37it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 131482.88it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 116185.71it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 140748.46it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 35575.10it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 123725.78it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 90982.73it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 80815.11it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 42196.22it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 64428.63it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 54330.36it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 221920.85it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Error list indices must be integers or slices, not str: retrying for statement 4, attempt 1


Processing sources: 100%|██████████| 10/10 [00:00<00:00, 22745.68it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 204600.20it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 85598.04it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 114912.44it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 42027.09it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 122282.92it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 135300.13it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 116508.44it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 80659.69it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 103054.15it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 42538.58it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 99864.38it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Error list indices must be integers or slices, not str: retrying for statement 4, attempt 2
Search error: https://html.duckduckgo.com/html RuntimeError: error sending request for url (https://html.duckduckgo.com/html): client error (Connect)

Caused by:
    0: client error (Connect)
    1: dns error: failed to lookup address information: nodename nor servname provided, or not known
    2: failed to lookup address information: nodename nor servname provided, or not known, waiting 2s before retry 1/3
Search error: https://lite.duckduckgo.com/lite/ RuntimeError: error sending request for url (https://lite.duckduckgo.com/lite/): client error (Connect)

Caused by:
    0: client error (Connect)
    1: dns error: failed to lookup address information: nodename nor servname provided, or not known
    2: failed to lookup address information: nodename nor servname provided, or not known, waiting 2s before retry 2/3
Error Search error: https://lite.duckduckgo.com/lite/ RuntimeError: error sending 

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 12438.62it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 212908.83it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 16611.10it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 24686.90it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 57143.11it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 90006.52it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 131482.88it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 63262.50it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 5020.11it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 24862.50it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 100102.72it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 81920.00it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 56148.65it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 151418.92it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 47608.44it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 94466.31it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 69905.07it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 96199.63it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 68534.38it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 164482.51it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Error list indices must be integers or slices, not str: retrying for statement 5, attempt 1


Processing sources: 100%|██████████| 10/10 [00:00<00:00, 40840.35it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 120525.98it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 87563.76it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 76398.98it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 23981.15it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 85423.71it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 31655.12it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 112147.17it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Error list indices must be integers or slices, not str: retrying for statement 5, attempt 2


Processing sources: 100%|██████████| 10/10 [00:00<00:00, 34606.47it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 85423.71it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Error string indices must be integers: retrying for statement 5, attempt 3


Processing sources: 100%|██████████| 10/10 [00:00<00:00, 69442.12it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 19152.07it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Error string indices must be integers: retrying for statement 5, attempt 4


Processing sources: 100%|██████████| 10/10 [00:00<00:00, 86838.59it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 136622.28it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 84733.41it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 101067.57it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 46968.69it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 48044.72it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 7189.41it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 129854.61it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 93414.34it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 161319.38it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 87930.90it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 97090.37it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 50533.78it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 95325.09it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 107271.20it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 13929.94it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Error string indices must be integers: retrying for statement 6, attempt 1


Processing sources: 100%|██████████| 10/10 [00:00<00:00, 36126.65it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 127875.12it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 50963.60it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 68985.26it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 46707.17it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 143150.31it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 63743.22it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 184771.10it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 73584.28it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 76260.07it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 185588.67it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Error string indices must be integers: retrying for statement 6, attempt 2


Processing sources: 100%|██████████| 10/10 [00:00<00:00, 18315.74it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 158875.15it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 49519.53it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 123725.78it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 92589.49it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 97769.32it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 233016.89it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 192399.27it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 38479.85it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 114912.44it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 142663.40it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 66576.25it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 91578.69it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 102051.19it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 45491.37it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 47393.27it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 74104.31it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 114912.44it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 51275.11it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 145131.63it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 171897.70it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 144631.17it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 48265.87it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 17505.44it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 139345.65it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 76260.07it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 16396.81it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 147168.56it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 72944.42it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 42069.25it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 58416.49it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 44431.19it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 43151.28it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 68871.99it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 15528.71it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 51212.50it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 30885.89it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 54971.22it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 18055.55it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 98922.26it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 38764.36it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 64133.09it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 19082.37it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 36631.48it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 56223.91it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 103054.15it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 154202.35it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 63743.22it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 17704.96it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 49056.19it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 69327.34it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 200684.40it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 22133.53it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 47180.02it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 49056.19it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 61052.46it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 51781.53it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 50291.41it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 70849.73it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 85423.71it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 13311.03it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 107546.26it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 16282.24it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 73973.62it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 9293.83it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 120873.31it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 66260.73it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 41282.52it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 31161.25it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 87563.76it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 81442.80it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Processing sources: 100%|██████████| 10/10 [00:00<00:00, 81284.96it/s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
# Load previously saved Mistral pipeline results from a local pickle file
# into `mistral_results`.
# NOTE(security): pickle.load can execute arbitrary code while deserializing;
# only load 'mistral_results.pkl' if it was produced by a trusted run.
with open('mistral_results.pkl', 'rb') as f:
    mistral_results = pickle.load(f)

In [47]:
# Display the first two loaded results as a sanity check.
mistral_results[:2]

[('MOSTLY TRUE',
  0.9,
  'The claim is mostly true because the usual procedure for deploying the National Guard involves preparation stages before a disaster, but these are not for immediate response to an ongoing disaster. The National Guard is indeed deployed in disaster situations, but this usually happens after something has occurred. However, there are instances where the National Guard has been deployed before a disaster.',
  [Claim(text='The National Guard is typically called in after a disaster, not before something happens.', components=[ClaimComponent(question_text='What is the usual procedure for deploying the National Guard in disaster situations?', search_queries=['National Guard deployment procedure', 'Typical National Guard deployment in disaster'], component_type=None, answer=Answer(text='The usual procedure for deploying the National Guard in disaster situations involves a series of stages: predeployment, deployment, reunion/reintegration, activation, mobilization, an

In [48]:
# Display the results at positions 20 and 21.
mistral_results[20:22]

[('MIXED',
  1.0,
  "The first claim is supported by an article published in The Atlantic in 2020, which states that former President Donald Trump used the phrases 'suckers' and 'losers' to refer to American soldiers. The article also provides quotes from the president that support this claim.\n\nThe second claim is contradicted by evidence that shows Donald Trump visited an American cemetery outside of Paris during his presidency, as stated in the question-answer pairs provided.",
  [Claim(text="Donald Trump referred to American soldiers who gave their lives as 'suckers' and 'losers.'", components=[ClaimComponent(question_text="What is the source of the claim that Donald Trump referred to American soldiers as 'suckers' and 'losers'?", search_queries=['Donald Trump soldiers suckers losers quote'], component_type=None, answer=Answer(text="The source of the claim that Donald Trump referred to American soldiers as 'suckers' and 'losers' is an article published in The Atlantic in 2020.", c

In [49]:
# Display the results at positions 40 and 41.
mistral_results[40:42]

[('FALSE',
  1.0,
  'The claim that 326,000 migrants were flown to Florida using taxpayer dollars under President Joe Biden’s secret migrant flight program is not supported by the provided evidence.',
  [Claim(text='326,000 migrants were flown to Florida using taxpayer dollars under President Joe Biden’s secret migrant flight program.', components=[ClaimComponent(question_text="How many migrants have been flown to Florida under President Joe Biden's administration?", search_queries=['Number of migrants flown to Florida under Biden', 'Flight data for migrants in Florida during Biden presidency'], component_type=None, answer=Answer(text="The number of migrants flown to Florida under President Joe Biden's administration is not explicitly stated in the provided documents, but it can be inferred from the information that 'The bulk of migrants who qualify for the Biden administration's mass parole program are flying into Florida' (source 1).", citations=[Citation(snippet="The bulk of migrant

In [58]:
# Tabulate the first five pipeline results, one row per result; each result
# supplies the four fields named below, in this order.
result_columns = ['verdict', 'confidence', 'reasoning', 'claims']
mistral_results_df = pd.DataFrame(mistral_results[:5], columns=result_columns)

In [59]:
# Attach the first five entries of `gold` as a 'GOLD' column (mutates the frame in place).
# NOTE(review): the slice length 5 must stay in sync with the slice used to build
# mistral_results_df, and rows are matched to `gold` purely by position.
mistral_results_df['GOLD'] = gold[:5]

In [64]:
# Print the reasoning text of the first row in full (print avoids the
# truncated/quoted repr a bare expression would show).
first_reasoning = mistral_results_df.loc[0, 'reasoning']
print(first_reasoning)

The claim is mostly true because the usual procedure for deploying the National Guard involves preparation stages before a disaster, but these are not for immediate response to an ongoing disaster. The National Guard is indeed deployed in disaster situations, but this usually happens after something has occurred. However, there are instances where the National Guard has been deployed before a disaster.


In [6]:
# Switch on the pipeline module's VERBOSE flag (the setup cell describes it as
# "Print intermediate results"). This mutates module-level state in `main`.
main.VERBOSE = True

In [62]:
# Statement to fact-check; `s` is what a later cell passes to pipeline.fact_check.
s = 'In New York, there are no barriers to law enforcement to work with the federal government on immigration laws, and there are 100 crimes where migrants can be handed over.'

In [9]:
# Statement to fact-check.
# NOTE(review): another cell also assigns `s`; whichever cell ran last determines
# the value seen by pipeline.fact_check(s).
s = 'Support for Roe is higher today in America than it has ever been.'
# Build a Gemini 1.5 Flash language-model handle; the API key is read from the
# GOOGLE_GEMINI_API_KEY environment variable (None if it is unset).
lm = dspy.LM('gemini/gemini-1.5-flash', api_key=os.getenv('GOOGLE_GEMINI_API_KEY'))

In [18]:
# Pipeline setup: make the project module `main` importable, load environment
# variables, configure the search provider and DSPy language model, set the
# module-level pipeline flags, and construct the FactCheckPipeline instance.
# NOTE(review): this appears to repeat the setup cell near the top of the
# notebook (there with the Gemini LM line commented out) — consider keeping one.
%reload_ext autoreload
%autoreload 2
import dotenv
import sys
import dspy
import os
# '../pipeline_v2/' must be on sys.path before `import main` can resolve.
sys.path.append('../pipeline_v2/')
import main 
# Load variables from '../.env' into the environment (the API key is read from it below via os.getenv).
dotenv.load_dotenv('../.env')

# Initialize search provider
main.NUM_SEARCH_RESULTS = 10 # Number of search results to retrieve
main.SCRAPE_TIMEOUT = 5 # Timeout for scraping a webpage (in seconds)
search_provider = main.SearchProvider(provider="duckduckgo")

# Initialize DSPy
# Active LM: Gemini 1.5 Flash; the local Ollama/Mistral alternative is kept commented out below.
lm = dspy.LM('gemini/gemini-1.5-flash', api_key=os.getenv('GOOGLE_GEMINI_API_KEY'))
# lm = dspy.LM('ollama_chat/mistral', api_base='http://localhost:11434', api_key='')
dspy.settings.configure(lm=lm)

# Initialize pipeline
embedding_model = "sentence-transformers/all-MiniLM-L6-v2"
main.VERBOSE = False # Print intermediate results
main.INTERACTIVE = False # Allow the user to provide feedback
main.USE_BM25 = True # Use BM25 for retrieval (in addition to cosine similarity)
main.BM25_WEIGHT = 0.5 # Weight for BM25 in the hybrid retrieval

# NOTE(review): `model_name` receives the dspy.LM object itself rather than a
# name string — confirm this is what FactCheckPipeline expects.
pipeline = main.FactCheckPipeline(
    search_provider=search_provider,
    model_name=lm,
    embedding_model=embedding_model,
    retriever_k=2
)

# Example statement to fact-check
# statement = """And then there's the reality of the Trump economy, 
# where wages adjusted for inflation were rising. The wage gap between 
# rich and poor was shrinking. The savings rate for black Americans was 
# the highest in the history of our country."""

# Print final result
# NOTE(review): only the header is printed; this cell never calls
# pipeline.fact_check, so no result follows it.
print("\nFinal Fact-Check Result:")


Final Fact-Check Result:


In [21]:
# Run the pipeline on the statement held in `s` and unpack its four-part result.
# NOTE(review): the saved output of this cell is an exception ("Failed to extract
# claim"), so these four names may not have been bound by this run.
verdict, confidence, reasoning, claims = pipeline.fact_check(s)

Exception: Failed to extract claim: The statement "Support for Roe is higher today in America than it has ever been" is a claim about the level of public support for Roe v. Wade throughout US history.  To extract a verifiable claim, I need to specify a time period for comparison.  Since the statement implies a comparison across all of US history, it's impossible to verify without access to comprehensive historical polling data across the entire history of the United States.  Therefore, I cannot create a claim that meets the criteria of being specific, testable, and containing sufficient context for verification.  The claim is too broad.

In [None]:
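# TODO: regenerate results. This cell contains no code, so the output saved
# beneath it was presumably produced by an earlier version of the cell.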



[('UNVERIFIABLE',
  0.3,
  'The claim cannot be definitively verified or refuted based on the provided information.',
  [Claim(text='Taxpayer dollars will be used to pay for illegal immigrants to fly into a specific town.', components=[ClaimComponent(question_text='Is there any evidence of taxpayer dollars being used for flights?', search_queries=['taxpayer funds flight', 'government funding air travel'], component_type=None, answer=Answer(text='There is evidence that taxpayer dollars have been used for flights, as Secretary Buttigieg has come under investigation for using nearly $41,905.20 in taxpayer funds on private jet flights since February 2021 (source: <https://www.atr.org/buttigieg-under-investigation-taxpayer-funded-private-jet-use/>).', citations=[Citation(snippet='Secretary Buttigieg came under fire in December when it was revealed that he had squandered taxpayer funds on nearly 20 flights aboard private jets since assuming office in February 2021. After months of silence, D

In [54]:
# Retry the pipeline on a fixed probe statement, up to `max_attempts` times.
# On the first success the four result names are bound and the loop stops.
# If every attempt fails, raise instead of ending silently: otherwise
# `verdict`, `confidence`, `reasoning` and `claims` would be left unbound (or
# stale from an earlier cell) and later cells would fail in a confusing way.
max_attempts = 5
last_error = None
for i in range(max_attempts):
    try:
        verdict, confidence, reasoning, claims = pipeline.fact_check("water is not wet")
    except Exception as e:
        # Keep a reference: Python unbinds `e` when the except block exits.
        last_error = e
        print(f"Error: {e}, attempt {i+1}")
        continue
    break
else:
    # Reached only when the loop finished without hitting `break`.
    raise RuntimeError(
        f"fact_check failed on all {max_attempts} attempts"
    ) from last_error

[36m===== Starting Fact Check Pipeline =====[0m
[36m Original Statement: [97mwater is not wet[0m [0m
[36m  ===== Atomic Claim Extraction =====[0m
[36m   Extracted Claims (0):  [0m
Error: list index out of range, attempt 1
[36m===== Starting Fact Check Pipeline =====[0m
[36m Original Statement: [97mwater is not wet[0m [0m
[36m  ===== Atomic Claim Extraction =====[0m
[36m   Extracted Claims (0):  [0m
Error: list index out of range, attempt 2
[36m===== Starting Fact Check Pipeline =====[0m
[36m Original Statement: [97mwater is not wet[0m [0m
[36m  ===== Atomic Claim Extraction =====[0m
[36m   Extracted Claims (0):  [0m
Error: list index out of range, attempt 3
[36m===== Starting Fact Check Pipeline =====[0m
[36m Original Statement: [97mwater is not wet[0m [0m
[36m  ===== Atomic Claim Extraction =====[0m
[36m   Extracted Claims (0):  [0m
Error: list index out of range, attempt 4
[36m===== Starting Fact Check Pipeline =====[0m
[36m Original Stateme