In [101]:
from dspy import Retrieve, Prediction
import dspy
import dotenv
import os
import sys
import pandas as pd

In [102]:
dotenv.load_dotenv('../.env')
# lm = dspy.LM('gemini/gemini-1.5-flash', api_key=os.getenv('GOOGLE_GEMINI_API_KEY'), cache=False)
lm = dspy.LM('ollama_chat/mistral', api_base='http://localhost:11434', api_key='', cache=False)
dspy.configure(lm=lm)

#### Extract verdicts

In [103]:
# import eval data
df_gemini = pd.read_pickle('../benchmark/results_v2_gemini.pkl')
df_mistral = pd.read_pickle('../benchmark/results_v2_mistral.pkl')

In [104]:
# Define the ordinal mapping
VERDICT_MAP = {
    "TRUE": 5,
    "MOSTLY TRUE": 4,
    "HALF TRUE": 3,
    "MOSTLY FALSE": 2,
    "FALSE": 1,
    "UNVERIFIABLE": 0,
    # Weird cases
    "PANTS ON FIRE": 1, # Pants on fire is the same as false
    "MOSTLY UNVERIFIABLE": 0,
    "INDIFFERENT": 0,
    'MOSTLY HALF TRUE': 4,
    'PARTIALLY TRUE': 3,
}

def map_df(model, df):
    '''map the dataframe to extract the verdicts and calculate the errors'''

    # Extract the verdicts from the results columns
    df['pred_verdicts_baseline'] = df[f'{model}_baseline_results'].apply(lambda x: [result['verdict'] for result in x] if x else None)
    df['pred_verdicts_pipeline'] = df[f'{model}_pipeline_results'].apply(lambda x: [result['verdict'] for result in x] if x else None)

    # Clean up verdicts with extraneous text (not the cleanest/fastest method but it works)
    # "UNVERIFIABLE (as of the time of writing, the statement cannot be definitively verified or refuted)": 0,
    # 'MOSTLY TRUE - Kelly Ayotte accurately mentioned a relevant bill regarding sanctuary states, but it is unclear if Joyce Craig opposed the bill since she was no longer in office when it was introduced.': 4,
    for i, row in df.iterrows():
        cols = ['pred_verdicts_baseline', 'pred_verdicts_pipeline']
        for col in cols: 
            verdicts = row[col]
            if verdicts:
                verdicts = [v.split(':')[0].split('-')[0].split('(')[0].split('.')[0].strip() if len(v) > 12 else v for v in verdicts]
            df.at[i, col] = verdicts

    df.dropna(subset=['pred_verdicts_pipeline', 'pred_verdicts_baseline'], inplace=True)

    df[['verdict', 'pred_verdicts_baseline', 'pred_verdicts_pipeline']]

    for i in range(len(df)):
        true_val = VERDICT_MAP[df.iloc[i]['verdict']]
        
        # Get pass@1 predictions
        baseline_pred = df['pred_verdicts_baseline'][i][0]
        pipeline_pred = df['pred_verdicts_pipeline'][i][0]
        # Set pass@1 predictions to its own column
        df.at[i, 'baseline_pass1_verdict'] = baseline_pred
        df.at[i, 'pipeline_pass1_verdict'] = pipeline_pred

        # Calculate pass@1 errors
        df.at[i, 'baseline_pass1_dist'] = abs(true_val - VERDICT_MAP[baseline_pred]) if baseline_pred != "UNVERIFIABLE" else None
        df.at[i, 'pipeline_pass1_dist'] = abs(true_val - VERDICT_MAP[pipeline_pred]) if pipeline_pred != "UNVERIFIABLE" else None
        df.at[i, 'baseline_pass1_MSE'] = (true_val - VERDICT_MAP[baseline_pred])**2 if baseline_pred != "UNVERIFIABLE" else None
        df.at[i, 'pipeline_pass1_MSE'] = (true_val - VERDICT_MAP[pipeline_pred])**2 if pipeline_pred != "UNVERIFIABLE" else None
        
        # Get pass@3 predictions
        sorted_baseline_results = sorted(df.at[i, 'pred_verdicts_baseline'], key=lambda x: (VERDICT_MAP[x] - VERDICT_MAP[df.at[i, 'verdict']])**2 if x != 'UNVERIFIABLE' else 100)
        sorted_pipeline_results = sorted(df.at[i, 'pred_verdicts_pipeline'], key=lambda x: (VERDICT_MAP[x] - VERDICT_MAP[df.at[i, 'verdict']])**2 if x != 'UNVERIFIABLE' else 100)
        best_baseline_pred = sorted_baseline_results[0]
        best_pipeline_pred = sorted_pipeline_results[0]

        # Set pass@3 predictions
        df.at[i, 'baseline_pass3_verdict'] = best_baseline_pred
        df.at[i, 'pipeline_pass3_verdict'] = best_pipeline_pred
        
        # Set pass@3 errors
        df.at[i, 'baseline_pass3_dist'] = abs(true_val - VERDICT_MAP[best_baseline_pred]) if baseline_pred != "UNVERIFIABLE" else None
        df.at[i, 'pipeline_pass3_dist'] = abs(true_val - VERDICT_MAP[best_pipeline_pred]) if pipeline_pred != "UNVERIFIABLE" else None
        df.at[i, 'baseline_pass3_MSE'] = (true_val - VERDICT_MAP[best_baseline_pred])**2 if best_baseline_pred != "UNVERIFIABLE" else None
        df.at[i, 'pipeline_pass3_MSE'] = (true_val - VERDICT_MAP[best_pipeline_pred])**2 if best_pipeline_pred != "UNVERIFIABLE" else None

    # Now you can sort by errors, for example:
    print("\nTop 5 statements with highest pipeline pass@1 errors:")
    display(df.nlargest(5, 'pipeline_pass1_MSE')[['statement', 'verdict', 'pred_verdicts_pipeline', 'pipeline_pass1_MSE']])

    print("\nTop 5 statements with highest pipeline pass@3 errors:")
    display(df.nlargest(5, 'pipeline_pass3_MSE')[['statement', 'verdict', 'pred_verdicts_pipeline', 'pipeline_pass3_MSE']])

    return df

In [105]:
gemini_df = map_df('gemini', df_gemini)
mistral_df = map_df('mistral', df_mistral)


Top 5 statements with highest pipeline pass@1 errors:


Unnamed: 0,statement,verdict,pred_verdicts_pipeline,pipeline_pass1_MSE
147,“Nearly 90% of all UW graduates stay in Wiscon...,TRUE,"[FALSE, UNVERIFIABLE, FALSE]",16.0
38,"Tim Walz said he carried weapons in war, but “...",TRUE,"[MOSTLY FALSE, MOSTLY TRUE, MOSTLY FALSE]",9.0
119,"“Remember in 2020, 55 of the biggest companies...",FALSE,"[MOSTLY TRUE, MOSTLY FALSE, MOSTLY TRUE]",9.0
13,Says opponent Eric Hovde “supports a $4 trilli...,MOSTLY TRUE,"[MOSTLY FALSE, MOSTLY TRUE, MOSTLY TRUE]",4.0
18,“There was a bill to basically create a ban to...,MOSTLY TRUE,"[MOSTLY FALSE, MOSTLY TRUE, UNVERIFIABLE]",4.0



Top 5 statements with highest pipeline pass@3 errors:


Unnamed: 0,statement,verdict,pred_verdicts_pipeline,pipeline_pass3_MSE
147,“Nearly 90% of all UW graduates stay in Wiscon...,TRUE,"[FALSE, UNVERIFIABLE, FALSE]",16.0
32,“Dave McCormick is fully against abortion.”,MOSTLY FALSE,"[MOSTLY TRUE, MOSTLY TRUE, UNVERIFIABLE]",4.0
35,"“400,000 workers are now in a union that were ...",MOSTLY TRUE,"[MOSTLY FALSE, MOSTLY FALSE, UNVERIFIABLE]",4.0
41,"“Even before the pandemic, America went into a...",MOSTLY TRUE,"[UNVERIFIABLE, UNVERIFIABLE, MOSTLY FALSE]",4.0
115,"""[The Trump Administration] added more to the ...",HALF TRUE,"[UNVERIFIABLE, FALSE, UNVERIFIABLE]",4.0



Top 5 statements with highest pipeline pass@1 errors:


Unnamed: 0,statement,verdict,pred_verdicts_pipeline,pipeline_pass1_MSE
108,"""Pharmaceutical medicine has its place, but no...",False,"[MOSTLY TRUE, MOSTLY TRUE, MOSTLY FALSE]",9.0
123,"""We’ve had 12 elections in 24 years in Wiscons...",True,"[MOSTLY FALSE, MOSTLY FALSE, MOSTLY FALSE]",9.0
132,"""In February 2024, Nikki Haley lost the Nevada...",True,"[MOSTLY FALSE, MOSTLY FALSE, FALSE]",9.0
138,"""Former U.S. President Donald Trump's margin o...",False,"[MOSTLY TRUE, MOSTLY TRUE, UNVERIFIABLE]",9.0
4,“The Universities of Wisconsin are 43rd out of...,True,"[HALF TRUE, MOSTLY TRUE, MOSTLY TRUE]",4.0



Top 5 statements with highest pipeline pass@3 errors:


Unnamed: 0,statement,verdict,pred_verdicts_pipeline,pipeline_pass3_MSE
123,"""We’ve had 12 elections in 24 years in Wiscons...",TRUE,"[MOSTLY FALSE, MOSTLY FALSE, MOSTLY FALSE]",9.0
132,"""In February 2024, Nikki Haley lost the Nevada...",TRUE,"[MOSTLY FALSE, MOSTLY FALSE, FALSE]",9.0
138,"""Former U.S. President Donald Trump's margin o...",FALSE,"[MOSTLY TRUE, MOSTLY TRUE, UNVERIFIABLE]",9.0
29,"“Less than three months ago, Kamala Harris and...",MOSTLY FALSE,"[MOSTLY TRUE, UNVERIFIABLE, MOSTLY TRUE]",4.0
31,"""Typically you have three to four debates in a...",MOSTLY TRUE,"[MOSTLY FALSE, MOSTLY FALSE, MOSTLY FALSE]",4.0


In [288]:
from typing import List, Literal
import dspy
from pydantic import ValidationError
from utils import retry_function

# Define the fallacy types once to avoid repetition
FallacyType = Literal[
    'ad hominem', 'appeal to emotion', 'hasty generalization', 
    'irrelevant authority', 'red herring', 'black and white fallacy',
    'causal oversimplification', 'doubt', 'exaggeration or minimization',
    'appeal to fear/prejudice', 'flag-waving', 'loaded language',
    'name calling or labeling', 'reductio ad hitlerum', 'slogans',
    'strawman', 'thought-terminating cliches', 'whataboutism',
    'ad populum', 'circular reasoning', 'deductive fallacy',
    'equivocation', 'fallacy of extension', 'intentional fallacy',
    'evading burden of proof', 'cherrypicking', 
    'post hoc (causal oversimplification)', 'vagueness', 'none'
]

class FallacyDetectionWithReasoning(dspy.Signature):
    """Classify logical fallacies given the statement"""
    
    statement: str = dspy.InputField(desc="Statement to analyze")
    category: List[FallacyType] = dspy.OutputField(
        desc="Choose from the specified list of fallacies. Try to choose the most specific one."
    )
    confidence: float = dspy.OutputField(desc="0-1 confidence score")
    rationale: str = dspy.OutputField(desc="Step-by-step reasoning")

class FallacyDetector(dspy.Module):
    def __init__(self):
        self.classify = dspy.ChainOfThought(FallacyDetectionWithReasoning)
        self.retry = 3  # max attempts
    
    def forward(self, statement):
        for attempt in range(self.retry):
            try:
                result = self.classify(statement=statement)
                return result
            except ValidationError as e:
                if attempt < self.retry - 1:
                    print(f"Validation error: {e}. Retrying...")
                    continue
        # If all attempts failed, make one final attempt using just 'none' as the category
        try:
            return self.classify(
                statement=statement,
                category='none',
                confidence=0.5,
                rationale="Failed to classify fallacies after multiple attempts."
            )
        except Exception:
            raise ValueError("Unable to process fallacy detection.")
        

In [None]:
fallacy_detector = FallacyDetector()

In [297]:
for i, row in mistral_df.iterrows():
    if mistral_df.at[i, 'fallacy_class'] == None:
        mistral_df.at[i, 'fallacy_class'] = retry_function(fallacy_detector, statement=row['statement'])

In [298]:
mistral_df['fallacy_class'].value_counts()

fallacy_class
[reasoning, category, confidence, rationale]    1
[reasoning, category, confidence, rationale]    1
[reasoning, category, confidence, rationale]    1
[reasoning, category, confidence, rationale]    1
[reasoning, category, confidence, rationale]    1
                                               ..
[reasoning, category, confidence, rationale]    1
[reasoning, category, confidence, rationale]    1
[reasoning, category, confidence, rationale]    1
[reasoning, category, confidence, rationale]    1
[reasoning, category, confidence, rationale]    1
Name: count, Length: 150, dtype: int64

In [322]:
# mistral_df['fallacy'] = mistral_df['fallacy_class'].apply(lambda x: x.category)
# mistral_df['confidence'] = mistral_df['fallacy_class'].apply(lambda x: x.confidence)
# mistral_df['rationale'] = mistral_df['fallacy_class'].apply(lambda x: x.rationale)
# mistral_df['reasoning'] = mistral_df['fallacy_class'].apply(lambda x: x.reasoning)
# mistral_df.to_pickle('../benchmark/mistral_fallacy.pkl')
mistral_df_1 = mistral_df[['statement', 'verdict', 'pipeline_pass3_verdict', 'pipeline_pass3_dist', 'fallacy_cleaned', 'confidence', 'rationale', 'reasoning']]
mistral_df_1.to_excel('../benchmark/mistral_fallacy.xlsx')

In [321]:
mistral_df['fallacy_cleaned'] = mistral_df['fallacy'].apply(lambda x: x[0] if type(x) == list else x)

In [307]:
mistral_df_1[mistral_df_1['verdict'] == 'TRUE']['fallacy'].value_counts()

fallacy
exaggeration or minimization    14
none                             4
irrelevant authority             3
red herring                      2
causal oversimplification        1
hasty generalization             1
appeal to emotion                1
name calling or labeling         1
Name: count, dtype: int64

In [308]:
mistral_df_1[mistral_df_1['verdict'] == 'MOSTLY TRUE']['fallacy'].value_counts()

fallacy
exaggeration or minimization            10
name calling or labeling                 5
causal oversimplification                3
hasty generalization                     3
irrelevant authority                     1
cherrypicking                            1
ad populum                               1
appeal to fear/prejudice                 1
whataboutism                             1
post hoc (causal oversimplification)     1
Name: count, dtype: int64

In [309]:
mistral_df_1[mistral_df_1['verdict'] == 'HALF TRUE']['fallacy'].value_counts()

fallacy
exaggeration or minimization                                                 5
name calling or labeling                                                     4
post hoc (causal oversimplification)                                         4
ad hominem                                                                   2
red herring                                                                  2
causal oversimplification                                                    2
cherrypicking                                                                2
[exaggeration or minimization]                                               2
vagueness                                                                    1
[name calling or labeling, loaded language, exaggeration or minimization]    1
black and white fallacy                                                      1
whataboutism                                                                 1
Name: count, dtype: int64

In [310]:
mistral_df_1[mistral_df_1['verdict'] == 'MOSTLY FALSE']['fallacy'].value_counts()

fallacy
exaggeration or minimization            7
hasty generalization                    6
name calling or labeling                4
ad hominem                              3
post hoc (causal oversimplification)    2
irrelevant authority                    2
appeal to fear/prejudice                1
none                                    1
appeal to emotion                       1
Name: count, dtype: int64

In [311]:
mistral_df_1[mistral_df_1['verdict'] == 'FALSE']['fallacy'].value_counts()

fallacy
exaggeration or minimization                                                                                              17
name calling or labeling                                                                                                   8
post hoc (causal oversimplification)                                                                                       3
ad hominem                                                                                                                 2
irrelevant authority                                                                                                       2
appeal to emotion                                                                                                          2
causal oversimplification                                                                                                  2
appeal to fear/prejudice                                                                                             

In [205]:
gemini_df['fallacy'] = gemini_df['fallacy_class'].apply(lambda x: x.category)
gemini_df['confidence'] = gemini_df['fallacy_class'].apply(lambda x: x.confidence)
gemini_df['rationale'] = gemini_df['fallacy_class'].apply(lambda x: x.rationale)
gemini_df['reasoning'] = gemini_df['fallacy_class'].apply(lambda x: x.reasoning)
gemini_df.drop(columns=['fallacy_class'], inplace=True)

In [377]:
mapping = {"MOSTLY TRUE": "TRUE", 
            "HALF TRUE": "FALSE",
            "MOSTLY FALSE": "FALSE",
            "TRUE": "TRUE",
            "FALSE": "FALSE"
            }
gemini_df['binary'] = gemini_df['verdict'].map(mapping)
gemini_df['binary_preds_pass3_pipe'] = gemini_df['pipeline_pass3_verdict'].map(mapping)
gemini_df['binary_preds_pass3_base'] = gemini_df['baseline_pass3_verdict'].map(mapping)
gemini_df['binary_preds_pass1_pipe'] = gemini_df['pipeline_pass1_verdict'].map(mapping)
gemini_df['binary_preds_pass1_base'] = gemini_df['baseline_pass1_verdict'].map(mapping)

In [381]:
gemini_df_binary_preds_pass3_pipe = gemini_df[~gemini_df['binary_preds_pass3_pipe'].isna()]
gemini_df_binary_preds_pass3_base = gemini_df[~gemini_df['binary_preds_pass3_base'].isna()]
gemini_df_binary_preds_pass1_pipe = gemini_df[~gemini_df['binary_preds_pass1_pipe'].isna()]
gemini_df_binary_preds_pass1_base = gemini_df[~gemini_df['binary_preds_pass1_base'].isna()]

In [386]:
print('\npass@1 base')
print((gemini_df_binary_preds_pass1_base['binary']==gemini_df_binary_preds_pass1_base['binary_preds_pass1_base']).value_counts())
print('\npass@1 pipe')
print((gemini_df_binary_preds_pass1_pipe['binary']==gemini_df_binary_preds_pass1_pipe['binary_preds_pass1_pipe']).value_counts())
print('\npass@3 base')
print((gemini_df_binary_preds_pass3_base['binary']==gemini_df_binary_preds_pass3_base['binary_preds_pass3_base']).value_counts())
print('\npass@3 pipe')
print((gemini_df_binary_preds_pass3_pipe['binary']==gemini_df_binary_preds_pass3_pipe['binary_preds_pass3_pipe']).value_counts())


pass@1 base
True     58
False    30
Name: count, dtype: int64

pass@1 pipe
True     75
False    26
Name: count, dtype: int64

pass@3 base
True     62
False    33
Name: count, dtype: int64

pass@3 pipe
True     99
False    22
Name: count, dtype: int64


In [206]:
gemini_df.to_pickle('../benchmark/gemini_fallacy.pkl')

In [385]:
mapping = {"MOSTLY TRUE": "TRUE", 
            "HALF TRUE": "FALSE",
            "MOSTLY FALSE": "FALSE",
            "TRUE": "TRUE",
            "FALSE": "FALSE"
            }
mistral_df['binary'] = mistral_df['verdict'].map(mapping)
mistral_df['binary_preds_pass3_pipe'] = mistral_df['pipeline_pass3_verdict'].map(mapping)
mistral_df['binary_preds_pass3_base'] = mistral_df['baseline_pass3_verdict'].map(mapping)
mistral_df['binary_preds_pass1_pipe'] = mistral_df['pipeline_pass1_verdict'].map(mapping)
mistral_df['binary_preds_pass1_base'] = mistral_df['baseline_pass1_verdict'].map(mapping)

mistral_df_binary_preds_pass3_pipe = mistral_df[~mistral_df['binary_preds_pass3_pipe'].isna()]
mistral_df_binary_preds_pass3_base = mistral_df[~mistral_df['binary_preds_pass3_base'].isna()]
mistral_df_binary_preds_pass1_pipe = mistral_df[~mistral_df['binary_preds_pass1_pipe'].isna()]
mistral_df_binary_preds_pass1_base = mistral_df[~mistral_df['binary_preds_pass1_base'].isna()]


print('\npass@1 base')
print((mistral_df_binary_preds_pass1_base['binary']==mistral_df_binary_preds_pass1_base['binary_preds_pass1_base']).value_counts())
print('\npass@1 pipe')
print((mistral_df_binary_preds_pass1_pipe['binary']==mistral_df_binary_preds_pass1_pipe['binary_preds_pass1_pipe']).value_counts())
print('\npass@3 base')
print((mistral_df_binary_preds_pass3_base['binary']==mistral_df_binary_preds_pass3_base['binary_preds_pass3_base']).value_counts())
print('\npass@3 pipe')
print((mistral_df_binary_preds_pass3_pipe['binary']==mistral_df_binary_preds_pass3_pipe['binary_preds_pass3_pipe']).value_counts())


pass@1 base
True     40
False    24
Name: count, dtype: int64

pass@1 pipe
True     92
False    40
Name: count, dtype: int64

pass@3 base
True     49
False    32
Name: count, dtype: int64

pass@3 pipe
True     112
False     33
Name: count, dtype: int64


In [387]:
prediction = fallacy_detector(statement = "Tens of thousands died with, or of, measles annually in 19th Century America. By 1960 -- before the vaccine's introduction -- improvements in sanitation and nutrition had eliminated 98% of measles deaths. Good nutrition remains a best defense against most chronic and infectious illnesses. Vitamins A, C, and D, and foods rich in vitamins B12, C, and E should be part of a balanced diet.")

In [1]:
from typing import List, Literal
import dspy
from pydantic import ValidationError
from utils import retry_function

class OpenEndedFallacyDetectionWithReasoning(dspy.Signature):
    """Classify logical fallacies given the statement"""
    
    statement: str = dspy.InputField(desc="Statement to analyze")
    fallacy: str = dspy.OutputField(
        desc="A fallacy or a fallacious argument is one that seems valid but is not. Indentify the fallacy in the statement. If no fallacy is present, return 'none'."
    )
    confidence: float = dspy.OutputField(desc="0-1 confidence score")
    rationale: str = dspy.OutputField(desc="Step-by-step reasoning")

class FallacyDetector(dspy.Module):
    def __init__(self):
        self.classify = dspy.ChainOfThought(OpenEndedFallacyDetectionWithReasoning)
        self.retry = 3  # max attempts
    
    def forward(self, statement):
        for attempt in range(self.retry):
            try:
                result = self.classify(statement=statement)
                return result
            except ValidationError as e:
                if attempt < self.retry - 1:
                    print(f"Validation error: {e}. Retrying...")
                    continue
        # If all attempts failed, make one final attempt using just 'none' as the category
        try:
            return self.classify(
                statement=statement,
                category='none',
                confidence=0.5,
                rationale="Failed to classify fallacies after multiple attempts."
            )
        except Exception:
            raise ValueError("Unable to process fallacy detection.")
        

* 'fields' has been removed


In [6]:
dotenv.load_dotenv('../.env')
# lm = dspy.LM('gemini/gemini-1.5-flash', api_key=os.getenv('GOOGLE_GEMINI_API_KEY'), cache=False)
lm = dspy.LM('ollama_chat/mistral', api_base='http://localhost:11434', api_key='', cache=False)
dspy.configure(lm=lm)

In [15]:
lm = dspy.LM('ollama_chat/mistral', api_base='http://localhost:11434', api_key='', cache=False)
dspy.configure(lm=lm)
mistral_df['fallacy_detect'] = None
fallacy_detector = FallacyDetector()
for i, row in mistral_df.iterrows():
    if mistral_df.at[i, 'fallacy_detect'] == None:
        mistral_df.at[i, 'fallacy_detect'] = retry_function(fallacy_detector, statement=row['statement'])
mistral_df['fallacy'] = mistral_df['fallacy_detect'].apply(lambda x: x.fallacy)
mistral_df['confidence'] = mistral_df['fallacy_detect'].apply(lambda x: x.confidence)
mistral_df['rationale'] = mistral_df['fallacy_detect'].apply(lambda x: x.rationale)

In [None]:
gemini_df['fallacy'].to_list()

In [21]:
mistral_df['fallacy_detect'][0]

Prediction(
    reasoning="The statement is presented as if the governor's actions are solely responsible for the positive balance in the checking account and the record-high balance in the state savings account. However, it does not provide any context or evidence to support this claim. It is possible that other factors such as economic growth, tax increases, or budget cuts have also contributed to these balances.",
    fallacy='Hasty Generalization Fallacy',
    confidence=0.85,
    rationale='The statement makes a broad claim (the governor is solely responsible for the positive balance) based on a limited data point (the years when the governor was in office). A more thorough analysis would consider other factors that could have influenced the financial situation of Wisconsin.'
)

In [16]:
lm = dspy.LM('gemini/gemini-1.5-flash', api_key=os.getenv('GOOGLE_GEMINI_API_KEY'), cache=False)
dspy.configure(lm=lm)
gemini_df['fallacy_detect'] = None
fallacy_detector = FallacyDetector()
for i, row in gemini_df.iterrows():
    if gemini_df.at[i, 'fallacy_detect'] == None:
        gemini_df.at[i, 'fallacy_detect'] = retry_function(fallacy_detector, statement=row['statement'])
gemini_df['fallacy'] = gemini_df['fallacy_detect'].apply(lambda x: x.fallacy)
gemini_df['confidence'] = gemini_df['fallacy_detect'].apply(lambda x: x.confidence)
gemini_df['rationale'] = gemini_df['fallacy_detect'].apply(lambda x: x.rationale)

[31m Attempt 1 failed: [0m
[31m Error: litellm.RateLimitError: litellm.RateLimitError: VertexAIException - {
  "error": {
    "code": 429,
    "message": "You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits.",
    "status": "RESOURCE_EXHAUSTED",
    "details": [
      {
        "@type": "type.googleapis.com/google.rpc.QuotaFailure",
        "violations": [
          {
            "quotaMetric": "generativelanguage.googleapis.com/generate_content_free_tier_requests",
            "quotaId": "GenerateRequestsPerMinutePerProjectPerModel-FreeTier",
            "quotaDimensions": {
              "location": "global",
              "model": "gemini-1.5-flash"
            },
            "quotaValue": "15"
          }
        ]
      },
      {
        "@type": "type.googleapis.com/google.rpc.Help",
        "links": [
          {
            "description": "Learn more about

In [19]:
gemini_df['fallacy'].value_counts()
mistral_df['fallacy'].value_counts()

fallacy
Hasty Generalization                                                                                                                                    12
Argument from Ignorance                                                                                                                                  7
Slippery Slope Fallacy                                                                                                                                   7
none                                                                                                                                                     4
Ad Hominem Fallacy                                                                                                                                       4
                                                                                                                                                        ..
Causal Fallacy (also known as Post Hoc Ergo Propter Hoc) - ass

In [None]:
from typing import List, Literal # Literal is not directly used in OutputField for dynamic lists
import dspy
from pydantic import ValidationError

PREDEFINED_FALLACIES = ['ad hominem', 'appeal to emotion', 'hasty generalization', 
    'irrelevant authority', 'red herring', 'black and white fallacy',
    'causal oversimplification', 'doubt', 'exaggeration or minimization',
    'appeal to fear/prejudice', 'flag-waving', 'loaded language',
    'name calling or labeling', 'reductio ad hitlerum', 'slogans',
    'strawman', 'thought-terminating cliches', 'whataboutism',
    'ad populum', 'circular reasoning', 'deductive fallacy',
    'equivocation', 'fallacy of extension', 'intentional fallacy',
    'evading burden of proof', 'cherrypicking', 
    'post hoc (causal oversimplification)', 'vagueness', 'none']

class OpenEndedFallacyDetectionWithReasoning(dspy.Signature):
    """Classify logical fallacies given the statement"""
    statement: str = dspy.InputField(desc="Statement to analyze")
    fallacy: str = dspy.OutputField(
        desc="A fallacy or a fallacious argument is one that seems valid but is not. Identify the fallacy in the statement. If no fallacy is present, return 'none'."
    )
    confidence: float = dspy.OutputField(desc="0-1 confidence score")
    rationale: str = dspy.OutputField(desc="Step-by-step reasoning")

class FallacyDetector(dspy.Module):
    def __init__(self):
        super().__init__() # Added super().__init__()
        self.classify = dspy.ChainOfThought(OpenEndedFallacyDetectionWithReasoning)
        self.retry = 3  # max attempts

    def forward(self, statement):
        for attempt in range(self.retry):
            try:
                result = self.classify(statement=statement)
                return result
            except ValidationError as e:
                if attempt < self.retry - 1:
                    print(f"Validation error: {e}. Retrying attempt {attempt + 1}/{self.retry}...")
                    continue
                else: # Last attempt failed
                    print(f"Validation error on last attempt: {e}. Falling back.")
                    # Fallback strategy for the original detector
                    # This specific fallback in the original question might be problematic
                    # as it tries to call self.classify with extra arguments not in the signature.
                    # A more robust fallback for the original detector would be:
                    return OpenEndedFallacyDetectionWithReasoning(
                        statement=statement,
                        fallacy='none',
                        confidence=0.1, # Low confidence for fallback
                        rationale="Failed to classify fallacy due to repeated validation errors; assuming no fallacy."
                    )
                
class FallacyCategorizationSignature(dspy.Signature):
    """
    Categorize an open-ended fallacy description into a predefined list of fallacy types.
    If the detected fallacy does not clearly fit into any of the predefined categories,
    classify it as 'Other'.
    """
    open_ended_fallacy: str = dspy.InputField(
        desc="The name or description of the fallacy detected by an open-ended system (e.g., 'This is an ad hominem because...', or 'Attacking the person instead of the argument', or 'none')."
    )
    target_categories: List[str] = dspy.InputField(
        desc="A list of predefined fallacy categories to map the detected fallacy into."
    )
    categorized_fallacy: str = dspy.OutputField(
        desc=f"The category from the target_categories list that best matches the detected fallacy. If the open_ended_fallacy is 'none' or doesn't fit any category, return 'Other' or 'None Detected' as appropriate."
    )
    confidence: float = dspy.OutputField(
        desc="0-1 confidence score for this categorization."
    )
    rationale: str = dspy.OutputField(
        desc="Step-by-step reasoning for choosing the category, or for choosing 'Other'/'None Detected'."
    )

class FallacyCategorizer(dspy.Module):
    def __init__(self):
        super().__init__()
        self.categorize_program = dspy.ChainOfThought(FallacyCategorizationSignature)
        self.retry_attempts = 2 # Number of attempts to get a valid Pydantic object

    def forward(self, open_ended_fallacy: str, target_categories: List[str]):
        """
        Categorizes an open-ended fallacy string.

        Args:
            open_ended_fallacy (str): The fallacy string from the FallacyDetector.
            target_categories (List[str]): The list of predefined fallacy types.
                                           The LLM is encouraged to pick from this,
                                           or use 'Other' or 'None Detected'.
        """
        effective_target_categories = target_categories + ["Other", "None Detected"]

        for attempt in range(self.retry_attempts):
            try:
                prediction = self.categorize_program(
                    open_ended_fallacy=open_ended_fallacy,
                    target_categories=target_categories # Pass the original list here for the prompt
                )

                # Check if the LLM's output is one of the expected categories
                # This check is now more for logging/awareness if we decide to keep the original.
                if prediction.categorized_fallacy not in effective_target_categories:
                    if attempt < self.retry_attempts - 1:
                        print(f"Warning: LLM returned '{prediction.categorized_fallacy}' which is not in the target list ({effective_target_categories}). Retrying attempt {attempt + 1}/{self.retry_attempts}...")
                        # Optionally, provide negative feedback if optimizing later
                        # dspy.Suggest(False, f"The categorized_fallacy '{prediction.categorized_fallacy}' is not one of the allowed categories. Please choose from {effective_target_categories} or provide a more standard single fallacy name.")
                        continue # Retry
                    else:
                        # THIS IS THE MODIFIED BEHAVIOR:
                        # On the last attempt, if the category is still not in the predefined list,
                        # we keep the LLM's original output instead of forcing to 'Other'.
                        print(f"Warning: LLM returned category '{prediction.categorized_fallacy}' which is not in target list or 'Other'/'None Detected'. Keeping LLM's original output after retries.")
                        # The prediction.categorized_fallacy is already what the LLM returned.
                        # We can add a note to the rationale.
                        prediction.rationale += (
                            f" (Note: This category '{prediction.categorized_fallacy}' is the LLM's direct output "
                            f"and was not found in the predefined list: {target_categories} + ['Other', 'None Detected'].)"
                        )
                        # We keep the LLM's original confidence.
                        # If you wanted to penalize confidence for non-standard outputs:
                        # prediction.confidence = max(0.1, prediction.confidence * 0.7) # Example
                return prediction # Return the prediction (either matched or kept original after retries)

            except ValidationError as e:
                if attempt < self.retry_attempts - 1:
                    print(f"Categorizer Pydantic Validation error: {e}. Retrying attempt {attempt + 1}/{self.retry_attempts}...")
                    continue
                else:
                    print(f"Categorizer Pydantic Validation error on last attempt: {e}. Falling back to raw input or 'Error'.")
                    # Fallback for Pydantic validation failure after all retries
                    return FallacyCategorizationSignature(
                        open_ended_fallacy=open_ended_fallacy,
                        target_categories=target_categories,
                        categorized_fallacy=f"Error: Could not parse LLM output (original: {open_ended_fallacy[:50]}...)", # Or just open_ended_fallacy
                        confidence=0.0,
                        rationale=f"Failed to categorize due to repeated Pydantic validation errors: {str(e)}. The LLM might have produced an unparseable output."
                    )
            except Exception as e: # Catch other potential errors from dspy/LLM
                if attempt < self.retry_attempts - 1:
                    print(f"Categorizer general error: {e}. Retrying attempt {attempt + 1}/{self.retry_attempts}...")
                    continue
                else:
                    print(f"Categorizer general error on last attempt: {e}. Falling back.")
                    return FallacyCategorizationSignature(
                        open_ended_fallacy=open_ended_fallacy,
                        target_categories=target_categories,
                        categorized_fallacy=f"Error: Categorization failed (original: {open_ended_fallacy[:50]}...)", # Or just open_ended_fallacy
                        confidence=0.0,
                        rationale=f"Failed to categorize due to an unexpected error: {str(e)}"
                    )
        
        # This should ideally not be reached if fallbacks are comprehensive
        # For safety, a final fallback if loop finishes without returning
        print("Critical: FallacyCategorizer loop completed without returning a prediction or hitting fallback. This should not happen.")
        return FallacyCategorizationSignature(
            open_ended_fallacy=open_ended_fallacy,
            target_categories=target_categories,
            categorized_fallacy=f"Error: Unexpected state (original: {open_ended_fallacy[:50]}...)",
            confidence=0.0,
            rationale="Unexpected internal error in FallacyCategorizer."
        )

In [None]:
from utils import retry_function
mistral_df['fallacy_detect_obj'] = None # Store the whole object
fallacy_detector = FallacyDetector()

for i, row in mistral_df.iterrows():
    # Check if already processed, useful for re-runs
    if pd.isna(mistral_df.at[i, 'fallacy_detect_obj']): # Check against the object column
        print(f"Detecting fallacy for statement: \"{row['statement'][:50]}...\"")
        # Using your retry_function pattern
        detected_output = retry_function(fallacy_detector, statement=row['statement'])
        mistral_df.at[i, 'fallacy_detect_obj'] = detected_output

# Extract fields from the detected object
mistral_df['open_ended_fallacy'] = mistral_df['fallacy_detect_obj'].apply(lambda x: x.fallacy if x else "Error/None")
mistral_df['detection_confidence'] = mistral_df['fallacy_detect_obj'].apply(lambda x: x.confidence if x else 0.0)
mistral_df['detection_rationale'] = mistral_df['fallacy_detect_obj'].apply(lambda x: x.rationale if x else "Error/None")

print("\n--- DataFrame after initial detection: ---")
print(mistral_df[['statement', 'open_ended_fallacy', 'detection_confidence']].head())

Detecting fallacy for statement: "“After 30 years of Wisconsin’s checking account ru..."
Detecting fallacy for statement: "Government shutdowns in 2013 and 2018 “cost our ec..."
Detecting fallacy for statement: "About 1% of federal employees are “actually workin..."
Detecting fallacy for statement: "North Carolina Republicans “took money out of west..."
Detecting fallacy for statement: "“The Universities of Wisconsin are 43rd out of 50 ..."
Detecting fallacy for statement: "We’ve created 732,000 jobs since I've been governo..."
Detecting fallacy for statement: "“ICE officials have been ordered NOT to wear their..."
Detecting fallacy for statement: "“The lady who leaked passwords for voting systems ..."
Detecting fallacy for statement: ""As of today, we have cut the flow of immigration ..."
Detecting fallacy for statement: "“I’ve not gotten a single call from the White Hous..."
Detecting fallacy for statement: "Says opponent Eric Hovde “opposes efforts to negot..."
Detecting fallacy for

KeyboardInterrupt: 

In [17]:

# --- 2. Categorize the Detected Fallacies ---
print("\n--- Running Fallacy Categorization ---")
fallacy_categorizer = FallacyCategorizer()

# Initialize new columns for categorized results
mistral_df['categorized_fallacy_obj'] = None # To store the full categorizer output object
mistral_df['categorized_fallacy'] = None
mistral_df['category_confidence'] = None
mistral_df['category_rationale'] = None

for i, row in mistral_df.iterrows():
    if pd.isna(mistral_df.at[i, 'categorized_fallacy_obj']): # Check if already processed
        open_fallacy_description = row['open_ended_fallacy']

        # Handle cases where the initial detection might have failed or returned None/NaN
        if pd.isna(open_fallacy_description) or open_fallacy_description in ["Error/None", "Error in Detection"]:
            print(f"Skipping categorization for row {i} due to previous detection error or no fallacy detected ('{open_fallacy_description}').")
            mistral_df.at[i, 'categorized_fallacy'] = "Not Processed" if open_fallacy_description in ["Error/None", "Error in Detection"] else "None Detected"
            mistral_df.at[i, 'category_confidence'] = 0.0
            mistral_df.at[i, 'category_rationale'] = "Skipped due to upstream detection issue or no fallacy."
            # Create a dummy object for categorized_fallacy_obj to avoid lambda errors later if needed
            mistral_df.at[i, 'categorized_fallacy_obj'] = FallacyCategorizationSignature(
                open_ended_fallacy=str(open_fallacy_description),
                target_categories=PREDEFINED_FALLACIES,
                categorized_fallacy=mistral_df.at[i, 'categorized_fallacy'],
                confidence=mistral_df.at[i, 'category_confidence'],
                rationale=mistral_df.at[i, 'category_rationale']
            )
            continue

        print(f"Categorizing open-ended fallacy: \"{open_fallacy_description[:50]}...\"")
        
        # You can use your retry_function here as well if desired
        categorized_output = retry_function(
            fallacy_categorizer,
            open_ended_fallacy=str(open_fallacy_description), # Ensure it's a string
            target_categories=PREDEFINED_FALLACIES
        )
        mistral_df.at[i, 'categorized_fallacy_obj'] = categorized_output

# Extract fields from the categorized object
mistral_df['categorized_fallacy'] = mistral_df['categorized_fallacy_obj'].apply(lambda x: x.categorized_fallacy if x else "Error/None")
mistral_df['category_confidence'] = mistral_df['categorized_fallacy_obj'].apply(lambda x: x.confidence if x else 0.0)
mistral_df['category_rationale'] = mistral_df['categorized_fallacy_obj'].apply(lambda x: x.rationale if x else "Error/None")


print("\n--- Final DataFrame with Categorized Fallacies: ---")
print(mistral_df[[
    'statement',
    'open_ended_fallacy',
    'detection_confidence',
    'categorized_fallacy',
    'category_confidence'
]].head())


--- Running Fallacy Categorization ---
Categorizing open-ended fallacy: "Hasty Generalization Fallacy..."
Categorizing open-ended fallacy: "Causal fallacy (specifically, post hoc ergo propte..."
Categorizing open-ended fallacy: "False Dichotomy (also known as Black-or-White Fall..."
Categorizing open-ended fallacy: "Fallacy: Misattribution of a Cause (also known as ..."
Categorizing open-ended fallacy: "Circular reasoning fallacy..."
Categorizing open-ended fallacy: "Circumstantial evidence fallacy (also known as has..."
Categorizing open-ended fallacy: "Slippery Slope Fallacy..."
Categorizing open-ended fallacy: "Guilt by Association..."
Categorizing open-ended fallacy: "Potential fallacy: Exaggeration or Overstatement..."
Categorizing open-ended fallacy: "False Cause (Post Hoc Ergo Propter Hoc)..."
Categorizing open-ended fallacy: "Ad Hominem Fallacy..."
Categorizing open-ended fallacy: "Argument from Ignorance Fallacy..."
Categorizing open-ended fallacy: "Ad Hominem Fallacy..."
Cat

In [71]:
mistral_df['categorized_fallacy'] = mistral_df['categorized_fallacy'].apply(lambda x: x.lower().replace("'","").replace('"','') if isinstance(x, str) else x)

In [72]:
mistral_df[[
    'statement',
    'open_ended_fallacy',
    'detection_confidence',
    'categorized_fallacy',
    'category_confidence'
]].head()

Unnamed: 0,statement,open_ended_fallacy,detection_confidence,categorized_fallacy,category_confidence
0,“After 30 years of Wisconsin’s checking accoun...,Hasty Generalization Fallacy,0.85,hasty generalization,1.0
1,Government shutdowns in 2013 and 2018 “cost ou...,"Causal fallacy (specifically, post hoc ergo pr...",0.8,post hoc (causal oversimplification),1.0
2,About 1% of federal employees are “actually wo...,False Dichotomy (also known as Black-or-White ...,0.95,black and white fallacy,1.0
3,North Carolina Republicans “took money out of ...,Fallacy: Misattribution of a Cause (also known...,0.95,post hoc (causal oversimplification),1.0
4,“The Universities of Wisconsin are 43rd out of...,Circular reasoning fallacy,0.8,circular reasoning,1.0


In [73]:
gold_pred_pairs = []
for i, j in zip(mistral_df['verdict'].to_list(), mistral_df['pipeline_pass3_verdict'].to_list()):
    gold_pred_pairs.append(f"{i} -> {j}")
mistral_df['gold_pred_pairs'] = gold_pred_pairs

gold_pred_pairs = []
for i, j in zip(mistral_df['verdict'].to_list(), mistral_df['baseline_pass1_verdict'].to_list()):
    gold_pred_pairs.append(f"{i} -> {j}")
mistral_df['gold_pred_pairs_base1'] = gold_pred_pairs

gold_pred_pairs = []
for i, j in zip(mistral_df['verdict'].to_list(), mistral_df['baseline_pass3_verdict'].to_list()):
    gold_pred_pairs.append(f"{i} -> {j}")
mistral_df['gold_pred_pairs_base3'] = gold_pred_pairs

gold_pred_pairs = []
for i, j in zip(mistral_df['verdict'].to_list(), mistral_df['pipeline_pass1_verdict'].to_list()):
    gold_pred_pairs.append(f"{i} -> {j}")
mistral_df['gold_pred_pairs_pipe1'] = gold_pred_pairs

In [41]:
mistral_df.gold_pred_pairs.value_counts()

gold_pred_pairs
FALSE -> MOSTLY FALSE              20
FALSE -> FALSE                     19
MOSTLY TRUE -> MOSTLY TRUE         17
MOSTLY FALSE -> MOSTLY FALSE       14
TRUE -> MOSTLY TRUE                12
HALF TRUE -> MOSTLY TRUE           11
HALF TRUE -> MOSTLY FALSE           9
HALF TRUE -> HALF TRUE              7
MOSTLY TRUE -> HALF TRUE            6
TRUE -> TRUE                        6
TRUE -> HALF TRUE                   6
MOSTLY FALSE -> HALF TRUE           5
MOSTLY FALSE -> MOSTLY TRUE         4
MOSTLY TRUE -> MOSTLY FALSE         3
MOSTLY FALSE -> UNVERIFIABLE        2
FALSE -> HALF TRUE                  2
TRUE -> MOSTLY FALSE                2
MOSTLY FALSE -> FALSE               1
TRUE -> UNVERIFIABLE                1
MOSTLY TRUE -> MOSTLY HALF TRUE     1
FALSE -> MOSTLY TRUE                1
MOSTLY FALSE -> PARTIALLY TRUE      1
Name: count, dtype: int64

In [44]:
temp = mistral_df[(mistral_df['verdict'] != 'MOSTLY TRUE') & (mistral_df['pipeline_pass3_verdict'] == 'MOSTLY TRUE')]

In [48]:
mistral_df[mistral_df['verdict'] == 'MOSTLY TRUE']['categorized_fallacy'].value_counts()

categorized_fallacy
hasty generalization                                                                                               6
doubt                                                                                                              4
ad hominem                                                                                                         3
appeal to emotion                                                                                                  2
causal oversimplification                                                                                          2
slippery slope fallacy                                                                                             1
argument from lack of evidence (with an emphasis on the absence of supporting evidence for the authority cited)    1
equivocation                                                                                                       1
assumption (also known as unproven premise) 

In [54]:
df = pd.DataFrame(mistral_df.groupby('verdict')['categorized_fallacy'].value_counts())

In [63]:
pd.set_option('display.max_rows', 500)
display(df)

Unnamed: 0_level_0,Unnamed: 1_level_0,count
verdict,categorized_fallacy,Unnamed: 2_level_1
FALSE,causal oversimplification,7
FALSE,hasty generalization,6
FALSE,doubt,5
FALSE,exaggeration or minimization,3
FALSE,irrelevant authority,3
FALSE,"""causal oversimplification""",2
FALSE,ad hominem fallacy and false cause fallacy,2
FALSE,ad hominem,1
FALSE,"ad hominem, hasty generalization",1
FALSE,ambiguity fallacy (begging the question),1


In [83]:
temp = mistral_df[mistral_df['verdict'] != mistral_df['baseline_pass1_verdict']]
misclassified_df_base1 = pd.DataFrame(temp.groupby('gold_pred_pairs_base1')['categorized_fallacy'].value_counts())
display(misclassified_df_base1)

Unnamed: 0_level_0,Unnamed: 1_level_0,count
gold_pred_pairs_base1,categorized_fallacy,Unnamed: 2_level_1
FALSE -> MOSTLY FALSE,"ad hominem, hasty generalization",1
FALSE -> MOSTLY FALSE,doubt,1
FALSE -> MOSTLY FALSE,emotional appeal fallacy,1
FALSE -> MOSTLY FALSE,evading burden of proof,1
FALSE -> MOSTLY FALSE,hasty generalization,1
FALSE -> MOSTLY FALSE,red herring,1
FALSE -> MOSTLY TRUE,irrelevant authority,1
FALSE -> MOSTLY TRUE,slippery slope fallacy,1
FALSE -> MOSTLY UNVERIFIABLE,causal oversimplification,1
FALSE -> UNVERIFIABLE,causal oversimplification,4


In [84]:
temp.gold_pred_pairs_base1.value_counts()

gold_pred_pairs_base1
FALSE -> UNVERIFIABLE                  18
TRUE -> UNVERIFIABLE                   17
MOSTLY FALSE -> UNVERIFIABLE           16
MOSTLY TRUE -> UNVERIFIABLE            14
HALF TRUE -> UNVERIFIABLE              13
FALSE -> MOSTLY FALSE                   6
MOSTLY TRUE -> MOSTLY FALSE             6
HALF TRUE -> FALSE                      4
TRUE -> FALSE                           4
MOSTLY FALSE -> MOSTLY TRUE             4
TRUE -> MOSTLY FALSE                    4
HALF TRUE -> MOSTLY FALSE               4
HALF TRUE -> MOSTLY TRUE                3
MOSTLY TRUE -> TRUE                     3
HALF TRUE -> MOSTLY UNVERIFIABLE        3
MOSTLY TRUE -> MOSTLY UNVERIFIABLE      2
MOSTLY FALSE -> MOSTLY UNVERIFIABLE     2
FALSE -> MOSTLY TRUE                    2
MOSTLY TRUE -> FALSE                    1
TRUE -> MOSTLY TRUE                     1
FALSE -> MOSTLY UNVERIFIABLE            1
MOSTLY FALSE -> FALSE                   1
Name: count, dtype: int64

In [87]:
temp = mistral_df[mistral_df['verdict'] != mistral_df['baseline_pass3_verdict']]
misclassified_df_base3 = pd.DataFrame(temp.groupby('gold_pred_pairs_base3')['categorized_fallacy'].value_counts())
display(misclassified_df_base3)

Unnamed: 0_level_0,Unnamed: 1_level_0,count
gold_pred_pairs_base3,categorized_fallacy,Unnamed: 2_level_1
FALSE -> MOSTLY FALSE,"ad hominem, hasty generalization",1
FALSE -> MOSTLY FALSE,doubt,1
FALSE -> MOSTLY FALSE,emotional appeal fallacy,1
FALSE -> MOSTLY FALSE,evading burden of proof,1
FALSE -> MOSTLY FALSE,hasty generalization,1
FALSE -> MOSTLY FALSE,red herring,1
FALSE -> MOSTLY TRUE,hasty generalization,1
FALSE -> MOSTLY TRUE,irrelevant authority,1
FALSE -> MOSTLY TRUE,slippery slope fallacy,1
FALSE -> MOSTLY UNVERIFIABLE,causal oversimplification,1


In [88]:
temp.gold_pred_pairs_base3.value_counts()

gold_pred_pairs_base3
FALSE -> UNVERIFIABLE                  15
MOSTLY FALSE -> UNVERIFIABLE           15
TRUE -> UNVERIFIABLE                   13
HALF TRUE -> UNVERIFIABLE              11
MOSTLY TRUE -> MOSTLY FALSE             9
MOSTLY TRUE -> UNVERIFIABLE             7
TRUE -> MOSTLY FALSE                    7
FALSE -> MOSTLY FALSE                   6
HALF TRUE -> MOSTLY FALSE               5
HALF TRUE -> MOSTLY UNVERIFIABLE        4
HALF TRUE -> FALSE                      4
FALSE -> MOSTLY TRUE                    3
HALF TRUE -> MOSTLY TRUE                3
MOSTLY FALSE -> MOSTLY TRUE             3
MOSTLY TRUE -> TRUE                     3
MOSTLY TRUE -> FALSE                    3
TRUE -> FALSE                           3
TRUE -> MOSTLY TRUE                     3
MOSTLY FALSE -> MOSTLY UNVERIFIABLE     2
FALSE -> MOSTLY UNVERIFIABLE            1
FALSE -> TRUE                           1
MOSTLY FALSE -> FALSE                   1
MOSTLY TRUE -> MOSTLY UNVERIFIABLE      1
Name: count,

In [89]:
temp = mistral_df[mistral_df['verdict'] != mistral_df['pipeline_pass1_verdict']]
misclassified_df_pipe1 = pd.DataFrame(temp.groupby('gold_pred_pairs_pipe1')['categorized_fallacy'].value_counts())
display(misclassified_df_pipe1)

Unnamed: 0_level_0,Unnamed: 1_level_0,count
gold_pred_pairs_pipe1,categorized_fallacy,Unnamed: 2_level_1
FALSE -> HALF TRUE,causal oversimplification,2
FALSE -> HALF TRUE,hasty generalization,2
FALSE -> HALF TRUE,emotional appeal fallacy,1
FALSE -> HALF TRUE,exaggeration or minimization,1
FALSE -> HALF TRUE,slippery slope fallacy,1
FALSE -> MOSTLY FALSE,causal oversimplification,5
FALSE -> MOSTLY FALSE,hasty generalization,2
FALSE -> MOSTLY FALSE,"ad hominem, hasty generalization",1
FALSE -> MOSTLY FALSE,black and white fallacy,1
FALSE -> MOSTLY FALSE,doubt,1


In [90]:
temp.gold_pred_pairs_pipe1.value_counts()

gold_pred_pairs_pipe1
FALSE -> MOSTLY FALSE                  17
TRUE -> MOSTLY TRUE                    12
HALF TRUE -> MOSTLY TRUE                9
HALF TRUE -> MOSTLY FALSE               9
MOSTLY TRUE -> MOSTLY FALSE             8
TRUE -> HALF TRUE                       7
FALSE -> HALF TRUE                      7
MOSTLY FALSE -> MOSTLY TRUE             6
MOSTLY FALSE -> HALF TRUE               5
MOSTLY TRUE -> HALF TRUE                4
MOSTLY FALSE -> UNVERIFIABLE            4
FALSE -> UNVERIFIABLE                   4
TRUE -> UNVERIFIABLE                    3
TRUE -> MOSTLY FALSE                    2
FALSE -> MOSTLY TRUE                    2
HALF TRUE -> TRUE                       2
MOSTLY FALSE -> FALSE                   2
MOSTLY TRUE -> UNVERIFIABLE             2
HALF TRUE -> UNVERIFIABLE               2
HALF TRUE -> FALSE                      1
MOSTLY FALSE -> MOSTLY UNVERIFIABLE     1
MOSTLY TRUE -> MOSTLY HALF TRUE         1
MOSTLY FALSE -> PARTIALLY TRUE          1
Name: count,

In [100]:
temp = mistral_df[mistral_df['verdict'] != mistral_df['pipeline_pass3_verdict']]
misclassified_df_pipe3 = pd.DataFrame(temp.groupby('gold_pred_pairs')['categorized_fallacy'].value_counts())
display(misclassified_df_pipe3)

Unnamed: 0_level_0,Unnamed: 1_level_0,count
gold_pred_pairs,categorized_fallacy,Unnamed: 2_level_1
FALSE -> HALF TRUE,hasty generalization,1
FALSE -> HALF TRUE,slippery slope fallacy,1
FALSE -> MOSTLY FALSE,causal oversimplification,6
FALSE -> MOSTLY FALSE,hasty generalization,3
FALSE -> MOSTLY FALSE,ad hominem fallacy and false cause fallacy,1
FALSE -> MOSTLY FALSE,"ad hominem, hasty generalization",1
FALSE -> MOSTLY FALSE,doubt,1
FALSE -> MOSTLY FALSE,emotional appeal fallacy,1
FALSE -> MOSTLY FALSE,equivocation,1
FALSE -> MOSTLY FALSE,evading burden of proof,1


In [93]:
temp.gold_pred_pairs.value_counts()

gold_pred_pairs
FALSE -> MOSTLY FALSE              20
TRUE -> MOSTLY TRUE                12
HALF TRUE -> MOSTLY TRUE           11
HALF TRUE -> MOSTLY FALSE           9
TRUE -> HALF TRUE                   6
MOSTLY TRUE -> HALF TRUE            6
MOSTLY FALSE -> HALF TRUE           5
MOSTLY FALSE -> MOSTLY TRUE         4
MOSTLY TRUE -> MOSTLY FALSE         3
MOSTLY FALSE -> UNVERIFIABLE        2
FALSE -> HALF TRUE                  2
TRUE -> MOSTLY FALSE                2
MOSTLY FALSE -> FALSE               1
TRUE -> UNVERIFIABLE                1
MOSTLY TRUE -> MOSTLY HALF TRUE     1
FALSE -> MOSTLY TRUE                1
MOSTLY FALSE -> PARTIALLY TRUE      1
Name: count, dtype: int64

In [97]:
gemini_copy = gemini_df.copy()
gold_pred_pairs = []
for i, j in zip(gemini_copy['verdict'].to_list(), gemini_copy['pipeline_pass3_verdict'].to_list()):
    gold_pred_pairs.append(f"{i} -> {j}")
gemini_copy['gold_pred_pairs_pipe3'] = gold_pred_pairs
gemini_copy['categorized_fallacy'] = mistral_df['categorized_fallacy'].to_list()
temp = gemini_copy[gemini_copy['verdict'] != gemini_copy['pipeline_pass3_verdict']]

In [99]:
temp.gold_pred_pairs_pipe3.value_counts()

gold_pred_pairs_pipe3
HALF TRUE -> MOSTLY TRUE        14
TRUE -> MOSTLY TRUE             12
FALSE -> MOSTLY FALSE           11
MOSTLY TRUE -> UNVERIFIABLE      7
MOSTLY FALSE -> FALSE            6
TRUE -> UNVERIFIABLE             6
HALF TRUE -> UNVERIFIABLE        6
HALF TRUE -> MOSTLY FALSE        6
MOSTLY FALSE -> UNVERIFIABLE     5
FALSE -> UNVERIFIABLE            5
MOSTLY TRUE -> MOSTLY FALSE      4
MOSTLY FALSE -> MOSTLY TRUE      2
MOSTLY TRUE -> HALF TRUE         1
MOSTLY TRUE -> TRUE              1
HALF TRUE -> FALSE               1
TRUE -> FALSE                    1
Name: count, dtype: int64

In [98]:
misclassified_gemini = pd.DataFrame(temp.groupby('gold_pred_pairs_pipe3')['categorized_fallacy'].value_counts())
display(misclassified_gemini)

Unnamed: 0_level_0,Unnamed: 1_level_0,count
gold_pred_pairs_pipe3,categorized_fallacy,Unnamed: 2_level_1
FALSE -> MOSTLY FALSE,hasty generalization,4
FALSE -> MOSTLY FALSE,causal oversimplification,2
FALSE -> MOSTLY FALSE,emotional appeal fallacy,1
FALSE -> MOSTLY FALSE,exaggeration or minimization,1
FALSE -> MOSTLY FALSE,irrelevant authority,1
FALSE -> MOSTLY FALSE,slippery slope fallacy,1
FALSE -> MOSTLY FALSE,"strawman, hasty generalization, appeal to fear/prejudice",1
FALSE -> UNVERIFIABLE,causal oversimplification,1
FALSE -> UNVERIFIABLE,deductive fallacy,1
FALSE -> UNVERIFIABLE,equivocation,1


In [None]:
mistral_df['verdict'].value_counts()

In [124]:
to_binary = {
    "TRUE": "TRUE",
    "MOSTLY TRUE": "FALSE",
    "HALF TRUE": "FALSE",
    "MOSTLY FALSE": "FALSE",
    "FALSE": "FALSE",
    "UNVERIFIABLE": "UNVERIFIABLE"
}

In [125]:
gemini_df['baseline_pass1_binary'] = gemini_df['baseline_pass1_verdict'].map(to_binary)
gemini_df['pipeline_pass1_binary'] = gemini_df['pipeline_pass1_verdict'].map(to_binary)
gemini_df['baseline_pass3_binary'] = gemini_df['baseline_pass3_verdict'].map(to_binary) 
gemini_df['pipeline_pass3_binary'] = gemini_df['pipeline_pass3_verdict'].map(to_binary)
gemini_df['verdict_binary'] = gemini_df['verdict'].map(to_binary)

mistral_df['baseline_pass1_binary'] = mistral_df['baseline_pass1_verdict'].map(to_binary)
mistral_df['pipeline_pass1_binary'] = mistral_df['pipeline_pass1_verdict'].map(to_binary)
mistral_df['baseline_pass3_binary'] = mistral_df['baseline_pass3_verdict'].map(to_binary)
mistral_df['pipeline_pass3_binary'] = mistral_df['pipeline_pass3_verdict'].map(to_binary)
mistral_df['verdict_binary'] = mistral_df['verdict'].map(to_binary)

In [122]:
from sklearn.metrics import accuracy_score
accuracy_score(gemini_df['baseline_pass1_binary'].to_list(), gemini_df['verdict_binary'].to_list())
accuracy_score(gemini_df['pipeline_pass1_binary'].to_list(), gemini_df['verdict_binary'].to_list())
accuracy_score(gemini_df['baseline_pass3_binary'].to_list(), gemini_df['verdict_binary'].to_list())
accuracy_score(gemini_df['pipeline_pass3_binary'].to_list(), gemini_df['verdict_binary'].to_list())

0.66

In [126]:
from sklearn.metrics import accuracy_score
accuracy_score(mistral_df['baseline_pass1_binary'].to_list(), mistral_df['verdict_binary'].to_list())
accuracy_score(mistral_df['pipeline_pass1_binary'].to_list(), mistral_df['verdict_binary'].to_list())
accuracy_score(mistral_df['baseline_pass3_binary'].to_list(), mistral_df['verdict_binary'].to_list())
accuracy_score(mistral_df['pipeline_pass3_binary'].to_list(), mistral_df['verdict_binary'].to_list())

0.8333333333333334