## LTP Deployment

In [1]:
import os
import random
import re
import sys
import time
from collections import Counter

import ollama
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from tqdm import tqdm

In [2]:
# Directory (relative to the notebook's working directory) that holds the
# per-fallacy CSV files read in the next cell.
DATA_FILEPATH = 'data/handmade'

In [3]:
def _load_fallacy_csv(csv_name):
    """Read the CSV file `csv_name` located in DATA_FILEPATH into a DataFrame."""
    return pd.read_csv('{}/{}'.format(DATA_FILEPATH, csv_name))


# One DataFrame per label. Most files carry a `_final` suffix;
# circular_reasoning and tu_quoque do not.
ad_hominem = _load_fallacy_csv('ad_hominem_final.csv')
ad_populum = _load_fallacy_csv('ad_populum_final.csv')
appeal_to_anger = _load_fallacy_csv('appeal_to_anger_final.csv')
appeal_to_authority = _load_fallacy_csv('appeal_to_authority_final.csv')
appeal_to_fear = _load_fallacy_csv('appeal_to_fear_final.csv')
appeal_to_nature = _load_fallacy_csv('appeal_to_nature_final.csv')
appeal_to_pity = _load_fallacy_csv('appeal_to_pity_final.csv')
appeal_to_ridicule = _load_fallacy_csv('appeal_to_ridicule_final.csv')
appeal_to_tradition = _load_fallacy_csv('appeal_to_tradition_final.csv')
appeal_to_worse_problems = _load_fallacy_csv('appeal_to_worse_problems_final.csv')
# The variable name below is misspelt (sic) but is kept because a later cell
# refers to it by this exact name.
causal_oversimplifiation = _load_fallacy_csv('causal_oversimplification_final.csv')
equivocation = _load_fallacy_csv('equivocation_final.csv')
fallacy_of_division = _load_fallacy_csv('fallacy_of_division_final.csv')
false_analogy = _load_fallacy_csv('false_analogy_final.csv')
false_causality = _load_fallacy_csv('false_causality_final.csv')
false_dilemma = _load_fallacy_csv('false_dilemma_final.csv')
hasty_generalization = _load_fallacy_csv('hasty_generalization_final.csv')
nothing = _load_fallacy_csv('nothing_final.csv')
slippery_slope = _load_fallacy_csv('slippery_slope_final.csv')
strawman = _load_fallacy_csv('strawman_final.csv')
circular_reasoning = _load_fallacy_csv('circular_reasoning.csv')
tu_quoque = _load_fallacy_csv('tu_quoque.csv')

In [4]:
# Collect the rows of every dataset into one flat list of plain tuples.
list_of_tuples = []
nr_of_samples = 150   # rows drawn per dataset when `sample` is True
sample = False        # False: use every row of every dataset
# Fixed seed so that a sampled subset (and therefore every reported metric)
# is reproducible across runs. Set to None to draw a different subset each run.
sampling_seed = 42

datasets = [ad_hominem, ad_populum, appeal_to_anger, appeal_to_authority, appeal_to_fear, appeal_to_nature,
            appeal_to_pity, appeal_to_ridicule, appeal_to_tradition, appeal_to_worse_problems, causal_oversimplifiation,
            circular_reasoning, tu_quoque, #guilt_by_association,
            equivocation, fallacy_of_division, false_analogy, false_causality, false_dilemma,
            hasty_generalization, nothing, slippery_slope, strawman]

for dataset in datasets:
    if sample and len(dataset) >= nr_of_samples:
        # Dataset is large enough: draw exactly `nr_of_samples` rows.
        sampled = dataset.sample(n=nr_of_samples, random_state=sampling_seed)
    else:
        # Sampling disabled, or fewer than `nr_of_samples` rows: keep all rows.
        sampled = dataset

    # Convert the selected rows to plain tuples (index dropped) and collect them.
    list_of_tuples.extend(sampled.itertuples(index=False, name=None))

In [5]:
# Sanity check: total number of rows gathered from all datasets.
len(list_of_tuples)

846

In [6]:
def ollama_prompt(model_id, message_content):
    """Send a single user message to an Ollama chat model and return its reply.

    Parameters
    ----------
    model_id : model name forwarded to `ollama.chat` as `model`.
    message_content : text sent as the content of the one `user` message.

    Returns
    -------
    The `['message']['content']` entry of the chat response.
    """
    user_turn = {'role': 'user', 'content': message_content}
    reply = ollama.chat(model=model_id, messages=[user_turn])
    return reply['message']['content']

In [7]:
def contains_whole_word(large_string, word):
    """Return True if `word` occurs in `large_string` as a whole word or phrase.

    "Whole" means the occurrence is not directly preceded or followed by a
    word character (letter, digit or underscore). Matching is case-sensitive.

    Parameters
    ----------
    large_string : text to search in.
    word : word or phrase to look for; it is matched literally (regex
        metacharacters are escaped).

    Returns
    -------
    bool. An empty `word` never matches.
    """
    if not word:
        # An empty pattern would match at almost any position and make every
        # comparison succeed, so treat it as "not found".
        return False
    # Lookarounds instead of \b: \b needs a word character on one side, so it
    # misbehaves when `word` itself starts or ends with punctuation such as
    # "(false)". For words that begin and end with a word character the two
    # forms are equivalent.
    pattern = rf'(?<!\w){re.escape(word)}(?!\w)'
    return re.search(pattern, large_string) is not None

### Zero Shot

In [8]:
# Zero-shot prompt template: a definition of "fallacious argument", the list of
# fallacy types the model may answer with, and the task instruction.
# The single `{}` near the end is a positional placeholder for the text to
# classify (the template is meant to be filled with str.format, so it must not
# contain any other braces).
message_base_content = '''
Definition:
An argument consists of an assertion called the conclusion and one or more assertions called premises, where the premises are intended to establish the truth of the conclusion. Premises or conclusions can be implicit in an argument. 
A fallacious argument is an argument where the premises do not entail the conclusion. 

Types of fallacy:
hasty generalization 
slippery slope
causal oversimplification
appeal to ridicule
appeal to nature
false causality
ad populum
ad hominem
false analogy
false dilemma
appeal to fear
appeal to (false) authority
appeal to worse problems
circular reasoning
guilt by association
appeal to anger                 
straw man
appeal to tradition 
equivocation 
fallacy of division 
tu quoque 
appeal to positive emotion 
appeal to pity

I will give you different texts. For each text, determine if it is a fallacy or not. If it is not, or cannot be judged based on the text, write 'nothing'. If it is a fallacy, tell me which fallacy type of the above list it is. Do not give any explanation, just write the answer. If you are unsure, write the one answer that seems most likely: 
{}
'''

# Small hand-written list of (text, expected_label) pairs, kept commented out
# for reference; it can be swapped in for a quick smoke test instead of the
# full dataset.
# examples = [
#     ('Why is the sky blue?', 'nothing'),
#     ('You have no idea how street is, you better stop talking!', 'ad hominem'),
#     ("You're probably a drug addict yourself.", 'ad hominem'),
#     ("Why do you want a university-doctor's title to be shown on their identification-card? You must be one yourself, you egotistic prick ! Does it make you feel respected that we all know your little title?", "ad hominem"),
#     ("You’re way too nervous about driving at night so of course you don’t want to drive.", "ad hominem"),
#     ("Three million fans can't be wrong!", 'ad populum'),
#     ("The human soul is immortal, because all learned men agree that anything which does not come out of the potentiality of matter is incorruptible and immortal.", "ad populum"),
#     ("We all believe such preachers as Mr. Raskin. He is so nearly right, his ideals are so very high, that most people assent — while they have no difficulty in evading them and going on their way as if a breath of wind had fanned their faces, and no voice of truth had stirred their spirits.", "ad populum"),
#     ("You should buy this phone; it's the best-selling model worldwide.","ad populum"),
#     ("Are you tired of being ignored by your government? Is it right that the top 1% have so much when the rest of us have so little? I urge you to vote for me today!", 'appeal to anger'),
#     ("I get mad when i think about all these poor guys on the street, having no home, no job, no family. All because they started taking drugs. Its so sad, we need heavier penalties.","appeal to anger"),
# ]

# Evaluate on every row collected in `list_of_tuples`.
examples = list_of_tuples

### Chain of Thought

In [9]:
# Load the chain-of-thought prompt template from data/<filename>.txt.
filename = '''few_shot_cot_update'''
# The `with` block closes the file on exit, so no explicit close() is needed.
with open('data/{}.txt'.format(filename), 'r') as file:
    message_base_content_cot = file.read()

In [12]:
def prompt(model_id, examples, message_base_content, selfconsistency=True, n_votes=3):
    """Query a model on every example and score its answers.

    Parameters
    ----------
    model_id : model name passed on to `ollama_prompt`.
    examples : iterable of `(text, expected_label)` pairs.
    message_base_content : prompt template; `text` is inserted with
        `str.format`, so it needs one positional `{}` placeholder.
    selfconsistency : if True, the model is queried `n_votes` times per
        example; each reply is stripped, cut at its first '.', lowercased, and
        the most frequent result is kept (ties go to the reply seen first).
        If False, the model is queried once and the whole reply is stripped
        and lowercased.
    n_votes : number of queries per example when `selfconsistency` is True.
        Must be at least 1. Defaults to 3.

    Returns
    -------
    tuple `(accuracy, f1, precision, recall, time_taken, results)` where
    `time_taken` is the elapsed time in seconds and `results` is a DataFrame
    with one row per example and the columns `text`, `expected_label`,
    `actual_label` and `result`.
    """
    records = []
    start_time = time.perf_counter()

    for text, expected_label in tqdm(examples):
        message_content = message_base_content.format(text)

        if selfconsistency:
            votes = Counter(
                ollama_prompt(model_id, message_content).strip().partition('.')[0].lower()
                for _ in range(n_votes)
            )
            actual_label = votes.most_common(1)[0][0]
        else:
            actual_label = ollama_prompt(model_id, message_content).strip().lower()

        # The model answer is always lowercased, so the expected label has to
        # be normalized the same way or a capitalized label could never match.
        test_passed = contains_whole_word(actual_label, expected_label.strip().lower())

        records.append({
            'text': text,
            'expected_label': expected_label,
            'actual_label': actual_label,
            'result': test_passed
        })

    time_taken = time.perf_counter() - start_time

    # Build the frame once instead of appending row by row inside the loop.
    results = pd.DataFrame(records, columns=['text', 'expected_label', 'actual_label', 'result'])
    test_results = [record['result'] for record in records]

    accuracy = sum(test_results) / len(test_results)
    # NOTE(review): the reference vector is constant True, so these three
    # scores are derived from the pass/fail outcomes alone and are not
    # per-fallacy-class metrics. Confirm this is what should be reported.
    all_true = [True] * len(test_results)
    f1 = f1_score(all_true, test_results, average='weighted')
    precision = precision_score(all_true, test_results, average='weighted')
    recall = recall_score(all_true, test_results, average='weighted')

    return accuracy, f1, precision, recall, time_taken, results

In [14]:
# Create the output directory before the long-running evaluation starts, so
# that writing the CSV cannot fail on a missing folder after a model has
# already been evaluated on every example.
os.makedirs("results", exist_ok=True)

# Evaluate each model with the chain-of-thought template (one query per
# example, no self-consistency voting) and store the per-example results.
# NOTE(review): the template is read from 'few_shot_cot_update' while the
# output files are named 'zero-shot-cot' - confirm which name is intended.
for model in ["mistral", "gemma", "openchat"]:
    accuracy, f1, precision, recall, time_taken, results = prompt(model, examples, message_base_content_cot, selfconsistency=False)

    print('Accuracy cot {}: '.format(model), accuracy)
    print('F1 score cot {}:'.format(model), f1)
    print('Precision cot {}: '.format(model), precision)
    print('Recall cot {}: '.format(model), recall)

    print(f'Time Taken: {time_taken:.2f} seconds\n')

    results.to_csv("results/results-zero-shot-cot-{}.csv".format(model), sep=",")

  0%|                                                                               | 1/846 [03:45<52:56:15, 225.53s/it]


KeyboardInterrupt: 