In [2]:
import pandas as pd
import os

In [3]:
data_dir = '../data/argumentation/'
LOGIC_dir = '../data/LOGIC'

In [4]:
df_cckg = pd.read_csv(os.path.join(data_dir, 'train_cckg.tsv'), sep='\t')
df_iam = pd.read_csv(os.path.join(data_dir, 'train_iam.tsv'), sep='\t')

In [5]:
examples = pd.read_json(os.path.join(data_dir, 'fallacies_arguments_support.json'))
df_mapping = pd.read_csv(os.path.join(LOGIC_dir, 'mappings.csv'))
df_logic = pd.read_csv(os.path.join(LOGIC_dir, 'train.csv'))

In [6]:
examples.keys()

Index(['faulty generalization', 'false causality', 'circular reasoning',
       'ad populum', 'ad hominem', 'fallacy of logic', 'appeal to emotion',
       'false dilemma', 'equivocation', 'fallacy of extension',
       'fallacy of relevance', 'fallacy of credibility', 'intentional'],
      dtype='object')

In [7]:
def generate_prompt(topic, fallacy_type, supporting=True):
    data = examples[fallacy_type]
    supporting_args = [data['exampleArg1'], data['exampleArg2']]
    fallacies = [data['example1'], data['example2']]
    
     
    text =  f"""You are given a topic.  
    Your task is to generate a {'supporting' if supporting else 'counter'} {fallacy_type} logical fallacy argument in the context of the topic. 
    It should not be longer than 25 words. 
    
    {fallacy_type} fallacy is defined as: {examples[fallacy_type]['definition']}
    examples of {fallacy_type} fallacy are: 
    {fallacies[0]}
    {fallacies[1]}
        
    Here is an example of a supporting {fallacy_type} fallacy: 
    {supporting_args[0]}
    
    return the following using this json format:
    {"{"}
        "topic": {topic},
        "fallacy type": {fallacy_type},
        "{fallacy_type} fallacy support": <> 
    {"}"}
    """
    return text

In [8]:
df_iam.topic.sample(1).values[0]

'Should the death penalty be abolished'

In [9]:
df_cckg[df_cckg.topic=='People should not claim to be gender-neutral.']

Unnamed: 0,topic,argument,label,stuff
1929,People should not claim to be gender-neutral.,The idea of gender is a construct we invented ...,counter,(redefine; has property; gender-neutral)(gende...


In [10]:
fallacy_types = examples.keys()

for fallacy_type in fallacy_types:
    topic =  df_cckg.topic.sample(1).values[0]
    prompt = generate_prompt(topic=topic, fallacy_type=fallacy_type)
    print(prompt)
    print("######################################\n")

You are given a topic.  
    Your task is to generate a supporting faulty generalization logical fallacy argument in the context of the topic. 
    It should not be longer than 25 words. 
    
    faulty generalization fallacy is defined as: an informal fallacy wherein a conclusion is drawn about all or many instances of a phenomenon on the basis of one or a few instances of that phenomenon. is an example of jumping to conclusions.
    examples of faulty generalization fallacy are: 
    Flight attendants are the nicest people. I should know—I’ve taken two flights.
    The two courses I took at UWI were not very interesting. I don't think its a good university.
        
    Here is an example of a supporting faulty generalization fallacy: 
    {'topic': 'Misconceptions and Stereotypes Surrounding Poverty', 'fallacy type': 'faulty generalization', 'faulty generalization fallacy support': 'I know four poor families. They are lazy drug addicts. Therefore, all poor people are lazy drug addi

if we ask to generate different supporting fallacy arguments -- can we select those that are most relevant. Say we ask to create 3 supporting arguments, can we say okay, argument 2 is best given the arguments we have on this topic because maybe semantically it's better?

In [11]:
from gensim.models import KeyedVectors
import gensim.downloader as api
try:
    model = KeyedVectors.load('data/glove.model.d2v')
except:
    print("model not found, loading from api")
    model = api.load("glove-wiki-gigaword-50")

model not found, loading from api


In [12]:
df_cckg.head(2)

Unnamed: 0,topic,argument,label,stuff
0,Cannabis should be legal.,It's not a bad thing to make marijuana more av...,support,(cannabis; synonym of; marijuana)(legal; cause...
1,Women should not be in combat.,Women and men have the same rights.,counter,(women and men; is a; citizens)(citizens; caus...


In [13]:
import numpy as np
import scipy
import smart_open
import string
topic = 'Adopting gender-neutral language is too costly of a venture.'

def preprocess(text):
    translator = str.maketrans(string.punctuation, ' ' * len(string.punctuation))
    return text.translate(translator)

def get_vector(sentence):
    vec = []
    for word in sentence.split(' '):
        try:
            vec.append(model[word])
        except:
            continue
    return np.mean(vec, axis=0)
    
    
relevant_topics = df_cckg[df_cckg.topic==topic]
vecs = []
for arg in relevant_topics.argument:
    vec = get_vector(preprocess(arg))
    vecs.append(vec)

vecs = np.mean(vecs, axis=0)

In [14]:
def cosine_sim(vec1, vec2):
    return np.dot(vec1, vec2)/(np.linalg.norm(vec1)*np.linalg.norm(vec2))

In [15]:
first = "Developing gender-neutral terms requires extensive resources without ensuring effective communication or societal acceptance."
second = "Switching to gender-neutral language poses financial burdens on educational institutions without guaranteed academic improvements."
third = "Enforcing gender-neutral language policies entails excessive costs, ignoring traditional linguistic structures and cultural norms."

first_vec = get_vector(preprocess(third))
cosine_sim(first_vec, vecs)

0.82103854

In [16]:
vecs

array([ 3.37170541e-01, -1.65743586e-02, -5.24969026e-03, -1.27383307e-01,
        2.58584231e-01,  2.85847813e-01, -2.21693918e-01, -3.34187716e-01,
       -2.20025167e-01, -1.35492282e-02,  1.53116912e-01,  2.16282845e-01,
       -8.98733735e-02, -2.23413020e-01,  4.35957462e-01,  2.87058592e-01,
       -7.97293335e-02, -2.51641810e-01,  7.70857409e-02, -3.33267689e-01,
        1.18010938e-02, -1.34714618e-02,  2.52462804e-01, -4.59917672e-02,
        9.56015959e-02, -1.47948301e+00, -4.00172561e-01, -1.80584453e-02,
        1.21767074e-02, -2.22831249e-01,  3.08530736e+00, -8.13211277e-02,
       -3.70791376e-01, -4.12182927e-01, -1.16167963e-03, -1.36338204e-01,
        6.34142905e-02, -2.73390859e-02, -1.02138385e-01, -2.12419689e-01,
       -2.90850699e-02,  5.30688278e-02,  1.44261092e-01,  2.40551621e-01,
       -1.67453468e-01, -3.57831642e-02, -2.38804482e-02,  3.02665710e-01,
       -6.65420070e-02, -8.69345218e-02], dtype=float32)