In [2]:
import pandas as pd
from deep_translator import GoogleTranslator

In [3]:
# Smoke test: translate one English sentence to Romanian and display the result.
to_translate = "The bigger a child's shoe size, the better the child's handwriting"

translator = GoogleTranslator(target='ro', source='en')
translated = translator.translate(to_translate)
translated

'Cu cât mărimea pantofilor unui copil este mai mare, cu atât scrisul de mână al copilului este mai bun'

In [4]:
# Shared English ('en') -> Romanian ('ro') translator instance; translate_text()
# defined in this cell looks it up as a global on every call.
translator = GoogleTranslator(source='en', target='ro')


def translate_text(text):
    """Translate ``text`` with the module-level ``translator``, best-effort.

    Parameters
    ----------
    text : object
        Value to translate. Only non-blank strings are sent to the
        translator; anything else (``None``, ``NaN``, numbers, empty or
        whitespace-only strings) is returned unchanged without a request.

    Returns
    -------
    object
        The translated string, or the original ``text`` when the value was
        skipped, when the translator raised an ``Exception``, or when the
        translator returned ``None``.
    """
    # Missing values and blank cells cannot be translated; skip the request
    # (and the failure message it would trigger) for them.
    if not isinstance(text, str) or not text.strip():
        return text
    try:
        translated = translator.translate(text)
    except Exception as e:
        # Deliberate best-effort: report the problem and keep the source text
        # so one failing row does not abort a whole-column translation.
        print(f"Translation failed: {e}")
        return text
    # Never put None into the output column; fall back to the source text.
    return text if translated is None else translated

Translating the Hugging Face LOGIC dataset

In [1]:
from datasets import load_dataset

# Load the "tasksource/logical-fallacy" dataset; later cells read its
# 'train', 'dev' and 'test' splits from this object.
full_data = load_dataset("tasksource/logical-fallacy")

In [2]:
full_data['test'].shape

(511, 3)

In [7]:
full_data['dev'].shape

(570, 3)

In [40]:
# Translate the test split's `source_article` column and save it alongside the
# train/dev CSVs in data/huggingface/.
# NOTE(review): this cell used to write ./test.csv; update any reader of that path.
import os

test_csv_path = 'data/huggingface/test.csv'
# Create the output folder up front so a missing directory is detected before
# the row-by-row translation pass instead of after it.
os.makedirs(os.path.dirname(test_csv_path), exist_ok=True)

test = pd.DataFrame(full_data['test'])
test['source_article_ro'] = test['source_article'].apply(translate_text)
test.to_csv(test_csv_path, index=False)

In [42]:
# Train split: add a Romanian copy of `source_article` and save the table as CSV.
train = pd.DataFrame(full_data['train'])
train = train.assign(
    source_article_ro=train['source_article'].map(translate_text)
)
train.to_csv('data/huggingface/train.csv', index=False)

In [43]:
# Dev split: translate every `source_article` value and store the result in a
# new `source_article_ro` column before writing the CSV.
dev = pd.DataFrame(full_data['dev'])
dev['source_article_ro'] = [
    translate_text(article) for article in dev['source_article']
]
dev.to_csv('data/huggingface/dev.csv', index=False)

Translating LFUD dataset

In [51]:
# Load the LFUD table from CSV and preview its first rows.
lfud = pd.read_csv("data/LFUD.csv")
lfud.head()

Unnamed: 0,index,proposition,sentence,fallacy_type,task1,task2,task3,task4,task5
0,0,All electronic products need electricity.,All electronic products need electricity. Elec...,faulty generalization,{'question': 'Statement: All electronic produc...,{'question': 'Faulty generalization occurs whe...,{'question': 'Faulty generalization occurs whe...,{'question': 'Faulty generalization occurs whe...,{'question': 'Original sentence: All electroni...
1,1,All electronic products need electricity.,Since all electronic products need electricity...,false causality,{'question': 'Statement: Since all electronic ...,{'question': 'False causality occurs when an a...,{'question': 'False causality occurs when an a...,{'question': 'False causality occurs when an a...,{'question': 'Original sentence: Since all ele...
2,2,All electronic products need electricity.,All electronic products function because they ...,circular reasoning,{'question': 'Statement: All electronic produc...,{'question': 'Circular reasoning occurs when a...,{'question': 'Circular reasoning occurs when a...,{'question': 'Circular reasoning occurs when a...,{'question': 'Original sentence: All electroni...
3,3,All electronic products need electricity.,Most people think that all electronic products...,ad populum,{'question': 'Statement: Most people think tha...,{'question': 'Ad populum occurs when an argume...,{'question': 'Ad populum occurs when an argume...,{'question': 'Ad populum occurs when an argume...,{'question': 'Original sentence: Most people t...
4,4,All electronic products need electricity.,"""Either every electronic item operates using e...",false dilemma,"{'question': 'Statement: ""Either every electro...",{'question': 'False dilemma occurs when incorr...,{'question': 'False dilemma occurs when incorr...,{'question': 'False dilemma occurs when incorr...,"{'question': 'Original sentence: ""Either every..."


In [52]:
# Remove the task1..task5 columns before translating and saving.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps the
# cell re-runnable: a second run no longer raises KeyError for columns that
# have already been dropped.
lfud = lfud.drop(columns=['task1', 'task2', 'task3', 'task4', 'task5'], errors='ignore')

In [56]:
# The same text can occur on many rows (the head() preview shows one
# proposition on five consecutive rows), so translate each distinct value once
# and map the result back instead of issuing one request per row.
# Missing values are not sent to the translator and stay missing.
for column in ('proposition', 'sentence'):
    translations = {
        value: translate_text(value)
        for value in lfud[column].dropna().unique()
    }
    lfud[f'{column}_ro'] = lfud[column].map(translations)
lfud.to_csv('data/logical_fallacy_understanding_dataset/lfud.csv', index=False)

Translating non-fallacies

In [2]:
import re
import numpy as np
import pandas as pd

In [6]:
file_path = "data/facts/AllCombined.txt"
with open(file_path, "r", encoding="utf-8") as src:
    text = src.read()

# Step 1: collapse every run of two or more newlines into a single newline.
blank_line_runs = re.compile(r"\n{2,}")
# Step 2: a line consisting only of 1-15 ASCII letters (plus optional trailing
# whitespace) is replaced by a bare newline, which leaves a blank line where it
# stood. Presumably these are one-word article titles — TODO confirm.
short_word_lines = re.compile(r"(?m)^[A-Za-z]{1,15}\s*$\n?")

cleaned_text = short_word_lines.sub("\n", blank_line_runs.sub("\n", text))

In [7]:
# Persist the cleaned text so later steps can read it back from disk.
cleaned_file_path = "data/facts/cleaned_facts.txt"
with open(cleaned_file_path, mode="w", encoding="utf-8") as out:
    out.write(cleaned_text)

Extracting the first sentence from the first n lines of each paragraph

In [8]:
def extract_first_n_phrases(file_path, n=1, min_fragments=5):
    """Collect the opening sentence of the first ``n`` lines of every paragraph.

    The file is read as UTF-8 and split into paragraphs on a double newline.
    Within each paragraph only the first ``n`` lines are inspected. Each of
    those lines is split on ``". "``; when that yields more than
    ``min_fragments`` pieces, the first piece (the text before the first
    ``". "``, so without the period) is kept.

    Parameters
    ----------
    file_path : str
        Path of the text file to read.
    n : int, default 1
        Number of leading lines to inspect per paragraph.
    min_fragments : int, default 5
        A line is kept only if splitting it on ``". "`` gives strictly more
        than this many pieces. The default reproduces the threshold that was
        previously hard-coded.

    Returns
    -------
    list of str
        The kept first pieces, in file order.
    """
    # Read everything first so the file handle is released before processing.
    with open(file_path, "r", encoding="utf-8") as file:
        text = file.read()

    sentences = []
    for paragraph in text.split("\n\n"):
        for line in paragraph.split("\n")[:n]:
            fragments = line.split(". ")
            if len(fragments) > min_fragments:
                sentences.append(fragments[0])
    return sentences

# NOTE: despite the variable name, n=1 means only the first line of each
# paragraph is inspected.
first_two_phrases = extract_first_n_phrases(cleaned_file_path, n=1)
print(len(first_two_phrases), first_two_phrases[:10], sep="\n")

3687
["Air is the Earth's atmosphere", 'An abbreviation is a shorter way to write a word or phrase', 'Algebra (from Arabic: الجبر\u200e, transliterated "al-jabr", meaning "reunion of broken parts") is a part of mathematics', 'An atom is an extremely small piece of matter', 'Angola, officially the Republic of Angola, is a country in southern Africa', 'A boot is a type of footwear that protects the foot and ankle', 'A computer is a machine that uses electronics to input, process, store, and output data', 'To chat is to talk about ordinary things that are not usually very important', 'Comedy (from ), in modern times, is an entertainment with generally funny content', 'A comet is a ball of mostly ice that moves around in outer space']


In [9]:
# Load the combined LFUD + Hugging Face table (judging by the file name).
# NOTE(review): no cell visible in this notebook writes this file — confirm
# where it is produced.
df = pd.read_csv('data/all/combined_lfud_huggingface.csv')
df.shape

(4565, 5)

In [11]:
df.head()

Unnamed: 0,source_article,logical_fallacies,source_article_ro,proposition,proposition_ro
0,"company's slogan ""Expect More. Pay Less.""",appeal to emotion,sloganul companiei „Așteptați mai mult. Plătiț...,,
1,"The bigger a child's shoe size, the better the...",false causality,Cu cât mărimea pantofilor unui copil este mai ...,,
2,"Since many people believe this, then it must b...",ad populum,"Din moment ce mulți oameni cred asta, atunci t...",,
3,Senator Randall isn't lying when she says she ...,circular reasoning,Senatorul Randall nu minte când spune că îi pa...,,
4,A mother is telling her daughter that she went...,fallacy of relevance,O mamă îi spune fiicei ei că și-a analizat dat...,,


In [14]:
# Keep one non-fallacy sentence for every four rows already in `df`
# (len(df) // 4) and label each of them 'nonfallacy'.
first_two_phrases = first_two_phrases[: len(df) // 4]
nonfallacies_df = pd.DataFrame({'source_article': first_two_phrases}).assign(
    logical_fallacies='nonfallacy'
)

In [15]:
print(nonfallacies_df.shape)

(1141, 2)


In [16]:
nonfallacies_df.head()

Unnamed: 0,source_article,logical_fallacies
0,Air is the Earth's atmosphere,nonfallacy
1,An abbreviation is a shorter way to write a wo...,nonfallacy
2,"Algebra (from Arabic: الجبر‎, transliterated ""...",nonfallacy
3,An atom is an extremely small piece of matter,nonfallacy
4,"Angola, officially the Republic of Angola, is ...",nonfallacy


In [None]:
# Romanian translation of every non-fallacy sentence, one translate_text call per row.
nonfallacies_df['source_article_ro'] = nonfallacies_df['source_article'].map(translate_text)

In [None]:
# Audible "finished" signal after the preceding translation cell.
frequency = 2500  # Set Frequency To 2500 Hertz
duration = 1000  # Set Duration To 1000 ms == 1 second
try:
    import winsound  # Windows-only standard-library module
except ImportError:
    # Other platforms: fall back to the terminal bell so that running all
    # cells does not stop here with an ImportError.
    print('\a', end='')
else:
    winsound.Beep(frequency, duration)

In [None]:
# Non-fallacy rows have no proposition; add both columns filled with NaN so the
# frame has the same columns as `df` before concatenation.
nonfallacies_df = nonfallacies_df.assign(
    proposition=np.nan,
    proposition_ro=np.nan,
)

In [None]:
nonfallacies_df.head(10)

In [None]:
# Stack the fallacy rows and the non-fallacy rows, renumbering the index 0..N-1.
frames_to_stack = [df, nonfallacies_df]
combined_df = pd.concat(frames_to_stack, ignore_index=True)
combined_df

In [9]:
from sklearn.utils import shuffle
# Shuffle the rows of combined_df with a fixed random_state (42). The index is
# not reset here: the displayed result still carries the pre-shuffle row labels.
shuffled_df = shuffle(combined_df, random_state=42)
shuffled_df

Unnamed: 0,source_article,logical_fallacies,source_article_ro,proposition,proposition_ro
2273,Such misrepresentations are now commonplace in...,intentional,Astfel de denaturari sunt acum obisnuite in an...,,
1094,If we use just one more can of hairspray this ...,faulty generalization,Dacă mai folosim o singură cutie de fixativ lu...,,
3095,That doesn ’ t mean pollution controls are fut...,fallacy of relevance,Asta nu înseamnă că controalele poluării sunt ...,,
2986,The Earth is not warming .,intentional,Pământul nu se încălzește.,,
3254,McDonald's Hamburgers: over 99 billion served.,ad populum,Hamburgeri McDonald's: peste 99 de miliarde se...,,
...,...,...,...,...,...
3772,If a gadget qualifies as an electronic product...,deductive fallacy,Dacă un gadget se califică drept produs electr...,All electronic products need electricity.,Toate produsele electronice au nevoie de elect...
5191,"Poole is a town in Dorset, England",nonfallacy,"Poole este un oraș din Dorset, Anglia",,
5226,A lock keeps things closed,nonfallacy,Un lacăt ține lucrurile închise,,
5390,Curium is a synthetic chemical element in the ...,nonfallacy,Curiul este un element chimic sintetic din tab...,,


In [10]:
# Save the shuffled table; index=False leaves the row labels out of the CSV.
shuffled_df.to_csv('data/all/combined_lfud_huggingface_nonfallacies.csv', index=False)

In [11]:
shuffled_df.shape

(5706, 5)