In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder

# Step 1: Load dataset
df = pd.read_csv("straw_man_fallacy_dataset1.csv")
texts = df["text"].tolist()
labels_raw = df["label"].tolist()

# Step 2: Encode string labels to numeric
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels_raw)
class_names = label_encoder.classes_
all_encoded_labels = list(label_encoder.transform(class_names))

# Step 3: TF-IDF Vectorization
vectorizer = TfidfVectorizer(max_features=1000, ngram_range=(1, 2))
X = vectorizer.fit_transform(texts)

# Step 4: Train/Test Split
X_train, X_test, y_train, y_test = train_test_split(
    X, labels, test_size=0.3, random_state=42, stratify=labels
)

# Step 5: Train Decision Tree Classifier
clf = DecisionTreeClassifier(max_depth=6, random_state=42)
clf.fit(X_train, y_train)

# Step 6: Predict & Evaluate
y_pred = clf.predict(X_test)

print("Label mapping:", dict(zip(class_names, all_encoded_labels)))
print(classification_report(y_test, y_pred, labels=all_encoded_labels, target_names=class_names))


Label mapping: {'healthy_argument': 0, 'non_argument': 1, 'straw_man': 2}
                  precision    recall  f1-score   support

healthy_argument       1.00      0.95      0.97        20
    non_argument       0.93      1.00      0.97        14
       straw_man       1.00      1.00      1.00        38

        accuracy                           0.99        72
       macro avg       0.98      0.98      0.98        72
    weighted avg       0.99      0.99      0.99        72



In [2]:
def predict_fallacy(text):
    X_input = vectorizer.transform([text])
    pred = clf.predict(X_input)
    return label_encoder.inverse_transform(pred)[0]

# Example
print(predict_fallacy("You want cleaner air? So you want to ban all cars."))


straw_man
