In [22]:
!pip install pandas
!pip install scikit-learn



In [23]:
import json
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report

# Step 1: Load data from JSON
with open("./Data/synthetic_fallacy_dataset.json", "r") as f:
    data = json.load(f)

In [8]:
# Step 2: Convert to DataFrame
df = pd.DataFrame(data)
X = df["text"]
y = df["label"]

# Step 3: Split into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Step 4: Vectorize the text using TF-IDF
vectorizer = TfidfVectorizer(ngram_range=(1,2), stop_words='english', max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Step 5: Train Decision Tree classifier
model = DecisionTreeClassifier(max_depth=10, random_state=42)
model.fit(X_train_tfidf, y_train)

# Step 6: Evaluate model
y_pred = model.predict(X_test_tfidf)
print("\nClassification Report:\n")
print(classification_report(y_test, y_pred))

# Step 7: Prediction function
def predict_fallacy(text):
    vec = vectorizer.transform([text])
    prediction = model.predict(vec)[0]
    return prediction

# Example usage
sample = "If we start allowing students to skip classes, soon they won't attend school at all."
print(f"\nPrediction for sample: {predict_fallacy(sample)}")


Classification Report:

                   precision    recall  f1-score   support

       ad_hominem       1.00      1.00      1.00        16
appeal_to_emotion       1.00      1.00      1.00        13
      false_cause       1.00      1.00      1.00        20
   slippery_slope       1.00      1.00      1.00       110
        straw_man       1.00      1.00      1.00        21

         accuracy                           1.00       180
        macro avg       1.00      1.00      1.00       180
     weighted avg       1.00      1.00      1.00       180


Prediction for sample: slippery_slope


In [24]:
# === Load Dataset ===
def load_data(json_path):
    with open(json_path, "r") as f:
        data = json.load(f)
    df = pd.DataFrame(data)
    return df



In [25]:
# === Vectorize Text ===
def vectorize_text(text_series, max_features=5000):
    vectorizer = TfidfVectorizer(ngram_range=(1,2), stop_words='english', max_features=max_features)
    tfidf = vectorizer.fit_transform(text_series)
    return tfidf, vectorizer

In [26]:
# === Train Model ===
def train_decision_tree(X_train, y_train, max_depth=10):
    model = DecisionTreeClassifier(max_depth=max_depth, random_state=42)
    model.fit(X_train, y_train)
    return model

# === Evaluate Model ===
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    print("=== Classification Report ===")
    print(classification_report(y_test, y_pred))

In [27]:
# === Predict Single Sample ===
def predict_fallacy(model, vectorizer, text):
    vec = vectorizer.transform([text])
    return model.predict(vec)[0]

In [28]:
# === Pipeline ===
data_path = "./Data/synthetic_fallacy_dataset.json"
df = load_data(data_path)
X = df["text"]
y = df["label"]

X_train_text, X_test_text, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train_vec, vectorizer = vectorize_text(X_train_text)
X_test_vec = vectorizer.transform(X_test_text)

model = train_decision_tree(X_train_vec, y_train)
evaluate_model(model, X_test_vec, y_test)

=== Classification Report ===
                   precision    recall  f1-score   support

       ad_hominem       1.00      1.00      1.00        11
appeal_to_emotion       1.00      1.00      1.00         9
      false_cause       1.00      1.00      1.00        13
   slippery_slope       1.00      1.00      1.00        73
        straw_man       1.00      1.00      1.00        14

         accuracy                           1.00       120
        macro avg       1.00      1.00      1.00       120
     weighted avg       1.00      1.00      1.00       120



In [34]:
# === Example Usage ===
sample_text = "I started wearing my lucky socks, and then my team won. Therefore, my lucky socks caused the win"
prediction = predict_fallacy(model, vectorizer, sample_text)
print(f"Prediction for sample: {prediction}")

Prediction for sample: slippery_slope


In [31]:
import json
import pandas as pd
from sklearn.metrics import classification_report

# === Load External Test Set ===
def load_test_data(json_path):
    with open(json_path, "r") as f:
        test_data = json.load(f)
    df = pd.DataFrame(test_data)
    return df["text"], df["label"]

# === Evaluate on Test Set ===
def evaluate_external_test(model, vectorizer, test_json_path):
    X_test_text, y_test = load_test_data(test_json_path)
    X_test_vec = vectorizer.transform(X_test_text)
    y_pred = model.predict(X_test_vec)
    
    print("=== External Test Set Evaluation ===")
    
    print(classification_report(y_test, y_pred))

# === Run Evaluation ===
test_json_path = "./Data/fallacy_test_set.json"
evaluate_external_test(model, vectorizer, test_json_path)


=== External Test Set Evaluation ===
                   precision    recall  f1-score   support

       ad_hominem       0.00      0.00      0.00        10
appeal_to_emotion       1.00      0.11      0.20         9
      false_cause       0.00      0.00      0.00        10
   slippery_slope       0.22      1.00      0.37        11
        straw_man       0.00      0.00      0.00        10

         accuracy                           0.24        50
        macro avg       0.24      0.22      0.11        50
     weighted avg       0.23      0.24      0.12        50



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
