## Stacking the Deck

In [1]:
pip install torch transformers scikit-learn nltk


Note: you may need to restart the kernel to use updated packages.


In [4]:
import pandas as pd
import torch
from transformers import BertTokenizer, BertForSequenceClassification
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

# Step 1: Load dataset
df = pd.read_csv("stacking_deck_fallacy_dataset.csv")  # Make sure this path is correct
texts = df["text"].tolist()
labels_raw = df["label"].tolist()

# Step 2: Encode labels (e.g., stacking_deck -> 1, others -> 0 or use full 3-class)
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(labels_raw)  # This will handle 3-class automatically

# Step 3: Tokenize
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
encodings = tokenizer(texts, truncation=True, padding=True, return_tensors="pt")

input_ids = encodings["input_ids"]
attention_mask = encodings["attention_mask"]
labels_tensor = torch.tensor(labels)

# Step 4: Prepare Dataset
dataset = TensorDataset(input_ids, attention_mask, labels_tensor)
train_data, test_data = train_test_split(dataset, test_size=0.2, random_state=42)
train_loader = DataLoader(train_data, batch_size=8, shuffle=True)
test_loader = DataLoader(test_data, batch_size=8)

# Step 5: Load BERT Model
num_labels = len(set(labels))  # Handle multi-class automatically
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=num_labels)
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)

# Step 6: Train
model.train()
for epoch in range(3):
    for batch in train_loader:
        b_input_ids, b_input_mask, b_labels = batch
        outputs = model(b_input_ids, attention_mask=b_input_mask, labels=b_labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

# Step 7: Evaluate
model.eval()
predictions, true_labels = [], []

with torch.no_grad():
    for batch in test_loader:
        b_input_ids, b_input_mask, b_labels = batch
        outputs = model(b_input_ids, attention_mask=b_input_mask)
        logits = outputs.logits
        preds = torch.argmax(logits, dim=1)
        predictions.extend(preds.tolist())
        true_labels.extend(b_labels.tolist())

# Step 8: Report
print("Label mapping:", dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_))))
print(classification_report(true_labels, predictions, target_names=label_encoder.classes_))


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Label mapping: {'healthy_argument': 0, 'non_argument': 1, 'stacking_deck': 2}
                  precision    recall  f1-score   support

healthy_argument       1.00      1.00      1.00        33
    non_argument       1.00      1.00      1.00        21
   stacking_deck       1.00      1.00      1.00        30

        accuracy                           1.00        84
       macro avg       1.00      1.00      1.00        84
    weighted avg       1.00      1.00      1.00        84



In [5]:
def predict_stacking_deck(text):
    encoding = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
    input_ids = encoding["input_ids"]
    attention_mask = encoding["attention_mask"]
    
    model.eval()
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        prediction = torch.argmax(logits, dim=-1).item()
    
    return "Stacking the Deck" if prediction == 1 else "Good Argument or Non-Argument"

# Test
print(predict_stacking_deck("You’ve only heard the good things because the bad stuff isn’t worth discussing."))


Good Argument or Non-Argument
