In [12]:
!pip install scikit-learn nltk transformers torch



In [13]:
import nltk
from sklearn.model_selection import train_test_split
import torch
from transformers import BertTokenizer, BertForSequenceClassification
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import classification_report

In [14]:
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/logan.ramos/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

## Synthetic Data (NOTE: pending professor approval) 

In [15]:
synthetic_examples = [
    "You don't know anything about politics, you're just a student.", 
    "That's a ridiculous statement, you're too young to understand.",
    "Your argument is invalid because you're just a dumb person.",
    "You don’t have the expertise to be making these kinds of claims.",
    "What do you know about this? You’re just a student with no real experience.",
    "You're too young to understand the complexities of this topic.",
    "You're just an idealist; you don’t know how the world really works.",
    "You can’t possibly know anything about this; you’ve never worked a real job.",
    "You're just upset because things aren't going your way.",
    "You wouldn’t understand; you’re just a part-time worker.",
    "You're too inexperienced to have a valid opinion on this.",
    "You’re just repeating what everyone else says; you don’t think for yourself.",
    "How can you argue with me? You’ve never been in this field.",
    "What do you know about this? You’ve never even had a leadership role.",
    "You can't possibly understand, you're just a teenager with no experience.",
    "You're too emotional to be taken seriously on this matter.",
    "You can’t contribute to this conversation; you're just a beginner.",
    "You don’t have enough life experience to understand what's really going on.",
    "You're just a fan of that person; your opinion is biased.",
    "You can’t speak with authority because you're not qualified.",
    "How could you possibly know? You haven’t even been through a real struggle.",
    "You're too inexperienced to be discussing this seriously.",
    "You don’t know what you’re talking about; you’ve never been in my shoes.",
    "You’re just a blogger; your opinion doesn’t count.",
    "You're too young to see the big picture.",
    "You’re just a dreamer, not a realist.",
    "What do you know? You’ve never held a real job.",
    "You're just saying that because you haven’t lived long enough."
]

synthetic_healthy_arguments = [
    "I understand your point, but I think the data shows something different.",
    "I see where you're coming from, but let's look at the facts before we make a decision.",
    "While I respect your perspective, I think we need to consider a wider range of views on this.",
    "That's an interesting point, but I believe the evidence suggests otherwise.",
    "I disagree with your conclusion, but let's explore the underlying reasoning together.",
    "I hear you, but from my experience, I believe the situation is more complex.",
    "I understand your concerns, and I think it’s important we weigh the pros and cons carefully.",
    "I appreciate your opinion, but I think it's crucial to also examine the long-term effects.",
    "You raise a valid point, but we also need to account for other factors in this situation.",
    "I think it’s important to keep an open mind, as there may be other perspectives to consider.",
    "I see what you’re saying, but have you thought about the potential downsides?",
    "I get that you’re frustrated, but let’s try to focus on finding a solution.",
    "I agree that this is an issue, but I think we should look at all sides before we act.",
    "I see your concerns, but I think we need to gather more information first.",
    "That’s a good point, but I think we need to look at this in a broader context.",
    "I understand your perspective, but the data we have doesn't fully support that view.",
    "You're right to be concerned, but I think there are other strategies we should also consider.",
    "I respect your opinion, but let’s take a closer look at the facts before making any conclusions.",
    "I agree that this could be an issue, but I think we need to address it in a more systematic way.",
    "I understand your viewpoint, but I think we need to evaluate the situation more thoroughly.",
    "I appreciate your viewpoint, but let’s also consider the other possible outcomes.",
    "I see your point, but we need to factor in other variables that might affect the outcome.",
    "I understand your frustration, but let’s focus on finding a constructive solution.",
    "I see where you're coming from, but I think it's important to keep the bigger picture in mind.",
    "I understand the concern, but I think we need to approach this more methodically.",
    "I respect your perspective, but I think we need to question our assumptions here.",
    "That’s a valid argument, but we also need to look at the data from multiple angles.",
    "I agree with your sentiment, but I think it’s essential to test this hypothesis before making a decision.",
    "I see your point, but I think we should gather more evidence before drawing conclusions.",
    "I agree that there is an issue, but I think we need to identify the root cause first.",
    "You make a good argument, but I think we should consider how this will affect the long term.",
    "I agree with you on some aspects, but we should also factor in other variables.",
    "I see the merit in your argument, but I believe the situation is more nuanced than that.",
    "I understand the concern, but perhaps a different approach might be more effective.",
    "I see your point, but I think it's important to focus on solutions rather than dwelling on the problem.",
    "I respect your opinion, but I think it’s important to consider the broader implications.",
    "I agree that change is necessary, but we need to look at sustainable options.",
    "That’s a good idea, but I think it’s important to balance that with other priorities.",
    "I understand your frustration, but I think we need to take a step back and look at the bigger picture.",
    "I appreciate your thoughts, but I believe we need to focus on actionable steps.",
    "You bring up a valid concern, but let’s also think about the long-term benefits of this decision.",
    "I see the logic behind your argument, but let’s take a more balanced approach.",
    "I agree with you on several points, but I think it’s important to consider other options as well.",
    "That’s a compelling argument, but I think we should explore other alternatives.",
    "I understand your point of view, but I believe we need to look at the evidence before moving forward.",
    "I agree that something needs to change, but I think we need a plan that addresses all the underlying issues.",
    "You have a strong argument, but I think we should be cautious about the potential risks.",
    "I agree that this is a pressing issue, but let's also consider the potential consequences of this action.",
    "I understand your reasoning, but I think it’s important to consider the different perspectives involved.",
    "I respect your opinion, but I believe we need to dig deeper into the facts before making a final decision."
] 
synthetic_non_arguments = [
    "The weather today is very nice, with clear skies and a mild breeze.",
    "I enjoy listening to music in my free time.",
    "There’s a coffee shop near my house that I go to every weekend.",
    "I recently started reading a new book that seems really interesting.",
    "My favorite season of the year is autumn because of the colors of the leaves.",
    "I think I'll have pasta for dinner tonight.",
    "I like to go for walks in the park to relax.",
    "I noticed that the flowers in my garden are blooming beautifully this spring.",
    "I love watching documentaries on wildlife.",
    "My friend just adopted a dog, and it’s so cute."
] 
synthetic_non_examples = synthetic_non_arguments + synthetic_healthy_arguments

In [16]:
positive_labels = [1] * len(synthetic_examples)
negative_labels = [0] * len(synthetic_non_examples)
labels = [*positive_labels, *negative_labels]
texts = [*synthetic_examples, *synthetic_non_examples]
X_train, X_test, y_train, y_test = train_test_split(texts, labels, test_size=0.2, random_state=42)

In [17]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
def encode_texts(texts):
    if isinstance(texts, str):
        texts = [texts]
    return tokenizer(texts, truncation=True, padding=True, max_length=512, return_tensors="pt")
train_encodings = encode_texts(X_train)
test_encodings = encode_texts(X_test)
train_labels = torch.tensor(y_train)
test_labels = torch.tensor(y_test)
train_dataset = TensorDataset(train_encodings.input_ids, train_encodings.attention_mask, train_labels)
test_dataset = TensorDataset(test_encodings.input_ids, test_encodings.attention_mask, test_labels)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=8)

In [18]:
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)
model.train()
epochs = 3
for epoch in range(epochs):
    for batch in train_loader:
        input_ids, attention_mask, labels = batch
        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch + 1}/{epochs} completed.")
model.save_pretrained("ad_hominem_model")
tokenizer.save_pretrained("ad_hominem_model")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3 completed.
Epoch 2/3 completed.
Epoch 3/3 completed.


('ad_hominem_model/tokenizer_config.json',
 'ad_hominem_model/special_tokens_map.json',
 'ad_hominem_model/vocab.txt',
 'ad_hominem_model/added_tokens.json')

In [19]:
model.eval()  # Set the model to evaluation mode
predictions, true_labels = [], []
with torch.no_grad():
    for batch in test_loader:
        input_ids, attention_mask, labels = batch
        outputs = model(input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        preds = torch.argmax(logits, dim=-1)
        predictions.extend(preds.cpu().numpy())
        true_labels.extend(labels.cpu().numpy())
print(classification_report(true_labels, predictions))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        11
           1       1.00      1.00      1.00         7

    accuracy                           1.00        18
   macro avg       1.00      1.00      1.00        18
weighted avg       1.00      1.00      1.00        18



In [21]:
def predict_ad_hominem(text):
    encoding = encode_texts([text])
    input_ids = encoding.input_ids
    attention_mask = encoding.attention_mask
    model.eval()
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        prediction = torch.argmax(logits, dim=-1).item()
    return "Ad Hominem" if prediction == 1 else "good"
    
predict_ad_hominem("hello world")

'good'