In [10]:
import pandas as pd
import torch
import nltk
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from transformers import (
    BertTokenizer,
    BertForSequenceClassification,
    get_scheduler,
)
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from nltk.tokenize import sent_tokenize


In [11]:
df = pd.read_csv("lardelli_and_gpt_data.csv")

In [12]:
class BiasDataset(Dataset):
    def __init__(self, dataframe, tokenizer):
        self.data = dataframe
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        english = self.data.iloc[idx]["english"]
        german = self.data.iloc[idx]["german"]
        label = int(self.data.iloc[idx]["label"])
        text_pair = english + " [SEP] " + german
        inputs = self.tokenizer(
            text_pair,
            padding="max_length",
            truncation=True,
            max_length=128,
            return_tensors="pt",
        )
        item = {key: val.squeeze(0) for key, val in inputs.items()}
        item["labels"] = torch.tensor(label)
        return item

In [13]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = BertTokenizer.from_pretrained("bert-base-multilingual-cased")
model = BertForSequenceClassification.from_pretrained(
    "bert-base-multilingual-cased", num_labels=2
)
model.to(device)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(119547, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1

In [14]:
nltk.download('punkt')

def split_and_predict(text, tokenizer, model, device):
    sentences = sent_tokenize(text)
    results = []

    model.eval()
    with torch.no_grad():
        for sent in sentences:
            inputs = tokenizer(sent, return_tensors="pt", truncation=True, padding=True).to(device)
            outputs = model(**inputs)
            logits = outputs.logits
            predicted_label = torch.argmax(logits, dim=1).item()
            results.append((sent, predicted_label))

    return results

[nltk_data] Downloading package punkt to /Users/khali/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [15]:
# Freeze first 10 layers of BERT, only train last 2 layers + classifier
trainable_layers = ["encoder.layer.10", "encoder.layer.11", "pooler", "classifier"]
for name, param in model.named_parameters():
    param.requires_grad = any(layer in name for layer in trainable_layers)

In [16]:
train_df, val_df = train_test_split(df, test_size=0.1, random_state=10)
train_dataset = BiasDataset(train_df, tokenizer)
val_dataset = BiasDataset(val_df, tokenizer)
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=4)

In [17]:
optimizer = AdamW(model.parameters(), lr=2e-5)
num_training_steps = len(train_loader) * 3
lr_scheduler = get_scheduler(
    "linear", optimizer=optimizer,
    num_warmup_steps=0,
    num_training_steps=num_training_steps,
)

In [18]:
model.train()
for epoch in range(3):
    total_loss = 0
    num_batches = 0

    for batch in train_loader:
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**batch)
        loss = outputs.loss

        total_loss += loss.item()
        num_batches += 1

        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()

    avg_train_loss = total_loss / num_batches
    print(f"Epoch {epoch+1}: Avg Train Loss = {avg_train_loss:.4f}")

    # Validation
    model.eval()
    val_loss = 0
    val_batches = 0

    with torch.no_grad():
        for batch in val_loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            outputs = model(**batch)
            val_loss += outputs.loss.item()
            val_batches += 1

    avg_val_loss = val_loss / val_batches
    print(f"Epoch {epoch+1}: Avg Val Loss   = {avg_val_loss:.4f}")
    model.train()


Epoch 1: Avg Train Loss = 0.6833
Epoch 1: Avg Val Loss   = 0.6526
Epoch 2: Avg Train Loss = 0.5348
Epoch 2: Avg Val Loss   = 0.4064
Epoch 3: Avg Train Loss = 0.3558
Epoch 3: Avg Val Loss   = 0.3003


In [19]:
# Save model and tokenizer
output_dir = "./model_output"
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)
print(f"Model and tokenizer saved to {output_dir}")

Model and tokenizer saved to ./model_output


In [21]:
model_path = "./model_output"
model = BertForSequenceClassification.from_pretrained(model_path)
tokenizer = BertTokenizer.from_pretrained(model_path)
model.to(device)
model.eval()

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(119547, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1

In [22]:
val_dataset = BiasDataset(val_df, tokenizer)
val_loader = DataLoader(val_dataset, batch_size=4)

In [23]:
def evaluate_model(model, data_loader):
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for batch in data_loader:
            labels = batch["labels"].to(device)
            inputs = {k: v.to(device) for k, v in batch.items() if k != "labels"}
            outputs = model(**inputs)
            logits = outputs.logits
            preds = torch.argmax(logits, dim=1)

            all_preds.extend(preds.cpu().tolist())
            all_labels.extend(labels.cpu().tolist())

    accuracy = accuracy_score(all_labels, all_preds)
    precision, recall, f1, _ = precision_recall_fscore_support(all_labels, all_preds, average="binary")
    return accuracy, precision, recall, f1


In [24]:
accuracy, precision, recall, f1 = evaluate_model(model, val_loader)
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")


Accuracy: 0.8958
Precision: 0.8421
Recall: 0.8889
F1 Score: 0.8649


In [31]:
from transformers import BertTokenizer, BertForSequenceClassification
import torch

# Load model and tokenizer
tokenizer = BertTokenizer.from_pretrained("./model_output")
model = BertForSequenceClassification.from_pretrained("./model_output")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

# Test sentence pairs
examples = [
    ("The nurse is kind", "Die Krankenschwester ist freundlich"),
    ("The nurse is kind", "Die Pflegekraft ist freundlich"),
    ("The doctor is strong", "Der Arzt ist stark"),
    ("Hello, how are you?", "Hallo, wie geht es dir?"),
    ("The teacher is a caring", "Der Lehrer ist fürsorglich"),
    ("The engineer works a lot", "Der Ingenieur arbeitet sehr viel"),
    ("The teacher is smart", "Die Lehrkraft ist schlau"),
]

for en, de in examples:
    text_pair = en + " [SEP] " + de
    inputs = tokenizer(text_pair, return_tensors="pt", truncation=True, padding=True, max_length=128)
    inputs = {k: v.to(device) for k, v in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)
        probs = torch.softmax(outputs.logits, dim=1)
        pred = torch.argmax(probs).item()
        confidence = probs[0][pred].item()

    if pred == 1 and confidence >= 0.7:
        result = "Biased"
    else:
        result = "Neutral"

    print(f"EN: {en}")
    print(f"DE: {de}")
    print(f"Prediction: {result} (Confidence: {confidence:.2f})")
    print("-" * 50)


EN: The nurse is kind
DE: Die Krankenschwester ist freundlich
Prediction: Neutral (Confidence: 0.62)
--------------------------------------------------
EN: The nurse is kind
DE: Die Pflegekraft ist freundlich
Prediction: Neutral (Confidence: 0.59)
--------------------------------------------------
EN: The doctor is strong
DE: Der Arzt ist stark
Prediction: Biased (Confidence: 0.84)
--------------------------------------------------
EN: Hello, how are you?
DE: Hallo, wie geht es dir?
Prediction: Neutral (Confidence: 0.67)
--------------------------------------------------
EN: The teacher is a caring
DE: Der Lehrer ist fürsorglich
Prediction: Biased (Confidence: 0.86)
--------------------------------------------------
EN: The engineer works a lot
DE: Der Ingenieur arbeitet sehr viel
Prediction: Biased (Confidence: 0.88)
--------------------------------------------------
EN: The teacher is smart
DE: Die Lehrkraft ist schlau
Prediction: Neutral (Confidence: 0.52)
--------------------------