### Bert Base Cased ###

Train and test on ISHate dataset, then evlauate with the microaggressions dataset. Following example from previous assigments & notebooks.

In [None]:
!pip install -q transformers
!pip install -q torchinfo
!pip install -U -q datasets
!pip install -q evaluate

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m511.6/511.6 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.7/47.7 MB[0m [31m18.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
from datasets import load_dataset, Dataset, DatasetDict
import pandas as pd
from sklearn.preprocessing import LabelEncoder

import numpy as np

import transformers
import evaluate

from torchinfo import summary

from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification
from transformers import TrainingArguments, Trainer

splits = {
    'train': 'ishate_train.parquet.gzip',
    'validation': 'ishate_dev.parquet.gzip',
    'test': 'ishate_test.parquet.gzip'
}

df_train = pd.read_parquet("hf://datasets/BenjaminOcampo/ISHate/" + splits["train"])
df_val = pd.read_parquet("hf://datasets/BenjaminOcampo/ISHate/" + splits["validation"])
df_test = pd.read_parquet("hf://datasets/BenjaminOcampo/ISHate/" + splits["test"])
max_sequence_length = 128

# create DatasetDict
ishate_dataset = DatasetDict({
    "train": Dataset.from_pandas(df_train),
    "validation": Dataset.from_pandas(df_val),
    "test": Dataset.from_pandas(df_test)
})

# Encode labels, similar to how we did above for the CNN.
label_encoder = LabelEncoder()

y_train_encoded = label_encoder.fit_transform(ishate_dataset['train']['hateful_layer'])
y_val_encoded = label_encoder.transform(ishate_dataset['validation']['hateful_layer'])
y_test_encoded = label_encoder.transform(ishate_dataset['test']['hateful_layer'])
ishate_train_data = ishate_dataset['train'].add_column('label', y_train_encoded.tolist())
ishate_val_data = ishate_dataset['validation'].add_column('label', y_val_encoded.tolist())
ishate_test_data = ishate_dataset['test'].add_column('label', y_test_encoded.tolist())


def preprocess_data(data, tokenizer):
    # Ensure text is a list of strings
    text = data['cleaned_text']
    encoded = tokenizer.batch_encode_plus(
            text,
            max_length=max_sequence_length,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_token_type_ids=True
            # return_tensors="pt"
    )
    return encoded


metric = evaluate.load('accuracy')
f1  = evaluate.load("f1")

def compute_metrics(p):
    predictions, labels = p
    predictions = np.argmax(predictions, axis=1)
    return {
        **metric.compute(predictions=predictions, references=labels),
        "f1_macro": f1.compute(predictions=predictions, references=labels, average="macro")["f1"],
    }

Downloading builder script: 0.00B [00:00, ?B/s]

Downloading builder script: 0.00B [00:00, ?B/s]

In [None]:
def fine_tune_classification_model(classification_model,
                                   tokenizer,
                                   train_data,
                                   dev_data,
                                   batch_size = 16,
                                   num_epochs = 2,
                                   learning_rate=2e-5):
    """
    Preprocess the data using the given tokenizer (we've give you the code for that part).
    Create the training arguments and trainer for the given model and data (write your code for that).
    Then train it.
    """

    preprocessed_train_data = train_data.map(preprocess_data, batched=True, fn_kwargs={'tokenizer': tokenizer})
    preprocessed_dev_data = dev_data.map(preprocess_data, batched=True, fn_kwargs={'tokenizer': tokenizer})

    # Referencing lesson 4 notebook & assignment 2 as an example:
    training_args = TrainingArguments(
      output_dir="bert_ishate",
      per_device_train_batch_size=batch_size,
      per_device_eval_batch_size=batch_size,
      num_train_epochs=num_epochs,
      learning_rate=learning_rate,
      eval_strategy="epoch",
      save_strategy="epoch",
      report_to='none',
      load_best_model_at_end = True,
      metric_for_best_model = "f1_macro",
      seed = 42
    )

    trainer = Trainer(
      model=classification_model,
      args=training_args,
      train_dataset=preprocessed_train_data,
      eval_dataset=preprocessed_dev_data,
      compute_metrics=compute_metrics
    )

    trainer.train()

    return trainer

In [None]:
model_checkpoint_name = "bert-base-cased"
bert_tokenizer = AutoTokenizer.from_pretrained(model_checkpoint_name)
bert_classification_model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint_name, num_labels = 2)

tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
bert_base_cased_trainer = fine_tune_classification_model(bert_classification_model, bert_tokenizer, ishate_train_data, ishate_val_data)

Map:   0%|          | 0/55023 [00:00<?, ? examples/s]

Map:   0%|          | 0/4367 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1 Macro
1,0.1684,0.378137,0.855049,0.845229
2,0.1007,0.481225,0.862606,0.85476


In [None]:
preprocessed_test_data = ishate_test_data.map(preprocess_data, batched=True, fn_kwargs={'tokenizer': bert_tokenizer})
predictions = bert_base_cased_trainer.predict(preprocessed_test_data)
preprocessed_test_pred = np.argmax(predictions.predictions, axis=1)
from sklearn.metrics import accuracy_score, classification_report

test_accuracy = accuracy_score(y_test_encoded, preprocessed_test_pred)
print(f"\nTest Accuracy: {test_accuracy:.4f}")
print("\nClassification Report:")
print(classification_report(y_test_encoded, preprocessed_test_pred, target_names=label_encoder.classes_))

Map:   0%|          | 0/4368 [00:00<?, ? examples/s]


Test Accuracy: 0.8686

Classification Report:
              precision    recall  f1-score   support

          HS       0.83      0.83      0.83      1687
      Non-HS       0.89      0.89      0.89      2681

    accuracy                           0.87      4368
   macro avg       0.86      0.86      0.86      4368
weighted avg       0.87      0.87      0.87      4368



In [None]:
from datasets import load_from_disk
balanced_selfMA_ds = load_from_disk("/content/drive/MyDrive/266_project/balanced_selfMA_ds")
balanced_selfMA_ds = balanced_selfMA_ds.rename_column('text', 'cleaned_text')

preprocessed_microagg_test = balanced_selfMA_ds['test'].map(
    preprocess_data,
    batched=True,
    fn_kwargs={'tokenizer': bert_tokenizer}
)

microagg_predictions = bert_base_cased_trainer.predict(preprocessed_microagg_test)
y_microagg_pred = np.argmax(microagg_predictions.predictions, axis=1)
y_microagg_test = balanced_selfMA_ds['test']['label']

microagg_accuracy = accuracy_score(y_microagg_test, y_microagg_pred)

print(f"\nMicroaggressions Test Accuracy (Sequential Training): {microagg_accuracy:.4f}")
print("\nClassification Report:")
print(classification_report(
    y_microagg_test,
    y_microagg_pred,
    target_names=['Non-Microaggression', 'Microaggression']
))


Microaggressions Test Accuracy (Sequential Training): 0.2766

Classification Report:
                     precision    recall  f1-score   support

Non-Microaggression       0.16      0.11      0.13       273
    Microaggression       0.33      0.45      0.38       273

           accuracy                           0.28       546
          macro avg       0.25      0.28      0.25       546
       weighted avg       0.25      0.28      0.25       546



### Now train on iSarcasm Eval: ###


In [None]:
isarc_train_df = pd.read_csv('/content/drive/MyDrive/W266_Fall2025_Neeha_Kotte/final_project/train_EN_iSarcasmEval.csv')
isarc_test_df = pd.read_csv('/content/drive/MyDrive/W266_Fall2025_Neeha_Kotte/final_project/task_A_En_test.csv')
isarc_test_df.head()

isarc_train_cleaned_df = isarc_train_df.copy()
isarc_train_cleaned_df['cleaned_text'] = isarc_train_df['tweet']
isarc_train_cleaned_df['label'] = isarc_train_df['sarcastic']

isarc_test_cleaned_df = isarc_test_df.copy()
isarc_test_cleaned_df['cleaned_text'] = isarc_test_df['text']
isarc_test_cleaned_df['label'] = isarc_test_df['sarcastic']


from sklearn.model_selection import train_test_split
isarc_train_split, isarc_val_split = train_test_split(
    isarc_train_cleaned_df,
    test_size=0.2,
    random_state=42,
    stratify=isarc_train_cleaned_df['label'] if 'label' in isarc_train_cleaned_df.columns else None
)

isarc_train_split['cleaned_text'] = isarc_train_split['cleaned_text'].fillna('').astype(str)
isarc_val_split['cleaned_text'] = isarc_val_split['cleaned_text'].fillna('').astype(str)
isarc_test_cleaned_df['cleaned_text'] = isarc_test_cleaned_df['cleaned_text'].fillna('').astype(str)


isarc_train_dataset = Dataset.from_pandas(isarc_train_split[['cleaned_text', 'label']].reset_index(drop=True))
isarc_val_dataset = Dataset.from_pandas(isarc_val_split[['cleaned_text', 'label']].reset_index(drop=True))
isarc_test_dataset = Dataset.from_pandas(isarc_test_cleaned_df[['cleaned_text', 'label']].reset_index(drop=True))

sarcasm_trainer = fine_tune_classification_model(
    bert_base_cased_trainer.model,
    bert_tokenizer,
    isarc_train_dataset,
    isarc_val_dataset,
    batch_size=16,
    num_epochs=2,
   learning_rate=1e-5
)

Map:   0%|          | 0/2774 [00:00<?, ? examples/s]

Map:   0%|          | 0/694 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1 Macro
1,No log,0.562499,0.75072,0.428807
2,No log,0.562181,0.75072,0.428807


In [None]:
# Check that the model can still detect hate speech, even after training on sarcasm
predictions_after_sarc = sarcasm_trainer.predict(preprocessed_test_data)
y_test_pred_after = np.argmax(predictions_after_sarc.predictions, axis=1)
test_accuracy_after = accuracy_score(y_test_encoded, y_test_pred_after)

print(f"\nISHate Test Performance:")
print(f"  Before sarcasm training: {test_accuracy:.4f}")
print(f"  After sarcasm training:  {test_accuracy_after:.4f}")
print(f"  Change:                  {(test_accuracy_after - test_accuracy):+.4f}")



ISHate Test Performance:
  Before sarcasm training: 0.8686
  After sarcasm training:  0.4018
  Change:                  -0.4668


In [None]:
from datasets import load_from_disk
balanced_selfMA_ds = load_from_disk("/content/drive/MyDrive/266_project/balanced_selfMA_ds")
balanced_selfMA_ds = balanced_selfMA_ds.rename_column('text', 'cleaned_text')

preprocessed_microagg_test = balanced_selfMA_ds['test'].map(
    preprocess_data,
    batched=True,
    fn_kwargs={'tokenizer': bert_tokenizer}
)

microagg_predictions = sarcasm_trainer.predict(preprocessed_microagg_test)
y_microagg_pred = np.argmax(microagg_predictions.predictions, axis=1)
y_microagg_test = balanced_selfMA_ds['test']['label']

microagg_accuracy = accuracy_score(y_microagg_test, y_microagg_pred)

print(f"\nMicroaggressions Test Accuracy (Sequential Training): {microagg_accuracy:.4f}")
print("\nClassification Report:")
print(classification_report(
    y_microagg_test,
    y_microagg_pred,
    target_names=['Non-Microaggression', 'Microaggression']
))


Microaggressions Test Accuracy (Sequential Training): 0.4982

Classification Report:
                     precision    recall  f1-score   support

Non-Microaggression       0.50      1.00      0.67       273
    Microaggression       0.00      0.00      0.00       273

           accuracy                           0.50       546
          macro avg       0.25      0.50      0.33       546
       weighted avg       0.25      0.50      0.33       546



### Microaggressions Evaluation:
Now trying with the balanced selfMA dataset Carlos created.


In [None]:
from datasets import load_from_disk
balanced_selfMA_ds = load_from_disk("/content/drive/MyDrive/266_project/balanced_selfMA_ds")
balanced_selfMA_ds = balanced_selfMA_ds.rename_column('text', 'cleaned_text')


microagg_trainer = fine_tune_classification_model(
    sarcasm_trainer.model,  # Continue from the sarcasm model
    bert_tokenizer,
    balanced_selfMA_ds['train'],
    balanced_selfMA_ds['validation'],
    batch_size=16,
    num_epochs=3,
    learning_rate=1e-5
)

Map:   0%|          | 0/4354 [00:00<?, ? examples/s]

Map:   0%|          | 0/544 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1 Macro
1,No log,0.153605,0.959559,0.959539
2,0.121800,0.110443,0.972426,0.972426
3,0.121800,0.112901,0.970588,0.970588


In [None]:
preprocessed_microagg_test = balanced_selfMA_ds['test'].map(
    preprocess_data,
    batched=True,
    fn_kwargs={'tokenizer': bert_tokenizer}
)

microagg_predictions = microagg_trainer.predict(preprocessed_microagg_test)
y_microagg_pred = np.argmax(microagg_predictions.predictions, axis=1)
y_microagg_test = balanced_selfMA_ds['test']['label']

microagg_accuracy = accuracy_score(y_microagg_test, y_microagg_pred)

print(f"\nMicroaggressions Test Accuracy (Sequential Training): {microagg_accuracy:.4f}")
print("\nClassification Report:")
print(classification_report(
    y_microagg_test,
    y_microagg_pred,
    target_names=['Non-Microaggression', 'Microaggression']
))

Map:   0%|          | 0/546 [00:00<?, ? examples/s]


Microaggressions Test Accuracy (Sequential Training): 0.9744

Classification Report:
                     precision    recall  f1-score   support

Non-Microaggression       0.98      0.97      0.97       273
    Microaggression       0.97      0.98      0.97       273

           accuracy                           0.97       546
          macro avg       0.97      0.97      0.97       546
       weighted avg       0.97      0.97      0.97       546

