### Bert Base Cased ###

Train and test on the ISHate dataset, then evaluate with the microaggressions dataset, following the examples from previous assignments & notebooks.

In [None]:
!pip install -q transformers
!pip install -q torchinfo
!pip install -U -q datasets
!pip install -q evaluate

In [None]:
# Mount Google Drive so files under /content/drive (read later in this notebook) are available.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
from datasets import load_dataset, Dataset, DatasetDict
import pandas as pd
from sklearn.preprocessing import LabelEncoder

import numpy as np

import transformers
import evaluate

from torchinfo import summary

from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification
from transformers import TrainingArguments, Trainer

# Length that preprocess_data pads/truncates every tokenized example to.
max_sequence_length = 128

# Parquet file name of each ISHate split on the Hugging Face Hub.
splits = {
    'train': 'ishate_train.parquet.gzip',
    'validation': 'ishate_dev.parquet.gzip',
    'test': 'ishate_test.parquet.gzip'
}

# Read the three splits into pandas DataFrames.
ishate_hub_prefix = "hf://datasets/BenjaminOcampo/ISHate/"
df_train = pd.read_parquet(f"{ishate_hub_prefix}{splits['train']}")
df_val = pd.read_parquet(f"{ishate_hub_prefix}{splits['validation']}")
df_test = pd.read_parquet(f"{ishate_hub_prefix}{splits['test']}")

# Bundle the splits into a single DatasetDict.
ishate_dataset = DatasetDict({
    split_name: Dataset.from_pandas(split_frame)
    for split_name, split_frame in (
        ("train", df_train),
        ("validation", df_val),
        ("test", df_test),
    )
})

# Turn the string labels in 'hateful_layer' into integer ids. The encoder is
# fitted on the training split only and then reused for validation and test.
label_encoder = LabelEncoder()

y_train_encoded = label_encoder.fit_transform(ishate_dataset['train']['hateful_layer'])
ishate_train_data = ishate_dataset['train'].add_column('label', y_train_encoded.tolist())

y_val_encoded = label_encoder.transform(ishate_dataset['validation']['hateful_layer'])
ishate_val_data = ishate_dataset['validation'].add_column('label', y_val_encoded.tolist())

y_test_encoded = label_encoder.transform(ishate_dataset['test']['hateful_layer'])
ishate_test_data = ishate_dataset['test'].add_column('label', y_test_encoded.tolist())


def preprocess_data(data, tokenizer, max_length=None):
    """Tokenize the ``cleaned_text`` entries of a batch to a fixed length.

    Written for ``Dataset.map(..., batched=True, fn_kwargs={'tokenizer': ...})``,
    which is how every call site in this notebook uses it.

    Args:
        data: Batch mapping with a ``'cleaned_text'`` entry holding the texts.
        tokenizer: Callable Hugging Face tokenizer.
        max_length: Number of tokens each example is padded/truncated to.
            When omitted, the notebook-level ``max_sequence_length`` is used.

    Returns:
        The tokenizer output for the batch, including the attention mask and
        token type ids.
    """
    if max_length is None:
        max_length = max_sequence_length

    # Guarantee a list of strings: missing values become '' and anything else
    # is stringified, so a stray null cannot break tokenization.
    texts = ["" if item is None else str(item) for item in data['cleaned_text']]

    # Calling the tokenizer directly is the supported API; batch_encode_plus
    # is deprecated in favour of __call__.
    return tokenizer(
        texts,
        max_length=max_length,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_token_type_ids=True,
    )


# Metric objects are loaded once and shared by every evaluation call.
metric = evaluate.load('accuracy')
f1  = evaluate.load("f1")

def compute_metrics(p):
    """Compute accuracy and macro-F1 for a Trainer evaluation step.

    Args:
        p: Pair ``(predictions, labels)`` where ``predictions`` holds one row
            of class scores per example and ``labels`` the reference ids.

    Returns:
        Dict with the accuracy metric's output plus an ``"f1_macro"`` entry.
    """
    predictions, labels = p
    # Pick the highest-scoring class for each example.
    predictions = np.argmax(predictions, axis=1)
    return {
        **metric.compute(predictions=predictions, references=labels),
        # The key must be exactly "f1_macro" (no trailing colon): the training
        # arguments use metric_for_best_model="f1_macro", so the Trainer looks
        # the value up as "eval_f1_macro" when choosing the best checkpoint.
        "f1_macro": f1.compute(predictions=predictions, references=labels, average="macro")["f1"],
    }

In [None]:
def fine_tune_classification_model(classification_model,
                                   tokenizer,
                                   train_data,
                                   dev_data,
                                   batch_size = 16,
                                   num_epochs = 2,
                                   learning_rate=2e-5,
                                   output_dir="bert_ishate",
                                   seed=42):
    """Tokenize the data, fine-tune the model with a Trainer, and return the Trainer.

    Args:
        classification_model: Sequence-classification model to train; it is
            handed to the Trainer as-is and updated in place.
        tokenizer: Tokenizer passed to ``preprocess_data`` for both datasets.
        train_data: Dataset with ``cleaned_text`` and ``label`` columns used
            for training.
        dev_data: Dataset with the same columns, evaluated after each epoch.
        batch_size: Per-device batch size for both training and evaluation.
        num_epochs: Number of training epochs.
        learning_rate: Learning rate passed to ``TrainingArguments``.
        output_dir: Directory for checkpoints. Pass a distinct directory per
            run so checkpoints from different fine-tuning stages do not mix.
        seed: Random seed passed to ``TrainingArguments``.

    Returns:
        The ``Trainer`` after ``train()`` has completed. Because
        ``load_best_model_at_end`` is set, its model is the checkpoint with
        the best ``f1_macro`` on ``dev_data``.
    """

    tokenize_kwargs = {'tokenizer': tokenizer}
    preprocessed_train_data = train_data.map(preprocess_data, batched=True, fn_kwargs=tokenize_kwargs)
    preprocessed_dev_data = dev_data.map(preprocess_data, batched=True, fn_kwargs=tokenize_kwargs)

    training_args = TrainingArguments(
      output_dir=output_dir,
      per_device_train_batch_size=batch_size,
      per_device_eval_batch_size=batch_size,
      num_train_epochs=num_epochs,
      learning_rate=learning_rate,
      # Evaluate and checkpoint on the same schedule, which
      # load_best_model_at_end requires.
      eval_strategy="epoch",
      save_strategy="epoch",
      report_to='none',
      load_best_model_at_end = True,
      # Must match a key returned by compute_metrics (without the "eval_" prefix).
      metric_for_best_model = "f1_macro",
      seed = seed
    )

    trainer = Trainer(
      model=classification_model,
      args=training_args,
      train_dataset=preprocessed_train_data,
      eval_dataset=preprocessed_dev_data,
      compute_metrics=compute_metrics
    )

    trainer.train()

    return trainer

In [None]:
# Load the pretrained tokenizer and a two-class classification model from the
# same checkpoint name.
model_checkpoint_name = "bert-base-cased"
bert_tokenizer = AutoTokenizer.from_pretrained(model_checkpoint_name)
bert_classification_model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint_name,
    num_labels=2,
)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Fine-tune on the ISHate training split, evaluating on the validation split (default batch size, epochs and learning rate).
bert_base_cased_trainer = fine_tune_classification_model(bert_classification_model, bert_tokenizer, ishate_train_data, ishate_val_data)

Map:   0%|          | 0/55023 [00:00<?, ? examples/s]

Map:   0%|          | 0/4367 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1 Macro
1,0.1694,0.393125,0.856652,0.845382
2,0.0993,0.491586,0.86398,0.856049


In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Tokenize the ISHate test split with the same preprocessing used for training.
preprocessed_test_data = ishate_test_data.map(
    preprocess_data,
    batched=True,
    fn_kwargs={'tokenizer': bert_tokenizer},
)

# Score the test split and take the highest-scoring class per example.
predictions = bert_base_cased_trainer.predict(preprocessed_test_data)
preprocessed_test_pred = np.argmax(predictions.predictions, axis=1)

# Report overall accuracy and per-class precision/recall/F1.
test_accuracy = accuracy_score(y_test_encoded, preprocessed_test_pred)
print(f"\nTest Accuracy: {test_accuracy:.4f}")
print("\nClassification Report:")
print(
    classification_report(
        y_test_encoded,
        preprocessed_test_pred,
        target_names=label_encoder.classes_,
    )
)

Map:   0%|          | 0/4368 [00:00<?, ? examples/s]


Test Accuracy: 0.8764

Classification Report:
              precision    recall  f1-score   support

          HS       0.83      0.86      0.84      1687
      Non-HS       0.91      0.89      0.90      2681

    accuracy                           0.88      4368
   macro avg       0.87      0.87      0.87      4368
weighted avg       0.88      0.88      0.88      4368



### Now train on iSarcasm Eval: ###


In [None]:
import copy

from sklearn.model_selection import train_test_split

# Both iSarcasmEval CSV files are read from the same Drive folder.
isarc_data_dir = '/content/drive/MyDrive/W266_Fall2025_Neeha_Kotte/final_project'
isarc_train_df = pd.read_csv(f'{isarc_data_dir}/train_EN_iSarcasmEval.csv')
isarc_test_df = pd.read_csv(f'{isarc_data_dir}/task_A_En_test.csv')

# Bring both frames to the 'cleaned_text' / 'label' schema that preprocess_data
# and the Trainer expect. Text is cleaned here, before splitting, so no
# assignment is ever made into a slice returned by train_test_split.
isarc_train_cleaned_df = isarc_train_df.assign(
    cleaned_text=isarc_train_df['tweet'].fillna('').astype(str),
    label=isarc_train_df['sarcastic'],
)
isarc_test_cleaned_df = isarc_test_df.assign(
    cleaned_text=isarc_test_df['text'].fillna('').astype(str),
    label=isarc_test_df['sarcastic'],
)

# Hold out 20% of the training file as a validation split, stratified by label.
isarc_train_split, isarc_val_split = train_test_split(
    isarc_train_cleaned_df,
    test_size=0.2,
    random_state=42,
    stratify=isarc_train_cleaned_df['label'],
)

model_columns = ['cleaned_text', 'label']
isarc_train_dataset = Dataset.from_pandas(isarc_train_split[model_columns].reset_index(drop=True))
isarc_val_dataset = Dataset.from_pandas(isarc_val_split[model_columns].reset_index(drop=True))
isarc_test_dataset = Dataset.from_pandas(isarc_test_cleaned_df[model_columns].reset_index(drop=True))

# Continue training from the ISHate-tuned weights, but on a copy: the Trainer
# updates the model it is given in place, and bert_base_cased_trainer should
# keep holding the ISHate-only model.
# NOTE(review): this reuses the 2-way ISHate head, whose output ids follow
# label_encoder.classes_ (reported as 'HS', 'Non-HS'), and trains it on the
# 'sarcastic' column. The head's ids presumably take on a sarcasm meaning
# afterwards, so hate-speech predictions from sarcasm_trainer may not be
# comparable with the ISHate labels. Confirm this is the intended setup.
sarcasm_model = copy.deepcopy(bert_base_cased_trainer.model)

sarcasm_trainer = fine_tune_classification_model(
    sarcasm_model,
    bert_tokenizer,
    isarc_train_dataset,
    isarc_val_dataset,
    batch_size=16,
    num_epochs=2,
    learning_rate=1e-5
)

Map:   0%|          | 0/2774 [00:00<?, ? examples/s]

Map:   0%|          | 0/694 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.565151,0.75072
2,No log,0.566316,0.74928


In [None]:
# Check that the model can still detect hate speech, even after training on sarcasm
predictions_after_sarc = sarcasm_trainer.predict(preprocessed_test_data)
y_test_pred_after = np.argmax(predictions_after_sarc.predictions, axis=1)
test_accuracy_after = accuracy_score(y_test_encoded, y_test_pred_after)

print(f"\nISHate Test Performance:")
print(f"  Before sarcasm training: {test_accuracy:.4f}")
print(f"  After sarcasm training:  {test_accuracy_after:.4f}")
print(f"  Change:                  {(test_accuracy_after - test_accuracy):+.4f}")



ISHate Test Performance:
  Before sarcasm training: 0.8764
  After sarcasm training:  0.4412
  Change:                  -0.4352


### Microaggressions Evaluation:
Follows the same initial load and format steps as Carlos, but the tokenizer here is different.

In [None]:
# Download the microaggressions CSV and print a quick overview of its contents.
micro_agg_url = "https://huggingface.co/spaces/khanak27/microaggressionsdetector/resolve/main/micro_agg.csv"
# Try the encodings in order and keep the first one that parses the file.
# NOTE(review): latin-1 can decode any byte sequence, so a decode failure can
# never move the loop past it; the later entries are only reached after a
# non-decoding error. 'iso-8859-1' is also an alias of 'latin-1'.
encodings_to_try = ['utf-8', 'latin-1', 'cp1252', 'iso-8859-1', 'utf-8-sig']

df_micro = None
for encoding in encodings_to_try:
    try:
        print(f"Trying encoding: {encoding}")
        df_micro = pd.read_csv(micro_agg_url, encoding=encoding)
        print(f"✅ Successfully loaded with {encoding} encoding")
        break
    except UnicodeDecodeError as e:
        # Wrong encoding for this file: report it and move on to the next one.
        print(f"❌ Failed with {encoding}: {str(e)[:100]}...")
        continue
    except Exception as e:
        # Any other failure is also only reported here; the loop keeps going.
        print(f"❌ Other error with {encoding}: {str(e)[:100]}...")
        continue

# Last resort: read as UTF-8 and substitute undecodable bytes. If this fails
# too, the error is re-raised.
if df_micro is None:
    print("❌ Failed to load dataset with any encoding. Trying with error handling...")
    try:
        df_micro = pd.read_csv(micro_agg_url, encoding='utf-8', encoding_errors='replace')
        print("✅ Loaded with UTF-8 and error replacement")
    except Exception as e:
        print(f"❌ Final attempt failed: {e}")
        raise

# Overview: shape, column names and the first rows.
print(f"Dataset shape: {df_micro.shape}")
print(f"Columns: {df_micro.columns.tolist()}")
print(f"\nFirst few rows:")
display(df_micro.head(10))

print(f"\nData types:")
print(df_micro.dtypes)

print(f"\nMissing values:")
print(df_micro.isnull().sum())

# Number of rows for each value of the 'label' column.
print(f"\nLabel distribution:")
print(df_micro['label'].value_counts().sort_index())

# Check for any text preprocessing needed
print(f"\nSample texts:")
for i in range(3):
    print(f"{i+1}. Label {df_micro.iloc[i]['label']}: {df_micro.iloc[i]['speech']}")



# Expose the utterance under 'cleaned_text', the column name preprocess_data reads.
df_micro['cleaned_text'] = df_micro['speech']
def map_micro_labels_to_hate_speech(micro_label):
    """Translate a microaggression label into the ISHate label vocabulary.

    A label equal to 1 (microaggression) becomes 'HS'; every other value is
    treated as normal speech and becomes 'Non-HS'.
    """
    return 'HS' if micro_label == 1 else 'Non-HS'

# Apply the mapping
df_micro['mapped_label'] = df_micro['label'].apply(map_micro_labels_to_hate_speech)
# Reuse the encoder fitted on the ISHate labels so the integer ids match the
# ids the classifier was trained with.
# NOTE(review): the ISHate classification report lists label_encoder.classes_
# as 'HS', 'Non-HS', so microaggressions should encode to 0 and normal speech
# to 1. Confirm this before attaching class names to these ids in reports.
y_micro_encoded = label_encoder.transform(df_micro['mapped_label'])


Trying encoding: utf-8
❌ Failed with utf-8: 'utf-8' codec can't decode byte 0xe2 in position 17: invalid continuation byte...
Trying encoding: latin-1
✅ Successfully loaded with latin-1 encoding
Dataset shape: (171, 2)
Columns: ['speech', 'label']

First few rows:


Unnamed: 0,speech,label
0,You're very articulate for someone like you.,1
1,Where are you really from?,1
2,You're not like other girls.,1
3,You must be good at math since you're Asian.,1
4,You're too pretty to be a software engineer.,1
5,Are you sure you want to lead this project?,1
6,You don't look gay.,1
7,You are a credit to your race.,1
8,"That's a strange name, is it foreign?",1
9,Do you even understand this topic?,1



Data types:
speech    object
label      int64
dtype: object

Missing values:
speech    0
label     0
dtype: int64

Label distribution:
label
0    87
1    84
Name: count, dtype: int64

Sample texts:
1. Label 1: You're very articulate for someone like you.
2. Label 1: Where are you really from?
3. Label 1: You're not like other girls.


In [None]:
# Build a Dataset with the same 'cleaned_text' / 'label' columns as the ISHate
# data; missing text is replaced by an empty string.
micro_texts = df_micro['cleaned_text'].fillna('').tolist()
micro_label_ids = y_micro_encoded.tolist()
microaggressions_dataset = Dataset.from_dict(
    dict(cleaned_text=micro_texts, label=micro_label_ids)
)

# Tokenize with the same tokenizer and preprocessing used for training.
preprocessed_microaggressions_data = microaggressions_dataset.map(
    preprocess_data, batched=True, fn_kwargs={'tokenizer': bert_tokenizer}
)

Map:   0%|          | 0/171 [00:00<?, ? examples/s]

In [None]:
# Score the microaggressions data with the sequentially fine-tuned model and
# take the highest-scoring class per example.
micro_predictions_sequential = sarcasm_trainer.predict(preprocessed_microaggressions_data)
y_micro_pred_sequential = np.argmax(micro_predictions_sequential.predictions, axis=1)
micro_accuracy_sequential = accuracy_score(y_micro_encoded, y_micro_pred_sequential)

# classification_report pairs target_names with the integer label ids in
# ascending order. Those ids come from label_encoder, so the display names are
# derived from label_encoder.classes_ rather than hard-coded. Microaggressions
# were mapped to 'HS' and normal speech to 'Non-HS' before encoding.
micro_display_names = {'HS': 'Microaggression', 'Non-HS': 'Normal Speech'}
micro_target_names = [micro_display_names[cls] for cls in label_encoder.classes_]

print(f"\n Sequential Model Accuracy: {micro_accuracy_sequential:.4f}")
print("\nClassification Report (Sequential):")
print(classification_report(
    y_micro_encoded,
    y_micro_pred_sequential,
    # Explicit ids keep rows and names aligned even if a class is never predicted.
    labels=list(range(len(label_encoder.classes_))),
    target_names=micro_target_names
))



 Sequential Model Accuracy: 0.5965

Classification Report (Sequential):
                 precision    recall  f1-score   support

  Normal Speech       0.56      0.81      0.66        84
Microaggression       0.68      0.39      0.50        87

       accuracy                           0.60       171
      macro avg       0.62      0.60      0.58       171
   weighted avg       0.62      0.60      0.58       171

