In [None]:
# Colab environment setup: install the Hugging Face and evaluation packages imported below.
# NOTE(review): no versions are pinned, so a later re-run may pick up different releases.
!pip install -q transformers
!pip install -q torchinfo
!pip install -U -q datasets
!pip install -q evaluate

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m511.6/511.6 kB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.7/47.7 MB[0m [31m18.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# Mount Google Drive; the CSV files read later live under /content/drive/MyDrive/.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
from datasets import load_dataset, Dataset, DatasetDict
import pandas as pd
from sklearn.preprocessing import LabelEncoder

import numpy as np

import transformers
import evaluate

from torchinfo import summary

from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification
from transformers import TrainingArguments, Trainer

# Fixed token length used when padding/truncating every example.
max_sequence_length = 128

# Parquet file name of each ISHate split on the Hugging Face hub.
splits = {
    'train': 'ishate_train.parquet.gzip',
    'validation': 'ishate_dev.parquet.gzip',
    'test': 'ishate_test.parquet.gzip'
}

_ishate_split_names = ("train", "validation", "test")

# Read the three splits as pandas frames (in train / validation / test order).
df_train, df_val, df_test = (
    pd.read_parquet("hf://datasets/BenjaminOcampo/ISHate/" + splits[split_name])
    for split_name in _ishate_split_names
)

# create DatasetDict
ishate_dataset = DatasetDict({
    split_name: Dataset.from_pandas(frame)
    for split_name, frame in zip(_ishate_split_names, (df_train, df_val, df_test))
})

# Turn the string `hateful_layer` column into integer class ids.
# The encoder is fitted on the training split only and re-used for the others.
# NOTE(review): LabelEncoder numbers classes in sorted order, so here HS -> 0 and
# Non-HS -> 1; the microaggression datasets used later in this notebook use 1 for
# the aggressive class, i.e. the opposite polarity.
label_encoder = LabelEncoder()

train_layer = ishate_dataset['train']['hateful_layer']
val_layer = ishate_dataset['validation']['hateful_layer']
test_layer = ishate_dataset['test']['hateful_layer']

y_train_encoded = label_encoder.fit_transform(train_layer)
y_val_encoded = label_encoder.transform(val_layer)
y_test_encoded = label_encoder.transform(test_layer)

# Attach the integer ids as the `label` column expected by the Trainer.
ishate_train_data = ishate_dataset['train'].add_column('label', y_train_encoded.tolist())
ishate_val_data = ishate_dataset['validation'].add_column('label', y_val_encoded.tolist())
ishate_test_data = ishate_dataset['test'].add_column('label', y_test_encoded.tolist())


def preprocess_data(data, tokenizer, max_length=None):
    """Tokenize the ``cleaned_text`` entries of a batch to a fixed length.

    Intended for ``Dataset.map(..., batched=True, fn_kwargs={'tokenizer': ...})``.

    Args:
        data: Mapping with a ``'cleaned_text'`` entry holding the batch's texts
            (a list of strings).
        tokenizer: Callable Hugging Face tokenizer. It is invoked directly;
            ``batch_encode_plus`` is deprecated in favour of ``__call__``.
        max_length: Length every sequence is padded/truncated to. ``None``
            (the default) falls back to the notebook-level
            ``max_sequence_length``.

    Returns:
        Whatever the tokenizer returns for the batch; attention masks and
        token type ids are requested explicitly.
    """
    if max_length is None:
        # Keep the original behaviour when the caller does not override it.
        max_length = max_sequence_length

    # No return_tensors is requested; the result is handed back as-is for
    # Dataset.map to store.
    return tokenizer(
        data['cleaned_text'],
        max_length=max_length,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_token_type_ids=True,
    )


# Metric objects from the `evaluate` library; compute_metrics below calls both.
metric = evaluate.load('accuracy')
f1  = evaluate.load("f1")

def compute_metrics(p):
    """Score one evaluation pass.

    ``p`` unpacks into ``(model outputs, reference labels)``. The predicted
    class is the arg-max along axis 1 of the outputs. The returned dict holds
    everything the accuracy metric reports plus an ``"f1_macro"`` entry.
    """
    scores, references = p
    predicted_ids = np.argmax(scores, axis=1)

    results = dict(metric.compute(predictions=predicted_ids, references=references))
    macro_f1 = f1.compute(predictions=predicted_ids, references=references, average="macro")
    results["f1_macro"] = macro_f1["f1"]
    return results

Downloading builder script: 0.00B [00:00, ?B/s]

Downloading builder script: 0.00B [00:00, ?B/s]

In [None]:
def fine_tune_classification_model(classification_model,
                                   tokenizer,
                                   train_data,
                                   dev_data,
                                   batch_size = 16,
                                   num_epochs = 3,
                                   learning_rate=2e-5,
                                   output_dir="bert"):
    """
    Tokenize the train/dev data and fine-tune ``classification_model`` on it.

    Args:
        classification_model: Sequence-classification model handed to the Trainer.
            NOTE(review): the Trainer is given this very object, so the caller's
            model is presumably updated in place - confirm before reusing it.
        tokenizer: Tokenizer forwarded to ``preprocess_data``.
        train_data: Dataset with ``cleaned_text`` and ``label`` columns used for training.
        dev_data: Dataset with the same columns, evaluated after every epoch.
        batch_size: Per-device batch size for both training and evaluation.
        num_epochs: Number of training epochs.
        learning_rate: Learning rate passed to ``TrainingArguments``.
        output_dir: Directory for checkpoints. Defaults to ``"bert"`` (the previous
            hard-coded value); pass a distinct directory per fine-tuning stage so
            successive runs do not write their checkpoints into the same folder.

    Returns:
        The ``Trainer`` after ``train()`` has finished. The arguments request
        evaluation and checkpointing once per epoch and ask for the checkpoint
        with the best dev ``f1_macro`` to be loaded at the end.
    """

    preprocessed_train_data = train_data.map(preprocess_data, batched=True, fn_kwargs={'tokenizer': tokenizer})
    preprocessed_dev_data = dev_data.map(preprocess_data, batched=True, fn_kwargs={'tokenizer': tokenizer})

    training_args = TrainingArguments(
      output_dir=output_dir,
      per_device_train_batch_size=batch_size,
      per_device_eval_batch_size=batch_size,
      num_train_epochs=num_epochs,
      learning_rate=learning_rate,
      # Evaluate and checkpoint on the same schedule, as load_best_model_at_end needs.
      eval_strategy="epoch",
      save_strategy="epoch",
      report_to='none',
      load_best_model_at_end = True,
      # Key produced by compute_metrics.
      metric_for_best_model = "f1_macro",
      seed = 42
    )

    trainer = Trainer(
      model=classification_model,
      args=training_args,
      train_dataset=preprocessed_train_data,
      eval_dataset=preprocessed_dev_data,
      compute_metrics=compute_metrics
    )

    trainer.train()

    return trainer

In [None]:
# Pretrained checkpoint shared by the tokenizer and the classifier.
model_checkpoint_name = "bert-base-cased"

bert_tokenizer = AutoTokenizer.from_pretrained(model_checkpoint_name)

# Two output classes: every dataset in this notebook is a binary task.
bert_classification_model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint_name,
    num_labels=2,
)

tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Stage 1: fine-tune BERT on ISHate with the function's default hyper-parameters.
ishate_trainer = fine_tune_classification_model(
    classification_model=bert_classification_model,
    tokenizer=bert_tokenizer,
    train_data=ishate_train_data,
    dev_data=ishate_val_data,
)

Map:   0%|          | 0/55023 [00:00<?, ? examples/s]

Map:   0%|          | 0/4367 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1 Macro
1,0.1774,0.40165,0.855736,0.843283
2,0.1126,0.558222,0.860087,0.853995
3,0.0502,0.711765,0.861232,0.85486


In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Tokenize the ISHate test split and score the stage-1 model on it.
preprocessed_ishate_test_data = ishate_test_data.map(
    preprocess_data,
    batched=True,
    fn_kwargs={'tokenizer': bert_tokenizer},
)
predictions = ishate_trainer.predict(preprocessed_ishate_test_data)
preprocessed_ishate_test_pred = predictions.predictions.argmax(axis=1)

test_accuracy = accuracy_score(y_test_encoded, preprocessed_ishate_test_pred)
print(f"\nTest Accuracy on ISHate: {test_accuracy:.4f}")
print("\nClassification Report on ISHate:")
# Row names come from the encoder so they line up with the integer ids.
print(
    classification_report(
        y_test_encoded,
        preprocessed_ishate_test_pred,
        target_names=label_encoder.classes_,
    )
)

Map:   0%|          | 0/4368 [00:00<?, ? examples/s]


Test Accuracy on ISHate: 0.8670

Classification Report on ISHate:
              precision    recall  f1-score   support

          HS       0.81      0.86      0.83      1687
      Non-HS       0.91      0.87      0.89      2681

    accuracy                           0.87      4368
   macro avg       0.86      0.87      0.86      4368
weighted avg       0.87      0.87      0.87      4368



### Load SelfMA Generated ###

In [None]:
from datasets import load_from_disk, Dataset
import random
import pandas as pd
import numpy as np
import evaluate
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import TrainingArguments, Trainer

# Read the generated SelfMA train / validation / test CSVs from the mounted Drive.
train_df, val_df, test_df = map(
    pd.read_csv,
    (
        "/content/drive/MyDrive/W266_Fall2025_Neeha_Kotte/final_project/selfma_generated_combined_train_split.csv",
        "/content/drive/MyDrive/W266_Fall2025_Neeha_Kotte/final_project/selfma_generated_combined_validation_split.csv",
        "/content/drive/MyDrive/W266_Fall2025_Neeha_Kotte/final_project/selfma_generated_combined_test_split.csv",
    ),
)

def clean_dataframe(df):
    """Return a cleaned two-column copy of ``df`` ready for ``Dataset.from_pandas``.

    Keeps only ``text`` and ``label``, drops rows where either is missing
    (printing the row count before and after), casts ``text`` to ``str`` and
    ``label`` to ``int``, renames ``text`` to ``cleaned_text`` and resets the
    index. The input frame is not modified.

    Args:
        df: DataFrame containing at least ``text`` and ``label`` columns.

    Returns:
        A new DataFrame with columns ``cleaned_text`` and ``label``.
    """
    selected = df[['text', 'label']]

    # Remove any missing values that could be throwing errors
    print(f"  Before dropping NaN: {len(selected)} rows")
    complete = selected.dropna(subset=['text', 'label'])
    print(f"  After dropping NaN:  {len(complete)} rows")

    # Build the result with non-mutating calls instead of assigning columns on a
    # frame derived from another one (the SettingWithCopyWarning pattern).
    return (
        complete
        .astype({'text': str, 'label': int})
        .rename(columns={'text': 'cleaned_text'})
        .reset_index(drop=True)
    )

# Clean each split in train / validation / test order (the row counts print in that order).
train_df, val_df, test_df = (
    clean_dataframe(split_frame) for split_frame in (train_df, val_df, test_df)
)

# Wrap the cleaned frames as Hugging Face datasets.
balanced_selfMA_generated_train = Dataset.from_pandas(train_df)
balanced_selfMA_generated_val = Dataset.from_pandas(val_df)
balanced_selfMA_generated_test = Dataset.from_pandas(test_df)

# Seed Python's built-in RNG.
random.seed(42)

  Before dropping NaN: 2080 rows
  After dropping NaN:  2079 rows
  Before dropping NaN: 260 rows
  After dropping NaN:  260 rows
  Before dropping NaN: 260 rows
  After dropping NaN:  260 rows


### Evaluate on Generated SelfMA ###

In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Zero-shot check: score the ISHate-only model on the generated SelfMA test split.
preprocessed_selfMA_test_data = balanced_selfMA_generated_test.map(
    preprocess_data,
    batched=True,
    fn_kwargs={'tokenizer': bert_tokenizer},
)
predictions = ishate_trainer.predict(preprocessed_selfMA_test_data)

# The ISHate model answers in the LabelEncoder id space, where classes are numbered
# in sorted order ('HS' -> 0, 'Non-HS' -> 1). SelfMA uses 1 for the aggressive class,
# so the raw arg-max has the opposite polarity. Map "predicted HS" to 1 and everything
# else to 0 before comparing with the SelfMA labels.
ishate_hs_class_id = list(label_encoder.classes_).index('HS')
preprocessed_selfma_test_pred_bert_base = (
    np.argmax(predictions.predictions, axis=1) == ishate_hs_class_id
).astype(int)
y_true_selfma_bert_base = balanced_selfMA_generated_test['label']

test_accuracy_bert_base = accuracy_score(y_true_selfma_bert_base, preprocessed_selfma_test_pred_bert_base)
print(f"\nTest Accuracy on Generated SelfMA (ISHate): {test_accuracy_bert_base:.4f}")
print("\nClassification Report on Generated SelfMA (ISHate):")
print(classification_report(
    y_true_selfma_bert_base,
    preprocessed_selfma_test_pred_bert_base,
    target_names=['0: Non-aggressive/normal text', '1: Microaggression'],
))

Map:   0%|          | 0/260 [00:00<?, ? examples/s]


Test Accuracy on Generated SelfMA (ISHate): 0.4269

Classification Report on Generated SelfMA (ISHate):
                               precision    recall  f1-score   support

0: Non-aggressive/normal text       0.21      0.05      0.09       130
           1: Microaggression       0.46      0.80      0.58       130

                     accuracy                           0.43       260
                    macro avg       0.34      0.43      0.33       260
                 weighted avg       0.34      0.43      0.33       260



### Load & Evaluate Workplace MA: ###

In [None]:
# Download the workplace-microaggression CSV, trying several text encodings in turn.
micro_agg_url = "https://huggingface.co/spaces/khanak27/microaggressionsdetector/resolve/main/micro_agg.csv"

# Try different encodings to handle Unicode issues
# NOTE(review): 'latin-1' maps every byte value to a character, so it cannot raise
# UnicodeDecodeError. The entries after it ('iso-8859-1' is an alias of latin-1) and
# the utf-8 "replace" fallback below are therefore only reached after a non-decoding
# error. If cp1252 is the intended second choice it must come before latin-1 - confirm
# the file's real encoding before reordering, since that changes the decoded text.
encodings_to_try = ['utf-8', 'latin-1', 'cp1252', 'iso-8859-1', 'utf-8-sig']

# Stays None until one read succeeds.
df_micro = None
for encoding in encodings_to_try:
    try:
        print(f"Trying encoding: {encoding}")
        df_micro = pd.read_csv(micro_agg_url, encoding=encoding)
        print(f"✅ Successfully loaded with {encoding} encoding")
        break
    except UnicodeDecodeError as e:
        # Wrong encoding: report a truncated message and try the next one.
        print(f"❌ Failed with {encoding}: {str(e)[:100]}...")
        continue
    except Exception as e:
        # Any other failure is also reported and followed by the next encoding.
        print(f"❌ Other error with {encoding}: {str(e)[:100]}...")
        continue

# Last resort: read as UTF-8 and substitute undecodable bytes; re-raise if that fails too.
if df_micro is None:
    print("❌ Failed to load dataset with any encoding. Trying with error handling...")
    try:
        df_micro = pd.read_csv(micro_agg_url, encoding='utf-8', encoding_errors='replace')
        print("✅ Loaded with UTF-8 and error replacement")
    except Exception as e:
        print(f"❌ Final attempt failed: {e}")
        raise

# Quick inspection of the loaded frame: size, schema, nulls, class balance, samples.
print(f"Dataset shape: {df_micro.shape}")
print(f"Columns: {df_micro.columns.tolist()}")

print("\nFirst few rows:")
display(df_micro.head(10))

print("\nData types:")
print(df_micro.dtypes)

print("\nMissing values:")
print(df_micro.isna().sum())

print("\nLabel distribution:")
label_counts = df_micro['label'].value_counts()
print(label_counts.sort_index())

# Check for any text preprocessing needed
print("\nSample texts:")
for position in range(3):
    sample_row = df_micro.iloc[position]
    print(f"{position + 1}. Label {sample_row['label']}: {sample_row['speech']}")


# Expose the utterances under the column name preprocess_data reads.
df_micro['cleaned_text'] = df_micro['speech']

# Missing texts become empty strings so the tokenizer only ever sees str values.
workplace_texts = df_micro['cleaned_text'].fillna('').tolist()
workplace_labels = df_micro['label'].tolist()
workplace_microaggressions_dataset = Dataset.from_dict(
    {'cleaned_text': workplace_texts, 'label': workplace_labels}
)

# Tokenize once; this dataset is reused by the later evaluation cells.
preprocessed_workplace_microaggressions_data = workplace_microaggressions_dataset.map(
    preprocess_data, batched=True, fn_kwargs={'tokenizer': bert_tokenizer}
)

from sklearn.metrics import accuracy_score, classification_report

# Zero-shot check: score the ISHate-only model on the workplace microaggression set.
workplace_micro_predictions_after_ishate = ishate_trainer.predict(preprocessed_workplace_microaggressions_data)

# The ISHate model answers in the LabelEncoder id space, where classes are numbered
# in sorted order ('HS' -> 0, 'Non-HS' -> 1). This dataset uses 1 for microaggressions,
# so the raw arg-max has the opposite polarity. Map "predicted HS" to 1 and everything
# else to 0 before comparing with the dataset labels.
ishate_hs_class_id = list(label_encoder.classes_).index('HS')
y_workplace_micro_pred_after_ishate = (
    np.argmax(workplace_micro_predictions_after_ishate.predictions, axis=1) == ishate_hs_class_id
).astype(int)
y_workplace_micro_true = df_micro['label'].tolist()
micro_accuracy_sequential = accuracy_score(y_workplace_micro_true, y_workplace_micro_pred_after_ishate)

print(f"\n= Test Accuracy on Workplace MA (ISHate): {micro_accuracy_sequential:.4f}")
print("\nClassification Report on Workplace MA (ISHate):")
# zero_division=0 matches the later workplace reports and avoids the undefined-metric
# warning when one class is never predicted.
print(classification_report(
    y_workplace_micro_true,
    y_workplace_micro_pred_after_ishate,
    target_names=['Normal Speech', 'Microaggression'],
    zero_division=0
))


Trying encoding: utf-8
❌ Failed with utf-8: 'utf-8' codec can't decode byte 0xe2 in position 17: invalid continuation byte...
Trying encoding: latin-1
✅ Successfully loaded with latin-1 encoding
Dataset shape: (171, 2)
Columns: ['speech', 'label']

First few rows:


Unnamed: 0,speech,label
0,You're very articulate for someone like you.,1
1,Where are you really from?,1
2,You're not like other girls.,1
3,You must be good at math since you're Asian.,1
4,You're too pretty to be a software engineer.,1
5,Are you sure you want to lead this project?,1
6,You don't look gay.,1
7,You are a credit to your race.,1
8,"That's a strange name, is it foreign?",1
9,Do you even understand this topic?,1



Data types:
speech    object
label      int64
dtype: object

Missing values:
speech    0
label     0
dtype: int64

Label distribution:
label
0    87
1    84
Name: count, dtype: int64

Sample texts:
1. Label 1: You're very articulate for someone like you.
2. Label 1: Where are you really from?
3. Label 1: You're not like other girls.


Map:   0%|          | 0/171 [00:00<?, ? examples/s]


= Test Accuracy on Workplace MA (ISHate): 0.4620

Classification Report on Workplace MA (ISHate):
                 precision    recall  f1-score   support

  Normal Speech       0.00      0.00      0.00        87
Microaggression       0.48      0.94      0.63        84

       accuracy                           0.46       171
      macro avg       0.24      0.47      0.32       171
   weighted avg       0.23      0.46      0.31       171



In [None]:
from sklearn.model_selection import train_test_split

# Load the iSarcasmEval English training file and the task-A test file from Drive.
isarc_train_df = pd.read_csv('/content/drive/MyDrive/W266_Fall2025_Neeha_Kotte/final_project/train_EN_iSarcasmEval.csv')
isarc_test_df = pd.read_csv('/content/drive/MyDrive/W266_Fall2025_Neeha_Kotte/final_project/task_A_En_test.csv')

# Map both files onto the cleaned_text / label schema used by the rest of the
# notebook. The text column is read from `tweet` in the training file and from
# `text` in the test file.
isarc_train_cleaned_df = isarc_train_df.assign(
    cleaned_text=isarc_train_df['tweet'],
    label=isarc_train_df['sarcastic'],
)
isarc_test_cleaned_df = isarc_test_df.assign(
    cleaned_text=isarc_test_df['text'].fillna('').astype(str),
    label=isarc_test_df['sarcastic'],
)

# Hold out 20% of the training file for validation, stratified on the label
# (the column always exists here because it was assigned just above).
isarc_train_split, isarc_val_split = train_test_split(
    isarc_train_cleaned_df,
    test_size=0.2,
    random_state=42,
    stratify=isarc_train_cleaned_df['label'],
)

# Replace missing texts with '' and force str. `assign` returns new frames, so
# nothing is written into the objects returned by train_test_split (which is what
# triggers pandas' chained-assignment warning).
isarc_train_split = isarc_train_split.assign(
    cleaned_text=isarc_train_split['cleaned_text'].fillna('').astype(str)
)
isarc_val_split = isarc_val_split.assign(
    cleaned_text=isarc_val_split['cleaned_text'].fillna('').astype(str)
)

isarc_train_dataset = Dataset.from_pandas(isarc_train_split[['cleaned_text', 'label']].reset_index(drop=True))
isarc_val_dataset = Dataset.from_pandas(isarc_val_split[['cleaned_text', 'label']].reset_index(drop=True))
isarc_test_dataset = Dataset.from_pandas(isarc_test_cleaned_df[['cleaned_text', 'label']].reset_index(drop=True))

# Stage 2: continue from the ISHate-tuned weights on the sarcasm data.
# NOTE(review): the model object held by ishate_trainer is passed on directly, so
# ishate_trainer presumably no longer holds ISHate-only weights after this cell -
# confirm before re-running any earlier ISHate evaluation.
sarcasm_trainer = fine_tune_classification_model(
    classification_model=ishate_trainer.model,
    tokenizer=bert_tokenizer,
    train_data=isarc_train_dataset,
    dev_data=isarc_val_dataset,
    batch_size=16,
    num_epochs=3,
    learning_rate=1e-5,
)

Map:   0%|          | 0/2774 [00:00<?, ? examples/s]

Map:   0%|          | 0/694 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1 Macro
1,No log,0.56174,0.75072,0.434403
2,No log,0.561648,0.74928,0.4339
3,0.662100,0.569519,0.743516,0.447462


### Evaluate on Workplace MA ###

In [None]:
# Workplace MA after the sarcasm stage, reusing the already tokenized dataset.
workplace_micro_predictions_after_sarc = sarcasm_trainer.predict(
    preprocessed_workplace_microaggressions_data
)
y_workplace_micro_pred_after_sarc = workplace_micro_predictions_after_sarc.predictions.argmax(axis=1)

workplace_micro_accuracy_after_sarc = accuracy_score(
    y_workplace_micro_true,
    y_workplace_micro_pred_after_sarc,
)

print(f"\nWorkplace Microagg Accuracy (ISHate + Sarcasm): {workplace_micro_accuracy_after_sarc:.4f}")
print("\nClassification Report:")
print(
    classification_report(
        y_workplace_micro_true,
        y_workplace_micro_pred_after_sarc,
        target_names=['Normal Speech', 'Microaggression'],
        zero_division=0,
    )
)


Workplace Microagg Accuracy (ISHate + Sarcasm): 0.4035

Classification Report:
                 precision    recall  f1-score   support

  Normal Speech       0.43      0.52      0.47        87
Microaggression       0.36      0.29      0.32        84

       accuracy                           0.40       171
      macro avg       0.40      0.40      0.39       171
   weighted avg       0.40      0.40      0.40       171



### Evaluate on Generated SelfMA ###

In [None]:
# Generated SelfMA test split after the sarcasm stage.
predictions = sarcasm_trainer.predict(preprocessed_selfMA_test_data)
preprocessed_selfma_test_pred_after_sarc = predictions.predictions.argmax(axis=1)

test_selfma_accuracy_after_sarc = accuracy_score(
    y_true_selfma_bert_base,
    preprocessed_selfma_test_pred_after_sarc,
)
print(f"\n Generated SelfMA Test Accuracy (ISHate + Sarcasm): {test_selfma_accuracy_after_sarc:.4f}")
print("\nClassification Report:")
print(
    classification_report(
        y_true_selfma_bert_base,
        preprocessed_selfma_test_pred_after_sarc,
        target_names=['0: Non-aggressive/normal text', '1: Microaggression'],
    )
)


 Generated SelfMA Test Accuracy (ISHate + Sarcasm): 0.4462

Classification Report:
                               precision    recall  f1-score   support

0: Non-aggressive/normal text       0.47      0.75      0.58       130
           1: Microaggression       0.36      0.14      0.20       130

                     accuracy                           0.45       260
                    macro avg       0.41      0.45      0.39       260
                 weighted avg       0.41      0.45      0.39       260



### Train on Generated SelfMA ###

In [None]:
# Stage 3: continue from the sarcasm-stage model on the generated SelfMA training split.
generated_selfMA_trainer = fine_tune_classification_model(
    classification_model=sarcasm_trainer.model,
    tokenizer=bert_tokenizer,
    train_data=balanced_selfMA_generated_train,
    dev_data=balanced_selfMA_generated_val,
    batch_size=16,
    num_epochs=3,
    learning_rate=1e-5,
)

Map:   0%|          | 0/2079 [00:00<?, ? examples/s]

Map:   0%|          | 0/260 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1 Macro
1,No log,0.286674,0.880769,0.880767
2,No log,0.294842,0.892308,0.891995
3,No log,0.299616,0.896154,0.896029


### Evaluate on Generated SelfMA ###

In [None]:
# Generated SelfMA test split after all three fine-tuning stages.
predictions = generated_selfMA_trainer.predict(preprocessed_selfMA_test_data)
preprocessed_selfma_test_pred_last = predictions.predictions.argmax(axis=1)

test_selfma_accuracy_last = accuracy_score(
    y_true_selfma_bert_base,
    preprocessed_selfma_test_pred_last,
)
print(f"\n Generated SelfMA Test Accuracy (ISHate + Sarcasm + Generated SelfMA): {test_selfma_accuracy_last:.4f}")
print("\nClassification Report:")
print(
    classification_report(
        y_true_selfma_bert_base,
        preprocessed_selfma_test_pred_last,
        target_names=['0: Non-aggressive/normal text', '1: Microaggression'],
    )
)


 Generated SelfMA Test Accuracy (ISHate + Sarcasm + Generated SelfMA): 0.9192

Classification Report:
                               precision    recall  f1-score   support

0: Non-aggressive/normal text       0.90      0.94      0.92       130
           1: Microaggression       0.94      0.90      0.92       130

                     accuracy                           0.92       260
                    macro avg       0.92      0.92      0.92       260
                 weighted avg       0.92      0.92      0.92       260



### Evaluate on Workplace MA ###

In [None]:
# Workplace MA after all three fine-tuning stages, reusing the tokenized dataset.
workplace_micro_predictions_last = generated_selfMA_trainer.predict(
    preprocessed_workplace_microaggressions_data
)
y_workplace_micro_pred_last = workplace_micro_predictions_last.predictions.argmax(axis=1)

workplace_micro_accuracy_last = accuracy_score(
    y_workplace_micro_true,
    y_workplace_micro_pred_last,
)

print(f"\nWorkplace Microagg Accuracy (ISHate + Sarcasm + Generated SelfMA): {workplace_micro_accuracy_last:.4f}")
print("\nClassification Report:")
print(
    classification_report(
        y_workplace_micro_true,
        y_workplace_micro_pred_last,
        target_names=['Normal Speech', 'Microaggression'],
        zero_division=0,
    )
)


Workplace Microagg Accuracy (ISHate + Sarcasm + Generated SelfMA): 0.7719

Classification Report:
                 precision    recall  f1-score   support

  Normal Speech       0.70      0.98      0.81        87
Microaggression       0.96      0.56      0.71        84

       accuracy                           0.77       171
      macro avg       0.83      0.77      0.76       171
   weighted avg       0.83      0.77      0.76       171

