# Initial Toxicity predictions with RoBERTa
### Running times are a main concern for later expanding how much data we use, but for now will use this as basis for building some bias detection 

In [2]:
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

In [7]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import re
from tqdm import tqdm
from transformers import RobertaTokenizer, RobertaModel, RobertaForSequenceClassification
import torch
import torch.nn.functional as f
from torch.utils.data import DataLoader, TensorDataset
import numpy as np

import dataset

In [4]:
%pip install contractions

Note: you may need to restart the kernel to use updated packages.


In [3]:

df = pd.read_csv('all_data.csv')


In [6]:
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
roberta = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=1)  # Regression task

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:
# Preprocessing
def clean_text(text):
    text = text.lower()
    text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
    return text



In [5]:
df['comment_text'] = df['comment_text'].fillna('') # Remove NaN values
df['cleaned_comment'] = df['comment_text'].apply(clean_text) 

In [8]:
df['target'] = df['toxicity'] # Mark target col
data = dataset.split_dataframe(df)

# Assign dfs
traindf = data[0]
valdf = data[1]

29992 159782


# Skip From here

In [11]:
# Tokenize with Roberta

# do not remove stopwords (for more context for Roberta)
Xtrain_r = list(traindf['cleaned_comment'])
ytrain_r = list(traindf['toxicity'])
Xtest_r = list(valdf['cleaned_comment'])
ytest_r = list(valdf['toxicity'])


# Downsize dataset for reasonable runtimes
Xtrain_r = Xtrain_r[:25000]
Xtest_r = Xtest_r[:2500]
ytrain_r = ytrain_r[:25000]
ytest_r = ytest_r[:2500]

Xtrain_encodings = tokenizer(Xtrain_r, truncation=True, padding=True, max_length=200, return_tensors='pt') # choosing max length of comment
Xtest_encodings = tokenizer(Xtest_r, truncation=True, padding=True, max_length=200, return_tensors='pt')


In [12]:
# Convert to tensors to prepare for dataloader 
ytrain_tensor = torch.tensor(ytrain_r, dtype=torch.float)
ytest_tensor = torch.tensor(ytest_r, dtype=torch.float)

In [13]:
Xtraintorch = torch.utils.data.TensorDataset(Xtrain_encodings['input_ids'], Xtrain_encodings['attention_mask'], ytrain_tensor)
Xtesttorch = torch.utils.data.TensorDataset(Xtest_encodings['input_ids'], Xtest_encodings['attention_mask'], ytest_tensor)
# Try different batch size to reduce running time
train_dataloader = DataLoader(Xtraintorch, batch_size=128, shuffle=True)
test_dataloader = DataLoader(Xtesttorch, batch_size=128, shuffle=False)

### With batch size 8, 1000 rows of training data, 1 epoch: 10 min running time to train w Roberta

In [14]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
roberta = roberta.to(device)


In [15]:
optimizer = torch.optim.AdamW(roberta.parameters(), lr=1e-5)
roberta.train()
## 
for epoch in range(5):  #Testing with 10 initially 
    for batch in tqdm(train_dataloader):
        optimizer.zero_grad()  # Clear previous gradients
        
        # Assign input data and labels from batch
        input_ids = batch[0].to(device)
        attention_mask = batch[1].to(device)
        labels = batch[2].to(device)
        
        # Forward pass: Compute predictions and loss
        outputs = roberta(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        loss.backward()  # backpropagate loss
        optimizer.step() 
        
    print(f"Epoch {epoch + 1}: Loss {loss.item()}")

 35%|███▍      | 68/196 [00:56<01:46,  1.20it/s]


KeyboardInterrupt: 

In [16]:
def evaluate_model(model, test_dataloader):
    ''' Evaluate model on test data using same framework as the training loop
    Params: model: torch.nn.Module, test_dataloader: torch.dataloader 
    Output: tuple: pred (list) of predicted toxicity scores for the test data, actual (list) of true scores
    '''
    model.eval()
    pred = []
    actual = []
    
    with torch.no_grad(): # no gradient calculation for faster running 
        for batch in tqdm(test_dataloader):
            # Get the input data and labels from the batch
            input_ids = batch[0].to(device)
            attention_mask = batch[1].to(device)
            labels = batch[2].to(device)
            
            # Forward pass: Compute predictions
            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            logits = outputs.logits

            pred.extend(logits.cpu().numpy())
            actual.extend(labels.cpu().numpy()) 

    return pred, actual
            
        

In [17]:
pred1, actual1 = evaluate_model(roberta, test_dataloader)

# Calculate MSE of roberta model, trained with batch size 8, 200 rows of test data, 1 epoch
mse1 = mean_squared_error(actual1, pred1)

100%|██████████| 20/20 [00:05<00:00,  3.66it/s]


In [None]:
print(f'Mean Squared Error for Roberta model using 1000/20 data split: {mse1}')

Mean Squared Error for Roberta model using 1000/20 data split: 0.06018800660967827


In [18]:
pred2, actual2 = evaluate_model(roberta, test_dataloader)

# Calculate MSE of roberta model, trained with batch size 128, 2500 rows of test data, 1 epoch
mse2 = mean_squared_error(actual2, pred2)

100%|██████████| 20/20 [00:05<00:00,  3.66it/s]


 #### Low MSE and Loss on Roberta toxicity predictions.

### Calculating disparate bias for comments that include identities:

## Run from here:

In [9]:
# Using smaller subset 
trainsubset = traindf[:25000]
valsubset = valdf[:2500]

In [10]:
# Treat NaN values in identity cols
identities = ['male', 'female', 'transgender',
       'other_gender', 'heterosexual', 'homosexual_gay_or_lesbian', 'bisexual',
       'other_sexual_orientation', 'christian', 'jewish', 'muslim', 'hindu',
       'buddhist', 'atheist', 'other_religion', 'black', 'white', 'asian',
       'latino', 'other_race_or_ethnicity', 'physical_disability',
       'intellectual_or_learning_disability', 'psychiatric_or_mental_illness',
       'other_disability']

trainsubset[identities] = trainsubset[identities].fillna(0.0)

valsubset[identities] = valsubset[identities].fillna(0.0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  trainsubset[identities] = trainsubset[identities].fillna(0.0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  valsubset[identities] = valsubset[identities].fillna(0.0)


## Skip from here:

In [21]:
# Save previous Roberta predictions to test for bias
valbias = valsubset.copy
valbias['predicted_toxicity'] = pred2


TypeError: 'method' object does not support item assignment

In [27]:
def count_bias(df, identities):
    scores = {}
    for id in identities:
        identities_and_toxicity = df[df[id] == 1]['predicted_toxicity']
        no_identities_and_toxicity = df[df[id] == 0]['predicted_toxicity']


        avg_with_id = identities_and_toxicity.mean()
        avg_no_id = no_identities_and_toxicity.mean()
        
        disparity = avg_with_id - avg_no_id 
        scores[id] = disparity

    return scores
    

In [28]:
detected_bias = count_bias(valbias, identities)
print("Bias Scores (Disparate Impact) per Identity:")
for identity, score in detected_bias.items():
    print(f"{identity}: {score}")
    

TypeError: 'method' object is not subscriptable

In [24]:
import matplotlib.pyplot as plt

In [25]:
#Plot disparate bias found with Roberta model
biasdf = pd.DataFrame.from_dict(detected_bias, orient='index', columns=['bias'])
biasdf = biasdf.dropna()

plt.figure(figsize=(12, 8))

biasdf.sort_values('bias', ascending=False).plot(kind='bar', legend=False, color='c')

plt.title("RoBERTa's Disparate Impact per Identity Group", fontsize=12)
plt.ylabel('Bias Score', fontsize=12)
plt.xticks(rotation=60, ha='right')
plt.tight_layout()
plt.show()


NameError: name 'detected_bias' is not defined

### Adjusting model to address bias 

# Run from here

In [11]:
def adversarialloss(preds, identities_col):
    ''' Calculate loss for the adversarial model'''
    return f.mse_loss(preds.flatten(), identities_col.flatten())

def biasloss(modelchoice, input_ids, attention_mask, labels, identities_col, bias_penalty_factor=0.5):
    '''Calculates loss based on model's toxicity prediction with the bias penalty 
     bias_penalty_factor: factor for how much to penalize bias '''
    logits = modelchoice(input_ids, attention_mask=attention_mask).logits
    loss = f.mse_loss(logits.flatten(), labels)
    # Adversarial loss
    adversary_logits = modelchoice(input_ids, attention_mask=attention_mask).logits  
    adversary_loss = adversarialloss(adversary_logits, identities_col)
    
    # Calculate with adversarial penalty
    total_loss = loss - bias_penalty_factor * adversary_loss
    
    return total_loss


In [12]:
# Train Roberta with bias penalty

def train_with_penalty(model, train_dataloader, optimizer, device, bias_penalty_factor=0.5):
    ''' Training roberta model with adversarial loss to address bias'''
    
    model.train()
    
    for epoch in range(5):
        for batch in tqdm(train_dataloader):
            optimizer.zero_grad()  # Clear gradients

            input_ids = batch[0].to(device)
            attention_mask = batch[1].to(device)
            labels = batch[2].to(device)
            identities_col = batch[3].to(device)  
                        
            # Compute the loss with bias regularization
            loss = biasloss(model, input_ids, attention_mask, labels, identities_col, bias_penalty_factor)
            loss.backward() 
            optimizer.step()

    print(f"Epoch {epoch + 1}: Loss {loss.item()}")

### Apply bias mitigation with adversarial loss to data

In [15]:
Xtrain_r2 = list(trainsubset['cleaned_comment'])
ytrain_r2 = list(trainsubset['toxicity'])
Xtest_r2 = list(valsubset['cleaned_comment'])
ytest_r2 = list(valsubset['toxicity'])

tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
roberta2 = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=1)  # Regression task

Xtrain_encodings = tokenizer(Xtrain_r2, truncation=True, padding=True, max_length=200, return_tensors='pt') # choosing max length of comment
Xtest_encodings = tokenizer(Xtest_r2, truncation=True, padding=True, max_length=200, return_tensors='pt')

# Convert to tensors to prepare for dataloader 
ytrain_tensor = torch.tensor(ytrain_r2, dtype=torch.float)
ytest_tensor = torch.tensor(ytest_r2, dtype=torch.float)

identities_col_train = torch.tensor(trainsubset[identities].values, dtype=torch.float)
identities_col_test = torch.tensor(valsubset[identities].values, dtype=torch.float)

Xtraintorch = TensorDataset(Xtrain_encodings['input_ids'], Xtrain_encodings['attention_mask'], ytrain_tensor, identities_col_train)
Xtesttorch = TensorDataset(Xtest_encodings['input_ids'], Xtest_encodings['attention_mask'], ytest_tensor, identities_col_test)
# Try different batch size to reduce running time
train_dataloader = DataLoader(Xtraintorch, batch_size=128, shuffle=True)
test_dataloader = DataLoader(Xtesttorch, batch_size=128, shuffle=False)


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [16]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
roberta2 = roberta2.to(device)
optimizer = torch.optim.AdamW(roberta2.parameters(), lr=1e-5)

In [19]:
with torch.no_grad():
    torch.cuda.empty_cache()


In [21]:
train_with_penalty(roberta2, train_dataloader, optimizer, device, bias_penalty_factor=0.5)


  0%|          | 0/196 [00:00<?, ?it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 76.00 MiB. GPU 0 has a total capacity of 23.69 GiB of which 43.00 MiB is free. Process 970332 has 1.66 GiB memory in use. Including non-PyTorch memory, this process has 21.95 GiB memory in use. Of the allocated memory 21.56 GiB is allocated by PyTorch, and 89.62 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)