# Installs & Imports

In [None]:
!pip install transformers torch pandas scikit-learn



In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

from transformers import GPT2Tokenizer, GPT2LMHeadModel, Trainer, TrainingArguments

import torch
from torch.utils.data import Dataset, DataLoader

In [None]:
# Mount Google Drive at /content/drive so files stored there can be read
# from this Colab runtime (the dataset is loaded from that mount below).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Read the raw COMPAS CSV from the mounted Drive folder into `df`.
COMPAS_CSV_PATH = '/content/drive/MyDrive/data_sets/compas-scores-two-years.csv'
df = pd.read_csv(COMPAS_CSV_PATH)

In [None]:
# Keep only rows whose screening date is within 30 days of the arrest, whose
# recidivism flag is known, whose charge degree is not 'O', and which have a
# COMPAS score text.
#
# `pd.read_csv` converts the literal string 'N/A' to NaN by default, so the
# `!= 'N/A'` comparison on its own never removes anything (NaN != 'N/A' is
# True). The explicit `.notna()` check drops those rows as intended; the string
# comparison is kept in case the frame was loaded with keep_default_na=False.
df = df[(df["days_b_screening_arrest"] <= 30)
        & (df["days_b_screening_arrest"] >= -30)
        & (df["is_recid"] != -1)
        & (df["c_charge_degree"] != 'O')
        & df["score_text"].notna()
        & (df["score_text"] != 'N/A')].reset_index(drop=True)

In [None]:
# Columns carried forward: demographics, juvenile/prior counts, the decile
# score, the charge degree and the two-year recidivism outcome.
cols_to_keep = [
    'sex',
    'age',
    'race',
    'juv_fel_count',
    'juv_misd_count',
    'decile_score',
    'juv_other_count',
    'priors_count',
    'c_charge_degree',
    'two_year_recid',
]

# Narrow the filtered frame to those columns and display it.
df_1 = df[cols_to_keep]
df_1

Unnamed: 0,sex,age,race,juv_fel_count,juv_misd_count,decile_score,juv_other_count,priors_count,c_charge_degree,two_year_recid
0,Male,69,Other,0,0,1,0,0,F,0
1,Male,34,African-American,0,0,3,0,0,F,1
2,Male,24,African-American,0,0,4,1,4,F,1
3,Male,44,Other,0,0,1,0,0,M,0
4,Male,41,Caucasian,0,0,6,0,14,F,1
...,...,...,...,...,...,...,...,...,...,...
6167,Male,23,African-American,0,0,7,0,0,F,0
6168,Male,23,African-American,0,0,3,0,0,F,0
6169,Male,57,Other,0,0,1,0,0,F,0
6170,Female,33,African-American,0,0,2,0,3,M,0


In [None]:
# Restrict the data to the Caucasian and African-American groups.
# `.copy()` makes df_1 an independent frame rather than a filtered slice, so
# adding columns to it later does not raise pandas' SettingWithCopyWarning.
df_1 = df_1[df_1['race'].isin(['Caucasian', 'African-American'])].copy()
df_1

Unnamed: 0,sex,age,race,juv_fel_count,juv_misd_count,decile_score,juv_other_count,priors_count,c_charge_degree,two_year_recid
1,Male,34,African-American,0,0,3,0,0,F,1
2,Male,24,African-American,0,0,4,1,4,F,1
4,Male,41,Caucasian,0,0,6,0,14,F,1
6,Female,39,Caucasian,0,0,1,0,0,M,0
7,Male,27,Caucasian,0,0,4,0,0,F,0
...,...,...,...,...,...,...,...,...,...,...
6165,Male,30,African-American,0,0,2,0,0,M,1
6166,Male,20,African-American,0,0,9,0,0,F,0
6167,Male,23,African-American,0,0,7,0,0,F,0
6168,Male,23,African-American,0,0,3,0,0,F,0


# Splitting

In [None]:
# Convert each row to a "<features> => <label>" string suitable for GPT-2.
# `assign` returns a new frame instead of writing a column into a filtered
# slice, which is what triggered SettingWithCopyWarning.
# NOTE(review): juv_misd_count is listed in cols_to_keep but is not part of
# the prompt text — confirm this omission is intentional.
df_1 = df_1.assign(
    input_text=df_1.apply(
        lambda row: f"{row['sex']} {row['age']} {row['race']} {row['juv_fel_count']} {row['decile_score']} {row['juv_other_count']} {row['priors_count']} {row['c_charge_degree']} => {row['two_year_recid']}",
        axis=1,
    )
)

# Split data into train (70%) and a temporary hold-out set (30%)
train_texts, temp_texts = train_test_split(df_1['input_text'], test_size=0.3, random_state=42)

# Split the hold-out set evenly into validation (15%) and test (15%)
val_texts, test_texts = train_test_split(temp_texts, test_size=0.5, random_state=42)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_1['input_text'] = df_1.apply(lambda row: f"{row['sex']} {row['age']} {row['race']} {row['juv_fel_count']} {row['decile_score']} {row['juv_other_count']} {row['priors_count']} {row['c_charge_degree']} => {row['two_year_recid']}", axis=1)


In [None]:
# Preview the first few training strings ("<features> => <label>").
train_texts.head()

Unnamed: 0,input_text
902,Male 53 African-American 0 3 0 2 F => 0
618,Female 36 African-American 0 10 0 4 F => 1
4992,Female 56 Caucasian 0 1 0 3 F => 0
39,Male 29 African-American 0 7 0 0 F => 1
3831,Male 27 Caucasian 0 2 0 2 F => 0


In [None]:
# Preview the first few validation strings ("<features> => <label>").
val_texts.head()

Unnamed: 0,input_text
1944,Male 43 Caucasian 0 5 0 5 F => 0
2041,Male 55 African-American 0 4 0 1 F => 0
857,Male 31 African-American 0 10 0 0 F => 0
1116,Male 26 African-American 0 4 0 0 F => 1
3565,Male 27 Caucasian 0 7 0 2 F => 1


In [None]:
def generate_predictions(model, tokenizer, texts, max_length=50):
    """Generate a completion for the prompt part of every text.

    Each text is cut at its first '=>' so that only the features remain,
    ' =>' is appended again, and the model is asked to continue from there.

    Args:
        model: Model exposing ``to``, ``eval`` and ``generate``.
        tokenizer: Tokenizer used to encode prompts and decode outputs; its
            ``eos_token_id`` is passed to ``generate`` as the padding id.
        texts: Iterable of strings, normally "<features> => <label>".
        max_length: Token limit used both for prompt truncation and as the
            ``max_length`` of ``generate``. Defaults to 50.

    Returns:
        list[str]: One decoded output (special tokens removed) per input
        text, in input order. Empty input yields an empty list.

    Side effects:
        Moves ``model`` to CUDA when available (otherwise CPU) and leaves it
        in evaluation mode.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    # Generation must not depend on whichever mode the caller left the model
    # in: with dropout active the outputs would not be reproducible.
    model.eval()

    generated_results = []
    with torch.no_grad():
        for text in texts:
            # Keep only the left side of '=>' so the label is never shown to the model
            formatted_prompt = text.split('=>')[0].strip() + ' =>'

            # Tokenize the prompt and move the tensors to the model's device
            encoding = tokenizer(
                formatted_prompt,
                return_tensors='pt',
                padding=True,
                truncation=True,
                max_length=max_length,
                return_attention_mask=True
            ).to(device)

            output = model.generate(
                encoding['input_ids'],
                attention_mask=encoding['attention_mask'],
                max_length=max_length,
                num_return_sequences=1,
                pad_token_id=tokenizer.eos_token_id
            )

            # Only one sequence is requested, so decode the first (and only) row
            decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)
            generated_results.append(decoded_output)

    return generated_results

In [None]:
class RecidivismDataset(Dataset):
    """Torch dataset that tokenizes text examples for causal-LM fine-tuning.

    Every item is padded/truncated to ``max_length`` tokens and carries
    ``input_ids``, ``attention_mask`` and ``labels``.

    Args:
        texts: pandas Series (read positionally via ``.iloc``) or any plain
            indexable sequence of strings.
        tokenizer: Callable tokenizer returning 'input_ids' and
            'attention_mask' tensors.
        max_length: Fixed length every example is padded/truncated to.
    """

    def __init__(self, texts, tokenizer, max_length=128):
        self.texts = texts
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        # Series need positional access; lists/tuples are indexed directly.
        text = self.texts.iloc[idx] if hasattr(self.texts, 'iloc') else self.texts[idx]
        encoding = self.tokenizer(
            text,
            truncation=True,
            max_length=self.max_length,
            return_tensors="pt",
            padding="max_length",
        )
        input_ids = encoding['input_ids'].flatten()
        attention_mask = encoding['attention_mask'].flatten()

        # Labels are the inputs themselves, but padding must not be scored:
        # with short texts padded to max_length, almost every target would
        # otherwise be a padding token and dominate the loss. -100 is the
        # index ignored by the Hugging Face LM loss.
        labels = input_ids.clone()
        labels[attention_mask == 0] = -100

        # Keep the first padding token as a target so the model still learns
        # to stop after the label (this notebook uses EOS as the pad token).
        # Only applies to right-padded sequences that actually contain padding.
        n_real = int(attention_mask.sum())
        if n_real < labels.numel() and bool(attention_mask[:n_real].all()):
            labels[n_real] = input_ids[n_real]

        return {
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'labels': labels  # training targets; -100 entries are ignored by the loss
        }

# Pretrained GPT-2 tokenizer; EOS doubles as the padding token because the
# datasets below pad every example to a fixed length.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token

# Pretrained GPT-2 language model that will be fine-tuned.
model = GPT2LMHeadModel.from_pretrained('gpt2')

# Wrap the training and validation texts as torch datasets.
train_dataset, val_dataset = (
    RecidivismDataset(split_texts, tokenizer)
    for split_texts in (train_texts, val_texts)
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

# Training GPT-2 model

In [None]:
# Fine-tuning configuration. Evaluation and checkpointing both happen once
# per epoch so the best checkpoint can be reloaded when training ends.
training_args = TrainingArguments(
    # Output locations and logging cadence
    output_dir='./results',
    logging_dir='./logs',
    logging_steps=10,
    # Optimisation schedule
    num_train_epochs=3,
    warmup_steps=500,
    weight_decay=0.01,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    # Per-epoch evaluation / checkpointing
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)

# Trainer wiring the model to the training and validation datasets.
trainer = Trainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

# Run fine-tuning; the returned TrainOutput is displayed as the cell result.
trainer.train()



Epoch,Training Loss,Validation Loss
1,0.0854,0.083664
2,0.0796,0.080937
3,0.0795,0.079785


There were missing keys in the checkpoint model loaded: ['lm_head.weight'].


TrainOutput(global_step=2772, training_loss=0.19351468780340053, metrics={'train_runtime': 151.8044, 'train_samples_per_second': 73.002, 'train_steps_per_second': 18.26, 'total_flos': 723909574656000.0, 'train_loss': 0.19351468780340053, 'epoch': 3.0})

In [None]:
def compute_metrics(eval_pred):
    """Compute accuracy, precision, recall and F1 from a (logits, labels) pair.

    Predictions are the argmax of ``logits`` over the last axis; precision,
    recall and F1 use sklearn's ``average='binary'``.

    NOTE(review): this assumes one 0/1 class prediction per example. The model
    in this notebook is a GPT2LMHeadModel trained with token-id labels, so the
    argmax presumably yields token ids per position rather than a class —
    confirm this function is usable with that model before relying on it.

    Args:
        eval_pred: Two-item iterable ``(logits, labels)``.

    Returns:
        dict with keys 'accuracy', 'precision', 'recall' and 'f1'.
    """
    logits, labels = eval_pred
    # Highest-scoring index along the last axis is taken as the prediction
    predictions = logits.argmax(axis=-1)

    accuracy = accuracy_score(labels, predictions)
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, predictions, average='binary'
    )

    return dict(accuracy=accuracy, precision=precision, recall=recall, f1=f1)

In [None]:
# Evaluate the model on the validation set.
# A new Trainer is created here (rebinding `trainer`) so that the custom
# compute_metrics function is attached to the evaluation run.
# NOTE(review): compute_metrics treats argmax(logits) as a 0/1 class
# prediction while `model` is a GPT2LMHeadModel — confirm the metrics it
# returns are meaningful for this setup.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics  # custom metrics function defined above
)

# Run evaluation on eval_dataset and show the resulting metrics.
eval_results = trainer.evaluate()
print(eval_results)

# Model Performance Metrics

In [None]:
# Run generation over every validation example
val_texts_list = val_texts.to_list()
generated_val_results = generate_predictions(model, tokenizer, val_texts_list)

# Side-by-side frame: original example vs. the text the model produced
val_texts_df = pd.DataFrame(
    dict(input_text=val_texts_list, generated_output=generated_val_results)
)

# The label is whatever follows the final '=>' — in the reference text ...
true_labels = [int(sample.rsplit('=>', 1)[-1].strip()) for sample in val_texts_list]

# ... and in the generated text
predicted_labels = [
    int(generated.rsplit('=>', 1)[-1].strip()) for generated in generated_val_results
]

# Accuracy on the validation set
accuracy = accuracy_score(true_labels, predicted_labels)
print(f"Validation Accuracy: {accuracy * 100:.2f}%")

# Precision / recall / F1 for the positive class
precision, recall, f1, _ = precision_recall_fscore_support(
    true_labels, predicted_labels, average='binary'
)
print(f"Validation Precision: {precision:.2f}")
print(f"Validation Recall: {recall:.2f}")
print(f"Validation F1 Score: {f1:.2f}")

Validation Accuracy: 64.65%
Validation Precision: 0.64
Validation Recall: 0.60
Validation F1 Score: 0.62


In [None]:
# Run generation over every test example
test_texts_list = test_texts.to_list()
generated_test_results = generate_predictions(model, tokenizer, test_texts_list)

# Side-by-side frame: original example vs. the text the model produced
test_texts_df = pd.DataFrame(
    dict(input_text=test_texts_list, generated_output=generated_test_results)
)

# True and predicted labels are read from whatever follows the final '=>'
true_labels = [int(sample.rsplit('=>', 1)[-1].strip()) for sample in test_texts_list]
predicted_labels = [
    int(generated.rsplit('=>', 1)[-1].strip()) for generated in generated_test_results
]

# Accuracy on the test set
accuracy = accuracy_score(true_labels, predicted_labels)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

# Precision / recall / F1 for the positive class
precision, recall, f1, _ = precision_recall_fscore_support(
    true_labels, predicted_labels, average='binary'
)
print(f"Test Precision: {precision:.2f}")
print(f"Test Recall: {recall:.2f}")
print(f"Test F1 Score: {f1:.2f}")

                                input_text  \
0         Male 24 Caucasian 0 2 0 1 M => 0   
1  Male 20 African-American 1 3 0 1 F => 0   
2         Male 54 Caucasian 0 2 0 3 F => 0   
3       Female 51 Caucasian 0 5 0 5 F => 0   
4       Female 35 Caucasian 0 7 0 4 M => 0   

                          generated_output  
0         Male 24 Caucasian 0 2 0 1 M => 0  
1  Male 20 African-American 1 3 0 1 F => 1  
2         Male 54 Caucasian 0 2 0 3 F => 0  
3       Female 51 Caucasian 0 5 0 5 F => 0  
4       Female 35 Caucasian 0 7 0 4 M => 1  
Test Accuracy: 68.56%
Test Precision: 0.65
Test Recall: 0.66
Test F1 Score: 0.65


In [None]:
# Row labels of the test split; they point back into df_1
test_indices = test_texts.index

# Copy the sensitive attributes over as plain arrays, so they are matched to
# test_texts_df by position rather than by index label
test_texts_df['race'] = df_1['race'].loc[test_indices].to_numpy()
test_texts_df['sex'] = df_1['sex'].loc[test_indices].to_numpy()

In [None]:
# True label: the integer after the final '=>' of the original text
test_texts_df['true_label'] = test_texts_df['input_text'].map(
    lambda text: int(text.split('=>')[-1].strip())
)
# Predicted label: parsed the same way from the generated text, falling back
# to 0 when the generation contains no '=>' at all
test_texts_df['predicted_label'] = test_texts_df['generated_output'].map(
    lambda text: 0 if '=>' not in text else int(text.split('=>')[-1].strip())
)

# Fairness Metrics

In [None]:
def demographic_parity(df, sensitive_feature, predicted_label):
    """Mean predicted label per group and the spread between groups.

    Args:
        df: DataFrame holding the group and prediction columns.
        sensitive_feature: Column (or columns) to group by.
        predicted_label: Name of the prediction column; with 0/1 values its
            mean is the share of positive predictions.

    Returns:
        tuple: (Series of per-group means, largest mean minus smallest mean).
    """
    rate_by_group = df.groupby(sensitive_feature)[predicted_label].mean()
    highest = rate_by_group.max()
    lowest = rate_by_group.min()
    return rate_by_group, highest - lowest

def equalized_odds(df, sensitive_feature, predicted_label, true_label):
    """Per-group true/false positive rates and their spread between groups.

    Labels are compared explicitly against 1 (positive) and 0 (negative)
    instead of combining integer columns with bitwise ``&``, so the result
    does not depend on the integer bit patterns of the label values.

    Args:
        df: DataFrame holding the group, prediction and ground-truth columns.
        sensitive_feature: Column (or columns) to group by.
        predicted_label: Name of the 0/1 prediction column.
        true_label: Name of the 0/1 ground-truth column.

    Returns:
        tuple: (DataFrame indexed by group with columns 'TPR' and 'FPR',
        max-min TPR, max-min FPR). A group without actual positives
        (negatives) gets NaN for TPR (FPR); NaN is skipped by max/min.
    """
    def _rates(group):
        predicted_pos = group[predicted_label] == 1
        actual_pos = group[true_label] == 1
        actual_neg = group[true_label] == 0
        n_pos = actual_pos.sum()
        n_neg = actual_neg.sum()
        return pd.Series({
            # Undefined rates are reported as NaN rather than dividing by zero
            'TPR': (predicted_pos & actual_pos).sum() / n_pos if n_pos else float('nan'),
            'FPR': (predicted_pos & actual_neg).sum() / n_neg if n_neg else float('nan'),
        })

    # Selecting the two label columns keeps the grouping column(s) out of the
    # frames passed to apply (recent pandas deprecates including them).
    tpr_fpr = df.groupby(sensitive_feature)[[predicted_label, true_label]].apply(_rates)
    tpr_diff = tpr_fpr['TPR'].max() - tpr_fpr['TPR'].min()
    fpr_diff = tpr_fpr['FPR'].max() - tpr_fpr['FPR'].min()
    return tpr_fpr, tpr_diff, fpr_diff

def predictive_parity(df, sensitive_feature, predicted_label, true_label):
    """Per-group positive predictive value (PPV) and its spread between groups.

    PPV is the share of positive predictions whose true label is also
    positive. Labels are compared explicitly against 1 instead of combining
    integer columns with bitwise ``&``.

    Args:
        df: DataFrame holding the group, prediction and ground-truth columns.
        sensitive_feature: Column (or columns) to group by.
        predicted_label: Name of the 0/1 prediction column.
        true_label: Name of the 0/1 ground-truth column.

    Returns:
        tuple: (DataFrame indexed by group with a single 'PPV' column,
        max-min PPV). A group without any positive prediction gets NaN,
        which max/min skip.
    """
    def _ppv(group):
        predicted_pos = group[predicted_label] == 1
        actual_pos = group[true_label] == 1
        n_predicted_pos = predicted_pos.sum()
        # Undefined PPV is reported as NaN rather than dividing by zero
        value = (predicted_pos & actual_pos).sum() / n_predicted_pos if n_predicted_pos else float('nan')
        return pd.Series({'PPV': value})

    # Selecting the two label columns keeps the grouping column(s) out of the
    # frames passed to apply (recent pandas deprecates including them).
    ppv = df.groupby(sensitive_feature)[[predicted_label, true_label]].apply(_ppv)
    ppv_diff = ppv['PPV'].max() - ppv['PPV'].min()
    return ppv, ppv_diff

In [None]:
# Demographic parity on the test set, grouped by race and by sex
dp_race_test, dp_race_diff = demographic_parity(
    df=test_texts_df, sensitive_feature='race', predicted_label='predicted_label'
)
dp_sex_test, dp_sex_diff = demographic_parity(
    df=test_texts_df, sensitive_feature='sex', predicted_label='predicted_label'
)

# Per-group rates followed by the gap between groups
print("Test Set Demographic Parity by Race:\n", dp_race_test)
print(f"Demographic Parity Difference by Race: {dp_race_diff}\n")
print("Test Set Demographic Parity by Sex:\n", dp_sex_test)
print(f"Demographic Parity Difference by Sex: {dp_sex_diff}\n")

Test Set Demographic Parity by Race:
 race
African-American    0.587368
Caucasian           0.261830
Name: predicted_label, dtype: float64
Demographic Parity Difference by Race: 0.32553876805578613

Test Set Demographic Parity by Sex:
 sex
Female    0.303797
Male      0.495268
Name: predicted_label, dtype: float64
Demographic Parity Difference by Sex: 0.19147067044683141



In [None]:
# Equalized odds (TPR / FPR) on the test set, grouped by race and by sex
eo_race_test, eo_race_tpr_diff, eo_race_fpr_diff = equalized_odds(
    df=test_texts_df,
    sensitive_feature='race',
    predicted_label='predicted_label',
    true_label='true_label',
)
eo_sex_test, eo_sex_tpr_diff, eo_sex_fpr_diff = equalized_odds(
    df=test_texts_df,
    sensitive_feature='sex',
    predicted_label='predicted_label',
    true_label='true_label',
)

# Per-group rates followed by the TPR and FPR gaps between groups
print("Test Set Equalized Odds by Race:\n", eo_race_test)
print(f"Equalized Odds TPR Difference by Race: {eo_race_tpr_diff}")
print(f"Equalized Odds FPR Difference by Race: {eo_race_fpr_diff}\n")
print("Test Set Equalized Odds by Sex:\n", eo_sex_test)
print(f"Equalized Odds TPR Difference by Sex: {eo_sex_tpr_diff}")
print(f"Equalized Odds FPR Difference by Sex: {eo_sex_fpr_diff}\n")

Test Set Equalized Odds by Race:
                        TPR       FPR
race                                
African-American  0.763485  0.405983
Caucasian         0.438596  0.162562
Equalized Odds TPR Difference by Race: 0.32488898595035304
Equalized Odds FPR Difference by Race: 0.24342132962822619

Test Set Equalized Odds by Sex:
              TPR       FPR
sex                       
Female  0.418182  0.242718
Male    0.703333  0.308383
Equalized Odds TPR Difference by Sex: 0.2851515151515152
Equalized Odds FPR Difference by Sex: 0.06566478693099237



In [None]:
# Predictive parity (PPV) on the test set, grouped by race and by sex
pp_race_test, pp_race_diff = predictive_parity(
    df=test_texts_df,
    sensitive_feature='race',
    predicted_label='predicted_label',
    true_label='true_label',
)
pp_sex_test, pp_sex_diff = predictive_parity(
    df=test_texts_df,
    sensitive_feature='sex',
    predicted_label='predicted_label',
    true_label='true_label',
)

# Per-group PPV followed by the gap between groups
print("Test Set Predictive Parity by Race:\n", pp_race_test)
print(f"Predictive Parity Difference by Race: {pp_race_diff}\n")
print("Test Set Predictive Parity by Sex:\n", pp_sex_test)
print(f"Predictive Parity Difference by Sex: {pp_sex_diff}\n")

Test Set Predictive Parity by Race:
                        PPV
race                      
African-American  0.659498
Caucasian         0.602410
Predictive Parity Difference by Race: 0.057088569331087746

Test Set Predictive Parity by Sex:
              PPV
sex             
Female  0.479167
Male    0.671975
Predictive Parity Difference by Sex: 0.1928078556263269

