**Packages**

In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display, HTML
from itertools import product

from sklearn.metrics import make_scorer
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.metrics import  accuracy_score, balanced_accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, classification_report,  precision_recall_curve, auc 

import xgboost as xgb
from xgboost import XGBClassifier, plot_importance 

!pip install fairlearn
from fairlearn.metrics import MetricFrame, demographic_parity_difference, equalized_odds_difference, false_positive_rate, true_positive_rate, selection_rate

!pip install BlackBoxAuditing

!pip install aif360 
from aif360.datasets import StandardDataset, BinaryLabelDataset
from aif360.algorithms.preprocessing import Reweighing, DisparateImpactRemover
from aif360.algorithms.inprocessing import ExponentiatedGradientReduction, AdversarialDebiasing
from aif360.algorithms.postprocessing import RejectOptionClassification, EqOddsPostprocessing
from aif360.metrics import ClassificationMetric
from aif360.sklearn.metrics import disparate_impact_ratio
import tensorflow as tf

[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m
Collecting BlackBoxAuditing
  Using cached BlackBoxAuditing-0.1.54-py2.py3-none-any.whl
Installing collected packages: BlackBoxAuditing
Successfully installed BlackBoxAuditing-0.1.54
[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m
Collecting aif360
  Obtaining dependency information for aif360 from https://files.pythonhosted.org/packages/61/7b/7e4fa9e7b6f62759663db3b5aaa12a6cc9ef866223e5978c25844bceb762/aif360-0.6.1-py3-none-any.whl.metadata
  Using cached aif360-0.6.1-py3-none-any.whl.metadata (5.0 kB)
Using cached aif360-0.6.1-py3-none-any.whl (259 kB)
Installing collected packages: aif360
Successfully installed aif360-0.6.1
[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m


2025-04-18 14:54:37.446713: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
pip install 'aif360[inFairness]'
pip install 'aif360[OptimalTransport]'


**Data Pre-Processing**

In [0]:
########################################################################################################################################################
## - Create clean pandas dataframe: df_cleaned_final

# Define the file path to read in the raw data
file_path = "/Volumes/prod_adw/dev_final_claims/cahps/Data_incl_sens.csv"

# Load the CSV file into a DataFrame
df = pd.read_csv(file_path)

# Drop rows with missing 'PCM_DETRACTOR' values
df_cleaned = df.dropna(subset=['PCM_DETRACTOR'])

# Drop unnecessary columns
columns_to_drop = [
    'STARS_ID', 'TARGETREPORTMONTH', 'PCM_DETRACTOR_RAW', 'PCM_DETRACTOR_SCALED', 
    'FEATUREREPORTMONTH', 'STATE', 'COUNTY', 'BBT_6M', 'HMG_COA_REDUCTASE_INHIBTORS_2_RETAIL_FLG_9M', 'PRIOR_YEAR_CONTRACT_STAR_RATING_CTM_D',
    'STARS_ID.1', 'TARGETREPORTMONTH.1', 'PCM_DETRACTOR.1', 
    'PCM_DETRACTOR_RAW.1', 'PCM_DETRACTOR_SCALED.1', 'HEDIS_MSR_ADH_COUNT.1', 'AGE.1', 
    'RACE_ETHNICITY.1', 'GENDER.1','LANG.1','EDUC.1','CONTRACT_TENURE.1', 'COUNTY.1',
    'STATE.1', 'PLANTYPE.1', 'SURVEY_MODE.1','SMOKENOW.1', 'SDOH_CATG_SOCIAL_ECONIMICAL_24M_FLG.1',
    'SDOH_TRANSPORTATION_FOR_MEDICAL_APPOINTMENT_24M_FLG.1'
]

df_cleaned_final = df_cleaned.drop(columns=columns_to_drop)

# Replace missing values in 'PROVIDER_RISK_HIERARCHY' with 'M'
df_cleaned_final['PROVIDER_RISK_HIERARCHY'] = df_cleaned_final['PROVIDER_RISK_HIERARCHY'].fillna('M')

# Convert 'PCM_DETRACTOR' to category
df_cleaned_final['PCM_DETRACTOR'] = df_cleaned_final['PCM_DETRACTOR'].astype('category')

# Convert specified columns to category, others to numeric
category_columns = [
    'PCM_DETRACTOR', 'PROVIDER_RISK_HIERARCHY', 'AGE', 'LANG', 'EDUC', 
    'RACE_ETHNICITY', 'PLANTYPE', 'SMOKENOW', 'GENDER', 'SURVEY_MODE'
]
for column in df_cleaned_final.columns:
    if column in category_columns:
        df_cleaned_final[column] = df_cleaned_final[column].astype('category')
    else:
        df_cleaned_final[column] = pd.to_numeric(df_cleaned_final[column], errors='coerce')


# Replace missing values in numeric columns with the mean
numeric_columns = df_cleaned_final.select_dtypes(include=[np.number]).columns
df_cleaned_final[numeric_columns] = df_cleaned_final[numeric_columns].apply(lambda x: x.fillna(x.mean()))

# Replace missing values in categorical columns with the mode
categorical_columns = df_cleaned_final.select_dtypes(include=['category']).columns
df_cleaned_final[categorical_columns] = df_cleaned_final[categorical_columns].apply(lambda x: x.fillna(x.mode()[0]))

# Check for class imbalance
class_counts = df_cleaned_final['PCM_DETRACTOR'].value_counts()
class_percentages = df_cleaned_final['PCM_DETRACTOR'].value_counts(normalize=True) * 100


########################################################################################################################################################
## - Normalize numeric features
## - Split into X_train, X_test, y_train, y_test
## - These will be in pandas format

## Note: One hot encoding has not yet been applied to categorical features

# Separate the target variable and explanatory variables
X = df_cleaned_final.drop(columns=['PCM_DETRACTOR'])
y = df_cleaned_final['PCM_DETRACTOR']

# Define the numeric features and the categorical features
categorical_features = [
    'PROVIDER_RISK_HIERARCHY', 'AGE', 'LANG', 'EDUC', 'RACE_ETHNICITY', 
    'PLANTYPE', 'SMOKENOW', 'GENDER', 'SURVEY_MODE'
]
numeric_features = X.columns.drop(categorical_features)

# Convert non-numeric values to NaN and then handle missing values
X[numeric_features] = X[numeric_features].apply(pd.to_numeric, errors='coerce')
X[numeric_features] = X[numeric_features].fillna(X[numeric_features].mean())

# Normalize the numeric features
scaler = StandardScaler()
X[numeric_features] = scaler.fit_transform(X[numeric_features])

# Define Train/Test split
test_size=0.2

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)

# Ensure the test set has the same data types as the training set
X_test[numeric_features] = X_test[numeric_features].apply(pd.to_numeric, errors='coerce')
X_test[numeric_features] = X_test[numeric_features].fillna(X_test[numeric_features].mean())
for feature in categorical_features:
    X_test[feature] = X_test[feature].astype('category')


########################################################################################################################################################

## - Apply one hot encoding to categorical features
## - Separate dataframe into X_train_processed, X_test_processed, y_train, y_test
##- Convert back to pandas dataframes X_train_processed_df, X_test_processed_df, y_train_df, y_test_df

## This data is ready for fitting models

# Create a ColumnTransformer to apply transformations to features
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ],
    remainder='passthrough'
)

# Fit and transform the training data
X_train_processed = preprocessor.fit_transform(X_train)
X_test_processed = preprocessor.transform(X_test)

# Get the column names after transformation
transformed_columns = preprocessor.get_feature_names_out()

# Remove the prefixes from the column names
transformed_columns = [col.split('__')[-1] for col in transformed_columns]

# Convert the transformed arrays back to pandas DataFrames with column names - suitable for AIF360 usage
X_train_processed_df = pd.DataFrame(X_train_processed, columns=transformed_columns)
X_test_processed_df = pd.DataFrame(X_test_processed, columns=transformed_columns)

# Ensure the target variable is also in DataFrame format
y_train_df = pd.DataFrame(y_train).reset_index(drop=True)
y_test_df = pd.DataFrame(y_test).reset_index(drop=True)


####################################################################################################################################################

## Create a StandardDataset compatible with the AIF360 package

# Updated categorical_features with 'RACE_ETHNICITY' removed
# Important to ensure that ['RACE_ETHNICITY'] is not listed as a categorical feature
categorical_features_standard = [
    'PROVIDER_RISK_HIERARCHY', 'AGE', 'LANG', 'EDUC', 
    'PLANTYPE', 'SMOKENOW', 'GENDER', 'SURVEY_MODE'
]

numeric_features = X.columns.drop(categorical_features_standard)
columns_to_keep = df_cleaned_final.columns

df_test = df_cleaned_final[columns_to_keep]

# Map the racial values to integers as required by the fairness algorithm
race_ethnicity_map = {'White': 0, 'Black': 1, 'Asian': 2, 'AIAN': 3, 'Latinx': 4, 'PacIsl': 5, 'TwoPlus': 6}

# Converting 'PCM_DETRACTOR' and 'RACE_ETHNICITY' to numeric types
df_test['PCM_DETRACTOR'] = df_test['PCM_DETRACTOR'].astype(float)
df_test['RACE_ETHNICITY'] = df_test['RACE_ETHNICITY'].map(race_ethnicity_map)
df_test['RACE_ETHNICITY'] = df_test['RACE_ETHNICITY'].astype(int)  

df_test = pd.DataFrame(df_test)

# Create a StandardDataset
dataset = StandardDataset(df_test, 
                          label_name='PCM_DETRACTOR', 
                          favorable_classes=[0],  # 0 ('White') is the favorable class
                          protected_attribute_names=['RACE_ETHNICITY'], 
                          privileged_classes=[[0]],  # 0 ('White') is the privileged class
                          categorical_features=categorical_features_standard)



# Specify the privileged and unprivileged group
privileged_group = [{'RACE_ETHNICITY': 0}]  # White
unprivileged_groups = [{'RACE_ETHNICITY': value} for key, value in race_ethnicity_map.items() if value != 0]

# Split the dataset into training and testing sets
train, test = dataset.split([0.8], shuffle=True)

# Extract features and labels from the StandardDataset
X_train = train.features
y_train = train.labels.ravel()

X_test = test.features
y_test = test.labels.ravel()

##########################################################################################################################################################################



**Reject Option-Based Classification (ROC)**

In [0]:
# Define the hyperparameters for XGBoost
params = {
    'learning_rate': 0.003, 
    'max_depth': 5,
    'n_estimators': 5000,
    'subsample': 1,
    'colsample_bytree': 1,
    'scale_pos_weight': 9,  # Adjusted for class imbalance
    'max_delta_step': 1,
    'eval_metric': 'aucpr',    
    'tree_method': 'hist',
    'device': 'cuda'     
}

# Initialize the XGBoost model
xgb_model_roc = XGBClassifier(**params)

# Fit the model on the training data
xgb_model_roc.fit(X_train, y_train)

# Evaluate the model on the test data
y_pred = xgb_model_roc.predict(X_test)
y_pred_proba = xgb_model_roc.predict_proba(X_test)[:, 1]

# Calculate AUC
auc = roc_auc_score(y_test, y_pred_proba)
auc

Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.


  return func(**kwargs)


0.6706880531564274

In [0]:
# Apply ROC post-processing
roc = RejectOptionClassification(
    unprivileged_groups=unprivileged_groups,
    privileged_groups=privileged_group,
    low_class_thresh=0.3, high_class_thresh=0.8,
    num_class_thresh=100, num_ROC_margin=50,
    metric_name="Statistical parity difference",
    metric_ub=0.05, metric_lb=-0.05
)

roc = roc.fit(test, y_pred_proba)

[0;31m---------------------------------------------------------------------------[0m
[0;31mTypeError[0m                                 Traceback (most recent call last)
File [0;32m<command-6920363785728942>, line 11[0m
[1;32m      1[0m [38;5;66;03m# Apply ROC post-processing[39;00m
[1;32m      2[0m roc [38;5;241m=[39m RejectOptionClassification(
[1;32m      3[0m     unprivileged_groups[38;5;241m=[39munprivileged_groups,
[1;32m      4[0m     privileged_groups[38;5;241m=[39mprivileged_group,
[0;32m   (...)[0m
[1;32m      8[0m     metric_ub[38;5;241m=[39m[38;5;241m0.05[39m, metric_lb[38;5;241m=[39m[38;5;241m-[39m[38;5;241m0.05[39m
[1;32m      9[0m )
[0;32m---> 11[0m roc [38;5;241m=[39m roc[38;5;241m.[39mfit(test, y_pred_proba)

File [0;32m/local_disk0/.ephemeral_nfs/envs/pythonEnv-67faf9ed-3fd0-4da9-91f1-e7a71d86343c/lib/python3.11/site-packages/aif360/algorithms/transformer.py:27[0m, in [0;36maddmetadata.<locals>.wrapper[0;34m(self, *args,

In [0]:
# Calculate AUC
auc = roc_auc_score(y_test, y_pred_proba)
print(f"AUC: {auc}")

In [0]:
###############################################################  FIT MODEL   ###################################################################################

# Define the hyperparameters for XGBoost
params = {
    'learning_rate': 0.003, 
    'max_depth': 5,
    'n_estimators': 5000,
    'subsample': 1,
    'colsample_bytree': 1,
    'scale_pos_weight': 9,  # Adjusted for class imbalance
    'max_delta_step': 1,
    'eval_metric': 'aucpr',    
    'tree_method': 'hist',
    'device': 'cuda'     
}

# Initialize the XGBoost model
xgb_model = XGBClassifier(**params)

# Fit the model on the training data
xgb_model.fit(X_train, y_train)

###############################################################  PERFORMANCE METRICS  ###################################################################################

# Evaluate the model on the test data
y_pred = xgb_model.predict(X_test)
y_pred_proba = xgb_model.predict_proba(X_test)[:, 1]  # Get the probability predictions for AUC

auc = roc_auc_score(y_test, y_pred_proba)

# Assuming y_pred_proba and y_test are already defined
# Define a range of thresholds
thresholds = [i / 100 for i in range(10, 90)]  # Thresholds 

# Initialize a list to store the results
results = []

# Loop through each threshold and calculate metrics
for threshold in thresholds:
    # Apply the threshold to get the final class labels
    y_pred_custom_threshold = (y_pred_proba >= threshold).astype(int)
    
    # Calculate evaluation metrics with the custom threshold
    accuracy_custom = accuracy_score(y_test, y_pred_custom_threshold)
    balanced_acc = balanced_accuracy_score(y_test, y_pred_custom_threshold)
    f1_custom = f1_score(y_test, y_pred_custom_threshold)
    precision_custom = precision_score(y_test, y_pred_custom_threshold)
    recall_custom = recall_score(y_test, y_pred_custom_threshold)
    
    # Append the results to the list
    results.append({
        'threshold': threshold,
        'accuracy': accuracy_custom,
        'balanced_accuracy': balanced_acc,
        'f1': f1_custom,
        'recall': recall_custom,
        'precision': precision_custom
    })

# Convert the results list to a DataFrame
results_df = pd.DataFrame(results)

# Identify the threshold that gives the maximum F1 score
max_f1_threshold = results_df.loc[results_df['f1'].idxmax()]['threshold']
max_f1_value = results_df.loc[results_df['f1'].idxmax()]['f1']
recall_value = results_df.loc[results_df['f1'].idxmax()]['recall']
precision_value = results_df.loc[results_df['f1'].idxmax()]['precision']
accuracy_value = results_df.loc[results_df['f1'].idxmax()]['accuracy']
balanced_accuracy_value = results_df.loc[results_df['f1'].idxmax()]['balanced_accuracy']

print(f"The threshold that gives the maximum F1 score is: {max_f1_threshold}")
print(f"The maximum F1 score is: {max_f1_value}")
print(f"The auc is: {auc}")
print(f"The balanced accuracy is: {balanced_accuracy_value}")
print(f"The recall is: {recall_value}")
print(f"The precision is: {precision_value}")
print(f"The accuracy is: {accuracy_value}")

# Access the underlying XGBClassifier model
underlying_model = xgb_model

###############################################################  FAIRNESS METRICS  ###################################################################################

# Convert predictions to a StandardDataset
pred_dataset = test.copy()
pred_dataset.labels = y_pred.reshape(-1, 1)

# Initialize a list to store the results
fairness_metrics = []

for group in unprivileged_groups:
    metric = ClassificationMetric(test, pred_dataset, unprivileged_groups=[group], privileged_groups=privileged_group)
    
    disparate_impact = metric.disparate_impact()
    statistical_parity_difference = metric.statistical_parity_difference()
    equalized_odds_difference = metric.equalized_odds_difference()
    predictive_equality_difference = metric.false_positive_rate_difference()

    fairness_metrics.append({
        'group': group['RACE_ETHNICITY'],
        'disparate_impact': disparate_impact,
        'statistical_parity_difference': statistical_parity_difference,
        'equalized_odds_difference': equalized_odds_difference,
        'predictive_equality_difference': predictive_equality_difference
        })
    
# Extract the values for each metric
disparate_impact_values = [metric['disparate_impact'] for metric in fairness_metrics]
statistical_parity_difference_values = [metric['statistical_parity_difference'] for metric in fairness_metrics]
equalized_odds_difference_values = [metric['equalized_odds_difference'] for metric in fairness_metrics]
predictive_equality_difference_values = [metric['predictive_equality_difference'] for metric in fairness_metrics]

# Calculate the average and standard deviation for each metric
average_disparate_impact = np.mean(disparate_impact_values)
std_disparate_impact = np.std(disparate_impact_values)

average_statistical_parity_difference = np.mean(statistical_parity_difference_values)
std_statistical_parity_difference = np.std(statistical_parity_difference_values)

average_equalized_odds_difference = np.mean(equalized_odds_difference_values)
std_equalized_odds_difference = np.std(equalized_odds_difference_values)

average_predictive_equality_difference = np.mean(predictive_equality_difference_values)
std_predictive_equality_difference = np.std(predictive_equality_difference_values)

print(f"Average Disparate Impact: {average_disparate_impact} with a standard deviation of {std_disparate_impact}")
print(f"Average Statistical Parity Difference: {average_statistical_parity_difference} with a standard deviation of {std_statistical_parity_difference}")
print(f"Average Equalized Odds Difference: {average_equalized_odds_difference} with a standard deviation of {std_equalized_odds_difference}")
print(f"Average Predictive Equality Difference: {average_predictive_equality_difference} with a standard deviation of {std_predictive_equality_difference}")

fairness_metrics