In [1]:
import os

import numpy as np
import pandas as pd
from fairlearn.reductions import ExponentiatedGradient, DemographicParity
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

In [2]:
from fairlearn.metrics import MetricFrame, selection_rate, demographic_parity_difference, equalized_odds_difference

In [3]:
# Location of the HR bias dataset. The default is the original author's local
# file; set the HR_BIAS_DATASET environment variable to point at another copy
# instead of editing this cell.
DATA_PATH = os.environ.get(
    "HR_BIAS_DATASET",
    r"C:\Users\Nithisha\Downloads\HR_Bias_Detection_Dataset.csv",
)
df = pd.read_csv(DATA_PATH)

In [4]:
# Target is the promotion outcome; every other column of df is kept as a feature.
y = df.loc[:, 'Promotion Status']
X = df.drop(columns='Promotion Status')

# Feature columns grouped by kind.
categorical = [
    'Gender',
    'Age Range',
    'Ethnicity',
    'Department',
    'Education Level',
]
numerical = [
    'Years of Experience',
    'Performance Score',
    'Training Participation',
    'Support for Diversity Initiatives',
    'Experienced Workplace Bias',
    'Projects Handled',
    'Overtime Hours',
]

In [5]:
# One-hot encode the categorical columns, dropping the first level of each.
# handle_unknown='ignore' keeps transform() from raising when the test split
# contains a category that never appeared in the training split; such a value
# is encoded as all zeros (the same encoding as the dropped level).
# remainder='passthrough' forwards every other column of X unchanged.
# NOTE(review): the `numerical` list is not referenced in this cell, so any
# additional column present in the CSV also becomes a model feature — confirm
# that this is intended.
preprocessor = ColumnTransformer([
    ('cat', OneHotEncoder(drop='first', handle_unknown='ignore'), categorical)
], remainder='passthrough')

# 80/20 train/test split with a fixed seed.
# NOTE(review): the split is not stratified on y; consider stratify=y if the
# positive class is rare — confirm against the class balance.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the encoder on the training rows only, then apply it to the test rows.
X_train_enc = preprocessor.fit_transform(X_train)
X_test_enc = preprocessor.transform(X_test)

In [6]:
# Sensitive attribute (the raw, un-encoded 'Gender' column) for each split.
sensitive_train, sensitive_test = (
    split['Gender'] for split in (X_train, X_test)
)

In [7]:
# Base learner that the reduction wraps.
base_model = LogisticRegression(random_state=42, solver='liblinear')

# Exponentiated-gradient reduction with a demographic-parity constraint
# around the base learner.
mitigator = ExponentiatedGradient(constraints=DemographicParity(), estimator=base_model)

In [8]:
# Train the mitigated model; the sensitive feature is supplied separately so
# the constraint can be evaluated per group.
mitigator.fit(X_train_enc, y_train, sensitive_features=sensitive_train)

# ExponentiatedGradient.predict is randomized (it samples from the learned
# distribution over classifiers), so fix the seed to make the predictions and
# every downstream metric reproducible across re-runs.
y_pred = mitigator.predict(X_test_enc, random_state=42)

In [9]:
# Per-group accuracy and selection rate on the test split, grouped by the
# sensitive feature.
metric_frame = MetricFrame(
    metrics=dict(accuracy=accuracy_score, selection_rate=selection_rate),
    sensitive_features=sensitive_test,
    y_true=y_test,
    y_pred=y_pred,
)

# Overall accuracy on the test split.
accuracy = accuracy_score(y_test, y_pred)

In [10]:
# Overall test-set accuracy of the mitigated model's predictions.
print("Accuracy:", accuracy)

Accuracy: 0.92


In [11]:
# Fairness gaps between the sensitive-feature groups on the test split:
# demographic parity difference, then equalized odds difference.
dp_diff, eo_diff = (
    gap_metric(y_test, y_pred, sensitive_features=sensitive_test)
    for gap_metric in (demographic_parity_difference, equalized_odds_difference)
)

In [12]:
# Summary of the mitigated model's accuracy and fairness metrics.
print("Mitigated Accuracy:", accuracy)
# Start the per-group table on its own line so its header row lines up with
# its columns instead of being pushed right by the label.
print("Group-wise Metrics:", metric_frame.by_group, sep="\n")
print("Demographic Parity Difference:", dp_diff)
print("Equalized Odds Difference:", eo_diff)

Mitigated Accuracy: 0.92
Group-wise Metrics:         accuracy  selection_rate
Gender                          
Female  0.933333        0.066667
Male    0.906667        0.013333
Demographic Parity Difference: 0.05333333333333333
Equalized Odds Difference: 0.375
