# Script Hyperparameter Tuning

In [33]:
# import packages
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from fairlearn.postprocessing import ThresholdOptimizer
import pandas as pd
import numpy as np
import os

In [34]:
print(len(hiring_data))
# Prepare the data
X = hiring_data.drop(columns=['decision', 'Id', 'ind-languages', 'sport'])
y = hiring_data['decision']
A = hiring_data['gender']  # Sensitive attribute

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X

3917


Unnamed: 0,age,gender,ind-debateclub,ind-degree,ind-entrepeneur_exp,ind-exact_study,ind-international_exp,ind-previous_exp,ind-programming_exp,ind-testresult,ind-university_grade,nationality
0,24,1,0,2,0,1,0,4,0,36,70,2
1,26,0,0,0,0,1,0,4,1,26,67,2
2,23,1,0,1,0,1,1,4,1,38,67,1
3,24,0,0,1,0,1,0,4,1,40,70,1
4,26,1,0,1,0,0,0,4,0,22,59,2
...,...,...,...,...,...,...,...,...,...,...,...,...
3995,28,1,0,1,0,0,0,1,0,29,63,1
3996,27,1,0,0,1,1,0,1,0,36,62,1
3997,24,0,1,0,1,0,0,1,0,22,60,0
3998,22,1,0,0,0,1,0,1,1,17,66,1


## Hyperparameter Tuning Random Forest Model

In [35]:
import warnings
warnings.filterwarnings("ignore", category=FutureWarning, message=".*use_inf_as_na.*")

# Limit threads to reduce nondeterminism from parallelism
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"

# Fix random seed
RANDOM_SEED = 42

# Split data with fixed random state and stratify if possible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_SEED, stratify=y
)

# Define RandomForest with fixed random_state
rf = RandomForestClassifier(random_state=RANDOM_SEED)

# Hyperparameter grid
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False],
    'class_weight': [None, 'balanced', 'balanced_subsample']
}

# GridSearchCV with n_jobs=1 for reproducibility
grid_search = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    cv=5,
    n_jobs=1,
    verbose=1,
    scoring='accuracy'
)

# Fit training data
grid_search.fit(X_train, y_train)

print("Best Hyperparameters:", grid_search.best_params_)

best_rf_model = grid_search.best_estimator_

# Baseline evaluation without fairness postprocessing
y_pred_baseline = best_rf_model.predict(X_test)
print("\nBaseline Model Performance:")
print("Accuracy:", accuracy_score(y_test, y_pred_baseline))
print("F1 Score:", f1_score(y_test, y_pred_baseline))
print("Precision:", precision_score(y_test, y_pred_baseline))
print("Recall:", recall_score(y_test, y_pred_baseline))

# Sensitive feature for fairness postprocessing
sensitive_feature = X_test['gender']  # adjust if needed

# Function to run ThresholdOptimizer with different constraints
def evaluate_threshold_optimizer(constraint_name):
    print(f"\nEvaluating ThresholdOptimizer with constraint: {constraint_name}")

    postproc = ThresholdOptimizer(
        estimator=best_rf_model,
        constraints=constraint_name,
        prefit=True
    )

    postproc.fit(X_test, y_test, sensitive_features=sensitive_feature)
    y_pred_postproc = postproc.predict(X_test, sensitive_features=sensitive_feature)

    print("Postprocessed Model Performance:")
    print("Accuracy:", accuracy_score(y_test, y_pred_postproc))
    print("F1 Score:", f1_score(y_test, y_pred_postproc))
    print("Precision:", precision_score(y_test, y_pred_postproc))
    print("Recall:", recall_score(y_test, y_pred_postproc))

# Evaluate all fairness constraints you want
for constraint in ["equalized_odds", "demographic_parity", "true_positive_rate_parity"]:
    evaluate_threshold_optimizer(constraint)

# Feature importance
importances = best_rf_model.feature_importances_
feature_names = X_train.columns

feature_importance_df = pd.DataFrame({
    'Feature': feature_names,
    'Importance': importances
})

top_features = feature_importance_df.sort_values(by='Importance', ascending=False)

print("\nTop 10 important features:")
print(top_features.head(10))

Fitting 5 folds for each of 648 candidates, totalling 3240 fits
Best Hyperparameters: {'bootstrap': False, 'class_weight': None, 'max_depth': 10, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 50}

Baseline Model Performance:
Accuracy: 0.7920918367346939
F1 Score: 0.6091127098321343
Precision: 0.7559523809523809
Recall: 0.5100401606425703

Evaluating ThresholdOptimizer with constraint: equalized_odds
Postprocessed Model Performance:
Accuracy: 0.7857142857142857
F1 Score: 0.6
Precision: 0.7368421052631579
Recall: 0.5060240963855421

Evaluating ThresholdOptimizer with constraint: demographic_parity
Postprocessed Model Performance:
Accuracy: 0.8035714285714286
F1 Score: 0.7116104868913857
Precision: 0.6666666666666666
Recall: 0.7630522088353414

Evaluating ThresholdOptimizer with constraint: true_positive_rate_parity
Postprocessed Model Performance:
Accuracy: 0.8086734693877551
F1 Score: 0.7232472324723247
Precision: 0.6689419795221843
Recall: 0.7871485943775101

Top 10 im

In [36]:
import warnings
warnings.filterwarnings("ignore", category=FutureWarning, message=".*use_inf_as_na.*")

# Fix random seed here
RANDOM_SEED = 42

# Load your data (example)
X = hiring_data.drop(columns=['decision', 'Id', 'sport'])
y = hiring_data['decision']

# Split the data into train and test with fixed random state
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=RANDOM_SEED)

# Create the logistic regression model with scaling and fixed random state for logistic regression
pipeline = make_pipeline(
    StandardScaler(),
    LogisticRegression(max_iter=4000, random_state=RANDOM_SEED)
)

param_grid = {
    'logisticregression__C': [0.01, 0.1, 1, 5, 10, 50, 100, 500, 1000],  
    'logisticregression__penalty': ['l2'], 
    'logisticregression__solver': ['liblinear'],  
    'logisticregression__max_iter': [100, 200, 300, 500, 1000],  
    'logisticregression__class_weight': [None, 'balanced'],  
}

grid_search = GridSearchCV(
    pipeline, param_grid, cv=5, n_jobs=-1, verbose=1, scoring='accuracy'
)

# Fit the model with training data
grid_search.fit(X_train, y_train)

best_pipeline = grid_search.best_estimator_

print("Best Hyperparameters:", grid_search.best_params_)

# Evaluate baseline model on test set before postprocessing
y_pred_test = best_pipeline.predict(X_test)

print("Baseline Test Accuracy:", accuracy_score(y_test, y_pred_test))
print("Baseline Test F1 Score:", f1_score(y_test, y_pred_test))
print("Baseline Test Precision:", precision_score(y_test, y_pred_test))
print("Baseline Test Recall:", recall_score(y_test, y_pred_test))

# Fairness postprocessing with ThresholdOptimizer

sensitive_feature = X_test['gender']  # Adjust if needed

def evaluate_postproc(constraint):
    print(f"\nEvaluating with constraint: {constraint}")
    
    postproc = ThresholdOptimizer(
        estimator=best_pipeline,
        constraints=constraint,
        prefit=True
    )
    
    postproc.fit(X_test, y_test, sensitive_features=sensitive_feature)
    
    y_pred = postproc.predict(X_test, sensitive_features=sensitive_feature)
    
    print("Test Accuracy:", accuracy_score(y_test, y_pred))
    print("Test F1 Score:", f1_score(y_test, y_pred))
    print("Test Precision:", precision_score(y_test, y_pred))
    print("Test Recall:", recall_score(y_test, y_pred))


# Evaluate fairness constraints
evaluate_postproc("equalized_odds")
evaluate_postproc("demographic_parity")
evaluate_postproc("true_positive_rate_parity")

# Optional: feature importance after training best logistic regression model
logreg_model = best_pipeline.named_steps['logisticregression']

feature_names = X_train.columns
coefficients = logreg_model.coef_[0]

coef_df = pd.DataFrame({
    'Feature': feature_names,
    'Coefficient': coefficients,
    'AbsValue': np.abs(coefficients)
})

top_features = coef_df.sort_values(by='AbsValue', ascending=False)
print("\nTop 10 important features:")
print(top_features[['Feature', 'Coefficient']].head(10))

Fitting 5 folds for each of 90 candidates, totalling 450 fits
Best Hyperparameters: {'logisticregression__C': 1, 'logisticregression__class_weight': None, 'logisticregression__max_iter': 100, 'logisticregression__penalty': 'l2', 'logisticregression__solver': 'liblinear'}
Baseline Test Accuracy: 0.7436224489795918
Baseline Test F1 Score: 0.5109489051094891
Baseline Test Precision: 0.6687898089171974
Baseline Test Recall: 0.41338582677165353

Evaluating with constraint: equalized_odds
Test Accuracy: 0.7295918367346939
Test F1 Score: 0.43617021276595747
Test Precision: 0.6721311475409836
Test Recall: 0.3228346456692913

Evaluating with constraint: demographic_parity
Test Accuracy: 0.7410714285714286
Test F1 Score: 0.46437994722955145
Test Precision: 0.704
Test Recall: 0.3464566929133858

Evaluating with constraint: true_positive_rate_parity
Test Accuracy: 0.7385204081632653
Test F1 Score: 0.4444444444444444
Test Precision: 0.7130434782608696
Test Recall: 0.3228346456692913

Top 10 importa

Exception ignored in: <function ResourceTracker.__del__ at 0x1021f7880>
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/resource_tracker.py", line 82, in __del__
  File "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/resource_tracker.py", line 91, in _stop
  File "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/resource_tracker.py", line 116, in _stop_locked
ChildProcessError: [Errno 10] No child processes
Exception ignored in: <function ResourceTracker.__del__ at 0x102577880>
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/resource_tracker.py", line 82, in __del__
  File "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/resource_tracker.py", line 91, in _stop
  File "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/m