# Fairness-Aware Job-Matching: Bias Analysis and Counterfactual Intervention Demo


#### This notebook demonstrates bias detection and mitigation in embedding-based job-matching systems using statistical analysis and counterfactual manipulation.


Environment Setup

In [None]:
print("Setting up environment...")

# Install required packages
!pip install scikit-learn pandas numpy scipy matplotlib seaborn plotly -q
!pip install pydantic-settings ranx faiss-cpu -q

# Mount Google Drive and setup paths
from google.colab import drive
drive.mount('/content/drive')

import sys
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Add framework to path
sys.path.append('/content/drive/MyDrive/Thesis/fairness_aware_job_matching/')

# Import framework components
from fairness_framework import (
    Config,
    load_embeddings,
    load_oracle_gender_labels,
    load_test_job_data,
    load_resume_corpus_mapping,
    extract_valid_job_test_indices,
    run_bias_analysis,
    test_representation_significance,
    comprehensive_statistical_validation,
    comprehensive_validation_suite,
    train_gender_classifier_with_teacher_params,
    analyze_gender_directions,
    flip_embeddings_along_gender_direction,
    run_counterfactual_bias_analysis,
    compare_bias_results
)

def set_thesis_style():
    """Apply the plotting defaults used for the figures in this notebook.

    Sets the seaborn theme (white grid, 1.1x font scale) and then overrides a
    handful of matplotlib rcParams. Takes no arguments and returns nothing;
    the only effect is a change to global plotting state.
    """
    rc_overrides = {
        'axes.edgecolor': 'black',
        'axes.linewidth': 1.0,
        'figure.dpi': 100,
        'savefig.bbox': 'tight',
        'legend.frameon': True,
        'legend.loc': 'best',
    }

    # Theme first, overrides second, so the overrides are applied last.
    sns.set(style='whitegrid', font_scale=1.1)
    plt.rcParams.update(rc_overrides)

# Apply the plotting defaults once, at the end of environment setup
set_thesis_style()

print("Environment setup complete.")


Data Loading & Configuration

In [None]:
# Update configuration
# Point the framework at the Drive-hosted thesis directory and record where
# the proprietary source trees live.
Config.update_base_path("/content/drive/MyDrive/Thesis/")
Config.PROPRIETARY_SRC_PATH = "/content/drive/MyDrive/Thesis/fine_tuning/src"
Config.PROPRIETARY_INFERENCE_PATH = "/content/drive/MyDrive/Thesis/fine_tuning/inference_endpoint/src"

# Add proprietary paths
# Appending them to sys.path makes modules under both directories importable.
sys.path.append(Config.PROPRIETARY_SRC_PATH)
sys.path.append(Config.PROPRIETARY_INFERENCE_PATH)

# Load core data components
# These names are notebook globals that the later cells read directly.
gender_mapping = load_oracle_gender_labels()
test_job_queries, test_job_query_ids = load_test_job_data()
resume_id_to_text, resume_corpus_ids = load_resume_corpus_mapping()
# NOTE(review): the second argument is a one-element tuple; what it means
# (a size limit? a shape?) is not visible from this notebook - confirm
# against extract_valid_job_test_indices.
valid_job_indices, valid_job_query_ids = extract_valid_job_test_indices(
    test_job_queries, (100000,)
)

# Load proprietary FAISS components
from fairness_framework.utils.proprietary_imports import get_proprietary_components
proprietary_components = get_proprietary_components()

# Fail fast: the analysis cells below need both the index builder and the
# search function, so there is nothing useful to do without them.
if not proprietary_components:
    raise RuntimeError("Could not load required search components")

create_faiss_index = proprietary_components['create_faiss_index']
search_faiss = proprietary_components['search_faiss']
print("FAISS search components loaded successfully")

### 1. Bias Analysis

In [None]:
def run_jobs_to_resumes_analysis(modified_embeddings=None):
    """Run the jobs-to-resumes bias analysis on the data loaded in this notebook.

    Thin wrapper around ``run_bias_analysis`` that supplies the job queries,
    gender labels, resume corpus IDs and FAISS helpers held in notebook
    globals, so callers only choose which CV embeddings to use.

    Args:
        modified_embeddings: Forwarded as ``modified_cv_embeddings``. The
            default ``None`` is passed through unchanged.

    Returns:
        Whatever ``run_bias_analysis`` returns.
    """
    analysis_kwargs = {
        'test_job_queries': test_job_queries,
        'gender_mapping': gender_mapping,
        'resume_corpus_ids': resume_corpus_ids,
        'create_faiss_index': create_faiss_index,
        'search_faiss': search_faiss,
        'modified_cv_embeddings': modified_embeddings,
    }
    return run_bias_analysis(**analysis_kwargs)

print("\nRunning comprehensive bias analysis...")
# Baseline run: no modified embeddings are passed. The counterfactual cell
# later compares its results against this one.
original_results = run_jobs_to_resumes_analysis()

In [None]:
print("\nSimilarity Score Distribution Analysis")
if 'score_distribution' in original_results:
    score_stats = original_results['score_distribution']

    # Display score statistics
    # Only entries that are dicts carrying a 'mean' key become table rows;
    # anything else stored under 'score_distribution' is skipped.
    results_table = []
    for gender, stats in score_stats.items():
        if not isinstance(stats, dict) or 'mean' not in stats:
            continue
        row = {
            'Gender': gender.capitalize(),
            'Count': f"{stats['count']:,}",
            'Mean': f"{stats['mean']:.4f}",
            'Std': f"{stats['std']:.4f}",
            'Median': f"{stats['median']:.4f}",
            'Q25': f"{stats['percentile_25']:.4f}",
            'Q75': f"{stats['percentile_75']:.4f}"
        }
        results_table.append(row)

    results_df = pd.DataFrame(results_table)
    print(results_df.to_string(index=False))

    # Statistical significance test
    if 'statistical_test' in score_stats:
        test = score_stats['statistical_test']
        print(f"\nStatistical Test Results:")
        print(f"  T-statistic: {test['t_statistic']:.4f}")
        print(f"  P-value: {test['p_value']:.4f}")
        print(f"  Effect size (Cohen's d): {test['effect_size']:.4f}")
        print(f"  Significant difference: {test['significant']}")

        if test['significant']:
            # A positive effect size is reported as a female advantage,
            # anything else as a male advantage.
            if test['effect_size'] > 0:
                advantage_gender = "Female"
            else:
                advantage_gender = "Male"

            # Magnitude buckets: >= 0.8 large, >= 0.5 medium, otherwise small.
            if abs(test['effect_size']) >= 0.8:
                effect_magnitude = "Large"
            elif abs(test['effect_size']) >= 0.5:
                effect_magnitude = "Medium"
            else:
                effect_magnitude = "Small"

            print(f"  Result: {advantage_gender} candidates show {effect_magnitude.lower()} score advantage")

In [None]:
print("\nTop-K Representation Analysis")
if 'top_k_representation' in original_results:
    repr_stats = original_results['top_k_representation']
    population_props = Config.POPULATION_PROPORTIONS

    # One row per (ranking cutoff, gender) pair
    representation_data = []

    for k, data in repr_stats.items():
        for gender, stats in data.items():
            # Genders without a configured population share are compared
            # against an expected share of 0%.
            expected_pct = population_props.get(gender, 0) * 100
            actual_pct = stats['percentage']
            difference = actual_pct - expected_pct

            representation_data.append({
                # Cutoff key is upper-cased with underscores turned into
                # hyphens for display (a key like 'top_5' shows as 'TOP-5')
                'Ranking': k.replace('_', '-').upper(),
                'Gender': gender.capitalize(),
                'Count': f"{stats['count']:,}",
                'Actual%': f"{actual_pct:.1f}%",
                'Expected%': f"{expected_pct:.1f}%",
                'Difference': f"{difference:+.1f}%"
            })

    repr_df = pd.DataFrame(representation_data)
    print(repr_df.to_string(index=False))

In [None]:
print("\nRepresentation Significance Testing")
# Test the baseline search results against the configured population shares
representation_test_results = test_representation_significance(
    search_results=original_results['search_results'],
    gender_mapping=gender_mapping,
    population_proportions=Config.POPULATION_PROPORTIONS,
    analysis_name="Jobs to Resumes"
)

# Summarize significance results
# One row per key of the returned mapping; every value is pre-formatted as a
# string, so the resulting frame is for display only.
sig_summary = []
for k, results in representation_test_results.items():
    sig_summary.append({
        # Upper-case the key and swap underscores for hyphens for display
        'Ranking': k.replace('_', '-').upper(),
        'Male_pvalue': f"{results['male_p_value']:.4f}",
        'Female_pvalue': f"{results['female_p_value']:.4f}",
        'Male_Effect': f"{results['male_effect_size']:+.3f}",
        'Female_Effect': f"{results['female_effect_size']:+.3f}",
        'Chi2_pvalue': f"{results['chi2_p_value']:.4f}"
    })

sig_df = pd.DataFrame(sig_summary)
print(sig_df.to_string(index=False))


In [None]:
print("\nStatistical Validation")
# Runs on the baseline search results with the configured population shares
comprehensive_stats = comprehensive_statistical_validation(
    search_results=original_results['search_results'],
    gender_mapping=gender_mapping,
    population_proportions=Config.POPULATION_PROPORTIONS
)

# Bootstrap validation results
# Printed only when the validation output carries a 'bootstrap_validation' entry.
if 'bootstrap_validation' in comprehensive_stats:
    bootstrap = comprehensive_stats['bootstrap_validation']
    # NOTE(review): the iteration count in this label is hard-coded text, not
    # read from the results - confirm it matches the framework's setting.
    print(f"Bootstrap Validation (1000 iterations):")
    print(f"  Score advantage: {bootstrap['score_advantage_mean']:+.4f}")
    # Each CI is indexed as [0] and [1], printed as lower and upper bound
    print(f"  95% CI: [{bootstrap['score_advantage_ci'][0]:+.4f}, {bootstrap['score_advantage_ci'][1]:+.4f}]")
    print(f"  Rank advantage: {bootstrap['rank_advantage_mean']:+.2f}")
    print(f"  95% CI: [{bootstrap['rank_advantage_ci'][0]:+.2f}, {bootstrap['rank_advantage_ci'][1]:+.2f}]")

# Integrity validation
print(f"\nIntegrity Validation:")
validation_results = comprehensive_validation_suite(
    search_results=original_results['search_results'],
    gender_mapping=gender_mapping,
    sample_size=10,
    n_spot_checks=3
)

# Unlike the bootstrap section, this summary is read unconditionally: a
# missing 'validation_summary' key raises KeyError here.
val_summary = validation_results['validation_summary']
print(f"  Rankings properly sorted: {val_summary['rankings_properly_sorted']}")
print(f"  Paradox detected: {val_summary['paradox_detected']}")
print(f"  Queries validated: {val_summary['total_queries_validated']}")


### 2. Counterfactual Intervention

In [None]:
print("\nLoading embeddings for intervention...")

# Load the same embeddings used in bias analysis
from fairness_framework.data.data_loader import load_bias_analysis_embeddings
job_embeddings, cv_embeddings = load_bias_analysis_embeddings()

print(f"Loaded embeddings: jobs={job_embeddings.shape}, cvs={cv_embeddings.shape}")

# Use the same valid job indices that were used in bias analysis
job_embeddings_subset = job_embeddings[valid_job_indices]
print(f"Using subset of job embeddings: {job_embeddings_subset.shape}")

# The gender mapping uses resume IDs, so we need to map those to CV embedding indices
resume_indices_with_gender = []
gender_labels = []
skipped_entries = 0
n_cv_embeddings = cv_embeddings.shape[0]

# Map resume IDs from gender_mapping to CV embedding indices
for resume_id, gender in gender_mapping.items():
    # Encode only the two recognised labels. Treating every non-"female"
    # value as male would put unknown or missing labels into the male class.
    if gender == "female":
        label = 0
    elif gender == "male":
        label = 1
    else:
        skipped_entries += 1
        continue

    try:
        # Extract index from resume_id (format: "resume_X")
        index = int(resume_id.split('_')[1])
    except (ValueError, IndexError):
        skipped_entries += 1
        continue

    # The lower bound matters: a negative index would otherwise pass the
    # range check and wrap around to the end of the embedding matrix.
    if 0 <= index < n_cv_embeddings:
        resume_indices_with_gender.append(index)
        gender_labels.append(label)
    else:
        skipped_entries += 1

if skipped_entries:
    print(f"Skipped {skipped_entries} gender-mapping entries (unrecognised label or unusable resume ID)")

gender_labels = np.array(gender_labels)
labeled_embeddings = cv_embeddings[resume_indices_with_gender]

print(f"Labeled embeddings: {len(gender_labels)} samples")
print(f"Gender distribution: Female={np.sum(gender_labels == 0)}, Male={np.sum(gender_labels == 1)}")

In [None]:
print("\nTraining Gender Classifier")
# Fit on the labeled CV embeddings prepared in the previous cell
# (labels: 0 = female, 1 = male)
classifier, cv_accuracy, train_accuracy = train_gender_classifier_with_teacher_params(
    labeled_embeddings, gender_labels
)

print(f"Classifier Performance:")
print(f"  Cross-validation accuracy: {cv_accuracy:.4f}")
print(f"  Training accuracy: {train_accuracy:.4f}")
# Analyze gender direction
# NOTE(review): gender_weights is presumably the classifier's weight vector
# and direction_norm its norm - confirm against analyze_gender_directions.
gender_weights, direction_norm = analyze_gender_directions(classifier, labeled_embeddings)

print(f"\nGender Direction Analysis:")
print(f"  Direction norm: {direction_norm:.6f}")
print(f"  Weight statistics: mean={np.mean(gender_weights):.6f}, std={np.std(gender_weights):.6f}")

In [None]:
print("Counterfactual Intervention Analysis")
print("\nLoading embeddings for intervention...")
print("\nRunning Counterfactual Analysis")

# Bound before the analysis runs: the summary cells further down read
# `intervention_results` unconditionally, so it must exist (as an empty list)
# even when the analysis fails and the success branch below never executes.
intervention_results = []

# Use the modular counterfactual analysis function
counterfactual_results = run_counterfactual_bias_analysis(
    gender_mapping=gender_mapping,
    run_bias_analysis_jobs_to_resumes_func=run_jobs_to_resumes_analysis
)

if counterfactual_results is None:
    print("Counterfactual analysis failed")
else:
    print("Counterfactual analysis completed successfully")

    # Display results summary
    print(f"\nGender Classifier Performance:")
    print(f"  CV accuracy: {counterfactual_results['original_cv_acc']:.4f}")
    print(f"  Training accuracy: {counterfactual_results['original_train_acc']:.4f}")

    # Display intervention results
    print(f"\nIntervention Results:")

    for factor, results in counterfactual_results["flip_results"].items():
        accuracy_drop = results['classifier_accuracy_drop']

        # Extract bias metrics; flip factors whose results lack a score
        # distribution or its statistical test are left out of the summary.
        if 'jobs_to_resumes' not in results or 'score_distribution' not in results['jobs_to_resumes']:
            continue
        modified_stats = results['jobs_to_resumes']['score_distribution']
        if 'statistical_test' not in modified_stats:
            continue

        modified_test = modified_stats['statistical_test']
        original_test = original_results['score_distribution']['statistical_test']

        # Positive when the intervention shrinks the absolute effect size
        bias_reduction = abs(original_test['effect_size']) - abs(modified_test['effect_size'])

        print(f"\nFlip factor α = {factor}")
        print(f"  Classifier accuracy drop: {accuracy_drop:.4f}")
        print(f"  Effect size change: {original_test['effect_size']:.4f} → {modified_test['effect_size']:.4f}")
        print(f"  Bias reduction: {bias_reduction:.4f}")
        print(f"  P-value change: {original_test['p_value']:.4f} → {modified_test['p_value']:.4f}")

        intervention_results.append({
            'Flip_Factor': factor,
            'Accuracy_Drop': accuracy_drop,
            'Original_Effect': original_test['effect_size'],
            'Modified_Effect': modified_test['effect_size'],
            'Bias_Reduction': bias_reduction,
            'Original_Pvalue': original_test['p_value'],
            'Modified_Pvalue': modified_test['p_value']
        })

    # Use modular comparison function for detailed analysis
    print(f"\nDetailed Comparative Analysis:")
    compare_bias_results(original_results, counterfactual_results)


In [None]:
print("\nIntervention Effectiveness Summary")
if intervention_results:
    intervention_df = pd.DataFrame(intervention_results)

    # Format for display
    # Every column except Flip_Factor is rendered with four decimals; the
    # unformatted numbers stay available in intervention_df.
    display_df = intervention_df.copy()
    four_decimal_columns = [
        'Accuracy_Drop', 'Original_Effect', 'Modified_Effect', 'Bias_Reduction',
        'Original_Pvalue', 'Modified_Pvalue',
    ]
    for col in four_decimal_columns:
        display_df[col] = display_df[col].apply("{:.4f}".format)

    print(display_df.to_string(index=False))

In [None]:
print("RESULTS SUMMARY")
# Compile key findings
# Each numbered section prints only when its inputs are present, so the
# numbering can show gaps if a section is skipped.
print("\nKey Findings:")

# Score analysis summary
if 'score_distribution' in original_results and 'statistical_test' in original_results['score_distribution']:
    test = original_results['score_distribution']['statistical_test']
    print(f"1. Similarity Score Analysis:")
    print(f"   - Effect size: {test['effect_size']:.4f}")
    print(f"   - P-value: {test['p_value']:.4f}")
    print(f"   - Statistical significance: {test['significant']}")

# Representation summary
# Only the 'top_5' cutoff is summarised here.
if 'top_k_representation' in original_results and 'top_5' in original_results['top_k_representation']:
    top5_data = original_results['top_k_representation']['top_5']
    population_props = Config.POPULATION_PROPORTIONS

    print(f"2. Representation Analysis (Top-5):")
    for gender in ['female', 'male']:
        if gender in top5_data:
            actual = top5_data[gender]['percentage']
            # Direct indexing: a gender missing from the configured
            # proportions raises KeyError here.
            expected = population_props[gender] * 100
            print(f"   - {gender.capitalize()}: {actual:.1f}% (expected: {expected:.1f}%)")

# Intervention summary
# `intervention_results` is created by the counterfactual intervention cell.
if intervention_results:
    max_reduction = max(r['Bias_Reduction'] for r in intervention_results)
    print(f"3. Intervention Effectiveness:")
    print(f"   - Maximum bias reduction: {max_reduction:.4f}")
    print(f"   - Interventions tested: {len(intervention_results)}")

print(f"\nAnalysis completed.")
# Falls back to an empty mapping (count 0) when no search results were stored
print(f"Total queries analyzed: {len(original_results.get('search_results', {}))}")
print(f"Candidates evaluated: {len(gender_mapping)}")