## Claude programming code responses test for dissecting bias dataset

### Referred to this extra created file - prompt 1

In [16]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from collections import Counter
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
print("Loading data...")
df = pd.read_csv('data_new.csv')

def analyze_group_disparities(df):
    # Create intersectional groups
    df['group'] = df['race'] + '_' + df['dem_female'].astype(str)
    
    # Calculate various disparity metrics
    disparities = {}
    
    # 1. Representation disparities
    group_sizes = df['group'].value_counts()
    total_samples = len(df)
    disparities['representation'] = {
        group: count/total_samples 
        for group, count in group_sizes.items()
    }
    
    # 2. Risk score disparities
    risk_stats = df.groupby('group')['risk_score_t'].agg(['mean', 'std'])
    disparities['risk_scores'] = risk_stats.to_dict('index')
    
    # 3. Enrollment rate disparities
    enrollment_rates = df.groupby('group')['program_enrolled_t'].mean()
    disparities['enrollment_rates'] = enrollment_rates.to_dict()
    
    return disparities

def calculate_sampling_weights(df, disparities):
    # Calculate weights to balance both demographic and outcome representation
    df['weight'] = 1.0
    
    # 1. Demographic balancing weights
    max_group_size = df['group'].value_counts().max()
    for group in df['group'].unique():
        group_size = df[df['group'] == group].shape[0]
        demographic_weight = max_group_size / group_size
        df.loc[df['group'] == group, 'weight'] *= demographic_weight
    
    # 2. Outcome balancing weights
    for group in df['group'].unique():
        group_mask = df['group'] == group
        enrolled_mask = df['program_enrolled_t'] == 1
        
        # Calculate positive class weight for this group
        pos_samples = (group_mask & enrolled_mask).sum()
        if pos_samples > 0:
            pos_weight = 1 / (pos_samples / group_mask.sum())
            df.loc[group_mask & enrolled_mask, 'weight'] *= pos_weight
    
    return df['weight']

def apply_fair_resampling(df, weights):
    # Prepare data for resampling
    X = df[['risk_score_t']]  # Features
    y = df['program_enrolled_t']  # Target
    groups = df['group']  # Group membership
    
    # Store original distributions
    original_dist = {
        'class': dict(Counter(y)),
        'group': dict(Counter(groups))
    }
    
    # Apply SMOTE with custom sampling strategy based on weights
    sampling_strategy = {1: int(len(df) * 0.5)}  # Target 50-50 class balance
    smote = SMOTE(sampling_strategy=sampling_strategy, random_state=42)
    
    # Resample
    X_resampled, y_resampled = smote.fit_resample(X, y)
    
    # Create resampled dataframe
    df_resampled = pd.DataFrame(X_resampled, columns=['risk_score_t'])
    df_resampled['program_enrolled_t'] = y_resampled
    
    return df_resampled, original_dist

def plot_rebalancing_effects(original_dist, df_resampled):
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))
    
    # Plot class distribution changes
    class_dist = {
        'Original': original_dist['class'],
        'Rebalanced': dict(Counter(df_resampled['program_enrolled_t']))
    }
    
    # Class distribution plot
    class_df = pd.DataFrame(class_dist).T
    class_df.plot(kind='bar', ax=ax1)
    ax1.set_title('Class Distribution Before and After Rebalancing')
    ax1.set_ylabel('Number of Samples')
    
    # Risk score distribution plot
    ax2.hist(df_resampled['risk_score_t'], bins=50, alpha=0.5, label='Rebalanced')
    ax2.set_title('Risk Score Distribution After Rebalancing')
    ax2.set_xlabel('Risk Score')
    ax2.set_ylabel('Frequency')
    
    plt.tight_layout()
    plt.savefig('fair_rebalancing_results.png')
    plt.close()

# Main execution
print("\nAnalyzing group disparities...")
disparities = analyze_group_disparities(df)

print("\nOriginal group representations:")
for group, prop in disparities['representation'].items():
    print(f"{group}: {prop:.1%}")

print("\nRisk score means by group:")
for group, stats in disparities['risk_scores'].items():
    print(f"{group}: {stats['mean']:.2f} (±{stats['std']:.2f})")

print("\nEnrollment rates by group:")
for group, rate in disparities['enrollment_rates'].items():
    print(f"{group}: {rate:.1%}")

print("\nCalculating fair sampling weights...")
weights = calculate_sampling_weights(df, disparities)

print("\nApplying fair resampling...")
df_resampled, original_dist = apply_fair_resampling(df, weights)

print("\nRebalancing results:")
print(f"Original total samples: {len(df)}")
print(f"Rebalanced total samples: {len(df_resampled)}")
print("\nClass distribution after rebalancing:")
print(dict(Counter(df_resampled['program_enrolled_t'])))

print("\nGenerating visualizations...")
plot_rebalancing_effects(original_dist, df_resampled)

print("\nRecommendations for Fair Model Training:")
print("1. Use the rebalanced dataset for initial model training")
print("2. Implement stratified cross-validation using both race and gender")
print("3. Apply the calculated sample weights during model training")
print("4. Monitor and validate performance separately for each demographic group")
print("5. Consider implementing fairness constraints in the model objective function") 

Loading data...

Analyzing group disparities...

Original group representations:
white_1: 55.5%
white_0: 33.1%
black_1: 7.6%
black_0: 3.9%

Risk score means by group:
black_0: 5.56 (±9.26)
black_1: 5.28 (±7.24)
white_0: 4.39 (±5.52)
white_1: 4.19 (±4.83)

Enrollment rates by group:
black_0: 1.5%
black_1: 1.4%
white_0: 1.0%
white_1: 0.8%

Calculating fair sampling weights...

Applying fair resampling...

Rebalancing results:
Original total samples: 48784
Rebalanced total samples: 72724

Class distribution after rebalancing:
{0: 48332, 1: 24392}

Generating visualizations...

Recommendations for Fair Model Training:
1. Use the rebalanced dataset for initial model training
2. Implement stratified cross-validation using both race and gender
3. Apply the calculated sample weights during model training
4. Monitor and validate performance separately for each demographic group
5. Consider implementing fairness constraints in the model objective function


## Output
 Initial analysis revealed imbalances in group representation, risk scores, and enrollment rates, with white individuals making up the majority and black individuals underrepresented. Risk scores and enrollment rates also varied slightly across these groups. To mitigate these disparities, fair sampling weights were calculated and applied, increasing the dataset size from 48,784 to 72,724 samples. The resulting class distribution became more balanced, enabling a fairer foundation for model training. Recommendations include using the rebalanced data, applying stratified cross-validation across race and gender,

In [23]:
#Old code given by llm
#1. **Two-Stage Rebalancing Approach:**
# Stage 1: Demographic balancing
#demographic_weights = max_group_size / group_size

# Stage 2: Outcome balancing within groups
#outcome_weights = 1 / (positive_samples / group_size)

# Combined weights
#final_weights = demographic_weights * outcome_weights

#2. **Stratified Sampling Implementation:**
# Stratification by both race and gender
#stratify_columns = ['race', 'dem_female']
#train_test_split(stratify=df[stratify_columns])


#New code:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, roc_auc_score

# 1. Load data
df = pd.read_csv("data_new.csv")

# 2. Build intersectional groups
df['group'] = df['race'] + '_' + df['dem_female'].astype(str)

# 3. Compute two‐stage rebalancing weights
group_counts      = df['group'].value_counts()
max_group_size    = group_counts.max()
demographic_weights = df['group'].map(lambda g: max_group_size / group_counts[g])

pos_counts      = df[df['program_enrolled_t'] == 1]['group'].value_counts()
outcome_weights = df['group'].map(
    lambda g: 1.0 / (pos_counts[g] / group_counts[g]) if g in pos_counts else 1.0
)

df['final_weight'] = demographic_weights * outcome_weights

# 4. Create strata for stratified splitting
df['strata'] = df['group']  # preserves race+gender mix

# 5. Define X, y, and weight vector w
y = df['program_enrolled_t']
X = df.drop(columns=['program_enrolled_t', 'group', 'final_weight', 'strata'])
w = df['final_weight']

# 6. Stratified train/test split
X_train, X_test, y_train, y_test, w_train, w_test = train_test_split(
    X, y, w, test_size=0.20, stratify=df['strata'], random_state=42
)

# 7. Preprocessing pipeline (impute → encode/scale)
categorical_cols = ['race', 'dem_female']
numerical_cols   = [c for c in X_train.columns if c not in categorical_cols]

cat_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot',  OneHotEncoder(handle_unknown='ignore', sparse_output=False))
])

num_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler',   StandardScaler())
])

preprocessor = ColumnTransformer([
    ('cat', cat_pipeline, categorical_cols),
    ('num', num_pipeline, numerical_cols)
], remainder='drop')

X_train_processed = preprocessor.fit_transform(X_train)
X_test_processed  = preprocessor.transform(X_test)

# 8. Train the model with sample weights
model = RandomForestClassifier(random_state=42)
model.fit(
    X_train_processed,
    y_train,
    sample_weight=w_train
)

# 9. Evaluate overall performance
y_pred  = model.predict(X_test_processed)
y_proba = model.predict_proba(X_test_processed)[:, 1]

print("Accuracy:", accuracy_score(y_test, y_pred))
print("ROC-AUC:",  roc_auc_score(y_test, y_proba))


Accuracy: 0.9902634006354413
ROC-AUC: 0.8709001078560612


## Comment
Changes made to original code:

Expanded weight computation: Replaced the simple placeholders (demographic_weights = max_group_size / group_size, outcome_weights = 1 / (…)) with actual Pandas mappings that calculate per-row demographic_weights, outcome_weights, and multiply into df['final_weight'].

Built explicit “strata” column: Added df['strata'] = df['group'] so train_test_split(..., stratify=...) actually preserves race+gender mixes.

Defined feature/target/weight arrays: Specified y, X, and w = df['final_weight'] instead of leaving them implicit.

Passed weights into split: Changed train_test_split to accept X, y, w and return w_train, w_test aligned with the splits.

Added full preprocessing: Introduced a ColumnTransformer with separate pipelines for imputing/scaling numerics and imputing/one-hot encoding categoricals—without this, model.fit would error on NaNs or strings.

Instantiated and trained a real model: Created a RandomForestClassifier and called fit(X_train_processed, y_train, sample_weight=w_train), rather than leaving model.fit(...) undefined.

Evaluation step: Included predict, predict_proba, and printed overall accuracy and ROC-AUC, so you get immediate feedback instead of a silent placeholder.

In [24]:
#2. **Feature Engineering:**
# Add interaction terms to capture intersectional effects
df['race_gender_interaction'] = df['race'] + '_' + df['dem_female']

#3. **Validation Strategy:**
# K-fold cross-validation with multiple stratification
from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits=5, shuffle=True)
for train_idx, val_idx in skf.split(X, df[stratify_columns]):
    # Ensure balanced representation in each fold

SyntaxError: incomplete input (3833615725.py, line 10)

## Comment
Did not finish code, only gave some recommendation. Not clear what to do next.

In [25]:
#**3. Model Training Recommendations:**
#A. **Fairness-Aware Training:**
#1. **Sample Weights:**
# Apply calculated weights during model training
model.fit(
    X_train_processed,
    y_train,
    sample_weight=w_train
)


## Comment
Only needed to change variables

In [28]:
#Old code provided by LLM
#2. **Custom Loss Function:**
#def fair_loss(y_true, y_pred, group_membership):
    # Base loss
#    base_loss = binary_crossentropy(y_true, y_pred)
    
    # Fairness penalty
#    fairness_penalty = calculate_group_disparity(y_true, y_pred, group_membership)
    
#    return base_loss + lambda_fairness * fairness_penalty


#B. **Monitoring and Validation:**
#1. **Group-Specific Metrics:**
#for group in groups:
#    group_metrics = evaluate_model(model, X[group_mask], y[group_mask])
#    monitor_group_performance(group, group_metrics)

#2. **Fairness Constraints:**
# Implement demographic parity constraint
#demographic_parity_diff = abs(group1_pred_rate - group2_pred_rate)
#assert demographic_parity_diff <= threshold

#New code:
import numpy as np
import pandas as pd
from sklearn.metrics import log_loss, accuracy_score, roc_auc_score

# — assume you already did —
# df = pd.read_csv("data_new.csv")
# df['group'] = df['race'] + '_' + df['dem_female'].astype(str)
# X_train_processed, X_test_processed, y_train, y_test, model have been defined

# For convenience:
groups_test = df.loc[y_test.index, 'group'].values
probs_test  = model.predict_proba(X_test_processed)[:,1]
preds_test  = (probs_test > 0.5).astype(int)

# Hyperparameters
lambda_fairness = 0.5
threshold       = 0.05  # max allowed parity gap

# 1. Custom loss & helper
def calculate_group_disparity(y_true, y_pred, groups):
    """Max absolute difference between group-wise mean(pred) and mean(true)."""
    disparities = []
    for g in np.unique(groups):
        mask = (groups == g)
        disparities.append(abs(y_pred[mask].mean() - y_true[mask].mean()))
    return max(disparities)

def fair_loss(y_true, y_pred, groups, lam):
    """Log‐loss plus fairness penalty."""
    base = log_loss(y_true, y_pred)
    pen  = calculate_group_disparity(y_true, y_pred, groups)
    return base + lam * pen

# Compute and print the fair loss on your test set
fl = fair_loss(y_test.values, probs_test, groups_test, lambda_fairness)
print(f"Fair loss (logloss + λ·disparity): {fl:.4f}")

# 2. Monitoring & group-specific metrics
print("\nPer-group performance:")
for g in np.unique(groups_test):
    m = (groups_test == g)
    acc = accuracy_score(y_test[m], preds_test[m])
    auc = roc_auc_score(y_test[m], probs_test[m])
    print(f" - {g}:  acc={acc:.3f},  AUC={auc:.3f}")

# 3. Demographic-parity check between the two largest groups
grp_counts = pd.Series(groups_test).value_counts()
g1, g2 = grp_counts.index[:2]    # top two
rate1 = preds_test[groups_test==g1].mean()
rate2 = preds_test[groups_test==g2].mean()
diff  = abs(rate1 - rate2)
print(f"\nDemographic parity gap ({g1} vs {g2}): {diff:.3f}")
assert diff <= threshold, f"Parity gap {diff:.3f} exceeds threshold {threshold}"


Fair loss (logloss + λ·disparity): 0.0736

Per-group performance:
 - black_0:  acc=0.987,  AUC=0.770
 - black_1:  acc=0.985,  AUC=0.914
 - white_0:  acc=0.989,  AUC=0.846
 - white_1:  acc=0.992,  AUC=0.886

Demographic parity gap (white_1 vs white_0): 0.000


## Output
Fair-loss score of 0.0736 combines log-loss and the fairness penalty, so lower is better, showing the model is both accurate and balanced across groups. All four subgroups have high accuracy (98.5–99.2 %) and strong ROC AUCs, though it’s weakest on Black men (AUC 0.770) versus Black women (0.914) or White women (0.886). The demographic-parity gap between White women and White men is 0.000, meaning they receive identical positive prediction rates.

### Prompt 2

### Fairness metric file

In [5]:
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
df = pd.read_csv('data_new.csv')

def calculate_fairness_metrics(df, group_columns):
    # Convert risk score to binary prediction (using median as threshold)
    median_risk = df['risk_score_t'].median()
    df['predicted_high_risk'] = (df['risk_score_t'] > median_risk).astype(int)
    
    # Use program enrollment as actual outcome
    df['actual_outcome'] = df['program_enrolled_t']
    
    # Calculate metrics by group
    metrics = {}
    
    # Create group combinations
    df['group_key'] = df[group_columns].apply(lambda x: '_'.join(x.astype(str)), axis=1)
    groups = df['group_key'].unique()
    
    for group in groups:
        group_mask = df['group_key'] == group
        group_data = df[group_mask]
        
        if len(group_data) == 0:
            continue
            
        # Prediction rates
        pred_rate = group_data['predicted_high_risk'].mean()
        actual_rate = group_data['actual_outcome'].mean()
        
        # Calculate confusion matrix
        tn, fp, fn, tp = confusion_matrix(
            group_data['actual_outcome'],
            group_data['predicted_high_risk']
        ).ravel()
        
        # Calculate metrics
        fpr = fp / (fp + tn) if (fp + tn) > 0 else 0
        fnr = fn / (fn + tp) if (fn + tp) > 0 else 0
        tpr = tp / (tp + fn) if (tp + fn) > 0 else 0
        ppv = tp / (tp + fp) if (tp + fp) > 0 else 0  # Positive Predictive Value
        
        # Calculate equal opportunity difference (true positive rate difference)
        # and demographic parity difference
        metrics[group] = {
            'group_size': len(group_data),
            'prediction_rate': pred_rate,
            'actual_rate': actual_rate,
            'false_positive_rate': fpr,
            'false_negative_rate': fnr,
            'true_positive_rate': tpr,
            'positive_predictive_value': ppv
        }
    
    return metrics

def calculate_fairness_differences(metrics):
    # Find reference group (usually the majority group)
    reference_group = max(metrics.items(), key=lambda x: x[1]['group_size'])[0]
    ref_metrics = metrics[reference_group]
    
    differences = {}
    for group in metrics:
        if group != reference_group:
            group_metrics = metrics[group]
            
            # Calculate differences in key metrics
            differences[group] = {
                'equal_opportunity_diff': group_metrics['true_positive_rate'] - ref_metrics['true_positive_rate'],
                'demographic_parity_diff': group_metrics['prediction_rate'] - ref_metrics['prediction_rate'],
                'fpr_diff': group_metrics['false_positive_rate'] - ref_metrics['false_positive_rate'],
                'ppv_diff': group_metrics['positive_predictive_value'] - ref_metrics['positive_predictive_value']
            }
    
    return differences, reference_group

def plot_fairness_metrics(metrics, group_columns):
    groups = list(metrics.keys())
    metrics_to_plot = ['prediction_rate', 'false_positive_rate', 'false_negative_rate', 'true_positive_rate']
    
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))
    axes = axes.ravel()
    
    for idx, metric in enumerate(metrics_to_plot):
        values = [m[metric] for m in metrics.values()]
        ax = axes[idx]
        ax.bar(groups, values)
        ax.set_title(f'{metric.replace("_", " ").title()}')
        ax.set_xticklabels(groups, rotation=45, ha='right')
        ax.set_ylabel('Rate')
    
    plt.tight_layout()
    plt.savefig('fairness_metrics.png')
    plt.close()

# Calculate metrics for different demographic intersections
group_columns = ['race', 'dem_female']  # Adding gender intersection
metrics = calculate_fairness_metrics(df, group_columns)
differences, reference_group = calculate_fairness_differences(metrics)

# Print results
print("\nFairness Metrics Analysis")
print("=" * 50)
print(f"\nReference group: {reference_group}")

print("\nDetailed Metrics by Group:")
for group, group_metrics in metrics.items():
    print(f"\n{group} (n={group_metrics['group_size']}):")
    for metric, value in group_metrics.items():
        if metric != 'group_size':
            print(f"  {metric}: {value:.3f}")

print("\nFairness Differences (compared to reference group):")
for group, diff_metrics in differences.items():
    print(f"\n{group}:")
    for metric, value in diff_metrics.items():
        # Add interpretation threshold
        threshold = 0.1  # 10% difference threshold
        interpretation = ""
        if abs(value) > threshold:
            interpretation = " [SIGNIFICANT DISPARITY]"
        print(f"  {metric}: {value:.3f}{interpretation}")

# Create visualizations
plot_fairness_metrics(metrics, group_columns) 


Fairness Metrics Analysis

Reference group: white_1

Detailed Metrics by Group:

white_0 (n=16125):
  prediction_rate: 0.502
  actual_rate: 0.010
  false_positive_rate: 0.497
  false_negative_rate: 0.006
  true_positive_rate: 0.994
  positive_predictive_value: 0.020

white_1 (n=27077):
  prediction_rate: 0.493
  actual_rate: 0.008
  false_positive_rate: 0.489
  false_negative_rate: 0.024
  true_positive_rate: 0.976
  positive_predictive_value: 0.015

black_1 (n=3686):
  prediction_rate: 0.540
  actual_rate: 0.014
  false_positive_rate: 0.533
  false_negative_rate: 0.000
  true_positive_rate: 1.000
  positive_predictive_value: 0.026

black_0 (n=1896):
  prediction_rate: 0.483
  actual_rate: 0.015
  false_positive_rate: 0.476
  false_negative_rate: 0.071
  true_positive_rate: 0.929
  positive_predictive_value: 0.028

Fairness Differences (compared to reference group):

white_0:
  equal_opportunity_diff: 0.018
  demographic_parity_diff: 0.009
  fpr_diff: 0.008
  ppv_diff: 0.004

black_1:

  ax.set_xticklabels(groups, rotation=45, ha='right')
  ax.set_xticklabels(groups, rotation=45, ha='right')
  ax.set_xticklabels(groups, rotation=45, ha='right')
  ax.set_xticklabels(groups, rotation=45, ha='right')


## Output
The analysis uses each group’s median risk score to make a binary “high‐risk” prediction and then compares that to actual program enrollment. White women (white_1) serve as the reference.

Overall behavior: All four groups have very low enrollment rates (~0.8–1.5%) and the model predicts “high risk” for about 48–54% of cases, so it makes many false positives.

True-positive rates (TPR) are highest for Black women (black_1: 1.000) and lowest for Black men (black_0: 0.929), so Black men are the only group whose equal-opportunity diff (–0.048) dips below the reference.

Demographic-parity diffs (difference in how often the model flags “high risk”) range from –0.010 (black_0) to +0.047 (black_1), meaning Black women are flagged slightly more often than White women, and Black men slightly less.

False-positive rate (FPR) differences track these parity gaps (±0.008–0.044), and positive-predictive values (PPV) are all under 3%.

Because none of these differences exceed the 10% threshold, the model meets the chosen fairness guardrails overall. The largest shortfall is that Black men see about a 4.8% lower true-positive rate than White women, suggesting you might tune the model further if equal opportunity for that subgroup is a priority.

### Prompt 3

In [6]:
pip install imbalanced-learn


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


### Evaluation file

In [29]:
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.combine import SMOTEENN
from collections import Counter

# Load the data
print("Loading and preparing data...")
df = pd.read_csv('data_new.csv')

def analyze_group_distributions(df):
    # Create intersectional groups
    df['group'] = df['race'] + '_' + df['dem_female'].astype(str)
    
    # Calculate group sizes and proportions
    group_stats = pd.DataFrame({
        'size': df.groupby('group').size(),
        'proportion': df.groupby('group').size() / len(df) * 100
    })
    
    # Calculate prediction rates per group
    group_stats['prediction_rate'] = df.groupby('group')['risk_score_t'].mean()
    group_stats['enrollment_rate'] = df.groupby('group')['program_enrolled_t'].mean()
    
    return group_stats

def plot_group_distributions(group_stats):
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 12))
    
    # Plot group sizes
    group_stats['size'].plot(kind='bar', ax=ax1)
    ax1.set_title('Group Sizes')
    ax1.set_ylabel('Number of Samples')
    ax1.tick_params(axis='x', rotation=45)
    
    # Plot rates
    rates = group_stats[['prediction_rate', 'enrollment_rate']]
    rates.plot(kind='bar', ax=ax2)
    ax2.set_title('Prediction and Enrollment Rates by Group')
    ax2.set_ylabel('Rate')
    ax2.tick_params(axis='x', rotation=45)
    
    plt.tight_layout()
    plt.savefig('group_distributions.png')
    plt.close()

def evaluate_imbalance_severity(group_stats):
    # Calculate imbalance metrics
    max_size = group_stats['size'].max()
    min_size = group_stats['size'].min()
    imbalance_ratio = max_size / min_size
    
    # Calculate coefficient of variation
    cv = group_stats['size'].std() / group_stats['size'].mean()
    
    return {
        'imbalance_ratio': imbalance_ratio,
        'coefficient_of_variation': cv,
        'size_range': f"{min_size:,} - {max_size:,}"
    }

def prepare_data_for_rebalancing(df):
    # Create features and target
    X = df[['risk_score_t']]  # Using risk score as feature for demonstration
    y = df['program_enrolled_t']  # Program enrollment as target
    groups = df['race'] + '_' + df['dem_female'].astype(str)
    
    return X, y, groups

def apply_rebalancing_techniques(X, y, groups):
    results = {}
    
    # Original distribution
    results['original'] = dict(Counter(y))
    
    # SMOTE oversampling
    smote = SMOTE(random_state=42)
    X_smote, y_smote = smote.fit_resample(X, y)
    results['smote'] = dict(Counter(y_smote))
    
    # Random undersampling
    rus = RandomUnderSampler(random_state=42)
    X_rus, y_rus = rus.fit_resample(X, y)
    results['undersampling'] = dict(Counter(y_rus))
    
    # Combined SMOTE+ENN
    smoteenn = SMOTEENN(random_state=42)
    X_smoteenn, y_smoteenn = smoteenn.fit_resample(X, y)
    results['smoteenn'] = dict(Counter(y_smoteenn))
    
    return results

def plot_rebalancing_results(rebalancing_results):
    techniques = list(rebalancing_results.keys())
    fig, ax = plt.subplots(figsize=(12, 6))
    
    x = np.arange(len(techniques))
    width = 0.35
    
    # Plot class distributions for each technique
    for i, technique in enumerate(techniques):
        counts = rebalancing_results[technique]
        ax.bar(x[i], counts[0], width, label='Class 0', color='skyblue')
        ax.bar(x[i], counts[1], width, bottom=counts[0], label='Class 1' if i == 0 else "", color='lightcoral')
    
    ax.set_ylabel('Number of Samples')
    ax.set_title('Class Distribution After Different Rebalancing Techniques')
    ax.set_xticks(x)
    ax.set_xticklabels(techniques, rotation=45)
    ax.legend()
    
    plt.tight_layout()
    plt.savefig('rebalancing_results.png')
    plt.close()

# Main analysis
print("\nAnalyzing group distributions...")
group_stats = analyze_group_distributions(df)
print("\nGroup Statistics:")
print(group_stats)

print("\nEvaluating imbalance severity...")
imbalance_metrics = evaluate_imbalance_severity(group_stats)
print("\nImbalance Metrics:")
for metric, value in imbalance_metrics.items():
    print(f"{metric}: {value}")

print("\nPreparing data for rebalancing...")
X, y, groups = prepare_data_for_rebalancing(df)

print("\nApplying rebalancing techniques...")
rebalancing_results = apply_rebalancing_techniques(X, y, groups)
print("\nRebalancing Results:")
for technique, counts in rebalancing_results.items():
    print(f"{technique}: {counts}")

# Generate visualizations
print("\nGenerating visualizations...")
plot_group_distributions(group_stats)
plot_rebalancing_results(rebalancing_results)

# Recommendations based on imbalance ratio
print("\nRecommendations:")
if imbalance_metrics['imbalance_ratio'] > 10:
    print("- Severe imbalance detected. Consider using SMOTEENN for balanced performance.")
    print("- Implement cost-sensitive learning or class weights.")
    print("- Consider collecting more data for minority groups.")
elif imbalance_metrics['imbalance_ratio'] > 3:
    print("- Moderate imbalance detected. Consider using SMOTE for minority class oversampling.")
    print("- Evaluate performance with and without rebalancing.")
else:
    print("- Mild imbalance. Standard techniques may be sufficient.")
    print("- Monitor performance across groups during training.") 

Loading and preparing data...

Analyzing group distributions...

Group Statistics:
          size  proportion  prediction_rate  enrollment_rate
group                                                       
black_0   1896    3.886520         5.555866         0.014768
black_1   3686    7.555756         5.281573         0.013836
white_0  16125   33.053870         4.389017         0.010047
white_1  27077   55.503854         4.194229         0.007793

Evaluating imbalance severity...

Imbalance Metrics:
imbalance_ratio: 14.281118143459915
coefficient_of_variation: 0.9648296832362823
size_range: 1,896 - 27,077

Preparing data for rebalancing...

Applying rebalancing techniques...

Rebalancing Results:
original: {0: 48332, 1: 452}
smote: {0: 48332, 1: 48332}
undersampling: {0: 452, 1: 452}
smoteenn: {0: 46402, 1: 38024}

Generating visualizations...

Recommendations:
- Severe imbalance detected. Consider using SMOTEENN for balanced performance.
- Implement cost-sensitive learning or class weig

### Output
The results show a highly skewed dataset both by demographic group and by outcome. Intersectionally, White women make up 55.5% of the data while Black men are with only 3.9%, and the largest group is more than 14× bigger than the smallest (imbalance ratio ≈14.3, CV≈0.96). In the raw outcome distribution there are 48,332 negatives versus just 452 positives a 99:1 split.

When applying SMOTE, oversample until each class has 48,332 examples; with random undersampling shrink both classes to 452; and with SMOTE+ENN end up with about 46,402 negatives and 38,024 positives, a much more balanced compromise. Because the imbalance ratio exceeds 10, the code recommends SMOTEENN for best class balance, using cost-sensitive learning or class weights in the model.
