<a href="https://colab.research.google.com/github/mmfara/RisCanvi/blob/main/Confidence_Interval_for_COMPAS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This notebook computes, for each oversampling strategy, the mean and 95% confidence interval (Student's t) of every fairness metric and of accuracy for each model, taken across the eight sampling techniques.

Standard Oversampling Strategy

In [None]:
import numpy as np
import scipy.stats as stats
import pandas as pd

def compute_mean_ci(data, confidence_level=0.95):
    """Compute the row-wise mean and two-sided Student-t confidence interval.

    Parameters
    ----------
    data : dict or array-like
        Anything accepted by ``pd.DataFrame``. Each column is one
        observation (here: one sampling technique) and each row is the
        entity being summarised (here: one model).
    confidence_level : float, default 0.95
        Coverage of the interval; must lie strictly between 0 and 1.

    Returns
    -------
    pd.DataFrame
        Indexed like the rows of ``data`` with the columns ``"Mean"``,
        ``"Lower CI"`` and ``"Upper CI"``. Rows with fewer than two
        non-missing observations get NaN bounds.

    Raises
    ------
    ValueError
        If ``confidence_level`` is not strictly between 0 and 1.
    """
    if not 0 < confidence_level < 1:
        raise ValueError("confidence_level must be strictly between 0 and 1")

    df = pd.DataFrame(data)
    mean_values = df.mean(axis=1)
    std_values = df.std(axis=1, ddof=1)  # sample standard deviation
    # mean()/std() skip NaN, so the sample size must be counted per row;
    # using df.shape[1] would overstate n whenever a value is missing.
    n = df.count(axis=1)
    # Two-sided interval: put (1 - confidence_level) / 2 in each tail.
    t_score = stats.t.ppf((1 + confidence_level) / 2, n - 1)
    margin_of_error = t_score * (std_values / np.sqrt(n))
    lower_bound = mean_values - margin_of_error
    upper_bound = mean_values + margin_of_error

    return pd.DataFrame({
        "Mean": mean_values,
        "Lower CI": lower_bound,
        "Upper CI": upper_bound
    })

data = {
    "Metrics": {
        "Disparate Impact": {
            "RandomOverSampler": {
                "LR": 1.9849, "KNN": 1.5289, "RF": 1.9908, "SVM": 1.7432, "DT": 2.0194, "NB": 3.0865, "GraB": 1.7243
            },
            "SMOTE": {
                "LR": 1.9763, "KNN": 1.5567, "RF": 2.0367, "SVM": 1.8800, "DT": 2.2134, "NB": 3.0865, "GraB": 1.7297
            },
            "SMOTETomek": {
                "LR": 1.9840, "KNN": 1.5467, "RF": 1.9270, "SVM": 1.7321, "DT": 1.9367, "NB": 2.7688, "GraB": 1.7297
            },
            "SMOTEENN": {
                "LR": 2.2346, "KNN": 2.0506, "RF": 2.0076, "SVM": 1.9886, "DT": 1.9903, "NB": 2.4696, "GraB": 2.0057
            },
            "SMOTEN": {
                "LR": 2.0224, "KNN": 1.6414, "RF": 2.1238, "SVM": 1.7602, "DT": 1.9364, "NB": 3.1651, "GraB": 1.7004
            },
            "KMeans-SMOTE": {
                "LR": 2.1249, "KNN": 1.5589, "RF": 2.0716, "SVM": 1.8867, "DT": 2.1833, "NB": 3.2116, "GraB": 2.0137
            },
            "Borderline-SMOTE": {
                "LR": 1.9796, "KNN": 1.6545, "RF": 2.0952, "SVM": 1.9309, "DT": 2.1947, "NB": 2.9429, "GraB": 1.7668
            },
            "SVM-SMOTE": {
                "LR": 1.9352, "KNN": 1.5769, "RF": 2.1752, "SVM": 1.7794, "DT": 2.0184, "NB": 3.0192, "GraB": 1.7967
            }
        },
        "Statistical Parity Difference": {
            "RandomOverSampler": {
                "LR": 0.2486, "KNN": 0.1515, "RF": 0.2581, "SVM": 0.2273, "DT": 0.2425, "NB": 0.1924, "GraB": 0.2192
            },
            "SMOTE": {
                "LR": 0.2480, "KNN": 0.1622, "RF": 0.2650, "SVM": 0.2321, "DT": 0.2690, "NB": 0.1924, "GraB": 0.2232
            },
            "SMOTETomek": {
                "LR": 0.2516, "KNN": 0.1610, "RF": 0.2490, "SVM": 0.2227, "DT": 0.2228, "NB": 0.1746, "GraB": 0.2232
            },
            "SMOTEENN": {
                "LR": 0.2797, "KNN": 0.2567, "RF": 0.2478, "SVM": 0.2656, "DT": 0.2163, "NB": 0.2212, "GraB": 0.2408
            },
            "SMOTEN": {
                "LR": 0.2564, "KNN": 0.1816, "RF": 0.2673, "SVM": 0.2325, "DT": 0.2258, "NB": 0.1997, "GraB": 0.2153
            },
            "KMeans-SMOTE": {
                "LR": 0.2548, "KNN": 0.1574, "RF": 0.2549, "SVM": 0.2253, "DT": 0.2413, "NB": 0.1789, "GraB": 0.2444
            },
            "Borderline-SMOTE": {
                "LR": 0.2520, "KNN": 0.1843, "RF": 0.2835, "SVM": 0.2485, "DT": 0.2784, "NB": 0.1886, "GraB": 0.2395
            },
            "SVM-SMOTE": {
                "LR": 0.2421, "KNN": 0.1727, "RF": 0.2947, "SVM": 0.2308, "DT": 0.2340, "NB": 0.1862, "GraB": 0.2424
            }
        },
        "Equal Opportunity Difference": {
            "RandomOverSampler": {
                "LR": 0.2531, "KNN": 0.1432, "RF": 0.2613, "SVM": 0.1993, "DT": 0.2374, "NB": 0.2643, "GraB": 0.1995
            },
            "SMOTE": {
                "LR": 0.2551, "KNN": 0.1450, "RF": 0.2572, "SVM": 0.2245, "DT": 0.2455, "NB": 0.2643, "GraB": 0.1973
            },
            "SMOTETomek": {
                "LR": 0.2510, "KNN": 0.1532, "RF": 0.2367, "SVM": 0.1912, "DT": 0.1904, "NB": 0.2357, "GraB": 0.2056
            },
            "SMOTEENN": {
                "LR": 0.2900, "KNN": 0.2533, "RF": 0.2347, "SVM": 0.2590, "DT": 0.2374, "NB": 0.2508, "GraB": 0.2369
            },
            "SMOTEN": {
                "LR": 0.2572, "KNN": 0.1778, "RF": 0.2718, "SVM": 0.2034, "DT": 0.2088, "NB": 0.2724, "GraB": 0.2015
            },
            "KMeans-SMOTE": {
                "LR": 0.2534, "KNN": 0.1391, "RF": 0.2554, "SVM": 0.2226, "DT": 0.2541, "NB": 0.2504, "GraB": 0.2266
            },
            "Borderline-SMOTE": {
                "LR": 0.2510, "KNN": 0.1736, "RF": 0.2877, "SVM": 0.2387, "DT": 0.2964, "NB": 0.2520, "GraB": 0.2134
            },
            "SVM-SMOTE": {
                "LR": 0.2469, "KNN": 0.1796, "RF": 0.2897, "SVM": 0.2118, "DT": 0.2026, "NB": 0.2562, "GraB": 0.2218
            }
        },
        "Predictive Equality Difference": {
            "RandomOverSampler": {
                "LR": 0.1819, "KNN": 0.1049, "RF": 0.1991, "SVM": 0.1846, "DT": 0.1961, "NB": 0.0912, "GraB": 0.1714
            },
            "SMOTE": {
                "LR": 0.1792, "KNN": 0.1218, "RF": 0.2140, "SVM": 0.1776, "DT": 0.2370, "NB": 0.0912, "GraB": 0.1783
            },
            "SMOTETomek": {
                "LR": 0.1898, "KNN": 0.1138, "RF": 0.1996, "SVM": 0.1831, "DT": 0.1968, "NB": 0.0838, "GraB": 0.1745
            },
            "SMOTEENN": {
                "LR": 0.2064, "KNN": 0.2014, "RF": 0.1937, "SVM": 0.2086, "DT": 0.1368, "NB": 0.1447, "GraB": 0.1779
            },
            "SMOTEN": {
                "LR": 0.1930, "KNN": 0.1308, "RF": 0.2078, "SVM": 0.1910, "DT": 0.1898, "NB": 0.0976, "GraB": 0.1650
            },
            "KMeans-SMOTE": {
                "LR": 0.1922, "KNN": 0.1192, "RF": 0.1962, "SVM": 0.1650, "DT": 0.1790, "NB": 0.0834, "GraB": 0.1948
            },
            "Borderline-SMOTE": {
                "LR": 0.1898, "KNN": 0.1367, "RF": 0.2213, "SVM": 0.1960, "DT": 0.2109, "NB": 0.0934, "GraB": 0.1941
            },
            "SVM-SMOTE": {
                "LR": 0.1744, "KNN": 0.1121, "RF": 0.2389, "SVM": 0.1831, "DT": 0.2069, "NB": 0.0870, "GraB": 0.1962
            }
        },
        "Accuracy": {
            "RandomOverSampler": {
                "LR": 0.6711, "KNN": 0.6433, "RF": 0.6547, "SVM": 0.6698, "DT": 0.6414, "NB": 0.6395, "GraB": 0.6667
            },
            "SMOTE": {
                "LR": 0.6711, "KNN": 0.6446, "RF": 0.6566, "SVM": 0.6635, "DT": 0.6427, "NB": 0.6395, "GraB": 0.6711
            },
            "SMOTETomek": {
                "LR": 0.6686, "KNN": 0.6433, "RF": 0.6597, "SVM": 0.6692, "DT": 0.6433, "NB": 0.6338, "GraB": 0.6648
            },
            "SMOTEENN": {
                "LR": 0.6780, "KNN": 0.6591, "RF": 0.6742, "SVM": 0.6673, "DT": 0.6730, "NB": 0.6547, "GraB": 0.6787
            },
            "SMOTEN": {
                "LR": 0.6705, "KNN": 0.6458, "RF": 0.6553, "SVM": 0.6692, "DT": 0.6383, "NB": 0.6402, "GraB": 0.6616
            },
            "KMeans-SMOTE": {
                "LR": 0.6742, "KNN": 0.6420, "RF": 0.6604, "SVM": 0.6686, "DT": 0.6484, "NB": 0.6275, "GraB": 0.6723
            },
            "Borderline-SMOTE": {
                "LR": 0.6705, "KNN": 0.6509, "RF": 0.6610, "SVM": 0.6629, "DT": 0.6496, "NB": 0.6408, "GraB": 0.6723
            },
            "SVM-SMOTE": {
                "LR": 0.6723, "KNN": 0.6477, "RF": 0.6635, "SVM": 0.6660, "DT": 0.6446, "NB": 0.6383, "GraB": 0.6648
            }
        }
    }
}

# Summarise every metric: per-model mean and confidence interval across samplers.
results = {
    metric: compute_mean_ci(methods)
    for metric, methods in data["Metrics"].items()
}

# Display results
for metric, result in results.items():
    print(f"\n{metric}:", result, sep="\n")



Disparate Impact:
          Mean  Lower CI  Upper CI
LR    2.030238  1.946943  2.113532
KNN   1.639325  1.495440  1.783210
RF    2.053487  1.987099  2.119876
SVM   1.837637  1.756993  1.918282
DT    2.061575  1.963887  2.159263
NB    2.968775  2.764578  3.172972
GraB  1.808375  1.701654  1.915096

Statistical Parity Difference:
          Mean  Lower CI  Upper CI
LR    0.254150  0.244759  0.263541
KNN   0.178425  0.150272  0.206578
RF    0.265038  0.251152  0.278923
SVM   0.235600  0.223556  0.247644
DT    0.241263  0.222821  0.259704
NB    0.191750  0.179795  0.203705
GraB  0.231000  0.221082  0.240918

Equal Opportunity Difference:
          Mean  Lower CI  Upper CI
LR    0.257213  0.245844  0.268581
KNN   0.170600  0.139507  0.201693
RF    0.261813  0.244575  0.279050
SVM   0.218812  0.200153  0.237472
DT    0.234075  0.205856  0.262294
NB    0.255763  0.246321  0.265204
GraB  0.212825  0.200796  0.224854

Predictive Equality Difference:
          Mean  Lower CI  Upper CI
LR    0.18

Oversampling Based on Sensitive Attributes Strategy

In [None]:
import numpy as np
import scipy.stats as stats
import pandas as pd

def compute_mean_ci(data, confidence_level=0.95):
    """Compute the row-wise mean and two-sided Student-t confidence interval.

    Parameters
    ----------
    data : dict or array-like
        Anything accepted by ``pd.DataFrame``. Each column is one
        observation (here: one sampling technique) and each row is the
        entity being summarised (here: one model).
    confidence_level : float, default 0.95
        Coverage of the interval; must lie strictly between 0 and 1.

    Returns
    -------
    pd.DataFrame
        Indexed like the rows of ``data`` with the columns ``"Mean"``,
        ``"Lower CI"`` and ``"Upper CI"``. Rows with fewer than two
        non-missing observations get NaN bounds.

    Raises
    ------
    ValueError
        If ``confidence_level`` is not strictly between 0 and 1.
    """
    if not 0 < confidence_level < 1:
        raise ValueError("confidence_level must be strictly between 0 and 1")

    df = pd.DataFrame(data)
    mean_values = df.mean(axis=1)
    std_values = df.std(axis=1, ddof=1)  # sample standard deviation
    # mean()/std() skip NaN, so the sample size must be counted per row;
    # using df.shape[1] would overstate n whenever a value is missing.
    n = df.count(axis=1)
    # Two-sided interval: put (1 - confidence_level) / 2 in each tail.
    t_score = stats.t.ppf((1 + confidence_level) / 2, n - 1)
    margin_of_error = t_score * (std_values / np.sqrt(n))
    lower_bound = mean_values - margin_of_error
    upper_bound = mean_values + margin_of_error

    return pd.DataFrame({
        "Mean": mean_values,
        "Lower CI": lower_bound,
        "Upper CI": upper_bound
    })

data_2 = {
    "Metrics": {
        "Disparate Impact": {
            "RandomOverSampler": {
                "LR": 2.1080, "KNN": 1.6249, "RF": 1.8611, "SVM": 2.0327, "DT": 2.2134, "NB": 3.0815, "GraB": 2.0051
            },
            "SMOTE": {
                "LR": 2.2123, "KNN": 1.7683, "RF": 2.0996, "SVM": 2.2649, "DT": 1.9334, "NB": 3.0495, "GraB": 1.9675
            },
            "SMOTETomek": {
                "LR": 2.2218, "KNN": 1.6921, "RF": 2.2726, "SVM": 2.2701, "DT": 1.8961, "NB": 3.0130, "GraB": 1.9819
            },
            "SMOTEENN": {
                "LR": 2.4893, "KNN": 1.3786, "RF": 1.7786, "SVM": 2.1696, "DT": 1.0551, "NB": 5.5508, "GraB": 1.1229
            },
            "SMOTEN": {
                "LR": 2.0392, "KNN": 1.6669, "RF": 1.8057, "SVM": 1.8948, "DT": 1.6618, "NB": 2.7723, "GraB": 2.0507
            },
            "KMeans-SMOTE": {
                "LR": 2.1470, "KNN": 1.7602, "RF": 2.2026, "SVM": 2.0993, "DT": 2.4145, "NB": 3.0646, "GraB": 2.0893
            },
            "Borderline-SMOTE": {
                "LR": 2.1598, "KNN": 1.7434, "RF": 2.0287, "SVM": 1.9729, "DT": 2.0259, "NB": 2.9182, "GraB": 2.0157
            },
            "SVM-SMOTE": {
                "LR": 1.9108, "KNN": 1.3700, "RF": 2.0106, "SVM": 1.8524, "DT": 1.9512, "NB": 3.0601, "GraB": 1.9234
            }
        },
        "Statistical Parity Difference": {
            "RandomOverSampler": {
                "LR": 0.2492, "KNN": 0.1648, "RF": 0.2299, "SVM": 0.2356, "DT": 0.2690, "NB": 0.2021, "GraB": 0.2423
            },
            "SMOTE": {
                "LR": 0.2570, "KNN": 0.2001, "RF": 0.2651, "SVM": 0.2538, "DT": 0.2054, "NB": 0.1990, "GraB": 0.2286
            },
            "SMOTETomek": {
                "LR": 0.2550, "KNN": 0.1893, "RF": 0.2759, "SVM": 0.2548, "DT": 0.2001, "NB": 0.2020, "GraB": 0.2272
            },
            "SMOTEENN": {
                "LR": 0.2434, "KNN": 0.1305, "RF": 0.2406, "SVM": 0.2612, "DT": 0.0257, "NB": 0.2504, "GraB": 0.0479
            },
            "SMOTEN": {
                "LR": 0.2421, "KNN": 0.1942, "RF": 0.2203, "SVM": 0.2273, "DT": 0.1810, "NB": 0.1807, "GraB": 0.2482
            },
            "KMeans-SMOTE": {
                "LR": 0.2450, "KNN": 0.2128, "RF": 0.2724, "SVM": 0.2401, "DT": 0.2838, "NB": 0.2071, "GraB": 0.2520
            },
            "Borderline-SMOTE": {
                "LR": 0.2496, "KNN": 0.2057, "RF": 0.2530, "SVM": 0.2251, "DT": 0.2291, "NB": 0.1769, "GraB": 0.2399
            },
            "SVM-SMOTE": {
                "LR": 0.2240, "KNN": 0.1227, "RF": 0.2518, "SVM": 0.2110, "DT": 0.2155, "NB": 0.2000, "GraB": 0.2316
            }
        },
        "Equal Opportunity Difference": {
            "RandomOverSampler": {
                "LR": 0.2534, "KNN": 0.1557, "RF": 0.2512, "SVM": 0.2392, "DT": 0.2926, "NB": 0.2785, "GraB": 0.2391
            },
            "SMOTE": {
                "LR": 0.2638, "KNN": 0.1984, "RF": 0.2594, "SVM": 0.2599, "DT": 0.1845, "NB": 0.2703, "GraB": 0.2147
            },
            "SMOTETomek": {
                "LR": 0.2598, "KNN": 0.1860, "RF": 0.2761, "SVM": 0.2599, "DT": 0.1721, "NB": 0.2723, "GraB": 0.2147
            },
            "SMOTEENN": {
                "LR": 0.2668, "KNN": 0.1344, "RF": 0.2283, "SVM": 0.2822, "DT": 0.0667, "NB": 0.3363, "GraB": 0.0609
            },
            "SMOTEN": {
                "LR": 0.2451, "KNN": 0.1942, "RF": 0.1999, "SVM": 0.2246, "DT": 0.1983, "NB": 0.2397, "GraB": 0.2330
            },
            "KMeans-SMOTE": {
                "LR": 0.2536, "KNN": 0.1899, "RF": 0.2719, "SVM": 0.2414, "DT": 0.2763, "NB": 0.2784, "GraB": 0.2310
            },
            "Borderline-SMOTE": {
                "LR": 0.2576, "KNN": 0.1736, "RF": 0.2513, "SVM": 0.2207, "DT": 0.2088, "NB": 0.2399, "GraB": 0.2331
            },
            "SVM-SMOTE": {
                "LR": 0.2226, "KNN": 0.1141, "RF": 0.2491, "SVM": 0.1981, "DT": 0.2008, "NB": 0.2723, "GraB": 0.2186
            }
        },
        "Predictive Equality Difference": {
            "RandomOverSampler": {
                "LR": 0.1801, "KNN": 0.1215, "RF": 0.1562, "SVM": 0.1716, "DT": 0.1978, "NB": 0.0965, "GraB": 0.1857
            },
            "SMOTE": {
                "LR": 0.1881, "KNN": 0.1495, "RF": 0.2110, "SVM": 0.1877, "DT": 0.1669, "NB": 0.0970, "GraB": 0.1810
            },
            "SMOTETomek": {
                "LR": 0.1887, "KNN": 0.1388, "RF": 0.2191, "SVM": 0.1898, "DT": 0.1654, "NB": 0.1007, "GraB": 0.1779
            },
            "SMOTEENN": {
                "LR": 0.1689, "KNN": 0.0747, "RF": 0.2006, "SVM": 0.1868, "DT": -0.0421, "NB": 0.1473, "GraB": -0.0109
            },
            "SMOTEN": {
                "LR": 0.1742, "KNN": 0.1474, "RF": 0.1749, "SVM": 0.1671, "DT": 0.1124, "NB": 0.0901, "GraB": 0.2021
            },
            "KMeans-SMOTE": {
                "LR": 0.1734, "KNN": 0.1789, "RF": 0.2169, "SVM": 0.1780, "DT": 0.2351, "NB": 0.1049, "GraB": 0.2101
            },
            "Borderline-SMOTE": {
                "LR": 0.1791, "KNN": 0.1800, "RF": 0.1982, "SVM": 0.1643, "DT": 0.1896, "NB": 0.0849, "GraB": 0.1883
            },
            "SVM-SMOTE": {
                "LR": 0.1609, "KNN": 0.0765, "RF": 0.1930, "SVM": 0.1557, "DT": 0.1747, "NB": 0.0970, "GraB": 0.1824
            }
        },
        "Accuracy": {
            "RandomOverSampler": {
                "LR": 0.6793, "KNN": 0.6389, "RF": 0.6553, "SVM": 0.6679, "DT": 0.6490, "NB": 0.6414, "GraB": 0.6622
            },
            "SMOTE": {
                "LR": 0.6749, "KNN": 0.6433, "RF": 0.6610, "SVM": 0.6705, "DT": 0.6528, "NB": 0.6420, "GraB": 0.6604
            },
            "SMOTETomek": {
                "LR": 0.6730, "KNN": 0.6446, "RF": 0.6585, "SVM": 0.6698, "DT": 0.6566, "NB": 0.6420, "GraB": 0.6622
            },
            "SMOTEENN": {
                "LR": 0.6610, "KNN": 0.6383, "RF": 0.6345, "SVM": 0.6610, "DT": 0.5884, "NB": 0.6212, "GraB": 0.6244
            },
            "SMOTEN": {
                "LR": 0.6780, "KNN": 0.6288, "RF": 0.6648, "SVM": 0.6686, "DT": 0.6496, "NB": 0.6370, "GraB": 0.6597
            },
            "KMeans-SMOTE": {
                "LR": 0.6780, "KNN": 0.6414, "RF": 0.6559, "SVM": 0.6686, "DT": 0.6547, "NB": 0.6427, "GraB": 0.6610
            },
            "Borderline-SMOTE": {
                "LR": 0.6761, "KNN": 0.6389, "RF": 0.6547, "SVM": 0.6749, "DT": 0.6540, "NB": 0.6338, "GraB": 0.6572
            },
            "SVM-SMOTE": {
                "LR": 0.6736, "KNN": 0.6395, "RF": 0.6660, "SVM": 0.6761, "DT": 0.6465, "NB": 0.6427, "GraB": 0.6616
            }
        }
    }
}

# Summarise every metric: per-model mean and confidence interval across samplers.
results = {
    metric: compute_mean_ci(methods)
    for metric, methods in data_2["Metrics"].items()
}

# Display results
for metric, result in results.items():
    print(f"\n{metric}:", result, sep="\n")



Disparate Impact:
          Mean  Lower CI  Upper CI
LR    2.161025  2.021650  2.300400
KNN   1.625550  1.489703  1.761397
RF    2.007437  1.855293  2.159582
SVM   2.069587  1.936397  2.202778
DT    1.893925  1.554650  2.233200
NB    3.313750  2.553105  4.074395
GraB  1.894563  1.630485  2.158640

Statistical Parity Difference:
          Mean  Lower CI  Upper CI
LR    0.245662  0.237118  0.254207
KNN   0.177513  0.148649  0.206376
RF    0.251125  0.234473  0.267777
SVM   0.238613  0.224176  0.253049
DT    0.201200  0.135198  0.267202
NB    0.202275  0.183676  0.220874
GraB  0.214713  0.157863  0.271562

Equal Opportunity Difference:
          Mean  Lower CI  Upper CI
LR    0.252837  0.241179  0.264496
KNN   0.168288  0.142558  0.194017
RF    0.248400  0.227889  0.268911
SVM   0.240750  0.218549  0.262951
DT    0.200013  0.142381  0.257644
NB    0.273462  0.248427  0.298498
GraB  0.205638  0.156130  0.255145

Predictive Equality Difference:
          Mean  Lower CI  Upper CI
LR    0.17

Oversampling Based on Sensitive Attributes Strategy

In [None]:
import numpy as np
import scipy.stats as stats
import pandas as pd

def compute_mean_ci(data, confidence_level=0.95):
    """Compute the row-wise mean and two-sided Student-t confidence interval.

    Parameters
    ----------
    data : dict or array-like
        Anything accepted by ``pd.DataFrame``. Each column is one
        observation (here: one sampling technique) and each row is the
        entity being summarised (here: one model).
    confidence_level : float, default 0.95
        Coverage of the interval; must lie strictly between 0 and 1.

    Returns
    -------
    pd.DataFrame
        Indexed like the rows of ``data`` with the columns ``"Mean"``,
        ``"Lower CI"`` and ``"Upper CI"``. Rows with fewer than two
        non-missing observations get NaN bounds.

    Raises
    ------
    ValueError
        If ``confidence_level`` is not strictly between 0 and 1.
    """
    if not 0 < confidence_level < 1:
        raise ValueError("confidence_level must be strictly between 0 and 1")

    df = pd.DataFrame(data)
    mean_values = df.mean(axis=1)
    std_values = df.std(axis=1, ddof=1)  # sample standard deviation
    # mean()/std() skip NaN, so the sample size must be counted per row;
    # using df.shape[1] would overstate n whenever a value is missing.
    n = df.count(axis=1)
    # Two-sided interval: put (1 - confidence_level) / 2 in each tail.
    t_score = stats.t.ppf((1 + confidence_level) / 2, n - 1)
    margin_of_error = t_score * (std_values / np.sqrt(n))
    lower_bound = mean_values - margin_of_error
    upper_bound = mean_values + margin_of_error

    return pd.DataFrame({
        "Mean": mean_values,
        "Lower CI": lower_bound,
        "Upper CI": upper_bound
    })

data_3 = {
    "Metrics": {
        "Disparate Impact": {
            "RandomOverSampler": {
                "LR": 1.9334, "KNN": 1.6146, "RF": 1.9409, "SVM": 1.8463, "DT": 1.6439, "NB": 3.1109, "GraB": 1.9418
            },
            "SMOTE": {
                "LR": 1.9622, "KNN": 1.6358, "RF": 1.8267, "SVM": 1.8242, "DT": 1.9193, "NB": 3.2387, "GraB": 1.9595
            },
            "SMOTETomek": {
                "LR": 1.9428, "KNN": 1.5234, "RF": 2.0094, "SVM": 1.8544, "DT": 1.8356, "NB": 3.0388, "GraB": 1.8017
            },
            "SMOTEENN": {
                "LR": 1.7788, "KNN": 1.5976, "RF": 2.3267, "SVM": 1.9736, "DT": 2.4663, "NB": 2.5015, "GraB": 2.4734
            },
            "SMOTEN": {
                "LR": 1.9428, "KNN": 1.3130, "RF": 1.8642, "SVM": 1.8420, "DT": 1.8870, "NB": 3.0142, "GraB": 1.9913
            },
            "KMeans-SMOTE": {
                "LR": 1.9008, "KNN": 1.4528, "RF": 1.9491, "SVM": 2.0133, "DT": 1.9660, "NB": 2.8903, "GraB": 1.9646
            },
            "Borderline-SMOTE": {
                "LR": 1.9291, "KNN": 1.3695, "RF": 1.7834, "SVM": 2.0472, "DT": 1.9338, "NB": 3.0763, "GraB": 1.9239
            },
            "SVM-SMOTE": {
                "LR": 1.9193, "KNN": 1.5485, "RF": 1.8515, "SVM": 1.9285, "DT": 2.1006, "NB": 3.2396, "GraB": 1.8482
            }
        },
        "Statistical Parity Difference": {
            "RandomOverSampler": {
                "LR": 0.2054, "KNN": 0.1671, "RF": 0.2253, "SVM": 0.2040, "DT": 0.1646, "NB": 0.1742, "GraB": 0.2164
            },
            "SMOTE": {
                "LR": 0.2086, "KNN": 0.1718, "RF": 0.2033, "SVM": 0.1974, "DT": 0.1934, "NB": 0.1739, "GraB": 0.2220
            },
            "SMOTETomek": {
                "LR": 0.2075, "KNN": 0.1533, "RF": 0.2319, "SVM": 0.2046, "DT": 0.1758, "NB": 0.1716, "GraB": 0.1907
            },
            "SMOTEENN": {
                "LR": 0.2067, "KNN": 0.1770, "RF": 0.3371, "SVM": 0.2410, "DT": 0.3274, "NB": 0.2162, "GraB": 0.3385
            },
            "SMOTEN": {
                "LR": 0.2075, "KNN": 0.1064, "RF": 0.2111, "SVM": 0.2030, "DT": 0.1995, "NB": 0.1695, "GraB": 0.2278
            },
            "KMeans-SMOTE": {
                "LR": 0.2026, "KNN": 0.1407, "RF": 0.2304, "SVM": 0.2230, "DT": 0.2141, "NB": 0.1713, "GraB": 0.2201
            },
            "Borderline-SMOTE": {
                "LR": 0.1954, "KNN": 0.1148, "RF": 0.1851, "SVM": 0.2203, "DT": 0.1995, "NB": 0.1579, "GraB": 0.2048
            },
            "SVM-SMOTE": {
                "LR": 0.1978, "KNN": 0.1562, "RF": 0.2080, "SVM": 0.2073, "DT": 0.2262, "NB": 0.1703, "GraB": 0.1976
            }
        },
        "Equal Opportunity Difference": {
            "RandomOverSampler": {
                "LR": 0.2128, "KNN": 0.1391, "RF": 0.2188, "SVM": 0.2128, "DT": 0.1331, "NB": 0.2402, "GraB": 0.1943
            },
            "SMOTE": {
                "LR": 0.2169, "KNN": 0.1433, "RF": 0.1800, "SVM": 0.2026, "DT": 0.1785, "NB": 0.2485, "GraB": 0.2044
            },
            "SMOTETomek": {
                "LR": 0.2128, "KNN": 0.1164, "RF": 0.2230, "SVM": 0.2128, "DT": 0.1667, "NB": 0.2402, "GraB": 0.1719
            },
            "SMOTEENN": {
                "LR": 0.2020, "KNN": 0.1981, "RF": 0.3099, "SVM": 0.2472, "DT": 0.3290, "NB": 0.2489, "GraB": 0.3308
            },
            "SMOTEN": {
                "LR": 0.2169, "KNN": 0.0691, "RF": 0.2045, "SVM": 0.2128, "DT": 0.1947, "NB": 0.2341, "GraB": 0.2045
            },
            "KMeans-SMOTE": {
                "LR": 0.2067, "KNN": 0.1287, "RF": 0.2288, "SVM": 0.2355, "DT": 0.2151, "NB": 0.2380, "GraB": 0.2106
            },
            "Borderline-SMOTE": {
                "LR": 0.2049, "KNN": 0.0859, "RF": 0.1802, "SVM": 0.2377, "DT": 0.1906, "NB": 0.2262, "GraB": 0.2046
            },
            "SVM-SMOTE": {
                "LR": 0.2028, "KNN": 0.1166, "RF": 0.1902, "SVM": 0.2130, "DT": 0.2256, "NB": 0.2445, "GraB": 0.1699
            }
        },
        "Predictive Equality Difference": {
            "RandomOverSampler": {
                "LR": 0.1343, "KNN": 0.1368, "RF": 0.1715, "SVM": 0.1381, "DT": 0.1349, "NB": 0.0834, "GraB": 0.1717
            },
            "SMOTE": {
                "LR": 0.1369, "KNN": 0.1447, "RF": 0.1646, "SVM": 0.1345, "DT": 0.1481, "NB": 0.0786, "GraB": 0.1732
            },
            "SMOTETomek": {
                "LR": 0.1385, "KNN": 0.1292, "RF": 0.1831, "SVM": 0.1387, "DT": 0.1388, "NB": 0.0786, "GraB": 0.1457
            },
            "SMOTEENN": {
                "LR": 0.1465, "KNN": 0.1068, "RF": 0.3006, "SVM": 0.1771, "DT": 0.2748, "NB": 0.1390, "GraB": 0.2902
            },
            "SMOTEN": {
                "LR": 0.1343, "KNN": 0.0833, "RF": 0.1577, "SVM": 0.1360, "DT": 0.1494, "NB": 0.0807, "GraB": 0.1875
            },
            "KMeans-SMOTE": {
                "LR": 0.1353, "KNN": 0.0993, "RF": 0.1694, "SVM": 0.1563, "DT": 0.1599, "NB": 0.0780, "GraB": 0.1648
            },
            "Borderline-SMOTE": {
                "LR": 0.1248, "KNN": 0.0878, "RF": 0.1314, "SVM": 0.1510, "DT": 0.1459, "NB": 0.0701, "GraB": 0.1406
            },
            "SVM-SMOTE": {
                "LR": 0.1306, "KNN": 0.1382, "RF": 0.1646, "SVM": 0.1457, "DT": 0.1754, "NB": 0.0765, "GraB": 0.1616
            }
        },
        "Accuracy": {
            "RandomOverSampler": {
                "LR": 0.6780, "KNN": 0.6420, "RF": 0.6616, "SVM": 0.6616, "DT": 0.6484, "NB": 0.6263, "GraB": 0.6692
            },
            "SMOTE": {
                "LR": 0.6780, "KNN": 0.6357, "RF": 0.6559, "SVM": 0.6610, "DT": 0.6585, "NB": 0.6212, "GraB": 0.6705
            },
            "SMOTETomek": {
                "LR": 0.6768, "KNN": 0.6420, "RF": 0.6547, "SVM": 0.6629, "DT": 0.6275, "NB": 0.6263, "GraB": 0.6629
            },
            "SMOTEENN": {
                "LR": 0.6711, "KNN": 0.6446, "RF": 0.6585, "SVM": 0.6616, "DT": 0.6458, "NB": 0.6515, "GraB": 0.6528
            },
            "SMOTEN": {
                "LR": 0.6793, "KNN": 0.6364, "RF": 0.6604, "SVM": 0.6622, "DT": 0.6503, "NB": 0.6237, "GraB": 0.6610
            },
            "KMeans-SMOTE": {
                "LR": 0.6749, "KNN": 0.6357, "RF": 0.6692, "SVM": 0.6585, "DT": 0.6496, "NB": 0.6301, "GraB": 0.6711
            },
            "Borderline-SMOTE": {
                "LR": 0.6736, "KNN": 0.6326, "RF": 0.6578, "SVM": 0.6559, "DT": 0.6667, "NB": 0.6149, "GraB": 0.6755
            },
            "SVM-SMOTE": {
                "LR": 0.6736, "KNN": 0.6332, "RF": 0.6572, "SVM": 0.6585, "DT": 0.6458, "NB": 0.6187, "GraB": 0.6585
            }
        }
    }
}

# Summarise every metric: per-model mean and confidence interval across samplers.
results = {
    metric: compute_mean_ci(methods)
    for metric, methods in data_3["Metrics"].items()
}

# Display results
for metric, result in results.items():
    print(f"\n{metric}:", result, sep="\n")



Disparate Impact:
          Mean  Lower CI  Upper CI
LR    1.913650  1.865659  1.961641
KNN   1.506900  1.408058  1.605742
RF    1.943987  1.800832  2.087143
SVM   1.916187  1.843641  1.988734
DT    1.969062  1.769577  2.168548
NB    3.013788  2.815639  3.211936
GraB  1.988050  1.815706  2.160394

Statistical Parity Difference:
          Mean  Lower CI  Upper CI
LR    0.203938  0.199827  0.208048
KNN   0.148413  0.126629  0.170196
RF    0.229025  0.190249  0.267801
SVM   0.212575  0.200488  0.224662
DT    0.212563  0.170480  0.254645
NB    0.175613  0.161249  0.189976
GraB  0.227238  0.188162  0.266313

Equal Opportunity Difference:
          Mean  Lower CI  Upper CI
LR    0.209475  0.204366  0.214584
KNN   0.124650  0.091982  0.157318
RF    0.216925  0.181729  0.252121
SVM   0.221800  0.208488  0.235112
DT    0.204163  0.155672  0.252653
NB    0.240075  0.233747  0.246403
GraB  0.211375  0.169011  0.253739

Predictive Equality Difference:
          Mean  Lower CI  Upper CI
LR    0.13

Equalized Discrimination Group Instances Strategy

In [None]:
import numpy as np
import scipy.stats as stats
import pandas as pd

def compute_mean_ci(data, confidence_level=0.95):
    """Compute the row-wise mean and two-sided Student-t confidence interval.

    Parameters
    ----------
    data : dict or array-like
        Anything accepted by ``pd.DataFrame``. Each column is one
        observation (here: one sampling technique) and each row is the
        entity being summarised (here: one model).
    confidence_level : float, default 0.95
        Coverage of the interval; must lie strictly between 0 and 1.

    Returns
    -------
    pd.DataFrame
        Indexed like the rows of ``data`` with the columns ``"Mean"``,
        ``"Lower CI"`` and ``"Upper CI"``. Rows with fewer than two
        non-missing observations get NaN bounds.

    Raises
    ------
    ValueError
        If ``confidence_level`` is not strictly between 0 and 1.
    """
    if not 0 < confidence_level < 1:
        raise ValueError("confidence_level must be strictly between 0 and 1")

    df = pd.DataFrame(data)
    mean_values = df.mean(axis=1)
    std_values = df.std(axis=1, ddof=1)  # sample standard deviation
    # mean()/std() skip NaN, so the sample size must be counted per row;
    # using df.shape[1] would overstate n whenever a value is missing.
    n = df.count(axis=1)
    # Two-sided interval: put (1 - confidence_level) / 2 in each tail.
    t_score = stats.t.ppf((1 + confidence_level) / 2, n - 1)
    margin_of_error = t_score * (std_values / np.sqrt(n))
    lower_bound = mean_values - margin_of_error
    upper_bound = mean_values + margin_of_error

    return pd.DataFrame({
        "Mean": mean_values,
        "Lower CI": lower_bound,
        "Upper CI": upper_bound
    })

data_4 = {
    "Metrics": {
        "Disparate Impact": {
            "RandomOverSampler": {
                "LR": 1.1236, "KNN": 1.5384, "RF": 1.1855, "SVM": 1.3839, "DT": 1.3106, "NB": 2.5487, "GraB": 1.0490
            },
            "SMOTE": {
                "LR": 1.1155, "KNN": 1.5066, "RF": 1.4068, "SVM": 1.5609, "DT": 1.0464, "NB": 2.4778, "GraB": 1.1012
            },
            "SMOTETomek": {
                "LR": 1.1236, "KNN": 1.4915, "RF": 1.4099, "SVM": 1.5508, "DT": 1.1334, "NB": 2.5387, "GraB": 1.1335
            },
            "SMOTEENN": {
                "LR": 1.9507, "KNN": 1.8816, "RF": 2.0180, "SVM": 1.7135, "DT": 2.3074, "NB": 2.2106, "GraB": 2.0359
            },
            "SMOTEN": {
                "LR": 1.0450, "KNN": 1.7603, "RF": 1.2654, "SVM": 1.3693, "DT": 1.1881, "NB": 2.5590, "GraB": 1.1430
            },
            "KMeans-SMOTE": {
                "LR": 1.5002, "KNN": 1.5240, "RF": 1.9233, "SVM": 1.8314, "DT": 1.8784, "NB": 2.5793, "GraB": 1.4694
            },
            "Borderline-SMOTE": {
                "LR": 1.0104, "KNN": 1.4881, "RF": 0.9737, "SVM": 1.2280, "DT": 1.0427, "NB": 2.6521, "GraB": 0.9949
            },
            "SVM-SMOTE": {
                "LR": 1.1373, "KNN": 1.5112, "RF": 1.0913, "SVM": 1.3964, "DT": 1.1130, "NB": 2.5066, "GraB": 1.0178
            }
        },
        "Statistical Parity Difference": {
            "RandomOverSampler": {
                "LR": 0.0476, "KNN": 0.1507, "RF": 0.0735, "SVM": 0.1180, "DT": 0.1035, "NB": 0.1554, "GraB": 0.0196
            },
            "SMOTE": {
                "LR": 0.0445, "KNN": 0.1385, "RF": 0.1323, "SVM": 0.1733, "DT": 0.0194, "NB": 0.1506, "GraB": 0.0400
            },
            "SMOTETomek": {
                "LR": 0.0476, "KNN": 0.1368, "RF": 0.1366, "SVM": 0.1702, "DT": 0.0501, "NB": 0.1569, "GraB": 0.0521
            },
            "SMOTEENN": {
                "LR": 0.2507, "KNN": 0.2183, "RF": 0.2883, "SVM": 0.2298, "DT": 0.3173, "NB": 0.1979, "GraB": 0.2850
            },
            "SMOTEN": {
                "LR": 0.0183, "KNN": 0.2030, "RF": 0.0975, "SVM": 0.1362, "DT": 0.0682, "NB": 0.1589, "GraB": 0.0553
            },
            "KMeans-SMOTE": {
                "LR": 0.1384, "KNN": 0.1620, "RF": 0.2346, "SVM": 0.2058, "DT": 0.2004, "NB": 0.1610, "GraB": 0.1458
            },
            "Borderline-SMOTE": {
                "LR": 0.0042, "KNN": 0.1453, "RF": -0.0116, "SVM": 0.0871, "DT": 0.0162, "NB": 0.1470, "GraB": -0.0022
            },
            "SVM-SMOTE": {
                "LR": 0.0520, "KNN": 0.1621, "RF": 0.0377, "SVM": 0.1193, "DT": 0.0442, "NB": 0.1487, "GraB": 0.0070
            }
        },
        "Equal Opportunity Difference": {
            "RandomOverSampler": {
                "LR": 0.0461, "KNN": 0.1249, "RF": 0.0521, "SVM": 0.1264, "DT": 0.0772, "NB": 0.2052, "GraB": -0.0028
            },
            "SMOTE": {
                "LR": 0.0461, "KNN": 0.1354, "RF": 0.1201, "SVM": 0.1403, "DT": 0.0196, "NB": 0.1970, "GraB": 0.0420
            },
            "SMOTETomek": {
                "LR": 0.0461, "KNN": 0.1313, "RF": 0.1303, "SVM": 0.1363, "DT": 0.0690, "NB": 0.2092, "GraB": 0.0440
            },
            "SMOTEENN": {
                "LR": 0.2449, "KNN": 0.2311, "RF": 0.2670, "SVM": 0.2055, "DT": 0.3022, "NB": 0.2180, "GraB": 0.2734
            },
            "SMOTEN": {
                "LR": 0.0153, "KNN": 0.1822, "RF": 0.0974, "SVM": 0.1235, "DT": 0.0732, "NB": 0.2113, "GraB": 0.0421
            },
            "KMeans-SMOTE": {
                "LR": 0.1286, "KNN": 0.1449, "RF": 0.2328, "SVM": 0.2086, "DT": 0.1948, "NB": 0.2154, "GraB": 0.1282
            },
            "Borderline-SMOTE": {
                "LR": 0.0031, "KNN": 0.1043, "RF": -0.0174, "SVM": 0.0787, "DT": 0.0221, "NB": 0.1994, "GraB": 0.0029
            },
            "SVM-SMOTE": {
                "LR": 0.0543, "KNN": 0.1141, "RF": 0.0276, "SVM": 0.1224, "DT": 0.0443, "NB": 0.1910, "GraB": 0.0117
            }
        },
        "Predictive Equality Difference": {
            "RandomOverSampler": {
                "LR": -0.0173, "KNN": 0.1198, "RF": 0.0306, "SVM": 0.0478, "DT": 0.0650, "NB": 0.0754, "GraB": -0.0212
            },
            "SMOTE": {
                "LR": -0.0236, "KNN": 0.0928, "RF": 0.0840, "SVM": 0.1356, "DT": -0.0363, "NB": 0.0732, "GraB": -0.0248
            },
            "SMOTETomek": {
                "LR": -0.0173, "KNN": 0.0938, "RF": 0.0839, "SVM": 0.1335, "DT": -0.0222, "NB": 0.0732, "GraB": -0.0057
            },
            "SMOTEENN": {
                "LR": 0.1960, "KNN": 0.1522, "RF": 0.2490, "SVM": 0.1927, "DT": 0.2768, "NB": 0.1283, "GraB": 0.2422
            },
            "SMOTEN": {
                "LR": -0.0449, "KNN": 0.1746, "RF": 0.0402, "SVM": 0.0816, "DT": 0.0101, "NB": 0.0754, "GraB": 0.0048
            },
            "KMeans-SMOTE": {
                "LR": 0.0799, "KNN": 0.1237, "RF": 0.1750, "SVM": 0.1455, "DT": 0.1540, "NB": 0.0769, "GraB": 0.0959
            },
            "Borderline-SMOTE": {
                "LR": -0.0618, "KNN": 0.1291, "RF": -0.0650, "SVM": 0.0312, "DT": -0.0454, "NB": 0.0696, "GraB": -0.0686
            },
            "SVM-SMOTE": {
                "LR": -0.0162, "KNN": 0.1495, "RF": -0.0140, "SVM": 0.0537, "DT": -0.0102, "NB": 0.0759, "GraB": -0.0570
            }
        },
        "Accuracy": {
            "RandomOverSampler": {
                "LR": 0.6660, "KNN": 0.6389, "RF": 0.6503, "SVM": 0.6667, "DT": 0.6534, "NB": 0.6288, "GraB": 0.6458
            },
            "SMOTE": {
                "LR": 0.6679, "KNN": 0.6326, "RF": 0.6515, "SVM": 0.6667, "DT": 0.6383, "NB": 0.6288, "GraB": 0.6585
            },
            "SMOTETomek": {
                "LR": 0.6660, "KNN": 0.6301, "RF": 0.6509, "SVM": 0.6660, "DT": 0.6465, "NB": 0.6326, "GraB": 0.6610
            },
            "SMOTEENN": {
                "LR": 0.6610, "KNN": 0.6540, "RF": 0.6515, "SVM": 0.6484, "DT": 0.6465, "NB": 0.6547, "GraB": 0.6427
            },
            "SMOTEN": {
                "LR": 0.6629, "KNN": 0.6250, "RF": 0.6471, "SVM": 0.6648, "DT": 0.6395, "NB": 0.6326, "GraB": 0.6534
            },
            "KMeans-SMOTE": {
                "LR": 0.6749, "KNN": 0.6383, "RF": 0.6654, "SVM": 0.6591, "DT": 0.6427, "NB": 0.6301, "GraB": 0.6667
            },
            "Borderline-SMOTE": {
                "LR": 0.6660, "KNN": 0.6307, "RF": 0.6420, "SVM": 0.6578, "DT": 0.6446, "NB": 0.6187, "GraB": 0.6528
            },
            "SVM-SMOTE": {
                "LR": 0.6673, "KNN": 0.6338, "RF": 0.6484, "SVM": 0.6660, "DT": 0.6376, "NB": 0.6256, "GraB": 0.6515
            }
        }
    }
}

# Summarise every metric: per-model mean and confidence interval across samplers.
results = {
    metric: compute_mean_ci(methods)
    for metric, methods in data_4["Metrics"].items()
}

# Display results
for metric, result in results.items():
    print(f"\n{metric}:", result, sep="\n")



Disparate Impact:
          Mean  Lower CI  Upper CI
LR    1.250787  0.983456  1.518119
KNN   1.587713  1.463606  1.711819
RF    1.409238  1.093823  1.724652
SVM   1.504275  1.338020  1.670530
DT    1.377500  0.989566  1.765434
NB    2.509100  2.399457  2.618743
GraB  1.243087  0.948025  1.538150

Statistical Parity Difference:
          Mean  Lower CI  Upper CI
LR    0.075412  0.007584  0.143241
KNN   0.164587  0.139326  0.189849
RF    0.123613  0.040886  0.206339
SVM   0.154963  0.114560  0.195365
DT    0.102413  0.014503  0.190322
NB    0.159550  0.145949  0.173151
GraB  0.075325 -0.005175  0.155825

Equal Opportunity Difference:
          Mean  Lower CI  Upper CI
LR    0.073062  0.007252  0.138873
KNN   0.146025  0.111290  0.180760
RF    0.113738  0.032153  0.195322
SVM   0.142713  0.106031  0.179394
DT    0.100300  0.018080  0.182520
NB    0.205813  0.197941  0.213684
GraB  0.067687 -0.009813  0.145188

Predictive Equality Difference:
          Mean  Lower CI  Upper CI
LR    0.01