<a href="https://colab.research.google.com/github/mmfara/RisCanvi/blob/main/Confidence_Interval_for_RisCanvi.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This notebook calculates, for each oversampling strategy and each fairness metric, the mean and 95% confidence interval of every model's score across the sampling techniques.

Standard Oversampling Strategy

In [None]:
import numpy as np
import pandas as pd
import scipy.stats as stats


def compute_mean_ci(data, confidence_level=0.95):
    """Return the row-wise mean and two-sided Student-t confidence interval.

    Parameters
    ----------
    data : dict or array-like
        Anything ``pd.DataFrame`` accepts. For a dict of dicts the outer
        keys become columns and the inner keys become rows, so the
        statistics are taken across the outer keys for each inner key.
    confidence_level : float, default 0.95
        Coverage of the interval; must lie strictly between 0 and 1.

    Returns
    -------
    pd.DataFrame
        One row per row of ``pd.DataFrame(data)`` with the columns
        ``"Mean"``, ``"Lower CI"`` and ``"Upper CI"``. Rows with fewer
        than two non-missing values get NaN bounds.

    Raises
    ------
    ValueError
        If ``confidence_level`` is not strictly between 0 and 1.
    """
    if not 0 < confidence_level < 1:
        raise ValueError("confidence_level must be strictly between 0 and 1")

    df = pd.DataFrame(data)
    mean_values = df.mean(axis=1)
    std_values = df.std(axis=1, ddof=1)

    # mean() and std() skip missing values, so the sample size has to be
    # counted per row; df.shape[1] would overstate it when a row has NaNs
    # and make the interval too narrow.
    n = df.count(axis=1)
    # The t distribution needs at least one degree of freedom; leave the
    # interval undefined (NaN) for rows with fewer than two observations.
    dof = (n - 1).where(n > 1)

    t_score = pd.Series(
        stats.t.ppf((1 + confidence_level) / 2, dof.to_numpy(dtype=float)),
        index=df.index,
    )
    margin_of_error = t_score * (std_values / np.sqrt(n))

    return pd.DataFrame({
        "Mean": mean_values,
        "Lower CI": mean_values - margin_of_error,
        "Upper CI": mean_values + margin_of_error,
    })

# Input data: metric -> sampling technique -> model -> value
data_1r = {
    "Metrics": {
        "Disparate Impact": {
            "RandomOverSampler": {
                "LR": 0.5813, "KNN": 0.7019, "RF": 0.5237, "SVM": 0.6022, "DT": 0.6498, "NB": 0.6601, "GraB": 0.5199
            },
            "SMOTE": {
                "LR": 0.4104, "KNN": 0.7058, "RF": 0.4461, "SVM": 0.4337, "DT": 0.4401, "NB": 0.6444, "GraB": 0.4576
            },
            "SMOTETomek": {
                "LR": 0.4126, "KNN": 0.7058, "RF": 0.5035, "SVM": 0.4337, "DT": 0.5827, "NB": 0.6444, "GraB": 0.5064
            },
            "SMOTEENN": {
                "LR": 0.5893, "KNN": 0.8569, "RF": 0.7970, "SVM": 0.7340, "DT": 0.8506, "NB": 0.6548, "GraB": 0.7827
            },
            "SMOTEN": {
                "LR": 0.5047, "KNN": 0.5979, "RF": 0.3537, "SVM": 0.5845, "DT": 0.4352, "NB": 0.6221, "GraB": 0.5668
            },
            "KMeans-SMOTE": {
                "LR": 0.6269, "KNN": 0.4285, "RF": 0.7736, "SVM": 0.7957, "DT": 0.6855, "NB": 0.6070, "GraB": 0.7427
            },
            "Borderline-SMOTE": {
                "LR": 0.3639, "KNN": 0.7099, "RF": 0.4482, "SVM": 0.5339, "DT": 0.5724, "NB": 0.6515, "GraB": 0.4482
            },
            "SVM-SMOTE": {
                "LR": 0.4440, "KNN": 0.6660, "RF": 0.5546, "SVM": 0.7043, "DT": 0.4705, "NB": 0.5958, "GraB": 0.5516
            }
        },
        "Statistical Parity Difference": {
            "RandomOverSampler": {
                "LR": -0.1609, "KNN": -0.0439, "RF": -0.0667, "SVM": -0.0793, "DT": -0.0629, "NB": -0.1098, "GraB": -0.0862
            },
            "SMOTE": {
                "LR": -0.2011, "KNN": -0.0639, "RF": -0.1531, "SVM": -0.1393, "DT": -0.1357, "NB": -0.1085, "GraB": -0.1383
            },
            "SMOTETomek": {
                "LR": -0.1993, "KNN": -0.0639, "RF": -0.1052, "SVM": -0.1393, "DT": -0.1026, "NB": -0.1085, "GraB": -0.0975
            },
            "SMOTEENN": {
                "LR": -0.1696, "KNN": -0.0468, "RF": -0.0773, "SVM": -0.1027, "DT": -0.0545, "NB": -0.1283, "GraB": -0.0796
            },
            "SMOTEN": {
                "LR": -0.1832, "KNN": -0.0852, "RF": -0.1462, "SVM": -0.1208, "DT": -0.1947, "NB": -0.1316, "GraB": -0.1478
            },
            "KMeans-SMOTE": {
                "LR": -0.0516, "KNN": -0.0667, "RF": -0.0098, "SVM": -0.0077, "DT": -0.0367, "NB": -0.0734, "GraB": -0.0277
            },
            "Borderline-SMOTE": {
                "LR": -0.2273, "KNN": -0.0531, "RF": -0.1149, "SVM": -0.1280, "DT": -0.1021, "NB": -0.1070, "GraB": -0.1724
            },
            "SVM-SMOTE": {
                "LR": -0.1378, "KNN": -0.0552, "RF": -0.0616, "SVM": -0.0462, "DT": -0.1388, "NB": -0.0972, "GraB": -0.1111
            }
        },
        "Equal Opportunity Difference": {
            "RandomOverSampler": {
                "LR": -0.2447, "KNN": -0.1383, "RF": -0.1277, "SVM": -0.1702, "DT": -0.0532, "NB": -0.2447, "GraB": -0.1383
            },
            "SMOTE": {
                "LR": -0.3298, "KNN": -0.0851, "RF": -0.3085, "SVM": -0.2128, "DT": -0.2447, "NB": -0.2128, "GraB": -0.3298
            },
            "SMOTETomek": {
                "LR": -0.3191, "KNN": -0.0851, "RF": -0.1915, "SVM": -0.2128, "DT": -0.1277, "NB": -0.2128, "GraB": -0.2128
            },
            "SMOTEENN": {
                "LR": -0.2447, "KNN": -0.0213, "RF": -0.0319, "SVM": -0.1277, "DT": -0.0319, "NB": -0.2553, "GraB": -0.0532
            },
            "SMOTEN": {
                "LR": -0.2660, "KNN": -0.1170, "RF": -0.3085, "SVM": -0.1809, "DT": -0.1915, "NB": -0.1596, "GraB": -0.2660
            },
            "KMeans-SMOTE": {
                "LR": -0.1596, "KNN": -0.1809, "RF": -0.0851, "SVM": -0.0532, "DT": -0.0426, "NB": -0.2128, "GraB": -0.1596
            },
            "Borderline-SMOTE": {
                "LR": -0.3617, "KNN": -0.0957, "RF": -0.2340, "SVM": -0.2128, "DT": -0.2340, "NB": -0.2128, "GraB": -0.2660
            },
            "SVM-SMOTE": {
                "LR": -0.2447, "KNN": -0.0532, "RF": -0.1383, "SVM": -0.1064, "DT": -0.1489, "NB": -0.1277, "GraB": -0.1809
            }
        },
        "Predictive Equality Difference": {
            "RandomOverSampler": {
                "LR": -0.1396, "KNN": -0.0232, "RF": -0.0530, "SVM": -0.0592, "DT": -0.0628, "NB": -0.0803, "GraB": -0.0739
            },
            "SMOTE": {
                "LR": -0.1723, "KNN": -0.0567, "RF": -0.1186, "SVM": -0.1218, "DT": -0.1121, "NB": -0.0849, "GraB": -0.0981
            },
            "SMOTETomek": {
                "LR": -0.1723, "KNN": -0.0567, "RF": -0.0851, "SVM": -0.1218, "DT": -0.0945, "NB": -0.0849, "GraB": -0.0729
            },
            "SMOTEENN": {
                "LR": -0.1505, "KNN": -0.0476, "RF": -0.0814, "SVM": -0.0933, "DT": -0.0545, "NB": -0.1001, "GraB": -0.0796
            },
            "SMOTEN": {
                "LR": -0.1633, "KNN": -0.0765, "RF": -0.1117, "SVM": -0.1065, "DT": -0.1924, "NB": -0.1227, "GraB": -0.1209
            },
            "KMeans-SMOTE": {
                "LR": -0.0278, "KNN": -0.0425, "RF": 0.0064, "SVM": 0.0021, "DT": -0.0343, "NB": -0.0437, "GraB": -0.0001
            },
            "Borderline-SMOTE": {
                "LR": -0.1974, "KNN": -0.0419, "RF": -0.0890, "SVM": -0.1078, "DT": -0.0744, "NB": -0.0831, "GraB": -0.1499
            },
            "SVM-SMOTE": {
                "LR": -0.1135, "KNN": -0.0527, "RF": -0.0436, "SVM": -0.0311, "DT": -0.1337, "NB": -0.0888, "GraB": -0.0930
            }
        },
        "Accuracy": {
            "RandomOverSampler": {
                "LR": 0.7153, "KNN": 0.7970, "RF": 0.7958, "SVM": 0.7713, "DT": 0.7666, "NB": 0.7165, "GraB": 0.7795
            },
            "SMOTE": {
                "LR": 0.7351, "KNN": 0.7643, "RF": 0.7783, "SVM": 0.7690, "DT": 0.7573, "NB": 0.7270, "GraB": 0.7643
            },
            "SMOTETomek": {
                "LR": 0.7340, "KNN": 0.7643, "RF": 0.7865, "SVM": 0.7690, "DT": 0.7585, "NB": 0.7270, "GraB": 0.7748
            },
            "SMOTEENN": {
                "LR": 0.6896, "KNN": 0.7048, "RF": 0.6849, "SVM": 0.6884, "DT": 0.6861, "NB": 0.6908, "GraB": 0.7048
            },
            "SMOTEN": {
                "LR": 0.7071, "KNN": 0.7631, "RF": 0.7841, "SVM": 0.7246, "DT": 0.7060, "NB": 0.7013, "GraB": 0.7235
            },
            "KMeans-SMOTE": {
                "LR": 0.8133, "KNN": 0.8168, "RF": 0.8425, "SVM": 0.8331, "DT": 0.7900, "NB": 0.7841, "GraB": 0.8145
            },
            "Borderline-SMOTE": {
                "LR": 0.7281, "KNN": 0.7830, "RF": 0.7818, "SVM": 0.7573, "DT": 0.7468, "NB": 0.7246, "GraB": 0.7538
            },
            "SVM-SMOTE": {
                "LR": 0.7736, "KNN": 0.7923, "RF": 0.8191, "SVM": 0.8098, "DT": 0.7526, "NB": 0.7480, "GraB": 0.7853
            }
        }
    }
}

# One summary table (mean and CI bounds per model) for every metric.
results = {
    metric: compute_mean_ci(methods)
    for metric, methods in data_1r["Metrics"].items()
}

# Print each metric's table under its own heading.
for metric, result in results.items():
    print(f"\n{metric}:")
    print(result)



Disparate Impact:
          Mean  Lower CI  Upper CI
LR    0.491638  0.409556  0.573719
KNN   0.671588  0.569871  0.773304
RF    0.550050  0.418403  0.681697
SVM   0.602750  0.490143  0.715357
DT    0.585850  0.466967  0.704733
NB    0.635012  0.615133  0.654892
GraB  0.571987  0.467469  0.676506

Statistical Parity Difference:
          Mean  Lower CI  Upper CI
LR   -0.166350 -0.211388 -0.121312
KNN  -0.059838 -0.070848 -0.048827
RF   -0.091850 -0.131702 -0.051998
SVM  -0.095413 -0.135365 -0.055460
DT   -0.103500 -0.147187 -0.059813
NB   -0.108037 -0.123151 -0.092924
GraB -0.107575 -0.145622 -0.069528

Equal Opportunity Difference:
          Mean  Lower CI  Upper CI
LR   -0.271287 -0.324639 -0.217936
KNN  -0.097075 -0.138370 -0.055780
RF   -0.178188 -0.262657 -0.093718
SVM  -0.159600 -0.208782 -0.110418
DT   -0.134313 -0.205762 -0.062863
NB   -0.204813 -0.239979 -0.169646
GraB -0.200825 -0.273620 -0.128030

Predictive Equality Difference:
          Mean  Lower CI  Upper CI
LR   -0.14

Oversampling Based on Sensitive Attributes Strategy

In [None]:
import numpy as np
import scipy.stats as stats
import pandas as pd

def compute_mean_ci(data, confidence_level=0.95):
    """Return the row-wise mean and two-sided Student-t confidence interval.

    Parameters
    ----------
    data : dict or array-like
        Anything ``pd.DataFrame`` accepts. For a dict of dicts the outer
        keys become columns and the inner keys become rows, so the
        statistics are taken across the outer keys for each inner key.
    confidence_level : float, default 0.95
        Coverage of the interval; must lie strictly between 0 and 1.

    Returns
    -------
    pd.DataFrame
        One row per row of ``pd.DataFrame(data)`` with the columns
        ``"Mean"``, ``"Lower CI"`` and ``"Upper CI"``. Rows with fewer
        than two non-missing values get NaN bounds.

    Raises
    ------
    ValueError
        If ``confidence_level`` is not strictly between 0 and 1.
    """
    if not 0 < confidence_level < 1:
        raise ValueError("confidence_level must be strictly between 0 and 1")

    df = pd.DataFrame(data)
    mean_values = df.mean(axis=1)
    std_values = df.std(axis=1, ddof=1)

    # mean() and std() skip missing values, so the sample size has to be
    # counted per row; df.shape[1] would overstate it when a row has NaNs
    # and make the interval too narrow.
    n = df.count(axis=1)
    # The t distribution needs at least one degree of freedom; leave the
    # interval undefined (NaN) for rows with fewer than two observations.
    dof = (n - 1).where(n > 1)

    t_score = pd.Series(
        stats.t.ppf((1 + confidence_level) / 2, dof.to_numpy(dtype=float)),
        index=df.index,
    )
    margin_of_error = t_score * (std_values / np.sqrt(n))

    return pd.DataFrame({
        "Mean": mean_values,
        "Lower CI": mean_values - margin_of_error,
        "Upper CI": mean_values + margin_of_error,
    })

data_2r = {
    "Metrics": {
        "Disparate Impact": {
            "RandomOverSampler": {
                "LR": 0.7683, "KNN": 0.7427, "RF": 0.4886, "SVM": 0.9283, "DT": 0.5673, "NB": 0.6325, "GraB": 0.7014
            },
            "SMOTE": {
                "LR": 0.2901, "KNN": 1.1140, "RF": 0.9283, "SVM": 0.9283, "DT": 0.8212, "NB": 0.6119, "GraB": 0.4791
            },
            "SMOTETomek": {
                "LR": 0.3094, "KNN": 1.1346, "RF": 0.4126, "SVM": 0.9283, "DT": 0.9465, "NB": 0.6119, "GraB": 0.4220
            },
            "SMOTEENN": {
                "LR": 0.4642, "KNN": 3.9361, "RF": 3.3007, "SVM": 0.0000, "DT": 0.7884, "NB": 0.6397, "GraB": 0.7166
            },
            "SMOTEN": {
                "LR": 0.4642, "KNN": 0.7073, "RF": 0.6631, "SVM": 1.2378, "DT": 0.7158, "NB": 0.6873, "GraB": 0.2321
            },
            "KMeans-SMOTE": {
                "LR": 0.3537, "KNN": 0.6840, "RF": 0.7427, "SVM": 1.1140, "DT": 0.8737, "NB": 0.6223, "GraB": 0.6382
            },
            "Borderline-SMOTE": {
                "LR": 0.4814, "KNN": 0.5157, "RF": 0.7736, "SVM": 0.0000, "DT": 0.7707, "NB": 0.6292, "GraB": 0.4126
            },
            "SVM-SMOTE": {
                "LR": 0.3868, "KNN": 0.4528, "RF": 0.7645, "SVM": 0.9283, "DT": 0.6007, "NB": 0.6189, "GraB": 0.6189
            }
        },
        "Statistical Parity Difference": {
            "RandomOverSampler": {
                "LR": -0.0121, "KNN": -0.0139, "RF": -0.0174, "SVM": -0.0067, "DT": -0.0559, "NB": -0.1201, "GraB": -0.0241
            },
            "SMOTE": {
                "LR": -0.0408, "KNN": 0.0041, "RF": -0.0008, "SVM": -0.0003, "DT": -0.0167, "NB": -0.1226, "GraB": -0.0290
            },
            "SMOTETomek": {
                "LR": -0.0223, "KNN": 0.0044, "RF": -0.0095, "SVM": -0.0003, "DT": -0.0049, "NB": -0.1226, "GraB": -0.0228
            },
            "SMOTEENN": {
                "LR": -0.0154, "KNN": 0.1318, "RF": 0.0372, "SVM": -0.0018, "DT": -0.0277, "NB": -0.0770, "GraB": -0.0290
            },
            "SMOTEN": {
                "LR": -0.0231, "KNN": -0.0110, "RF": -0.0085, "SVM": 0.0013, "DT": -0.0423, "NB": -0.1016, "GraB": -0.0551
            },
            "KMeans-SMOTE": {
                "LR": -0.0244, "KNN": -0.0108, "RF": -0.0069, "SVM": 0.0010, "DT": -0.0116, "NB": -0.1214, "GraB": -0.0208
            },
            "Borderline-SMOTE": {
                "LR": -0.0251, "KNN": -0.0156, "RF": -0.0049, "SVM": -0.0054, "DT": -0.0218, "NB": -0.1198, "GraB": -0.0190
            },
            "SVM-SMOTE": {
                "LR": -0.0264, "KNN": -0.0403, "RF": -0.0072, "SVM": -0.0005, "DT": -0.0487, "NB": -0.1149, "GraB": -0.0226
            }
        },
        "Equal Opportunity Difference": {
            "RandomOverSampler": {
                "LR": -0.0957, "KNN": -0.0957, "RF": -0.1170, "SVM": 0.0106, "DT": -0.0745, "NB": -0.2234, "GraB": -0.0213
            },
            "SMOTE": {
                "LR": -0.1064, "KNN": -0.0532, "RF": -0.0426, "SVM": -0.0106, "DT": -0.0319, "NB": -0.2553, "GraB": -0.1277
            },
            "SMOTETomek": {
                "LR": -0.0851, "KNN": -0.0745, "RF": -0.0638, "SVM": -0.0106, "DT": -0.0106, "NB": -0.2553, "GraB": -0.1170
            },
            "SMOTEENN": {
                "LR": -0.0638, "KNN": 0.1064, "RF": 0.0319, "SVM": 0.0000, "DT": -0.1596, "NB": -0.1383, "GraB": -0.0851
            },
            "SMOTEN": {
                "LR": -0.0638, "KNN": -0.1064, "RF": -0.0532, "SVM": -0.0213, "DT": -0.0957, "NB": -0.2234, "GraB": -0.1489
            },
            "KMeans-SMOTE": {
                "LR": -0.1064, "KNN": -0.1170, "RF": -0.0851, "SVM": -0.0426, "DT": -0.0426, "NB": -0.2340, "GraB": -0.1383
            },
            "Borderline-SMOTE": {
                "LR": -0.0745, "KNN": -0.1064, "RF": -0.0851, "SVM": -0.0213, "DT": -0.0426, "NB": -0.2234, "GraB": -0.0957
            },
            "SVM-SMOTE": {
                "LR": -0.0957, "KNN": -0.1489, "RF": -0.1277, "SVM": -0.0319, "DT": -0.0957, "NB": -0.2234, "GraB": -0.0957
            }
        },
        "Predictive Equality Difference": {
            "RandomOverSampler": {
                "LR": 0.0053, "KNN": 0.0032, "RF": 0.0025, "SVM": -0.0077, "DT": -0.0505, "NB": -0.0964, "GraB": -0.0231
            },
            "SMOTE": {
                "LR": -0.0267, "KNN": 0.0158, "RF": 0.0075, "SVM": 0.0018, "DT": -0.0124, "NB": -0.0935, "GraB": -0.0090
            },
            "SMOTETomek": {
                "LR": -0.0094, "KNN": 0.0201, "RF": 0.0014, "SVM": 0.0018, "DT": -0.0027, "NB": -0.0935, "GraB": -0.0040
            },
            "SMOTEENN": {
                "LR": -0.0054, "KNN": 0.1372, "RF": 0.0388, "SVM": -0.0022, "DT": -0.0005, "NB": -0.0621, "GraB": -0.0163
            },
            "SMOTEN": {
                "LR": -0.0141, "KNN": 0.0082, "RF": 0.0007, "SVM": 0.0057, "DT": -0.0297, "NB": -0.0745, "GraB": -0.0360
            },
            "KMeans-SMOTE": {
                "LR": -0.0076, "KNN": 0.0104, "RF": 0.0090, "SVM": 0.0097, "DT": -0.0033, "NB": -0.0961, "GraB": 0.0032
            },
            "Borderline-SMOTE": {
                "LR": -0.0144, "KNN": 0.0025, "RF": 0.0115, "SVM": -0.0022, "DT": -0.0163, "NB": -0.0961, "GraB": -0.0036
            },
            "SVM-SMOTE": {
                "LR": -0.0119, "KNN": -0.0184, "RF": 0.0169, "SVM": 0.0057, "DT": -0.0386, "NB": -0.0903, "GraB": -0.0069
            }
        },
        "Accuracy": {
            "RandomOverSampler": {
                "LR": 0.8296, "KNN": 0.8285, "RF": 0.8331, "SVM": 0.8191, "DT": 0.7981, "NB": 0.7188, "GraB": 0.8168
            },
            "SMOTE": {
                "LR": 0.8366, "KNN": 0.8238, "RF": 0.8343, "SVM": 0.8343, "DT": 0.8040, "NB": 0.7235, "GraB": 0.8250
            },
            "SMOTETomek": {
                "LR": 0.8366, "KNN": 0.8250, "RF": 0.8366, "SVM": 0.8343, "DT": 0.7970, "NB": 0.7235, "GraB": 0.8296
            },
            "SMOTEENN": {
                "LR": 0.8331, "KNN": 0.7841, "RF": 0.8273, "SVM": 0.8343, "DT": 0.7911, "NB": 0.7701, "GraB": 0.8110
            },
            "SMOTEN": {
                "LR": 0.8355, "KNN": 0.8320, "RF": 0.8320, "SVM": 0.8343, "DT": 0.7923, "NB": 0.7141, "GraB": 0.8226
            },
            "KMeans-SMOTE": {
                "LR": 0.8366, "KNN": 0.8308, "RF": 0.8366, "SVM": 0.8355, "DT": 0.8203, "NB": 0.7200, "GraB": 0.8296
            },
            "Borderline-SMOTE": {
                "LR": 0.8331, "KNN": 0.8320, "RF": 0.8413, "SVM": 0.8366, "DT": 0.8063, "NB": 0.7223, "GraB": 0.8308
            },
            "SVM-SMOTE": {
                "LR": 0.8366, "KNN": 0.8156, "RF": 0.8355, "SVM": 0.8355, "DT": 0.7865, "NB": 0.7351, "GraB": 0.8331
            }
        }
    }
}

# One summary table (mean and CI bounds per model) for every metric.
results = {
    metric: compute_mean_ci(methods)
    for metric, methods in data_2r["Metrics"].items()
}

# Print each metric's table under its own heading.
for metric, result in results.items():
    print(f"\n{metric}:")
    print(result)



Disparate Impact:
          Mean  Lower CI  Upper CI
LR    0.439762  0.313183  0.566342
KNN   1.160900  0.200885  2.120915
RF    1.009263  0.223056  1.795469
SVM   0.758125  0.355682  1.160568
DT    0.760537  0.652529  0.868546
NB    0.631713  0.611238  0.652187
GraB  0.527613  0.386107  0.669118

Statistical Parity Difference:
          Mean  Lower CI  Upper CI
LR   -0.023700 -0.030821 -0.016579
KNN   0.006088 -0.037935  0.050110
RF   -0.002250 -0.016142  0.011642
SVM  -0.001588 -0.004040  0.000865
DT   -0.028700 -0.044103 -0.013297
NB   -0.112500 -0.125828 -0.099172
GraB -0.027800 -0.037490 -0.018110

Equal Opportunity Difference:
          Mean  Lower CI  Upper CI
LR   -0.086425 -0.101035 -0.071815
KNN  -0.074463 -0.139990 -0.008935
RF   -0.067825 -0.109520 -0.026130
SVM  -0.015962 -0.030229 -0.001696
DT   -0.069150 -0.108925 -0.029375
NB   -0.222062 -0.252658 -0.191467
GraB -0.103712 -0.137238 -0.070187

Predictive Equality Difference:
          Mean  Lower CI  Upper CI
LR   -0.01

Oversampling Based on Sensitive Attributes Strategy

In [None]:
import numpy as np
import scipy.stats as stats
import pandas as pd

def compute_mean_ci(data, confidence_level=0.95):
    """Return the row-wise mean and two-sided Student-t confidence interval.

    Parameters
    ----------
    data : dict or array-like
        Anything ``pd.DataFrame`` accepts. For a dict of dicts the outer
        keys become columns and the inner keys become rows, so the
        statistics are taken across the outer keys for each inner key.
    confidence_level : float, default 0.95
        Coverage of the interval; must lie strictly between 0 and 1.

    Returns
    -------
    pd.DataFrame
        One row per row of ``pd.DataFrame(data)`` with the columns
        ``"Mean"``, ``"Lower CI"`` and ``"Upper CI"``. Rows with fewer
        than two non-missing values get NaN bounds.

    Raises
    ------
    ValueError
        If ``confidence_level`` is not strictly between 0 and 1.
    """
    if not 0 < confidence_level < 1:
        raise ValueError("confidence_level must be strictly between 0 and 1")

    df = pd.DataFrame(data)
    mean_values = df.mean(axis=1)
    std_values = df.std(axis=1, ddof=1)

    # mean() and std() skip missing values, so the sample size has to be
    # counted per row; df.shape[1] would overstate it when a row has NaNs
    # and make the interval too narrow.
    n = df.count(axis=1)
    # The t distribution needs at least one degree of freedom; leave the
    # interval undefined (NaN) for rows with fewer than two observations.
    dof = (n - 1).where(n > 1)

    t_score = pd.Series(
        stats.t.ppf((1 + confidence_level) / 2, dof.to_numpy(dtype=float)),
        index=df.index,
    )
    margin_of_error = t_score * (std_values / np.sqrt(n))

    return pd.DataFrame({
        "Mean": mean_values,
        "Lower CI": mean_values - margin_of_error,
        "Upper CI": mean_values + margin_of_error,
    })

data_3r = {
    "Metrics": {
        "Disparate Impact": {
            "RandomOverSampler": {
                "LR": 4.0144, "KNN": 1.1321, "RF": 2.1219, "SVM": 1.4616, "DT": 0.7871, "NB": 0.8707, "GraB": 1.1936
            },
            "SMOTE": {
                "LR": 2.8734, "KNN": 1.4282, "RF": 2.5025, "SVM": 1.5914, "DT": 0.9283, "NB": 0.8538, "GraB": 2.3307
            },
            "SMOTETomek": {
                "LR": 2.8734, "KNN": 1.4282, "RF": 3.4813, "SVM": 1.5914, "DT": 0.9082, "NB": 0.8538, "GraB": 1.8567
            },
            "SMOTEENN": {
                "LR": 3.5997, "KNN": 2.5861, "RF": 9.1405, "SVM": 1.8998, "DT": 2.4068, "NB": 0.8880, "GraB": 5.8291
            },
            "SMOTEN": {
                "LR": 4.5621, "KNN": 1.3368, "RF": 2.1661, "SVM": 2.3630, "DT": 1.8962, "NB": 0.9921, "GraB": 1.0564
            },
            "KMeans-SMOTE": {
                "LR": 1.0884, "KNN": 0.9032, "RF": 1.2014, "SVM": 1.2378, "DT": 0.7265, "NB": 0.8469, "GraB": 1.8567
            },
            "Borderline-SMOTE": {
                "LR": 2.7043, "KNN": 1.3470, "RF": 5.2219, "SVM": 2.0072, "DT": 2.4313, "NB": 0.8868, "GraB": 1.8567
            },
            "SVM-SMOTE": {
                "LR": 1.7759, "KNN": 1.1991, "RF": 2.9913, "SVM": 1.5266, "DT": 1.3217, "NB": 0.9662, "GraB": 0.8974
            }
        },
        "Statistical Parity Difference": {
            "RandomOverSampler": {
                "LR": 0.2002, "KNN": 0.0097, "RF": 0.0282, "SVM": 0.0390, "DT": -0.0352, "NB": -0.0337, "GraB": 0.0195
            },
            "SMOTE": {
                "LR": 0.1413, "KNN": 0.0400, "RF": 0.0620, "SVM": 0.0372, "DT": -0.0118, "NB": -0.0360, "GraB": 0.1123
            },
            "SMOTETomek": {
                "LR": 0.1413, "KNN": 0.0400, "RF": 0.0713, "SVM": 0.0372, "DT": -0.0152, "NB": -0.0360, "GraB": 0.0861
            },
            "SMOTEENN": {
                "LR": 0.2287, "KNN": 0.1595, "RF": 0.3800, "SVM": 0.0695, "DT": 0.2046, "NB": -0.0278, "GraB": 0.3728
            },
            "SMOTEN": {
                "LR": 0.2238, "KNN": 0.0302, "RF": 0.0377, "SVM": 0.0808, "DT": 0.0756, "NB": -0.0019, "GraB": 0.0059
            },
            "KMeans-SMOTE": {
                "LR": 0.0092, "KNN": -0.0064, "RF": 0.0061, "SVM": 0.0102, "DT": -0.0226, "NB": -0.0313, "GraB": 0.0523
            },
            "Borderline-SMOTE": {
                "LR": 0.1407, "KNN": 0.0318, "RF": 0.1213, "SVM": 0.0669, "DT": 0.1079, "NB": -0.0272, "GraB": 0.0769
            },
            "SVM-SMOTE": {
                "LR": 0.0641, "KNN": 0.0172, "RF": 0.0644, "SVM": 0.0425, "DT": 0.0341, "NB": -0.0059, "GraB": -0.0111
            }
        },
        "Equal Opportunity Difference": {
            "RandomOverSampler": {
                "LR": 0.3404, "KNN": -0.0638, "RF": -0.0213, "SVM": 0.0106, "DT": -0.0532, "NB": -0.1596, "GraB": 0.0106
            },
            "SMOTE": {
                "LR": 0.2021, "KNN": 0.0213, "RF": 0.1277, "SVM": 0.0638, "DT": -0.0745, "NB": -0.1064, "GraB": 0.1277
            },
            "SMOTETomek": {
                "LR": 0.2021, "KNN": 0.0213, "RF": 0.1170, "SVM": 0.0638, "DT": -0.0957, "NB": -0.1064, "GraB": 0.0106
            },
            "SMOTEENN": {
                "LR": 0.3511, "KNN": 0.0745, "RF": 0.4468, "SVM": 0.1383, "DT": 0.0638, "NB": -0.0851, "GraB": 0.4574
            },
            "SMOTEN": {
                "LR": 0.3936, "KNN": -0.0638, "RF": 0.0106, "SVM": 0.1277, "DT": 0.0638, "NB": 0.0106, "GraB": 0.0319
            },
            "KMeans-SMOTE": {
                "LR": -0.0638, "KNN": -0.1277, "RF": -0.0745, "SVM": -0.0426, "DT": -0.0213, "NB": -0.1064, "GraB": 0.0851
            },
            "Borderline-SMOTE": {
                "LR": 0.2660, "KNN": 0.0000, "RF": 0.0957, "SVM": 0.1702, "DT": 0.2128, "NB": -0.1277, "GraB": 0.1064
            },
            "SVM-SMOTE": {
                "LR": 0.0957, "KNN": -0.0851, "RF": 0.0426, "SVM": 0.0745, "DT": 0.0319, "NB": -0.0745, "GraB": -0.0213
            }
        },
        "Predictive Equality Difference": {
            "RandomOverSampler": {
                "LR": 0.1764, "KNN": 0.0258, "RF": 0.0384, "SVM": 0.0466, "DT": -0.0301, "NB": -0.0061, "GraB": 0.0232
            },
            "SMOTE": {
                "LR": 0.1321, "KNN": 0.0462, "RF": 0.0510, "SVM": 0.0341, "DT": 0.0015, "NB": -0.0190, "GraB": 0.1116
            },
            "SMOTETomek": {
                "LR": 0.1321, "KNN": 0.0462, "RF": 0.0636, "SVM": 0.0341, "DT": 0.0015, "NB": -0.0190, "GraB": 0.1026
            },
            "SMOTEENN": {
                "LR": 0.2083, "KNN": 0.1781, "RF": 0.3693, "SVM": 0.0588, "DT": 0.2330, "NB": -0.0133, "GraB": 0.3585
            },
            "SMOTEN": {
                "LR": 0.1940, "KNN": 0.0506, "RF": 0.0438, "SVM": 0.0736, "DT": 0.0793, "NB": -0.0003, "GraB": 0.0031
            },
            "KMeans-SMOTE": {
                "LR": 0.0257, "KNN": 0.0186, "RF": 0.0226, "SVM": 0.0219, "DT": -0.0217, "NB": -0.0132, "GraB": 0.0477
            },
            "Borderline-SMOTE": {
                "LR": 0.1196, "KNN": 0.0405, "RF": 0.1279, "SVM": 0.0495, "DT": 0.0890, "NB": -0.0043, "GraB": 0.0735
            },
            "SVM-SMOTE": {
                "LR": 0.0603, "KNN": 0.0391, "RF": 0.0700, "SVM": 0.0391, "DT": 0.0358, "NB": 0.0105, "GraB": -0.0073
            }
        },
        "Accuracy": {
            "RandomOverSampler": {
                "LR": 0.7993, "KNN": 0.8215, "RF": 0.8261, "SVM": 0.8121, "DT": 0.7643, "NB": 0.7340, "GraB": 0.8028
            },
            "SMOTE": {
                "LR": 0.7993, "KNN": 0.8145, "RF": 0.8215, "SVM": 0.8226, "DT": 0.7538, "NB": 0.7445, "GraB": 0.7958
            },
            "SMOTETomek": {
                "LR": 0.7993, "KNN": 0.8145, "RF": 0.8191, "SVM": 0.8226, "DT": 0.7526, "NB": 0.7445, "GraB": 0.7865
            },
            "SMOTEENN": {
                "LR": 0.7795, "KNN": 0.7748, "RF": 0.7538, "SVM": 0.8156, "DT": 0.7165, "NB": 0.7421, "GraB": 0.7305
            },
            "SMOTEN": {
                "LR": 0.7900, "KNN": 0.8121, "RF": 0.8226, "SVM": 0.8110, "DT": 0.7876, "NB": 0.7526, "GraB": 0.8063
            },
            "KMeans-SMOTE": {
                "LR": 0.8121, "KNN": 0.8273, "RF": 0.8331, "SVM": 0.8331, "DT": 0.8075, "NB": 0.7771, "GraB": 0.8145
            },
            "Borderline-SMOTE": {
                "LR": 0.7993, "KNN": 0.8168, "RF": 0.8203, "SVM": 0.8203, "DT": 0.7806, "NB": 0.7515, "GraB": 0.8005
            },
            "SVM-SMOTE": {
                "LR": 0.8075, "KNN": 0.8180, "RF": 0.8273, "SVM": 0.8215, "DT": 0.7806, "NB": 0.7830, "GraB": 0.7993
            }
        }
    }
}

# One summary table (mean and CI bounds per model) for every metric.
results = {
    metric: compute_mean_ci(methods)
    for metric, methods in data_3r["Metrics"].items()
}

# Print each metric's table under its own heading.
for metric, result in results.items():
    print(f"\n{metric}:")
    print(result)



Disparate Impact:
          Mean  Lower CI  Upper CI
LR    2.936450  1.984884  3.888016
KNN   1.420087  0.999584  1.840591
RF    3.603362  1.485763  5.720962
SVM   1.709850  1.410545  2.009155
DT    1.425763  0.824734  2.026791
NB    0.894787  0.849092  0.940483
GraB  2.109663  0.788056  3.431269

Statistical Parity Difference:
          Mean  Lower CI  Upper CI
LR    0.143662  0.079462  0.207863
KNN   0.040250 -0.002184  0.082684
RF    0.096375 -0.003617  0.196367
SVM   0.047912  0.028774  0.067051
DT    0.042175 -0.027250  0.111600
NB   -0.024975 -0.036229 -0.013721
GraB  0.089338 -0.012732  0.191407

Equal Opportunity Difference:
          Mean  Lower CI  Upper CI
LR    0.223400  0.096817  0.349983
KNN  -0.027912 -0.084363  0.028538
RF    0.093075 -0.040020  0.226170
SVM   0.075788  0.017462  0.134113
DT    0.015950 -0.067890  0.099790
NB   -0.094438 -0.135998 -0.052877
GraB  0.101050 -0.026951  0.229051

Predictive Equality Difference:
          Mean  Lower CI  Upper CI
LR    0.13

Equalized Discrimination Group Instances Strategy

In [None]:
import numpy as np
import scipy.stats as stats
import pandas as pd

def compute_mean_ci(data, confidence_level=0.95):
    """Return the row-wise mean and two-sided Student-t confidence interval.

    Parameters
    ----------
    data : dict or array-like
        Anything ``pd.DataFrame`` accepts. For a dict of dicts the outer
        keys become columns and the inner keys become rows, so the
        statistics are taken across the outer keys for each inner key.
    confidence_level : float, default 0.95
        Coverage of the interval; must lie strictly between 0 and 1.

    Returns
    -------
    pd.DataFrame
        One row per row of ``pd.DataFrame(data)`` with the columns
        ``"Mean"``, ``"Lower CI"`` and ``"Upper CI"``. Rows with fewer
        than two non-missing values get NaN bounds.

    Raises
    ------
    ValueError
        If ``confidence_level`` is not strictly between 0 and 1.
    """
    if not 0 < confidence_level < 1:
        raise ValueError("confidence_level must be strictly between 0 and 1")

    df = pd.DataFrame(data)
    mean_values = df.mean(axis=1)
    std_values = df.std(axis=1, ddof=1)

    # mean() and std() skip missing values, so the sample size has to be
    # counted per row; df.shape[1] would overstate it when a row has NaNs
    # and make the interval too narrow.
    n = df.count(axis=1)
    # The t distribution needs at least one degree of freedom; leave the
    # interval undefined (NaN) for rows with fewer than two observations.
    dof = (n - 1).where(n > 1)

    t_score = pd.Series(
        stats.t.ppf((1 + confidence_level) / 2, dof.to_numpy(dtype=float)),
        index=df.index,
    )
    margin_of_error = t_score * (std_values / np.sqrt(n))

    return pd.DataFrame({
        "Mean": mean_values,
        "Lower CI": mean_values - margin_of_error,
        "Upper CI": mean_values + margin_of_error,
    })

# Input data: metric -> sampling technique -> model -> value
data_4r = {
    "Metrics": {
        "Disparate Impact": {
            "RandomOverSampler": {
                "LR": 1.0488, "KNN": 0.3961, "RF": 1.0403, "SVM": 1.1313, "DT": 1.0056, "NB": 0.7034, "GraB": 1.1845
            },
            "SMOTE": {
                "LR": 0.9162, "KNN": 0.5010, "RF": 0.5298, "SVM": 0.9760, "DT": 0.5553, "NB": 0.6697, "GraB": 0.6711
            },
            "SMOTETomek": {
                "LR": 0.9162, "KNN": 0.4879, "RF": 0.5657, "SVM": 0.9428, "DT": 0.6142, "NB": 0.6734, "GraB": 0.8328
            },
            "SMOTEENN": {
                "LR": 0.7512, "KNN": 0.6363, "RF": 0.7460, "SVM": 0.6308, "DT": 0.7887, "NB": 0.6791, "GraB": 0.7576
            },
            "SMOTEN": {
                "LR": 1.1091, "KNN": 0.5842, "RF": 0.7775, "SVM": 0.8198, "DT": 0.7587, "NB": 0.8878, "GraB": 0.9265
            },
            "KMeans-SMOTE": {
                "LR": 0.6877, "KNN": 0.2694, "RF": 0.5215, "SVM": 0.5318, "DT": 0.5628, "NB": 0.6285, "GraB": 0.4714
            },
            "Borderline-SMOTE": {
                "LR": 0.9366, "KNN": 0.4537, "RF": 0.7856, "SVM": 1.0285, "DT": 0.8659, "NB": 0.6875, "GraB": 0.8900
            },
            "SVM-SMOTE": {
                "LR": 0.7008, "KNN": 0.5164, "RF": 0.7713, "SVM": 0.6907, "DT": 0.8228, "NB": 0.5901, "GraB": 0.6433
            }
        },
        "Statistical Parity Difference": {
            "RandomOverSampler": {
                "LR": 0.0147, "KNN": -0.1283, "RF": 0.0042, "SVM": 0.0188, "DT": 0.0011, "NB": -0.1022, "GraB": 0.0257
            },
            "SMOTE": {
                "LR": -0.0212, "KNN": -0.1274, "RF": -0.1016, "SVM": -0.0036, "DT": -0.1159, "NB": -0.1079, "GraB": -0.0693
            },
            "SMOTETomek": {
                "LR": -0.0212, "KNN": -0.1308, "RF": -0.0931, "SVM": -0.0090, "DT": -0.0909, "NB": -0.1061, "GraB": -0.0358
            },
            "SMOTEENN": {
                "LR": -0.1115, "KNN": -0.1578, "RF": -0.1043, "SVM": -0.1793, "DT": -0.0785, "NB": -0.1209, "GraB": -0.0970
            },
            "SMOTEN": {
                "LR": 0.0298, "KNN": -0.2108, "RF": -0.0385, "SVM": -0.0370, "DT": -0.0728, "NB": -0.0413, "GraB": -0.0152
            },
            "KMeans-SMOTE": {
                "LR": -0.0474, "KNN": -0.1096, "RF": -0.0402, "SVM": -0.0326, "DT": -0.0523, "NB": -0.0796, "GraB": -0.0717
            },
            "Borderline-SMOTE": {
                "LR": -0.0173, "KNN": -0.1298, "RF": -0.0413, "SVM": 0.0050, "DT": -0.0323, "NB": -0.1010, "GraB": -0.0246
            },
            "SVM-SMOTE": {
                "LR": -0.0604, "KNN": -0.1356, "RF": -0.0269, "SVM": -0.0558, "DT": -0.0348, "NB": -0.0959, "GraB": -0.0541
            }
        },
        "Equal Opportunity Difference": {
            "RandomOverSampler": {
                "LR": 0.0334, "KNN": -0.1195, "RF": -0.0387, "SVM": 0.0988, "DT": 0.0326, "NB": -0.0869, "GraB": -0.0238
            },
            "SMOTE": {
                "LR": 0.0848, "KNN": -0.0800, "RF": -0.0554, "SVM": 0.1039, "DT": -0.1023, "NB": -0.1116, "GraB": -0.0678
            },
            "SMOTETomek": {
                "LR": 0.0848, "KNN": -0.0800, "RF": -0.1463, "SVM": 0.0941, "DT": -0.0729, "NB": -0.1116, "GraB": -0.0040
            },
            "SMOTEENN": {
                "LR": -0.0032, "KNN": -0.2120, "RF": -0.1015, "SVM": -0.1407, "DT": -0.0352, "NB": -0.0869, "GraB": -0.0647
            },
            "SMOTEN": {
                "LR": 0.1264, "KNN": -0.2440, "RF": -0.0753, "SVM": 0.0058, "DT": -0.1608, "NB": 0.0432, "GraB": -0.0064
            },
            "KMeans-SMOTE": {
                "LR": -0.1415, "KNN": -0.1343, "RF": -0.0755, "SVM": -0.0829, "DT": -0.1492, "NB": -0.1314, "GraB": -0.1955
            },
            "Borderline-SMOTE": {
                "LR": 0.0307, "KNN": -0.1267, "RF": -0.0922, "SVM": 0.0082, "DT": -0.0382, "NB": -0.0845, "GraB": -0.0554
            },
            "SVM-SMOTE": {
                "LR": -0.1585, "KNN": -0.1436, "RF": -0.0999, "SVM": -0.1463, "DT": -0.0949, "NB": -0.1757, "GraB": -0.1171
            }
        },
        "Predictive Equality Difference": {
            "RandomOverSampler": {
                "LR": 0.0351, "KNN": -0.1175, "RF": 0.0224, "SVM": 0.0140, "DT": 0.0028, "NB": -0.0817, "GraB": 0.0452
            },
            "SMOTE": {
                "LR": -0.0157, "KNN": -0.1200, "RF": -0.0885, "SVM": -0.0091, "DT": -0.1089, "NB": -0.0856, "GraB": -0.0549
            },
            "SMOTETomek": {
                "LR": -0.0157, "KNN": -0.1238, "RF": -0.0649, "SVM": -0.0134, "DT": -0.0849, "NB": -0.0834, "GraB": -0.0269
            },
            "SMOTEENN": {
                "LR": -0.1062, "KNN": -0.1267, "RF": -0.0818, "SVM": -0.1647, "DT": -0.0617, "NB": -0.1056, "GraB": -0.0786
            },
            "SMOTEN": {
                "LR": 0.0296, "KNN": -0.1925, "RF": -0.0173, "SVM": -0.0297, "DT": -0.0412, "NB": -0.0363, "GraB": -0.0000
            },
            "KMeans-SMOTE": {
                "LR": -0.0131, "KNN": -0.0921, "RF": -0.0206, "SVM": -0.0113, "DT": -0.0253, "NB": -0.0485, "GraB": -0.0319
            },
            "Borderline-SMOTE": {
                "LR": -0.0051, "KNN": -0.1130, "RF": -0.0118, "SVM": 0.0180, "DT": -0.0130, "NB": -0.0812, "GraB": -0.0011
            },
            "SVM-SMOTE": {
                "LR": -0.0216, "KNN": -0.1128, "RF": 0.0015, "SVM": -0.0196, "DT": -0.0105, "NB": -0.0631, "GraB": -0.0306
            }
        },
        "Accuracy": {
            "RandomOverSampler": {
                "LR": 0.7246, "KNN": 0.7771, "RF": 0.8121, "SVM": 0.7701, "DT": 0.7386, "NB": 0.7246, "GraB": 0.7830
            },
            "SMOTE": {
                "LR": 0.7643, "KNN": 0.7596, "RF": 0.8016, "SVM": 0.7853, "DT": 0.7293, "NB": 0.7328, "GraB": 0.7725
            },
            "SMOTETomek": {
                "LR": 0.7643, "KNN": 0.7608, "RF": 0.7958, "SVM": 0.7841, "DT": 0.7386, "NB": 0.7340, "GraB": 0.7596
            },
            "SMOTEENN": {
                "LR": 0.6569, "KNN": 0.6756, "RF": 0.6803, "SVM": 0.6406, "DT": 0.7036, "NB": 0.6989, "GraB": 0.6884
            },
            "SMOTEN": {
                "LR": 0.7083, "KNN": 0.6021, "RF": 0.7923, "SVM": 0.7666, "DT": 0.7223, "NB": 0.6756, "GraB": 0.7690
            },
            "KMeans-SMOTE": {
                "LR": 0.8238, "KNN": 0.8191, "RF": 0.8471, "SVM": 0.8518, "DT": 0.8156, "NB": 0.8051, "GraB": 0.8436
            },
            "Borderline-SMOTE": {
                "LR": 0.7386, "KNN": 0.7806, "RF": 0.8040, "SVM": 0.7713, "DT": 0.7573, "NB": 0.7363, "GraB": 0.7678
            },
            "SVM-SMOTE": {
                "LR": 0.8086, "KNN": 0.7701, "RF": 0.8320, "SVM": 0.8168, "DT": 0.7725, "NB": 0.7841, "GraB": 0.8028
            }
        }
    }
}

# One summary table (mean and CI bounds per model) for every metric.
results = {
    metric: compute_mean_ci(methods)
    for metric, methods in data_4r["Metrics"].items()
}

# Print each metric's table under its own heading.
for metric, result in results.items():
    print(f"\n{metric}:")
    print(result)



Disparate Impact:
          Mean  Lower CI  Upper CI
LR    0.883325  0.752125  1.014525
KNN   0.480625  0.386254  0.574996
RF    0.717212  0.571436  0.862989
SVM   0.843963  0.667809  1.020116
DT    0.746750  0.613830  0.879670
NB    0.689937  0.616591  0.763284
GraB  0.797150  0.617315  0.976985

Statistical Parity Difference:
          Mean  Lower CI  Upper CI
LR   -0.029313 -0.066373  0.007748
KNN  -0.141263 -0.167213 -0.115312
RF   -0.055212 -0.088393 -0.022032
SVM  -0.036687 -0.088991  0.015616
DT   -0.059550 -0.090797 -0.028303
NB   -0.094363 -0.114759 -0.073966
GraB -0.042750 -0.075032 -0.010468

Equal Opportunity Difference:
          Mean  Lower CI  Upper CI
LR    0.007113 -0.080665  0.094890
KNN  -0.142513 -0.191252 -0.093773
RF   -0.085600 -0.112975 -0.058225
SVM  -0.007387 -0.095155  0.080380
DT   -0.077613 -0.130933 -0.024292
NB   -0.093175 -0.145768 -0.040582
GraB -0.066837 -0.120341 -0.013334

Predictive Equality Difference:
          Mean  Lower CI  Upper CI
LR   -0.01