In [None]:
from configparser import ConfigParser

# Read the project settings from ../config.ini (the path is resolved relative
# to the current working directory of the kernel).
# NOTE(review): ConfigParser.read() silently skips files it cannot open, so a
# missing config.ini only shows up as a KeyError on the "admin" lookup below.
config = ConfigParser()
config.read("../config.ini")

# Both values come from the [admin] section of the config file.
root_path = config["admin"]["root_dir"]
bias_mitigation_path = config["admin"]["bias_mitigation"]
import sys

# Put the directory holding the project's helper classes on the import path so
# the project-local imports that follow can be resolved. The parts are joined by
# plain string concatenation, so the configured values presumably already carry
# their trailing path separators — TODO confirm against config.ini.
sys.path.insert(1, root_path + bias_mitigation_path + "classes")

from DecisionTree_insession import in_session_decision_tree
from KNN_insession import in_session_KNN
from MLP_insession import in_session_MLP
from MetricsClass import Evaluation
from Plotting import Plots
import numpy as np

import pandas as pd
import seaborn as sns
import pickle

Learning Bias Mitigation: Gender <> DL

In [None]:
## define feature cols
# The list is assembled from small groups (split by column-name prefix) purely
# for readability; the concatenation keeps every name and the overall order
# exactly as in a single flat list.
_general_cols = [
    "Erstloesung",
    "Schussel",
    "Erfolg",
    "Schwierigkeit",
    "ist_Schulzeit",
    "MehrfachFalsch",
    "vorher_abgebrochen",
    "Fehler",
    "Klassenstufe",
    "Jahredabei",
    "AnzahlAufgaben",
]
_sex_cols = ["Sex__m", "Sex__w"]
_testposition_cols = [
    "Testposition__pruefung",
    "Testposition__training",
    "Testposition__version",
]
_art_cols = ["Art__GK", "Art__GR", "Art__GZ", "Art__K", "Art__LB"]
_trailing_cols = ["UserAttribut", "OrderNumber", "steps"]

feature_cols = [
    *_general_cols,
    *_sex_cols,
    *_testposition_cols,
    *_art_cols,
    *_trailing_cols,
]

In [None]:
# Train/evaluate the in-session MLP for every hyper-parameter configuration and
# collect the per-configuration metrics in one data frame (`metrics`).
MLP = in_session_MLP()
MLP.set_range(2, 61)

# set sex as column names
MLP.set_minority_group("Sex__w")
MLP.set_majority_group("Sex__m")
MLP.set_demographic_category("gender")

MLP.load_matrices("06_learning_bias_mitigation/gender/matrix", ".pkl", True)
MLP.set_feature_cols(feature_cols)
# NOTE(review): the groups are re-set here, after loading, from the column names
# ("Sex__w"/"Sex__m") to the labels "girls"/"boys" — presumably the column names
# are only needed by load_matrices; confirm against in_session_MLP.
MLP.set_minority_group("girls")
MLP.set_majority_group("boys")

# Hyper-parameter grid: position i of the three lists forms one configuration.
optimizer = [
    "Adam", "Adam", "Adam",
    "SGD", "SGD", "SGD",
    "Adam", "Adam", "Adam",
    "SGD", "SGD", "SGD",
]
loss = [
    "binary_crossentropy", "MeanSquaredError", "Hinge",
    "binary_crossentropy", "MeanSquaredError", "Hinge",
    "binary_crossentropy", "MeanSquaredError", "Hinge",
    "binary_crossentropy", "MeanSquaredError", "Hinge",
]
metrics_ = [
    "Accuracy", "Accuracy", "Accuracy",
    "Accuracy", "Accuracy", "Accuracy",
    "AUC", "AUC", "AUC",
    "AUC", "AUC", "AUC",
]

# Empty frame that fixes the column order of the collected results.
metrics = pd.DataFrame(
    columns=[
        "model",
        "group",
        "subgroup",
        "Length",
        "Sentence",
        "Accuracy",
        "Precision",
        "Recall",
        "AUC",
        "FPR",
        "optimizer",
        "loss",
        "metrics",
    ]
)

# The loop variables deliberately use their own names: reusing `optimizer`,
# `loss` and `metrics_` as loop targets would rebind the grid lists to single
# strings and leave the notebook namespace without the grid after this cell.
for optimizer_name, loss_name, metric_name in zip(optimizer, loss, metrics_):
    # NOTE(review): the two trailing 24s presumably correspond to the number of
    # feature columns (feature_cols has 24 entries) — confirm in loop_matrices.
    metrics_MLP = MLP.loop_matrices(
        "gender/matrix", optimizer_name, loss_name, metric_name, 24, 24
    )
    # Tag every result row with the configuration that produced it.
    metrics_MLP["optimizer"] = optimizer_name
    metrics_MLP["loss"] = loss_name
    metrics_MLP["metrics"] = metric_name

    # Concatenate immediately (rather than once after the loop) so each result
    # is copied before the next loop_matrices call, whose return value may or
    # may not be a fresh object.
    metrics = pd.concat([metrics, metrics_MLP])

Evaluate

In [None]:
# evaluate metrics for fairness
# Key lists passed to evaluate_learning_bias: `columns` holds the three
# hyper-parameter keys, `index_list` is the same keys followed by "Sentence".
columns = ["loss", "optimizer", "metrics"]
index_list = [*columns, "Sentence"]

# Wrap the collected metrics and describe the two groups being compared.
evaluation_obj = Evaluation(metrics)
evaluation_obj.set_demographic_category("gender")
evaluation_obj.set_minority_group("girls")
evaluation_obj.set_majority_group("boys")

df_gender = evaluation_obj.evaluate_learning_bias(index_list, columns)

In [None]:
# Hyper-parameter grid with 12 configurations: position i of the three lists
# below forms one (optimizer, loss, metric) combination.
# Optimizer alternates in blocks of three: Adam x3, SGD x3, Adam x3, SGD x3.
optimizer = (["Adam"] * 3 + ["SGD"] * 3) * 2
# The three losses cycle four times.
loss = ["binary_crossentropy", "MeanSquaredError", "Hinge"] * 4
# First six configurations use Accuracy, the last six use AUC.
metrics_ = ["Accuracy"] * 6 + ["AUC"] * 6

In [None]:
## calculate mean of the results per index bucket and map in data frame
met = ["EO", "PE", "PP", "SA"]

# (start, stop, label) per bucket. The bounds are fed to range(start, stop),
# so `stop` itself is NOT part of the bucket.
# NOTE(review): the last bucket is labelled "50-60" but range(50, 60) ends at
# 59 — confirm whether index 60 should be included.
_bucket_bounds = [
    (2, 10, "02-9"),
    (10, 20, "10-19"),
    (20, 30, "20-29"),
    (30, 40, "30-39"),
    (40, 50, "40-49"),
    (50, 60, "50-60"),
]
# Same (start, stop, divisor, label) layout as before, but the divisor is
# derived from the bounds so it always equals the number of summed values.
# (A hand-typed divisor of 9 for ten summed values inflates the mean by 10/9.)
index_ranges = [
    (start, stop, stop - start, label) for start, stop, label in _bucket_bounds
]

# Collect one record per (configuration, fairness metric, bucket) and build the
# frame once instead of growing it with pd.concat inside the loop.
mean_rows = []
# Dedicated loop variable names: reusing `loss`, `optimizer` and `metrics_` as
# loop targets would rebind the grid lists to strings and break a re-run of
# this cell.
for loss_name, optimizer_name, metric_name in zip(loss, optimizer, metrics_):
    for x in met:
        # Values of fairness metric `x` for this configuration, looked up by
        # the integer labels used in the bucket loop below.
        values = df_gender[x][loss_name][optimizer_name][metric_name]
        for i_start, i_end, div, index_range in index_ranges:
            total = 0
            for i in range(i_start, i_end):
                total = total + values[i]
            mean_rows.append(
                {
                    "Metrik": x,
                    "Model": "DL",
                    "Range": index_range,
                    "Val": total / div,
                    "loss": loss_name,
                    "optimizer": optimizer_name,
                    "metrics_": metric_name,
                }
            )

mean_table = pd.DataFrame(mean_rows)

## pivot results table
mean_table = pd.pivot_table(
    mean_table,
    values=["Val"],
    index=["loss", "optimizer", "metrics_", "Range"],
    columns=["Metrik", "Model"],
)

In [None]:
# show table
# Pass the pivoted mean table to the evaluation object's showTable helper and
# keep the result in `table`; the bare `table` expression on the last line
# makes the notebook display it as the cell output.
table = evaluation_obj.showTable(mean_table)
table