In [None]:
# Notebook setup: read the project configuration, make the project's class
# directory importable, then import the project and third-party modules.
from configparser import ConfigParser

config = ConfigParser()
# NOTE: ConfigParser.read() silently skips files it cannot open; if
# ../config.ini is missing, the failure only surfaces as a KeyError on the
# "admin" lookups below.
config.read("../config.ini")

root_path = config["admin"]["root_dir"]
bias_mitigation_path = config["admin"]["bias_mitigation"]
import sys

# The directory is built by plain string concatenation, so the configured
# values must already carry the path separators (including a trailing one).
# This must run before the project imports below.
# NOTE(review): presumably the modules imported next live in this directory — confirm.
sys.path.insert(1, root_path + bias_mitigation_path + "classes")

from DecisionTree_insession import in_session_decision_tree
from KNN_insession import in_session_KNN
from MLP_insession import in_session_MLP
from MetricsClass import Evaluation
from Plotting import Plots
import numpy as np
import pandas as pd
import seaborn as sns
import warnings
import pickle

# Suppress every FutureWarning for the rest of the kernel session.
warnings.filterwarnings("ignore", category=FutureWarning)

Learning Bias Mitigation: Migration Background <> KNN

In [None]:
## define feature cols
# Ordered list of column names later handed to the model through
# `set_feature_cols`. The list is assembled from sub-lists grouped by column
# name prefix; the resulting order is what matters.
feature_cols = (
    # un-prefixed columns
    [
        "Erstloesung",
        "Schussel",
        "Erfolg",
        "Schwierigkeit",
        "ist_Schulzeit",
        "MehrfachFalsch",
        "vorher_abgebrochen",
        "Fehler",
        "Klassenstufe",
        "Jahredabei",
        "AnzahlAufgaben",
    ]
    # `Sex__*` columns
    + ["Sex__m", "Sex__w"]
    # `Testposition__*` columns
    + [
        "Testposition__pruefung",
        "Testposition__training",
        "Testposition__version",
    ]
    # `Art__*` columns
    + ["Art__GK", "Art__GR", "Art__GZ", "Art__K", "Art__LB"]
    # remaining un-prefixed columns
    + ["UserAttribut", "OrderNumber", "steps"]
)

In [None]:
# Configure the in-session KNN experiment and collect its metrics for every
# (n_neighbors, weights) setting into one DataFrame named `metrics`.
KNN = in_session_KNN()
KNN.set_range(2, 61)

# NOTE(review): this passes the demographic category name ("eigSprache") as the
# majority group; it is overwritten with "deutsch" further down. Kept because
# `load_matrices` runs in between and may read it — confirm and remove if not.
KNN.set_majority_group("eigSprache")
KNN.set_demographic_category("eigSprache")

# NOTE(review): the matrices are loaded from
# "06_learning_bias_mitigation/migration/matrix" but `loop_matrices` below is
# given "migration/matrix" — verify both resolve to the same files.
KNN.load_matrices("06_learning_bias_mitigation/migration/matrix", ".pkl", True)
KNN.set_feature_cols(feature_cols)
KNN.set_majority_group("deutsch")
KNN.set_minority_group("migration")

# Hyper-parameter grid, as two parallel lists: n_neighbors 2..10 with
# "uniform" weights first, then 2..10 again with "distance" weights.
n_neighbors = list(range(2, 11)) * 2
weights = ["uniform"] * 9 + ["distance"] * 9

# An empty frame goes first so the expected columns lead the column order of
# the concatenated result, exactly as when concatenating onto it step by step.
metrics_per_setting = [
    pd.DataFrame(
        columns=[
            "model",
            "group",
            "subgroup",
            "Length",
            "Sentence",
            "Accuracy",
            "Precision",
            "Recall",
            "AUC",
            "FPR",
            "n_neighbors",
            "weights",
        ]
    )
]

# The loop variables deliberately do NOT reuse the names `n_neighbors` and
# `weights`: doing so would rebind the grid lists to the last scalar pair and
# leave misleading state behind for later cells.
for k, weighting in zip(n_neighbors, weights):
    metrics_KNN = KNN.loop_matrices("migration/matrix", k, weighting)
    # Tag each result with the setting that produced it.
    metrics_KNN["n_neighbors"] = k
    metrics_KNN["weights"] = weighting
    metrics_per_setting.append(metrics_KNN)

# Concatenate once instead of growing the frame inside the loop.
metrics = pd.concat(metrics_per_setting)

Evaluate

In [None]:
# Evaluate the collected metrics for fairness.
# Wrap `metrics` in the project's Evaluation helper and tell it which
# demographic category and which minority/majority groups to use.
evaluation_obj = Evaluation(metrics)
evaluation_obj.set_demographic_category("eigSprache")
evaluation_obj.set_minority_group("migration")
evaluation_obj.set_majority_group("deutsch")
# Arguments for evaluate_learning_bias.
# NOTE(review): presumably `index_list` are the grouping keys and `columns`
# the keys spread into result columns — confirm against Evaluation.
index_list = ["n_neighbors", "weights", "Sentence"]
columns = ["n_neighbors", "weights"]

# Downstream code indexes the result as df_language[metric][n_neighbors][weights][i].
df_language = evaluation_obj.evaluate_learning_bias(index_list, columns)

In [None]:
# Calculate, for every (n_neighbors, weights) setting and every fairness
# metric, the mean of `df_language` over consecutive index ranges, then pivot
# the long result into `mean_table`.
met = ["EO", "PE", "PP", "SA"]

# Hyper-parameter grid, as two parallel lists: n_neighbors 2..10 with
# "uniform" weights first, then 2..10 again with "distance" weights.
n_neighbors = list(range(2, 11)) * 2
weights = ["uniform"] * 9 + ["distance"] * 9

# (start, stop, label) with a half-open interval [start, stop).
# NOTE(review): the last label reads "50-60" but range(50, 60) stops at 59.
# Left unchanged because it is not visible here whether index 60 exists in
# `df_language` — confirm and either extend the stop to 61 or relabel.
range_bounds = [
    (2, 10, "02-9"),
    (10, 20, "10-19"),
    (20, 30, "20-29"),
    (30, 40, "30-39"),
    (40, 50, "40-49"),
    (50, 60, "50-60"),
]
# Keep the (start, stop, divisor, label) tuple shape, but derive the divisor
# from the bounds. The previously hard-coded divisor was 9 for the 10-element
# ranges 10-19 .. 40-49, which inflated those means.
index_ranges = [
    (start, stop, stop - start, label) for start, stop, label in range_bounds
]

# Collect one record per (setting, metric, range) and build the frame once.
# The loop variables do not reuse the names `n_neighbors` / `weights`, so the
# grid lists are not rebound to scalars.
mean_records = []
for k, weighting in zip(n_neighbors, weights):
    for metric in met:
        # Values of this metric for the current setting, looked up once.
        per_index = df_language[metric][k][weighting]
        for i_start, i_end, n_values, index_range in index_ranges:
            total = sum(per_index[i] for i in range(i_start, i_end))
            mean_records.append(
                {
                    "Metrik": metric,
                    "Model": "KNN",
                    "Range": index_range,
                    "Val": total / n_values,
                    "n_neighbors": k,
                    "weights": weighting,
                }
            )

mean_table_long = pd.DataFrame(mean_records)

## pivot results table
mean_table = pd.pivot_table(
    mean_table_long,
    values=["Val"],
    index=["n_neighbors", "weights", "Range"],
    columns=["Metrik", "Model"],
)

In [None]:
# show table
# Pass the pivoted mean table to the evaluation object's `showTable` and keep
# the returned object in `table`.
table = evaluation_obj.showTable(mean_table)
# A bare last expression makes the notebook display the returned object.
table