In [None]:
import pandas as pd
import pickle
import seaborn as sns
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import tree
from matplotlib import pyplot as plt
from sklearn.model_selection import cross_val_score, KFold, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    log_loss,
    roc_auc_score,
    recall_score,
    precision_score,
    accuracy_score,
    plot_roc_curve,
    plot_confusion_matrix,
    roc_curve,
    confusion_matrix,
)
import itertools
from tensorflow.keras.initializers import Constant, TruncatedNormal
from tensorflow.keras.layers import Activation, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.preprocessing import LabelEncoder
from numpy import mean, absolute

Aggregation Bias Mitigation

Group 1: deutsch

In [None]:
## values of n (taken from the temporal models): every integer from 2 to 60
n = [*range(2, 61)]

In [None]:
## load data per matrix and keep only the rows with eigSprache == 1
# NOTE(review): pickle.load can execute arbitrary code - only read trusted files.
for i in n:
    path = "../06_optimize_Fairness/eigSprache_allsessions/matrix" + str(i) + ".pkl"
    # the context manager closes the handle even when unpickling raises
    with open(path, "rb") as infile:
        df = pickle.load(infile)

    df = df[df.eigSprache == 1]

    # save the filtered matrix for the "deutsch" model cells
    path = "erstsprache/matrix_deutsch" + str(i) + ".pkl"
    df.to_pickle(path)

In [None]:
## empty results table; the model cells add one row per model, subgroup and n
_metric_columns = [
    # identification of the run
    "model",
    "group",
    "subgroup",
    "Length",
    "Sentence",
    # scores on the held-out split
    "Accuracy",
    "Precision",
    "Recall",
    "AUC",
    "FPR",
]
metrics = pd.DataFrame(columns=_metric_columns)

In [None]:
## feature cols
# The order of the 21 names is unchanged; they are only laid out in groups.
feature_cols = (
    [
        "Erstloesung",
        "Schussel",
        "Erfolg",
        "Schwierigkeit",
        "ist_Schulzeit",
        "MehrfachFalsch",
        "vorher_abgebrochen",
        "Fehler",
        "Klassenstufe",
        "Jahredabei",
    ]
    # columns sharing the "Testposition__" prefix
    + ["Testposition__pruefung", "Testposition__training", "Testposition__version"]
    # columns sharing the "Art__" prefix
    + ["Art__GK", "Art__GR", "Art__GZ", "Art__K", "Art__LB"]
    + ["UserAttribut", "OrderNumber", "steps"]
)

In [None]:
"""
calculate and extract relevant metrics from y and pred
return metrics
"""


def get_metrics(clf, X, y, cv, pred):
    """Score the class predictions `pred` against the true labels `y`.

    Args:
        clf: not used by the computation.
        X: not used by the computation.
        y: true labels.
        cv: not used by the computation.
        pred: predicted class labels for the same samples as `y`.

    Returns:
        Tuple ``(accuracy, precision, recall, roc_auc, fpr)`` where ``fpr`` is
        ``fp / (fp + tn)`` read from the confusion matrix. The confusion
        matrix is unpacked into four cells, so two classes are expected.

    NOTE(review): ``roc_auc`` is computed from the hard predictions in `pred`,
    not from scores or probabilities - confirm this is intended.
    """
    scorers = (accuracy_score, precision_score, recall_score, roc_auc_score)
    scores = [scorer(y, pred) for scorer in scorers]

    tn, fp, fn, tp = confusion_matrix(y, pred).ravel()
    false_positive_rate = fp / (fp + tn)

    return (*scores, false_positive_rate)

DTE

In [None]:
## model fitting and validation: decision tree, subgroup "deutsch"

# Loop through the matrices and collect one result row per n. The rows are
# added to `metrics` once after the loop, because DataFrame.append is
# deprecated (removed in pandas 2.0) and copies the frame on every call.
dte_rows = []
for i in n:
    path = "erstsprache/matrix_deutsch" + str(i) + ".pkl"
    # the context manager closes the handle even when unpickling raises
    with open(path, "rb") as infile:
        df = pickle.load(infile)
    df = df.reset_index()
    X = df[feature_cols]
    y = df.y
    y = y.astype("int")
    # fixed 70/30 split so every model is scored on the same held-out rows
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=1
    )
    k = 5
    # only forwarded to get_metrics; no cross-validation is run in this cell
    cv = KFold(n_splits=k, random_state=None)

    # fit
    clf = DecisionTreeClassifier(criterion="entropy", max_depth=5)
    clf = clf.fit(X_train, y_train)
    pred = clf.predict(X_test)

    a, p, r, roc_auc, fpr = get_metrics(clf, X_test, y_test, cv, pred)
    dte_rows.append(
        {
            "model": "DTE",
            "group": "erstsprache",
            "subgroup": "deutsch",
            "Length": len(df),
            "Sentence": i,
            "Accuracy": a,
            "Precision": p,
            "Recall": r,
            "AUC": roc_auc,
            "FPR": fpr,
        }
    )

dte_frame = pd.DataFrame(dte_rows, columns=metrics.columns)
# skip the concat while `metrics` is still empty (avoids concatenating an
# all-object empty frame, which newer pandas versions warn about)
metrics = (
    dte_frame
    if metrics.empty
    else pd.concat([metrics, dte_frame], ignore_index=True)
)

KNN

In [None]:
## model fitting and validation: k-nearest neighbours, subgroup "deutsch"

# Loop through the matrices and collect one result row per n. The rows are
# added to `metrics` once after the loop, because DataFrame.append is
# deprecated (removed in pandas 2.0) and copies the frame on every call.
knn_rows = []
for i in n:
    path = "erstsprache/matrix_deutsch" + str(i) + ".pkl"
    # the context manager closes the handle even when unpickling raises
    with open(path, "rb") as infile:
        df = pickle.load(infile)
    df = df.reset_index()
    X = df[feature_cols]
    y = df.y
    y = y.astype("int")
    # fixed 70/30 split so every model is scored on the same held-out rows
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=1
    )
    k = 5
    # only forwarded to get_metrics; no cross-validation is run in this cell
    cv = KFold(n_splits=k, random_state=None)

    # knn
    knn = KNeighborsClassifier(n_neighbors=2)
    knn = knn.fit(X_train, y_train)
    pred = knn.predict(X_test)

    a, p, r, roc_auc, fpr = get_metrics(knn, X_test, y_test, cv, pred)
    knn_rows.append(
        {
            "model": "KNN",
            "group": "erstsprache",
            "subgroup": "deutsch",
            "Length": len(df),
            "Sentence": i,
            "Accuracy": a,
            "Precision": p,
            "Recall": r,
            "AUC": roc_auc,
            "FPR": fpr,
        }
    )

knn_frame = pd.DataFrame(knn_rows, columns=metrics.columns)
# skip the concat while `metrics` is still empty (avoids concatenating an
# all-object empty frame, which newer pandas versions warn about)
metrics = (
    knn_frame
    if metrics.empty
    else pd.concat([metrics, knn_frame], ignore_index=True)
)

MLP

In [None]:
## model fitting and validation


def build_model(input_dim=21):
    """Build the dropout prediction model (returned uncompiled).

    The network is a stack of fully connected ReLU layers with 21, 44, 22 and
    11 units, followed by a single sigmoid output unit.

    Args:
        input_dim: number of input features. The default of 21 equals the
            number of entries in `feature_cols`; pass another value when the
            feature list changes.

    Returns:
        The assembled `Sequential` model; the caller compiles and fits it.
    """
    model = Sequential()
    model.add(Dense(21, input_dim=input_dim, activation="relu"))
    model.add(Dense(44, activation="relu"))
    model.add(Dense(22, activation="relu"))
    model.add(Dense(11, activation="relu"))
    model.add(Dense(1, activation="sigmoid"))

    return model


"""
calculate and extract relevant metrics from y and pred
return metrics
"""


def get_dn_metrics(model, X, y):
    """Score a fitted network on the samples `X` with true labels `y`.

    Args:
        model: object whose ``predict(X, verbose=0)`` returns a 2d array; the
            first column is used as the predicted probability.
        X: feature rows to predict.
        y: true labels for the rows of `X`.

    Returns:
        Tuple ``(accuracy, precision, recall, roc_auc, fpr)``. Accuracy,
        precision, recall and fpr use the probabilities thresholded at 0.5;
        ``roc_auc`` uses the probabilities themselves. ``fpr`` is
        ``fp / (fp + tn)`` read from the confusion matrix.
    """
    # a single forward pass: the class labels are derived from the same
    # probabilities instead of running predict a second time
    yhat_probs = model.predict(X, verbose=0)[:, 0]
    yhat_classes = (yhat_probs > 0.5).astype("int32")

    a = accuracy_score(y, yhat_classes)
    p = precision_score(y, yhat_classes)
    r = recall_score(y, yhat_classes)
    roc_auc = roc_auc_score(y, yhat_probs)
    tn, fp, fn, tp = confusion_matrix(y, yhat_classes).ravel()
    fpr = fp / (fp + tn)

    return a, p, r, roc_auc, fpr


# Loop through the matrices and collect one result row per n. The rows are
# added to `metrics` once after the loop, because DataFrame.append is
# deprecated (removed in pandas 2.0) and copies the frame on every call.
# NOTE(review): no random seed is set for the network in this cell, so the
# scores can differ between runs.
dl_rows = []
for i in n:
    path = "erstsprache/matrix_deutsch" + str(i) + ".pkl"
    # the context manager closes the handle even when unpickling raises
    with open(path, "rb") as infile:
        df = pickle.load(infile)
    df = df.reset_index()

    X = df[feature_cols].astype(float)
    y = df.y
    y = y.astype("int")
    # fixed 70/30 split so every model is scored on the same held-out rows
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=1
    )

    model = build_model()

    model.compile(loss="binary_crossentropy", optimizer="Adam", metrics=["accuracy"])

    # NOTE(review): the test split doubles as validation data; no callbacks
    # are passed here, so it is only evaluated after each epoch - confirm.
    model.fit(
        x=X_train,
        y=y_train,
        epochs=10,
        batch_size=128,
        verbose=0,
        validation_data=(X_test, y_test),
    )

    # the former model.evaluate(...) call was dropped: its result was never used
    a, p, r, roc_auc, fpr = get_dn_metrics(model, X_test, y_test)
    dl_rows.append(
        {
            "model": "DL",
            "group": "erstsprache",
            "subgroup": "deutsch",
            "Length": len(df),
            "Sentence": i,
            "Accuracy": a,
            "Precision": p,
            "Recall": r,
            "AUC": roc_auc,
            "FPR": fpr,
        }
    )

dl_frame = pd.DataFrame(dl_rows, columns=metrics.columns)
# skip the concat while `metrics` is still empty (avoids concatenating an
# all-object empty frame, which newer pandas versions warn about)
metrics = (
    dl_frame
    if metrics.empty
    else pd.concat([metrics, dl_frame], ignore_index=True)
)

Group 2: migration

In [None]:
## load data per matrix and keep only the rows with eigSprache == 0
# NOTE(review): pickle.load can execute arbitrary code - only read trusted files.
for i in n:
    path = "../06_optimize_Fairness/eigSprache_allsessions/matrix" + str(i) + ".pkl"
    # the context manager closes the handle even when unpickling raises
    with open(path, "rb") as infile:
        df = pickle.load(infile)

    df = df[df.eigSprache == 0]
    # save the filtered matrix for the "migration" model cells
    path = "erstsprache/matrix_migration" + str(i) + ".pkl"
    df.to_pickle(path)

In [None]:
## define feature cols
# Same 21 names, in the same order, as the list defined for group 1.
feature_cols = (
    [
        "Erstloesung",
        "Schussel",
        "Erfolg",
        "Schwierigkeit",
        "ist_Schulzeit",
        "MehrfachFalsch",
        "vorher_abgebrochen",
        "Fehler",
        "Klassenstufe",
        "Jahredabei",
    ]
    # columns sharing the "Testposition__" prefix
    + ["Testposition__pruefung", "Testposition__training", "Testposition__version"]
    # columns sharing the "Art__" prefix
    + ["Art__GK", "Art__GR", "Art__GZ", "Art__K", "Art__LB"]
    + ["UserAttribut", "OrderNumber", "steps"]
)

DTE

In [None]:
## model fitting and validation: decision tree, subgroup "migration"

# Loop through the matrices and collect one result row per n. The rows are
# added to `metrics` once after the loop, because DataFrame.append is
# deprecated (removed in pandas 2.0) and copies the frame on every call.
dte_rows = []
for i in n:
    path = "erstsprache/matrix_migration" + str(i) + ".pkl"
    # the context manager closes the handle even when unpickling raises
    with open(path, "rb") as infile:
        df = pickle.load(infile)
    df = df.reset_index()
    X = df[feature_cols]
    y = df.y
    y = y.astype("int")
    # fixed 70/30 split so every model is scored on the same held-out rows
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=1
    )
    k = 5
    # only forwarded to get_metrics; no cross-validation is run in this cell
    cv = KFold(n_splits=k, random_state=None)

    # fit
    clf = DecisionTreeClassifier(criterion="entropy", max_depth=5)
    clf = clf.fit(X_train, y_train)
    pred = clf.predict(X_test)

    a, p, r, roc_auc, fpr = get_metrics(clf, X_test, y_test, cv, pred)
    dte_rows.append(
        {
            "model": "DTE",
            "group": "erstsprache",
            "subgroup": "migration",
            "Length": len(df),
            "Sentence": i,
            "Accuracy": a,
            "Precision": p,
            "Recall": r,
            "AUC": roc_auc,
            "FPR": fpr,
        }
    )

dte_frame = pd.DataFrame(dte_rows, columns=metrics.columns)
# skip the concat while `metrics` is still empty (avoids concatenating an
# all-object empty frame, which newer pandas versions warn about)
metrics = (
    dte_frame
    if metrics.empty
    else pd.concat([metrics, dte_frame], ignore_index=True)
)

KNN

In [None]:
## model fitting and validation: k-nearest neighbours, subgroup "migration"

# Loop through the matrices and collect one result row per n. The rows are
# added to `metrics` once after the loop, because DataFrame.append is
# deprecated (removed in pandas 2.0) and copies the frame on every call.
knn_rows = []
for i in n:
    # build models
    path = "erstsprache/matrix_migration" + str(i) + ".pkl"
    # the context manager closes the handle even when unpickling raises
    with open(path, "rb") as infile:
        df = pickle.load(infile)
    df = df.reset_index()
    X = df[feature_cols]
    y = df.y
    y = y.astype("int")
    # fixed 70/30 split so every model is scored on the same held-out rows
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=1
    )
    k = 5
    # only forwarded to get_metrics; no cross-validation is run in this cell
    cv = KFold(n_splits=k, random_state=None)

    # fit
    knn = KNeighborsClassifier(n_neighbors=2)
    knn = knn.fit(X_train, y_train)
    pred = knn.predict(X_test)

    a, p, r, roc_auc, fpr = get_metrics(knn, X_test, y_test, cv, pred)
    knn_rows.append(
        {
            "model": "KNN",
            "group": "erstsprache",
            "subgroup": "migration",
            "Length": len(df),
            "Sentence": i,
            "Accuracy": a,
            "Precision": p,
            "Recall": r,
            "AUC": roc_auc,
            "FPR": fpr,
        }
    )

knn_frame = pd.DataFrame(knn_rows, columns=metrics.columns)
# skip the concat while `metrics` is still empty (avoids concatenating an
# all-object empty frame, which newer pandas versions warn about)
metrics = (
    knn_frame
    if metrics.empty
    else pd.concat([metrics, knn_frame], ignore_index=True)
)

MLP

In [None]:
## model fitting and validation


def build_model(input_dim=21):
    """Build the dropout prediction model (returned uncompiled).

    The network is a stack of fully connected ReLU layers with 21, 44, 22 and
    11 units, followed by a single sigmoid output unit.

    Args:
        input_dim: number of input features. The default of 21 equals the
            number of entries in `feature_cols`; pass another value when the
            feature list changes.

    Returns:
        The assembled `Sequential` model; the caller compiles and fits it.
    """
    model = Sequential()
    model.add(Dense(21, input_dim=input_dim, activation="relu"))
    model.add(Dense(44, activation="relu"))
    model.add(Dense(22, activation="relu"))
    model.add(Dense(11, activation="relu"))
    model.add(Dense(1, activation="sigmoid"))

    return model


"""
calculate and extract relevant metrics from y and pred
return metrics
"""


def get_dn_metrics(model, X, y):
    """Score a fitted network on the samples `X` with true labels `y`.

    Args:
        model: object whose ``predict(X, verbose=0)`` returns a 2d array; the
            first column is used as the predicted probability.
        X: feature rows to predict.
        y: true labels for the rows of `X`.

    Returns:
        Tuple ``(accuracy, precision, recall, roc_auc, fpr)``. Accuracy,
        precision, recall and fpr use the probabilities thresholded at 0.5;
        ``roc_auc`` uses the probabilities themselves. ``fpr`` is
        ``fp / (fp + tn)`` read from the confusion matrix.
    """
    # a single forward pass: the class labels are derived from the same
    # probabilities instead of running predict a second time
    yhat_probs = model.predict(X, verbose=0)[:, 0]
    yhat_classes = (yhat_probs > 0.5).astype("int32")

    a = accuracy_score(y, yhat_classes)
    p = precision_score(y, yhat_classes)
    r = recall_score(y, yhat_classes)
    roc_auc = roc_auc_score(y, yhat_probs)
    tn, fp, fn, tp = confusion_matrix(y, yhat_classes).ravel()
    fpr = fp / (fp + tn)

    return a, p, r, roc_auc, fpr


# Loop through the matrices and collect one result row per n. The rows are
# added to `metrics` once after the loop, because DataFrame.append is
# deprecated (removed in pandas 2.0) and copies the frame on every call.
# NOTE(review): no random seed is set for the network in this cell, so the
# scores can differ between runs.
dl_rows = []
for i in n:
    path = "erstsprache/matrix_migration" + str(i) + ".pkl"
    # the context manager closes the handle even when unpickling raises
    with open(path, "rb") as infile:
        df = pickle.load(infile)
    df = df.reset_index()

    X = df[feature_cols].astype(float)
    y = df.y
    y = y.astype("int")
    # fixed 70/30 split so every model is scored on the same held-out rows
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=1
    )

    model = build_model()

    model.compile(loss="binary_crossentropy", optimizer="Adam", metrics=["accuracy"])

    # NOTE(review): the test split doubles as validation data; no callbacks
    # are passed here, so it is only evaluated after each epoch - confirm.
    model.fit(
        x=X_train,
        y=y_train,
        epochs=10,
        batch_size=128,
        verbose=0,
        validation_data=(X_test, y_test),
    )

    # the former model.evaluate(...) call was dropped: its result was never used
    a, p, r, roc_auc, fpr = get_dn_metrics(model, X_test, y_test)
    dl_rows.append(
        {
            "model": "DL",
            "group": "erstsprache",
            "subgroup": "migration",
            "Length": len(df),
            "Sentence": i,
            "Accuracy": a,
            "Precision": p,
            "Recall": r,
            "AUC": roc_auc,
            "FPR": fpr,
        }
    )

dl_frame = pd.DataFrame(dl_rows, columns=metrics.columns)
# skip the concat while `metrics` is still empty (avoids concatenating an
# all-object empty frame, which newer pandas versions warn about)
metrics = (
    dl_frame
    if metrics.empty
    else pd.concat([metrics, dl_frame], ignore_index=True)
)

Model metrics

In [None]:
## plot count of sentences by n and subgroup
ax = sns.lineplot(data=metrics, x="Sentence", y="Length", hue="subgroup")
# title the figure so it can be read on its own; the trailing ';' hides the repr
ax.set_title("Count of sentences by n and subgroup");

In [None]:
## one metrics frame per language subgroup
grouped = metrics.groupby(metrics["subgroup"])
df_deutsch, df_migration = (
    grouped.get_group(subgroup_name) for subgroup_name in ("deutsch", "migration")
)

In [None]:
## plot accuracy by n and model (subgroup: deutsch)
ax = sns.lineplot(data=df_deutsch, x="Sentence", y="Accuracy", hue="model")
# the title tells this figure apart from the otherwise identical migration plot
ax.set_title("Accuracy by n and model - subgroup deutsch");

In [None]:
## plot accuracy by n and model (subgroup: migration)
ax = sns.lineplot(data=df_migration, x="Sentence", y="Accuracy", hue="model")
# the title tells this figure apart from the otherwise identical deutsch plot
ax.set_title("Accuracy by n and model - subgroup migration");

Evaluation

In [None]:
## fairness evaluation: differences between the subgroups, per model and n
# Group by the metrics table's own "group" column. The key used to be
# `df.group`, but `df` is the last matrix loaded by the modelling loops and
# not the metrics table.
grouped = metrics.groupby(metrics["group"])
df_erstsprache = grouped.get_group("erstsprache")
# calculate fairness metrics
df_erstsprache = df_erstsprache.drop(columns=["group", "Accuracy"])
# rows: (model, Sentence); columns: (metric, subgroup)
df_erstsprache = pd.pivot_table(
    df_erstsprache,
    values=["Precision", "Recall", "AUC", "FPR"],
    index=["model", "Sentence"],
    columns=["subgroup"],
)
# NOTE(review): PP and SA are computed as deutsch - migration while EO and PE
# are migration - deutsch; confirm the sign conventions are intended.
df_erstsprache["PP"] = (
    df_erstsprache.Precision.deutsch - df_erstsprache.Precision.migration
)
df_erstsprache["EO"] = df_erstsprache.Recall.migration - df_erstsprache.Recall.deutsch
df_erstsprache["SA"] = df_erstsprache.AUC.deutsch - df_erstsprache.AUC.migration
df_erstsprache["PE"] = df_erstsprache.FPR.migration - df_erstsprache.FPR.deutsch
# keep only the difference columns and flatten the column index
df_erstsprache = df_erstsprache.drop(columns=["AUC", "Precision", "Recall", "FPR"])
df_erstsprache.columns = df_erstsprache.columns.droplevel(1)
# final layout: one row per Sentence, columns (fairness metric, model)
df_erstsprache = pd.pivot_table(
    df_erstsprache,
    values=["PP", "EO", "SA", "PE"],
    index=["Sentence"],
    columns=["model"],
)

In [None]:
"""
Functions to format the result tables.
Two thresholds are highlighted: values beyond |0.02| in orange and values beyond |0.05| in red.
All negative values are set in bold.
"""


def threshold001(v, props=""):
    """Return `props` when `v` lies outside [-0.02, 0.02], otherwise None."""
    if abs(v) > 0.02:
        return props
    return None


def threshold005(v, props=""):
    """Return `props` when `v` lies outside [-0.05, 0.05], otherwise None."""
    if abs(v) > 0.05:
        return props
    return None


def negativeValue(v, props=""):
    """Return `props` when `v` is below zero, otherwise None."""
    if v < 0:
        return props
    return None


def showTable(df):
    """Return a Styler for `df` with the threshold and sign highlighting applied.

    All cells get black, right-aligned text on a white background. On top of
    that, cell by cell: orange text where `threshold001` matches, red text
    where `threshold005` matches (applied later, so it overrides orange), and
    bold text where `negativeValue` matches.

    Args:
        df: DataFrame of numeric values to render.

    Returns:
        The pandas Styler object.
    """
    # "text-align" is the CSS property for horizontal alignment; the former
    # "align" key is not a CSS property and had no effect
    styled = df.style.set_properties(
        **{"color": "black", "text-align": "right", "background-color": "white"}
    )
    cell_rules = [
        (threshold001, "color:orange;"),
        (threshold005, "color:red;"),
        (negativeValue, "font-weight:bold;"),
    ]
    for rule, css in cell_rules:
        # Styler.applymap was renamed to Styler.map in pandas 2.1; use the new
        # name when it exists and fall back to the old one otherwise
        apply_cellwise = getattr(styled, "map", None) or styled.applymap
        styled = apply_cellwise(rule, props=css)
    return styled

In [None]:
# s = showTable(df_erstsprache)
# s

In [None]:
## create a table that is readable
# each cell is the mean of one fairness metric over a block of values of n

met = ["EO", "PE", "PP", "SA"]
model = ["DL", "DTE", "KNN"]
# (row label, first n, last n), both bounds inclusive. The bounds are stored
# explicitly: parsing them from the label broke for "02-9" (its last two
# characters are "-9", i.e. -9, which made the block empty), and the old
# hand-written divisors did not match the number of values that were summed.
ranges = [
    ("02-9", 2, 9),
    ("10-19", 10, 19),
    ("20-29", 20, 29),
    ("30-39", 30, 39),
    ("40-49", 40, 49),
    ("50-60", 50, 60),
]

# collect plain records and build the frame once instead of concatenating
# inside the loop
mean_records = []
for m in met:
    for mo in model:
        per_sentence = df_erstsprache[m][mo]
        for label, first_n, last_n in ranges:
            # an explicit label list raises KeyError when an n is missing;
            # skipna=False keeps a NaN visible instead of averaging around it
            block = per_sentence.loc[list(range(first_n, last_n + 1))]
            mean_records.append(
                {
                    "Metrik": m,
                    "Model": mo,
                    "Range": label,
                    "Val": block.mean(skipna=False),
                }
            )

frame_means = pd.DataFrame(mean_records, columns=["Metrik", "Model", "Range", "Val"])

# pivot table
mean_table = pd.pivot_table(
    frame_means, values=["Val"], index=["Range"], columns=["Metrik", "Model"]
)
showTable(mean_table)

In [None]:
# save
# the context manager writes and closes the workbook on exit, also when
# to_excel raises; it replaces the deprecated writer.save()
with pd.ExcelWriter("df_erstsprache_AggBias.xlsx", engine="xlsxwriter") as writer:
    df_erstsprache.to_excel(writer, sheet_name="erstsprache")