In [1]:
import pandas as pd
import pickle
import seaborn as sns
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import tree
from matplotlib import pyplot as plt
from sklearn.model_selection import cross_val_score, KFold, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    log_loss,
    roc_auc_score,
    recall_score,
    precision_score,
    accuracy_score,
    plot_roc_curve,
    plot_confusion_matrix,
    roc_curve,
    confusion_matrix,
)
import itertools
from tensorflow.keras.initializers import Constant, TruncatedNormal
from tensorflow.keras.layers import Activation, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.preprocessing import LabelEncoder
from numpy import mean, absolute

# Oversampling and under sampling
from imblearn.over_sampling import RandomOverSampler, SMOTE
from imblearn.under_sampling import RandomUnderSampler, NearMiss
from collections import Counter

Representational Bias Mitigation: Balance out dataset

In [None]:
## values of n used by the temporal models: 2, 3, ..., 60
n = [*range(2, 61)]

In [2]:
## load survey data
# NOTE: unpickling executes code stored in the file; only load trusted pickles.
# The context manager closes the file even if pickle.load raises.
with open(
    "../../02_dropout_prediction/01_keep_it_up/fairness_ready.pkl", "rb"
) as infile:
    survey_data = pickle.load(infile)

# Keep only the merge key and the sensitive attribute, and drop exact duplicate
# (UebungsID, AbiEltern) pairs.
# NOTE(review): an UebungsID with two different AbiEltern values would survive as
# two rows and duplicate matrix rows in the later left merge - confirm uniqueness.
survey_data = survey_data[["UebungsID", "AbiEltern"]].drop_duplicates()

In [3]:
## load data per matrix
# balance out data set: for every n, attach AbiEltern to the matrix, oversample the
# smaller AbiEltern class with SMOTE and store the result under AbiEltern_allsessions/.
for i in n:
    path = (
        "../../02_dropout_prediction/01_keep_it_up/matrices_allsessions/matrix"
        + str(i)
        + ".pkl"
    )
    # The context manager closes the file even if unpickling fails.
    with open(path, "rb") as infile:
        df = pickle.load(infile)
    df = df.reset_index(level=0)
    df = pd.merge(df, survey_data, how="left", on="UebungsID")

    # prepare feature AbiEltern: cast to float and recode value 2 as 1
    df.AbiEltern = df.AbiEltern.astype("float")
    df["AbiEltern"] = df["AbiEltern"].replace([2], 1)
    # Keep only rows coded 0 or 1 (0-rows first, then 1-rows). Rows without a survey
    # match are NaN after the left merge and are dropped here.
    df_1 = df[df.AbiEltern == 1]
    df_0 = df[df.AbiEltern == 0]
    df = pd.concat([df_0, df_1])

    # Oversample the minority AbiEltern class with SMOTE (synthetic samples, not
    # random duplication).
    # NOTE(review): every column except AbiEltern is passed to SMOTE as a feature,
    # including UebungsID and the label column `y` that the model cells read later;
    # synthetic rows may therefore carry interpolated ids/labels - confirm intended.
    X_df = df.drop(columns=["AbiEltern"])
    y_df = df.AbiEltern
    smote = SMOTE(random_state=42)
    X_train_smote, y_train_smote = smote.fit_resample(X_df, y_df)
    # Attach the resampled target positionally so the result does not depend on
    # how the two returned objects are indexed.
    df = X_train_smote.assign(AbiEltern=list(y_train_smote.values))

    # save
    path = "AbiEltern_allsessions/matrix" + str(i) + ".pkl"
    df.to_pickle(path)

In [None]:
## plot count of sentences per matrix
# One record per balanced matrix: n ("Sentence") and the matrix's row count ("Count").
# The records are collected in a list and turned into a frame once, because
# DataFrame.append was removed in pandas 2.0 and copies the frame on every call.
length_records = []

for x in n:
    path = "AbiEltern_allsessions/matrix" + str(x) + ".pkl"
    # The context manager closes the file even if unpickling fails.
    with open(path, "rb") as infile:
        balanced_matrix = pickle.load(infile)
    length_records.append({"Sentence": x, "Count": len(balanced_matrix)})

sentence_len = pd.DataFrame(length_records, columns=["Sentence", "Count"])
sentence_len["Sentence"] = sentence_len["Sentence"].astype("int")
sentence_len["Count"] = sentence_len["Count"].astype("int")
sns.lineplot(data=sentence_len, x="Sentence", y="Count")

Models

In [None]:
## define metrics dataframe
# Empty result table; the model cells add one row per (model, group, subgroup, n).
metric_columns = [
    "model",
    "group",
    "subgroup",
    "Length",
    "Sentence",
    "Accuracy",
    "Precision",
    "Recall",
    "AUC",
    "FPR",
]
metrics = pd.DataFrame(columns=metric_columns)

In [None]:
## define feature cols
# Model input columns, in the order the matrices are sliced with. The list has 24
# entries, which matches the input_dim=24 hard-coded in build_model.
feature_cols = (
    [
        "Erstloesung",
        "Schussel",
        "Erfolg",
        "Schwierigkeit",
        "ist_Schulzeit",
        "MehrfachFalsch",
        "vorher_abgebrochen",
        "Fehler",
        "Klassenstufe",
        "Jahredabei",
        "AnzahlAufgaben",
    ]
    # one-hot style columns, grouped by prefix
    + ["Sex__m", "Sex__w"]
    + ["Testposition__pruefung", "Testposition__training", "Testposition__version"]
    + ["Art__GK", "Art__GR", "Art__GZ", "Art__K", "Art__LB"]
    + ["UserAttribut", "OrderNumber", "steps"]
)

In [None]:
"""
calculate and extract relevant metrics from y and pred
return metrics
"""


def get_metrics(clf, X, y, cv, pred):
    """Compute evaluation metrics from true labels and hard predictions.

    Parameters
    ----------
    clf, X, cv
        Not used by the computation; kept so existing call sites keep working.
    y
        True binary labels. Assumed to be coded 0/1 (callers cast them to int).
    pred
        Predicted class labels for the same rows as ``y``.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, roc_auc, fpr)``. ``roc_auc`` is NaN when
        ``y`` contains a single class, and ``fpr`` is NaN when ``y`` contains no
        negatives, so one degenerate subgroup does not abort a whole run.
    """
    a = accuracy_score(y, pred)
    p = precision_score(y, pred)
    r = recall_score(y, pred)
    # AUC is undefined with only one class in y; report NaN instead of letting
    # roc_auc_score raise.
    # NOTE(review): this AUC is computed from hard labels, whereas get_dn_metrics
    # uses predicted probabilities - the two are not directly comparable.
    roc_auc = roc_auc_score(y, pred) if len(np.unique(y)) > 1 else float("nan")
    # Fixing the label set keeps the matrix 2x2 even if a class is absent.
    tn, fp, fn, tp = confusion_matrix(y, pred, labels=[0, 1]).ravel()
    negatives = fp + tn
    fpr = fp / negatives if negatives else float("nan")

    return a, p, r, roc_auc, fpr

Decision Tree Classifier

In [None]:
## model fitting and validation for each subgroup
# For every n: fit a decision tree on a 70/30 split of the balanced matrix, score it
# on the held-out part ("all") and then on each subgroup matrix.

# Subgroup evaluation sets: (group, subgroup, directory holding matrix<n>.pkl).
# Kept as one list of tuples so the loop variables below do not shadow the lists
# they are iterating over.
dte_subgroup_sets = [
    ("abiEltern", "abi", "matrices_forte_abi"),
    ("abiEltern", "keinAbi", "matrices_forte_keinAbi"),
]

# Result rows are collected here and added to `metrics` once after the loop;
# DataFrame.append was removed in pandas 2.0.
dte_rows = []

# loop through matrices
for i in n:
    path = "AbiEltern_allsessions/matrix" + str(i) + ".pkl"
    with open(path, "rb") as infile:
        df = pickle.load(infile)
    df = df.reset_index()

    # prepare features
    X = df[feature_cols]
    y = df.y.astype("int")

    # prepare training and validation
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=1
    )

    # fit; random_state makes the choice between equally good splits reproducible
    clf = DecisionTreeClassifier(criterion="entropy", max_depth=5, random_state=1)
    clf = clf.fit(X_train, y_train)
    pred = clf.predict(X_test)

    # call function to get metrics and collect the row
    # (get_metrics does not use its `cv` argument, so None is passed)
    a, p, r, roc_auc, fpr = get_metrics(clf, X_test, y_test, None, pred)
    dte_rows.append(
        {
            "model": "DTE",
            "group": "all",
            "subgroup": "all",
            "Length": len(df),
            "Sentence": i,
            "Accuracy": a,
            "Precision": p,
            "Recall": r,
            "AUC": roc_auc,
            "FPR": fpr,
        }
    )

    ##
    # let the model above predict for each subgroup and save results to evaluate later
    # NOTE(review): each subgroup matrix is scored in full; if it shares rows with
    # the training split the subgroup scores are optimistic - confirm.
    for group, subgroup, matrix in dte_subgroup_sets:
        path = (
            "../../02_dropout_prediction/01_keep_it_up/"
            + matrix
            + "/matrix"
            + str(i)
            + ".pkl"
        )
        with open(path, "rb") as infile:
            sub_df = pickle.load(infile)
        sub_df = sub_df.reset_index()
        X = sub_df[feature_cols]
        y = sub_df.y.astype("int")
        pred = clf.predict(X)

        # call function to get metrics
        a, p, r, roc_auc, fpr = get_metrics(clf, X, y, None, pred)
        dte_rows.append(
            {
                "model": "DTE",
                "group": group,
                "subgroup": subgroup,
                "Length": len(sub_df),
                "Sentence": i,
                "Accuracy": a,
                "Precision": p,
                "Recall": r,
                "AUC": roc_auc,
                "FPR": fpr,
            }
        )

# Add all rows of this cell to the shared metrics table in one step.
dte_frame = pd.DataFrame(dte_rows, columns=metrics.columns)
metrics = (
    dte_frame
    if metrics.empty
    else pd.concat([metrics, dte_frame], ignore_index=True)
)

KNN

In [None]:
## model fitting and validation for each subgroup
# For every n: fit a 2-nearest-neighbour classifier on a 70/30 split of the balanced
# matrix, score it on the held-out part ("all") and then on each subgroup matrix.

# Subgroup evaluation sets: (group, subgroup, directory holding matrix<n>.pkl).
# Kept as one list of tuples so the loop variables below do not shadow the lists
# they are iterating over.
knn_subgroup_sets = [
    ("abiEltern", "abi", "matrices_forte_abi"),
    ("abiEltern", "keinAbi", "matrices_forte_keinAbi"),
]

# Result rows are collected here and added to `metrics` once after the loop;
# DataFrame.append was removed in pandas 2.0.
knn_rows = []

# loop through matrices
for i in n:
    path = "AbiEltern_allsessions/matrix" + str(i) + ".pkl"
    with open(path, "rb") as infile:
        df = pickle.load(infile)
    df = df.reset_index()

    # prepare features
    X = df[feature_cols]
    y = df.y.astype("int")

    # prepare training and validation
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=1
    )

    # fit
    knn = KNeighborsClassifier(n_neighbors=2)
    knn = knn.fit(X_train, y_train)
    pred = knn.predict(X_test)

    # call function to get metrics and collect the row
    # (get_metrics does not use its `cv` argument, so None is passed)
    a, p, r, roc_auc, fpr = get_metrics(knn, X_test, y_test, None, pred)
    knn_rows.append(
        {
            "model": "KNN",
            "group": "all",
            "subgroup": "all",
            "Length": len(df),
            "Sentence": i,
            "Accuracy": a,
            "Precision": p,
            "Recall": r,
            "AUC": roc_auc,
            "FPR": fpr,
        }
    )

    ##
    # let the model above predict for each subgroup and save results to evaluate later
    # NOTE(review): each subgroup matrix is scored in full; if it shares rows with
    # the training split the subgroup scores are optimistic - confirm.
    for group, subgroup, matrix in knn_subgroup_sets:
        path = (
            "../../02_dropout_prediction/01_keep_it_up/"
            + matrix
            + "/matrix"
            + str(i)
            + ".pkl"
        )
        with open(path, "rb") as infile:
            sub_df = pickle.load(infile)
        sub_df = sub_df.reset_index()
        X = sub_df[feature_cols]
        y = sub_df.y.astype("int")
        pred = knn.predict(X)

        # call function to get metrics
        a, p, r, roc_auc, fpr = get_metrics(knn, X, y, None, pred)
        knn_rows.append(
            {
                "model": "KNN",
                "group": group,
                "subgroup": subgroup,
                "Length": len(sub_df),
                "Sentence": i,
                "Accuracy": a,
                "Precision": p,
                "Recall": r,
                "AUC": roc_auc,
                "FPR": fpr,
            }
        )

# Add all rows of this cell to the shared metrics table in one step.
knn_frame = pd.DataFrame(knn_rows, columns=metrics.columns)
metrics = (
    knn_frame
    if metrics.empty
    else pd.concat([metrics, knn_frame], ignore_index=True)
)

MLP

In [None]:
## model fitting and validation for each subgroup

""""
build dropout prediction model
"""


def build_model(input_dim=24):
    """Build the (uncompiled) dropout prediction network.

    The network is a stack of fully connected layers: four ReLU layers with
    24, 48, 24 and 12 units, followed by a single sigmoid output unit.

    Parameters
    ----------
    input_dim : int, optional
        Number of input features. Defaults to 24, the value that was previously
        hard-coded, so ``build_model()`` behaves as before.

    Returns
    -------
    Sequential
        The model; the caller is responsible for compiling and fitting it.
    """
    model = Sequential()
    model.add(Dense(24, input_dim=input_dim, activation="relu"))
    model.add(Dense(48, activation="relu"))
    model.add(Dense(24, activation="relu"))
    model.add(Dense(12, activation="relu"))
    model.add(Dense(1, activation="sigmoid"))

    return model


"""
calculate and extract relevant metrics from y and pred
return metrics
"""


def get_dn_metrics(model, X, y):
    """Compute evaluation metrics for a fitted network on ``X`` / ``y``.

    Parameters
    ----------
    model
        Fitted model whose ``predict(X, verbose=0)`` returns a 2-D array with the
        positive-class probability in column 0.
    X
        Feature rows to score.
    y
        True binary labels for ``X``. Assumed to be coded 0/1.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, roc_auc, fpr)``. Class labels are the
        probabilities thresholded at 0.5; ``roc_auc`` uses the probabilities.
        ``roc_auc`` is NaN when ``y`` contains a single class, and ``fpr`` is NaN
        when ``y`` contains no negatives.
    """
    # Predict once (silently) and derive the hard labels from the same
    # probabilities instead of running the network a second time.
    yhat_probs = model.predict(X, verbose=0)
    # reduce to 1d array
    yhat_probs = yhat_probs[:, 0]
    yhat_classes = (yhat_probs > 0.5).astype("int32")
    a = accuracy_score(y, yhat_classes)
    p = precision_score(y, yhat_classes)
    r = recall_score(y, yhat_classes)
    # AUC is undefined with only one class in y; report NaN instead of raising.
    roc_auc = roc_auc_score(y, yhat_probs) if len(np.unique(y)) > 1 else float("nan")
    # Fixing the label set keeps the matrix 2x2 even if a class is absent.
    tn, fp, fn, tp = confusion_matrix(y, yhat_classes, labels=[0, 1]).ravel()
    negatives = fp + tn
    fpr = fp / negatives if negatives else float("nan")

    return a, p, r, roc_auc, fpr


# For every n: train the network on a 70/30 split of the balanced matrix, score it
# on the held-out part ("all") and then on each subgroup matrix.

# Subgroup evaluation sets: (group, subgroup, directory holding matrix<n>.pkl).
# Kept as one list of tuples so the loop variables below do not shadow the lists
# they are iterating over.
dl_subgroup_sets = [
    ("abiEltern", "abi", "matrices_forte_abi"),
    ("abiEltern", "keinAbi", "matrices_forte_keinAbi"),
]

# Result rows are collected here and added to `metrics` once after the loop;
# DataFrame.append was removed in pandas 2.0.
dl_rows = []

# loop through matrices
for i in n:
    path = "AbiEltern_allsessions/matrix" + str(i) + ".pkl"
    with open(path, "rb") as infile:
        df = pickle.load(infile)
    df = df.reset_index()

    # prepare features
    X = df[feature_cols].astype(float)
    y = df.y.astype("int")
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=1
    )

    # NOTE(review): no TensorFlow seed is set in this cell, so weights and scores
    # may differ between runs.
    model = build_model()

    model.compile(loss="binary_crossentropy", optimizer="Adam", metrics=["accuracy"])

    model.fit(
        x=X_train,
        y=y_train,
        epochs=10,
        batch_size=128,
        verbose=0,
        validation_data=(X_test, y_test),
    )

    # call function to get metrics and collect the row
    a, p, r, roc_auc, fpr = get_dn_metrics(model, X_test, y_test)
    dl_rows.append(
        {
            "model": "DL",
            "group": "all",
            "subgroup": "all",
            "Length": len(df),
            "Sentence": i,
            "Accuracy": a,
            "Precision": p,
            "Recall": r,
            "AUC": roc_auc,
            "FPR": fpr,
        }
    )

    ##
    # let the model above predict for each subgroup and save results to evaluate later
    # NOTE(review): each subgroup matrix is scored in full; if it shares rows with
    # the training split the subgroup scores are optimistic - confirm.
    for group, subgroup, matrix in dl_subgroup_sets:
        path = (
            "../../02_dropout_prediction/01_keep_it_up/"
            + matrix
            + "/matrix"
            + str(i)
            + ".pkl"
        )
        with open(path, "rb") as infile:
            sub_df = pickle.load(infile)
        sub_df = sub_df.reset_index()
        X = sub_df[feature_cols].astype(float)
        y = sub_df.y.astype("int")

        # call function to get metrics and collect the row
        a, p, r, roc_auc, fpr = get_dn_metrics(model, X, y)
        dl_rows.append(
            {
                "model": "DL",
                "group": group,
                "subgroup": subgroup,
                "Length": len(sub_df),
                "Sentence": i,
                "Accuracy": a,
                "Precision": p,
                "Recall": r,
                "AUC": roc_auc,
                "FPR": fpr,
            }
        )

# Add all rows of this cell to the shared metrics table in one step.
dl_frame = pd.DataFrame(dl_rows, columns=metrics.columns)
metrics = (
    dl_frame
    if metrics.empty
    else pd.concat([metrics, dl_frame], ignore_index=True)
)

In [None]:
## construct dfs for all groups from metric df
grouped = metrics.groupby(metrics.group)
df_all = grouped.get_group("all")
modell = df_all.groupby(df_all.model)
dte = modell.get_group("DTE")
knn = modell.get_group("KNN")
dl = modell.get_group("DL")

In [None]:
## plot accuracy by n and model
# One line per model; the title lets the figure stand alone when the notebook is skimmed.
ax = sns.lineplot(data=df_all, x="Sentence", y="Accuracy", hue="model")
ax.set_title("Held-out accuracy by n and model");

In [None]:
## plot matrix size by n and model
# "Length" is the row count of the matrix each "all" row was computed from (len(df)
# in the model cells).
ax = sns.lineplot(data=df_all, x="Sentence", y="Length", hue="model")
ax.set_title("Rows per balanced matrix by n and model");

Evaluate

In [None]:
# Select the subgroup rows using the metrics table's own "group" column.
# (The key used to be `df.group`, but `df` is the last feature matrix loaded by the
# model cells, not the metrics table.)
grouped = metrics.groupby("group")
df_abiEltern = grouped.get_group("abiEltern")

# calculate fairness metrics
# Step 1: one row per (model, Sentence), one column per (metric, subgroup).
df_abiEltern = df_abiEltern.drop(columns=["group", "Accuracy"])
df_abiEltern = pd.pivot_table(
    df_abiEltern,
    values=["Precision", "Recall", "AUC", "FPR"],
    index=["model", "Sentence"],
    columns=["subgroup"],
)
# Step 2: subgroup gaps. Note the sign convention: PP and SA are abi - keinAbi,
# while EO and PE are keinAbi - abi.
# NOTE(review): the abbreviations presumably stand for predictive parity (PP),
# equal opportunity (EO), slicing analysis (SA) and predictive equality (PE) - confirm.
df_abiEltern["PP"] = df_abiEltern.Precision.abi - df_abiEltern.Precision.keinAbi
df_abiEltern["EO"] = df_abiEltern.Recall.keinAbi - df_abiEltern.Recall.abi
df_abiEltern["SA"] = df_abiEltern.AUC.abi - df_abiEltern.AUC.keinAbi
df_abiEltern["PE"] = df_abiEltern.FPR.keinAbi - df_abiEltern.FPR.abi
# Step 3: keep only the gap columns and flatten the two-level column index.
df_abiEltern = df_abiEltern.drop(columns=["AUC", "Precision", "Recall", "FPR"])
df_abiEltern.columns = df_abiEltern.columns.droplevel(1)
# Step 4: reshape to one row per Sentence and one column per (gap, model).
df_abiEltern = pd.pivot_table(
    df_abiEltern, values=["PP", "EO", "SA", "PE"], index=["Sentence"], columns=["model"]
)

In [None]:
"""
functions to format results
set two thresholds: one at |0.02| in orange and one at |0.05| in red
format all negative values in bold
"""


def threshold001(v, props=""):
    """Return ``props`` when ``v`` lies outside [-0.02, 0.02], otherwise ``None``.

    Despite the name, the cut-off is 0.02. Used as a cell-wise styling rule.
    """
    if abs(v) > 0.02:
        return props
    return None


def threshold005(v, props=""):
    """Return ``props`` when ``v`` lies outside [-0.05, 0.05], otherwise ``None``.

    Used as a cell-wise styling rule.
    """
    if abs(v) > 0.05:
        return props
    return None


def negativeValue(v, props=""):
    """Return ``props`` for strictly negative ``v``, otherwise ``None``.

    Used as a cell-wise styling rule.
    """
    if v < 0:
        return props
    return None


def showTable(df):
    """Return a Styler for ``df`` with the threshold and sign highlighting applied.

    Base look: black, right-aligned text on a white background. On top of that,
    per cell and in this order: orange text via ``threshold001``, red text via
    ``threshold005``, bold text via ``negativeValue``.
    """
    styled = df.style.set_properties(color="black", align="right")
    styled = styled.set_properties(**{"background-color": "white"})
    # Later rules are applied after earlier ones, so the order is kept as is.
    cell_rules = (
        (threshold001, "color:orange;"),
        (threshold005, "color:red;"),
        (negativeValue, "font-weight:bold;"),
    )
    for rule, css in cell_rules:
        styled = styled.applymap(rule, props=css)
    return styled

In [None]:
## create a table that is readable
# each cell is the mean of one metric for one model over a block of consecutive n

met = ["EO", "PE", "PP", "SA"]
model = ["DL", "DTE", "KNN"]
# (label, first n, last n), both bounds inclusive. The bounds are given explicitly:
# parsing them out of the label read "02-9" as 2..-9 (an empty range, so that cell
# was always 0), and the fixed divisors did not match the number of values summed
# (10 values / 9, 11 values / 10). The labels are unchanged because they become the
# row index of the final table.
ranges = [
    ("02-9", 2, 9),
    ("10-19", 10, 19),
    ("20-29", 20, 29),
    ("30-39", 30, 39),
    ("40-49", 40, 49),
    ("50-60", 50, 60),
]

# Collect one record per (metric, model, range) and build the frame once instead of
# concatenating inside the loop.
mean_records = []

# for each metric
for m in met:
    for mo in model:
        per_sentence = df_abiEltern[m][mo]
        for label, first, last in ranges:
            # Explicit label lookup: raises KeyError if an n of the block is missing.
            block = per_sentence.loc[list(range(first, last + 1))]
            # Series.mean divides by the number of values present (NaN is skipped).
            mean_records.append(
                {"Metrik": m, "Model": mo, "Range": label, "Val": block.mean()}
            )

frame_means = pd.DataFrame(mean_records, columns=["Metrik", "Model", "Range", "Val"])

# pivot table
mean_table = pd.pivot_table(
    frame_means, values=["Val"], index=["Range"], columns=["Metrik", "Model"]
)
showTable(mean_table)

In [None]:
## save results
# The context manager writes and closes the workbook on exit, also when to_excel
# raises; ExcelWriter.save() was removed in pandas 2.0.
with pd.ExcelWriter("df_abiEltern.xlsx", engine="xlsxwriter") as writer:
    df_abiEltern.to_excel(writer, sheet_name="AbiEltern")