# Requirements

In [2]:
import sys
sys.path.append("../") # go to parent dir

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.special import expit

from src.settings import LOGS_ROOT

# Global plot defaults: square 8x8 inch figures on seaborn's white grid style.
sns.set(style="whitegrid", rc={"figure.figsize": (8, 8)})

# A smarter approach for results

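## Data load

Run exactly one of the three dataset cells below (ABIDE 869, OASIS, or FBIRN) before continuing: each cell overwrites `ds`, `data_paths`, `accuracies`, and `AUCs`, so with "Run All" the last cell (FBIRN) wins.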

In [4]:
# ABIDE 869
# Dataset label used in plot titles and output file names.
ds = "ABIDE 869"

# One scores.npz per cross-validation fold; the tuple lists, in fold order
# (k_0 .. k_4), the trial sub-directory that is read for that fold.
data_paths = [
    LOGS_ROOT.joinpath(
        f"220716.075108-experiment-mlp-abide_869/k_{fold}/{trial}/scores.npz"
    )
    for fold, trial in enumerate(("0002", "0009", "0008", "0004", "0000"))
]

# Per-fold metrics, in the same fold order as `data_paths`; they are only
# used for plot titles further down.
accuracies = [0.655, 0.684, 0.713, 0.747, 0.746]
AUCs = [
    0.6912234042553193,
    0.7150265957446809,
    0.7523936170212767,
    0.8257161892071951,
    0.7970643684352275,
]


In [3]:
# OASIS
# Dataset label used in plot titles and output file names.
ds = "OASIS"

# One scores.npz per cross-validation fold; the tuple lists, in fold order
# (k_0 .. k_4), the trial sub-directory that is read for that fold.
data_paths = [
    LOGS_ROOT.joinpath(
        f"220721.184622-experiment-mlp-oasis/k_{fold}/{trial}/scores.npz"
    )
    for fold, trial in enumerate(("0001", "0002", "0008", "0002", "0005"))
]

# Per-fold metrics, in the same fold order as `data_paths`; they are only
# used for plot titles further down.
accuracies = [
    0.8181818181818182,
    0.8,
    0.8121212121212121,
    0.8414634146341463,
    0.8597560975609756,
]
AUCs = [
    0.855410866636731,
    0.7151648351648352,
    0.7371428571428571,
    0.8031674208144797,
    0.8404977375565611,
]


In [15]:
# FBIRN
# Dataset label used in plot titles and output file names.
ds = "FBIRN"

# One scores.npz per cross-validation fold; the tuple lists, in fold order
# (k_0 .. k_4), the trial sub-directory that is read for that fold.
data_paths = [
    LOGS_ROOT.joinpath(
        f"220721.175931-experiment-mlp-fbirn/k_{fold}/{trial}/scores.npz"
    )
    for fold, trial in enumerate(("0008", "0008", "0004", "0003", "0005"))
]

# Per-fold metrics, in the same fold order as `data_paths`; they are only
# used for plot titles further down.
accuracies = [
    0.8095238095238095,
    0.7903225806451613,
    0.7741935483870968,
    0.8709677419354839,
    0.7580645161290323,
]
AUCs = [
    0.8477822580645161,
    0.8104166666666667,
    0.875,
    0.8989583333333333,
    0.8645833333333333,
]

In [4]:
# Read the "logits" and "targets" arrays of every fold from its scores.npz.
# The archive is opened in a `with` block so the file handle is closed as soon
# as both arrays have been read into memory.
logits_list = []
targets_list = []
for data_path in data_paths:
    with np.load(data_path) as fold_scores:
        logits_list.append(fold_scores["logits"])
        targets_list.append(fold_scores["targets"])

# One DataFrame per fold: the first two logit columns become "y0" / "y1" and
# the target becomes "class". The per-fold `logits` array is indexed here;
# indexing `logits_list` (a plain Python list) with `[:, 0]` raises TypeError.
data_list = [
    pd.DataFrame(
        {
            "y0": logits[:, 0],
            "y1": logits[:, 1],
            "class": targets,
        }
    )
    for logits, targets in zip(logits_list, targets_list)
]

## Logistic regression

In [7]:
from sklearn.linear_model import LogisticRegression

# Split every fold's frame into a feature matrix (first two columns, y0/y1)
# and an integer label vector (third column, class).
X_list = [frame.to_numpy()[:, 0:2] for frame in data_list]
y_list = [frame.to_numpy()[:, 2].astype(int) for frame in data_list]

# Fit one logistic regression per fold and keep its parameters together with
# the class probabilities it predicts for the very samples it was fitted on.
coef_list, intercept_list, scores = [], [], []
for X, y in zip(X_list, y_list):
    clf = LogisticRegression(random_state=0)
    clf.fit(X, y)
    coef_list.append(clf.coef_)
    intercept_list.append(clf.intercept_)
    scores.append(clf.predict_proba(X))

In [None]:
# x-range over which each decision line is drawn (same for every fold).
X_plot = np.linspace(2, 5, 100)

for i, (fold_frame, coef, intercept) in enumerate(
    zip(data_list, coef_list, intercept_list)
):
    # The LR boundary w0*y0 + w1*y1 + b = 0, solved for y1 as a line in y0.
    w0, w1 = coef[0, 0], coef[0, 1]
    slope = -w0 / w1
    offset = intercept[0] / w1
    Y_plot = slope * X_plot - offset

    sns.scatterplot(data=fold_frame, x="y0", y="y1", hue="class", alpha=0.7)

    plt.plot(X_plot, Y_plot, color='r')
    plt.title(f"{ds}, Fold {i+1}, Accuracy = {accuracies[i]:.3f}, AUC = {AUCs[i]:.3f}")
    plt.savefig(f"{ds}-{i}.png", dpi=150)

    # Clear the current figure so the next fold starts from a blank canvas.
    plt.clf()

## Tune threshold

In [12]:
import torch
from sklearn.metrics import accuracy_score
from numpy import arange

# For every fold, sweep the decision threshold on the predicted probability of
# class 1 and keep the threshold with the highest accuracy.
# NOTE: `scores[i]` and `y_list[i]` are produced by the logistic-regression
# cell from the same samples the model was fitted on, so the tuned accuracy is
# an in-sample figure.
tuned_accuracies = []
tuned_thresholds = []
thresholds = arange(0, 1, 0.001)  # candidate thresholds 0.000, 0.001, ..., 0.999

for i in range(len(data_list)):
    temp_accuracies = []
    for threshold in thresholds:
        y_pred = np.array(scores[i][:, 1] > threshold).astype(np.int32)
        accuracy = accuracy_score(y_true=y_list[i], y_pred=y_pred)
        temp_accuracies.append(accuracy)
    # Index of the first threshold reaching the best accuracy. Deliberately NOT
    # named `max`: rebinding the builtin in the notebook namespace breaks every
    # later `max(...)` call.
    best_idx = int(np.argmax(temp_accuracies))
    tuned_accuracies.append(temp_accuracies[best_idx])
    tuned_thresholds.append(thresholds[best_idx])



In [None]:
# x-range over which each decision line is drawn (same for every fold).
X_plot = np.linspace(2, 5, 100)

for i, (fold_frame, coef, intercept) in enumerate(
    zip(data_list, coef_list, intercept_list)
):
    w0, w1 = coef[0, 0], coef[0, 1]
    threshold = tuned_thresholds[i]

    # Default boundary (probability threshold 0.5): w0*y0 + w1*y1 + b = 0.
    Y_plot = -w0 / w1 * X_plot - intercept[0] / w1

    # A probability threshold t moves the boundary to
    # w0*y0 + w1*y1 + b = log(t / (1 - t)), i.e. a vertical shift of the line.
    Y_plot_tuned = Y_plot - np.log((1 - threshold) / threshold) / w1

    plt.plot(X_plot, Y_plot, color='r', label="original LR hyperplane")
    plt.plot(X_plot, Y_plot_tuned, color='b', label="tuned threshold LR hyperplane")
    plt.title(f"{ds}, Fold {i+1}, accuracy = {accuracies[i]:.3f}, tuned accuracy = {tuned_accuracies[i]:.3f}, AUC = {AUCs[i]:.3f}, tuned threshold = {tuned_thresholds[i]:.3f}")
    sns.scatterplot(data=fold_frame, x="y0", y="y1", hue="class", alpha=0.7)
    plt.savefig(f"{ds}-{i}-tuned.png", dpi=150)

    # Clear the current figure so the next fold starts from a blank canvas.
    plt.clf()