# Requirements

In [2]:
import sys
sys.path.append("../") # go to parent dir

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.special import expit

from src.settings import LOGS_ROOT

# Notebook-wide plot defaults: figure.figsize of (8, 8) and seaborn's "whitegrid" style.
sns.set(rc = {'figure.figsize':(8,8)})
sns.set_style("whitegrid")

# A smarter approach for results

## Data load

In [4]:
# ABIDE 869
# NOTE: every dataset cell assigns the same names (ds, data_paths, accuracies,
# AUCs); the cells further down use whichever dataset cell was run last.
ds = "ABIDE 869"

# One scores.npz per fold directory (k_0 .. k_4), resolved against LOGS_ROOT.
data_paths = [
    LOGS_ROOT.joinpath(relative_path)
    for relative_path in (
        "220716.075108-experiment-mlp-abide_869/k_0/0002/scores.npz",
        "220716.075108-experiment-mlp-abide_869/k_1/0009/scores.npz",
        "220716.075108-experiment-mlp-abide_869/k_2/0008/scores.npz",
        "220716.075108-experiment-mlp-abide_869/k_3/0004/scores.npz",
        "220716.075108-experiment-mlp-abide_869/k_4/0000/scores.npz",
    )
]

# Per-fold metrics, indexed like data_paths; they are shown in the plot titles.
accuracies = [0.655, 0.684, 0.713, 0.747, 0.746]
AUCs = [
    0.6912234042553193,
    0.7150265957446809,
    0.7523936170212767,
    0.8257161892071951,
    0.7970643684352275,
]


In [9]:
# OASIS
# NOTE: every dataset cell assigns the same names (ds, data_paths, accuracies,
# AUCs); the cells further down use whichever dataset cell was run last.
ds = "OASIS"

# One scores.npz per fold directory (k_0 .. k_4), resolved against LOGS_ROOT.
data_paths = [
    LOGS_ROOT.joinpath(relative_path)
    for relative_path in (
        "220721.184622-experiment-mlp-oasis/k_0/0001/scores.npz",
        "220721.184622-experiment-mlp-oasis/k_1/0002/scores.npz",
        "220721.184622-experiment-mlp-oasis/k_2/0008/scores.npz",
        "220721.184622-experiment-mlp-oasis/k_3/0002/scores.npz",
        "220721.184622-experiment-mlp-oasis/k_4/0005/scores.npz",
    )
]

# Per-fold metrics, indexed like data_paths; they are shown in the plot titles.
accuracies = [
    0.8181818181818182,
    0.8,
    0.8121212121212121,
    0.8414634146341463,
    0.8597560975609756,
]
AUCs = [
    0.855410866636731,
    0.7151648351648352,
    0.7371428571428571,
    0.8031674208144797,
    0.8404977375565611,
]


In [15]:
# FBIRN
# NOTE: every dataset cell assigns the same names (ds, data_paths, accuracies,
# AUCs); the cells further down use whichever dataset cell was run last.
ds = "FBIRN"

# One scores.npz per fold directory (k_0 .. k_4), resolved against LOGS_ROOT.
data_paths = [
    LOGS_ROOT.joinpath(relative_path)
    for relative_path in (
        "220721.175931-experiment-mlp-fbirn/k_0/0008/scores.npz",
        "220721.175931-experiment-mlp-fbirn/k_1/0008/scores.npz",
        "220721.175931-experiment-mlp-fbirn/k_2/0004/scores.npz",
        "220721.175931-experiment-mlp-fbirn/k_3/0003/scores.npz",
        "220721.175931-experiment-mlp-fbirn/k_4/0005/scores.npz",
    )
]

# Per-fold metrics, indexed like data_paths; they are shown in the plot titles.
accuracies = [
    0.8095238095238095,
    0.7903225806451613,
    0.7741935483870968,
    0.8709677419354839,
    0.7580645161290323,
]
AUCs = [
    0.8477822580645161,
    0.8104166666666667,
    0.875,
    0.8989583333333333,
    0.8645833333333333,
]

In [16]:
# Load each fold's scores file and collect, per fold: the loaded archive,
# its "mean_scores" and "targets" arrays, and a DataFrame combining the two.
data_list = []
mean_scores_list = []
targets_list = []
mean_data_list = []

# A plain `for` loop is kept on purpose: `data_path`, `data`, `mean_scores`
# and `targets` stay bound after the loop, and later cells read some of them.
for data_path in data_paths:
    data = np.load(data_path)
    mean_scores = data["mean_scores"]
    targets = data["targets"]

    data_list.append(data)
    mean_scores_list.append(mean_scores)
    targets_list.append(targets)

    # Columns 0 and 1 of mean_scores become y0 / y1; targets become "class".
    fold_frame = pd.DataFrame(
        {
            "y0": mean_scores[:, 0],
            "y1": mean_scores[:, 1],
            "class": targets,
        }
    )
    mean_data_list.append(fold_frame)

## Logistic regression

In [17]:
from sklearn.linear_model import LogisticRegression

# Fit one logistic regression per fold on the (y0, y1) mean scores and keep
# its coefficients and intercept for drawing the decision boundary later.
X_list, y_list = [], []
coef_list, intercept_list = [], []

for mean_data in mean_data_list:
    # Convert the whole frame once: columns 0-1 are the scores, column 2 the class.
    fold_values = mean_data.to_numpy()
    X = fold_values[:, :2]
    y = fold_values[:, 2].astype(int)
    X_list.append(X)
    y_list.append(y)

    clf = LogisticRegression(random_state=0).fit(X, y)
    coef_list.append(clf.coef_)
    intercept_list.append(clf.intercept_)

In [21]:
# Save one scatter plot per fold with the fitted logistic-regression decision
# boundary drawn on top of the (y0, y1) mean scores.

# x-values at which the boundary line is evaluated; identical for every fold,
# so it is computed once instead of on every iteration.
# NOTE(review): the 2..7 range presumably spans the observed y0 scores - confirm per dataset.
X_plot = np.linspace(2, 7, 100)

for i in range(len(data_list)):
    # Boundary of the fold's model: w0*y0 + w1*y1 + b = 0
    #   =>  y1 = -(w0 / w1) * y0 - b / w1
    w0 = coef_list[i][0, 0]
    w1 = coef_list[i][0, 1]
    Y_plot = -w0 / w1 * X_plot - intercept_list[i][0] / w1

    # A dedicated figure per fold, closed after saving, so no cleared-but-open
    # figure is left behind (and rendered as an empty figure) when the loop ends.
    fig, ax = plt.subplots()
    sns.scatterplot(data=mean_data_list[i], x="y0", y="y1", hue="class", alpha=0.7, ax=ax)
    ax.plot(X_plot, Y_plot, color='r')
    ax.set_title(f"{ds}, Fold {i+1}, Accuracy = {accuracies[i]:.3f}, AUC = {AUCs[i]:.3f}")

    # File names use the 0-based index i, while the title shows the fold as i + 1.
    fig.savefig(f"{ds}-{i}.png", dpi=150)
    plt.close(fig)

<Figure size 576x576 with 0 Axes>

## ROC AUC score

In [111]:
from sklearn.metrics import roc_auc_score
import torch

# Recompute the ROC AUC of a single fold directly from its mean scores.
fold_frame = mean_data_list[1]  # index 1 -> second entry of data_paths (k_1)

logits = torch.tensor(fold_frame[["y0", "y1"]].to_numpy())
targets = torch.tensor(fold_frame["class"].to_numpy())
# Softmax across the two score columns; column 1 is used as the ranking score.
scores = logits.softmax(dim=-1)

print(logits.shape)
print(targets.shape)
print(scores.shape)

auc_scores = roc_auc_score(targets, scores[:, 1])

print(auc_scores)

torch.Size([165, 2])
torch.Size([165])
torch.Size([165, 2])
0.7151648351648352


In [107]:
# Logistic-regression baseline on the mean scores of one fold (index 1).
# NOTE: the model is fitted and scored on the same samples, so the accuracy
# and AUC printed below are in-sample figures.
fold_values = mean_data_list[1].to_numpy()
X = fold_values[:, 0:2]              # y0, y1
y = fold_values[:, 2].astype(int)    # class

clf = LogisticRegression(random_state=0).fit(X, y)
print(clf.coef_)
print(clf.intercept_)

LR_accuracy = clf.score(X, y)
print("LR accuracy = ", LR_accuracy)
LR_scores_proba = torch.tensor(clf.predict_proba(X))
y = torch.tensor(y)

# No `average` argument is passed: roc_auc_score ignores it when y_true is
# binary, so requesting 'samples' averaging here had no effect on the result.
LR_auc_scores = roc_auc_score(y, LR_scores_proba[:, 1])
print("LR AUC = ", LR_auc_scores)

[[-0.39112423  0.77873981]]
[-1.56985064]
LR accuracy =  0.793939393939394
LR AUC =  0.7175824175824176


## Balanced/unbalanced

In [39]:
# Class balance of `targets`: entries equal to 0 versus all remaining entries.
zerosss = sum(1 for target in targets if target == 0)
onesss = len(targets) - zerosss

print("Zeros: ", zerosss, "; ones: ", onesss)

Zeros:  130 ; ones:  35


## MISC

In [None]:
# Inspect one scores file in detail.
# NOTE: `data_path` is the loop variable left bound by the loading loop above,
# i.e. the last path that loop visited.
# The arrays are read inside a context manager so the .npz archive is closed
# again once they are in memory.
with np.load(data_path) as scores_file:
    print("data content: ", scores_file.files)
    raw_scores = scores_file["raw_scores"]
    mean_scores = scores_file["mean_scores"]
    targets = scores_file["targets"]

print("raw_scores.shape: ", raw_scores.shape)
print("mean_scores.shape: ", mean_scores.shape)
print("targets.shape: ", targets.shape)

# One frame per index along the first axis of raw_scores: all raw (y0, y1)
# rows for that index, followed by one extra row holding its mean score.
# The extra row is labelled "<target> mean" so it can be coloured differently.
data = []
for i in range(raw_scores.shape[0]):
    data.append(
        pd.DataFrame(
            {
                "y0": np.append(raw_scores[i, :, 0], mean_scores[i, 0]),
                "y1": np.append(raw_scores[i, :, 1], mean_scores[i, 1]),
                "class": [str(targets[i])] * raw_scores.shape[1] + [str(targets[i]) + " mean"],
            }
        )
    )

# NOTE(review): the facet-grid cell below reads `final_data` with a "subject"
# column, but no visible cell defines it. Reconstructed here as the per-index
# frames stacked with their index stored as "subject" - confirm this matches
# the original intent.
if data:
    final_data = pd.concat(
        [frame.assign(subject=subject) for subject, frame in enumerate(data)],
        ignore_index=True,
    )
else:
    # pd.concat rejects an empty list; fall back to an empty frame with the same columns.
    final_data = pd.DataFrame(columns=["y0", "y1", "class", "subject"])

# Mean scores only: one row per entry of mean_scores, with its target as "class".
mean_data = pd.DataFrame(
    {
        "y0": mean_scores[ :, 0],
        "y1": mean_scores[ :, 1],
        "class": targets,
    }
)


In [None]:
# Colour per "class" label: pale shades for the raw rows, stronger ones for
# the "... mean" rows.
palette = {
    "0": "mistyrose",
    "0 mean": "red",
    "1": "lightsteelblue",
    "1 mean": "darkblue",
}

# One small panel per "subject" value, wrapped at four panels per row.
# Reads the columns "subject", "y0", "y1" and "class" from final_data.
g = sns.FacetGrid(
    final_data,
    col="subject",
    col_wrap=4,
    height=2,
    hue="class",
    palette=palette,
    legend_out=True,
)
g.map(sns.scatterplot, "y0", "y1").add_legend()
# Reference lines at x = 0.5 and y = 0.5 in every panel.
g.refline(x=0.5, y=0.5)