In [127]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

sns.set_style("whitegrid")

In [2]:
df = pd.read_csv("data/data_ger/Metadata.csv", index_col=1).drop("Unnamed: 0", axis=1)
df["cogdx_desc"] = df["cogdx"].map({1: "No CI", 2: "Mild CI", 3: "Mild CI +", 4: "AD", 5: "AD +", 6: "Other"})

In [15]:
wang_AD = np.bitwise_and(np.bitwise_and(df["ceradsc"] <= 2, df["braaksc"] >= 4), df["cogdx"] == 4)
wang_CT = np.bitwise_and(np.bitwise_and(df["ceradsc"] >= 3, df["braaksc"] <= 3), df["cogdx"] == 1)
df["wang"] = "Other"
df.loc[wang_AD, "wang"] = "AD"
df.loc[wang_CT, "wang"] = "CT"
df["wang"].value_counts()

wang
Other    253
AD       102
CT        72
Name: count, dtype: int64

In [94]:
from sklearn.metrics import confusion_matrix

# Prepare data
df_wang = df[df["wang"] != "Other"]
X = df_wang["msex"].values.reshape(-1, 1)
y = df_wang["wang"].map({"AD": 1, "CT": 0}).values
print(f"X: {X.shape}, y: {y.shape}")

cm = confusion_matrix(X, y)
cm = pd.DataFrame(cm, index=["F", "M"], columns=["CT", "AD"])
print(cm)

X: (174, 1), y: (174,)
   CT  AD
F  33  60
M  39  42


In [125]:
pd.Series(y).value_counts(normalize=True)

1    0.586207
0    0.413793
Name: proportion, dtype: float64

In [122]:
# Train some classifiers to predict the Wang labels from msex column
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, LeaveOneOut

# clf = LogisticRegression()
clf = LogisticRegression()
scores = cross_val_score(clf, X, y, cv=LeaveOneOut())
print(scores)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

[1. 1. 1. 1. 0. 1. 1. 1. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 1. 0. 1. 1. 0.
 1. 1. 0. 1. 0. 1. 1. 1. 0. 1. 0. 1. 1. 1. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1.
 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 0. 1.
 0. 1. 1. 1. 0. 0. 0. 0. 1. 1. 1. 1. 1. 0. 1. 1. 0. 0. 1. 0. 1. 1. 1. 0.
 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 1. 1. 0. 1. 0. 1. 0. 1. 1. 1. 1. 1. 1.
 1. 1. 0. 1. 0. 1. 0. 1. 1. 0. 0. 1. 1. 1. 0. 0. 1. 1. 0. 0. 0. 0. 1. 0.
 0. 1. 0. 0. 0. 1. 1. 0. 0. 0. 1. 0. 1. 1. 1. 1. 0. 1. 1. 0. 1. 1. 1. 1.
 0. 1. 0. 0. 0. 1.]
Accuracy: 0.59 (+/- 0.99)
