In [1]:
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer
from riskslim import RiskSLIMClassifier

### Alzheimer's EEG: Modeling

The features from the previous notebook are now passed into a sparse risk model. The variables found to be important for classification were timescales (labeled as spectral "knee_freq" below) and 1/f slope. The model achieved an AUC of 0.8.

In [2]:
# Load features and keep only the two groups being classified:
# controls ("C") and Alzheimer's ("A").
df = pd.read_csv("alzheimers_features.csv")
df = df.loc[df["label"].isin(["C", "A"])]

# X and y
# Every column after the first is treated as a feature.
# NOTE(review): this assumes "label" is the first column of the CSV — confirm.
X = df.iloc[:, 1:].to_numpy()
variable_names = list(df.columns[1:])

# Binary target: 1 = Alzheimer's, 0 = controls
y = (df["label"] == "A").to_numpy().astype(int)

# Scaler
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Impute missing values
imp = SimpleImputer(missing_values=np.nan, strategy='mean')
X = imp.fit_transform(X)

# SimpleImputer discards columns that are entirely NaN, which would shift the
# column positions relative to variable_names. Fail loudly here rather than
# silently attaching the wrong names to features further down.
assert X.shape[1] == len(variable_names), (
    f"Imputation changed the number of columns ({X.shape[1]} columns vs "
    f"{len(variable_names)} names); drop all-NaN features before imputing."
)

# Drop offset, constant, and exponent parameters for faster computation.
# A column is dropped when its name contains any of these substrings.
dropped_tags = ("offset", "const", "exp")
keep = [
    i for i, v in enumerate(variable_names)
    if not any(tag in v for tag in dropped_tags)
]

X = X[:, keep]
variable_names = [variable_names[i] for i in keep]

### RiskSLIM

RiskSLIM is a sparse linear integer model with risk scores. This model provides improved interpretability compared to standard logistic regression.

In [3]:
# RiskSLIM settings, grouped by purpose and merged into a single dict below.
# NOTE(review): groupings are inferred from the option names — confirm the
# exact meaning and units of each option against the riskslim documentation.

# Runtime budget, stopping tolerance, and loss evaluation mode
solver_settings = {
    "max_runtime": 30.0,
    "max_tolerance": np.finfo("float").eps,
    "loss_computation": "fast",
}

# Solution-improvement flags
improvement_settings = {
    "round_flag": True,
    "polish_flag": True,
    "chained_updates_flag": True,
    "add_cuts_at_heuristic_solutions": True,
}

# Initialization procedure
init_settings = {
    "initialization_flag": True,
    "init_max_runtime": 120.0,
    "init_max_coefficient_gap": 0.9,
}

# CPLEX options (fixed random seed)
cplex_settings = {
    "cplex_randomseed": 0,
    "cplex_mipemphasis": 0,
}

settings = {
    **solver_settings,
    **improvement_settings,
    **init_settings,
    **cplex_settings,
}

# Build the classifier from the preprocessed feature names and the settings above
rs = RiskSLIMClassifier(
    variable_names=variable_names,
    outcome_name="alzheimers",
    max_size=5,
    max_coef=6,
    max_abs_offset=10,
    settings=settings,
    verbose=False,
)

# Train on the preprocessed features and binary labels
rs.fit(X, y)

# Show the fitted model as the cell output
rs

In [71]:
# Write the fitted model's report to disk; the returned path is the cell output
report_path = rs.create_report("report.html")
report_path

PosixPath('report.html')

In [12]:
from IPython.display import IFrame

# Embed the saved HTML report inline in the notebook
report_frame = IFrame("report.html", width=1200, height=1000)
report_frame