In [1]:
import pandas as pd
import tensorflow as tf

from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

pd.set_option('display.max_columns', None)
model_results = {}

In [2]:
df = pd.read_csv(Path('..', 'data', 'alzheimers_disease_data.csv'))

# Drop unnecessary columns:
# PatientID:        unique identifier for each record
# DoctorInCharge:   confidential data
df.drop(columns=['PatientID', 'DoctorInCharge'], inplace=True)

# Collect remaining features
features = df.columns[:-1]

In [3]:
for f in features:
    drop_df = df.drop(columns=[f])

    # Split preprocessed data into features and target arrays
    X = drop_df.drop(columns='Diagnosis')
    y = drop_df['Diagnosis']

    # Split the preprocessed data into a training and testing dataset
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    # Scale the data
    scaler = StandardScaler()
    X_scaler = scaler.fit(X_train)
    X_train_scaled = X_scaler.transform(X_train)
    X_test_scaled = X_scaler.transform(X_test)

    # Define the model
    nn = tf.keras.models.Sequential()
    nn.add(tf.keras.layers.Input(shape=(31,)))
    nn.add(tf.keras.layers.Dense(units=2, activation='tanh'))
    nn.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

    # Compile the model
    nn.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy', 'precision', 'recall'])

    results = []
    epochs_trained = 0

    for i in range(40):
        # Train the model
        trained_model = nn.fit(
            X_train_scaled, 
            y_train, 
            epochs=5,
            verbose=0
        )

        epochs_trained += len(trained_model.epoch)

        # Evaluate the model using the test data
        metrics = nn.evaluate(X_test_scaled, y_test, verbose=0)
        results.append({
            'Epochs Trained' : epochs_trained,
            'Loss' : metrics[0],
            'Accuracy' : metrics[1],
            'Precision' : metrics[2],
            'Recall' : metrics[3]
        })

    results_df = pd.DataFrame(results)
    results_df.set_index('Epochs Trained', inplace=True)
    model_results[f'Eliminate {f}'] = results_df

In [8]:
results_summary = []

for name, results in model_results.items():
    results_summary.append({
        'Model' : name,
        'Loss' : results['Loss'].min(),
        'Accuracy' : results['Accuracy'].max(),
        'Precision' : results['Precision'].max(),
        'Recall' : results['Recall'].max()
    })

summary_df = pd.DataFrame(results_summary)
summary_df.set_index('Model', inplace=True)

In [16]:
summary_df.sort_values('Accuracy')

Unnamed: 0_level_0,Loss,Accuracy,Precision,Recall
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Eliminate ADL,0.543745,0.732342,0.725664,0.569307
Eliminate FunctionalAssessment,0.486673,0.754647,0.695364,0.590674
Eliminate MemoryComplaints,0.4467,0.801115,0.765101,0.631016
Eliminate DiastolicBP,0.429963,0.802974,0.767196,0.719807
Eliminate BehavioralProblems,0.4391,0.806691,0.757225,0.689119
Eliminate Age,0.402668,0.819703,0.74269,0.751381
Eliminate Hypertension,0.392699,0.825279,0.807229,0.722222
Eliminate CholesterolTotal,0.397659,0.828996,0.808219,0.737374
Eliminate MMSE,0.409944,0.834572,0.740741,0.777778
Eliminate DietQuality,0.37651,0.840149,0.751515,0.752941
