In [1]:
import pandas as pd
import tensorflow as tf

from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

pd.set_option('display.max_columns', None)
model_results = {}

In [2]:
df = pd.read_csv(Path('..', 'data', 'alzheimers_disease_data.csv'))

# Drop unnecessary columns:
# PatientID:        unique identifier for each record
# DoctorInCharge:   confidential data
df.drop(columns=['PatientID', 'DoctorInCharge'], inplace=True)

# Collect remaining features
features = df.columns[:-1]

In [3]:
for f in features:
    drop_df = df.drop(columns=[f])

    # Split preprocessed data into features and target arrays
    X = drop_df.drop(columns='Diagnosis')
    y = drop_df['Diagnosis']

    # Split the preprocessed data into a training and testing dataset
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    # Scale the data
    scaler = StandardScaler()
    X_scaler = scaler.fit(X_train)
    X_train_scaled = X_scaler.transform(X_train)
    X_test_scaled = X_scaler.transform(X_test)

    # Define the model
    nn = tf.keras.models.Sequential()
    nn.add(tf.keras.layers.Input(shape=(31,)))
    nn.add(tf.keras.layers.Dense(units=2, activation='tanh'))
    nn.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

    # Compile the model
    nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

    results = []
    epochs_trained = 0

    for i in range(30):
        # Train the model
        trained_model = nn.fit(
            X_train_scaled, 
            y_train, 
            epochs=5
        )

        epochs_trained += len(trained_model.epoch)

        # Evaluate the model using the test data
        model_loss, model_accuracy = nn.evaluate(X_test_scaled, y_test, verbose=2)
        results.append({
            'Epochs Trained' : epochs_trained,
            'Accuracy' : model_accuracy,
            'Loss' : model_loss
        })

    results_df = pd.DataFrame(results)
    results_df.set_index('Epochs Trained', inplace=True)
    results_df['Acurracy - Loss'] = results_df['Accuracy'] - results_df['Loss']
    results_df.sort_values('Acurracy - Loss', ascending=False, inplace=True)
    model_results[f'Drop {f}'] = results_df

Epoch 1/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5654 - loss: 0.6781
Epoch 2/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5890 - loss: 0.6680
Epoch 3/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6318 - loss: 0.6515
Epoch 4/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6458 - loss: 0.6429
Epoch 5/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6938 - loss: 0.6093
17/17 - 0s - 11ms/step - accuracy: 0.7249 - loss: 0.6038
Epoch 1/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7142 - loss: 0.6005
Epoch 2/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7681 - loss: 0.5656
Epoch 3/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7930 - loss: 0.5445
Epoch 4

In [4]:
def print_model_results(model_results):
    print('Maximum accuracy: {} at epoch {}'.format(round(model_results['Accuracy'].max(), 4), model_results['Accuracy'].idxmax()))
    print('Minimum loss:     {} at epoch {}'.format(round(model_results['Loss'].min(), 4), model_results['Loss'].idxmin()))

In [5]:
for name, results in model_results.items():
    print(name)
    print_model_results(results)
    print()

Drop Age
Maximum accuracy: 0.842 at epoch 110
Minimum loss:     0.401 at epoch 85

Drop Gender
Maximum accuracy: 0.8253 at epoch 125
Minimum loss:     0.4257 at epoch 25

Drop Ethnicity
Maximum accuracy: 0.8625 at epoch 125
Minimum loss:     0.3771 at epoch 45

Drop EducationLevel
Maximum accuracy: 0.8513 at epoch 50
Minimum loss:     0.3706 at epoch 45

Drop BMI
Maximum accuracy: 0.8494 at epoch 60
Minimum loss:     0.3735 at epoch 35

Drop Smoking
Maximum accuracy: 0.8457 at epoch 40
Minimum loss:     0.3824 at epoch 40

Drop AlcoholConsumption
Maximum accuracy: 0.8532 at epoch 145
Minimum loss:     0.3542 at epoch 150

Drop PhysicalActivity
Maximum accuracy: 0.842 at epoch 75
Minimum loss:     0.3838 at epoch 55

Drop DietQuality
Maximum accuracy: 0.8439 at epoch 85
Minimum loss:     0.3853 at epoch 80

Drop SleepQuality
Maximum accuracy: 0.8699 at epoch 105
Minimum loss:     0.3582 at epoch 85

Drop FamilyHistoryAlzheimers
Maximum accuracy: 0.8401 at epoch 145
Minimum loss:     0.3