In [1]:
import pandas as pd
import tensorflow as tf

from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Notebook-wide configuration.
# Never truncate the column list when a DataFrame is rendered.
pd.options.display.max_columns = None

# Registry of evaluation histories, keyed by experiment name; filled in by the
# training cells further down.
model_results = dict()

In [2]:
# Read the source CSV from the data/ directory one level above the notebook.
data_path = Path('..') / 'data' / 'alzheimers_disease_data.csv'
df1 = pd.read_csv(data_path)

In [3]:
# Drop unnecessary columns:
# PatientID:        unique identifier for each record
# DoctorInCharge:   confidential data
#
# errors='ignore' makes this cell safe to re-run: once the columns are gone a
# second execution is a no-op instead of a KeyError. Rebinding df1 (instead of
# inplace=True) leaves the frame object that was loaded untouched.
df1 = df1.drop(columns=['PatientID', 'DoctorInCharge'], errors='ignore')

# Model 1

In [4]:
# Split preprocessed data into features and target arrays
X = df1.drop(columns='Diagnosis')
y = df1['Diagnosis']

# Split the preprocessed data into a training and testing dataset.
# A fixed random_state keeps the train/test partition identical across kernel
# restarts; without it every run scores the model on different rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Scale the data. The scaler is fitted on the training rows only and then
# applied to both partitions, so no test-set statistics reach training.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Determine the number of input neurons (one per feature column)
print(f'Input neurons: {X_train_scaled.shape[1]}')

Input neurons: 32


In [5]:
# Define the model: a 2-unit tanh hidden layer feeding one sigmoid output unit.
# The input width is taken from the scaled training matrix instead of being
# hard-coded, so the network stays in sync if the feature set changes.
n_features = X_train_scaled.shape[1]

nn = tf.keras.models.Sequential()
nn.add(tf.keras.layers.Input(shape=(n_features,)))
nn.add(tf.keras.layers.Dense(units=2, activation='tanh'))
nn.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

# Check the structure of the model
nn.summary()

In [6]:
# Configure loss, optimiser and the metric reported during training
nn.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

results = []
epochs_trained = 0

# 30 rounds of 5 epochs each; the test set is scored after every round so the
# metrics can be tracked against the cumulative number of epochs.
for round_idx in range(30):
    trained_model = nn.fit(X_train_scaled, y_train, epochs=5)
    epochs_trained += len(trained_model.epoch)

    # Score the current weights on the held-out rows
    model_loss, model_accuracy = nn.evaluate(X_test_scaled, y_test, verbose=2)
    results.append(dict(zip(
        ('Epochs Trained', 'Accuracy', 'Loss'),
        (epochs_trained, model_accuracy, model_loss),
    )))

# One row per checkpoint, ranked by accuracy minus loss (best first).
# The column key's spelling is kept exactly as the rest of the notebook uses it.
results_df = (
    pd.DataFrame(results)
    .set_index('Epochs Trained')
    .assign(**{'Acurracy - Loss': lambda scores: scores['Accuracy'] - scores['Loss']})
    .sort_values('Acurracy - Loss', ascending=False)
)
model_results['Model 1'] = results_df

Epoch 1/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.4545 - loss: 0.7024
Epoch 2/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5372 - loss: 0.6903
Epoch 3/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6379 - loss: 0.6809
Epoch 4/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6202 - loss: 0.6772
Epoch 5/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6449 - loss: 0.6672
17/17 - 0s - 10ms/step - accuracy: 0.6487 - loss: 0.6624
Epoch 1/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6420 - loss: 0.6624
Epoch 2/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6688 - loss: 0.6466
Epoch 3/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6577 - loss: 0.6411
Epoch 4

In [7]:
# Display the Model 1 checkpoint table built in the training cell
# (indexed by epochs trained, sorted by 'Acurracy - Loss', best first).
results_df

Unnamed: 0_level_0,Accuracy,Loss,Acurracy - Loss
Epochs Trained,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
55,0.849442,0.361789,0.487654
50,0.847584,0.36139,0.486194
45,0.847584,0.361839,0.485745
60,0.843866,0.361779,0.482087
85,0.843866,0.364676,0.47919
35,0.843866,0.364986,0.47888
80,0.842007,0.363492,0.478516
65,0.840149,0.362458,0.477691
40,0.840149,0.362914,0.477234
75,0.840149,0.363223,0.476925


# Model 2: Retrain with Each Feature Dropped in Turn

In [8]:
# Every column except the target. Selecting by name instead of by position
# keeps this correct even if 'Diagnosis' is not the last column of df1.
features = df1.columns.drop('Diagnosis')

In [9]:
# Leave-one-feature-out ablation: for each feature, retrain the same small
# network on the data without that feature and record its test metrics.
for f in features:
    df = df1.drop(columns=[f])

    # Split preprocessed data into features and target arrays
    X = df.drop(columns='Diagnosis')
    y = df['Diagnosis']

    # Split the preprocessed data into a training and testing dataset.
    # The same seed on every pass means every ablation is trained and scored
    # on the same rows, so differences between runs are not caused by a
    # different train/test partition. (Weight initialisation is still unseeded.)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

    # Scale the data (fit on the training rows only)
    scaler = StandardScaler()
    X_scaler = scaler.fit(X_train)
    X_train_scaled = X_scaler.transform(X_train)
    X_test_scaled = X_scaler.transform(X_test)

    # A new model is built on every pass; release the global Keras state left
    # behind by the previous one so memory does not grow over the loop.
    tf.keras.backend.clear_session()

    # Define the model. The input width follows the data instead of a
    # hard-coded 31, so it stays correct whatever the column count is.
    nn = tf.keras.models.Sequential()
    nn.add(tf.keras.layers.Input(shape=(X_train_scaled.shape[1],)))
    nn.add(tf.keras.layers.Dense(units=2, activation='tanh'))
    nn.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

    # Compile the model
    nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

    results = []
    epochs_trained = 0

    # 30 rounds of 5 epochs, evaluating on the test set after each round
    for i in range(30):
        # Train the model
        trained_model = nn.fit(
            X_train_scaled,
            y_train,
            epochs=5
        )

        epochs_trained += len(trained_model.epoch)

        # Evaluate the model using the test data
        model_loss, model_accuracy = nn.evaluate(X_test_scaled, y_test, verbose=2)
        results.append({
            'Epochs Trained' : epochs_trained,
            'Accuracy' : model_accuracy,
            'Loss' : model_loss
        })

    # One row per checkpoint, ranked by accuracy minus loss (best first).
    # The column key's spelling is kept as used elsewhere in the notebook.
    results_df = (
        pd.DataFrame(results)
        .set_index('Epochs Trained')
        .assign(**{'Acurracy - Loss': lambda scores: scores['Accuracy'] - scores['Loss']})
        .sort_values('Acurracy - Loss', ascending=False)
    )
    model_results[f'Drop {f}'] = results_df

Epoch 1/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5272 - loss: 0.7374
Epoch 2/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5633 - loss: 0.6975
Epoch 3/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.5872 - loss: 0.6826
Epoch 4/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.6018 - loss: 0.6659
Epoch 5/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.6282 - loss: 0.6383
17/17 - 0s - 12ms/step - accuracy: 0.6338 - loss: 0.6295
Epoch 1/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6510 - loss: 0.6174
Epoch 2/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6772 - loss: 0.5952
Epoch 3/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7009 - loss: 0.5776
Epoc

# Compare Model Results

In [10]:
def print_model_results(model_results):
    """Print the highest 'Accuracy' and the lowest 'Loss' of one results table.

    Parameters
    ----------
    model_results : pandas.DataFrame
        Table with 'Accuracy' and 'Loss' columns. The index label of the
        extreme row is printed as the epoch.

    Values are rounded to 4 decimal places before printing.
    """
    accuracy = model_results['Accuracy']
    print(f'Maximum accuracy: {round(accuracy.max(), 4)} at epoch {accuracy.idxmax()}')

    loss = model_results['Loss']
    print(f'Minimum loss:     {round(loss.min(), 4)} at epoch {loss.idxmin()}')

In [11]:
# Print, for every recorded experiment, its name followed by the best accuracy
# and lowest loss, with a blank line between experiments.
for label in model_results:
    print(label)
    print_model_results(model_results[label])
    print()

Model 1
Maximum accuracy: 0.8513 at epoch 20
Minimum loss:     0.3614 at epoch 50

Drop Age
Maximum accuracy: 0.8587 at epoch 20
Minimum loss:     0.3366 at epoch 75

Drop Gender
Maximum accuracy: 0.855 at epoch 30
Minimum loss:     0.3708 at epoch 40

Drop Ethnicity
Maximum accuracy: 0.8625 at epoch 125
Minimum loss:     0.3352 at epoch 130

Drop EducationLevel
Maximum accuracy: 0.8513 at epoch 20
Minimum loss:     0.3694 at epoch 35

Drop BMI
Maximum accuracy: 0.8513 at epoch 100
Minimum loss:     0.4 at epoch 30

Drop Smoking
Maximum accuracy: 0.8513 at epoch 115
Minimum loss:     0.3655 at epoch 150

Drop AlcoholConsumption
Maximum accuracy: 0.8123 at epoch 85
Minimum loss:     0.4103 at epoch 25

Drop PhysicalActivity
Maximum accuracy: 0.8625 at epoch 35
Minimum loss:     0.3291 at epoch 55

Drop DietQuality
Maximum accuracy: 0.8383 at epoch 20
Minimum loss:     0.385 at epoch 45

Drop SleepQuality
Maximum accuracy: 0.8606 at epoch 145
Minimum loss:     0.3618 at epoch 135

Drop F