In [1]:
import pandas as pd
import tensorflow as tf

from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

pd.set_option('display.max_columns', None)
model_results = {}

In [2]:
df = pd.read_csv(Path('..', 'data', 'alzheimers_disease_data.csv'))

In [3]:
# Drop unnecessary columns:
# PatientID:        unique identifier for each record
# DoctorInCharge:   confidential data
df.drop(columns=['PatientID', 'DoctorInCharge'], inplace=True)

# Model 1

In [4]:
# Split preprocessed data into features and target arrays
X = df.drop(columns='Diagnosis')
y = df['Diagnosis']

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Scale the data
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Determine the number of input neurons
print(f'Input neurons: {len(X_train_scaled[0])}')

Input neurons: 32


In [5]:
# Define the model
nn = tf.keras.models.Sequential()
nn.add(tf.keras.layers.Input(shape=(32,)))
nn.add(tf.keras.layers.Dense(units=2, activation='tanh'))
nn.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

# Check the structure of the model
nn.summary()

In [6]:
# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

results = []
epochs_trained = 0

for i in range(20):
    # Train the model
    trained_model = nn.fit(
        X_train_scaled, 
        y_train, 
        epochs=5
    )

    epochs_trained += len(trained_model.epoch)

    # Evaluate the model using the test data
    model_loss, model_accuracy = nn.evaluate(X_test_scaled, y_test, verbose=2)
    results.append({
        'Epochs Trained' : epochs_trained,
        'Accuracy' : model_accuracy,
        'Loss' : model_loss
    })

results_df = pd.DataFrame(results)
results_df.set_index('Epochs Trained', inplace=True)
results_df['Acurracy - Loss'] = results_df['Accuracy'] - results_df['Loss']
results_df.sort_values('Acurracy - Loss', ascending=False, inplace=True)
model_results['Model 1'] = results_df

Epoch 1/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.5469 - loss: 0.8031
Epoch 2/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5745 - loss: 0.7282
Epoch 3/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6369 - loss: 0.6641
Epoch 4/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6740 - loss: 0.6083
Epoch 5/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7261 - loss: 0.5641
17/17 - 0s - 15ms/step - accuracy: 0.6784 - loss: 0.6054
Epoch 1/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7355 - loss: 0.5481
Epoch 2/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7538 - loss: 0.5112
Epoch 3/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7872 - loss: 0.4918
Epoch 4

# Model 2

In [7]:
# Most useful colums from drop_test
df2 = df[['FunctionalAssessment', 'MemoryComplaints', 'ADL', 'Diagnosis']]

In [8]:
# Split preprocessed data into features and target arrays
X = df2.drop(columns='Diagnosis')
y = df2['Diagnosis']

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Scale the data
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Determine the number of input neurons
print(f'Input neurons: {len(X_train_scaled[0])}')

Input neurons: 3


In [9]:
# Define the model
nn = tf.keras.models.Sequential()
nn.add(tf.keras.layers.Input(shape=(3,)))
nn.add(tf.keras.layers.Dense(units=5, activation='tanh'))
nn.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

# Check the structure of the model
nn.summary()

In [10]:
# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

results = []
epochs_trained = 0

for i in range(40):
    # Train the model
    trained_model = nn.fit(
        X_train_scaled, 
        y_train, 
        epochs=5
    )

    epochs_trained += len(trained_model.epoch)

    # Evaluate the model using the test data
    model_loss, model_accuracy = nn.evaluate(X_test_scaled, y_test, verbose=2)
    results.append({
        'Epochs Trained' : epochs_trained,
        'Accuracy' : model_accuracy,
        'Loss' : model_loss
    })

results_df = pd.DataFrame(results)
results_df.set_index('Epochs Trained', inplace=True)
results_df['Acurracy - Loss'] = results_df['Accuracy'] - results_df['Loss']
results_df.sort_values('Acurracy - Loss', ascending=False, inplace=True)
model_results['Model 2'] = results_df

Epoch 1/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6192 - loss: 0.6728
Epoch 2/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7912 - loss: 0.6084
Epoch 3/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7942 - loss: 0.5662
Epoch 4/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8158 - loss: 0.5196
Epoch 5/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8121 - loss: 0.5044
17/17 - 0s - 12ms/step - accuracy: 0.8290 - loss: 0.4828
Epoch 1/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8029 - loss: 0.4937
Epoch 2/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8294 - loss: 0.4712
Epoch 3/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8038 - loss: 0.4766
Epoch 4

# Model 3

In [11]:
# Drop the most useful colums from drop_test
df3 = df.drop(columns=['FunctionalAssessment', 'MemoryComplaints', 'ADL'])

In [12]:
# Split preprocessed data into features and target arrays
X = df3.drop(columns='Diagnosis')
y = df3['Diagnosis']

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Scale the data
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Determine the number of input neurons
print(f'Input neurons: {len(X_train_scaled[0])}')

Input neurons: 29


In [21]:
# Define the model
nn = tf.keras.models.Sequential()
nn.add(tf.keras.layers.Input(shape=(29,)))
nn.add(tf.keras.layers.Dense(units=50, activation='tanh'))
nn.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

# Check the structure of the model
nn.summary()

In [22]:
# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

results = []
epochs_trained = 0

for i in range(40):
    # Train the model
    trained_model = nn.fit(
        X_train_scaled, 
        y_train, 
        epochs=5
    )

    epochs_trained += len(trained_model.epoch)

    # Evaluate the model using the test data
    model_loss, model_accuracy = nn.evaluate(X_test_scaled, y_test, verbose=2)
    results.append({
        'Epochs Trained' : epochs_trained,
        'Accuracy' : model_accuracy,
        'Loss' : model_loss
    })

results_df = pd.DataFrame(results)
results_df.set_index('Epochs Trained', inplace=True)
results_df['Acurracy - Loss'] = results_df['Accuracy'] - results_df['Loss']
results_df.sort_values('Acurracy - Loss', ascending=False, inplace=True)
model_results['Model 3'] = results_df

Epoch 1/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5072 - loss: 0.7470
Epoch 2/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6509 - loss: 0.6321
Epoch 3/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6820 - loss: 0.6070
Epoch 4/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6935 - loss: 0.5936
Epoch 5/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6947 - loss: 0.5881
17/17 - 0s - 11ms/step - accuracy: 0.7026 - loss: 0.5879
Epoch 1/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6948 - loss: 0.5878
Epoch 2/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6841 - loss: 0.5967
Epoch 3/5
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7067 - loss: 0.5802
Epoch 4

# Compare Model Results

In [23]:
def print_model_results(model_results):
    print('Maximum accuracy: {} at epoch {}'.format(round(model_results['Accuracy'].max(), 4), model_results['Accuracy'].idxmax()))
    print('Minimum loss:     {} at epoch {}'.format(round(model_results['Loss'].min(), 4), model_results['Loss'].idxmin()))

In [24]:
for name, results in model_results.items():
    print(name)
    print_model_results(results)
    print()

Model 1
Maximum accuracy: 0.8253 at epoch 40
Minimum loss:     0.4299 at epoch 25

Model 2
Maximum accuracy: 0.8439 at epoch 190
Minimum loss:     0.4058 at epoch 190

Model 3
Maximum accuracy: 0.7045 at epoch 10
Minimum loss:     0.5868 at epoch 10

