<div width="100%">
    <img width="100%" src="https://storage.googleapis.com/kaggle-datasets-images/1242233/2072411/724eb55eb9ef7e89e2b6a9308acb30cf/dataset-cover.jpg" />
</div>

In [None]:
import numpy as np
import pandas as pd

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical

from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import SGD
from keras.utils import to_categorical

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

<h1 id="dataset" style="color:#01499b; background:white; border:0.5px dotted #01499b;"> 
    <center>Dataset
        <a class="anchor-link" href="#dataset" target="_self">¶</a>
    </center>
</h1>

# Feature engineering

## Load data

In [None]:
# Load the Alzheimer feature table from the Kaggle input directory.
path = '../input/alzheimer-features/alzheimer.csv'
df = pd.read_csv(path)

# Missing MMSE / SES values are filled with 0.
# NOTE(review): 0 may not be a neutral placeholder for these scores — confirm
# that a constant fill is preferable to e.g. the column median.
df['MMSE'] = df['MMSE'].fillna(0)
df['SES'] = df['SES'].fillna(0)

# pick only demented or nondemented
# .copy() makes the filtered frame independent of the frame read from disk, so
# later column assignments / replacements do not operate on a slice
# (avoids pandas' SettingWithCopyWarning).
df = df[df['Group'].isin(['Demented', 'Nondemented'])].copy()

df.head()

## Categorical encoding

In [None]:
# Label <-> integer lookup tables for the two categorical columns.
group_to_idx = {'Nondemented': 0, 'Demented': 1}
idx_to_group = {idx: name for name, idx in group_to_idx.items()}

sex_to_idx = {'M': 0, 'F': 1}
idx_to_sex = {idx: name for name, idx in sex_to_idx.items()}

# Non-inplace replace keeps the cell safe to re-run and avoids mutating a frame
# that an earlier cell displayed. drop=True discards the old row labels instead
# of inserting them as an extra 'index' column, which would otherwise show up
# in describe() and in the correlation matrix as if it were a feature.
df = (
    df
    .replace(group_to_idx)
    .replace(sex_to_idx)
    .reset_index(drop=True)
)

## Describe features

In [None]:
# Summary statistics, transposed so that each feature occupies one row.
df.describe().transpose()

## Standard Scaler

In [None]:
# Columns that get standardised; the scaled values overwrite the originals in df.
numeric_cols = ['Age', 'EDUC', 'SES', 'MMSE', 'eTIV']

scaler = StandardScaler()
scaled_values = scaler.fit_transform(df[numeric_cols])
df[numeric_cols] = scaled_values

## Correlation matrix

In [None]:
# Pairwise correlation of every column in df, drawn as an annotated heatmap.
corr = df.corr()
axis_labels = corr.columns

plt.figure(figsize=(14, 8))
sns.heatmap(
    corr,
    cmap="Blues",
    annot=True,
    xticklabels=axis_labels,
    yticklabels=axis_labels,
)

## PCA dimension reduction

In [None]:
# Project the three selected columns onto their first two principal components.
pca_input_cols = ['CDR', 'SES', 'ASF']

pca = PCA(n_components=2)
X_tr = pca.fit_transform(df[pca_input_cols])

In [None]:
# Row labels of each class. They are used as positions into X_tr, which is
# valid because df's index was reset to 0..n-1 before the PCA was fitted.
group_one = df[df['Group'] == 1].index    # Group 1 == 'Demented' (see group_to_idx)
group_zero = df[df['Group'] == 0].index   # Group 0 == 'Nondemented'

demented_rows = group_one.to_numpy()
nondemented_rows = group_zero.to_numpy()

plt.figure(figsize=(14, 8))
# Plot component 1 against component 2 so each marker is one sample in the
# reduced space; each call returns exactly one Line2D handle.
red, = plt.plot(X_tr[demented_rows, 0], X_tr[demented_rows, 1],
                'ro', label='Demented')
blue, = plt.plot(X_tr[nondemented_rows, 0], X_tr[nondemented_rows, 1],
                 'bo', label='Nondemented')
plt.xlabel('Principal component 1')
plt.ylabel('Principal component 2')
plt.legend(handles=[red, blue])
plt.show()

In [None]:
# one hot encoding
y = to_categorical(df['Group'].values)

# train/test data split
X_train, X_test, y_train, y_test = train_test_split(
                                    X_tr, y, test_size=0.5, random_state=42)

<h1 id="base" style="color:#01499b; background:white; border:0.5px dotted #01499b;"> 
    <center>Auto-Encoder
        <a class="anchor-link" href="#base" target="_self">¶</a>
    </center>
</h1>

In [None]:
def base_autoencoder(X_train, X_test):
    """Build, train and report on a single-hidden-layer autoencoder.

    The network maps a 2-feature input through a 10-unit ReLU layer back to a
    2-unit linear output and is trained with MSE to reproduce its own input.

    Args:
        X_train: training samples with 2 features; used as both the input and
            the reconstruction target.
        X_test: held-out samples with 2 features; only used to report the
            reconstruction error, never for fitting.

    Returns:
        The fitted ``Sequential`` model.
    """
    model = Sequential()
    model.add(Dense(10, input_dim=2, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dense(2, activation='linear'))

    # `learning_rate` is the supported keyword; the old `lr` alias is deprecated.
    model.compile(loss='mse', optimizer=SGD(learning_rate=0.01, momentum=0.9))
    # An autoencoder's target is its own input. Fitting against X_test would
    # pair each training row with an unrelated test row and leak test data.
    model.fit(X_train, X_train, epochs=100, verbose=0)

    # With no metrics compiled, evaluate() returns the scalar MSE loss.
    train_mse = model.evaluate(X_train, X_train, verbose=0)
    test_mse = model.evaluate(X_test, X_test, verbose=0)
    print('> reconstruction error train=%.3f, test=%.3f' % (train_mse, test_mse))
    return model

<h1 id="evaluate" style="color:#01499b; background:white; border:0.5px dotted #01499b;"> 
    <center>Evaluate
        <a class="anchor-link" href="#evaluate" target="_self">¶</a>
    </center>
</h1>

In [None]:
def evaluate(model, X_train, y_train, X_test, y_test):
    """Score the autoencoder's hidden layers as a frozen feature extractor.

    The reconstruction output layer is temporarily swapped for a 2-way softmax
    head, which is trained on top of the (frozen) remaining layers. Afterwards
    the softmax head is removed, the original output layer is re-attached and
    the model is recompiled with the MSE loss.

    Note: the layers that were marked non-trainable stay non-trainable after
    this function returns.

    Args:
        model: a compiled ``Sequential`` autoencoder; modified in place.
        X_train, y_train: training inputs and one-hot labels for the classifier.
        X_test, y_test: held-out inputs and one-hot labels.

    Returns:
        Tuple ``(train_acc, test_acc)`` of classifier accuracies.
    """
    # remember the current output layer
    output_layer = model.layers[-1]

    # remove the output layer
    model.pop()
    # mark all remaining layers as non-trainable
    for layer in model.layers:
        layer.trainable = False

    # temporary classification head
    model.add(Dense(2, activation='softmax'))
    # `learning_rate` is the supported keyword; the old `lr` alias is deprecated.
    model.compile(
        loss='categorical_crossentropy',
        optimizer=SGD(learning_rate=0.01, momentum=0.9),
        metrics=['accuracy'],
    )
    model.fit(X_train, y_train, epochs=100, verbose=0)

    # evaluate model: evaluate() returns [loss, accuracy] with one metric compiled
    _, train_acc = model.evaluate(X_train, y_train, verbose=0)
    _, test_acc = model.evaluate(X_test, y_test, verbose=0)

    # put the model back together
    model.pop()
    model.add(output_layer)
    model.compile(loss='mse', optimizer=SGD(learning_rate=0.01, momentum=0.9))
    return train_acc, test_acc

In [None]:
# Maps number of layers in the autoencoder -> (train accuracy, test accuracy).
scores = dict()

# create base encoder
model = base_autoencoder(X_train, X_test)

# evaluate
train_acc, test_acc = evaluate(model, X_train, y_train, X_test, y_test)
# Message text matches the one printed by the layer-growing loop further down.
print('> classifier accuracy layers=%d, train=%.3f, test=%.3f' % (len(model.layers), train_acc, test_acc))
scores[len(model.layers)] = (train_acc, test_acc)

<h1 id="layer" style="color:#01499b; background:white; border:0.5px dotted #01499b;"> 
    <center>Add-Layer
        <a class="anchor-link" href="#layer" target="_self">¶</a>
    </center>
</h1>

In [None]:
def add_layer(model, X_train, X_test):
    """Insert one new hidden layer before the output layer and retrain.

    Every existing hidden layer is frozen, a fresh 10-unit ReLU layer is
    appended, the original output layer is re-attached, and the model is
    refitted to reconstruct its own input. The reconstruction error on both
    splits is printed.

    Args:
        model: ``Sequential`` autoencoder to extend; modified in place.
        X_train: training samples; used as both the input and the target.
        X_test: held-out samples; only used to report reconstruction error.

    Returns:
        None.
    """
    # remember the current output layer
    output_layer = model.layers[-1]

    # remove the output layer
    model.pop()
    # mark all remaining layers as non-trainable
    for layer in model.layers:
        layer.trainable = False

    # add a new hidden layer
    model.add(Dense(10, activation='relu', kernel_initializer='he_uniform'))
    model.add(output_layer)

    # Recompile so the changed `trainable` flags and the new layer stack are
    # picked up by training; same loss/optimizer settings as the base model.
    model.compile(loss='mse', optimizer=SGD(learning_rate=0.01, momentum=0.9))

    # fit model: an autoencoder reconstructs its own input, so the target is
    # X_train (not X_test, which must stay unseen during training).
    model.fit(X_train, X_train, epochs=100, verbose=0)

    # evaluate reconstruction loss
    train_mse = model.evaluate(X_train, X_train, verbose=0)
    test_mse = model.evaluate(X_test, X_test, verbose=0)

    print('> reconstruction error train=%.3f, test=%.3f' % (train_mse, test_mse))

In [None]:
# Number of extra hidden layers to stack greedily, one per iteration.
n_layers = 5

for _ in range(n_layers):
    # grow the autoencoder by one hidden layer and retrain it
    add_layer(model, X_train, X_test)

    # score the deeper encoder as a classifier
    train_acc, test_acc = evaluate(model, X_train, y_train, X_test, y_test)
    depth = len(model.layers)
    report = '> classifier accuracy layers=%d, train=%.3f, test=%.3f' % (depth, train_acc, test_acc)
    print(report)

    # record the accuracies under the current layer count
    scores[depth] = (train_acc, test_acc)

<h1 id="results" style="color:#01499b; background:white; border:0.5px dotted #01499b;"> 
    <center>Evaluation results
        <a class="anchor-link" href="#results" target="_self">¶</a>
    </center>
</h1>

In [None]:
# scores maps layer count -> (train accuracy, test accuracy).
keys = list(scores.keys())
train_curve = [scores[k][0] for k in keys]
test_curve = [scores[k][1] for k in keys]

plt.figure(figsize=(14, 8))
plt.plot(keys, train_curve, label='train', marker='.')
plt.plot(keys, test_curve, label='test', marker='.')
# Label the axes so the figure can be read without the surrounding code.
plt.xlabel('Number of layers in the autoencoder')
plt.ylabel('Classifier accuracy')
plt.title('Classifier accuracy vs. autoencoder depth')
plt.legend()
plt.show()

<h1 id="reference" style="color:#01499b; background:white; border:0.5px dotted #01499b;"> 
    <center>Reference
        <a class="anchor-link" href="#reference" target="_self">¶</a>
    </center>
</h1>

Machine Learning Mastery - [Greedy layer-wise pretraining tutorial](https://machinelearningmastery.com/greedy-layer-wise-pretraining-tutorial/)