# Stacked Denoising AutoEncoder

1. Input Layer : Energy, Contrast, Entropy, Homogeneity, SumAverage, Dissimilarity, AutoCorrelation, Skewness, Kurtosis, Average HU Value
2. Feature 1 : 7
3. Feature 2 : 3
4. Softmax : 2
5. Learning rate : .1 ~ .01
6. Noise factor : 10% on input vector
7. Activation function : sigmoid
8. Cost function : negative log-likelihood

## Three models
1. Feature Extraction : Input -> HL1 -> HL2 -> Decoder -> Weight update
2. Predict Features : Input -> HL1 -> HL2 -> Output
3. Classification : Input of 3 -> HL1 -> HL2 -> Classifier -> Output

In [2]:
import numpy as np
import pandas as pd
from keras.layers import Input, Dense
from keras.models import Model
from keras import optimizers, regularizers
from keras.callbacks import TensorBoard
from keras.utils import to_categorical

# Function for cross validation
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_validate

Using TensorFlow backend.


In [19]:
# Preparing dataset - total 27039 images : 14294 nodules + 12745 non-nodules
# NOTE(review): the recorded run of this cell printed a shape of (26910, 11),
# which does not match the 27039 total quoted above - confirm which is current.

# Seed numpy's global RNG so the noise mask drawn later is repeatable.
# Seeds used across runs: [0, 3136, 8405, 4242, 5293]
np.random.seed(0)

# The first CSV column is used as the row index.
df = pd.read_csv('./Datas/nodules_all_190625.csv', index_col=0)
print(df.shape, '\nDataset loading complete')

(26910, 11) 
Dataset loading complete


In [43]:
# Split the frame by position: every column but the last is an input feature,
# the last column is the class label.
cols = df.shape[1] - 1
x_train = df.iloc[:, :-1]
y_train = df.iloc[:, -1]

# One-hot encoded copy of the labels.
y_labels_train = to_categorical(y_train)

# Corrupt the inputs for the denoising autoencoder: a Bernoulli(p=0.1) mask
# picks entries, and each picked entry is scaled by (1 + noise_factor).
# The result is clipped into [0, 1].
# NOTE(review): the clip assumes the features are already scaled to [0, 1] - confirm.
noise_factor = 0.1
x_train_noisy = np.clip(
    x_train * (1 + noise_factor * np.random.binomial(n=1, p=0.1, size=x_train.shape)),
    0., 1.,
)

In [49]:
## Build Autoencoder
# Symbolic input: one vector of `cols` features per sample.
input_data = Input(shape=(cols,))

# Encoder, stage 1: `cols` -> 8 units, tanh.
hidden_1 = Dense(units=8, activation='tanh',
                 kernel_initializer='he_normal')(input_data)

# Encoder, stage 2: 8 -> 6 units, tanh. This is the bottleneck that is later
# exposed as the extracted feature vector.
hidden_2 = Dense(units=6, activation='tanh',
                 kernel_initializer='he_normal')(hidden_1)

# Decoder, stage 1: 6 -> 8 units, sigmoid.
decoded_1 = Dense(units=8, activation='sigmoid',
                  kernel_initializer='he_normal')(hidden_2)

# Decoder, stage 2: 8 -> `cols` units, sigmoid (the reconstructed input).
decoded_2 = Dense(units=cols, activation='sigmoid',
                  kernel_initializer='he_normal')(decoded_1)

In [50]:
# This model maps an input to its reconstruction (input -> encoder -> decoder).
# `optimizers` comes from the notebook's import cell; it is not re-imported here.
autoencoder_recon = Model(input_data, decoded_2)

# Adadelta is the optimizer actually used for training.
ada = optimizers.Adadelta(decay=0)
# Alternative optimizer kept for experiments; it is NOT passed to compile() below.
sgd = optimizers.SGD(lr=0.01)

# Binary cross-entropy is applied per output unit of the sigmoid reconstruction.
autoencoder_recon.compile(optimizer=ada, loss='binary_crossentropy')
autoencoder_recon.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_8 (InputLayer)         (None, 10)                0         
_________________________________________________________________
dense_64 (Dense)             (None, 8)                 88        
_________________________________________________________________
dense_65 (Dense)             (None, 6)                 54        
_________________________________________________________________
dense_66 (Dense)             (None, 8)                 56        
_________________________________________________________________
dense_67 (Dense)             (None, 10)                90        
Total params: 288
Trainable params: 288
Non-trainable params: 0
_________________________________________________________________


In [51]:
# Denoising objective: the corrupted vectors are the inputs and the clean
# vectors are the reconstruction targets. 5% of the samples are held out for
# validation, and the training data is reshuffled every epoch.
# (TensorBoard logging is currently disabled; to enable it pass
#  callbacks=[TensorBoard(log_dir='./logs/autoencoder_190601-5')].)
autoencoder_recon.fit(
    x=x_train_noisy,
    y=x_train,
    batch_size=256,
    epochs=100,
    verbose=1,
    validation_split=0.05,
    shuffle=True,
)

Train on 25564 samples, validate on 1346 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/10

Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x7fa01c968f28>

In [52]:
from keras import Sequential

# Encoder-only view of the trained autoencoder: it shares the input and the
# two hidden layers with `autoencoder_recon` and outputs the `hidden_2` code.
autoencoder_encoder = Model(inputs=input_data, outputs=hidden_2)

def create_model():
    """Build and compile the classifier that is trained on the encoded features.

    Returns:
        A compiled keras ``Sequential`` model made of a single 2-unit softmax
        ``Dense`` layer, using the 'adadelta' optimizer, categorical
        cross-entropy loss, and accuracy as the tracked metric.
    """
    classifier = Sequential([Dense(2, activation='softmax')])
    classifier.compile(loss='categorical_crossentropy',
                       optimizer='adadelta',
                       metrics=['accuracy'])
    return classifier

# Encode the clean (un-noised) inputs with the trained encoder.
x_feature = autoencoder_encoder.predict(x_train)

# Cross Validation
# 5 shuffled folds with a fixed random_state; a new classifier is built by
# `create_model` for every fold and trained for 50 epochs.
kfold = KFold(n_splits=5, shuffle=True, random_state=0)
model = KerasClassifier(build_fn=create_model, epochs=50, batch_size=256)
result = cross_validate(
    estimator=model,
    X=x_feature,
    y=y_train,
    scoring=['accuracy', 'precision', 'recall', 'f1', 'roc_auc'],
    cv=kfold,
    verbose=0,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50


Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50


Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50


Epoch 48/50
Epoch 49/50
Epoch 50/50


## Evaluation Metrics

### 3. Sensitivity(True positive rate, TPR) or Recall
$$ TPR=\frac{TP}{TP+FN} $$

### 4. Precision(Positive predictive value, PPV)
$$ PPV=\frac{TP}{TP+FP} $$

In [53]:
# For each cross-validated metric, print the per-fold scores followed by
# their mean, in the order: accuracy, ROC AUC, recall, precision, F1.
for metric_key, caption in (
    ('test_accuracy', '\n Accuracy Average : '),
    ('test_roc_auc', '\n ROC Average : '),
    ('test_recall', '\n Recall Average : '),
    ('test_precision', '\n Precision Average : '),
    ('test_f1', '\n F-score Average : '),
):
    print(result[metric_key], caption, result[metric_key].mean())

[0.73708658 0.72686734 0.74043107 0.73968785 0.74080268] 
 Accuracy Average :  0.7369751021924935
[0.8082379  0.79395119 0.81005568 0.80089125 0.80815232] 
 ROC Average :  0.804257668348707
[0.78739054 0.7813921  0.80264072 0.79168123 0.7819469 ] 
 Recall Average :  0.7890102987526728
[0.73560209 0.72556025 0.73590315 0.73778502 0.7392905 ] 
 Precision Average :  0.7348282013092242
[0.7606158  0.7524419  0.7678245  0.76378351 0.76002064] 
 F-score Average :  0.7609372703381856


In [32]:
# Reconstruct the clean inputs with the full autoencoder and show the result.
decoded = autoencoder_recon.predict(x=x_train)
print(decoded)

[[0.03539762 0.03038412 0.13725859 ... 0.2112279  0.08401334 0.5017235 ]
 [0.03661236 0.03129858 0.1386151  ... 0.21433076 0.08619881 0.53268117]
 [0.04142064 0.0347321  0.14365745 ... 0.225766   0.09392393 0.61848646]
 ...
 [0.02695176 0.02376816 0.12559134 ... 0.18867755 0.06719133 0.22075084]
 [0.02653459 0.02353242 0.12515193 ... 0.18733719 0.06687602 0.22435096]
 [0.02631643 0.02332839 0.12464425 ... 0.18687914 0.06629669 0.2146121 ]]


In [None]:
# 1. Accuracy
# `result` is the dict returned by cross_validate, so it cannot be averaged
# directly; the per-fold accuracies are stored under the 'test_accuracy' key.
accuracy_scores = result['test_accuracy']
print(accuracy_scores, '\n Average Accuracy : ', np.average(accuracy_scores))

In [None]:
# 2. Area under the ROC-curve (AUC)
# NOTE(review): `autoencoder_classify` is not defined in any cell visible in
# this notebook - it must be built and trained before this cell can run.
predicted = autoencoder_classify.predict(x_train)
# Hard class decision: index of the larger of the two softmax outputs.
prediction_result = np.argmax(predicted, axis=1)

from sklearn.metrics import roc_curve, auc
# Column 1 of the prediction is used as the score of the positive class.
fpr_keras, tpr_keras, thresholds_keras = roc_curve(y_train.values, predicted[:, 1])
# The area must be computed before it is used in the legend label below.
auc_keras = auc(fpr_keras, tpr_keras)

import matplotlib.pyplot as plt
plt.plot(fpr_keras, tpr_keras, label='Keras (area = {:.3f})'.format(auc_keras))
# Diagonal = chance-level reference line.
plt.plot([0, 1], [0, 1], 'k--')
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.legend(loc='best')
plt.show()

In [None]:
from sklearn.metrics import confusion_matrix

# Flatten the confusion matrix of true labels vs. hard predictions and unpack
# its four cells in row-major order.
TN, FP, FN, TP = confusion_matrix(
    y_true=y_train.values,
    y_pred=prediction_result,
).ravel()

In [None]:
# Sensitivity (recall, TPR): share of actual positives that were predicted positive.
sensitivity = TP / (FN + TP)
print('Sensitivity : ', sensitivity)

In [None]:
# Precision (PPV): share of predicted positives that are actual positives.
precision = TP / (FP + TP)
print('Precision : ', precision)

### 5. F-score
$$ \text{F-score} = \frac{2 \times PPV \times TPR}{PPV+TPR} $$

In [None]:
# F-score (F1): harmonic mean of precision and sensitivity.
# The factor 2 is required; without it the value is half of F1 and does not
# agree with the 'f1' scores reported by cross_validate.
f_score = 2 * (precision * sensitivity) / (precision + sensitivity)
print('F-score : ', f_score)

In [None]:
# List the layer objects of the classification model.
# NOTE(review): `autoencoder_classify` is not defined in any cell visible in
# this notebook - confirm where it is supposed to be created.
print(autoencoder_classify.layers)

In [12]:
# Show the parameters of layers[1], the first Dense layer after the input layer.
# A later cell unpacks this list as (weight, bias).
# NOTE(review): the stored output shows a 10x6 kernel, which does not match the
# 8-unit layer built above - presumably left over from an earlier run; re-run to refresh.
autoencoder_recon.layers[1].get_weights()

[array([[-0.768768  ,  0.88006055,  0.21781045, -0.0924427 , -0.00164266,
          0.14979221],
        [-0.5339661 ,  0.1496545 , -0.24011269, -0.32375374, -0.11565746,
         -0.418027  ],
        [-0.39518514, -0.31077665, -0.3810753 , -0.23387174, -0.5065482 ,
         -0.08469566],
        [-0.83951217, -0.34855592,  0.8481752 ,  0.13584559, -0.07653016,
         -0.35563365],
        [ 0.03268032, -0.10152446,  0.36609066,  0.48369616, -0.1670611 ,
          0.59071624],
        [-0.43042526,  0.06052483,  0.0949368 , -0.6796002 , -0.12490448,
         -0.3713372 ],
        [-0.04966786, -0.9637751 ,  0.12060661, -0.18130566,  0.28857023,
         -0.49279264],
        [-0.11784459, -0.94431806, -0.0221076 , -0.5024076 ,  0.08656775,
         -0.360977  ],
        [-0.13445432,  0.5711164 , -0.6203959 , -0.6823861 ,  0.07927413,
          0.21285404],
        [ 0.5014185 , -0.29338622, -0.78989136,  0.7650266 , -0.81578267,
          0.3174904 ]], dtype=float32),
 array([ 0.00

In [None]:
# Show the parameters of the last layer of the classification model.
# NOTE(review): `autoencoder_classify` is not defined in any cell visible in
# this notebook - confirm where it is supposed to be created.
autoencoder_classify.layers[-1].get_weights()

In [None]:
import matplotlib.pyplot as plt

# Unpack the parameters of layers[1] into its kernel matrix and bias vector.
weight, bias = autoencoder_recon.layers[1].get_weights()

# Render the kernel as a heat map: one cell per (input, unit) weight.
plt.imshow(weight, cmap='autumn')
plt.colorbar()
plt.title('Weights Colormap - 1')

In [None]:
# Average each row of the kernel (axis=1), giving one mean weight per kernel row.
weight_avg = np.average(weight, axis=1)

# Derive the x positions from the data instead of hard-coding range(1, 9):
# the kernel has one row per input feature, so a fixed 8-point axis raises a
# shape-mismatch error whenever the row count is not 8.
positions = range(1, len(weight_avg) + 1)
plt.plot(positions, weight_avg)
plt.title('Weights Average - 1')
plt.xticks(positions)

In [None]:
# Name one column per kernel column ('Feature 1', 'Feature 2', ...). The names
# are generated from the kernel's actual width, because a fixed list of 7 names
# makes the DataFrame constructor fail when the layer has a different unit count.
feature_columns = ['Feature {}'.format(i) for i in range(1, weight.shape[1] + 1)]
df_weight = pd.DataFrame(weight, columns=feature_columns)

# NOTE(review): the file name says "sigmoid" but the layer built in this
# notebook uses tanh - confirm the intended file name.
df_weight.to_csv("./Weights_sigmoid_1.csv")

In [None]:
# Reconstruct the clean inputs and print them followed by the originals,
# so the two can be compared by eye.
decoded_all = autoencoder_recon.predict(x=x_train)
print(decoded_all, x_train, sep='\n')