In [1]:
# Import dependencies
import pandas as pd
import numpy as np
import sklearn
import sklearn.datasets

### Prepare Data

In [2]:
# Read in the data and set up the input and output
diagnosis_df = pd.read_csv('data/diagnosis.csv')
X = diagnosis_df
y = diagnosis_df['Diagnosis'].map({'M': 1, 'B': 0})

In [3]:
# Split test, train, and demo
from sklearn.model_selection import train_test_split

X_use, demo_input, y_use, y_demo = train_test_split(X, y, random_state=2, test_size=0.02)
X_train, X_test, y_train, y_test = train_test_split(X_use, y_use, random_state=3)

In [4]:
# Save a copy of the demo input data as csv and as X_demo
demo_input.drop('Diagnosis', axis=1).to_csv('data/demo_input_30.csv', index=False)
X_demo = demo_input

In [5]:
# Drop irrelevant columns from X sets
for dataframe in [X_train, X_test, X_demo]:
    dataframe.drop(['Diagnosis', 'ID number'], axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [6]:
# Scale X
from sklearn.preprocessing import StandardScaler
X_scaler = StandardScaler().fit(X_train)

X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)
X_demo_scaled = X_scaler.transform(X_demo)

In [7]:
# Convert y to categorical
from keras.utils import to_categorical

y_train_categorical = to_categorical(y_train)
y_test_categorical = to_categorical(y_test)
y_demo_categorical = to_categorical(y_demo)

Using TensorFlow backend.


### Train Model

In [81]:
# Create deep neural network model
# Use 3 layers, 6 nodes each
from keras.models import Sequential
from keras.layers import Dense

model = Sequential()
model.add(Dense(units=6, activation='relu', input_dim=30))
model.add(Dense(units=6, activation='relu'))
model.add(Dense(units=2, activation='softmax'))

# Fit model to training data
model.compile(optimizer='adam',
                   loss='categorical_crossentropy',
                   metrics=['accuracy'])

model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=1000,
    shuffle=True,
    verbose=2
)

Epoch 1/1000
 - 1s - loss: 0.6278 - acc: 0.5492
Epoch 2/1000
 - 0s - loss: 0.5307 - acc: 0.7698
Epoch 3/1000
 - 0s - loss: 0.4642 - acc: 0.8489
Epoch 4/1000
 - 0s - loss: 0.4102 - acc: 0.8657
Epoch 5/1000
 - 0s - loss: 0.3669 - acc: 0.8729
Epoch 6/1000
 - 0s - loss: 0.3264 - acc: 0.8945
Epoch 7/1000
 - 0s - loss: 0.2925 - acc: 0.8945
Epoch 8/1000
 - 0s - loss: 0.2651 - acc: 0.8969
Epoch 9/1000
 - 0s - loss: 0.2420 - acc: 0.8993
Epoch 10/1000
 - 0s - loss: 0.2242 - acc: 0.9017
Epoch 11/1000
 - 0s - loss: 0.2086 - acc: 0.9089
Epoch 12/1000
 - 0s - loss: 0.1963 - acc: 0.9161
Epoch 13/1000
 - 0s - loss: 0.1855 - acc: 0.9209
Epoch 14/1000
 - 0s - loss: 0.1761 - acc: 0.9209
Epoch 15/1000
 - 0s - loss: 0.1677 - acc: 0.9257
Epoch 16/1000
 - 0s - loss: 0.1599 - acc: 0.9281
Epoch 17/1000
 - 0s - loss: 0.1526 - acc: 0.9353
Epoch 18/1000
 - 0s - loss: 0.1460 - acc: 0.9353
Epoch 19/1000
 - 0s - loss: 0.1400 - acc: 0.9376
Epoch 20/1000
 - 0s - loss: 0.1348 - acc: 0.9400
Epoch 21/1000
 - 0s - loss: 0

 - 0s - loss: 0.0300 - acc: 0.9904
Epoch 168/1000
 - 0s - loss: 0.0298 - acc: 0.9904
Epoch 169/1000
 - 0s - loss: 0.0297 - acc: 0.9904
Epoch 170/1000
 - 0s - loss: 0.0295 - acc: 0.9904
Epoch 171/1000
 - 0s - loss: 0.0293 - acc: 0.9904
Epoch 172/1000
 - 0s - loss: 0.0292 - acc: 0.9904
Epoch 173/1000
 - 0s - loss: 0.0289 - acc: 0.9904
Epoch 174/1000
 - 0s - loss: 0.0287 - acc: 0.9904
Epoch 175/1000
 - 0s - loss: 0.0286 - acc: 0.9904
Epoch 176/1000
 - 0s - loss: 0.0284 - acc: 0.9904
Epoch 177/1000
 - 0s - loss: 0.0282 - acc: 0.9904
Epoch 178/1000
 - 0s - loss: 0.0284 - acc: 0.9880
Epoch 179/1000
 - 0s - loss: 0.0288 - acc: 0.9880
Epoch 180/1000
 - 0s - loss: 0.0285 - acc: 0.9880
Epoch 181/1000
 - 0s - loss: 0.0280 - acc: 0.9880
Epoch 182/1000
 - 0s - loss: 0.0276 - acc: 0.9904
Epoch 183/1000
 - 0s - loss: 0.0272 - acc: 0.9904
Epoch 184/1000
 - 0s - loss: 0.0272 - acc: 0.9904
Epoch 185/1000
 - 0s - loss: 0.0270 - acc: 0.9904
Epoch 186/1000
 - 0s - loss: 0.0267 - acc: 0.9904
Epoch 187/1000


 - 0s - loss: 0.0118 - acc: 0.9952
Epoch 332/1000
 - 0s - loss: 0.0118 - acc: 0.9952
Epoch 333/1000
 - 0s - loss: 0.0117 - acc: 0.9952
Epoch 334/1000
 - 0s - loss: 0.0117 - acc: 0.9952
Epoch 335/1000
 - 0s - loss: 0.0117 - acc: 0.9952
Epoch 336/1000
 - 0s - loss: 0.0115 - acc: 0.9952
Epoch 337/1000
 - 0s - loss: 0.0115 - acc: 0.9952
Epoch 338/1000
 - 0s - loss: 0.0114 - acc: 0.9952
Epoch 339/1000
 - 0s - loss: 0.0114 - acc: 0.9952
Epoch 340/1000
 - 0s - loss: 0.0113 - acc: 0.9952
Epoch 341/1000
 - 0s - loss: 0.0113 - acc: 0.9952
Epoch 342/1000
 - 0s - loss: 0.0112 - acc: 0.9952
Epoch 343/1000
 - 0s - loss: 0.0112 - acc: 0.9952
Epoch 344/1000
 - 0s - loss: 0.0112 - acc: 0.9952
Epoch 345/1000
 - 0s - loss: 0.0111 - acc: 0.9952
Epoch 346/1000
 - 0s - loss: 0.0110 - acc: 0.9952
Epoch 347/1000
 - 0s - loss: 0.0110 - acc: 0.9952
Epoch 348/1000
 - 0s - loss: 0.0109 - acc: 0.9952
Epoch 349/1000
 - 0s - loss: 0.0109 - acc: 0.9952
Epoch 350/1000
 - 0s - loss: 0.0109 - acc: 0.9952
Epoch 351/1000


 - 0s - loss: 0.0048 - acc: 1.0000
Epoch 496/1000
 - 0s - loss: 0.0048 - acc: 1.0000
Epoch 497/1000
 - 0s - loss: 0.0048 - acc: 1.0000
Epoch 498/1000
 - 0s - loss: 0.0047 - acc: 1.0000
Epoch 499/1000
 - 0s - loss: 0.0047 - acc: 1.0000
Epoch 500/1000
 - 0s - loss: 0.0047 - acc: 1.0000
Epoch 501/1000
 - 0s - loss: 0.0047 - acc: 1.0000
Epoch 502/1000
 - 0s - loss: 0.0047 - acc: 1.0000
Epoch 503/1000
 - 0s - loss: 0.0046 - acc: 1.0000
Epoch 504/1000
 - 0s - loss: 0.0045 - acc: 1.0000
Epoch 505/1000
 - 0s - loss: 0.0045 - acc: 1.0000
Epoch 506/1000
 - 0s - loss: 0.0044 - acc: 1.0000
Epoch 507/1000
 - 0s - loss: 0.0044 - acc: 1.0000
Epoch 508/1000
 - 0s - loss: 0.0043 - acc: 1.0000
Epoch 509/1000
 - 0s - loss: 0.0043 - acc: 1.0000
Epoch 510/1000
 - 0s - loss: 0.0043 - acc: 1.0000
Epoch 511/1000
 - 0s - loss: 0.0043 - acc: 1.0000
Epoch 512/1000
 - 0s - loss: 0.0042 - acc: 1.0000
Epoch 513/1000
 - 0s - loss: 0.0042 - acc: 1.0000
Epoch 514/1000
 - 0s - loss: 0.0041 - acc: 1.0000
Epoch 515/1000


Epoch 655/1000
 - 0s - loss: 8.1571e-04 - acc: 1.0000
Epoch 656/1000
 - 0s - loss: 0.0010 - acc: 1.0000
Epoch 657/1000
 - 0s - loss: 8.1082e-04 - acc: 1.0000
Epoch 658/1000
 - 0s - loss: 6.6759e-04 - acc: 1.0000
Epoch 659/1000
 - 0s - loss: 6.3845e-04 - acc: 1.0000
Epoch 660/1000
 - 0s - loss: 5.7398e-04 - acc: 1.0000
Epoch 661/1000
 - 0s - loss: 5.4582e-04 - acc: 1.0000
Epoch 662/1000
 - 0s - loss: 5.2881e-04 - acc: 1.0000
Epoch 663/1000
 - 0s - loss: 5.1421e-04 - acc: 1.0000
Epoch 664/1000
 - 0s - loss: 5.1124e-04 - acc: 1.0000
Epoch 665/1000
 - 0s - loss: 5.0081e-04 - acc: 1.0000
Epoch 666/1000
 - 0s - loss: 4.9684e-04 - acc: 1.0000
Epoch 667/1000
 - 0s - loss: 4.8760e-04 - acc: 1.0000
Epoch 668/1000
 - 0s - loss: 4.8408e-04 - acc: 1.0000
Epoch 669/1000
 - 0s - loss: 4.7783e-04 - acc: 1.0000
Epoch 670/1000
 - 0s - loss: 4.6947e-04 - acc: 1.0000
Epoch 671/1000
 - 0s - loss: 4.6479e-04 - acc: 1.0000
Epoch 672/1000
 - 0s - loss: 4.6013e-04 - acc: 1.0000
Epoch 673/1000
 - 0s - loss: 4.5

Epoch 807/1000
 - 0s - loss: 1.3723e-04 - acc: 1.0000
Epoch 808/1000
 - 0s - loss: 1.3569e-04 - acc: 1.0000
Epoch 809/1000
 - 0s - loss: 1.3436e-04 - acc: 1.0000
Epoch 810/1000
 - 0s - loss: 1.3332e-04 - acc: 1.0000
Epoch 811/1000
 - 0s - loss: 1.3198e-04 - acc: 1.0000
Epoch 812/1000
 - 0s - loss: 1.3136e-04 - acc: 1.0000
Epoch 813/1000
 - 0s - loss: 1.3016e-04 - acc: 1.0000
Epoch 814/1000
 - 0s - loss: 1.2833e-04 - acc: 1.0000
Epoch 815/1000
 - 0s - loss: 1.2736e-04 - acc: 1.0000
Epoch 816/1000
 - 0s - loss: 1.2697e-04 - acc: 1.0000
Epoch 817/1000
 - 0s - loss: 1.2594e-04 - acc: 1.0000
Epoch 818/1000
 - 0s - loss: 1.2467e-04 - acc: 1.0000
Epoch 819/1000
 - 0s - loss: 1.2365e-04 - acc: 1.0000
Epoch 820/1000
 - 0s - loss: 1.2400e-04 - acc: 1.0000
Epoch 821/1000
 - 0s - loss: 1.2245e-04 - acc: 1.0000
Epoch 822/1000
 - 0s - loss: 1.2113e-04 - acc: 1.0000
Epoch 823/1000
 - 0s - loss: 1.2029e-04 - acc: 1.0000
Epoch 824/1000
 - 0s - loss: 1.1869e-04 - acc: 1.0000
Epoch 825/1000
 - 0s - loss:

Epoch 959/1000
 - 0s - loss: 4.0480e-05 - acc: 1.0000
Epoch 960/1000
 - 0s - loss: 4.0288e-05 - acc: 1.0000
Epoch 961/1000
 - 0s - loss: 3.9735e-05 - acc: 1.0000
Epoch 962/1000
 - 0s - loss: 3.9902e-05 - acc: 1.0000
Epoch 963/1000
 - 0s - loss: 3.9361e-05 - acc: 1.0000
Epoch 964/1000
 - 0s - loss: 3.9142e-05 - acc: 1.0000
Epoch 965/1000
 - 0s - loss: 3.8628e-05 - acc: 1.0000
Epoch 966/1000
 - 0s - loss: 3.8354e-05 - acc: 1.0000
Epoch 967/1000
 - 0s - loss: 3.8112e-05 - acc: 1.0000
Epoch 968/1000
 - 0s - loss: 3.7751e-05 - acc: 1.0000
Epoch 969/1000
 - 0s - loss: 3.7486e-05 - acc: 1.0000
Epoch 970/1000
 - 0s - loss: 3.7190e-05 - acc: 1.0000
Epoch 971/1000
 - 0s - loss: 3.6968e-05 - acc: 1.0000
Epoch 972/1000
 - 0s - loss: 3.6543e-05 - acc: 1.0000
Epoch 973/1000
 - 0s - loss: 3.6444e-05 - acc: 1.0000
Epoch 974/1000
 - 0s - loss: 3.6040e-05 - acc: 1.0000
Epoch 975/1000
 - 0s - loss: 3.5731e-05 - acc: 1.0000
Epoch 976/1000
 - 0s - loss: 3.5475e-05 - acc: 1.0000
Epoch 977/1000
 - 0s - loss:

<keras.callbacks.History at 0x2700bf34160>

In [82]:
# Test test data with deep model
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(f"Deep Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

Deep Neural Network - Loss: 0.0848088935877091, Accuracy: 0.9928057553956835


In [83]:
# Test demo data with deep model
model_loss_demo, model_accuracy_demo = model.evaluate(
    X_demo_scaled, y_demo_categorical, verbose=2)
print(f"Deep Neural Network - Loss: {model_loss_demo}, Accuracy: {model_accuracy_demo}")

Deep Neural Network - Loss: 1.6887985054836463e-07, Accuracy: 1.0


In [86]:
accuracy_suffix = str(model_accuracy).strip('0.')[0:4]

# Save model to use in demo
model.save(f'models/diagnosis_model_{accuracy_suffix}.h5')

# Save X_train to csv in order to scale demo data in app
X_train.to_csv(f'data/X_train_{accuracy_suffix}.csv', index=False)

In [64]:
pd.read_csv('data/demo_input.csv').shape

(12, 31)

In [85]:
from keras.models import load_model


In [87]:
# Load model, test to make sure it works
from keras.models import load_model
loaded_model = load_model('models/diagnosis_model_9928.h5')

model_loss, model_accuracy = loaded_model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(f"Deep Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

Deep Neural Network - Loss: 0.0848088935877091, Accuracy: 0.9928057553956835


In [89]:
loaded_model.predict_classes(X_demo_scaled)

array([0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0], dtype=int64)