# Simple feed-forward neural network classification of diabetes data with Keras and TensorFlow

In [None]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the Pima Indians diabetes dataset from a CSV file in the working directory.
# NOTE(review): read_csv treats the first row as the header by default; if the
# file has no header row, the first record is consumed as column names and
# header=None should be passed instead — verify against the file.
diabetes = pd.read_csv('pima-indians-diabetes.csv')

In [None]:
# (rows, columns) of the loaded frame
diabetes.shape

In [None]:
# Peek at the first rows to sanity-check the parsed columns
diabetes.head()

In [None]:
# Peek at the last rows as well
diabetes.tail()

In [None]:
# Column-wise minimum. The imputation cell further down treats a 0 in the
# measurement columns as a placeholder for a missing reading, so a minimum of 0
# here shows which columns are affected.
diabetes.min()

In [None]:
# Column dtypes and non-null counts
diabetes.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column
diabetes.describe()

In [None]:
# Display the frame itself (the notebook renders a truncated view for long frames)
diabetes

In [None]:
# Rename the columns to short identifiers. The assignment is positional, so the
# list order must match the column order of the CSV.
# Meanings, following the feature list in the single-patient cell near the end:
#   NumTimesPrg   - number of times pregnant
#   PlGlcConc     - plasma glucose concentration (2 h oral glucose tolerance test)
#   BloodP        - diastolic blood pressure (mm Hg)
#   SkinThick     - triceps skin fold thickness (mm)
#   TwoHourSerIns - 2-hour serum insulin (mu U/ml)
#   BMI           - body mass index (kg/m^2)
#   DiPedFunc     - diabetes pedigree function
#   Age           - age in years
#   HasDiabetes   - class label used as the prediction target
diabetes.columns = [
    "NumTimesPrg", "PlGlcConc", "BloodP",
    "SkinThick", "TwoHourSerIns", "BMI",
    "DiPedFunc", "Age", "HasDiabetes"]

In [None]:
# Count, per column, how many entries are exactly 0
(diabetes == 0).sum()

In [None]:
# Count, per column, how many entries are missing (NaN)
diabetes.isna().sum()

In [None]:
# Impute missing readings. In these measurement columns a value of 0 is treated
# as a placeholder for "not recorded", so each 0 is replaced by the column mean.
# The mean is taken over the non-zero entries only: averaging over the whole
# column would let the placeholder zeros drag the fill value down.
# NOTE(review): the statistics are computed on the full dataset, before the
# train/test split further down, so test rows influence the fill values.
for col in ['PlGlcConc', 'BloodP', 'SkinThick', 'TwoHourSerIns', 'BMI']:
    observed = diabetes[col] != 0
    mean_val = diabetes.loc[observed, col].mean()
    # where() keeps the observed values and substitutes mean_val everywhere else
    diabetes[col] = diabetes[col].where(observed, mean_val)

In [None]:
# Display the frame again to inspect the result of the zero replacement
diabetes

In [None]:
# Histogram of every numeric column, one subplot per column
diabetes.hist()

In [None]:
# Pairwise correlation between all columns (DataFrame.corr defaults to Pearson);
# the matrix is kept in `corr` for the heatmap in the next cell
corr = diabetes.corr()
corr

In [None]:
# Visualise the correlation matrix; annot=True writes each coefficient into its cell
import seaborn as sns
sns.heatmap(corr, annot = True)

In [None]:
# Split into train/test sets.
# A 1% hold-out leaves only a handful of test rows, which makes the validation
# curve and the final correct/incorrect counts statistically meaningless, so
# 20% is held out instead. stratify keeps the class ratio of HasDiabetes the
# same in both splits; random_state fixes the shuffle for reproducibility.
from sklearn.model_selection import train_test_split
train_set, test_set = train_test_split(
    diabetes,
    test_size=0.2,
    stratify=diabetes["HasDiabetes"],
    random_state=42,
)

In [None]:
# Display the training split (rows keep their original index after shuffling)
train_set

In [None]:
# (rows, columns) of the training split
train_set.shape

In [None]:
# (rows, columns) of the test split
test_set.shape

In [None]:
# Separate the target column from the features in both splits.
# Labels are copied out first; the feature frames are then rebound without
# the "HasDiabetes" column.
train_set_labels, test_set_labels = (
    split["HasDiabetes"].copy() for split in (train_set, test_set)
)
train_set, test_set = (
    split.drop(columns="HasDiabetes") for split in (train_set, test_set)
)

In [None]:
# Learn the scaling parameters from the training inputs only, so the test
# inputs do not influence the fitted per-feature min/max.
# MinMaxScaler (imported under the alias Scaler) rescales each feature to the
# [0, 1] range by default.
from sklearn.preprocessing import MinMaxScaler as Scaler
scaler = Scaler()
scaler.fit(train_set)

In [None]:
# Apply the scaling learned from the training inputs to both splits
train_set_scaled, test_set_scaled = (
    scaler.transform(split) for split in (train_set, test_set)
)

In [None]:
# Conventional X/y names for the model: scaled feature matrices and
# label vectors converted to NumPy arrays
X_train, X_test = train_set_scaled, test_set_scaled
y_train, y_test = map(np.asarray, (train_set_labels, test_set_labels))

In [None]:
# Build the classifier: a small fully connected feed-forward network
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

model = Sequential([
    # first hidden layer; input_dim=8 matches the eight feature columns
    Dense(64, activation='relu', input_dim=8),
    # second hidden layer
    Dense(64, activation='relu'),
    # output layer: one sigmoid unit yields a value in (0, 1) for the binary
    # label (softmax is not an option with a single unit — it would always
    # output 1)
    Dense(1, activation='sigmoid'),
])

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss (the usual
# pairing for a single sigmoid output), and binary accuracy as the reported metric
model.compile(optimizer="adam",
             loss='binary_crossentropy',
             metrics=['binary_accuracy'])

In [None]:
# Plot the network architecture (only works if pydot and the Graphviz library are installed)
# from tensorflow.keras.utils import plot_model
# plot_model(model, to_file='/tmp/model.png', show_shapes=True)

In [None]:
# Train the network. The returned History object (hist) records the per-epoch
# loss and metric values, which the following cell plots.
# NOTE(review): the test split is passed as validation data, so there is no
# separate untouched hold-out set — confirm this is intended.
hist = model.fit(X_train,
          y_train,
          epochs=1000,
          verbose=1,
          batch_size=32,
          validation_data=(X_test,y_test)
)

In [None]:
# Plot training vs. validation loss per epoch from the recorded history
history_dict = hist.history
loss_values, val_loss_values = history_dict['loss'], history_dict['val_loss']
epochs = range(1, len(loss_values) + 1)  # 1-based epoch numbers for the x axis

fig, ax = plt.subplots()
# blue dots: training loss; solid blue line: validation loss
ax.plot(epochs, loss_values, 'bo', label='Training loss')
ax.plot(epochs, val_loss_values, 'b', label='Validation loss')
ax.set_title('Training and validation loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()

plt.show()

In [None]:
# Raw model outputs for the test inputs: one sigmoid value per row
res = model.predict(X_test)
res

In [None]:
# Make a prediction for a single, hand-entered patient record.
#
# Feature order (must match the training columns):
# 1. Number of times pregnant (max=17)
# 2. Plasma glucose concentration a 2 hours in an oral glucose tolerance test (max=199)
# 3. Diastolic blood pressure (mm Hg) (max=122)
# 4. Triceps skin fold thickness (mm) (max=99)
# 5. 2-Hour serum insulin (mu U/ml) (max=846)
# 6. Body mass index (weight in kg/(height in m)^2) (max=67)
# 7. Diabetes pedigree function (max=2.42)
# 8. Age (years) (max=81)
#
# NOTE(review): the insulin value is 0 here, whereas zeros in that column were
# replaced by the column mean before training — this record is therefore
# outside the range the scaler and model were fitted on.

single_pacient_data = pd.DataFrame(
    [[0, 166, 86, 35, 0, 43.6, 0.627, 65]],
    # reuse the training column names so the scaler receives the same feature
    # names it was fitted with (avoids scikit-learn's feature-name warning)
    columns=train_set.columns,
)
# scale input data with the scaler fitted on the training inputs
single_pacient_data_scaled = scaler.transform(single_pacient_data)
X_test_single = np.asarray(single_pacient_data_scaled)

# probability from the sigmoid output
result = model.predict(X_test_single)
print("predicted: ", result)

# Sequential.predict_classes() was removed from TensorFlow; thresholding the
# sigmoid output at 0.5 is the documented equivalent for a binary classifier.
result = (result > 0.5).astype("int32")
print("predicted clas: ", result)

In [None]:
# See which test rows were predicted correctly and which were not.
#
# Sequential.predict_classes() was removed from TensorFlow; thresholding the
# sigmoid output at 0.5 is the documented equivalent for a binary classifier.
# The prediction array has shape (n, 1) while y_test has shape (n,): comparing
# them directly would broadcast to an (n, n) matrix and produce wrong counts,
# so both sides are flattened to 1-D before the element-wise comparison.
predicted_classes = (model.predict(X_test) > 0.5).astype("int32").ravel()
true_classes = np.asarray(y_test).ravel()

correct_indices = np.nonzero(predicted_classes == true_classes)[0]
incorrect_indices = np.nonzero(predicted_classes != true_classes)[0]

print(len(correct_indices)," classified correctly")
print(len(incorrect_indices)," classified incorrectly")