### Imports and data loading

In [1]:
import pandas as pd
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import logging
logging.getLogger('tensorflow').setLevel(logging.ERROR)

# Load the Pima Indians diabetes CSV and discard the 'Group' column in a
# single chained expression.
diabetes = pd.read_csv('../sample_data/pima-indians-diabetes.csv').drop('Group', axis=1)

### Examine our data:

In [2]:
# Preview the first five rows of the loaded frame.
diabetes.head(n=5)

Unnamed: 0,Number_pregnant,Glucose_concentration,Blood_pressure,Triceps,Insulin,BMI,Pedigree,Age,Class
0,6,0.743719,0.590164,0.353535,0.0,0.500745,0.234415,50,1
1,1,0.427136,0.540984,0.292929,0.0,0.396423,0.116567,31,0
2,8,0.919598,0.52459,0.0,0.0,0.347243,0.253629,32,1
3,1,0.447236,0.540984,0.232323,0.111111,0.418778,0.038002,21,0
4,0,0.688442,0.327869,0.353535,0.198582,0.642325,0.943638,33,1


In [3]:
# (number of rows, number of columns) of the frame after dropping 'Group'.
diabetes.shape

(768, 9)

### Normalize our data (Similar to Feature Extraction):

In [4]:
# In the preview above these are the only columns whose values are not
# already between 0 and 1, so they are the ones rescaled below.
columns_to_normalize = [
    'Number_pregnant',
    'Age',
]

In [5]:
# Min-max scale the selected columns to [0, 1]: (value - min) / (max - min).
# frame.min() / frame.max() are per-column Series, which pandas aligns with
# the frame's columns, so each column is scaled by its own min and range.
diabetes[columns_to_normalize] = diabetes[columns_to_normalize].pipe(
    lambda frame: (frame - frame.min()) / (frame.max() - frame.min())
)

We see that `Number_pregnant` and `Age` values have changed:

In [6]:
# Preview the first five rows again to confirm the rescaled columns.
diabetes.head(n=5)

Unnamed: 0,Number_pregnant,Glucose_concentration,Blood_pressure,Triceps,Insulin,BMI,Pedigree,Age,Class
0,0.352941,0.743719,0.590164,0.353535,0.0,0.500745,0.234415,0.483333,1
1,0.058824,0.427136,0.540984,0.292929,0.0,0.396423,0.116567,0.166667,0
2,0.470588,0.919598,0.52459,0.0,0.0,0.347243,0.253629,0.183333,1
3,0.058824,0.447236,0.540984,0.232323,0.111111,0.418778,0.038002,0.0,0
4,0.0,0.688442,0.327869,0.353535,0.198582,0.642325,0.943638,0.2,1


### Prep our data for use in TensorFlow:

In [7]:
# Build one numeric feature column per input feature. The keys are the
# DataFrame column names; the targets on the left are listed in the same
# order as the keys on the right.
(
    num_preg,
    plasma_gluc,
    dias_press,
    tricep,
    insulin,
    bmi,
    diabetes_pedigree,
    age,
) = map(
    tf.feature_column.numeric_column,
    [
        'Number_pregnant',
        'Glucose_concentration',
        'Blood_pressure',
        'Triceps',
        'Insulin',
        'BMI',
        'Pedigree',
        'Age',
    ],
)

In [8]:
# Every feature column the classifier consumes, in DataFrame column order.
feat_cols = [
    num_preg, plasma_gluc, dias_press, tricep,
    insulin, bmi, diabetes_pedigree, age,
]

### Separate the *Features* (`Inputs/ Xs`) from the *Labels* (`Outputs/ Y`):

In [9]:
# Features: every column except the 'Class' label.
x_data = diabetes.drop(columns=['Class'])
x_data.head(n=5)

Unnamed: 0,Number_pregnant,Glucose_concentration,Blood_pressure,Triceps,Insulin,BMI,Pedigree,Age
0,0.352941,0.743719,0.590164,0.353535,0.0,0.500745,0.234415,0.483333
1,0.058824,0.427136,0.540984,0.292929,0.0,0.396423,0.116567,0.166667
2,0.470588,0.919598,0.52459,0.0,0.0,0.347243,0.253629,0.183333
3,0.058824,0.447236,0.540984,0.232323,0.111111,0.418778,0.038002,0.0
4,0.0,0.688442,0.327869,0.353535,0.198582,0.642325,0.943638,0.2


In [10]:
# Labels: the 'Class' column. The variable name keeps its existing spelling
# because the train/test split cell refers to it as `lables`.
lables = diabetes.loc[:, 'Class']
lables.head(n=5)

0    1
1    0
2    1
3    0
4    1
Name: Class, dtype: int64

### Split our data into Training and Testing:

In [11]:
# Hold out 25% of the rows for testing (the scikit-learn default, now stated
# explicitly). Fixing random_state makes the shuffle -- and therefore every
# prediction and metric computed from X_test / y_test -- repeatable when the
# notebook is restarted and re-run.
X_train, X_test, y_train, y_test = train_test_split(
    x_data,
    lables,
    test_size=0.25,
    random_state=42,
)

In [12]:
# Print the shapes of the four split parts on one line, space-separated.
print(*(part.shape for part in (X_train, y_train, X_test, y_test)))

(576, 8) (576,) (192, 8) (192,)


### Configure our `Supervised Learning Algorithm`:

In [13]:
# Training input function: feeds shuffled batches of 10 rows from the
# training split, cycling through the data for at most 1000 epochs.
input_func = tf.estimator.inputs.pandas_input_fn(
    x=X_train, y=y_train,
    shuffle=True,
    batch_size=10,
    num_epochs=1000,
)

In [14]:
# Binary classifier with three hidden layers of 10 neurons each.
# tf_random_seed pins the seed TensorFlow uses for its initializers, so the
# starting weights are the same on every Restart & Run All instead of
# changing from run to run.
dnn_model = tf.estimator.DNNClassifier(
    hidden_units=[10, 10, 10],
    feature_columns=feat_cols,
    n_classes=2,
    config=tf.estimator.RunConfig(tf_random_seed=42),
)

### Use the model (`Supervised Learning Algorithm`) to produce a `Prediction Function`:

In [15]:
# Run 1000 training steps, drawing batches from the training input function.
# The call returns the estimator itself, which is what the cell displays.
dnn_model.train(steps=1000, input_fn=input_func)

<tensorflow_estimator.python.estimator.canned.dnn.DNNClassifier at 0x135de5128>

In [16]:
# Prediction input function: test features only (no labels), unshuffled so
# the predictions come back in the same row order as X_test.
pred_input_func = tf.estimator.inputs.pandas_input_fn(shuffle=False, x=X_test)

### Produce predictions in a human-readable format:

In [17]:
# predict() yields one dict per test row; keep only its 'probabilities' entry.
predictions = [
    row['probabilities']
    for row in dnn_model.predict(input_fn=pred_input_func)
]

# Tabulate the two class probabilities and put the true label in the first
# column. Labels are matched to predictions by position, not by index.
prediction_probabilities = pd.DataFrame(predictions, columns=['Prob 0', 'Prob 1'])
prediction_probabilities.insert(loc=0, column="Actual", value=y_test.tolist())

### Examine 5 Predictions:

In [25]:
# Show five rows (positions 100 through 104) of the predictions table.
prediction_probabilities.iloc[100:105]

Unnamed: 0,Actual,Prob 0,Prob 1
100,0,0.495744,0.504256
101,1,0.292644,0.707356
102,1,0.362606,0.637394
103,0,0.936927,0.063073
104,1,0.216723,0.783277


### Evaluate overall model accuracy:

In [21]:
# Evaluation input function: a single unshuffled pass over the test split,
# with labels, in batches of 10.
eval_input_func = tf.estimator.inputs.pandas_input_fn(
    x=X_test, y=y_test,
    shuffle=False,
    num_epochs=1,
    batch_size=10,
)

# Compute the estimator's evaluation metrics on the held-out data.
dnn_model.evaluate(input_fn=eval_input_func)

{'accuracy': 0.7864583,
 'accuracy_baseline': 0.6770834,
 'auc': 0.8284119,
 'auc_precision_recall': 0.738409,
 'average_loss': 0.46401748,
 'label/mean': 0.32291666,
 'loss': 4.454568,
 'precision': 0.7692308,
 'prediction/mean': 0.30088872,
 'recall': 0.48387095,
 'global_step': 1000}