In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd


In [2]:
# Load the diabetes dataset from a CSV file located in the working directory.
df = pd.read_csv("diabetes.csv")

In [3]:
# Preview the first rows of the raw data before any scaling is applied.
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [4]:
# Show the column names of the loaded frame.
df.columns

Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'],
      dtype='object')

In [7]:
# Continuous features that get min-max scaled; Age and Outcome are left out
# of this list on purpose and therefore keep their raw values.
cols_to_norm = [
    "Pregnancies",
    "Glucose",
    "BloodPressure",
    "SkinThickness",
    "Insulin",
    "BMI",
    "DiabetesPedigreeFunction",
]



In [8]:
def _min_max_scale(col):
    """Rescale one numeric column linearly onto the range [0, 1].

    Parameters
    ----------
    col : pandas.Series
        A single numeric feature column.

    Returns
    -------
    pandas.Series
        ``(col - min) / (max - min)``. A constant column (max == min) is
        mapped to all zeros instead of the NaNs that 0/0 would produce.
    """
    lo = col.min()
    span = col.max() - lo
    if span == 0:
        # Constant column: dividing would give 0/0 = NaN for every row.
        return (col - lo).astype(float)
    return (col - lo) / span


# NOTE(review): min and max are taken over the whole frame, i.e. before the
# train/test split further down, so test rows influence the scaling.
df[cols_to_norm] = df[cols_to_norm].apply(_min_max_scale)

In [9]:
# Re-inspect the frame: the columns listed in cols_to_norm are now scaled,
# while Age and Outcome still hold their original values.
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,0.352941,0.743719,0.590164,0.353535,0.0,0.500745,0.234415,50,1
1,0.058824,0.427136,0.540984,0.292929,0.0,0.396423,0.116567,31,0
2,0.470588,0.919598,0.52459,0.0,0.0,0.347243,0.253629,32,1
3,0.058824,0.447236,0.540984,0.232323,0.111111,0.418778,0.038002,21,0
4,0.0,0.688442,0.327869,0.353535,0.198582,0.642325,0.943638,33,1


In [10]:
# Alternative: scikit-learn's MinMaxScaler can perform the same min-max scaling as the manual formula above.
from sklearn.preprocessing import MinMaxScaler


MinMaxScaler(copy=True, feature_range=(0, 1))

In [19]:
# One numeric feature column per min-max-scaled input column.
feat_cols = [tf.feature_column.numeric_column(col_name) for col_name in cols_to_norm]

# Age is declared separately because it is bucketized later rather than
# being fed to the model as a raw number.
age = tf.feature_column.numeric_column('Age')

In [20]:
# Decade edges used to turn the numeric Age column into categorical buckets.
age_boundaries = [20, 30, 40, 50, 60, 70, 80, 90]
assigned_age = tf.feature_column.bucketized_column(
    age,
    boundaries=age_boundaries,
)

In [21]:
# Add the bucketized Age column to the feature list. The membership check
# keeps this cell idempotent: re-running it must not append a second copy
# of the same column.
if assigned_age not in feat_cols:
    feat_cols.append(assigned_age)

In [22]:
# Show the assembled feature-column list (numeric columns plus bucketized Age).
feat_cols

[_NumericColumn(key='Pregnancies', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='Glucose', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='BloodPressure', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='SkinThickness', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='Insulin', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='BMI', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _NumericColumn(key='DiabetesPedigreeFunction', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 _BucketizedColumn(source_column=_NumericColumn(key='Age', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), boundaries=(20, 30, 40, 50, 60, 70, 80, 90))]

In [23]:
# Feature matrix: every column of df except the Outcome label.
x_data = df.drop(labels=['Outcome'], axis='columns')

In [25]:
# Preview the feature frame; the Outcome column should no longer be present.
x_data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,0.352941,0.743719,0.590164,0.353535,0.0,0.500745,0.234415,50
1,0.058824,0.427136,0.540984,0.292929,0.0,0.396423,0.116567,31
2,0.470588,0.919598,0.52459,0.0,0.0,0.347243,0.253629,32
3,0.058824,0.447236,0.540984,0.232323,0.111111,0.418778,0.038002,21
4,0.0,0.688442,0.327869,0.353535,0.198582,0.642325,0.943638,33


In [26]:
from sklearn.model_selection import train_test_split

In [27]:
# Target vector: the Outcome column of the frame.
labels = df['Outcome']

# Hold out 30% of the rows for testing; the fixed seed makes the split
# repeatable across runs.
split_parts = train_test_split(
    x_data,
    labels,
    test_size=0.3,
    random_state=123,
)
x_train, x_test, y_train, y_test = split_parts

# Train and evaluate


In [28]:
# Training input: shuffled mini-batches of 10 rows drawn from the training
# split, repeated for up to 1000 epochs.
input_func = tf.estimator.inputs.pandas_input_fn(
    x=x_train,
    y=y_train,
    batch_size=10,
    num_epochs=1000,
    shuffle=True,
)

In [30]:
# Two-class linear classifier over the assembled feature columns. No
# model_dir is passed; the logged config below shows a temporary directory
# being used for checkpoints.
model = tf.estimator.LinearClassifier(
    feature_columns=feat_cols,
    n_classes=2,
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/var/folders/tx/ftbzjwns6fgg8d6cvfwrz6k40000gn/T/tmpb11_s52f', '_tf_random_seed': 1, '_save_summary_steps': 100, '_save_checkpoints_secs': 600, '_save_checkpoints_steps': None, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100}


In [33]:
# Train for 1000 steps on the training input function.
# NOTE(review): the captured log below restores model.ckpt-1100 and starts at
# step 1101, so this cell had already been run before on the same model;
# each re-run continues from the latest checkpoint rather than from scratch.
model.train(
    input_fn=input_func,
    steps=1000,
)

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Restoring parameters from /var/folders/tx/ftbzjwns6fgg8d6cvfwrz6k40000gn/T/tmpb11_s52f/model.ckpt-1100
INFO:tensorflow:Saving checkpoints for 1101 into /var/folders/tx/ftbzjwns6fgg8d6cvfwrz6k40000gn/T/tmpb11_s52f/model.ckpt.
INFO:tensorflow:loss = 2.08575, step = 1101
INFO:tensorflow:global_step/sec: 463.953
INFO:tensorflow:loss = 3.12087, step = 1201 (0.217 sec)
INFO:tensorflow:global_step/sec: 486.285
INFO:tensorflow:loss = 6.83051, step = 1301 (0.206 sec)
INFO:tensorflow:global_step/sec: 484.768
INFO:tensorflow:loss = 4.95529, step = 1401 (0.206 sec)
INFO:tensorflow:global_step/sec: 477.35
INFO:tensorflow:loss = 7.55013, step = 1501 (0.210 sec)
INFO:tensorflow:global_step/sec: 473.59
INFO:tensorflow:loss = 4.6458, step = 1601 (0.211 sec)
INFO:tensorflow:global_step/sec: 468.67
INFO:tensorflow:loss = 4.10257, step = 1701 (0.213 sec)
INFO:tensorflow:global_step/sec: 458.423
INFO:tensorflow:loss = 5.35524, step = 1801 (0.218 s

<tensorflow.python.estimator.canned.linear.LinearClassifier at 0x115c53358>

In [35]:
# Evaluation input: a single, unshuffled pass over the held-out test split.
eval_input_func = tf.estimator.inputs.pandas_input_fn(
    x=x_test,
    y=y_test,
    batch_size=10,
    num_epochs=1,
    shuffle=False,
)

In [36]:
# Score the trained model on the test split and keep the returned metrics.
results = model.evaluate(input_fn=eval_input_func)

INFO:tensorflow:Starting evaluation at 2018-01-01-11:41:09
INFO:tensorflow:Restoring parameters from /var/folders/tx/ftbzjwns6fgg8d6cvfwrz6k40000gn/T/tmpb11_s52f/model.ckpt-2100
INFO:tensorflow:Finished evaluation at 2018-01-01-11:41:09
INFO:tensorflow:Saving dict for global step 2100: accuracy = 0.757576, accuracy_baseline = 0.619048, auc = 0.827678, auc_precision_recall = 0.744831, average_loss = 0.512469, global_step = 2100, label/mean = 0.380952, loss = 4.93251, prediction/mean = 0.338772


In [38]:
# Display the evaluation metrics returned by model.evaluate.
results

{'accuracy': 0.75757575,
 'accuracy_baseline': 0.61904764,
 'auc': 0.82767802,
 'auc_precision_recall': 0.74483079,
 'average_loss': 0.51246905,
 'global_step': 2100,
 'label/mean': 0.38095239,
 'loss': 4.9325147,
 'prediction/mean': 0.33877197}

# Predictions

In [39]:
# Prediction input: features only (no labels), one unshuffled pass so the
# predictions line up with the row order of x_test.
pred_input_func = tf.estimator.inputs.pandas_input_fn(
    x=x_test,
    batch_size=10,
    num_epochs=1,
    shuffle=False,
)

# Nothing is computed yet: the checkpoint-restore log only appears in the
# next cell, when the returned object is iterated.
predictions = model.predict(input_fn=pred_input_func)

In [40]:
# Materialize every prediction into a list (one dict per test row).
# NOTE(review): `predictions` appears to be a one-shot iterator, so re-create
# it in the previous cell before re-running this one - confirm.
my_pred = list(predictions)

# Show only the first few entries; dumping the full list floods the notebook
# output. `my_pred` itself still holds every prediction.
my_pred[:5]

INFO:tensorflow:Restoring parameters from /var/folders/tx/ftbzjwns6fgg8d6cvfwrz6k40000gn/T/tmpb11_s52f/model.ckpt-2100


[{'class_ids': array([1]),
  'classes': array([b'1'], dtype=object),
  'logistic': array([ 0.71197867], dtype=float32),
  'logits': array([ 0.90501332], dtype=float32),
  'probabilities': array([ 0.28802136,  0.71197867], dtype=float32)},
 {'class_ids': array([0]),
  'classes': array([b'0'], dtype=object),
  'logistic': array([ 0.38503578], dtype=float32),
  'logits': array([-0.46822786], dtype=float32),
  'probabilities': array([ 0.61496425,  0.38503578], dtype=float32)},
 {'class_ids': array([0]),
  'classes': array([b'0'], dtype=object),
  'logistic': array([ 0.41249239], dtype=float32),
  'logits': array([-0.35367143], dtype=float32),
  'probabilities': array([ 0.58750761,  0.41249239], dtype=float32)},
 {'class_ids': array([0]),
  'classes': array([b'0'], dtype=object),
  'logistic': array([ 0.10971252], dtype=float32),
  'logits': array([-2.09368086], dtype=float32),
  'probabilities': array([ 0.89028746,  0.10971253], dtype=float32)},
 {'class_ids': array([0]),
  'classes': arra

In [41]:
# Pull the predicted class id out of each prediction dict.
[pred['class_ids'][0] for pred in my_pred]

[1,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0]