# TensorFlow Classification

In [1]:
import pandas as pd

In [2]:
# Read the diabetes dataset from a CSV file in the current working directory.
diabetes = pd.read_csv(filepath_or_buffer='diabetes.csv')

In [3]:
# Preview the first rows to check that the file loaded with the expected columns.
diabetes.head()

Unnamed: 0,Number_pregnant,Glucose_concentration,Blood_pressure,Triceps,Insulin,BMI,Pedigree,Age,Class,Group
0,6,0.743719,0.590164,0.353535,0.0,0.500745,0.234415,50,1,B
1,1,0.427136,0.540984,0.292929,0.0,0.396423,0.116567,31,0,C
2,8,0.919598,0.52459,0.0,0.0,0.347243,0.253629,32,1,B
3,1,0.447236,0.540984,0.232323,0.111111,0.418778,0.038002,21,0,B
4,0,0.688442,0.327869,0.353535,0.198582,0.642325,0.943638,33,1,C


### Normalize the dataset

In [5]:
# Names of the columns that the min-max normalization step rescales.
# 'Age' is intentionally absent from this list.
cols_to_norm = [
    'Number_pregnant',
    'Glucose_concentration',
    'Blood_pressure',
    'Triceps',
    'Insulin',
    'BMI',
    'Pedigree',
]

In [6]:
def _min_max_scale(column):
    """Rescale a numeric Series to the [0, 1] range using its own min and max.

    A constant column has a range of zero; dividing by it would turn every
    value into NaN. In that case the range is treated as 1, so the column
    becomes all zeros instead.
    """
    lowest = column.min()
    value_range = column.max() - lowest
    if value_range == 0:
        value_range = 1
    return (column - lowest) / value_range


# NOTE(review): the min/max statistics are computed on the full dataset,
# i.e. before the train/test split that happens later in this notebook.
diabetes[cols_to_norm] = diabetes[cols_to_norm].apply(_min_max_scale)

### Create the Feature Columns to be accessed in the model

In [7]:
import tensorflow as tf

In [10]:
# Continuous features: one numeric column per DataFrame column name.
(num_preg, plasma_gluc, dias_press, tricep,
 insulin, bmi, diabetes_pedigree, age) = (
    tf.feature_column.numeric_column(column_name)
    for column_name in (
        'Number_pregnant',
        'Glucose_concentration',
        'Blood_pressure',
        'Triceps',
        'Insulin',
        'BMI',
        'Pedigree',
        'Age',
    )
)

# Categorical feature described by an explicit vocabulary.
# Alternative:
# assigned_group = tf.feature_column.categorical_column_with_hash_bucket('Group', hash_bucket_size=10)
assigned_group = tf.feature_column.categorical_column_with_vocabulary_list(
    key='Group',
    vocabulary_list=['A', 'B', 'C', 'D'],
)

# Age is bucketized at decade boundaries rather than used as a raw number.
age_buckets = tf.feature_column.bucketized_column(
    source_column=age,
    boundaries=[20, 30, 40, 50, 60, 70, 80],
)

### Putting them together

In [11]:
# Feature columns handed to the linear model. The raw `age` column is not
# included; only its bucketized form is.
feat_cols = [
    num_preg,
    plasma_gluc,
    dias_press,
    tricep,
    insulin,
    bmi,
    diabetes_pedigree,
    assigned_group,
    age_buckets,
]

### Train Test Split

In [18]:
# Preview the frame again before building the feature matrix and labels.
diabetes.head()

Unnamed: 0,Number_pregnant,Glucose_concentration,Blood_pressure,Triceps,Insulin,BMI,Pedigree,Age,Class,Group
0,0.352941,0.743719,0.590164,0.353535,0.0,0.500745,0.234415,50,1,B
1,0.058824,0.427136,0.540984,0.292929,0.0,0.396423,0.116567,31,0,C
2,0.470588,0.919598,0.52459,0.0,0.0,0.347243,0.253629,32,1,B
3,0.058824,0.447236,0.540984,0.232323,0.111111,0.418778,0.038002,21,0,B
4,0.0,0.688442,0.327869,0.353535,0.198582,0.642325,0.943638,33,1,C


In [15]:
# The feature matrix is every column except 'Class'. 'Class' is what the model
# predicts, so it is passed separately as the labels to the train/test split.
x_data = diabetes.drop(labels='Class', axis='columns')

In [12]:
# Target values: the 'Class' column on its own.
labels = diabetes.loc[:, 'Class']

In [13]:
from sklearn.model_selection import train_test_split

In [16]:
# Hold out 33% of the rows for testing; the fixed random_state keeps the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    x_data,
    labels,
    test_size=0.33,
    random_state=101,
)

### Input Function

In [259]:
# Training input function: shuffled batches of 10 rows drawn from the
# training split, with num_epochs set to 1000.
input_func = tf.estimator.inputs.pandas_input_fn(
    x=X_train,
    y=y_train,
    batch_size=10,
    num_epochs=1000,
    shuffle=True,
)

### Creating the Model

In [260]:
# Binary linear classifier built on the feature columns defined earlier.
model = tf.estimator.LinearClassifier(
    n_classes=2,
    feature_columns=feat_cols,
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_log_step_count_steps': 100, '_save_checkpoints_steps': None, '_model_dir': 'C:\\Users\\Marcial\\AppData\\Local\\Temp\\tmp0qgb7dfs', '_save_summary_steps': 100, '_tf_random_seed': 1, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_session_config': None}


In [261]:
# Train the linear model for 1000 steps on the training input function.
model.train(
    steps=1000,
    input_fn=input_func,
)

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into C:\Users\Marcial\AppData\Local\Temp\tmp0qgb7dfs\model.ckpt.
INFO:tensorflow:loss = 6.93147, step = 1
INFO:tensorflow:global_step/sec: 214.874
INFO:tensorflow:loss = 4.84295, step = 101 (0.466 sec)
INFO:tensorflow:global_step/sec: 223.933
INFO:tensorflow:loss = 7.28367, step = 201 (0.448 sec)
INFO:tensorflow:global_step/sec: 227.144
INFO:tensorflow:loss = 5.93065, step = 301 (0.439 sec)
INFO:tensorflow:global_step/sec: 231.054
INFO:tensorflow:loss = 4.88241, step = 401 (0.433 sec)
INFO:tensorflow:global_step/sec: 225.003
INFO:tensorflow:loss = 7.39924, step = 501 (0.444 sec)
INFO:tensorflow:global_step/sec: 228.566
INFO:tensorflow:loss = 4.06486, step = 601 (0.438 sec)
INFO:tensorflow:global_step/sec: 230.433
INFO:tensorflow:loss = 5.13115, step = 701 (0.434 sec)
INFO:tensorflow:global_step/sec: 229.748
INFO:tensorflow:loss = 6.61412, step = 801 (0.435 sec)
INFO:tensorflow:global_step/sec: 225.494


<tensorflow.python.estimator.canned.linear.LinearClassifier at 0x1d01c710588>

In [262]:
# Useful link for your own data
# https://stackoverflow.com/questions/44664285/what-are-the-contraints-for-tensorflow-scope-names

## Evaluation

In [289]:
# Evaluation input function: a single, unshuffled pass over the test split
# in batches of 10, with labels supplied.
eval_input_func = tf.estimator.inputs.pandas_input_fn(
    x=X_test, y=y_test, batch_size=10, num_epochs=1, shuffle=False,
)

In [281]:
# Evaluate the trained linear model on the test split and keep the metrics dict.
results = model.evaluate(input_fn=eval_input_func)

INFO:tensorflow:Starting evaluation at 2017-08-29-23:48:59
INFO:tensorflow:Restoring parameters from C:\Users\Marcial\AppData\Local\Temp\tmp0qgb7dfs\model.ckpt-1000
INFO:tensorflow:Finished evaluation at 2017-08-29-23:49:00
INFO:tensorflow:Saving dict for global step 1000: accuracy = 0.716535, accuracy_baseline = 0.65748, auc = 0.782745, auc_precision_recall = 0.619158, average_loss = 0.539032, global_step = 1000, label/mean = 0.34252, loss = 5.26593, prediction/mean = 0.387154


In [290]:
# Display the metrics returned by model.evaluate.
results

{'accuracy': 0.71653545,
 'accuracy_baseline': 0.65748036,
 'auc': 0.78274488,
 'auc_precision_recall': 0.61915845,
 'average_loss': 0.53903216,
 'global_step': 1000,
 'label/mean': 0.34251967,
 'loss': 5.2659297,
 'prediction/mean': 0.38715369}

## Predictions

In [293]:
# Prediction input function: features only (no labels), a single unshuffled
# pass over the test split in batches of 10.
pred_input_func = tf.estimator.inputs.pandas_input_fn(
    x=X_test, batch_size=10, num_epochs=1, shuffle=False,
)

In [304]:
# model.predict returns a generator, not a list of results.
predictions = model.predict(input_fn=pred_input_func)

In [305]:
# Materialize the generator so every prediction dict is displayed.
# The generator is exhausted afterwards and cannot be iterated again.
list(predictions)

INFO:tensorflow:Restoring parameters from C:\Users\Marcial\AppData\Local\Temp\tmp0qgb7dfs\model.ckpt-1000


[{'class_ids': array([1], dtype=int64),
  'classes': array([b'1'], dtype=object),
  'logistic': array([ 0.55796117], dtype=float32),
  'logits': array([ 0.23289156], dtype=float32),
  'probabilities': array([ 0.44203883,  0.55796117], dtype=float32)},
 {'class_ids': array([1], dtype=int64),
  'classes': array([b'1'], dtype=object),
  'logistic': array([ 0.65618318], dtype=float32),
  'logits': array([ 0.64633107], dtype=float32),
  'probabilities': array([ 0.34381679,  0.65618318], dtype=float32)},
 {'class_ids': array([0], dtype=int64),
  'classes': array([b'0'], dtype=object),
  'logistic': array([ 0.39375734], dtype=float32),
  'logits': array([-0.43154556], dtype=float32),
  'probabilities': array([ 0.60624272,  0.39375731], dtype=float32)},
 {'class_ids': array([0], dtype=int64),
  'classes': array([b'0'], dtype=object),
  'logistic': array([ 0.35286093], dtype=float32),
  'logits': array([-0.60648727], dtype=float32),
  'probabilities': array([ 0.64713913,  0.35286093], dtype=flo

# DNN Classifier

In [17]:
# Re-create the split for the DNN section, using the same arguments and seed
# as the split made for the linear classifier.
X_train, X_test, y_train, y_test = train_test_split(
    x_data,
    labels,
    test_size=0.33,
    random_state=101,
)

In [21]:
# assigned_group is the categorical feature column for 'Group'. It is wrapped
# in a 4-dimensional embedding column, which takes its place in the DNN's list.
embedded_group_column = tf.feature_column.embedding_column(
    categorical_column=assigned_group,
    dimension=4,
)
featureColumns = [
    num_preg,
    plasma_gluc,
    dias_press,
    tricep,
    insulin,
    bmi,
    diabetes_pedigree,
    embedded_group_column,
    age_buckets,
]

In [22]:
# Training input function for the DNN: shuffled batches of 100 rows drawn
# from the training split, with num_epochs set to 1000.
inputFunction = tf.estimator.inputs.pandas_input_fn(
    x=X_train,
    y=y_train,
    batch_size=100,
    num_epochs=1000,
    shuffle=True,
)

In [25]:
# Binary DNN classifier with three tanh hidden layers (512, 256, 128 units).
# The optimizer is passed as a callable that builds an Adam optimizer whose
# learning rate starts at 0.001 and follows tf.train.exponential_decay
# (decay_rate 0.96, decay_steps 1000) driven by the global step.
dnnClassifierModel = tf.estimator.DNNClassifier(
    feature_columns=featureColumns,
    hidden_units=[512, 256, 128],
    activation_fn=tf.nn.tanh,
    n_classes=2,
    optimizer=lambda: tf.train.AdamOptimizer(
        learning_rate=tf.train.exponential_decay(
            learning_rate=0.001,
            global_step=tf.train.get_global_step(),
            decay_steps=1000,
            decay_rate=0.96,
        )
    ),
)

# First training run: 1000 steps.
dnnClassifierModel.train(steps=1000, input_fn=inputFunction)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/var/folders/1x/5c0qcdm55cvdsjtwhfs67glw0000gn/T/tmpi6glyxnv', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x128702dd8>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Instructions for 

<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x1287167b8>

In [27]:
# Train the same DNN estimator for a further 1000 steps.
dnnClassifierModel.train(
    steps=1000,
    input_fn=inputFunction,
)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/1x/5c0qcdm55cvdsjtwhfs67glw0000gn/T/tmpi6glyxnv/model.ckpt-1000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1000 into /var/folders/1x/5c0qcdm55cvdsjtwhfs67glw0000gn/T/tmpi6glyxnv/model.ckpt.
INFO:tensorflow:loss = 18.225155, step = 1001
INFO:tensorflow:global_step/sec: 143.457
INFO:tensorflow:loss = 17.78348, step = 1101 (0.698 sec)
INFO:tensorflow:global_step/sec: 172.517
INFO:tensorflow:loss = 9.873206, step = 1201 (0.579 sec)
INFO:tensorflow:global_step/sec: 178.354
INFO:tensorflow:loss = 9.713499, step = 1301 (0.561 sec)
INFO:tensorflow:global_step/sec: 173.572
INFO:tensorflow:loss = 6.836135, step = 1401 (0.576 sec)
INFO:tensorflow:global_step/sec: 169.5
INFO:tensorflow:loss = 3.3992896, step = 1501

<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x1287167b8>

In [28]:
# Evaluation input function for the DNN: a single, unshuffled pass over the
# test split in batches of 10, with labels supplied.
evaluateInputFunction = tf.estimator.inputs.pandas_input_fn(
    x=X_test, y=y_test, batch_size=10, num_epochs=1, shuffle=False,
)

# Evaluate the trained DNN; the returned metrics dict is the cell's output.
dnnClassifierModel.evaluate(input_fn=evaluateInputFunction)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-02-18-16:13:22
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/1x/5c0qcdm55cvdsjtwhfs67glw0000gn/T/tmpi6glyxnv/model.ckpt-2000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-02-18-16:13:23
INFO:tensorflow:Saving dict for global step 2000: accuracy = 0.72440946, accuracy_baseline = 0.65748036, auc = 0.78649586, auc_precision_recall = 0.6650593, average_loss = 1.1505088, global_step = 2000, label/mean = 0.34251967, loss = 11.239585, precision = 0.5934066, prediction/mean = 0.375141, recall = 0.62068963
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 2000: /var/folders/1x/5c0qcdm55cvdsjtwhfs67glw0000gn/T/tmpi6glyxnv/model.ckpt-2000


{'accuracy': 0.72440946,
 'accuracy_baseline': 0.65748036,
 'auc': 0.78649586,
 'auc_precision_recall': 0.6650593,
 'average_loss': 1.1505088,
 'global_step': 2000,
 'label/mean': 0.34251967,
 'loss': 11.239585,
 'precision': 0.5934066,
 'prediction/mean': 0.375141,
 'recall': 0.62068963}

# Linear Classification with TensorFlow and Dense Neural Nets