# Diabetes Prediction using Machine Learning

# Method 1: Linear Classification Model

### Import the required libraries

In [22]:
import tensorflow as tf

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix,classification_report


%matplotlib inline

### Read the dataset, normalize it, and split it into train and test sets

In [23]:
def Data_Process(csv_path="pima-indians-diabetes.csv", test_size=0.3, random_state=101):
    """
    Read the diabetes CSV, min-max normalize the feature columns and
    split the result into training and test sets.

    Parameters
    ----------
    csv_path : str, optional
        Path of the CSV file to read. Defaults to "pima-indians-diabetes.csv".
    test_size : float, optional
        Fraction of the rows held out for testing; forwarded to
        ``train_test_split``. Defaults to 0.3.
    random_state : int, optional
        Seed forwarded to ``train_test_split`` so the split is repeatable.
        Defaults to 101.

    Returns
    -------
    X_Train, X_Test, Y_Train, Y_Test
        Normalized feature frames and the matching "Class" label series
        for the training and test partitions.

    Notes
    -----
    "Age" is read from the file but is not part of the returned features.
    """
    # Names assigned to the columns of the file, in file order.
    columns_to_named = ["Pregnancies", "Glucose", "BloodPressure",
                        "SkinThickness", "Insulin", "BMI",
                        "DiabetesPedigreeFunction", "Age", "Class"]

    # header=0 treats the first row of the file as a header line and replaces
    # it with the names above, so that row is never used as data.
    df = pd.read_csv(csv_path, header=0, names=columns_to_named)

    # Columns that are scaled and used as model inputs ("Age" is left out).
    col_norm = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness',
                'Insulin', 'BMI', 'DiabetesPedigreeFunction']

    def _min_max(column):
        """Scale one column to [0, 1]; a constant column maps to 0.0."""
        low = column.min()
        span = column.max() - low
        # A constant column has a zero range; dividing by it would turn every
        # value into NaN, so fall back to a divisor of 1 (all values become 0).
        if span == 0:
            span = 1
        return (column - low) / span

    # NOTE(review): the min/max statistics are taken over the whole data set
    # before splitting, so test rows influence the scaled training features.
    # Consider computing them on the training rows only.
    X_Data = df[col_norm].apply(_min_max)
    Y_Data = df["Class"]

    X_Train, X_Test, Y_Train, Y_Test = train_test_split(
        X_Data, Y_Data, test_size=test_size, random_state=random_state)

    return X_Train, X_Test, Y_Train, Y_Test

### Create the Feature Columns

In [27]:
def create_feature_column():
    """
    Build the numeric feature columns handed to the estimator.

    Returns
    -------
    list
        One ``tf.feature_column.numeric_column`` per input feature, in the
        order Pregnancies, Glucose, BloodPressure, SkinThickness, Insulin,
        BMI, DiabetesPedigreeFunction.
    """
    # Keys must match the column names of the feature DataFrame.
    feature_names = (
        'Pregnancies',
        'Glucose',
        'BloodPressure',
        'SkinThickness',
        'Insulin',
        'BMI',
        'DiabetesPedigreeFunction',
    )

    return [tf.feature_column.numeric_column(name) for name in feature_names]

In [30]:
def create_input_test_func(X_Train, X_Test, Y_Train, Y_Test):
    """
    Build the estimator input functions for training and evaluation.

    Parameters
    ----------
    X_Train, Y_Train
        Features and labels used for training.
    X_Test, Y_Test
        Features and labels used for evaluation.

    Returns
    -------
    tuple
        ``(training_input_fn, evaluation_input_fn)``, both created with
        ``tf.estimator.inputs.pandas_input_fn``.
    """
    make_input_fn = tf.estimator.inputs.pandas_input_fn

    # Training feed: shuffled batches of 40, repeating for up to 1000 epochs.
    train_fn = make_input_fn(
        x=X_Train,
        y=Y_Train,
        batch_size=40,
        num_epochs=1000,
        shuffle=True,
    )

    # Evaluation feed: one unshuffled pass over the test rows.
    test_fn = make_input_fn(
        x=X_Test,
        y=Y_Test,
        batch_size=40,
        num_epochs=1,
        shuffle=False,
    )

    return train_fn, test_fn

## Create a Model

In [43]:
# Read, normalize and split the data; returns X_Train, X_Test, Y_Train, Y_Test.
# (The loader defined in this notebook is Data_Process; `data_cleaning` is not
# defined and raised NameError on a fresh kernel.)
X_Train, X_Test, Y_Train, Y_Test = Data_Process()

# Create the list of feature columns
feature_column = create_feature_column()

# Create the training and evaluation input functions
input_func, eval_input_func = create_input_test_func(X_Train, X_Test, Y_Train, Y_Test)

### Create the Model

In [44]:
# Two-class linear classifier over the numeric feature columns.
model = tf.estimator.LinearClassifier(
    feature_columns=feature_column,
    n_classes=2,
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/var/folders/yh/7gktt0ls0fj77fnrs694ht6m0000gn/T/tmpgzgu_ciu', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x1a2f5c3be0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [45]:
# Train for 5000 steps on the training input function.
# Estimator.train returns the estimator itself, so `history` refers to the
# same object as `model` rather than to a per-step training history.
history = model.train(input_fn=input_func, steps=5000)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /var/folders/yh/7gktt0ls0fj77fnrs694ht6m0000gn/T/tmpgzgu_ciu/model.ckpt.
INFO:tensorflow:loss = 27.725887, step = 1
INFO:tensorflow:global_step/sec: 337.183
INFO:tensorflow:loss = 23.74131, step = 101 (0.298 sec)
INFO:tensorflow:global_step/sec: 550.891
INFO:tensorflow:loss = 22.92276, step = 201 (0.183 sec)
INFO:tensorflow:global_step/sec: 604.507
INFO:tensorflow:loss = 25.088806, step = 301 (0.165 sec)
INFO:tensorflow:global_step/sec: 603.45
INFO:tensorflow:loss = 21.590435, step = 401 (0.166 sec)
INFO:tensorflow:global_step/sec: 519.943
INFO:tensorflow:loss = 21.507149, step = 501 (0.191 sec)
INFO:tensorflow:global_step/sec: 525.503
INFO:tensorflow:loss = 21.12466, step = 601 (0.193 sec)
INFO:tensorflow:g

In [46]:
# Score the trained model on the held-out test rows (single pass).
results = model.evaluate(input_fn=eval_input_func)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-05-17T12:38:42Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/yh/7gktt0ls0fj77fnrs694ht6m0000gn/T/tmpgzgu_ciu/model.ckpt-5000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-05-17-12:38:42
INFO:tensorflow:Saving dict for global step 5000: accuracy = 0.73593074, accuracy_baseline = 0.64935064, auc = 0.7901646, auc_precision_recall = 0.66901124, average_loss = 0.5250682, global_step = 5000, label/mean = 0.35064936, loss = 20.215128, precision = 0.6851852, prediction/mean = 0.35369077, recall = 0.45679012
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 5000: /var/folders/yh/7gktt0ls0fj77fnrs694ht6m0000gn/T/tmpgzgu_ciu/model.ckpt-5000


In [47]:
# Display the metrics dictionary returned by model.evaluate.
results

{'accuracy': 0.73593074,
 'accuracy_baseline': 0.64935064,
 'auc': 0.7901646,
 'auc_precision_recall': 0.66901124,
 'average_loss': 0.5250682,
 'label/mean': 0.35064936,
 'loss': 20.215128,
 'precision': 0.6851852,
 'prediction/mean': 0.35369077,
 'recall': 0.45679012,
 'global_step': 5000}