In [None]:
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [None]:
# Load the Pima Indians diabetes CSV; the path is relative to the working directory.
diabetes = pd.read_csv(filepath_or_buffer='data/pima-indians-diabetes.csv')

In [None]:
# Preview the first rows of the raw, un-normalised data.
diabetes.head()

In [None]:
# List the column names; the feature-column keys defined further down refer to these.
diabetes.columns

In [None]:
# Numeric columns that get rescaled to the [0, 1] range.
# 'Age' is not listed: it is bucketized on its raw values further down.
cols_to_norm = [
    'Number_pregnant',
    'Glucose_concentration',
    'Blood_pressure',
    'Triceps',
    'Insulin',
    'BMI',
    'Pedigree',
]

In [None]:
# Min-max scale every listed column: (value - column min) / (column max - column min).
# NOTE(review): min and max are computed over the whole dataset, before the
# train/test split further down, so held-out rows influence the scaling.
# NOTE(review): a constant column would give 0/0 = NaN here.
diabetes[cols_to_norm] = diabetes[cols_to_norm].apply(
    lambda col: col.sub(col.min()).div(col.max() - col.min())
)

In [None]:
# Preview again after scaling; the columns in cols_to_norm should now lie in [0, 1].
diabetes.head()

In [None]:
# One numeric feature column per continuous input; each key is a DataFrame column name.
num_preg = tf.feature_column.numeric_column(key='Number_pregnant')
plasma_gluv = tf.feature_column.numeric_column(key='Glucose_concentration')
dias_press = tf.feature_column.numeric_column(key='Blood_pressure')
tricep = tf.feature_column.numeric_column(key='Triceps')
insulin = tf.feature_column.numeric_column(key='Insulin')
bmi = tf.feature_column.numeric_column(key='BMI')
diabetes_pedigree = tf.feature_column.numeric_column(key='Pedigree')
# Age is only used as the source of the bucketized column built further down.
age = tf.feature_column.numeric_column(key='Age')

In [None]:
# Categorical 'Group' feature with a fixed, known vocabulary of four labels.
assigned_group = tf.feature_column.categorical_column_with_vocabulary_list(
    key='Group',
    vocabulary_list=['A', 'B', 'C', 'D'],
)

In [None]:
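# Alternative when the set of group labels is not known in advance: hash them into buckets.
# Kept for reference only; the vocabulary-list column defined above is the one in use.
#assigned_group = tf.feature_column.categorical_column_with_hash_bucket('Group',hash_bucket_size=10)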

In [None]:
# Histogram of Age (not in cols_to_norm, so these are the raw values) with 20 bins.
diabetes['Age'].hist(bins=20)

In [None]:
# Discretise age into decade-wide buckets: <20, [20, 30), ..., [70, 80), >=80.
age_bucket = tf.feature_column.bucketized_column(
    source_column=age,
    boundaries=[20, 30, 40, 50, 60, 70, 80],
)

In [None]:
# Feature columns for the linear model: seven numeric inputs, the categorical
# group, and the bucketized age (the raw `age` column itself is not included).
feat_cols = [
    num_preg,
    plasma_gluv,
    dias_press,
    tricep,
    insulin,
    bmi,
    diabetes_pedigree,
    assigned_group,
    age_bucket,
]

In [None]:
# Display the assembled feature-column list as a sanity check.
feat_cols

In [None]:
# Train and test split

In [None]:
# Features are every column except the 'Class' target.
x_data = diabetes.drop(labels='Class', axis='columns')

In [None]:
# Confirm that 'Class' is gone from the feature frame.
x_data.head()

In [None]:
# Target vector: the 'Class' column (the estimators below are built with n_classes=2).
labels = diabetes.loc[:, 'Class']

In [None]:
# Hold out 30% of the rows for testing.
# random_state pins the shuffle so the split, and every metric computed from
# it, is reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    x_data,
    labels,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Training input pipeline: shuffled mini-batches of 10 rows, cycling over the
# training set for up to 1000 epochs.
input_func = tf.estimator.inputs.pandas_input_fn(
    x=X_train,
    y=y_train,
    batch_size=10,
    num_epochs=1000,
    shuffle=True,
)

In [None]:
# Baseline: a linear (logistic-regression style) binary classifier.
model = tf.estimator.LinearClassifier(
    feature_columns=feat_cols,
    n_classes=2,
)

In [None]:
# Train for 1000 steps (one step = one batch from input_func).
model.train(
    input_fn=input_func,
    steps=1000,
)

In [None]:
# Evaluation input pipeline over the held-out test split: a single ordered pass.
# Evaluating on the training rows would report optimistic metrics for data the
# model has already seen, so X_test / y_test are used here.
eval_input_func = tf.estimator.inputs.pandas_input_fn(
    x=X_test,
    y=y_test,
    batch_size=10,
    num_epochs=1,
    shuffle=False,
)

In [None]:
# Compute the evaluation metrics for the linear model.
results = model.evaluate(input_fn=eval_input_func)

In [None]:
# Show the metrics returned by model.evaluate.
results

In [None]:
## Predictions

In [None]:
# Prediction input pipeline: features only (no labels), one ordered pass.
# Predictions are made for the held-out test rows rather than for the rows the
# model was trained on.
pred_input_func = tf.estimator.inputs.pandas_input_fn(
    x=X_test,
    batch_size=10,
    num_epochs=1,
    shuffle=False,
)

In [None]:
# predict() returns a lazy iterator; nothing is computed until it is consumed.
predictions = model.predict(input_fn=pred_input_func)

In [None]:
# Consume the prediction iterator into a list with one entry per input row.
my_pred = [single_pred for single_pred in predictions]

In [None]:
# Show only the first few predictions; displaying the whole list (one entry
# per input row) floods the notebook output.
my_pred[:5]

In [None]:
# First attempt at a DNN with three hidden layers of 10 units each.
# NOTE(review): feat_cols at this point still holds the raw categorical column
# `assigned_group`; DNNClassifier expects dense columns (embedding/indicator),
# so training this instance would presumably fail. It is never trained and is
# replaced by the model rebuilt further down with an embedded group column.
dnn_model = tf.estimator.DNNClassifier(
    hidden_units=[10, 10, 10],
    feature_columns=feat_cols,
    n_classes=2,
)

In [None]:
# Wrap the categorical group in a 4-dimensional dense embedding for use in a DNN.
embedded_group_col = tf.feature_column.embedding_column(
    categorical_column=assigned_group,
    dimension=4,
)

In [None]:
# Rebinds feat_cols for the DNN: same columns as before, except that the raw
# categorical group is swapped for its embedded counterpart.
feat_cols = [
    num_preg,
    plasma_gluv,
    dias_press,
    tricep,
    insulin,
    bmi,
    diabetes_pedigree,
    embedded_group_col,
    age_bucket,
]

In [None]:
# Training input pipeline for the DNN: same settings as for the linear model
# (shuffled batches of 10, up to 1000 epochs over the training split).
input_func = tf.estimator.inputs.pandas_input_fn(
    x=X_train,
    y=y_train,
    batch_size=10,
    num_epochs=1000,
    shuffle=True,
)

In [None]:
# DNN binary classifier with three hidden layers of 10 units each, built on the
# current feat_cols list.
dnn_model = tf.estimator.DNNClassifier(
    hidden_units=[10, 10, 10],
    feature_columns=feat_cols,
    n_classes=2,
)

In [None]:
# Train the DNN for 1000 steps (one step = one batch from input_func).
dnn_model.train(
    input_fn=input_func,
    steps=1000,
)

In [None]:
# Evaluation input pipeline over the held-out test split: a single ordered pass.
eval_input_func = tf.estimator.inputs.pandas_input_fn(
    x=X_test,
    y=y_test,
    batch_size=10,
    num_epochs=1,
    shuffle=False,
)

In [None]:
# Evaluate the DNN on the test split; the metrics dict is shown as the cell output.
dnn_model.evaluate(input_fn=eval_input_func)