In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [None]:
# Load the diabetes CSV, drop the pedigree column, and split the rows into
# train/test feature frames (train, test) and label series (train_y, test_y).
SEED = 42  # fixed so the split, and every metric derived from it, is reproducible

df = pd.read_csv("/kaggle/input/pima-indians-diabetes-database/diabetes.csv")
df = df.drop(columns=["DiabetesPedigreeFunction"])

# Features and labels are split together rather than popping "Outcome" from the
# split frames afterwards, so no frame derived from df is mutated in place.
# stratify keeps the diabetic / non-diabetic ratio the same in both splits.
train, test, train_y, test_y = train_test_split(
    df.drop(columns=["Outcome"]),
    df["Outcome"],
    test_size=0.2,
    random_state=SEED,
    stratify=df["Outcome"],
)

In [None]:
# Summary statistics for the training features (labels are held separately in train_y).
train.describe()

In [None]:
# Summary statistics for the test features (labels are held separately in test_y).
test.describe()

In [None]:
# First rows of the training features.
train.head()

In [None]:
# First rows of the test features.
test.head()

In [None]:
# First training labels (values of the "Outcome" column).
train_y.head()

In [None]:
# First test labels (values of the "Outcome" column).
test_y.head()

In [None]:
# Age distribution over the whole dataset, in 20 bins.
table = df["Age"].hist(bins=20)
table.set(xlabel="Age", ylabel="Number of People")
plt.show()

In [None]:
# Share of each outcome over the whole dataset (train and test labels combined).
explode = (0.0, 0.05)
all_labels = list(train_y) + list(test_y)
class_counts = [all_labels.count(outcome) for outcome in (0, 1)]
plt.pie(
    class_counts,
    labels=["Not Diabetic", "Diabetic"],
    autopct='%1.2f%%',
    startangle=90,
    explode=explode,
    shadow=True,
)
plt.show()

In [None]:
# Mean of every column per outcome class, drawn as a grouped bar chart.
outcome_means = df.groupby("Outcome").mean()
table2 = outcome_means.plot.bar(figsize=(10, 7))
table2.set(xlabel="(0: Not Diabetic   1: Diabetic)", ylabel="Mean")
plt.show()

In [None]:
# Histogram grid of the training feature columns.
train.hist(figsize = (12,12))
plt.show()

In [None]:
# Histogram grid of the test feature columns.
test.hist(figsize = (12, 12))
plt.show()

In [None]:
# Feature column names, plus the display text for class ids 0 and 1.
features = test.columns.tolist()
probability_labels = ["Not Diabetic", "Diabetic"]

In [None]:
def input_fn(features, labels, training, batch_size=256):
    """Build a batched tf.data pipeline of (feature dict, label) pairs.

    Args:
        features: Mapping-like object (anything ``dict()`` accepts) of feature
            name to column values.
        labels: Label values aligned with the feature rows.
        training: When true, the data is shuffled and repeated indefinitely.
        batch_size: Number of examples per batch.

    Returns:
        The batched dataset.
    """
    feature_map = dict(features)
    pipeline = tf.data.Dataset.from_tensor_slices((feature_map, labels))
    if not training:
        return pipeline.batch(batch_size)
    # Training data never runs out; the caller bounds training with `steps`.
    return pipeline.shuffle(1000).repeat().batch(batch_size)

# One numeric feature column per input column. The default float32 dtype is
# used instead of an unsigned-integer spec, which would misdescribe any column
# holding fractional values (presumably BMI in this dataset; verify against the CSV).
my_feature_columns = [
    tf.feature_column.numeric_column(key=key) for key in train.keys()
]

# "Outcome" is binary (0 = not diabetic, 1 = diabetic), so the classifier needs
# exactly two classes. A third class never occurs in the labels, and a predicted
# class id of 2 would index past the two-entry probability_labels list.
classifier = tf.estimator.DNNClassifier(
    feature_columns=my_feature_columns,
    hidden_units=[300, 100],
    n_classes=2)

# input_fn repeats the shuffled training data indefinitely when training=True,
# so `steps` is what bounds the training run.
classifier.train(
    input_fn=lambda: input_fn(train, train_y, training=True),
    steps=1000)

def input_fn2(features, batch_size=256):
    """Build a label-free, batched dataset for prediction.

    Args:
        features: Mapping-like object (anything ``dict()`` accepts) of feature
            name to values.
        batch_size: Number of examples per batch.

    Returns:
        The batched dataset of feature dicts.
    """
    feature_map = dict(features)
    unlabeled = tf.data.Dataset.from_tensor_slices(feature_map)
    return unlabeled.batch(batch_size)

# Score the model on the held-out split. No `steps` cap is passed, so evaluation
# runs until the non-repeating test dataset is exhausted instead of stopping
# after the first batch (which would truncate any test set larger than batch_size).
evaluate = classifier.evaluate(input_fn=lambda: input_fn(test, test_y, training=False))
print(f"Accuracy: {evaluate['accuracy']:.2f}")
print(f"Avg. Loss: {evaluate['average_loss']:.2f}")
print(f"Loss: {evaluate['loss']:.2f}")

In [None]:
# Optional manual test: remove the surrounding triple quotes to type feature
# values by hand and get a prediction. It is kept disabled so that running the
# whole notebook never blocks on input().
"""
predict = {}
print("Please type numeric values as prompted.\n")
for feature in features:
    # Keep prompting until the value parses as a number (decimals are allowed).
    while True:
        val = input(feature + ": ")
        try:
            predict[feature] = [float(val)]
            break
        except ValueError:
            print("'{}' is not a number, please try again.".format(val))
print("")
predictions = classifier.predict(input_fn=lambda: input_fn2(predict))
for pred_dict in predictions:
    class_id = pred_dict['class_ids'][0]
    probability = pred_dict['probabilities'][class_id]
    # Report inside the loop so nothing is referenced if no prediction is produced.
    print("Prediction is '{}' (%{:.1f} chance)".format(probability_labels[class_id], 100 * probability))
"""