In [68]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Activation, Dropout
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import accuracy_score

In [69]:
df = pd.read_csv("./Dataset/diabetes.csv")
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [71]:
df['Outcome'].replace({1: 'Yes', 0: 'No'}, inplace=True)
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,Yes
1,1,85,66,29,0,26.6,0.351,31,No
2,8,183,64,0,0,23.3,0.672,32,Yes
3,1,89,66,23,94,28.1,0.167,21,No
4,0,137,40,35,168,43.1,2.288,33,Yes


In [72]:
df = pd.get_dummies(df, columns=['Outcome'])
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome_No,Outcome_Yes
0,6,148,72,35,0,33.6,0.627,50,0,1
1,1,85,66,29,0,26.6,0.351,31,1,0
2,8,183,64,0,0,23.3,0.672,32,0,1
3,1,89,66,23,94,28.1,0.167,21,1,0
4,0,137,40,35,168,43.1,2.288,33,0,1


In [75]:
# Split the dataset into features (X) and target (y)
X = df.drop(columns=['Outcome_No', 'Outcome_Yes'], axis=1)  # Features
y = df.drop(columns=['Pregnancies', 'Glucose', 'BloodPressure','SkinThickness', 'Insulin', 'BMI', 'DiabetesPedigreeFunction','Age'], axis=1)  # labels

In [78]:
from sklearn.model_selection import train_test_split
# Split the data into training and testing sets (80-20 split)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print('Shape of X_train and X_test:', X_train.shape, X_test.shape)
print('Shape of y_train and y_test:', y_train.shape, y_test.shape)

Shape of X_train and X_test: (614, 8) (154, 8)
Shape of y_train and y_test: (614, 2) (154, 2)


In [79]:
from sklearn.preprocessing import StandardScaler
# Scale features (standardization)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [80]:
# Store mean and std for each feature
means = scaler.mean_
stds = scaler.scale_
means

array([  3.74267101, 120.85504886,  69.41530945,  20.3990228 ,
        81.43811075,  31.98338762,   0.46916775,  32.90716612])

In [81]:
stds

array([  3.31056497,  32.00895893,  18.49751705,  15.42140098,
       116.14014299,   7.73431907,   0.33657233,  11.49406506])

In [82]:
print(y.shape)

(768, 2)


In [None]:
DENSE1_SIZE = 48
DENSE2_SIZE = 16
NUM_OF_EPOCHS = 50 
BATCH_SIZE = 8

model = tf.keras.Sequential()

input_shape = X.shape[1]
print(input_shape)
model.add(tf.keras.layers.Flatten(input_shape =(X.shape[1],)))

model.add(tf.keras.layers.Dense(DENSE1_SIZE, activation='relu'))


output_shape = y.shape[1]
print(output_shape)
model.add(tf.keras.layers.Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
model.summary()

8
2
Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_2 (Flatten)         (None, 8)                 0         
                                                                 
 dense_7 (Dense)             (None, 48)                432       
                                                                 
 dense_8 (Dense)             (None, 16)                784       
                                                                 
 dense_9 (Dense)             (None, 2)                 34        
                                                                 
Total params: 1,250
Trainable params: 1,250
Non-trainable params: 0
_________________________________________________________________


In [85]:
history = model.fit(X_train, y_train, batch_size=BATCH_SIZE, 
                    epochs=NUM_OF_EPOCHS,
                    verbose=1, validation_split=0.2)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [86]:
# Evaluate the model on test data
y_pred = (model.predict(X_test) > 0.5).astype("int32")
accuracy = accuracy_score(y_test, y_pred)

print(f"Test Accuracy: {accuracy * 100:.2f}%")

Test Accuracy: 70.78%


In [87]:
model.save('DiabetesPredictionModel.h5')

In [88]:
def representative_dataset():
    for _ in range(100):
      data =  X_test
      yield [data.astype(np.float32)]
        
print(representative_dataset())

<generator object representative_dataset at 0x000001A502865048>


In [89]:
tf.saved_model.save(model, "saved_diabetes_seq_model_keras_dir")
converter = tf.lite.TFLiteConverter.from_saved_model("saved_diabetes_seq_model_keras_dir")
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset

tflite_model = converter.convert()

INFO:tensorflow:Assets written to: saved_diabetes_seq_model_keras_dir\assets


In [90]:
with open('DiabetesPredictionModel.tflite', 'wb') as f:
  f.write(tflite_model)

In [91]:
interpreter = tf.lite.Interpreter(model_path="DiabetesPredictionModel.tflite")
interpreter.allocate_tensors()

In [92]:
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
print('input_details:\n', input_details)
print('output_details:\n', output_details)

input_details:
 [{'name': 'serving_default_flatten_2_input:0', 'index': 0, 'shape': array([1, 8]), 'shape_signature': array([-1,  8]), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]
output_details:
 [{'name': 'StatefulPartitionedCall:0', 'index': 14, 'shape': array([1, 2]), 'shape_signature': array([-1,  2]), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0), 'quantization_parameters': {'scales': array([], dtype=float32), 'zero_points': array([], dtype=int32), 'quantized_dimension': 0}, 'sparsity_parameters': {}}]


In [None]:
# Test the model on random input data
input_shape = input_details[0]['shape']

# Random Input Test
input0_data = np.random.random_sample(input_shape).astype(np.float32)
print("Random input0_data:", input0_data)
interpreter.set_tensor(input_details[0]['index'], input0_data)
interpreter.invoke()
output0_data = interpreter.get_tensor(output_details[0]['index'])
print("TFLite Model output for random input0_data:", output0_data)

# Verify with original Keras model
keras_output0_data = model.predict(input0_data)
print("Keras Model output for random input0_data:", keras_output0_data)

# Custom Test Input 1
input1_data = np.array([[]], dtype=np.float32)
print("\nCustom input1_data:", input1_data)
interpreter.set_tensor(input_details[0]['index'], input1_data)
interpreter.invoke()
output1_data = interpreter.get_tensor(output_details[0]['index'])
print("TFLite Model output for custom input1_data:", output1_data)

# Verify with original Keras model
keras_output1_data = model.predict(input1_data)
print("Keras Model output for custom input1_data:", keras_output1_data)

# Custom Test Input 2
input2_data = np.array([[1.29, 1.94, -0.29, -1.32, -0.70, -1.12, 0.60, -0.08 ]], dtype=np.float32)
print("\nCustom input2_data:", input2_data)
interpreter.set_tensor(input_details[0]['index'], input2_data)
interpreter.invoke()
output2_data = interpreter.get_tensor(output_details[0]['index'])
print("TFLite Model output for custom input2_data:", output2_data)

# Verify with original Keras model
keras_output2_data = model.predict(input2_data)
print("Keras Model output for custom input2_data:", keras_output2_data)

Random input0_data: [[0.30747336 0.16529647 0.02792511 0.3318631  0.39363113 0.05414251
  0.2926122  0.5794854 ]]
TFLite Model output for random input0_data: [[0.5 0.5]]
Keras Model output for random input0_data: [[0.49457732 0.50542265]]

Custom input1_data: [[-0.83 -1.   -0.18  0.17  0.11 -0.5  -0.9  -1.04]]
TFLite Model output for custom input1_data: [[0.99609375 0.00390625]]
Keras Model output for custom input1_data: [[0.99768174 0.00231829]]

Custom input2_data: [[ 1.29  1.94 -0.29 -1.32 -0.7  -1.12  0.6  -0.08]]
TFLite Model output for custom input2_data: [[0.125 0.875]]
Keras Model output for custom input2_data: [[0.12612183 0.8738781 ]]


In [94]:
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
# Initialize lists to store predictions
y_pred_tflite = []
y_pred_keras = []
y_true = y_test  # Assuming y_test contains the ground truth labels

# Run predictions on the TFLite model
for i in range(len(X_test)):
    input_data = np.array(X_test[i:i+1], dtype=np.float32)
    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()
    output_data = interpreter.get_tensor(output_details[0]['index'])
    y_pred_tflite.append((output_data[0] > 0.5).astype(int))  # Binarize the output

# Run predictions on the original Keras model
y_pred_keras = (model.predict(X_test) > 0.5).astype(int)

# Convert to numpy arrays for evaluation
y_pred_tflite = np.array(y_pred_tflite).flatten()
y_pred_keras = y_pred_keras.flatten()
y_true = np.array(y_true).flatten()

# Calculate metrics for TFLite model
print("Evaluation metrics for TFLite Model:")
print("Accuracy:", accuracy_score(y_true, y_pred_tflite))
print(confusion_matrix(y_true, y_pred_tflite))
print(classification_report(y_true, y_pred_tflite, target_names=["Non-diabetic", "Diabetic"]))

# Calculate metrics for Keras model
print("\nEvaluation metrics for Keras Model:")
print("Accuracy:", accuracy_score(y_true, y_pred_keras))
print(confusion_matrix(y_true, y_pred_keras))
print(classification_report(y_true, y_pred_keras, target_names=["Non-diabetic", "Diabetic"]))

Evaluation metrics for TFLite Model:
Accuracy: 0.7077922077922078
[[109  45]
 [ 45 109]]
              precision    recall  f1-score   support

Non-diabetic       0.71      0.71      0.71       154
    Diabetic       0.71      0.71      0.71       154

    accuracy                           0.71       308
   macro avg       0.71      0.71      0.71       308
weighted avg       0.71      0.71      0.71       308


Evaluation metrics for Keras Model:
Accuracy: 0.7077922077922078
[[109  45]
 [ 45 109]]
              precision    recall  f1-score   support

Non-diabetic       0.71      0.71      0.71       154
    Diabetic       0.71      0.71      0.71       154

    accuracy                           0.71       308
   macro avg       0.71      0.71      0.71       308
weighted avg       0.71      0.71      0.71       308



In [95]:
import os

# Define the file paths
keras_model_path = "DiabetesPredictionModel.h5"
tflite_model_path = "DiabetesPredictionModel.tflite"

# Get file sizes in KB
keras_model_size = os.path.getsize(keras_model_path) / 1024  # Size in KB
tflite_model_size = os.path.getsize(tflite_model_path) / 1024  # Size in KB

print(f"Keras model size: {keras_model_size:.2f} KB")
print(f"TFLite model size: {tflite_model_size:.2f} KB")

Keras model size: 46.91 KB
TFLite model size: 4.50 KB


In [96]:
# Function to convert some hex values into an array for C programming
import time, sys

# Function to convert some hex values into an array for C programming
def hex_to_c_array(hex_data, var_name):
    c_str = ""

    # Create header guard
    c_str += '#ifndef ' + var_name.upper() + '_H\n'
    c_str += "#define " + var_name.upper() + '_H\n\n'

    c_str += "/*\n Author: Likhith S \n"
    c_str += " CAUTION: This is an auto generated file.\n DO NOT EDIT OR MAKE ANY CHANGES TO IT.\n"

# Time stamping of this model data in the generated file
    localtime = time.asctime( time.localtime(time.time()) )
    c_str += " This model data was generated on " + localtime+ '\n\n'
    print("This model data was generated on:", localtime)

# Add information about the verisons of tools and packages used in generating this header file
    c_str += " Tools used:\n Python:" + str(sys.version) + "\n Numpy:" + str(np.version.version) + \
          "\n TensorFlow:" + str(sys.version) + "\n Keras: "+ str(tf.keras.__version__) + "\n\n"
    print("Tools used: Python:", sys.version, "\n Numpy:", np.version.version, \
          "\n TensorFlow:", sys.version, "\n Keras: ", tf.keras.__version__, "\n\n")

# Training details of the model
    c_str += ' Model details are:\n'
    c_str += ' NUM_OF_EPOCHS = ' + str(NUM_OF_EPOCHS) + '\n'
    c_str += ' BATCH_SIZE    = ' + str(BATCH_SIZE) + '\n*/\n'
    
# Generate 'C' constants for the no. of nodes in each layer
    c_str += '\nconst int ' + 'DENSE1_SIZE' + ' = ' + str(DENSE1_SIZE) + ';\n'
    c_str +=   'const int ' + 'DENSE2_SIZE' + ' = ' + str(DENSE2_SIZE) + ';\n'      
    
    # Add array length at the top of the file
    c_str += '\nconst unsigned int ' + var_name + '_len = ' + str(len(hex_data)) + ';\n'

    # Declare C variable
    c_str += 'alignas(8) const unsigned char ' + var_name + '[] = {'
    hex_array = []
    for i, val in enumerate(hex_data):
        # Construct string from hex
        hex_str = format(val, '#04x')

        # Add formating so each line stays within 80 characters
        if (i + 1) < len(hex_data):
          hex_str += ','
        if (i + 1) % 12 == 0:
          hex_str += '\n'
        hex_array.append(hex_str)

    # Add closing brace
    c_str += '\n' + format(''.join(hex_array)) + '\n};\n\n'

    # Close out header guard
    c_str += '#endif //' + var_name.upper() + '_H'

    return c_str

In [97]:
# Write TFLite model to a C source (or header) file
with open("diabetes_model_esp32" + '.h', 'w') as file:
  file.write(hex_to_c_array(tflite_model, "diabetes_model_esp32"))

This model data was generated on: Mon Nov  4 10:14:02 2024
Tools used: Python: 3.7.1 (default, Oct 28 2018, 08:39:03) [MSC v.1912 64 bit (AMD64)] 
 Numpy: 1.21.5 
 TensorFlow: 3.7.1 (default, Oct 28 2018, 08:39:03) [MSC v.1912 64 bit (AMD64)] 
 Keras:  2.10.0 


