In [1]:
# for array computations and loading data
import numpy as np

# for building linear regression models and preparing data
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# for graphing and visualization
import matplotlib.pyplot as plt
import matplotlib.lines as mlines

# for building and training neural networks
import tensorflow as tf 
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# suppress warnings: only messages logged at ERROR level or above by
# TensorFlow's Python logger are shown, and autograph's verbosity is set to 0
tf.autograph.set_verbosity(0)
tf.get_logger().setLevel('ERROR')

# reduce display precision on numpy arrays (2 digits for floating point output)
np.set_printoptions(precision=2)

2024-06-18 13:44:21.195502: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Read the comma-separated text file into one 2-D array
data = np.loadtxt('./diabetes.csv', delimiter=',')

# The last column holds the targets; every column before it is an input
x = data[:, :-1]

# Slicing with `-1:` (rather than indexing with `-1`) keeps the column axis,
# so y is already 2-D like x, which the later commands require
y = data[:, -1:]

print(
    f"the shape of the inputs x is: {x.shape}",
    f"the shape of the targets y is: {y.shape}",
    sep="\n",
)

the shape of the inputs x is: (100000, 3)
the shape of the targets y is: (100000, 1)


In [3]:
# First cut: keep 60% of the rows for training and hold out the other 40%.
X_train, X_holdout, y_train, y_holdout = train_test_split(
    x, y,
    test_size=0.40,
    random_state=1,
)

# Second cut: halve the held-out rows into a cross validation set and a test set.
X_cv, X_test, y_cv, y_test = train_test_split(
    X_holdout, y_holdout,
    test_size=0.50,
    random_state=1,
)

# The held-out arrays were only a stepping stone; drop the names.
del X_holdout, y_holdout

# One line per array; the embedded "\n" leaves a blank line between the splits.
shape_report = [
    f"the shape of the training set (input) is: {X_train.shape}",
    f"the shape of the training set (target) is: {y_train.shape}\n",
    f"the shape of the cross validation set (input) is: {X_cv.shape}",
    f"the shape of the cross validation set (target) is: {y_cv.shape}\n",
    f"the shape of the test set (input) is: {X_test.shape}",
    f"the shape of the test set (target) is: {y_test.shape}",
]
print(*shape_report, sep="\n")

the shape of the training set (input) is: (60000, 3)
the shape of the training set (target) is: (60000, 1)

the shape of the cross validation set (input) is: (20000, 3)
the shape of the cross validation set (target) is: (20000, 1)

the shape of the test set (input) is: (20000, 3)
the shape of the test set (target) is: (20000, 1)


In [4]:
# Fit the scaler on the training set only, then apply that same fitted
# transformation to every split so the cross validation and test sets are
# scaled with the training statistics rather than their own.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_cv_scaled, X_test_scaled = (
    scaler.transform(split) for split in (X_train, X_cv, X_test)
)

In [5]:
# Build the model
# Seed Python's `random`, NumPy and TensorFlow in a single call so that weight
# initialisation and batch shuffling are repeatable after a kernel restart.
# tf.random.set_seed on its own only seeds TensorFlow's generator.
tf.keras.utils.set_random_seed(20)

model = Sequential(
    [
        Dense(25, activation = 'relu'),
        Dense(15, activation = 'relu'),
        # Linear output unit: the network emits a raw score (logit), so any
        # probability must be obtained by applying a sigmoid afterwards.
        Dense(1, activation = 'linear')
    ],
)

In [10]:
# Setup the loss and optimizer
# from_logits=True because the output layer is linear: the loss applies the
# sigmoid itself instead of expecting probabilities from the model.
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
)

print("Training Model...")

# Train the model
# NOTE: fit() resumes from the model's current weights. Re-running this cell
# without first re-running the model-building cell trains for 20 more epochs
# on top of the previous run, so the reported errors will drift.
model.fit(
    X_train_scaled, y_train,
    epochs=20,
    verbose=0
)

print("Done!\n")

# Set the threshold for classification
threshold = 0.5


def classification_error(features, targets):
    """Return the fraction of examples the trained model misclassifies.

    Args:
        features: Scaled inputs to pass to ``model.predict``.
        targets: Ground-truth 0/1 labels, one per row of ``features``.

    Returns:
        The mean of the element-wise mismatch between the thresholded
        predictions and ``targets`` (a float between 0 and 1).
    """
    logits = model.predict(features)
    # The model outputs logits; squash them to probabilities before thresholding.
    probabilities = np.asarray(tf.math.sigmoid(logits))
    predicted = (probabilities >= threshold).astype(int)
    # Flatten both sides: comparing (n, 1) predictions against 1-D targets
    # would broadcast to an (n, n) matrix and silently give a wrong rate.
    return np.mean(predicted.ravel() != np.asarray(targets).ravel())


# Record the fraction of misclassified examples for each split
training_error = classification_error(X_train_scaled, y_train)
cv_error = classification_error(X_cv_scaled, y_cv)
test_error = classification_error(X_test_scaled, y_test)

# Print the result
print(
    f"Training Set Classification Error: {training_error:.5f}\n" +
    f"CV Set Classification Error: {cv_error:.5f}\n" +
    f"Test Set Classification Error: {test_error:.5f}"
)


Training Model...
Done!

[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 955us/step
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 996us/step
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 949us/step
Training Set Classification Error: 0.02803
CV Set Classification Error: 0.02835
Test Set Classification Error: 0.02820


In [11]:
def prediction(bmi, A1c, glucose):
    """Return the model's 0/1 class label(s) for the given feature values.

    Uses the notebook-level ``scaler``, ``model`` and ``threshold`` objects,
    so the scaling and training cells must have been run before calling this.

    Args:
        bmi: First feature value; a scalar or a 1-D sequence of values.
        A1c: Second feature value; a scalar or a 1-D sequence.
        glucose: Third feature value; a scalar or a 1-D sequence.
            Scalars are broadcast against sequences, and sequences must
            share the same length.

    Returns:
        str: The predicted labels joined by commas, one per input row
        (e.g. ``"0"`` for scalar inputs, ``"0,1"`` for two rows).
    """
    # assumes the training data columns are ordered (bmi, A1c, glucose) — TODO confirm
    features = np.column_stack(np.broadcast_arrays(bmi, A1c, glucose))
    logits = model.predict(scaler.transform(features))
    # The model emits logits; convert to probabilities before thresholding.
    probabilities = np.asarray(tf.math.sigmoid(logits))
    labels = (probabilities >= threshold).astype(int)
    return ','.join(str(label) for label in labels.ravel())

In [17]:
print(prediction(27.32,6.5,100))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
0


In [20]:
# Write the trained network to disk as a serving artifact.
# NOTE(review): only the Keras model is exported. The fitted `scaler` and the
# sigmoid/threshold step live outside the model, so whoever loads this artifact
# must scale inputs the same way and treat the output as a logit.
export_dir = "./saved_models/"
model.export(export_dir)

Saved artifact at './saved_models/'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 3), dtype=tf.float32, name='keras_tensor')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  140476573622512: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140476573707424: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140476574559392: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140476574608192: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140476574681744: TensorSpec(shape=(), dtype=tf.resource, name=None)
  140476574951904: TensorSpec(shape=(), dtype=tf.resource, name=None)
