In [10]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import random


seed_constant = 27
np.random.seed(seed_constant)
random.seed(seed_constant)
tf.random.set_seed(seed_constant)

In [11]:
df = pd.read_csv('diabetes.csv')
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [12]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

X = df.iloc[:, :-1].to_numpy()
y = df.iloc[:, [-1]].to_numpy()

X = X.astype(np.float32)
y = y.astype(np.float32)

# Standardize the data
ss = StandardScaler()
X = ss.fit_transform(X)

# Split with training and test dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

print(X_train.shape, y_train.shape)
print(X_train)

(460, 8) (460, 1)
[[-0.84488505  2.4444783   0.35643175 ...  1.3843619   2.784923
  -0.9564617 ]
 [-1.1418515   2.413181   -0.16054575 ...  1.1812909   0.09097707
  -0.44593516]
 [ 1.827813    1.4742666   0.25303626 ...  0.7624575   0.1966813
   0.06459136]
 ...
 [ 0.04601434 -1.1546935   1.0802002  ...  0.95283633 -0.9449236
  -0.70119846]
 [-0.25095212  1.5055639   0.25303626 ... -0.26558846 -0.61573064
  -0.1906719 ]
 [-0.84488505 -0.18448201  0.04624525 ...  0.33093184  0.1725203
  -0.10558415]]


In [13]:
dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(len(X_train))

In [14]:
W = tf.Variable(tf.random.normal((8, 1)), name='weight')
b = tf.Variable(tf.random.normal((1,)), name='bias')

In [15]:
# Sigmoid Function
def sigmoid(X):
    h = tf.divide(1., 1. + tf.exp(-(tf.matmul(X, W) + b)))
    return h

# Loss function 
def loss_fn(h, y):
    cost = -tf.reduce_mean(y * tf.math.log(h) + (1 - y) * tf.math.log(1 - h))
    return cost

# Accuracy function for decision boundary
def accuracy_fn(h, y):
    y_hat = tf.cast(h > 0.5, dtype=tf.float32)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(y_hat, y), dtype=tf.int32))
    return accuracy

# Gradient function
def grad(X, y):
    with tf.GradientTape() as tape:
        h = sigmoid(X)
        loss = loss_fn(h, y)
    return tape.gradient(loss, [W, b])

In [16]:
#HANYA BOLEH RUBAH EPOCH DAN LEARNING RATE
EPOCH = 1000
LR = 0.1

# Optimizer (Stochastic Gradient Descent)
optimizer = tf.keras.optimizers.SGD(learning_rate=LR)

for e in range(EPOCH):
    for x, y in iter(dataset.batch(len(X_train))):
        h = sigmoid(x)
        grads = grad(x, y)
        optimizer.apply_gradients(grads_and_vars=zip(grads, [W, b]))
        
        if e % 100 == 0:
            print('Epoch: {}, Loss: {:.4f}'.format(e, loss_fn(h, y)))

Epoch: 0, Loss: 0.7047
Epoch: 100, Loss: 0.5197
Epoch: 200, Loss: 0.4863
Epoch: 300, Loss: 0.4772
Epoch: 400, Loss: 0.4747
Epoch: 500, Loss: 0.4740
Epoch: 600, Loss: 0.4738
Epoch: 700, Loss: 0.4737
Epoch: 800, Loss: 0.4737
Epoch: 900, Loss: 0.4737


In [17]:
y_hat = tf.cast(sigmoid(X_test) > 0.5, dtype=tf.int32)
# print('Test Result = {}'.format(tf.cast(sigmoid(X_test) > 0.5, dtype=tf.int32)))
print('Test Accuracy: {:.4f}'.format(np.sum(y_test.reshape(-1) == y_hat.numpy().reshape(-1)) / y_test.shape[0]))

Test Accuracy: 0.7630
