The derivative of the sigmoid function is at most 0.25 (reached at x = 0). This causes the **vanishing gradient** problem:
during backpropagation the gradient is multiplied by this small derivative at every hidden layer, so after passing through many hidden layers it becomes extremely close to 0.

Therefore, to solve this problem we use **ReLU**
where: 

- if x <= 0, ReLU(x) = 0
- if x > 0, ReLU(x) = x

In [5]:
import tensorflow as tf
import numpy as np

# Seed NumPy's and TensorFlow's global random generators with a fixed value.
np.random.seed(0)
tf.random.set_seed(0)

# Read the comma-separated table. Columns 0..16 are used as the model inputs
# and column 17 as the target; both are cast to float32 for TensorFlow.
data_set = np.loadtxt("ThoraricSurgery.csv", delimiter=",")
x_data = data_set[:, :17].astype(np.float32)
y_data = data_set[:, 17:18].astype(np.float32)  # 17:18 keeps the column 2-D

# Trainable parameters of a 17 -> 30 -> 1 network, drawn from a normal
# distribution. They are created in the order w1, b1, w2, b2.
w1 = tf.Variable(tf.random.normal(shape=[17, 30]))
b1 = tf.Variable(tf.random.normal(shape=[30]))
w2 = tf.Variable(tf.random.normal(shape=[30, 1]))
b2 = tf.Variable(tf.random.normal(shape=[1]))

# forward prop
def forward(x):
    """Run the two-layer network on ``x`` and return sigmoid outputs.

    ``x`` is multiplied by the module-level ``w1`` (17x30) and shifted by
    ``b1``; the sigmoid of that result is multiplied by ``w2`` (30x1) and
    shifted by ``b2``; a final sigmoid maps the result into (0, 1).
    """
    # NOTE(review): the hidden layer uses sigmoid here, although the text
    # preceding this cell motivates ReLU -- confirm this is intentional.
    hidden_pre = tf.matmul(x, w1) + b1
    hidden_act = tf.sigmoid(hidden_pre)
    output_pre = tf.matmul(hidden_act, w2) + b2
    return tf.sigmoid(output_pre)

# loss func
def loss_fn(x, y):
    """Return the mean binary cross-entropy of ``forward(x)`` against ``y``.

    A small constant (1e-7) is added inside each logarithm so that an
    output of exactly 0 or 1 does not produce ``log(0)``.
    """
    probs = forward(x)
    eps = 1e-7
    positive_term = y * tf.math.log(probs + eps)
    negative_term = (1 - y) * tf.math.log(1 - probs + eps)
    return -tf.reduce_mean(positive_term + negative_term)

# acc func
def accuracy(y_true, y_pred):
    """Return the fraction of entries where thresholded ``y_pred`` equals ``y_true``.

    ``y_pred`` values strictly greater than 0.5 are mapped to 1.0 and all
    others to 0.0 before the element-wise comparison.
    """
    hard_labels = tf.cast(y_pred > 0.5, dtype=tf.float32)
    hits = tf.cast(tf.equal(y_true, hard_labels), dtype=tf.float32)
    return tf.reduce_mean(hits)

# optimizer
optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)

# Full-batch training: every step uses the whole data set.
for epoch in range(10001):
    # Record the forward pass so the loss can be differentiated.
    with tf.GradientTape() as tape:
        loss = loss_fn(x_data, y_data)
    gradients = tape.gradient(loss, [w1, b1, w2, b2])
    optimizer.apply_gradients(zip(gradients, [w1, b1, w2, b2]))

    # Only report on every 1000th epoch.
    if epoch % 1000 != 0:
        continue

    # The printed cost was computed before this epoch's update, whereas the
    # accuracy below is evaluated with the already-updated parameters.
    pred = forward(x_data)
    acc_val = accuracy(y_data, pred)
    print(f"epoch = {epoch}, cost = {loss.numpy():.4f}, accuracy = {acc_val.numpy():.4f}")

# final Acc
# Evaluate the trained parameters on the training data itself.
predictions = forward(x_data)
# Thresholded 0/1 labels, kept as a module-level name.
predicted_class = tf.cast(predictions > 0.5, dtype=tf.float32)
# accuracy() applies the same > 0.5 threshold and equality check.
acc = accuracy(y_data, predictions)
print(f"\n Final Accuracy: {acc.numpy():.4f}")

# Pred New Data
# A single hand-written sample with 17 feature values, shaped (1, 17).
new_data = np.array(
    [[132, 2, 2.12, 1.72, 1, 0, 0, 0, 0, 0, 12, 0, 0, 0, 1, 0, 74]],
    dtype=np.float32,
)
new_prediction = forward(new_data)
# Same decision rule as in accuracy(): strictly above 0.5 becomes 1.0.
new_predicted_class = tf.cast(new_prediction > 0.5, dtype=tf.float32)

print("\nNew Data Prediction (Probability):", new_prediction.numpy())
print("New Data Predicted Class:", new_predicted_class.numpy())


epoch = 0, cost = 1.8352, accuracy = 0.7574
epoch = 1000, cost = 0.4128, accuracy = 0.8511
epoch = 2000, cost = 0.4071, accuracy = 0.8511
epoch = 3000, cost = 0.4032, accuracy = 0.8511
epoch = 4000, cost = 0.3985, accuracy = 0.8511
epoch = 5000, cost = 0.3958, accuracy = 0.8532
epoch = 6000, cost = 0.3921, accuracy = 0.8574
epoch = 7000, cost = 0.3938, accuracy = 0.8574
epoch = 8000, cost = 0.3874, accuracy = 0.8532
epoch = 9000, cost = 0.3829, accuracy = 0.8574
epoch = 10000, cost = 0.3788, accuracy = 0.8553

 Final Accuracy: 0.8553

New Data Prediction (Probability): [[0.1960173]]
New Data Predicted Class: [[0.]]
