In [None]:
def calculate_model_equation(x1, w1, x2, w2, b):
    """Return the linear score z = x1*w1 + x2*w2 + b.

    Args:
        x1: First input feature.
        w1: Weight applied to ``x1``.
        x2: Second input feature.
        w2: Weight applied to ``x2``.
        b: Bias term added to the weighted sum.

    Returns:
        The weighted sum of the two features plus the bias.
    """
    first_term = x1 * w1
    second_term = x2 * w2
    return first_term + second_term + b

In [None]:
import numpy as np
def sigmoid_function_to_calc_prob(z):
    """Map a linear score to a probability with the logistic sigmoid.

    Computes ``1 / (1 + exp(-z))`` in a numerically stable way: only
    ``exp(-|z|)`` is ever evaluated, so large negative scores no longer
    overflow ``np.exp`` (which raised a RuntimeWarning in the naive form).

    Args:
        z: Scalar or array-like linear score(s).

    Returns:
        The sigmoid of ``z``: a NumPy float for scalar input, or a NumPy
        array with the same shape as ``z`` for array-like input.
    """
    scores = np.asarray(z, dtype=float)
    # exp(-|z|) lies in [0, 1], so it can underflow to 0 but never overflow.
    decay = np.exp(-np.abs(scores))
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z)  (algebraically identical).
    prob = np.where(scores >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d array back into a NumPy scalar and leaves
    # n-d arrays unchanged, so scalar callers still get a scalar.
    return prob[()]

In [None]:
def calculate_loss_wrt_weights(y, p, x):
    """Gradient of the per-sample log loss with respect to one weight.

    The chain rule gives
    ``dL/dw = (dL/dp) * (dp/dz) * (dz/dw)
            = -(y/p - (1-y)/(1-p)) * p(1-p) * x``,
    which simplifies algebraically to ``(p - y) * x``. The simplified form is
    used because the expanded one divides by ``p`` and ``1 - p`` and therefore
    fails (ZeroDivisionError / NaN) when the prediction saturates at 0 or 1.

    Args:
        y: True label (0 or 1).
        p: Predicted probability for the sample.
        x: Input feature value that the weight multiplies.

    Returns:
        The gradient ``(p - y) * x``.
    """
    prediction_error = p - y
    return prediction_error * x

In [None]:
def calculate_loss_wrt_bias(p, y):
    """Gradient of the per-sample log loss with respect to the bias.

    The chain rule gives
    ``dL/db = (dL/dp) * (dp/dz) = -(y/p - (1-y)/(1-p)) * p(1-p)``,
    which simplifies algebraically to ``p - y``. The simplified form is used
    because the expanded one divides by ``p`` and ``1 - p`` and therefore
    fails (ZeroDivisionError / NaN) when the prediction saturates at 0 or 1.

    Note the parameter order is ``(p, y)``; prefer keyword arguments.

    Args:
        p: Predicted probability for the sample.
        y: True label (0 or 1).

    Returns:
        The gradient ``p - y``.
    """
    return p - y

In [None]:
import pandas as pd
# Toy training set: nine rows with two numeric features and a binary label
# (presumably 1 = pass, 0 = fail).
df = pd.DataFrame(
    {
        "study hours": [6, 7, 8, 9, 5, 6, 2, 0, 11],
        "not study hours": [4, 7, 3, 8, 5, 5, 2, 5, 1],
        "pass or fail": [1, 0, 1, 0, 0, 1, 0, 0, 1],
    },
    columns=["study hours", "not study hours", "pass or fail"],
)
df

In [None]:
# Logistic regression is a supervised machine learning algorithm used for binary classification.
# It models the probability of an event occurring, where the predicted probability lies between 0 and 1. 
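# For each data point (row), the model follows these steps:
#   1. compute the linear score: z = w1*x1 + w2*x2 + b
#   2. turn the score into a probability: p = sigmoid(z) = 1 / (1 + exp(-z))
#   3. measure the loss between p and the true label y (formula below)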

# loss = -[y*log(p) + (1 - y)*log(1 - p)]   (for a single data point, i.e. one row)
# This loss measures the error ("error energy") between the predicted probability and the true label.

# Since the loss is not directly dependent on the weights, we apply the chain rule:

# error energy between probability and truth

# derivative:
# delta l / delta w (l is not directly dependent on w)

# chain rule: delta l / delta w = (delta l / delta p) . (delta p / delta z) . (delta z / delta w)

# a. loss wrt p: delta l / delta p = -(y/p - (1-y) / (1-p))
# b. sigmoid derivative: delta p / delta z = p(1-p)
# c. linear part: delta z / delta w = x


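# multiplying a, b and c, the p(1-p) factor cancels and the product simplifies to:
# delta l / delta w = (p - y)x
# delta l / delta b = (p - y)      (same chain, but delta z / delta b = 1)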

# update rule (learning):
# w_new = w - eta * (p - y) * x
# b_new = b - eta * (p - y)

# eta is learning rate
# eta too large: updates overshoot the minimum, the loss oscillates, and learning can fail
# eta too small: learning is slow


# eta: there is no single fixed value
# scaled data: typical values are 0.01, 0.05, 0.1
# unscaled data: use a very small eta

In [None]:
def model_learning(data=None, epochs=100, eta=0.01):
    """Fit a two-feature logistic regression with per-row gradient updates.

    Starting from zero weights and bias, every epoch walks the rows in order
    and, for each row, updates ``w1``, ``w2`` and ``b`` by ``eta`` times the
    corresponding gradient. The final parameters are printed (rounded to two
    decimals) and also returned so they can be reused without copying them
    from the printed output.

    Args:
        data: DataFrame with exactly three columns in the order
            (feature 1, feature 2, label). Defaults to the module-level ``df``.
        epochs: Number of full passes over the rows.
        eta: Learning rate.

    Returns:
        Tuple ``(w1, w2, b)`` with the learned weights and bias as floats.
    """
    source = df if data is None else data
    # Convert the frame once; the rows do not change between epochs, so there
    # is no need to re-run a pandas row iterator on every pass.
    training_rows = source.to_numpy().tolist()

    clip = 1e-7
    w1, w2, b = 0, 0, 0

    for _ in range(epochs):
        for x1, x2, y in training_rows:
            z = calculate_model_equation(x1, w1, x2, w2, b)
            p = sigmoid_function_to_calc_prob(z)
            # Keep p strictly inside (0, 1) so gradient code that divides by
            # p or (1 - p) cannot hit a zero denominator.
            p = min(max(p, clip), 1 - clip)

            # All three gradients depend only on p, y and the inputs, so they
            # can be computed before any parameter is updated.
            grad_w1 = calculate_loss_wrt_weights(y=y, p=p, x=x1)
            grad_w2 = calculate_loss_wrt_weights(y=y, p=p, x=x2)
            grad_b = calculate_loss_wrt_bias(p=p, y=y)

            w1 = w1 - eta * grad_w1
            w2 = w2 - eta * grad_w2
            b = b - eta * grad_b

    print(f"weight 1: {round(w1, 2)}\tweight 2: {round(w2, 2)}\tbias : {round(b, 2)}")
    return float(w1), float(w2), float(b)

            


In [None]:
# Train on the module-level `df`; the learned weights and bias are printed.
model_learning()

In [None]:
def model_testing(x1=2, x2=5, w1=0.97, w2=-1.21, b=-0.05):
    """Print the predicted probability for one input under given parameters.

    The defaults reproduce the original hard-coded example, so calling
    ``model_testing()`` with no arguments behaves exactly as before.

    Args:
        x1: First input feature of the example to score.
        x2: Second input feature of the example to score.
        w1: Weight for ``x1``.
        w2: Weight for ``x2``.
        b: Bias term.
            NOTE(review): the default weights and bias look like values copied
            from a previous training run's printed output; confirm they match
            the current model before relying on them.
    """
    z = calculate_model_equation(x1=x1, w1=w1, x2=x2, w2=w2, b=b)
    p = sigmoid_function_to_calc_prob(z)
    print(f"probability: {round(p, 2)}")
# Print the predicted probability for model_testing's built-in example input and weights.
model_testing()