In [19]:
import copy, math
import numpy as np
import matplotlib.pyplot as plt

In [20]:
def sigmoid(z):
    g = 1/(1+np.exp(-z))
    return g

In [21]:
# z_tmp = np.arange(-10,11)
# y = sigmoid(z_tmp)
# np.set_printoptions(precision=3) 
# print(np.c_[z_tmp, y])

In [22]:
# fig, ax = plt.subplots(1, 1, figsize = (5,3))
# ax.plot(z_tmp, y, c="r")
# ax.set_title("Sigmoid function")
# ax.set_ylabel('sigmoid(z)')
# ax.set_xlabel('z')

In [23]:
X_train = np.array([[0.5, 1.5], [1,1], [1.5, 0.5], [3, 0.5], [2, 2], [1, 2.5]])
y_train = np.array([0, 0, 0, 1, 1, 1])

In [24]:
def compute_cost(X, y, w, b, *argv):
    m, n = X.shape
    loss_sum = 0
    cost = 0.0
    z_wb = 0
    for i in range(m):
        #for j in range(n):
        z_wb += (np.dot(X[i], w) + b)
        
        f_wb = sigmoid(z_wb)
        loss = -y[i]*np.log(f_wb)-(1-y[i])*np.log(1-f_wb)
        loss_sum += loss
    cost = (1/m)*loss_sum
    return cost

In [25]:
m, n = X_train.shape
initial_w = np.zeros(n)
initial_b = 0.
cost = compute_cost(X_train, y_train, initial_w, initial_b)
cost

0.6931471805599452

In [26]:
test_w = np.array([0.2, 0.2])
test_b = -24.
cost = compute_cost(X_train, y_train, test_w, test_b)
cost

58.666666666676065

In [27]:
def compute_gradient(X, y, w, b, *argv):
    m, n = X.shape
    dj_dw = np.zeros((n,))
    dj_db = 0.
    z_wb = 0
    for i in range(m):
        z_wb += (np.dot(X[i], w) + b)
        f_wb = sigmoid(z_wb)
        err = f_wb - y[i]
        dj_db += err
        for j in range(n):
            dj_dw[j] += err * X[i, j]
    dj_dw /= m
    dj_db /= m    
    return dj_db, dj_dw

In [28]:
dj_db, dj_dw = compute_gradient(X_train, y_train, initial_w, initial_b)
print(f'dj_db at initial w and b (zeros):{dj_db}' )
print(f'dj_dw at initial w and b (zeros):{dj_dw.tolist()}' )

dj_db at initial w and b (zeros):0.0
dj_dw at initial w and b (zeros):[-0.25, -0.16666666666666666]


In [29]:
test_w = np.array([ 0.2, -0.5])
test_b = -24
dj_db, dj_dw  = compute_gradient(X_train, y_train, test_w, test_b)

print('dj_db at test w and b:', dj_db)
print('dj_dw at test w and b:', dj_dw.tolist())

dj_db at test w and b: -0.49999999999671535
dj_dw at test w and b: [-0.9999999999983578, -0.8333333333284063]


In [35]:
def gradient_descent(X, y, w, b, cost_function, gradient_function, alpha, num_iters, lambda_):
    m = len(X)
    J_history = []
    w_history = []
#     w = copy.deepcopy(w_in)
#     b = b_in
    
    for i in range(num_iters):
        dj_db, dj_dw = gradient_function(X, y, w, b, lambda_)
        
        w -= alpha * dj_dw
        b -= alpha * dj_db        
        
        if i<100000:
            J_history.append(cost_function(X, y, w, b, lambda_))
            
        if i% math.ceil(num_iters/10) == 0 or i == (num_iters-1):
            w_history.append(w)
            print(f"Iteration {i:4}: Cost {float(J_history[-1]):8.2f}   ")
            
    return w, b, J_history, w_history

In [36]:
np.random.seed(1)
initial_w = 0.01 * (np.random.rand(2) - 0.5)
initial_b = -8
print(initial_w, initial_b)
# Some gradient descent settings
iterations = 10000
alpha = 0.001

w,b, J_history,_ = gradient_descent(X_train ,y_train, initial_w, initial_b, 
                                   compute_cost, compute_gradient, alpha, iterations, 0)

[-0.00082978  0.00220324] -8
Iteration    0: Cost    19.99   
Iteration 1000: Cost    12.55   
Iteration 2000: Cost     5.13   
Iteration 3000: Cost     0.38   
Iteration 4000: Cost     0.13   
Iteration 5000: Cost     0.10   
Iteration 6000: Cost     0.09   
Iteration 7000: Cost     0.08   
Iteration 8000: Cost     0.08   
Iteration 9000: Cost     0.07   
Iteration 9999: Cost     0.07   
