In [11]:
import numpy as np
import matplotlib.pyplot as plt
import math,copy



In [2]:
X_train = np.array([[0.5, 1.5], [1,1], [1.5, 0.5], [3, 0.5], [2, 2], [1, 2.5]])
y_train = np.array([0, 0, 0, 1, 1, 1])

In [7]:
def sigmoid(z):
    g=1/(1+np.exp(-z))
    return g

In [13]:
def compute_logistic_cost(x,y,w,b):
    m=x.shape[0]
    cost=0.0
    for i in range(m):
        z_i=np.dot(x[i],w)+b
        f_wb_i=sigmoid(z_i)
        cost+=-y[i]*np.log(f_wb_i) - (1-y[i])*np.log(1-f_wb_i)
    cost=cost/m
    return cost


In [16]:
def compute_gradient(X,y,w,b):
    m,n=X.shape
    dj_dw=np.zeros((n,))
    dj_db=0.
    for i in range(m):
        f_wb_i=sigmoid(np.dot(X[i],w+b))
        err_i=f_wb_i-y[i]
        for j in range(n):
            dj_dw[j]=dj_dw[j]+err_i*X[i,j]
        dj_db=dj_db+err_i
    dj_dw=dj_dw/m
    dj_db=dj_db/m
    return dj_dw,dj_db

In [20]:
X_tmp = np.array([[0.5, 1.5], [1,1], [1.5, 0.5], [3, 0.5], [2, 2], [1, 2.5]])
y_tmp = np.array([0, 0, 0, 1, 1, 1])
w_tmp = np.array([2.,3.])
b_tmp = 1.
dj_db_tmp, dj_dw_tmp = compute_gradient(X_tmp, y_tmp, w_tmp, b_tmp)
print(f"dj_db: {dj_db_tmp}" )
print(f"dj_dw: {dj_dw_tmp.tolist()}" )

dj_db: [0.49941779 0.49958225]
dj_dw: 0.49950253243962756


In [21]:
def gradient_descent(X, y, w_in, b_in, alpha, num_iters): 
    """
    Performs batch gradient descent
    
    Args:
      X (ndarray (m,n)   : Data, m examples with n features
      y (ndarray (m,))   : target values
      w_in (ndarray (n,)): Initial values of model parameters  
      b_in (scalar)      : Initial values of model parameter
      alpha (float)      : Learning rate
      num_iters (scalar) : number of iterations to run gradient descent
      
    Returns:
      w (ndarray (n,))   : Updated values of parameters
      b (scalar)         : Updated value of parameter 
    """
    # An array to store cost J and w's at each iteration primarily for graphing later
    J_history = []
    w = copy.deepcopy(w_in)  #avoid modifying global w within function
    b = b_in
    
    for i in range(num_iters):
        # Calculate the gradient and update the parameters
        dj_db, dj_dw = compute_gradient(X, y, w, b)   

        # Update Parameters using w, b, alpha and gradient
        w = w - alpha * dj_dw               
        b = b - alpha * dj_db               
      
        # Save cost J at each iteration
        if i<100000:      # prevent resource exhaustion 
            J_history.append( compute_logistic_cost(X, y, w, b) )

        # Print cost every at intervals 10 times or as many iterations if < 10
        if i% math.ceil(num_iters / 10) == 0:
            print(f"Iteration {i:4d}: Cost {J_history[-1]}   ")
        
    return w, b, J_history   

In [22]:
w_tmp  = np.zeros_like(X_train[0])
b_tmp  = 0.
alph = 0.1
iters = 10000

w_out, b_out, _ = gradient_descent(X_train, y_train, w_tmp, b_tmp, alph, iters) 
print(f"\nupdated parameters: w:{w_out}, b:{b_out}")

Iteration    0: Cost [0.6932253 0.6931819]   
Iteration 1000: Cost [10.68340246 10.84728451]   
Iteration 2000: Cost [21.54558419 21.7095211 ]   
Iteration 3000: Cost [32.40796216 32.57189909]   
Iteration 4000: Cost [43.27034019 43.43427712]   
Iteration 5000: Cost [54.13271822 54.29665515]   
Iteration 6000: Cost [64.99509625 65.15903319]   
Iteration 7000: Cost [75.85747428 76.02141122]   
Iteration 8000: Cost [86.71985231 86.88378925]   
Iteration 9000: Cost [97.58223034 97.74616728]   

updated parameters: w:[-81.42742003 -81.42742003], b:[81.69971478 81.37184091]
