In [32]:
import numpy as np

In [33]:
def sigmoid(z):
  '''
  Logistic function 1 / (1 + exp(-z)), applied element-wise.

  z: scalar or array-like of real numbers.
  Returns a NumPy float scalar for scalar input, otherwise an ndarray
  with the same shape as z.

  The value is computed as exp(-log(1 + exp(-z))) via np.logaddexp, so a
  large negative z yields 0.0 without the overflow warning that a direct
  np.exp(-z) would trigger. np.negative is used instead of unary minus so
  plain Python lists are accepted as well.
  '''
  return np.exp(-np.logaddexp(0.0, np.negative(z)))

In [34]:
def predict(features, weights):
  '''
  Apply sigmoid to the dot product of features and weights.

  The result is documented as the probabilities that the class
  label == 1 (a 1D array when features is 2D and weights is 1D).

  Side effect: prints the raw dot product, prefixed with 'W.X',
  before the sigmoid is applied.
  '''
  linear_scores = np.dot(features, weights)
  print('W.X', linear_scores)
  probabilities = sigmoid(linear_scores)
  return probabilities

In [35]:
def update_weights(features, labels, weights, lr):
    '''
    One step of vectorized gradient descent.

    Example shapes:
      Features: (200, 3)
      Labels:   (200, 1)
      Weights:  (3, 1)

    features, labels and weights may be ndarrays or array-likes.
    lr is the learning rate (step size).

    Returns a NEW float array holding the updated weights. The weights
    argument is never modified, so the caller can keep every
    intermediate result (w1, w2, ...) as a distinct object.

    Side effect: prints the intermediate quantities (predictions,
    residuals and the scaled gradient) for inspection.
    '''
    features = np.asarray(features)
    labels = np.asarray(labels)
    # Convert to a float array up front: an in-place `weights -= gradient`
    # would overwrite the caller's array and fails outright for
    # integer-dtype weights.
    weights = np.asarray(weights, dtype=float)

    N = len(features)

    #1 - Get Predictions
    predictions = predict(features, weights)
    print('S(W.T)', predictions)

    errors = predictions - labels
    print('S(W.T) - Y', errors)

    #2 Multiply the transposed features (e.g. (3, 200)) with the
    # per-observation errors (e.g. (200, 1)): one partial derivative
    # per feature, summed over all observations.
    gradient = np.dot(features.T, errors)

    #3 Take the average cost derivative for each feature
    gradient = gradient / N

    #4 - Multiply the gradient by our learning rate
    step = gradient * lr
    print('g', step)

    #5 - Subtract from our weights to minimize cost (out of place)
    return weights - step


In [36]:
# Five observations, two columns each. The first column is the constant 1
# (presumably the intercept term); the second holds the observed values.
features = np.column_stack((
    np.ones(5),
    [0, 1, 0.4, -1.2, -0.1],
))

In [37]:
# One label per row of `features`.
# NOTE(review): the labels are encoded as +1 / -1, while update_weights
# uses the residual sigmoid(z) - label and predict describes its output as
# a probability. Confirm whether a 0 / 1 encoding was intended.
labels = [1, -1, 1, -1, 1]

In [38]:
# Initial weight vector: one weight per feature column.
w0 = [1, 0.5]

In [39]:
# Step 1: update the initial weights w0 with learning rate 0.001.
# The trailing expression displays the result.
w1 = update_weights(features, labels, w0, lr=0.001)
w1

W.X [1.   1.5  1.2  0.4  0.95]
S(W.T) [0.73105858 0.81757448 0.76852478 0.59868766 0.72111518]
S(W.T) - Y [-0.26894142  1.81757448 -0.23147522  1.59868766 -0.27888482]
g [ 5.27392135e-04 -3.31104641e-05]


array([0.99947261, 0.50003311])

In [40]:
# Step 2: continue from w1 with the same learning rate.
w2 = update_weights(features, labels, w1, lr=0.001)
w2

W.X [0.99947261 1.49950572 1.19948585 0.39943288 0.9494693 ]
S(W.T) [0.73095487 0.81750074 0.76843331 0.59855139 0.72100844]
S(W.T) - Y [-0.26904513  1.81750074 -0.23156669  1.59855139 -0.27899156]
g [ 5.27289751e-04 -3.30976901e-05]


array([0.99894532, 0.50006621])

In [41]:
# Step 3: continue from w2 with the same learning rate.
w3 = update_weights(features, labels, w2, lr=0.001)
w3

W.X [0.99894532 1.49901153 1.1989718  0.39886587 0.9489387 ]
S(W.T) [0.73085117 0.817427   0.76834182 0.59841514 0.72090169]
S(W.T) - Y [-0.26914883  1.817427   -0.23165818  1.59841514 -0.27909831]
g [ 5.27187365e-04 -3.30849217e-05]


array([0.99841813, 0.50009929])

In [42]:
# Step 4: continue from w3 with the same learning rate.
w4 = update_weights(features, labels, w3, lr=0.001)
w4

W.X [0.99841813 1.49851742 1.19845785 0.39829898 0.9484082 ]
S(W.T) [0.73074745 0.81735325 0.76825033 0.5982789  0.72079494]
S(W.T) - Y [-0.26925255  1.81735325 -0.23174967  1.5982789  -0.27920506]
g [ 5.27084975e-04 -3.30721590e-05]


array([0.99789105, 0.50013237])

In [43]:
# Step 5: continue from w4 with the same learning rate.
w5 = update_weights(features, labels, w4, lr=0.001)
w5

W.X [0.99789105 1.49802341 1.19794399 0.39773221 0.94787781]
S(W.T) [0.73064373 0.81727949 0.76815883 0.59814268 0.72068819]
S(W.T) - Y [-0.26935627  1.81727949 -0.23184117  1.59814268 -0.27931181]
g [ 5.26982583e-04 -3.30594019e-05]


array([0.99736406, 0.50016542])