In [281]:
# Import
import numpy as np

In [282]:
# Random data input X
N = 100     # Samples
D = 2       # Features

# Seed NumPy's global (legacy) generator so that every np.random.* draw that
# follows -- this data matrix and any later random initialisation -- is the
# same on each "Restart Kernel -> Run All".
np.random.seed(42)

# N samples drawn from a standard normal distribution, one row per sample
X = np.random.randn(N, D)

In [283]:
# Move the first 50 samples so that their cloud is centred on (-2, -2);
# the scalar is broadcast over every feature column.
X[:50] -= 2.0

# Move the last 50 samples so that their cloud is centred on (2, 2)
X[50:] += 2.0

In [284]:
# Target output T: label 0 for the first 50 samples, label 1 for the last 50
T = np.array([label for label in (0, 1) for _ in range(50)])
T

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [285]:
# Prepend a constant column of ones to X so that the bias/intercept (w0 = b)
# is learned like any other weight.
ones = np.ones((N, 1))
Xb = np.hstack((ones, X))

In [286]:
# Display the design matrix. Each row is [1, x1, x2]: the leading 1 is the
# constant column multiplied by the bias w0, and the two feature columns are
# multiplied by w1 and w2 respectively.
Xb

array([[ 1.        , -0.17964464, -2.74006488],
       [ 1.        , -0.8595939 , -3.44864218],
       [ 1.        , -0.91669062, -0.08933613],
       [ 1.        , -0.61507475, -0.37446307],
       [ 1.        , -2.73086598, -1.11803964],
       [ 1.        , -1.61716679, -1.31473427],
       [ 1.        , -1.34023225, -1.59978416],
       [ 1.        , -2.74746607, -0.85431239],
       [ 1.        , -0.8461826 , -1.27007798],
       [ 1.        , -2.98078627, -1.86202269],
       [ 1.        , -1.54918211, -1.01223231],
       [ 1.        , -2.15795734, -0.1819301 ],
       [ 1.        , -1.30211371, -2.92612827],
       [ 1.        , -0.38087954, -0.40719605],
       [ 1.        , -2.46052196, -3.94264879],
       [ 1.        , -2.35613523, -2.01846934],
       [ 1.        , -0.77871036, -1.97956655],
       [ 1.        , -1.78594181, -0.39963399],
       [ 1.        , -2.14486799, -3.39460996],
       [ 1.        , -2.2810591 , -1.71493845],
       [ 1.        , -1.91924545, -1.502

In [287]:
# Draw the initial weights from a standard normal distribution:
# one weight per feature plus one for the bias column.
n_weights = D + 1
w = np.random.randn(n_weights)

In [288]:
# Linear part of the model: one activation per sample (rows of Xb dotted with w)
z = np.dot(Xb, w)

In [289]:
def sigmoid(z):
    """Logistic sigmoid ``1 / (1 + exp(-z))``, evaluated without overflow.

    Parameters
    ----------
    z : scalar or array_like
        Linear activation(s).

    Returns
    -------
    float or numpy.ndarray
        Element-wise sigmoid of ``z``, with the same shape as ``z``.

    Notes
    -----
    The textbook form calls ``exp(-z)`` directly, which overflows (and emits a
    RuntimeWarning) once ``z`` is a large negative number. Here the identity
    ``sigmoid(z) = exp(-log(1 + exp(-z)))`` is used, with the inner logarithm
    computed by ``np.logaddexp(0, -z)``, which never forms ``exp`` of a large
    positive number.
    """
    # log(1 + exp(-z)) == logaddexp(0, -z); np.negative also accepts plain lists
    log_denominator = np.logaddexp(0, np.negative(z))
    return np.exp(-log_denominator)

In [290]:
# Model output: pass the linear activations z through the sigmoid, giving one
# value between 0 and 1 per sample. cross_entropy() treats Y[i] as the
# predicted probability that sample i has label 1.
Y = sigmoid(z)
Y

array([0.84143933, 0.84914861, 0.48958032, 0.56052874, 0.41782137,
       0.56861847, 0.63412618, 0.38316069, 0.64639243, 0.48523841,
       0.53744923, 0.3641476 , 0.77828994, 0.59065418, 0.77854655,
       0.57587421, 0.73147795, 0.43157319, 0.75326834, 0.5454758 ,
       0.55878087, 0.58017721, 0.32382538, 0.72630619, 0.57204882,
       0.67857922, 0.70782062, 0.6815071 , 0.28712457, 0.89224604,
       0.49977269, 0.74808986, 0.49199077, 0.64962902, 0.47368263,
       0.48191673, 0.51897855, 0.43845976, 0.72343392, 0.77653404,
       0.56293135, 0.68542371, 0.72528343, 0.62006634, 0.76429625,
       0.54944898, 0.60674425, 0.47156331, 0.39750417, 0.86917062,
       0.75178811, 0.45140408, 0.44130565, 0.59567734, 0.67083537,
       0.57732422, 0.62249788, 0.62229727, 0.43638415, 0.77868983,
       0.3072128 , 0.46099034, 0.78388703, 0.52635166, 0.34161866,
       0.25968829, 0.6341688 , 0.3556029 , 0.51443239, 0.70880901,
       0.44992461, 0.48785039, 0.63163328, 0.71400313, 0.65018

In [291]:
# Calculate the cross-entropy error (Cost function/Objective function)
def cross_entropy(T, Y, eps=1e-12):
    """Total binary cross-entropy between targets ``T`` and predictions ``Y``.

    Parameters
    ----------
    T : array_like
        Target labels. An entry equal to 1 is the positive class; any other
        value is treated as the negative class.
    Y : array_like
        Predicted probability of the positive class for each sample; must have
        the same shape as ``T``.
    eps : float, optional
        Lower bound applied to the probability given to the true class before
        taking the logarithm, so that a fully saturated wrong prediction
        yields a large finite loss rather than ``inf``.

    Returns
    -------
    float
        ``-sum(log(Y[i]))`` over samples with ``T[i] == 1`` plus
        ``-sum(log(1 - Y[i]))`` over all other samples (summed, not averaged).
    """
    T = np.asarray(T)
    Y = np.asarray(Y, dtype=float)

    # Probability the model assigned to each sample's true class
    likelihood = np.where(T == 1, Y, 1 - Y)

    # Floor at eps so that log() never sees an exact zero
    return -np.sum(np.log(np.clip(likelihood, eps, None)))

In [292]:
# Function optimization - gradient descent, 100 iterations
learning_rate = 0.1
for i in range(100):

    # Every 10th iteration, print the current cross-entropy loss.
    # The printed value does not include the L2 penalty applied in the update
    # below, so it need not decrease monotonically.
    if not i % 10:
        print(cross_entropy(T, Y))

    # Gradient descent weight update with L2 regularization (strength 0.1):
    # Xb.T.dot(T - Y) pulls the predictions towards the targets, while
    # -0.1*w shrinks every weight (bias included) towards zero.
    gradient = Xb.T.dot(T - Y) - 0.1*w
    w += learning_rate * gradient

    # Recalculate the predictions Y with the updated weights
    Y = sigmoid(Xb.dot(w))

print("Final w:", w)

84.16255650859462
8.676862877590482
5.6900514522154175
3.179569747666161
1.6943264883438613
1.3139890871144753
1.2755545315919867
1.310651095066485
1.3754534352536
1.4490860669274002
Final w: [1.47959999 4.8283767  1.76203136]
