Gradient descent learning with multiple inputs

In [7]:
import numpy as np

In [None]:
# Dataset of shape `(3, 4)`: each row is one input feature and each column
# is one training example, i.e. 3 features x 4 examples
xs = np.array([[8.50, 9.5, 9.9, 9.0],
               [0.65, 0.8, 0.8, 0.9],
               [1.20, 1.3, 0.5, 1.0]])

# One target value per training example (i.e. per column of `xs`)
ys = np.array([1.0, 1.0, 0.0, 1.0])
assert xs.shape[1] == ys.shape[0], "expected one target per column of `xs`"

# A single training example: the first column of `xs` and its target
x_i = xs[:, 0]
y_i = ys[0]

# One weight per input feature
w = np.array([0.1, 0.2, -0.1])
assert w.shape == x_i.shape, "expected one weight per input feature"

# A simple linear model
def forward(x):
    """Return the linear prediction `w . x`.

    Note that this reads the module-level weights `w` at call time, so the
    result changes after `w` is updated below.
    """
    y_hat = w.dot(x)
    return y_hat

# Forward pass and squared-error loss for the single example
y_hat = forward(x_i)
l = (y_hat - y_i) ** 2.0

# Here, we ignore the extra `2.0` scaling factor that would normally
# be a part of the "real" partial derivative of the loss function with
# respect to the weights, since the author doesn't include it (a constant
# factor on the gradient only rescales the effective learning rate)
dl_dw =  (y_hat - y_i) * x_i # * 2.0?

# Our only hyperparameter: the learning rate
lr = 0.01

# One gradient descent step (updates `w` in place)
w -= lr * dl_dw
print("Weights:" + str(w))
print("Weight Deltas:" + str(dl_dw))

Now, with several steps of learning

In [None]:
# Toy dataset: rows are input features, columns are training examples
xs = np.array([
    [8.50, 9.5, 9.9, 9.0],
    [0.65, 0.8, 0.8, 0.9],
    [1.20, 1.3, 0.5, 1.0],
])
ys = np.array([1.0, 1.0, 0.0, 1.0])

# Train repeatedly on the first example only
x_i, y_i = xs[:, 0], ys[0]

# Initial weights, one per input feature
w = np.array([0.1, 0.2, -0.1])


def forward(x, w):
    """Linear model: the dot product of the weights `w` with the input `x`."""
    return np.dot(w, x)


# Hyperparameters
lr = 0.001
epochs = 50

for i in range(epochs):
    # Forward pass
    y_hat = forward(x_i, w)

    # Residual and squared-error loss
    delta = y_hat - y_i
    l = delta ** 2.0

    # Gradient of the loss w.r.t. the weights (constant factor 2 omitted)
    dl_dw = delta * x_i

    # Gradient descent step, applied to `w` in place
    w -= lr * dl_dw
    print('epoch: {} - loss: {}, prediction: {}'.format(i, l, y_hat))

Freezing one weight: what does it do?

In [None]:
# Toy dataset: rows are input features, columns are training examples
xs = np.array([
    [8.50, 9.5, 9.9, 9.0],
    [0.65, 0.8, 0.8, 0.9],
    [1.20, 1.3, 0.5, 1.0],
])
ys = np.array([1.0, 1.0, 0.0, 1.0])

# Train repeatedly on the first example only
x_i, y_i = xs[:, 0], ys[0]

# Initial weights, one per input feature
w = np.array([0.1, 0.2, -0.1])


def forward(x, w):
    """Linear model: the dot product of the weights `w` with the input `x`."""
    return np.dot(w, x)


# Index of the weight that is never updated (`w_0`)
FROZEN_IDX = 0

# Hyperparameters: here, we can increase the learning rate,
# since we are "freezing" `w_0`
lr = 0.3
epochs = 50

for i in range(epochs):
    # Forward pass
    y_hat = forward(x_i, w)

    # Residual and squared-error loss
    delta = y_hat - y_i
    l = delta ** 2.0

    # Gradient of the loss w.r.t. the weights (constant factor 2 omitted)
    dl_dw = delta * x_i

    # "Freeze" `w_0` by zeroing its gradient, so the step leaves it untouched
    dl_dw[FROZEN_IDX] = 0.0

    # Gradient descent step, applied to `w` in place
    w -= lr * dl_dw
    print('epoch: {} - loss: {}, prediction: {}'.format(i, l, y_hat))

Gradient descent learning with multiple outputs and a single input

In [11]:
# One scalar input feature, three outputs.
# `xs` holds one input value per example; each row of `ys` is one output,
# each column one example.
xs = np.array([0.65, 1.0, 1.0, 0.9])
ys = np.array([
    [0.1, 0.0, 0.0, 0.1],
    [1.0, 1.0, 0.0, 1.0],
    [0.1, 0.0, 0.1, 0.2],
])

# Train on the first example only: a scalar input and a length-3 target
x_i, y_i = xs[0], ys[:, 0]

# One weight per output
w = np.array([0.3, 0.2, 0.9])


def forward(x):
    """Scale the module-level weights `w` by the scalar input `x`.

    With a scalar `x`, the dot product reduces to an elementwise product,
    giving one prediction per output.
    """
    return np.dot(w, x)


lr = 0.1
epochs = 50

for i in range(epochs):
    # Forward pass
    y_hat = forward(x_i)         # Shape `(3,)`

    # Per-output residuals and squared errors, then the summed loss
    delta = y_hat - y_i          # Shape `(3,)`
    l = delta ** 2.0             # Shape `(3,)`
    l_total = np.sum(l)

    # Gradient w.r.t. each weight (constant factor 2 omitted)
    dl_dw = delta * x_i

    # Gradient descent step, applied to `w` in place
    w -= lr * dl_dw
    print('epoch: {} - loss (total): {}, prediction: {}'.format(i, l_total, y_hat))

epoch: 0 - loss (total): 1.00115, prediction: [0.195 0.13  0.585]
epoch: 1 - loss (total): 0.918339940321875, prediction: [0.19098625 0.1667575  0.56450875]
epoch: 2 - loss (total): 0.8423795095543973, prediction: [0.18714208 0.201962   0.54488326]
epoch: 3 - loss (total): 0.7727021410703248, prediction: [0.18346033 0.2356791  0.52608694]
epoch: 4 - loss (total): 0.7087881317655766, prediction: [0.17993413 0.26797166 0.50808476]
epoch: 5 - loss (total): 0.6501607657458451, prediction: [0.17655691 0.29889986 0.49084318]
epoch: 6 - loss (total): 0.5963827586422256, prediction: [0.17332238 0.32852134 0.47433006]
epoch: 7 - loss (total): 0.5470529960350563, prediction: [0.17022451 0.35689131 0.45851461]
epoch: 8 - loss (total): 0.5018035416588289, prediction: [0.16725753 0.38406265 0.44336737]
epoch: 9 - loss (total): 0.4602968930732402, prediction: [0.1644159  0.41008601 0.4288601 ]
epoch: 10 - loss (total): 0.42222346433124297, prediction: [0.16169432 0.43500987 0.41496576]
epoch: 11 - l

Gradient descent learning with multiple inputs and outputs

In [29]:
# 3 inputs, 3 outputs: each row of `xs` is one input feature and each row
# of `ys` is one output; columns are training examples
xs = np.array([
    [8.50, 9.5, 9.9, 9.0],  # Toes
    [0.65, 0.8, 0.8, 0.9],  # Win-loss record
    [1.20, 1.3, 0.5, 1.0],  # Number of fans (millions)
])

ys = np.array([
    [0.1, 0.0, 0.0, 0.1],  # Hurt
    [1.0, 1.0, 0.0, 1.0],  # Win
    [0.1, 0.0, 0.1, 0.2],  # Sad
])

# Train on the first example only (first column of each array)
x_i, y_i = xs[:, 0], ys[:, 0]
print('x_i (shape): {}'.format(x_i.shape))
print('y_i (shape): {}'.format(y_i.shape))

# Weight matrix: one row per output, one column per input
w = np.array([
    [0.1, 0.1, -0.3],  # Hurt?
    [0.1, 0.2,  0.0],  # Win?
    [0.0, 1.3,  0.1],  # Sad?
])
print('w (shape): {}'.format(w.shape))


def forward(x):
    """Matrix-vector product of the module-level weights `w` with input `x`."""
    return np.dot(w, x)


lr = 0.001
epochs = 50

for i in range(epochs):
    # Forward pass:
    # y_hat[0] -> hurt?
    # y_hat[1] -> win?
    # y_hat[2] -> sad?
    y_hat = forward(x_i)         # Shape `(3,)`

    # Per-output residuals and squared errors, then the summed loss
    delta = y_hat - y_i          # Shape `(3,)`
    l = delta ** 2.0             # Shape `(3,)`
    l_total = np.sum(l)

    # Gradient w.r.t. `w`: entry `[j, k]` is `delta[j] * x_i[k]`, which has
    # the same shape as `w` (constant factor 2 omitted)
    dl_dw = np.outer(delta, x_i)

    # Gradient descent step, applied to `w` in place
    w -= lr * dl_dw
    print('epoch: {} - loss (total): {}, prediction: {}'.format(i, l_total, y_hat))

x_i (shape): (3,)
y_i (shape): (3,)
w (shape): (3, 3)
epoch: 0 - loss (total): 0.9556500000000003, prediction: [0.555 0.98  0.965]
epoch: 1 - loss (total): 0.8192478418174456, prediction: [0.52127881 0.98148225 0.90089269]
epoch: 2 - loss (total): 0.7023146824910186, prediction: [0.49005679 0.98285465 0.84153653]
epoch: 3 - loss (total): 0.6020716663082419, prediction: [0.4611487  0.98412533 0.7865794 ]
epoch: 4 - loss (total): 0.5161365701276204, prediction: [0.43438307 0.98530184 0.73569529]
epoch: 5 - loss (total): 0.4424671910847189, prediction: [0.4096011  0.98639116 0.68858232]
epoch: 6 - loss (total): 0.37931281470327316, prediction: [0.38665579 0.98739975 0.64496101]
epoch: 7 - loss (total): 0.32517261007623827, prediction: [0.36541102 0.98833358 0.60457259]
epoch: 8 - loss (total): 0.27875996339988907, prediction: [0.34574074 0.98919821 0.56717745]
epoch: 9 - loss (total): 0.2389719022659645, prediction: [0.32752828 0.98999876 0.53255376]
epoch: 10 - loss (total): 0.2048628840

Example of the outer product

In [30]:
# Two length-3 vectors
a = np.array([0, 1, 2])
b = np.array([3, 4, 5])

# Outer product: `c[i, j] == a[i] * b[j]`
c = np.outer(a, b)

print('a (shape): {}'.format(a.shape))
print('b (shape): {}'.format(b.shape))
print('c (shape): {}'.format(c.shape)) # Should be `(3, 3)`
print(c)


a (shape): (3,)
b (shape): (3,)
c (shape): (3, 3)
[[ 0  0  0]
 [ 3  4  5]
 [ 6  8 10]]
