# Introduction

In [8]:
import numpy as np
import tensorflow as tf

np.set_printoptions(precision=2, suppress=True)

<div><img src="../../assets/01_ML_Concepts/LR_GD_1.png" width="500" height="100" alt="Whiteboard derivation, part 1"></div>

<div>
<img src="../../assets/01_ML_Concepts/LR_GD_2.png" width="500" height="100" alt="Whiteboard derivation, part 2">
</div>

<div>
<img src="../../assets/01_ML_Concepts/LR_GD_3.png" width="500" height="100" alt="Whiteboard derivation, part 3">
</div>

# Implementation

Let's implement gradient descent using the gradients derived above.

## Synthetic Data

In [9]:
nb_samples = 100  # n in the whiteboard
nb_features = 3  # d in the whiteboard

# fix the NumPy seed so that "Restart Kernel -> Run All" regenerates exactly
# the same data (and the same NumPy draws in the cells that follow)
np.random.seed(42)

# generate three random feature vectors (zero mean, different standard deviations)
x_1 = np.random.normal(0, 0.1, size=nb_samples)
x_2 = np.random.normal(0, 0.3, size=nb_samples)
x_3 = np.random.normal(0, 0.2, size=nb_samples)

# gaussian noise added to the targets; a standard deviation of 0.0 keeps the
# data noise-free so gradient descent can recover theta exactly
# (set noise_std to e.g. 0.01 to experiment with noisy targets)
noise_std = 0.0
noise = np.random.normal(0, noise_std, size=nb_samples)

# take the linear combination of the features according to the true theta
# (0.3, 0.2, 0.1) to generate y; one coefficient per feature (nb_features)
y = 0.3*x_1 + 0.2*x_2 + 0.1*x_3 + noise
y = y.reshape(-1, 1)  # column vector; this is important for tf

# create the design matrix X with one column per feature
X = np.stack([x_1, x_2, x_3], axis=1)

# sanity check the dimensions of our matrices
print(f'X shape = {X.shape}, y shape = {y.shape}')

X shape = (100, 3), y shape = (100, 1)


## Numpy

In [10]:
def compute_gradient(y, theta, X):
    '''
    Gradient used by the update step, following the derivation:
    (theta^T X^T X - y^T X)^T, i.e. a column with one entry per
    column of X. No extra constant factor is applied.
    '''
    # contribution of the targets (row vector)
    target_term = y.T @ X
    # contribution of the current parameters (row vector)
    model_term = theta.T @ X.T @ X
    # combine, then flip the row into a column so it lines up with theta
    row_gradient = model_term - target_term
    return row_gradient.T


def compute_loss(y_pred, y_true):
    '''
    Sum of squared differences between the targets and the predictions
    '''
    residuals = y_true - y_pred
    return np.square(residuals).sum()

In [11]:
# hyperparameters for the plain NumPy gradient descent
learning_rate = 0.01
epochs = 500

# start from a random column vector with one entry per feature
theta = np.random.normal(0, 1, size=(nb_features, 1))

for epoch in range(epochs):
    # gradient evaluated at the current theta
    gradient = compute_gradient(y, theta, X)

    # step against the gradient (theta is updated in place)
    theta -= learning_rate * gradient

    # loss measured with the freshly updated theta
    y_pred = X @ theta
    loss = compute_loss(y_pred, y)

    # report progress every 100 epochs
    if not epoch % 100:
        print(f'Epoch = {epoch} loss = {loss:.2f}')

print(f'Final Theta: {theta.squeeze()}')

Epoch = 0 loss = 96.44
Epoch = 100 loss = 0.00
Epoch = 200 loss = 0.00
Epoch = 300 loss = 0.00
Epoch = 400 loss = 0.00
Final Theta: [0.3 0.2 0.1]


Nice!

## TensorFlow 2.0


In [12]:
def train_epoch(X, y, model, loss_function):
    '''
    Run a single training step on the whole data set.

    The forward pass and the loss are recorded on a gradient tape, the loss
    is differentiated with respect to `model.variables`, and the resulting
    gradients are applied through `optimizer`.

    Note: `optimizer` is not a parameter; it is looked up in the enclosing
    (notebook/module) scope when this function runs.

    Returns the loss computed before the variables were updated.
    '''
    variables = model.variables

    with tf.GradientTape() as tape:
        # forward pass and loss must happen inside the tape to be recorded
        current_loss = loss_function(model.predict(X), y)

    # automatic differentiation of the loss w.r.t. the model variables
    grads = tape.gradient(current_loss, variables)

    # pair each gradient with its variable and let the optimizer update them
    optimizer.apply_gradients(zip(grads, variables))

    return current_loss


def train(data, target, model, loss_function, epochs, verbose=True, log_every=1000):
    '''
    Full training pipeline: call `train_epoch` once per epoch.

    Parameters
    ----------
    data, target : inputs and expected outputs, forwarded to `train_epoch`
    model : model object, forwarded to `train_epoch`
    loss_function : callable, forwarded to `train_epoch`
    epochs : number of times `train_epoch` is called
    verbose : when False, nothing is printed
    log_every : when verbose, print the loss on every epoch that is a
        multiple of this value (epoch 0 included); must be >= 1

    Raises
    ------
    ValueError : if `log_every` is smaller than 1
    '''
    if log_every < 1:
        raise ValueError(f'log_every must be >= 1, got {log_every}')

    for epoch in range(epochs):
        current_loss = train_epoch(data, target, model, loss_function)
        # honour `verbose`; previously the flag was accepted but never checked
        if verbose and epoch % log_every == 0:
            print(f'Epoch {epoch}, current loss {current_loss:.2f}')

In [13]:
# constants and parameters for the TensorFlow run
# (same values as in the NumPy section above)
learning_rate = 0.01  # step size handed to the SGD optimizer created below
epochs = 500  # number of train_epoch() calls made by train()

# define the model
class LinearModel:
    '''
    Linear model without bias: predictions are X @ theta, where theta is a
    single float32 variable of shape (nb_features, 1).
    '''

    def __init__(self, nb_features):
        '''
        Remember the number of features and create the variable.
        '''
        self.nb_features = nb_features
        self._init_variables()

    def _init_variables(self):
        '''
        Draw theta from tf.random.normal and expose it both as `self.theta`
        and as the only entry of the `self.variables` list.
        '''
        initial_value = tf.random.normal(shape=(self.nb_features, 1))
        self.theta = tf.Variable(initial_value, trainable=True, dtype=tf.float32)
        self.variables = [self.theta]

    def predict(self, X):
        '''
        Matrix product of X with the first (and only) variable.
        '''
        return tf.matmul(X, self.variables[0])


# define the loss function
def l2_loss_function(y_pred, y_true):
    '''
    Sum of squared differences between predictions and targets
    '''
    residuals = y_pred - y_true
    squared = tf.square(residuals)
    return tf.reduce_sum(squared)


# define the data
# cast the NumPy design matrix and targets to float32 tensors, the same dtype
# as the model's variable
data = tf.constant(X, dtype=tf.float32)
# y was already reshaped to a column when it was generated, so this reshape
# leaves its shape unchanged
target = tf.constant(y.reshape(-1, 1), dtype=tf.float32)


# initialize the model
model = LinearModel(nb_features)


# choose an optimizer
# NOTE: train_epoch() reads the module-level name `optimizer` directly instead
# of receiving it as an argument, so this assignment must run before train()
# is called and the name must not be changed
optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)


# train
train(data, target, model, l2_loss_function, epochs, verbose=True)

# print results
theta_tensor = model.variables[0]  # the model's only variable (theta)
print(f'Final Theta: {theta_tensor.numpy().squeeze()}')

Epoch 0, current loss 43.76
Final Theta: [0.3 0.2 0.1]


Finally, let's wrap this procedure in a function