In [None]:
# Useful starting lines
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2

# Load the data

In [None]:
import datetime
from helpers import *

# Load the dataset without sub-sampling and without the injected outlier.
height, weight, gender = load_data(sub_sample=False, add_outlier=False)
# Standardize the heights; mean_x and std_x are kept because the plotting
# helpers used further down take them as arguments.
x, mean_x, std_x = standardize(height)
# Build the regression targets and inputs from the standardized heights.
# NOTE(review): presumably tx is x with a leading column of ones — confirm in helpers.
y, tx = build_model_data(x, weight)

In [None]:
# Display the shapes of y and tx as a quick sanity check.
y.shape, tx.shape

# 1 Computing the Cost Function

Fill in the `compute_loss` function below:
<a id='compute_loss'></a>


In [None]:
def compute_loss(y, tx, w):
    """Return the mean squared error of the linear model ``tx . w``.

    The value is ``e.T . e / (2 * N)`` with ``e = y - tx . w`` and
    ``N = y.shape[0]``.

    Args:
        y: array of targets; its first dimension is the number of samples.
        tx: array of model inputs, one row per sample.
        w: weight vector compatible with ``np.dot(tx, w)``.

    Returns:
        The loss. With 1-D ``y`` and ``w`` this is a scalar; with 2-D column
        vectors it is a ``(1, 1)`` array.
    """
    residual = y - np.dot(tx, w)
    n_samples = y.shape[0]
    return np.dot(residual.T, residual) / (2 * n_samples)

# Small hand-checkable example, kept commented out for reference:
#y = np.array([4,7,6,5]).T
#w = np.array([3,1]).T
#tx = np.array([[1,1,1,1], [1,2,3,4]]).T

# Smoke test: evaluate the loss on the loaded data for an arbitrary weight
# vector (`.T` is a no-op on a 1-D array).
w = np.array([1,2]).T
#print("w shape" , w.shape, "\n\n")
print(compute_loss(y, tx, w))

# 2 Grid Search

Fill in the function `grid_search()` below:

In [None]:
def grid_search(y, tx, w0, w1):
    """Evaluate the loss on every (w0, w1) pair of a parameter grid.

    Args:
        y: targets, passed through to ``compute_loss``.
        tx: model inputs, passed through to ``compute_loss``.
        w0: 1-D sequence of candidate values for the first weight.
        w1: 1-D sequence of candidate values for the second weight.

    Returns:
        A float array of shape ``(len(w0), len(w1))`` whose entry ``[i, j]``
        is ``compute_loss(y, tx, [w0[i], w1[j]])``.
    """
    # Only the loss grid is returned, so the minimiser is not tracked here;
    # callers can derive it from the grid.
    losses = np.zeros((len(w0), len(w1)))
    for i, w0_value in enumerate(w0):
        for j, w1_value in enumerate(w1):
            losses[i, j] = compute_loss(y, tx, np.array([w0_value, w1_value]))
    return losses

#grid_search(y, tx, np.array([3,4,5]).T, np.array([1,7,2]).T)

Let us play with the grid search demo now!

In [None]:
from grid_search import generate_w, get_best_parameters
from plots import grid_visualization

# Generate the grid of parameters to be swept
grid_w0, grid_w1 = generate_w(num_intervals=100) # NB (original author's note, not verifiable from this file): creates num_intervals regularly spaced points in [-100, 200] for w0 and [-150, 150] for w1

# Start the grid search
start_time = datetime.datetime.now()
grid_losses = grid_search(y, tx, grid_w0, grid_w1)

# Select the best combination
loss_star, w0_star, w1_star = get_best_parameters(grid_w0, grid_w1, grid_losses)
end_time = datetime.datetime.now()
# The timed span covers both the grid evaluation and the best-parameter lookup.
execution_time = (end_time - start_time).total_seconds()

# Print the results
print("Grid Search: loss*={l}, w0*={w0}, w1*={w1}, execution time={t:.3f} seconds".format(
      l=loss_star, w0=w0_star, w1=w1_star, t=execution_time))

# Plot the results
fig = grid_visualization(grid_losses, grid_w0, grid_w1, mean_x, std_x, height, weight)
fig.set_size_inches(10.0,6.0)
fig.savefig("grid_plot")  # Optional saving

# 3 Gradient Descent

Again, please fill in the function `compute_gradient` below:

In [None]:
def compute_gradient(y, tx, w):
    """Return the gradient of ``||y - tx . w||^2 / (2 * N)`` with respect to ``w``.

    Args:
        y: array of targets; its first dimension ``N`` is the number of samples.
        tx: array of model inputs, one row per sample.
        w: weight vector compatible with ``tx.dot(w)``.

    Returns:
        ``-tx.T . e / N`` where ``e = y - tx . w`` is the per-sample error.
    """
    residual = y - tx.dot(w)
    n_samples = len(y)
    return -tx.T.dot(residual) / n_samples
   

# y = np.array([[4],
#               [7],
#               [6],
#               [5],
#               [10]])

# w = np.array([[0.4],
#               [1.],
#               [0.6]])

# tx = np.array([[1,1,5],
#                [1,2,7],
#                [1,3,9],
#                [1,4,1],
#                [1,2,9]])

# compute_gradient(y, tx, w)

Please fill in the function `gradient_descent` below:

In [None]:
def gradient_descent(y, tx, initial_w, max_iters, gamma):
    """Run full-batch gradient descent for ``max_iters`` steps.

    Args:
        y: targets, passed to ``compute_loss`` / ``compute_gradient``.
        tx: model inputs, passed to ``compute_loss`` / ``compute_gradient``.
        initial_w: starting weight vector (needs at least two entries, since
            the progress line prints ``w[0]`` and ``w[1]``).
        max_iters: number of update steps to perform.
        gamma: step size multiplying the gradient.

    Returns:
        ``(losses, ws)``. ``ws`` starts with ``initial_w`` and gains one entry
        per step; ``losses[k]`` is the loss evaluated at ``ws[k]``, i.e.
        before the k-th update.
    """
    ws = [initial_w]
    losses = []
    for step in range(max_iters):
        # The most recently stored weights are the current iterate.
        current_w = ws[-1]
        loss = compute_loss(y, tx, current_w)
        gradient = compute_gradient(y, tx, current_w)
        updated_w = current_w - gamma * gradient
        ws.append(updated_w)
        losses.append(loss)
        # The progress line pairs the pre-update loss with the post-update weights.
        print("Gradient Descent({bi}/{ti}): loss={l}, w0={w0}, w1={w1}".format(
              bi=step, ti=max_iters - 1, l=loss, w0=updated_w[0], w1=updated_w[1]))
    return losses, ws


# y = np.array([[4],
#               [5],
#               [6],
#               [7]])

# w = np.array([[0.4],
#               [1.]])

# tx = np.array([[1,1],
#                [1,2],
#                [1,3],
#                [1,4]])

# gradient_descent(y, tx, w, 1000, 0.1)
# print("Done")

Test your gradient descent function with the gradient descent demo shown below:

In [None]:
# from gradient_descent import *
from plots import gradient_descent_visualization

# Define the parameters of the algorithm.
max_iters = 50  # number of update steps
gamma = 0.7  # step size passed to gradient_descent

# Initialization
w_initial = np.array([0, 0])

# Start gradient descent.
start_time = datetime.datetime.now()
gradient_losses, gradient_ws = gradient_descent(y, tx, w_initial, max_iters, gamma)
end_time = datetime.datetime.now()

# Print result
# The elapsed time includes the per-iteration printing done inside gradient_descent.
exection_time = (end_time - start_time).total_seconds()
print("Gradient Descent: execution time={t:.3f} seconds".format(t=exection_time))

In [None]:
# Interactive view of the gradient-descent trajectory
from ipywidgets import IntSlider, interact

def plot_figure(n_iter):
    """Render the gradient-descent visualization for slider position ``n_iter``."""
    figure = gradient_descent_visualization(
        gradient_losses, gradient_ws,
        grid_losses, grid_w0, grid_w1,
        mean_x, std_x, height, weight,
        n_iter)
    figure.set_size_inches(10.0, 6.0)

# The slider runs from 1 up to the number of stored weight vectors.
interact(plot_figure, n_iter=IntSlider(min=1, max=len(gradient_ws)))

# 4 Stochastic gradient descent

In [None]:
def compute_stoch_gradient(y, tx, w):
    """Compute a stochastic gradient from a mini-batch of examples.

    The stochastic gradient is the ordinary gradient evaluated on the given
    subset of examples, so this delegates to ``compute_gradient``. It does
    not apply an update step and does not depend on any global step size.

    Args:
        y: targets of the mini-batch.
        tx: model inputs of the mini-batch, one row per example.
        w: current weight vector.

    Returns:
        The gradient with respect to ``w``, as returned by
        ``compute_gradient(y, tx, w)``.
    """
    return compute_gradient(y, tx, w)


def stochastic_gradient_descent(
        y, tx, initial_w, batch_size, max_iters, gamma):
    """Run mini-batch stochastic gradient descent.

    Args:
        y: targets for the whole dataset.
        tx: model inputs for the whole dataset, one row per sample.
        initial_w: starting weight vector (needs at least two entries, since
            the progress line prints ``w[0]`` and ``w[1]``).
        batch_size: forwarded to ``batch_iter`` as ``batch_size``.
        max_iters: forwarded to ``batch_iter`` as ``num_batches``; one update
            is performed per mini-batch that ``batch_iter`` yields.
        gamma: step size multiplying the mini-batch gradient.

    Returns:
        ``(losses, ws)``. ``ws`` starts with ``initial_w`` and gains one entry
        per update; ``losses[k]`` is the loss of ``ws[k]`` measured on the
        k-th mini-batch only (not on the full dataset).
    """
    ws = [initial_w]
    losses = []
    w = initial_w
    # NOTE(review): batch_iter is presumed to yield (y, tx) mini-batch pairs,
    # at most `num_batches` of them — confirm in helpers.
    batches = batch_iter(y, tx, batch_size=batch_size, num_batches=max_iters)
    for n_iter, (minibatch_y, minibatch_tx) in enumerate(batches):
        loss = compute_loss(minibatch_y, minibatch_tx, w)
        gradient = compute_gradient(minibatch_y, minibatch_tx, w)
        w = w - gamma * gradient

        # store w and loss
        ws.append(w)
        losses.append(loss)
        # Labelled as SGD (with an iteration counter, like gradient_descent)
        # so the output is not mistaken for the full-batch run.
        print("SGD({bi}/{ti}): loss={l}, w0={w0}, w1={w1}".format(
              bi=n_iter, ti=max_iters - 1, l=loss, w0=w[0], w1=w[1]))
    return losses, ws

In [None]:
# from stochastic_gradient_descent import *

# Define the parameters of the algorithm.
max_iters = 50  # forwarded to batch_iter as the number of mini-batches
gamma = 0.7  # step size
batch_size = 1  # one example per update

# Initialization
w_initial = np.array([0, 0])

# Start SGD.
# NOTE(review): no random seed is set in this notebook; if batch_iter shuffles,
# the trajectory will differ between runs — confirm in helpers.
start_time = datetime.datetime.now()
sgd_losses, sgd_ws = stochastic_gradient_descent(
    y, tx, w_initial, batch_size, max_iters, gamma)
end_time = datetime.datetime.now()

# Print result
exection_time = (end_time - start_time).total_seconds()
print("SGD: execution time={t:.3f} seconds".format(t=exection_time))

In [None]:
# Time Visualization
from ipywidgets import IntSlider, interact
def plot_figure(n_iter):
    """Render the SGD trajectory visualization for slider position ``n_iter``."""
    fig = gradient_descent_visualization(
        sgd_losses, sgd_ws, grid_losses, grid_w0, grid_w1, mean_x, std_x, height, weight, n_iter)
    fig.set_size_inches(10.0, 6.0)

# Size the slider from the SGD trajectory that is actually plotted, not from
# the earlier full-batch run (gradient_ws), whose length need not match.
interact(plot_figure, n_iter=IntSlider(min=1, max=len(sgd_ws)))

# 5 Effect of Outliers

### Exercise 5


In [None]:
# Gradient-descent settings shared by both runs.
max_iters = 50
gamma = 0.7

# Initialization
w_initial = np.array([0, 0])

# Run the same experiment twice on the sub-sampled data: first without, then
# with the injected outlier. After the loop the data globals (height, weight,
# x, mean_x, std_x, y, tx) hold the outlier dataset.
for add_outlier in (False, True):
    if add_outlier:
        print("\n\n New test wih ouliers\n")

    height, weight, gender = load_data(sub_sample=True, add_outlier=add_outlier)
    x, mean_x, std_x = standardize(height)
    y, tx = build_model_data(x, weight)

    # Stored under gd_* names: these come from full-batch gradient descent and
    # must not overwrite the SGD results (sgd_losses / sgd_ws).
    gd_losses, gd_ws = gradient_descent(
        y, tx, w_initial, max_iters, gamma)

    # Re-evaluate the loss grid on the dataset being plotted. Reusing the
    # grid computed on the full dataset would show a loss surface that does
    # not correspond to these points, hiding the effect of the outlier.
    outlier_grid_losses = grid_search(y, tx, grid_w0, grid_w1)

    # Plot the results
    # NOTE(review): plt.figure() is kept from the original; if
    # grid_visualization creates its own figure this only adds an empty one.
    plt.figure()
    fig = grid_visualization(
        outlier_grid_losses, grid_w0, grid_w1, mean_x, std_x, height, weight)
    fig.set_size_inches(10.0, 6.0)
    # Distinct file names so the two runs (and the earlier full-data
    # "grid_plot") do not overwrite each other.
    fig.savefig("grid_plot_outlier" if add_outlier else "grid_plot_no_outlier")  # Optional saving

plt.show()

# 6 Subgradient Descent

### Exercise 6

Modify the function `compute_loss(y, tx, w)` for the Mean Absolute Error cost function [here](#compute_loss)