In [30]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

In [4]:
def MSE(y, tx, w):
    """Compute MSE at w
    
    Args:
        y: numpy array of shape=(N, )
        tx: numpy array of shape=(N,D+1)
        w: numpy array of shape=(D+1, ). The vector of model parameters.
        
    Returns:
        Returns the mean square error at w for input tx and output y
    """
    e = y - tx.dot(w)
    return np.mean(e**2)

In [5]:
def compute_gradient(y, tx, w):
    """Computes the gradient at w.
        
    Args:
        y: numpy array of shape=(N, )
        tx: numpy array of shape=(N,D+1)
        w: numpy array of shape=(D+1, ). The vector of model parameters.
        
    Returns:
        An numpy array of shape (D+1, ) (same shape as w), containing the gradient of the loss at w.
    """
    e = y - tx.dot(w)
    return -tx.T.dot(e)/len(y)

In [6]:
def least_squares_GD(y, tx, initial_w, max_iters, gamma):
    """The Gradient Descent (GD) algorithm for least squares.
        
    Args:
        y: numpy array of shape=(N, )
        tx: numpy array of shape=(N,D+1)
        initial_w: numpy array of shape=(D+1, ). The initial guess (or the initialization) for the model parameters
        max_iters: a scalar denoting the total number of iterations of GD
        gamma: a scalar denoting the stepsize
        
    Returns:
        w: the model parameter as numpy arrays of shape (2, ), for the last iteration of GD 
        loss: the loss value corresponding to w
    """
    
    w = initial_w
    for n_iter in range(max_iters):
        grad = compute_gradient(y, tx, w)
        w = w - gamma * grad

    loss = MSE(y, tx, w)
    return w, loss

In [12]:
def get_random_sample(y, tx):
    random_sample_index = np.random.randint(len(y))
    y_sample  = y [random_sample_index]
    tx_sample = tx[random_sample_index]
    return y_sample, tx_sample

In [27]:
def least_squares_SGD(y, tx, initial_w, max_iters, gamma):
    w = initial_w
    
    for n_iter in range(max_iters):
        y_sample, tx_sample = get_random_sample(y, tx)
        grad = compute_gradient(y_sample, tx_sample, w)
        w = w - gamma * grad
        
    loss = compute_loss(y_sample, tx_sample, w)
    return losse, w

In [67]:
x = [[12,16,71,99,45,27,80,58,4,50],
     [35,78,73,3,55,43,56,98,32,40]]
y = [56,22,37,78,83,55,70,94,12,40]
x = np.array(x).T
tx = np.insert(x, 0, 1, axis=1)
y = np.array(y).reshape(-1,1)
linreg = LinearRegression().fit(tx,y)

linreg.coef_

array([[0.        , 0.50475262, 0.13872634]])

In [63]:
initial_w = np.array([0,0,0])
max_iters = 100
gamma = 0.1

In [64]:
least_squares_GD(y, tx, initial_w, max_iters, gamma)

ValueError: operands could not be broadcast together with shapes (3,) (3,10) 

In [65]:
least_squares_GD(y, tx, initial_w, max_iters, gamma)

(10, 1)

In [66]:
tx.dot(initial_w)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])