In [1]:
import numpy as np
from sklearn.linear_model import LinearRegression

In [2]:
def MSE(y, tx, w):
    """Mean squared error of the linear model tx.dot(w) against the targets y.

    Args:
        y: numpy array of shape=(N, ). The observed outputs.
        tx: numpy array of shape=(N,D+1). The input matrix.
        w: numpy array of shape=(D+1, ). The vector of model parameters.

    Returns:
        The average of the squared residuals y - tx.dot(w).
        No 1/2 factor is applied to the average.
    """
    residuals = y - tx.dot(w)
    squared_residuals = residuals ** 2
    return np.mean(squared_residuals)

In [3]:
def compute_gradient(y, tx, w):
    """Full-batch gradient of the least-squares objective at w.

    Args:
        y: numpy array of shape=(N, )
        tx: numpy array of shape=(N,D+1)
        w: numpy array of shape=(D+1, ). The vector of model parameters.

    Returns:
        A numpy array of shape (D+1, ) (same shape as w) equal to
        -tx.T.dot(y - tx.dot(w)) / N. This is the gradient of the half mean
        squared error (1/(2N)) * sum(e**2), i.e. half the gradient of mean(e**2).
    """
    n_samples = len(y)
    residuals = y - tx.dot(w)
    # Negate first, then average over the samples (same order as -a/b).
    return -(tx.T.dot(residuals)) / n_samples

In [4]:
def compute_gradientSGD(y, tx, w):
    """Single-sample (batch size one) stochastic gradient at w.

    Args:
        y: a number, the output of the sampled data point
        tx: numpy array of shape=(D+1, ), the input of the sampled data point
        w: numpy array of shape=(D+1, ). The vector of model parameters.

    Returns:
        A numpy array of shape (D+1, ) (same shape as w): the sample's input
        scaled by the negated residual y - tx.dot(w).
    """
    residual = y - tx.dot(w)
    # For a 1-D tx the transpose is a no-op and dot with a scalar rescales tx.
    scaled_input = tx.T.dot(residual)
    return -scaled_input

In [37]:
def least_squares_GD(y, tx, initial_w, max_iters, gamma):
    """Least squares fitted with full-batch Gradient Descent (GD).

    Args:
        y: numpy array of shape=(N, )
        tx: numpy array of shape=(N,D+1)
        initial_w: numpy array of shape=(D+1, ). The starting point for the model parameters
        max_iters: a scalar, the number of GD steps to perform
        gamma: a scalar, the step size

    Returns:
        w: the model parameters after the last GD step, as a numpy array of shape (D+1, ).
           When max_iters is 0 this is initial_w itself.
        loss: MSE(y, tx, w) evaluated at the returned w
    """
    w = initial_w
    for _ in range(max_iters):
        step = gamma * compute_gradient(y, tx, w)
        # Rebinding (not in-place update) leaves the caller's initial_w untouched.
        w = w - step

    return w, MSE(y, tx, w)

In [38]:
def get_random_sample(y, tx, rng=None):
    """Draw one (output, input) pair uniformly at random from (y, tx).

    Args:
        y: numpy array of shape=(N, )
        tx: numpy array of shape=(N,D+1)
        rng: optional numpy.random.Generator (e.g. np.random.default_rng(seed))
            used to draw the index, which makes the draw reproducible.
            When None (the default) the global NumPy random state is used
            through np.random.randint.

    Returns:
        y_sample: the randomly selected entry of y, as a number
        tx_sample: the matching row of tx, as a numpy array of shape (D+1, )

    Raises:
        ValueError: if y is empty, since there is nothing to sample from.
    """
    n_samples = len(y)
    if n_samples == 0:
        raise ValueError("cannot draw a random sample from an empty data set")

    if rng is None:
        random_sample_index = np.random.randint(n_samples)
    else:
        # Generator.integers(n) draws uniformly from [0, n), like randint(n).
        random_sample_index = rng.integers(n_samples)

    # The same index is used for both arrays so the pair stays aligned.
    return y[random_sample_index], tx[random_sample_index]

In [39]:
def least_squares_SGD(y, tx, initial_w, max_iters, gamma):
    """Least squares fitted with Stochastic Gradient Descent (SGD), batch size one.

    Args:
        y: numpy array of shape=(N, )
        tx: numpy array of shape=(N,D+1)
        initial_w: numpy array of shape=(D+1, ). The starting point for the model parameters
        max_iters: a scalar, the number of SGD steps to perform
        gamma: a scalar, the step size

    Returns:
        w: the model parameters after the last SGD step, as a numpy array of shape (D+1, ).
           When max_iters is 0 this is initial_w itself.
        loss: MSE(y, tx, w) evaluated on the full data set at the returned w
    """
    w = initial_w

    for _ in range(max_iters):
        # Each step uses one freshly drawn data point.
        sample_y, sample_tx = get_random_sample(y, tx)
        step = gamma * compute_gradientSGD(sample_y, sample_tx, w)
        w = w - step

    return w, MSE(y, tx, w)

# Sandbox for testing

In [54]:
# Toy data: each row of raw_features is one feature observed on ten samples.
raw_features = np.array([[12, 16, 71, 99, 45, 27, 80, 58, 4, 50],
                         [35, 78, 73, 3, 55, 43, 56, 98, 32, 40]])

# Standardize every feature (row) to zero mean and unit standard deviation;
# keepdims keeps the statistics as columns so they broadcast across the row.
feature_mean = np.mean(raw_features, axis=1, keepdims=True)
feature_std = np.std(raw_features, axis=1, keepdims=True)

# Transpose so that samples are rows: x has shape (10, 2).
x = ((raw_features - feature_mean) / feature_std).T
y = np.array([56, 22, 37, 78, 83, 55, 70, 94, 12, 40])

# Prepend a column of ones so the first parameter acts as the intercept.
tx = np.insert(x, 0, 1, axis=1)

# Reference solution from scikit-learn to compare the GD / SGD results against.
linreg = LinearRegression().fit(x, y)

# Start the optimizers from the zero vector (one entry per column of tx).
initial_w = np.zeros(tx.shape[1], dtype=int)
w = initial_w

# Reference parameters in the same order as the columns of tx: intercept first.
print(np.hstack((linreg.intercept_, linreg.coef_)))

[54.7        15.08492696  3.55532095]


### Testing least-squares GD

In [56]:
# Full-batch gradient descent from initial_w: 100 steps with step size 0.1.
least_squares_GD(y, tx, initial_w, max_iters=100, gamma=0.1)

(array([54.69854709, 15.08425522,  3.55477245]), 423.1618830220783)

### Testing least-squares SGD

In [45]:
# SGD draws its samples from NumPy's global random state, so seed it here to
# make this cell give the same result on every Restart & Run All.
# The exact numbers returned depend on the chosen seed.
np.random.seed(0)

# 100000 single-sample steps with a small step size of 0.0001.
least_squares_SGD(y, tx, initial_w, max_iters=100000, gamma=0.0001)

(array([54.8223406 , 14.90603655,  3.58603709]), 423.21068889343735)