In [1]:
## These gradient descent functions are taken from DSI-2.

In [2]:
## This function is designed to take as inputs:
##### true (observed) values of Y,
##### a vector of inputs (X), 
##### the estimated y-intercept (beta0),
##### and the estimated slope (beta1).

## This function is designed to output:
##### the mean squared error.

def mean_squared_error(y_true, x, beta0, beta1):
    """Return the mean squared error of the line ``beta0 + beta1 * x``.

    Parameters
    ----------
    y_true : array-like
        Observed Y values.
    x : array-like
        Observed X values; must have the same shape as ``y_true``.
    beta0 : float
        Estimated y-intercept.
    beta1 : float
        Estimated slope.

    Returns
    -------
    float
        Mean of the squared residuals ``(y_true - (beta0 + beta1 * x)) ** 2``.

    Raises
    ------
    ValueError
        If ``y_true`` and ``x`` do not have the same shape.
    """
    # Coerce to float arrays so plain Python lists work: with a list,
    # ``x * beta1`` would either repeat the list or raise TypeError.
    y_arr = np.asarray(y_true, dtype=float)
    x_arr = np.asarray(x, dtype=float)

    # Reject mismatched shapes up front; otherwise e.g. (n,) vs (n, 1)
    # would silently broadcast to an (n, n) matrix and give a wrong mean.
    if y_arr.shape != x_arr.shape:
        raise ValueError(
            "y_true and x must have the same shape; got %s and %s"
            % (y_arr.shape, x_arr.shape)
        )

    y_pred = beta0 + x_arr * beta1
    return np.mean((y_arr - y_pred) ** 2)

In [3]:
## This function iterates through every observation and accumulates the
## gradient of the MSE with respect to beta0 and beta1, then takes one
## gradient descent step using those gradients.

## This function is designed to take as inputs:
##### observed Y values,
##### observed X values,
##### an initial guess for beta0,
##### an initial guess for beta1,
##### and the step size (alpha).

## This function is designed to output:
##### the updated beta0 and beta1 after ONE iteration of gradient descent.

def gradient_update(y, x, beta0, beta1, step_size):
    """Perform ONE gradient descent step on the MSE of ``beta0 + beta1 * x``.

    Parameters
    ----------
    y : sequence
        Observed Y values.
    x : sequence
        Observed X values; must have the same length as ``y``.
    beta0 : float
        Current guess for the y-intercept.
    beta1 : float
        Current guess for the slope.
    step_size : float
        Step size (alpha) that scales each gradient.

    Returns
    -------
    list
        ``[beta0, beta1]`` after one update.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` have different lengths.
    """
    n_obs = len(y)  # number of observations
    if len(x) != n_obs:
        raise ValueError(
            "x and y must have the same length; got len(x)=%d and len(y)=%d"
            % (len(x), n_obs)
        )

    beta0_gradient = 0  # running d(MSE)/d(beta0)
    beta1_gradient = 0  # running d(MSE)/d(beta1)

    if n_obs:  # guard: with no observations both gradients stay at 0
        # Constant factor from differentiating mean((y - (b0 + b1*x))**2).
        scale = -2.0 / n_obs

        # zip walks both inputs by position, so containers whose [] lookup
        # is label-based (e.g. a pandas Series whose index does not start
        # at 0) are read correctly instead of failing on ``y[i]``.
        for y_i, x_i in zip(y, x):
            residual = y_i - (beta0 + beta1 * x_i)
            beta0_gradient += scale * residual
            beta1_gradient += scale * x_i * residual

    # Move each coefficient against its gradient (gradient descent).
    beta0 = beta0 - (step_size * beta0_gradient)
    beta1 = beta1 - (step_size * beta1_gradient)

    return [beta0, beta1]

## Recall: this updates gradient descent ONCE, i.e. going from
## initial guess to next step.

In [4]:
## This function repeatedly applies gradient_update (which updates beta0 and beta1 once per call)
## over multiple iterations (500 by default), recording the MSE and coefficients after each step.

def gradient_descent_iterator(y, x, beta0, beta1, step_size=.0001, iterations=500):
    """Run gradient descent for a fixed number of steps and record its path.

    Starting from the supplied ``beta0`` / ``beta1`` guesses, calls
    ``gradient_update`` ``iterations`` times with the given ``step_size``.
    The MSE and both coefficients are recorded for the initial guess and
    after every update.

    Returns
    -------
    list
        ``[mean_squared_errors, beta0s, beta1s]``; each inner list starts
        with the value for the initial guess, followed by one entry per
        iteration, so you can (hopefully!) see the MSE decrease.
    """
    # Seed each history with the values for the initial guess.
    intercept_history = [beta0]
    slope_history = [beta1]
    mse_history = [mean_squared_error(y, x, beta0, beta1)]

    for _ in range(iterations):
        # Old coefficients in, new coefficients out.
        beta0, beta1 = gradient_update(y, x, beta0, beta1, step_size)

        # Record the new coefficients and the MSE they produce.
        intercept_history.append(beta0)
        slope_history.append(beta1)
        mse_history.append(mean_squared_error(y, x, beta0, beta1))

    return [mse_history, intercept_history, slope_history]