# Required ML method

1. *** least_squares_GD(y, tx, initial_w, max_iters, gamma) ***
Linear regression using gradient descent 

2. *** least_squares_SGD(y, tx, initial_w, gamma, max_iters) ***
Linear regression using stochastic gradient descent 

3. *** least_squares(y, tx) ***
Least squares regression using normal equations

4. *** ridge_regression(y, tx, lambda_) ***
Ridge regression using normal equations

5. *** logistic_regression(y, tx, initial_w, gamma, max_iters) ***
Logistic regression using gradient descent or SGD 

6. *** reg_logistic_regression(y, tx, lambda_, initial_w, gamma, max_iters) ***
Regularized logistic regression using gradient descent or SGD

In [1]:
# Useful starting lines
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from functools import partial
%load_ext autoreload
%autoreload 2

In [2]:
from proj1_helpers import *

# Relative path of the training CSV file.
DATA_TRAIN_PATH = '../data/train.csv' 
# load_csv_data is presumably provided by the proj1_helpers star import (confirm);
# its three return values are unpacked into the notebook globals y, tX and ids.
y, tX, ids = load_csv_data(DATA_TRAIN_PATH)

In [7]:
# copied from costs.py
# TODO: perhaps import it instead of copy?

def calculate_mse(e):
    """Return half of the mean of the squared entries of the error vector e."""
    squared_errors = e ** 2
    # Halving the mean is the same value as the 1/2 prefactor, exactly.
    return np.mean(squared_errors) / 2


def calculate_mae(e):
    """Return the mean of the absolute values of the error vector e."""
    absolute_errors = np.abs(e)
    return np.mean(absolute_errors)


def compute_loss(y, tx, w):
    """Return calculate_mse of the residual between y and the prediction tx.dot(w)."""
    predictions = tx.dot(w)
    residual = y - predictions
    return calculate_mse(residual)

## Linear regression using gradient descent 

In [35]:
def compute_gradient(y, tx, w):
    """Return the gradient of the MSE cost function at w.

    Computed as (-1 / N) * tx.T @ (y - tx @ w), where N = y.shape[0].
    """
    n_samples = y.shape[0]
    residual = y - (tx @ w)
    # Scale factor is formed first, then multiplied in, to keep the exact
    # floating-point evaluation order.
    scale = -1 / n_samples
    return scale * (tx.T @ residual)


def least_squares_GD(y, tx, initial_w, max_iters, gamma):
    """
    Linear regression by gradient descent on the loss returned by compute_loss.

    Params:
        y (1D-array): training values
        tx (2D-array): each row contains the data associated to a sample.
                    each column contains all the sample value for a feature
        initial_w (1D-array): the initial weight vector (not modified)
        max_iters (int): number of gradient steps to run
        gamma (float): step size

    Returns:
        w (1D-array): the weight vector after the last step
                    (initial_w itself when max_iters is 0)
        loss (float): compute_loss(y, tx, w) for the returned w
    """
    w = initial_w
    # Evaluate the loss up front so there is a value to return even when
    # max_iters is 0 and the loop body never runs.
    loss = compute_loss(y, tx, w)

    for _ in range(max_iters):
        # One gradient-descent step; the subtraction builds a new array, so
        # the caller's initial_w is left untouched.
        w = w - gamma * compute_gradient(y, tx, w)
        loss = compute_loss(y, tx, w)

    return w, loss

## Test this function

In [None]:
# Settings for the trial run: number of gradient steps and step size.
max_iters = 1000
gamma = 1e-7
# Start from an all-zero weight vector with one entry per column of tX.
initial_w = np.zeros(tX.shape[1])

# Run gradient descent on the loaded training data and keep the final
# weights and loss as notebook globals.
w, loss = least_squares_GD(y, tX, initial_w, max_iters, gamma)

In [None]:
# Bare expression: displays the final loss as the notebook cell's output.
loss