<a href="https://colab.research.google.com/github/nklingen/CS-433-Project-1/blob/master/implementations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import numpy as np
from proj1_helpers import *

In [5]:
#from google.colab import files
#uploaded = files.upload()

# **1. Least Squares Gradient Descent**
Linear regression using gradient descent

In [6]:
def least_squares_GD(y, tx, initial_w, max_iters, gamma):
    """
        Linear regression by full-batch gradient descent on the MSE cost.

        The cost minimised is 1/(2N) * sum((y - tx.w)^2), i.e. the least-squares
        objective the function is named after.

        INPUT:
            y           - Target values (NumPy array; first axis indexes the N samples)
            tx          - Samples (NumPy array with one row per sample)
            initial_w   - Initial weights
            max_iters   - Number of gradient steps to perform
            gamma       - Step size

        OUTPUT:
            w           - Weights after the last update (initial_w if max_iters <= 0)
            loss        - MSE cost evaluated at the returned weights
    """
    n_samples = y.shape[0]
    w = initial_w

    for n_iter in range(max_iters):
        # Residual, cost and gradient at the current weights
        err = y - np.dot(tx, w)
        loss = np.sum(np.square(err)) / (2 * n_samples)
        gradient = -np.dot(tx.T, err) / n_samples

        # Gradient step (builds a new array, so initial_w is never mutated)
        w = w - gamma * gradient

        if n_iter % 10 == 0:
            print("least_squares_GD ({bi}/{ti}): loss={l}".format(bi=n_iter, ti=max_iters, l=loss))

    # Report the cost that actually corresponds to the returned weights
    final_err = y - np.dot(tx, w)
    final_loss = np.sum(np.square(final_err)) / (2 * n_samples)
    return w, final_loss

# **2. Least Squares Stochastic Gradient Descent**
Linear regression using stochastic gradient descent

In [7]:
def least_squares_SGD(y, tx, initial_w, max_iters, gamma):
    """
        Use the Stochastic Gradient Descent (batch size 1) method to find the best weights

        INPUT:
            y           - Target values
            tx          - Samples
            initial_w   - Initial weights
            max_iters   - Maximum number of iterations
            gamma       - Step size

        OUTPUT:
            w           - Weights after the last update (initial_w if max_iters <= 0)
            loss        - MSE cost computed at the start of the last executed
                          iteration (cost at initial_w if max_iters <= 0)
    """

    # Define a batch size of 1 for the submission
    batch_size = 1

    w = initial_w

    # Without this guard the code below would reference `n_iter` / `loss`
    # before they are bound when the loop body never runs.
    if max_iters <= 0:
        return w, compute_cost(y, tx, w)

    last_loss = 0          # loss at the last progress report (only used for printing)
    previous_loss = None   # loss of the preceding iteration (used for the stop test)

    for n_iter in range(max_iters):
        # Cost at the current weights, then a stochastic gradient built by
        # compute_stoch_gradient from 100 mini-batches of size `batch_size`
        loss = compute_cost(y, tx, w)
        grad = compute_stoch_gradient(y, tx, w, batch_size, 100)

        # Update w by gradient
        w = w - gamma * grad

        if n_iter % 100 == 0:
            print("  Iter={it}, loss={ll}, diff={dff}".format(it=n_iter, ll=loss, dff=(loss - last_loss)))
            last_loss = loss

        # Stopping criterion: the cost barely changed between two consecutive
        # iterations (only checked from the third iteration on)
        if n_iter > 1 and np.abs(loss - previous_loss) < 10 ** -8:
            break
        previous_loss = loss

    print("  Iter={it}, loss={ll}, diff={dff}".format(it=n_iter, ll=loss, dff=(loss - last_loss)))
    # Latest weights together with the last recorded loss
    return w, loss

# **3. Least Squares**
Least squares regression using normal equations

In [8]:
def least_squares(y, tx):
    """
        Least squares regression solved through the normal equations.

        INPUT:
            y           - Target values
            tx          - Samples

        OUTPUT:
            w           - Solution of (tx^T tx) w = tx^T y
            loss        - Value of compute_loss(y, tx, w)
    """
    gram = tx.T.dot(tx)
    moment = tx.T.dot(y)
    weights = np.linalg.solve(gram, moment)
    return weights, compute_loss(y, tx, weights)

# **4. Ridge Regression**
Ridge regression using normal equations

In [9]:
def ridge_regression(y, tx, lambda_):
    """
        Ridge regression solved through the regularised normal equations.

        INPUT:
            y           - Target values
            tx          - Samples
            lambda_     - Regularisation strength

        OUTPUT:
            w           - Solution of ((1/N) tx^T tx + 2 lambda_ I) w = (1/N) tx^T y
            loss        - Value of compute_loss(y, tx, w) (penalty term not included)
    """
    inv_n = 1 / len(y)
    n_features = tx.shape[1]

    # Left- and right-hand side of the linear system
    penalty = 2 * (lambda_ * np.identity(n_features))
    lhs = inv_n * (np.dot(tx.T, tx)) + penalty
    rhs = inv_n * np.dot(tx.T, y)

    weights = np.linalg.solve(lhs, rhs)
    return weights, compute_loss(y, tx, weights)

# **5. Logistic Regression**
Logistic regression using gradient descent or SGD

In [10]:
def logistic_regression(y, tx, initial_w, max_iters, gamma):
    """
        Logistic regression by full-batch gradient descent.

        The cost minimised is the mean negative log-likelihood
        mean(log(1 + exp(tx.w)) - y * tx.w) for labels y in {0, 1}.

        INPUT:
            y           - Binary labels. Values in {0, 1} are used as is; if any
                          label is negative the encoding is taken to be {-1, 1}
                          and mapped to {0, 1}.
            tx          - Samples (one row per sample)
            initial_w   - Initial weights
            max_iters   - Number of gradient steps to perform
            gamma       - Step size

        OUTPUT:
            w           - Weights after the last update (initial_w if max_iters <= 0)
            loss        - Mean negative log-likelihood at the returned weights
    """
    targets = np.asarray(y, dtype=float)
    if np.any(targets < 0):
        targets = (targets + 1) / 2

    w = np.asarray(initial_w, dtype=float)
    n_samples = targets.shape[0]

    for _ in range(max_iters):
        z = np.dot(tx, w)
        # sigmoid(z) written with tanh so large |z| cannot overflow exp()
        sigma = 0.5 * (1 + np.tanh(0.5 * z))
        gradient = np.dot(tx.T, sigma - targets) / n_samples
        w = w - gamma * gradient

    z = np.dot(tx, w)
    # logaddexp(0, z) == log(1 + exp(z)) without overflow
    loss = np.mean(np.logaddexp(0, z) - targets * z)
    return w, loss

# **6. Regularized Logistic Regression**
Regularized logistic regression using gradient descent
or SGD

In [11]:
def reg_logistic_regression(y, tx, lambda_, initial_w, max_iters, gamma):
    """
        L2-regularised logistic regression by full-batch gradient descent.

        The objective minimised is
        mean(log(1 + exp(tx.w)) - y * tx.w) + lambda_ * ||w||^2
        for labels y in {0, 1}.

        INPUT:
            y           - Binary labels. Values in {0, 1} are used as is; if any
                          label is negative the encoding is taken to be {-1, 1}
                          and mapped to {0, 1}.
            tx          - Samples (one row per sample)
            lambda_     - Regularisation strength
            initial_w   - Initial weights
            max_iters   - Number of gradient steps to perform
            gamma       - Step size

        OUTPUT:
            w           - Weights after the last update (initial_w if max_iters <= 0)
            loss        - Mean negative log-likelihood at the returned weights,
                          WITHOUT the penalty term
    """
    targets = np.asarray(y, dtype=float)
    if np.any(targets < 0):
        targets = (targets + 1) / 2

    w = np.asarray(initial_w, dtype=float)
    n_samples = targets.shape[0]

    for _ in range(max_iters):
        z = np.dot(tx, w)
        # sigmoid(z) written with tanh so large |z| cannot overflow exp()
        sigma = 0.5 * (1 + np.tanh(0.5 * z))
        # Data term plus the derivative of lambda_ * ||w||^2
        gradient = np.dot(tx.T, sigma - targets) / n_samples + 2 * lambda_ * w
        w = w - gamma * gradient

    z = np.dot(tx, w)
    # logaddexp(0, z) == log(1 + exp(z)) without overflow
    loss = np.mean(np.logaddexp(0, z) - targets * z)
    return w, loss

# **Helper Functions**

In [12]:
def compute_loss(y, tx, w):
    """
        Mean squared error cost with the 1/2 factor: 1/(2N) * sum((y - tx.w)^2).

        INPUT:
            y           - Target values (N is taken from y.shape[0])
            tx          - Samples
            w           - Weights

        OUTPUT:
            loss        - The cost value
    """
    residual = y - np.dot(tx, w)
    scale = 1 / (2 * y.shape[0])
    return scale * np.sum(np.square(residual))

In [13]:
def compute_cost(y, tx, w):
    """
        Half of the mean squared residual: 0.5 * mean((y - tx.w)^2).

        INPUT:
            y           - Target values
            tx          - Samples
            w           - Weights

        OUTPUT:
            cost        - The MSE cost as a floating point value
    """
    residual = y - tx.dot(w)
    squared = residual ** 2
    return 1. / 2 * np.mean(squared)

In [14]:
def compute_loss_by_type(y, tx, w, type):
    """
        Compute the cost selected by `type`.

        INPUT:
            y           - Target values
            tx          - Samples
            w           - Weights
            type        - "MSE" for 1/(2N) * sum((y - tx.w)^2),
                          "MAE" for mean(|y - tx.w|)

        OUTPUT:
            loss        - The requested cost; for any other `type` a message is
                          printed and None is returned
    """
    if type not in ("MSE", "MAE"):
        print("Type not supported")
        return None

    residual = y - np.dot(tx, w)
    if type == "MAE":
        return np.mean(np.abs(residual))

    # type == "MSE"
    return 1/(2*y.shape[0])*np.sum(np.square(residual))

In [15]:
def compute_gradient_by_type(y, tx, w, type):
    """
        Compute the gradient of the cost selected by `type` with respect to w.

        INPUT:
            y           - Target values (N is taken from y.shape[0])
            tx          - Samples
            w           - Weights
            type        - "MSE" for the gradient of 1/(2N) * sum((y - tx.w)^2),
                          "MAE" for a subgradient of mean(|y - tx.w|)

        OUTPUT:
            gradient    - The requested (sub)gradient; for any other `type` a
                          message is printed and None is returned
    """
    if (type == "MSE"):
        e = y - np.dot(tx, w)
        return (-1/y.shape[0])*np.dot(tx.T, e)
    elif (type == "MAE"):
        e = y - np.dot(tx, w)
        # d|e_i|/dw = -sign(e_i) * tx_i ; sign(0) = 0 is a valid subgradient choice
        return (-1/y.shape[0])*np.dot(tx.T, np.sign(e))
    else:
        print("Type not supported")
        return

In [16]:
def compute_gradient(y, tx, w):
    """
        Gradient of the MSE cost 1/(2N) * sum((y - tx.w)^2) with respect to w.

        INPUT:
            y           - Target values (N is taken from y.shape[0])
            tx          - Samples
            w           - Weights

        OUTPUT:
            gradient    - (-1/N) * tx^T (y - tx.w)
    """
    residual = y - np.dot(tx, w)
    factor = -1 / y.shape[0]
    return factor * np.dot(tx.T, residual)

In [17]:
def compute_stoch_gradient(y, tx, w, batch_size, max_iter):
    """
        Accumulate MSE gradients over mini-batches drawn by batch_iter.

        INPUT:
            y           - Target values
            tx          - Samples
            w           - Weights
            batch_size  - Number of samples per mini-batch
            max_iter    - Forwarded to batch_iter as the number of mini-batches

        OUTPUT:
            gradient    - Sum of the per-batch gradients multiplied by 1/batch_size
    """
    # NOTE(review): compute_gradient already averages over each mini-batch, and
    # the sum below is divided by batch_size rather than by the number of
    # batches, so the result scales with the number of batches drawn. Callers'
    # step sizes depend on this scaling - confirm before changing it.
    accumulated = np.zeros(len(tx[0]))

    batches = batch_iter(y, tx, batch_size, max_iter)
    for batch_y, batch_tx in batches:
        batch_grad = compute_gradient(batch_y, batch_tx, w)
        accumulated = accumulated + batch_grad

    return 1 / float(batch_size) * accumulated

In [18]:
def build_poly(x, degree):
    """
        Build the polynomial expansion of x for powers 0..degree.

        INPUT:
            x           - Input values (for a 1-D array of length N the result
                          has shape (N, degree + 1))
            degree      - Highest power to include

        OUTPUT:
            poly        - Transpose of the array stacking x**0, x**1, ..., x**degree
    """
    powers = []
    for exponent in range(degree + 1):
        powers.append(x ** exponent)
    return np.transpose(np.array(powers))

In [20]:
def train_test_diff(x, y, degree, ratio, seed):
    """
        Fit least squares on a train split and print train/test RMSE.

        INPUT:
            x           - Samples
            y           - Target values
            degree      - Currently unused (kept for interface compatibility)
            ratio       - Split proportion forwarded to split_data
            seed        - Random seed forwarded to split_data

        OUTPUT:
            None; the result is printed.
    """
    # Split the data passed in by the caller (not a notebook-level global).
    # NOTE(review): the unpacking order assumes split_data returns
    # (train_x, train_y, test_x, test_y) - confirm against its definition.
    train_x, train_y, test_x, test_y = split_data(x, y, ratio, seed)

    # Calculate the weights through least squares
    w, _ = least_squares(train_y, train_x)

    # compute_loss returns 1/(2N) * sum(e^2), so RMSE = sqrt(2 * loss)
    rmse_tr = np.sqrt(2 * compute_loss(train_y, train_x, w))
    rmse_te = np.sqrt(2 * compute_loss(test_y, test_x, w))

    print("proportion={p}, Training RMSE={tr:.3f}, Testing RMSE={te:.3f}".format(
              p=ratio, tr=rmse_tr, te=rmse_te))

In [None]:
def batch_iter(y, tx, batch_size, num_batches=1, shuffle=True):
    """
    Generate a minibatch iterator for a dataset.
    Takes as input two iterables (here the output desired values 'y' and the input data 'tx')
    Outputs an iterator which gives mini-batches of `batch_size` matching elements from `y` and `tx`.
    Data can be randomly shuffled to avoid ordering in the original data messing with the randomness of the minibatches.
    At most `num_batches` batches are produced; iteration stops early once the
    data is exhausted, so an empty batch is never yielded. The last batch may
    hold fewer than `batch_size` elements.
    Example of use :
    for minibatch_y, minibatch_tx in batch_iter(y, tx, 32):
        <DO-SOMETHING>
    """
    data_size = len(y)

    if shuffle:
        # Same permutation for y and tx keeps targets and samples aligned
        shuffle_indices = np.random.permutation(np.arange(data_size))
        shuffled_y = y[shuffle_indices]
        shuffled_tx = tx[shuffle_indices]
    else:
        shuffled_y = y
        shuffled_tx = tx
    for batch_num in range(num_batches):
        start_index = batch_num * batch_size
        end_index = min((batch_num + 1) * batch_size, data_size)
        # `!=` would let start_index > end_index through and yield empty slices
        # once num_batches * batch_size exceeds the data size.
        if start_index >= end_index:
            break
        yield shuffled_y[start_index:end_index], shuffled_tx[start_index:end_index]