# Required ML methods

1. ***least_squares_GD(y, tx, initial_w, gamma, max_iters)***
Linear regression using gradient descent

2. ***least_squares_SGD(y, tx, initial_w, gamma, max_iters)***
Linear regression using stochastic gradient descent

3. ***least_squares(y, tx)***
Least squares regression using normal equations

4. ***ridge_regression(y, tx, lambda_)***
Ridge regression using normal equations

5. ***logistic_regression(y, tx, initial_w, gamma, max_iters)***
Logistic regression using gradient descent or SGD

6. ***reg_logistic_regression(y, tx, lambda_, initial_w, gamma, max_iters)***
Regularized logistic regression using gradient descent or SGD

In [1]:
# Useful starting lines
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from functools import partial
import math
%load_ext autoreload
%autoreload 2

In [2]:
from proj1_helpers import *
from helpers import *

# Relative path to the training CSV.
DATA_TRAIN_PATH = '../data/train.csv' 
# load_csv_data comes from one of the star imports above; it is unpacked into
# three values. NOTE(review): presumably labels, feature matrix and sample
# ids -- confirm against the helper's definition.
y, tX, ids = load_csv_data(DATA_TRAIN_PATH)

# General gradient descent

In [3]:
# general gradient descent
def gradient_descent(y, tx, initial_x, gamma, max_iters, compute_gradient, compute_loss):
    """Run fixed-step gradient descent and return the final weights and last loss.

    Args:
        y: targets, forwarded unchanged to the two callbacks.
        tx: features, forwarded unchanged to the two callbacks.
        initial_x: starting weight vector. The parameter keeps its original
            name so that existing positional and keyword callers still work.
        gamma: step size applied to every update.
        max_iters: number of update steps to perform.
        compute_gradient: callable ``(y, tx, w) -> gradient``.
        compute_loss: callable ``(y, tx, w) -> loss``.

    Returns:
        Tuple ``(w, loss)``. ``w`` is the weight vector after ``max_iters``
        updates. ``loss`` is the value computed in the last iteration, i.e.
        evaluated *before* the final update is applied. When ``max_iters`` is
        zero or negative, no update happens and the loss at the initial
        weights is returned.
    """
    # Use the argument itself; reading a same-named notebook global instead
    # would ignore the caller's value and fail on a fresh kernel.
    w = initial_x

    # Nothing to iterate: still hand back a meaningful loss instead of failing.
    if max_iters <= 0:
        return w, compute_loss(y, tx, w)

    for _ in range(max_iters):
        gradient = compute_gradient(y, tx, w)
        # Loss is measured at the current weights, before the step below.
        loss = compute_loss(y, tx, w)
        w = w - gamma * gradient

    return w, loss

# General stochastic gradient descent

In [4]:
def stochastic_gradient_descent(y, tx, initial_w, batch_size, max_iters, gamma,
                                seed, compute_gradient, compute_loss):
    """Run mini-batch stochastic gradient descent; return final weights and last loss.

    Args:
        y: targets for the full data set.
        tx: features for the full data set.
        initial_w: starting weight vector.
        batch_size: number of samples per mini-batch (must be non-zero).
        max_iters: number of update steps to perform.
        gamma: step size applied to every update.
        seed: forwarded to ``batch_iter``.
        compute_gradient: callable ``(y, tx, w) -> gradient``; called on a mini-batch.
        compute_loss: callable ``(y, tx, w) -> loss``; called on the full data set.

    Returns:
        Tuple ``(w, loss)``. ``w`` is the weight vector after ``max_iters``
        updates. ``loss`` is the full-data loss computed in the last
        iteration, i.e. evaluated before the final update is applied. When
        ``max_iters`` is zero or negative, no update happens and the loss at
        the initial weights is returned.
    """
    w = initial_w

    num_batches = math.floor(y.shape[0] / batch_size)
    batches = batch_iter(y, tx, seed, batch_size, num_batches)

    # Nothing to iterate: still hand back a meaningful loss instead of failing.
    if max_iters <= 0:
        return w, compute_loss(y, tx, w)

    for _ in range(max_iters):
        # NOTE(review): one batch is pulled per iteration. If batch_iter yields
        # only num_batches items, next() raises StopIteration once
        # max_iters > num_batches -- confirm against batch_iter's definition.
        s_y, s_tx = next(batches)
        gradient = compute_gradient(s_y, s_tx, w)
        # The gradient uses the mini-batch; the reported loss uses all samples.
        loss = compute_loss(y, tx, w)
        w = w - gamma * gradient

    return w, loss

## Linear regression using gradient descent 

In [5]:
# TODO: perhaps import it instead of copy?
def calculate_mse(e):
    """Return half of the mean squared value of the residual vector ``e``."""
    mean_squared = np.mean(e ** 2)
    return mean_squared / 2

def calculate_mae(e):
    """Return the mean of the absolute values of the residual vector ``e``."""
    magnitudes = np.abs(e)
    return np.mean(magnitudes)

def compute_loss_mse(y, tx, w):
    """Return the ``calculate_mse`` loss of the linear prediction ``tx.dot(w)`` against ``y``."""
    predictions = tx.dot(w)
    residuals = y - predictions
    return calculate_mse(residuals)

def compute_gradient_mse(y, tx, w):
    """Return the gradient of the MSE cost with respect to ``w``.

    The residual ``y - tx @ w`` is projected back through ``tx.T`` and scaled
    by ``-1 / y.shape[0]``.
    """
    residuals = y - (tx @ w)
    scale = -1 / y.shape[0]
    projected = tx.T @ residuals
    return scale * projected

In [6]:
def least_squares_GD(y, tx, initial_w, gamma, max_iters):
    """Linear regression fitted by gradient descent on the MSE cost.

    Delegates to ``gradient_descent`` with the MSE gradient and loss callbacks
    and returns its result unchanged.
    """
    result = gradient_descent(
        y, tx, initial_w, gamma, max_iters,
        compute_gradient_mse, compute_loss_mse,
    )
    return result

In [7]:
# Settings for the gradient-descent run. These are notebook-level globals and
# are re-assigned by a later cell.
max_iters = 1000
gamma = 1e-7
# Start from the all-zero weight vector, one entry per column of tX.
initial_w = np.zeros(tX.shape[1])

# Fit by gradient descent; keeps the returned weights and loss.
w_lr_gd, loss_lr_gd = least_squares_GD(y, tX, initial_w, gamma, max_iters)

In [9]:
loss_lr_gd

0.39938606005358546

## Linear regression using stochastic gradient descent 

In [10]:
def least_squares_SGD(y, tx, initial_w, gamma, max_iters, batch_size=None, seed=3):
    """Linear regression fitted by stochastic gradient descent on the MSE cost.

    Args:
        y: targets.
        tx: features.
        initial_w: starting weight vector.
        gamma: step size.
        max_iters: number of update steps.
        batch_size: samples per mini-batch. Defaults to half of the samples
            (``y.shape[0] // 2``), but never less than 1.
        seed: value forwarded to ``stochastic_gradient_descent``; defaults to 3.

    Returns:
        Whatever ``stochastic_gradient_descent`` returns, unchanged.
    """
    if batch_size is None:
        # Half the data per batch by default. The floor of 1 matters for a
        # single-sample input, where the integer division gives 0 and the
        # batch-count division in stochastic_gradient_descent would fail.
        batch_size = max(1, y.shape[0] // 2)

    return stochastic_gradient_descent(y, tx, initial_w, batch_size, max_iters, gamma,
                                       seed, compute_gradient_mse, compute_loss_mse)

In [11]:
# Settings for the stochastic run. These re-bind the notebook-level names
# max_iters, gamma and initial_w; gamma is smaller here than in the
# gradient-descent cell.
max_iters = 1000
gamma = 1e-8
# Start from the all-zero weight vector, one entry per column of tX.
initial_w = np.zeros(tX.shape[1])

# Fit by stochastic gradient descent; keeps the returned weights and loss.
w_lr_sgd, loss_lr_sgd = least_squares_SGD(y, tX, initial_w, gamma, max_iters)

In [12]:
loss_lr_sgd

0.41671005622007445

## Least squares regression using normal equations

In [15]:
def least_squares(y, tx):
    """Fit linear regression in closed form via the normal equations.

    Solves ``(tx.T @ tx) w = tx.T @ y`` as a linear system instead of forming
    the explicit inverse. This is numerically better behaved and avoids the
    large intermediate product of the inverse with ``tx.T``.

    Args:
        y: targets.
        tx: features.

    Returns:
        Tuple ``(w, loss)`` where ``loss`` is ``compute_loss_mse(y, tx, w)``.

    Raises:
        numpy.linalg.LinAlgError: if ``tx.T @ tx`` is singular.
    """
    gram = tx.T @ tx
    moment = tx.T @ y
    w = np.linalg.solve(gram, moment)
    loss = compute_loss_mse(y, tx, w)
    return w, loss

In [17]:
# Closed-form least-squares fit on the training data loaded earlier.
w_ls, loss_ls = least_squares(y, tX)

In [18]:
loss_ls

0.33968680990826089

## Ridge regression using normal equations

In [24]:
def ridge_regression(y, tx, lambda_):
    """Fit ridge regression in closed form via the regularized normal equations.

    Solves ``(tx.T @ tx + lambda_ * I) w = tx.T @ y`` as a linear system
    instead of forming the explicit inverse.

    Args:
        y: targets.
        tx: features.
        lambda_: penalty added to the diagonal exactly as given (it is not
            rescaled by the number of samples).

    Returns:
        Tuple ``(w, loss)`` where ``loss`` is ``compute_loss_mse(y, tx, w)``,
        i.e. the plain MSE loss without the penalty term.

    Raises:
        numpy.linalg.LinAlgError: if the regularized matrix is singular.
    """
    penalty = lambda_ * np.identity(tx.shape[1])
    system = tx.T @ tx + penalty
    w = np.linalg.solve(system, tx.T @ y)
    loss = compute_loss_mse(y, tx, w)
    return w, loss

In [25]:
# Regularization strength passed to ridge_regression as lambda_.
lamb = 23
# Closed-form ridge fit on the training data loaded earlier.
w_rr, loss_rr = ridge_regression(y, tX, lamb)

In [26]:
loss_rr

0.33968793894774219