In [2]:
# Useful starting lines
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2

# 1 Generate the data


In [5]:
from proj1_helpers import *
# Read "./test.csv" through the project helper `load_csv_data` and unpack its
# three return values.
# NOTE(review): presumably labels, feature matrix and sample ids, judging by
# the variable names -- confirm against proj1_helpers.
yb, input_data, ids = load_csv_data("./test.csv")


# 2 Least squares methods
## 2.1 Least squares  


In [7]:
from costs import *

def least_squares(y, tx):
    """Solve the linear least-squares problem in closed form.

    The weights are obtained by applying the Moore-Penrose pseudo-inverse of
    `tx` to `y`, which also works when `tx` is rank deficient.

    Args:
        y: target values.
        tx: design matrix (one row per sample -- TODO confirm with callers).

    Returns:
        (w, loss): the fitted weights and the loss reported by
        `compute_loss_mse(y, tx, w)`.
    """
    # The pseudo-inverse lives in `np.linalg`; `np.pinv` does not exist and
    # raised AttributeError.
    w = np.linalg.pinv(tx) @ y
    loss = compute_loss_mse(y, tx, w)
    return w, loss
    
    
    

## 2.2 least squares with gradient descent
### 2.2.1 Function implementation


In [13]:
from costs import *

def least_squares_GD(y, tx, initial_w, max_iters, gamma):
    """Minimise the MSE loss with full-batch gradient descent.

    Args:
        y: target values.
        tx: design matrix.
        initial_w: starting weights.
        max_iters: number of gradient steps to perform.
        gamma: step size.

    Returns:
        (loss, w): the loss evaluated at the final weights, then the final
        weights. Note the order is `(loss, w)`, unlike `least_squares` which
        returns `(w, loss)`; it is kept as-is for existing callers.
    """
    w = initial_w
    for _ in range(max_iters):
        grad = compute_gradient(y, tx, w)
        w = w - gamma * grad

    # Evaluate the loss once, on the weights that are actually returned.
    # Previously it was taken before the last update (so it did not match `w`)
    # and stayed at the sentinel -1 when max_iters was 0.
    loss = compute_loss_mse(y, tx, w)
    return loss, w

### 2.2.2 helper functions


In [9]:
def compute_gradient(y, tx, w):
    """Return the gradient of the mean-squared-error loss at `w`.

    Computed as `-(1/N) * tx.T @ (y - tx @ w)` with `N = len(y)`.
    """
    residual = y - (tx @ w)
    scale = -1 / len(y)
    projected = tx.T @ residual
    return scale * projected


## 2.3 Least squares with stochastic gradient descent
### 2.3.1 Function implementation


In [None]:
def least_squares_SGD(y, tx, initial_w, max_iters, gamma):
    """Minimise the MSE loss with stochastic gradient descent (batch size 1).

    Args:
        y: target values.
        tx: design matrix.
        initial_w: starting weights.
        max_iters: number of stochastic gradient steps to perform.
        gamma: step size.

    Returns:
        (loss, w): the full-dataset loss at the final weights, then the final
        weights (same order as `least_squares_GD`).
    """
    w = initial_w
    batch_size = 1

    for _ in range(max_iters):
        # Request one freshly shuffled batch per step. A single batch_iter call
        # only walks once through the data, so it cannot provide more updates
        # than there are samples.
        for minibatch_y, minibatch_tx in batch_iter(y, tx, batch_size, num_batches=1):
            grad = compute_stoch_gradient(minibatch_y, minibatch_tx, w)
            w = w - gamma * grad

    # The loss is evaluated once on the final weights instead of on every step;
    # the old code then returned the undefined names `losses` and `ws`.
    loss = compute_loss_mse(y, tx, w)
    return loss, w

### 2.3.2 helper functions


In [14]:
def compute_stoch_gradient(y, tx, w):
    """Gradient for a mini-batch: delegates to `compute_gradient` on the batch."""
    minibatch_gradient = compute_gradient(y, tx, w)
    return minibatch_gradient

In [15]:
def batch_iter(y, tx, batch_size, num_batches=1, shuffle=True):
    """
    Generate a minibatch iterator for a dataset.
    Takes as input two iterables (here the output desired values 'y' and the input data 'tx')
    Outputs an iterator which gives mini-batches of batch_size matching elements from y and tx.
    Data can be randomly shuffled to avoid ordering in the original data messing with the randomness of the minibatches.
    At most `num_batches` batches are produced; iteration stops early once the
    data is exhausted, so an empty batch is never yielded.
    Example of use :
    for minibatch_y, minibatch_tx in batch_iter(y, tx, 32):
        <DO-SOMETHING>
    """
    data_size = len(y)

    if shuffle:
        # The same permutation is applied to y and tx so pairs stay aligned
        shuffle_indices = np.random.permutation(np.arange(data_size))
        shuffled_y = y[shuffle_indices]
        shuffled_tx = tx[shuffle_indices]
    else:
        shuffled_y = y
        shuffled_tx = tx
    for batch_num in range(num_batches):
        start_index = batch_num * batch_size
        # Stop once the start is past the data. The previous `!=` check only
        # skipped the batch where start == end; later ones (start > end) were
        # yielded as empty slices.
        if start_index >= data_size:
            break
        end_index = min((batch_num + 1) * batch_size, data_size)
        yield shuffled_y[start_index:end_index], shuffled_tx[start_index:end_index]


#  3 Ridge regression
## 3.1 function implementation

In [2]:
def ridge_regression(y, tx, lambda_):
    """Solve ridge regression through its normal equations.

    Solves `(tx.T @ tx + 2 * N * lambda_ * I) w = tx.T @ y` with
    `N = tx.shape[0]`.
    NOTE(review): the 2*N factor presumably matches a 1/(2N) convention in
    `compute_loss_mse` -- confirm against the `costs` module.

    Args:
        y: target values.
        tx: design matrix.
        lambda_: regularisation strength.

    Returns:
        (w_ridge, loss): the fitted weights and the loss reported by
        `compute_loss_ridge(y, tx, w_ridge, lambda_)`.
    """
    x_tr = tx.transpose()
    lambda_prime = lambda_ * 2 * tx.shape[0]
    gram = (x_tr @ tx) + (lambda_prime * np.eye(tx.shape[1]))
    rhs = x_tr @ y
    # Solve the linear system directly rather than forming an explicit inverse
    w_ridge = np.linalg.solve(gram, rhs)
    # The loss must use the weights just computed; the old code passed the
    # undefined name `w`.
    loss = compute_loss_ridge(y, tx, w_ridge, lambda_)
    return w_ridge, loss

## 3.2 helper functions


In [3]:
from costs import *
import numpy as np

def compute_loss_ridge(y, tx, w, lambda_):
    """Return the ridge loss: `compute_loss_mse(y, tx, w) + lambda_ * (w . w)`.

    Args:
        y: target values.
        tx: design matrix.
        w: weights at which the loss is evaluated.
        lambda_: regularisation strength.
    """
    # `lambda` is a reserved keyword (the old code was a SyntaxError); the
    # parameter is `lambda_`.
    return compute_loss_mse(y, tx, w) + lambda_ * np.sum(w.T @ w)

# 4 Logistic regression
## 4.1 Logistic regression using gradient descent
### 4.1.1 function implementation



In [10]:
def logistic_regression(y, tx, initial_w, max_iters, gamma):
    """Fit logistic regression by running `max_iters` gradient-descent steps.

    Starts from `initial_w`, moves against `calculate_gradient_logistic` with
    step size `gamma`, and returns `(w, loss)` where the loss comes from
    `calculate_loss_logistic` at the final weights.
    """
    w = initial_w
    step = 0
    while step < max_iters:
        w = w - (gamma * calculate_gradient_logistic(y, tx, w))
        step += 1
    final_loss = calculate_loss_logistic(y, tx, w)
    return w, final_loss

### 4.1.2 helper functions

In [12]:
def sigmoid(t):
    """Logistic function `1 / (1 + exp(-t))`, applied element-wise."""
    denominator = 1 + np.exp(-t)
    return 1 / denominator

def fx(x):
    """Return `log(1 + exp(x))` element-wise, without overflowing.

    `np.logaddexp(0, x)` computes `log(exp(0) + exp(x))` stably; the naive
    `np.log(1 + np.exp(x))` overflows to `inf` for large positive `x`.
    """
    return np.logaddexp(0, x)

def calculate_loss_logistic(y, tx, w):
    """Return the logistic negative log-likelihood `sum(log(1 + exp(z)) - y * z)`.

    Here `z = tx @ w`.
    NOTE(review): this form of the loss assumes labels in {0, 1} -- confirm
    what the data loader produces.
    """
    y_predicted = tx @ w
    right_hand = y * y_predicted
    # fx is element-wise, so apply it to the whole array at once.
    # np.apply_along_axis(fx, 1, ...) raised AxisError for 1-D predictions and
    # looped over rows in Python.
    left_hand = fx(y_predicted)
    return np.sum(left_hand - right_hand)
    
def calculate_gradient_logistic(y, tx, w):
    """Return the gradient of the logistic loss: `tx.T @ (sigmoid(tx @ w) - y)`."""
    y_predicted = tx @ w
    # sigmoid is element-wise, so apply it to the whole array at once.
    # np.apply_along_axis(sigmoid, 1, ...) raised AxisError for 1-D predictions.
    left_hand = sigmoid(y_predicted)
    return tx.T @ (left_hand - y)
    

 ## 4.2 Regularized logistic regression using gradient descent
 ### 4.2.1 function implementation
 

In [13]:
def reg_logistic_regression(y, tx, lambda_, initial_w, max_iters, gamma):
    """Fit regularised logistic regression with `max_iters` gradient steps.

    Starts from `initial_w`, moves against
    `calculate_gradient_reg_logistic(y, tx, lambda_, w)` with step size
    `gamma`, and returns `(w, loss)` where the loss comes from
    `calculate_loss_reg_logistic` at the final weights.
    """
    w = initial_w
    step = 0
    while step < max_iters:
        w = w - (gamma * calculate_gradient_reg_logistic(y, tx, lambda_, w))
        step += 1
    final_loss = calculate_loss_reg_logistic(y, tx, lambda_, w)
    return w, final_loss

### 4.2.2 helper functions


In [14]:
def sigmoid(t):
    """Logistic function `1 / (1 + exp(-t))`, applied element-wise.

    Re-definition of the `sigmoid` from the logistic-regression helper cell,
    kept so this cell can be run on its own.
    """
    denominator = 1 + np.exp(-t)
    return 1 / denominator

def fx(x):
    """Return `log(1 + exp(x))` element-wise, without overflowing.

    `np.logaddexp(0, x)` computes `log(exp(0) + exp(x))` stably; the naive
    `np.log(1 + np.exp(x))` overflows to `inf` for large positive `x`.
    """
    return np.logaddexp(0, x)

def calculate_loss_reg_logistic(y, tx, lambda_, w):
    """Return the L2-regularised logistic loss.

    Computes `sum(log(1 + exp(z)) - y * z) + (lambda_ / 2) * (w . w)` with
    `z = tx @ w`.

    The function is named and parameterised the way `reg_logistic_regression`
    calls it. Under its old name it shadowed the unregularised
    `calculate_loss_logistic` and read an undefined `lambda_`.
    NOTE(review): this form of the loss assumes labels in {0, 1} -- confirm.
    """
    y_predicted = tx @ w
    right_hand = y * y_predicted
    # fx is element-wise; np.apply_along_axis(fx, 1, ...) failed on 1-D input
    left_hand = fx(y_predicted)
    return np.sum(left_hand - right_hand) + ((lambda_ / 2.0) * (w.T @ w))
    
def calculate_gradient_reg_logistic(y, tx, lambda_, w):
    """Return the gradient of the L2-regularised logistic loss.

    Computes `tx.T @ (sigmoid(tx @ w) - y) + lambda_ * w`.

    The function is named and parameterised the way `reg_logistic_regression`
    calls it. Under its old name it shadowed the unregularised
    `calculate_gradient_logistic` and read an undefined `lambda_`.
    """
    y_predicted = tx @ w
    # sigmoid is element-wise; np.apply_along_axis(sigmoid, 1, ...) failed on
    # 1-D input
    left_hand = sigmoid(y_predicted)
    # The derivative of the (lambda_ / 2) * (w . w) penalty is lambda_ * w,
    # not (lambda_ / 2) * w
    return (tx.T @ (left_hand - y)) + (lambda_ * w)
    