In [88]:
import numpy as np
import matplotlib.pyplot as plt
import datetime

from proj1_helpers import *
from implementations import *

# constants
training_data = '/Users/Gaurav/Desktop/train.csv'
test_data = '/Users/Gaurav/Desktop/test.csv'

#### Helper functions

In [89]:
def error(y, tx, w):
    """Calculates the error in current prediction."""
    return y - np.dot(tx, w)


def compute_loss(y, tx, w):
    """Calculates the loss using MSE."""
    N = y.shape[0]
    e = error(y, tx, w)
    factor = 1/(2*N)
    loss = (np.dot(np.transpose(e), e)) * factor
    return loss


def compute_gradient(y, tx, w):
    """Computes the gradient of the MSE loss function."""
    N = y.shape[0]
    e = error(y, tx, w)
    factor = -1/N
    grad = (np.dot(np.transpose(tx), e)) * factor
    loss = compute_loss(y, tx, w)
    return grad, loss


def compute_stoch_gradient(y, tx, w):
    """Computes a stochastic gradient from a few examples n and their corresponding y_n labels."""
    N = y.shape[0]
    e = error(y, tx, w)
    factor = -1/N
    grad = (np.dot(np.transpose(tx), e)) * factor
    loss = compute_loss(y, tx, w)
    return grad, loss


def batch_iter(y, tx, batch_size, num_batches=1, shuffle=True):
    """
    Generates a minibatch iterator for a dataset.
    Takes as input two iterables - the output desired values 'y' and the input data 'tx'.
    Outputs an iterator which gives mini-batches of batch_size matching elements from y and tx.
    Data can be randomly shuffled to avoid ordering in the original data messing with the randomness of the minibatches.
    Example of use:
    for minibatch_y, minibatch_tx in batch_iter(y, tx, 32):
        do something
    """
    data_size = len(y)

    if shuffle:
        shuffle_indices = np.random.permutation(np.arange(data_size))
        shuffled_y = y[shuffle_indices]
        shuffled_tx = tx[shuffle_indices]
    else:
        shuffled_y = y
        shuffled_tx = tx
    for batch_num in range(num_batches):
        start_index = batch_num * batch_size
        end_index = min((batch_num + 1) * batch_size, data_size)
        if start_index != end_index:
            yield shuffled_y[start_index:end_index], shuffled_tx[start_index:end_index]

            
def compare_prediction(w_train, x, y):
    """Calculates accuracy by comparing prediction with given test data."""
    pred = predict_labels(w_train, x)
    N = len(pred)
    count = 0.0
    for i in range(len(pred)):
        if pred[i] == y[i]:
            count += 1
    # matches = (y == pred).sum()
    return count/N


def split_data(x, y, ratio, seed=1):
    """
    split the dataset based on the split ratio. If ratio is 0.8 
    you will have 80% of your data set dedicated to training 
    and the rest dedicated to testing
    """
    # set seed
    np.random.seed(seed)

    # split the data based on the given ratio
    idx = [i for i in range(len(x))]
    np.random.shuffle(idx)
    split = int(len(x) * ratio)
    
    x_shuffle = x[idx]
    y_shuffle = y[idx]
    
    x_train = x_shuffle[:split]
    x_test = x_shuffle[split:]
    y_train = y_shuffle[:split]
    y_test = y_shuffle[split:]
    
    return x_train, y_train, x_test, y_test 


def build_poly(x, degree):
    """Builds a polynomial of the given degree and appends it to the given matrix."""
    x_ret = x
    for i in range(2, degree+1):
        x_ret = np.c_[x_ret, np.power(x, i)]
    return x_ret


def standardize(x):
    """Standardizes the matrix by calculating the mean of each column and subtracting it from individual values."""
    x_ret = x.copy()
    for i in range(1, x.shape[1]):
        c = x[:,i]
        c = (c - np.mean(c))/np.std(c)
        x_ret[:,i] = c
    return x_ret


#### Functions to Implement:

In [90]:
def least_squares(y, tx):
    """Calculates the solution using the least squares method."""     
    gram = np.dot(np.transpose(tx),tx)
    gram = np.linalg.inv(gram)
    
    w = np.dot(gram,np.transpose(tx))
    w = np.dot(w, y)
    loss = compute_loss(y, tx, w)
    return w, loss


def least_squares_GD(y, tx, initial_w, max_iters, gamma):
    """Calculates the solution using the least squares with gradient descent method."""
    w = initial_w
    for n_iter in range(max_iters):
        grad, loss = compute_gradient(y, tx ,w)
        w = w - gamma * grad  
    return w, loss


def least_squares_SGD(y, tx, initial_w, max_iters, gamma):
    """Calculates the solution using the least squares with stochastic gradient descent method."""
    w = initial_w
    for n_iter in range(max_iters):
        for y_batch, tx_batch in batch_iter(y, tx, batch_size=1, num_batches=1):
            grad, _ = compute_stoch_gradient(y_batch, tx_batch, w)
            w = w - gamma * grad
            loss = compute_loss(y, tx, w)
    return w, loss


def ridge_regression(y, tx, lambda_):
    """Calculates the solution using the ridge regression method."""
    N = tx.shape[1]
    a = np.dot(np.transpose(tx), tx) + lambda_ * np.identity(N)
    b = np.dot(np.transpose(tx), y)
    w = np.linalg.solve(a, b)
    loss = compute_loss(y, tx, w)
    return w, loss


def sigma(x):
    """Calculates sigma using the formula."""
    return np.exp(x)/(1+np.exp(x))


def logistic_regression(y, tx, initial_w, max_iters, gamma):
    """Calculates the solution using the logistic regression method."""
    w = initial_w
    for n_iter in range(max_iters):
        yx = np.dot(y, np.transpose(tx))
        yxw = np.dot(yx, w)
        log = np.log(1 + np.exp(np.dot(np.transpose(tx), w)))
        loss = (log - yxw).sum()
        
        # Update rule
        sig = sigma(np.dot(tx, w))
        sig = sig - y
        grad = np.dot(np.transpose(tx), sig)
        w = w - gamma * grad 
    return w, loss


def reg_logistic_regression(y, tx, lambda_ , initial_w, max_iters, gamma):
    """Calculates the solution using the regularized logistic regression using gradient descent method."""
    w = initial_w
    for n_iter in range(max_iters):
        yx = np.dot(y, np.transpose(tx))
        yxw = np.dot(yx, w)
        log = np.log(1 + np.exp(np.dot(np.transpose(tx), w)))
        
        # Add the 'penalty' term
        loss = (log - yxw).sum() - (lambda_/2)* np.square((np.linalg.norm(w)))
        
        # Update rule
        sig = sigma(np.dot(tx, w))
        sig = sig - y
        grad = np.dot(np.transpose(tx), sig) + 2 * lambda_*w
        w = w - gamma * grad
    return w, loss

### Retrieve input training and testing data

In [91]:
ty, tx, ids_train = load_csv_data(training_data, sub_sample = False)

fy, fx, ids_test = load_csv_data(test_data, sub_sample = False)

### Preprocess Data

In [96]:
# normalize the data
x_train = standardize(tx)
fx_train = standardize(fx)

# split data
x_train, y_train, x_test, y_test = split_data(x_train, ty, 0.80, seed=1)

# polynomial fit
# degree = 2
# x_train = build_poly(x_train, degree)
# x_test = build_poly(x_test, degree)
# tx_train = build_poly(tx_train, degree)

# add intercept
x_train = np.hstack((np.ones((x_train.shape[0], 1)), x_train))
x_test = np.hstack((np.ones((x_test.shape[0], 1)), x_test))
fx_train = np.hstack((np.ones((fx_train.shape[0], 1)), fx_train))

### Least Squares Method

In [97]:
w_ls, loss_ls = least_squares(y_train, x_train)
ls_accuracy = compare_prediction(w_ls, x_test, y_test)
print('Accuracy: ' + str(ls_accuracy * 100) + '%')

Accuracy: 74.466%


### Least Squares using Gradient Descent (using MSE)

In [98]:
max_iters = 70
step_size = 1e-5
w_0 = np.ones(x_train.shape[1])

w_lsgd, loss_lsgd = least_squares_GD(y_train, x_train, w_0, max_iters, step_size)
lsGD_accuracy = compare_prediction(w_lsgd, x_test, y_test)
print('Accuracy: ' + str(lsGD_accuracy * 100) + '%')

Accuracy: 58.182%


### Least Squares using Stochastic Gradient Descent (batch_size = 1)

In [99]:
max_iters = 50
step_size = 1e-5
w_0 = np.zeros(x_train.shape[1])

w_lstoch, loss_lstoch = least_squares_SGD(y_train, x_train, w_0, max_iters, step_size) 
lsStoch_accuracy = compare_prediction(w_lstoch, x_test, y_test)
print('Accuracy: ' + str(lsStoch_accuracy * 100) + '%')

Accuracy: 52.617999999999995%


### Ridge Regression

In [100]:
degree = 12
lambda_ = 0.278834626595
w_rr, loss_rr = ridge_regression(y_train, build_poly(x_train, degree), lambda_)
rr_accuracy = compare_prediction(w_rr, build_poly(x_test, degree), y_test)
print('Accuracy: ' + str(rr_accuracy * 100) + '%')

Accuracy: 81.41199999999999%


In [101]:
y_pred = predict_labels(w_rr, build_poly(fx_train, degree))
create_csv_submission(ids_test, y_pred, "output.csv")