In [1]:
# Useful starting lines
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2         

## Load the training data into feature matrix, class labels, and event ids:

In [1]:
from proj1_helpers import *
# Absolute path to the training CSV. It must exist on the machine running the
# notebook; the OSError recorded under this cell is this path not being found.
DATA_TRAIN_PATH = '/home/ML_course/projects/project1/data/train.csv' # TODO: download train data and supply path here
# Unpack the three values returned by load_csv_data (per the section heading:
# class labels, feature matrix, event ids).
y, tX, ids = load_csv_data(DATA_TRAIN_PATH)

def clean_data(tX):
    """Impute missing values with column means, then standardize every column.

    Entries equal to the sentinel -999 (and any entries that are already NaN)
    are treated as missing and replaced by the mean of the remaining values in
    the same column. Each column is then centred on its mean and divided by its
    standard deviation.

    Args:
        tX: 2-D array-like of shape (n_samples, n_features).

    Returns:
        A new float array of the same shape. The input is never modified, so
        re-running the calling cell does not operate on already-cleaned data.
        Constant columns (zero standard deviation) come back as all zeros
        instead of NaN/inf.
    """
    # Work on a float copy: keeps the caller's array intact and lets NaN be
    # stored even when the input has an integer dtype.
    data = np.array(tX, dtype=float)
    data[data == -999] = np.nan

    col_mean = np.nanmean(data, axis=0)
    rows, cols = np.where(np.isnan(data))
    data[rows, cols] = col_mean[cols]

    center = np.nanmean(data, axis=0)
    scale = np.std(data, axis=0)
    # A constant column has std 0; dividing by 1 instead leaves it at 0.
    scale[scale == 0] = 1.0

    return (data - center) / scale

# Replace the raw feature matrix with its imputed, standardized version.
tX = clean_data(tX)



OSError: /home/ML_course/projects/project1/data/train.csv not found.

## Do your crazy machine learning thing here :) ...

In [2]:
def calculate_mse(e):
    """Return half of the mean of the squared entries of the error `e`."""
    mean_square = np.mean(e ** 2)
    return mean_square / 2


def compute_gradient(y, tx, w):
    """Gradient of the half-MSE loss of the linear model `tx.dot(w)`.

    Returns:
        (gradient, residual) where residual is `y - tx.dot(w)` and gradient is
        `-tx.T.dot(residual)` divided by the number of residual rows.
    """
    residual = y - np.dot(tx, w)
    n_rows = len(residual)
    gradient = -np.dot(tx.T, residual) / n_rows
    return gradient, residual



############################### Linear regression with gradient descent################################   

def least_squares_GD(y, tx, initial_w, max_iters, gamma):
    """Linear regression fitted by full-batch gradient descent on the half-MSE.

    Args:
        y: targets with n_samples entries; reshaped internally to (n_samples, 1).
        tx: feature matrix of shape (n_samples, n_features).
        initial_w: starting weights, either (n_features, 1) or 1-D (n_features,).
            A 1-D vector is promoted to a column so it cannot broadcast against
            the column-shaped `y` into an (n_samples, n_samples) residual.
        max_iters: number of gradient steps.
        gamma: step size.

    Returns:
        (losses, ws): the loss evaluated before each step, and the list of
        weight vectors starting with the initial one (max_iters + 1 entries).
    """
    y = y.reshape(y.shape[0], 1)
    w = np.asarray(initial_w)
    if w.ndim == 1:
        w = w.reshape(-1, 1)
    ws = [w]
    losses = []
    for n_iter in range(max_iters):
        grad, err = compute_gradient(y, tx, w)
        loss = calculate_mse(err)
        w = w - gamma * grad
        ws.append(w)
        losses.append(loss)
        # Same log layout as the SGD routine; w1 used to be printed three times.
        print("Gradient Descent({bi}/{ti}): loss={l}, w0={w0}, w1={w1}".format(
            bi=n_iter, ti=max_iters - 1, l=loss, w0=w[0], w1=w[1]))
    return losses, ws

#least_squares_GD(y, tX, np.full((30,1),0.00001), 100, 0.001)


######## least squares ##################################################################################
def least_square(y, tx):
    """Solve the normal equations (tx^T tx) w = tx^T y and return w."""
    tx_t = tx.T
    return np.linalg.solve(tx_t.dot(tx), tx_t.dot(y))


################################# Linear regression with SGD ###########################################

def batch_iter(y, tx, batch_size, num_batches=1, shuffle=True):
    """
    Generate a minibatch iterator for a dataset.
    Takes as input two indexable arrays (the desired output values 'y' and the input data 'tx').
    Yields up to `num_batches` mini-batches of `batch_size` matching elements from `y` and `tx`;
    the last batch may be shorter, and no batch is produced once the data is exhausted.
    Data can be randomly shuffled to avoid ordering in the original data messing with the randomness of the minibatches.
    Example of use :
    for minibatch_y, minibatch_tx in batch_iter(y, tx, 32):
        <DO-SOMETHING>
    """
    data_size = len(y)

    if shuffle:
        shuffle_indices = np.random.permutation(np.arange(data_size))
        shuffled_y = y[shuffle_indices]
        shuffled_tx = tx[shuffle_indices]
    else:
        shuffled_y = y
        shuffled_tx = tx
    for batch_num in range(num_batches):
        start_index = batch_num * batch_size
        end_index = min((batch_num + 1) * batch_size, data_size)
        # Once start_index reaches the end of the data every later slice is
        # empty too; `!=` used to let those empty batches through.
        if start_index >= end_index:
            break
        yield shuffled_y[start_index:end_index], shuffled_tx[start_index:end_index]


def stochastic_gradient_descent(
        y, tx, initial_w, batch_size, max_iters, gamma):
    """Linear regression fitted by stochastic (mini-batch) gradient descent.

    Each iteration draws one random mini-batch, takes a gradient step on it,
    and records the half-MSE of the updated weights on the full dataset.

    Args:
        y: targets with n_samples entries; reshaped internally to (n_samples, 1).
        tx: feature matrix of shape (n_samples, n_features).
        initial_w: starting weights, either (n_features, 1) or 1-D (n_features,).
            A 1-D vector is promoted to a column so it cannot broadcast against
            the column-shaped `y` into an (n_samples, n_samples) residual.
        batch_size: number of samples per mini-batch.
        max_iters: number of iterations (one mini-batch each).
        gamma: step size.

    Returns:
        (losses, ws): the full-data loss after every update, and the list of
        weight vectors starting with the initial one.
    """
    y = y.reshape(y.shape[0], 1)
    w = np.asarray(initial_w)
    if w.ndim == 1:
        w = w.reshape(-1, 1)
    # Define parameters to store w and loss
    ws = [w]
    losses = []
    # Keeps the log line below valid even if no batch is ever produced
    # (e.g. empty data), where `loss` would otherwise be unbound.
    loss = None
    for n_iter in range(max_iters):
        for y_batch, tx_batch in batch_iter(y, tx, batch_size=batch_size, num_batches=1):
            # compute a stochastic gradient on the mini-batch
            grad, _ = compute_gradient(y_batch, tx_batch, w)
            # update w through the stochastic gradient update
            w = w - gamma * grad
            # calculate the loss of the updated weights on the full dataset
            e = y - tx.dot(w)
            loss = calculate_mse(e)
            # store w and loss
            ws.append(w)
            losses.append(loss)

        print("SGD({bi}/{ti}): loss={l}, w0={w0}, w1={w1}".format(
              bi=n_iter, ti=max_iters - 1, l=loss, w0=w[0], w1=w[1]))
    return losses, ws

#stochastic_gradient_descent(y, tX, np.full((30,1),0.00001),4, 100, 0.00001)

################################## Ridge regression ##############################################


def ridge_regression_solve(y, tx, lambda_):
    """Closed-form ridge weights.

    Solves (tx^T tx + 2 * n_samples * lambda_ * I) w = tx^T y, where n_samples
    is the number of rows of `tx` and I has one row per column of `tx`.
    """
    n_samples, n_features = tx.shape[0], tx.shape[1]
    ridge_term = 2 * n_samples * lambda_ * np.identity(n_features)
    gram = tx.T.dot(tx)
    return np.linalg.solve(gram + ridge_term, tx.T.dot(y))

def ridge_regression(y, tx, lambda_):
    """Fit ridge regression in closed form and report its training RMSE.

    Returns:
        (rmse, w): the root-mean-square residual of the fitted weights on
        (y, tx), and the weights as returned by ridge_regression_solve.
    """
    targets = y.reshape(y.shape[0], 1)
    weights = ridge_regression_solve(targets, tx, lambda_)
    residual = targets - tx.dot(weights)
    # calculate_mse returns half the mean square, hence the factor 2.
    return np.sqrt(2 * calculate_mse(residual)), weights
    

        
#lambdas = np.logspace(-5, 0, 15)
#for ind, lambda_ in enumerate(lambdas):
 #   ridge_regression(y, tX,lambda_)

    
################################## Logistic regression ##################################
def sigmoid(t):
    """Logistic function 1 / (1 + exp(-t)), evaluated without overflow.

    `exp` is only ever taken of a non-positive number, so large negative `t`
    no longer overflows (and warns) the way a direct `np.exp(-t)` does.

    Args:
        t: scalar or array-like of real values.

    Returns:
        Values in [0, 1] with the shape of `t`; a NumPy scalar for scalar input.
    """
    t = np.asarray(t)
    decay = np.exp(-np.abs(t))  # always in (0, 1]
    # t >= 0: 1 / (1 + e^-t); t < 0: e^t / (1 + e^t), the same quantity.
    result = np.where(t >= 0, 1 / (1 + decay), decay / (1 + decay))
    return result[()] if result.ndim == 0 else result

def calculate_loss(y, tx, w):
    """Negative log-likelihood of the logistic model for labels in {0, 1}.

    Computes  -y^T log(s) - (1 - y)^T log(1 - s)  with s = sigmoid(tx.dot(w)).
    The second term previously used (-1 + y) where y belongs, so positive
    samples were ignored and the two terms cancelled.

    The logs are evaluated through `np.logaddexp`, so confident predictions
    (s equal to 0 or 1 in floating point) give a finite loss, not inf/nan.

    NOTE(review): the formula, like the gradient in calculate_gradient_LR,
    assumes labels in {0, 1}; if the loaded labels are encoded as -1/1 they
    need remapping before calling this — confirm against the data loader.

    Args:
        y: labels, shape (n_samples, 1) (or 1-D with a 1-D `w`).
        tx: feature matrix, shape (n_samples, n_features).
        w: weights, shape (n_features, 1) (or 1-D).

    Returns:
        The loss with the shape produced by the dot products: a (1, 1) array
        for column-vector inputs, a scalar for 1-D inputs.
    """
    t = tx.dot(w)
    # -log(sigmoid(t)) = log(1 + e^-t);  -log(1 - sigmoid(t)) = log(1 + e^t)
    neg_log_sig = np.logaddexp(0, -t)
    neg_log_one_minus_sig = np.logaddexp(0, t)
    return y.T.dot(neg_log_sig) + (1 - y).T.dot(neg_log_one_minus_sig)

def calculate_gradient_LR(y, tx, w):
    """Return tx^T (sigmoid(tx.dot(w)) - y), the gradient used for logistic regression."""
    predicted = sigmoid(tx.dot(w))
    return tx.T.dot(predicted - y)
                               
    
def learning_by_gradient_descent(y, tx, w, gamma):
    """Take one gradient-descent step for logistic regression.

    Returns:
        (loss, w): the loss evaluated at the incoming weights, and the weights
        after subtracting `gamma` times the gradient (a new array).
    """
    current_loss = calculate_loss(y, tx, w)
    step = gamma * calculate_gradient_LR(y, tx, w)
    return current_loss, w - step

def logistic_regression(y, tx, initial_w, max_iters, gamma):
    """Logistic regression fitted by gradient descent.

    Iterates until `max_iters` steps have been taken or two consecutive losses
    differ by less than 1e-8.

    Args:
        y: labels with n_samples entries; reshaped internally to (n_samples, 1).
        tx: feature matrix of shape (n_samples, n_features).
        initial_w: starting weights, shape (n_features, 1).
        max_iters: maximum number of gradient steps.
        gamma: step size.

    Returns:
        (loss, w): the last loss that was evaluated and the final weights. With
        `max_iters == 0` the loss is evaluated once at `initial_w`.
    """
    threshold = 1e-8
    losses = []
    # The loop previously read `w` without ever initialising it, and iterated
    # over the undefined name `max_iter`.
    w = initial_w
    y = y.reshape(y.shape[0], 1)
    # start the logistic regression
    for n_iter in range(max_iters):
        # get loss and update w.
        loss, w = learning_by_gradient_descent(y, tx, w, gamma)
        # log info
        if n_iter % 100 == 0:
            print("Current iteration={i}, loss={l}".format(i=n_iter, l=loss))
        # converge criterion
        losses.append(loss)
        if len(losses) > 1 and np.abs(losses[-1] - losses[-2]) < threshold:
            break

    if not losses:
        # No step was taken: still return a well-defined loss.
        loss = calculate_loss(y, tx, w)
    return loss, w
    
    
#logistic_regression(y, np.c_[np.ones((y.shape[0], 1)), x], np.zeros((tx.shape[1], 1)), 10000, 0.01)

################################## Regularized logistic regression ##################################
def penalized_logistic_regression(y, tx, w, lambda_):
    """Loss and gradient of L2-penalized logistic regression.

    Returns:
        (loss, gradient): calculate_loss(y, tx, w) plus `lambda_ * w^T w`, and
        calculate_gradient_LR(y, tx, w) plus `2 * lambda_ * w`.
    """
    # np.squeeze turns the (1, 1) product w^T w into a scalar.
    penalty = lambda_ * np.squeeze(w.T.dot(w))
    loss = calculate_loss(y, tx, w) + penalty
    gradient = calculate_gradient_LR(y, tx, w) + 2 * lambda_ * w
    return loss, gradient

def learning_by_penalized_gradient(y, tx, w, gamma, lambda_):
    """Take one gradient-descent step for L2-penalized logistic regression.

    Returns:
        (loss, w): the penalized loss at the incoming weights, and the updated
        weights as a new array.
    """
    loss, gradient = penalized_logistic_regression(y, tx, w, lambda_)
    # Out-of-place update: `w -= ...` overwrote the caller's array (e.g. the
    # initial weights handed to the training loop) and fails for integer dtypes.
    w = w - gamma * gradient
    return loss, w

def reg_logistic_regression(y, tx, lambda_, initial_w, max_iters, gamma):
    """L2-regularized logistic regression fitted by gradient descent.

    Runs at most `max_iters` steps of learning_by_penalized_gradient and stops
    early when two consecutive losses differ by less than 1e-8. Progress is
    printed every 100 iterations.

    Returns:
        (losses, w): the loss recorded at every step taken, and the final weights.
    """
    tolerance = 1e-8
    targets = y.reshape(y.shape[0], 1)
    weights = initial_w
    history = []
    for step in range(max_iters):
        # one penalized gradient step; the loss is the one before the update
        current, weights = learning_by_penalized_gradient(targets, tx, weights, gamma, lambda_)
        if step % 100 == 0:
            print("Current iteration={i}, loss={l}".format(i=step, l=current))
        history.append(current)
        converged = len(history) > 1 and np.abs(history[-1] - history[-2]) < tolerance
        if converged:
            break
    return history, weights

# Keep the fitted model: the submission cell reads `weights`, which was never
# assigned because the return value of this call used to be discarded.
losses, weights = reg_logistic_regression(y, tX, 0.1, np.zeros((tX.shape[1], 1)), 10000, 0.01)

NameError: name 'y' is not defined

## Generate predictions and save output in csv format for submission:

In [10]:
DATA_TEST_PATH = '' # TODO: download test data and supply path here
# The first value returned by load_csv_data is discarded here.
# NOTE(review): the training features are passed through clean_data() earlier in
# this notebook but tX_test is used as loaded — confirm whether the same
# preprocessing should be applied before predicting.
_, tX_test, ids_test = load_csv_data(DATA_TEST_PATH)

In [31]:
OUTPUT_PATH = '' # TODO: fill in desired name of output file for submission
# NOTE(review): `weights` is not assigned in this cell; it must already hold the
# weight vector from a training call made earlier in the notebook.
y_pred = predict_labels(weights, tX_test)
create_csv_submission(ids_test, y_pred, OUTPUT_PATH)