In [30]:
# Useful starting lines
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import math
from implementations import *
from proj1_helpers import *
from misc_helpers import *
from plot_functions import *
from ml_math import *
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Load the training data into feature matrix, class labels, and event ids:

In [31]:
DATA_TRAIN_PATH = '../data/train.csv' # TODO: download train data and supply path here 
y, tX, ids = load_csv_data(DATA_TRAIN_PATH)
y = (y+1)/2

## Do your crazy machine learning thing here :) ...

In [32]:
def split_data(y, x, ratio, myseed=1):
    """split the dataset based on the split ratio."""
    # set seed
    np.random.seed(myseed)
    # generate random indices
    num_row = len(y)
    indices = np.random.permutation(num_row)
    index_split = int(np.floor(ratio * num_row))
    index_tr = indices[: index_split]
    index_te = indices[index_split:]
    # create split
    x_tr = x[index_tr]
    x_te = x[index_te]
    y_tr = y[index_tr]
    y_te = y[index_te]
    return x_tr, x_te, y_tr, y_te

In [33]:
ratio = 0.6
x_train, x_test, y_train, y_test = split_data(y, tX, ratio)

## Logistic regression function

In [34]:
def sigmoid(t, l = 1):
    """apply sigmoid function on t."""
    return 1.0 / (1.0+np.exp(-t))

def calculate_loss(y, tx, w):
    """compute the cost by negative log likelihood."""
    s = 0
    for n in range(len(y)):
        s += np.log(1 + np.exp(np.dot(tx[n,:].T,w))) - y[n]*np.dot(tx[n,:].T,w)
    return s 

def calculate_gradient(y, tx, w):
    """compute the gradient of loss."""
    return np.dot(tx.T,sigmoid(np.dot(tx,w))-y)

def calculate_hessian(y, tx, w):
    """return the hessian of the loss function."""
    N = len(y)
    S = np.eye(N)
    for n in range(N):
        pred = sigmoid(np.dot(tx[n,:].T,w))
        S[n,n] = pred*(1-pred)
    return tx.T.dot(S.dot(tx))

def logistic_regression(y, tx, w, newton = False):
    """return the loss, gradient, and hessian."""
    loss = calculate_loss(y, tx, w)
    gradient = calculate_gradient(y, tx, w)
    if newton:
        hess = calculate_hessian(y, tx, w)
    else:
        hess = 0
    return loss, gradient, hess

def penalized_logistic_regression(y, tx, w, lambda_, newton = False):
    """return the loss, gradient, and hessian."""
    loss, gradient, hess = logistic_regression(y, tx, w, newton)
    loss += lambda_/2*np.linalg.norm(w)**2
    gradient += lambda_*w
    if newton:
        hess += lambda_*np.eye(len(w)).dot(w)
    else:
        hess = 0
    return loss, gradient, hess

def logistic_regression_ADAM(y , tx, lambda_, maxit, verbose = False):
    """return w using ADAM"""
    n , p =np.shape(tx)
    w = np.zeros(p)
    w_prev = w
    alpha = 0.1
    beta1 = 0.9
    beta2 = 0.999
    eps = 1E-8
    m_prev = 0
    v_prev = 0
    for k in range(maxit):
        g = calculate_gradient(y, tx, w_prev) + lambda_ * w
        m = beta1*m_prev + (1-beta1)*g
        v = beta2*v_prev + (1-beta2)*g**2
        m_hat = m/(1-beta1)
        v_hat = v/(1-beta2)
        H = np.sqrt(v_hat)+eps
        w_next = w - alpha*m_hat/H
        
        w = w_next
        w_prev = w
        m_prev = m
        v_prev = v
        loss = calculate_loss(y, tx, w)
        #if not k%10 and verbose:
        if verbose:
            print ('%d : loss = %f, norm(g) = %f'%(k,loss,np.linalg.norm(w)) )
    return w, loss

def logistic_regression_GD(y , tx, gamma, lambda_, maxit, verbose = False):
    """return w using ADAM"""
    n , p =np.shape(tx)
    w = np.zeros(p)
    
    for k in range(maxit):
        g = calculate_gradient(y, tx, w) + lambda_ * w
        w = w - gamma*g
        
        loss = calculate_loss(y, tx, w)
        #if not k%10 and verbose:
        if verbose:
            print ('%d : loss = %f, norm(g) = %f'%(k,loss,np.linalg.norm(w)) )
    return w, loss

def logistic_regression_SGD(y , tx, lambda_, maxit):
    """return w using ADAM"""
    n , p =np.shape(tx)
    w = np.zeros(p)
    w_prev = w
    alpha = 0.1
    beta1 = 0.9
    beta2 = 0.999
    eps = 1E-8
    m_prev = 0
    v_prev = 0
    for k in range(maxit):
        g = calculate_gradient(y, tx, w_prev) + lambda_ * w
        m = beta1*m_prev + (1-beta1)*g
        v = beta2*v_prev + (1-beta2)*g**2
        m_hat = m/(1-beta1)
        v_hat = v/(1-beta2)
        H = np.sqrt(v_hat)+eps
        w_next = w - alpha*m_hat/H
        
        w = w_next
        w_prev = w
        m_prev = m
        v_prev = v
    return w

In [35]:
# Validate logistic_regression_ADAM
tx_norm = normalize(x_train)
lambda_ = 0.5
w1 , loss= logistic_regression_ADAM(y_train ,tx_norm, lambda_, 100, True)

0 : loss = 103839.673495, norm(g) = 0.547723
1 : loss = 103663.193372, norm(g) = 1.279493
2 : loss = 103459.745190, norm(g) = 2.116993
3 : loss = 103238.781133, norm(g) = 3.027679
4 : loss = 103006.516402, norm(g) = 3.991741
5 : loss = 102767.364769, norm(g) = 4.991813
6 : loss = 102524.589742, norm(g) = 6.015421
7 : loss = 102280.660509, norm(g) = 7.053083
8 : loss = 102037.468558, norm(g) = 8.096447
9 : loss = 101796.469349, norm(g) = 9.138420
10 : loss = 101558.780402, norm(g) = 10.173553
11 : loss = 101325.252642, norm(g) = 11.197767
12 : loss = 101096.524847, norm(g) = 12.208012
13 : loss = 100873.066857, norm(g) = 13.202117
14 : loss = 100655.214332, norm(g) = 14.178719
15 : loss = 100443.196326, norm(g) = 15.137138
16 : loss = 100237.156486, norm(g) = 16.077175
17 : loss = 100037.169193, norm(g) = 16.998877
18 : loss = 99843.252204, norm(g) = 17.902350
19 : loss = 99655.376857, norm(g) = 18.787675
20 : loss = 99473.476522, norm(g) = 19.654960
21 : loss = 99297.453863, norm(g) = 

In [44]:
gamma = 0.2
w2 , loss= logistic_regression_GD(y_trai ,tx_norm, gamma, lambda_, 50, True)

0 : loss = 93732.372824, norm(g) = 55.747337
1 : loss = 92278.556897, norm(g) = 69.191352
2 : loss = 91517.365786, norm(g) = 76.163061
3 : loss = 90982.617400, norm(g) = 81.425982
4 : loss = 90572.852465, norm(g) = 85.867118
5 : loss = 90246.383620, norm(g) = 89.705273
6 : loss = 89980.591184, norm(g) = 93.035721
7 : loss = 89761.207437, norm(g) = 95.926989
8 : loss = 89578.387118, norm(g) = 98.437138
9 : loss = 89424.947175, norm(g) = 100.616561
10 : loss = 89295.451687, norm(g) = 102.508962
11 : loss = 89185.680271, norm(g) = 104.152187
12 : loss = 89092.293437, norm(g) = 105.579024
13 : loss = 89012.609431, norm(g) = 106.817911
14 : loss = 88944.448988, norm(g) = 107.893543
15 : loss = 88886.023849, norm(g) = 108.827380
16 : loss = 88835.854662, norm(g) = 109.638077
17 : loss = 88792.709271, norm(g) = 110.341857
18 : loss = 88755.555522, norm(g) = 110.952824
19 : loss = 88723.524607, norm(g) = 111.483235
20 : loss = 88695.882223, norm(g) = 111.943740
21 : loss = 88672.005597, norm(g

In [46]:
tx_test_norm =normalize(x_test)
print(calculate_loss((y_test+1)/2,tx_test_norm,w1))
print(calculate_loss((y_test+1)/2,tx_test_norm,w2))

82362.48239936346
87222.02568160382


## Cross Validation Function

In [9]:
def build_k_indices(y, k_fold, seed):
    """build k indices for k-fold."""
    num_row = y.shape[0]
    interval = int(num_row / k_fold)
    np.random.seed(seed)
    indices = np.random.permutation(num_row)
    k_indices = [indices[k * interval: (k + 1) * interval]
                 for k in range(k_fold)]
    return np.array(k_indices)
    
def cross_validation(y, x, k_fold, solver = 'LS',stoch = True,lambda_ = 0, maxit = 1):
    """return the loss of ridge regression."""
    seed = 1
    k_indices = build_k_indices(y, k_fold, seed)
    
    mse_tr = 0
    mse_te = 0
    #p = np.shape(x)[1]
    #w0 = np.zeros(p)
    
    for k in range(k_fold):
        # get k'th subgroup in test, others in train:
        test_indices = k_indices[k]
        train_indices = np.delete(k_indices,k,0).flatten()
        x_tr = x[train_indices]
        y_tr = y[train_indices]
        x_te = x[test_indices]
        y_te = y[test_indices]

        # Least squares:
        if solver == 'LS':
            w, loss = least_squares(y_tr, x_tr)
        elif solver == 'RR':
            w, loss = ridge_regression(y_tr, x_tr, lambda_)
        elif solver == 'LR':
            
        else:
            raise('Error')

        # calculate the loss for train and test data: 
        
        loss_tr = compute_error(x_tr,y_tr, w)
        loss_te = compute_error(x_te,y_te, w)
    
        mse_tr += loss_tr/k_fold
        mse_te += loss_te/k_fold
        
    
    return mse_tr, mse_te, w

IndentationError: expected an indented block (<ipython-input-9-5a4abf258794>, line 37)

### Standardize

In [3]:
y_norm = normalize(y)
tX_norm = normalize(tX)

### Least squares as reference

In [12]:
w_LS, loss_LS = least_squares(y_train,x_train)

In [8]:
#calculate_loss()

### Test Logistic Regression

### Test Regulated Logistic Regression

### Choose the weight you want

In [None]:
weights = w_GD

## Generate predictions and save ouput in csv format for submission:

In [None]:
DATA_TEST_PATH = 'data/test.csv' # TODO: download train data and supply path here 
_, tX_test, ids_test = load_csv_data(DATA_TEST_PATH)

In [None]:
OUTPUT_PATH = 'result/to_try.csv' # TODO: fill in desired name of output file for submission
y_pred = predict_labels(weights, build_poly(tX_test,degree))
create_csv_submission(ids_test, y_pred, OUTPUT_PATH)

In [None]:
np.shape(build_poly(tX_test,degree))