In [2]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

import datetime
from proj1_helpers import *
from implementations import *

#### Helper functions

In [3]:
def error(y, tx, w):
    """Return the residual y - tx.w of the linear model with weights w."""
    predictions = np.dot(tx, w)
    residuals = y - predictions
    return residuals

In [4]:
def compute_loss(y, tx, w):
    """Return the MSE loss e.e / (2N) of the linear model, where e = error(y, tx, w)."""
    n_samples = y.shape[0]
    residuals = error(y, tx, w)
    scale = 1 / (2 * n_samples)
    squared_norm = np.dot(np.transpose(residuals), residuals)
    return squared_norm * scale

##### TODO: remove this one? (`compute_gradient` and `compute_stoch_gradient` compute exactly the same thing)

In [5]:
def compute_gradient(y, tx, w):
    """Return (gradient, loss) of the MSE loss at w.

    The gradient is -tx^T e / N with e = error(y, tx, w); the loss is
    whatever compute_loss returns for the same arguments.
    """
    n_samples = y.shape[0]
    residuals = error(y, tx, w)
    scale = -1 / n_samples
    gradient = np.dot(np.transpose(tx), residuals) * scale
    return gradient, compute_loss(y, tx, w)

In [6]:
def compute_stoch_gradient(y, tx, w):
    """Compute the MSE gradient and loss on a mini-batch.

    Args:
        y: labels of the mini-batch.
        tx: features of the mini-batch (one row per sample).
        w: current weight vector.

    Returns:
        (grad, loss) evaluated on the given mini-batch only.
    """
    # The formula is the same as for the full gradient; only the data passed
    # in differs, so delegate instead of keeping a second copy of the code.
    return compute_gradient(y, tx, w)

In [7]:
def batch_iter(y, tx, batch_size, num_batches=1, shuffle=True):
    """
    Generate a minibatch iterator for a dataset.
    Takes as input two iterables (here the output desired values 'y' and the input data 'tx')
    Outputs an iterator which gives mini-batches of `batch_size` matching elements from `y` and `tx`.
    Data can be randomly shuffled to avoid ordering in the original data messing with the randomness of the minibatches.
    At most `num_batches` batches are produced; iteration stops early once the data is
    exhausted, so an empty batch is never yielded (the last batch may be shorter than `batch_size`).
    Example of use :
    for minibatch_y, minibatch_tx in batch_iter(y, tx, 32):
        <DO-SOMETHING>
    """
    data_size = len(y)

    if shuffle:
        # The same permutation is applied to y and tx so that pairs stay aligned.
        shuffle_indices = np.random.permutation(np.arange(data_size))
        shuffled_y = y[shuffle_indices]
        shuffled_tx = tx[shuffle_indices]
    else:
        shuffled_y = y
        shuffled_tx = tx
    for batch_num in range(num_batches):
        start_index = batch_num * batch_size
        end_index = min(start_index + batch_size, data_size)
        # `start_index != end_index` let through the case start_index > end_index
        # (more batches requested than the data holds), which produced empty slices.
        if start_index >= end_index:
            break
        yield shuffled_y[start_index:end_index], shuffled_tx[start_index:end_index]


#### Example

In [8]:
# Toy data for a quick sanity check: tx.dot(w) = [14, 32, 50], so the residual is [-2, 0, 0].
tx = np.array([[1,2,3],[4,5,6],[7,8,9]])
w = np.array([1,2,3])
y = np.array([12, 32, 50])

# This value is not the last expression of the cell, so it is computed but not displayed.
compute_loss(y, tx, w)
# Last expression of the cell: the (gradient, loss) tuple is what gets displayed.
compute_gradient(y, tx, w)

(array([ 0.66666667,  1.33333333,  2.        ]), 0.66666666666666663)

In [9]:
def least_squares_GD(y, tx, initial_w, max_iters, gamma):
    """Linear regression by gradient descent on the MSE loss.

    Args:
        y: target vector.
        tx: feature matrix, one row per sample.
        initial_w: starting weights (array-like); it is copied, never modified.
        max_iters: number of gradient steps; 0 returns the initial weights.
        gamma: step size.

    Returns:
        (w, loss): the final weights and the MSE loss evaluated at those weights.
    """
    # np.array copies its input and, unlike `.copy()`, also accepts plain lists.
    w = np.array(initial_w)
    for _ in range(max_iters):
        grad, _ = compute_gradient(y, tx, w)
        # Update rule
        w = w - gamma * grad
    # Evaluate the loss after the loop: it then matches the returned w (the loss
    # handed back by compute_gradient belongs to the weights *before* the step)
    # and it is defined even when max_iters == 0.
    loss = compute_loss(y, tx, w)
    return w, loss

In [10]:
def least_squares_SGD(y, tx, initial_w, max_iters, gamma):
    """Linear regression by stochastic gradient descent (one random sample per step).

    Args:
        y: target vector.
        tx: feature matrix, one row per sample.
        initial_w: starting weights (array-like); it is copied, never modified.
        max_iters: number of stochastic steps; 0 returns the initial weights.
        gamma: step size.

    Returns:
        (w, loss): the final weights and the MSE loss on the full data at those weights.
    """
    w = np.array(initial_w)
    for _ in range(max_iters):
        # batch_iter reshuffles on every call, so each step sees one random sample.
        for y_batch, tx_batch in batch_iter(y, tx, batch_size=1, num_batches=1):
            grad, _ = compute_stoch_gradient(y_batch, tx_batch, w)
            w = w - gamma * grad
    # Only the final loss is returned, so compute it once here rather than
    # paying a full pass over the data after every single-sample step.
    # This also defines `loss` when max_iters == 0.
    loss = compute_loss(y, tx, w)
    return w, loss

In [11]:
def least_squares(y, tx):
    """Calculate the least squares solution via the normal equations.

    Solves (tx^T tx) w = tx^T y.

    Args:
        y: target vector.
        tx: feature matrix, one row per sample.

    Returns:
        (w, loss): the optimal weights and the MSE loss at those weights.

    Raises:
        numpy.linalg.LinAlgError: if tx^T tx is singular.
    """
    gram = np.dot(np.transpose(tx), tx)
    rhs = np.dot(np.transpose(tx), y)
    # Solving the linear system is cheaper and numerically more accurate than
    # forming the explicit inverse and multiplying by it.
    w = np.linalg.solve(gram, rhs)
    loss = compute_loss(y, tx, w)
    return w, loss

In [12]:
def ridge_regression(y, tx, lambda_):
    """Ridge regression via the regularised normal equations.

    Solves (tx^T tx + 2 N lambda_ I) w = tx^T y, where N is the number of samples.

    Args:
        y: target vector of length N.
        tx: feature matrix of shape (N, D).
        lambda_: regularisation strength.

    Returns:
        (w, loss): the weights and the (unpenalised) MSE loss at those weights.

    Raises:
        numpy.linalg.LinAlgError: if the regularised matrix is singular.
    """
    n_samples = y.shape[0]
    n_features = tx.shape[1]
    gram = np.dot(np.transpose(tx), tx)
    # The penalty is added to the (D, D) Gram matrix, so the identity must have
    # one row per *feature*; sizing it by the number of samples does not match
    # gram's shape and allocates an N x N matrix.
    penalty = (2 * lambda_ * n_samples) * np.identity(n_features)
    w = np.linalg.solve(gram + penalty, np.dot(np.transpose(tx), y))
    loss = compute_loss(y, tx, w)
    return w, loss

In [13]:
def sigma(x):
    """Logistic sigmoid exp(x) / (1 + exp(x)), evaluated without overflow.

    Args:
        x: scalar or array-like of real values.

    Returns:
        The element-wise sigmoid, with values in [0, 1].
    """
    x = np.asarray(x, dtype=float)
    # sigma(x) = 1 / (1 + exp(-x)) = exp(-log(1 + exp(-x))).
    # logaddexp(0, -x) evaluates log(1 + exp(-x)) without ever forming exp of a
    # large positive number, so large |x| gives 0 or 1 instead of inf/inf = nan.
    return np.exp(-np.logaddexp(0, -x))

In [14]:
#Logistic regression using gradient descent
def logistic_regression(y, tx, initial_w, max_iters, gamma):
    """Logistic regression using gradient descent.

    Minimises the negative log-likelihood sum_n [log(1 + exp(z_n)) - y_n z_n]
    with z = tx.w.

    Args:
        y: labels. NOTE(review): the loss formula assumes labels in {0, 1};
            -1/+1 labels must be remapped by the caller first - confirm what
            the data loader produces.
        tx: feature matrix of shape (N, D).
        initial_w: starting weights (array-like of length D); not modified.
        max_iters: number of gradient steps; 0 returns the initial weights.
        gamma: step size.

    Returns:
        (w, loss): the final weights and the loss evaluated at those weights.
    """
    w = np.asarray(initial_w, dtype=float)
    for _ in range(max_iters):
        z = np.dot(tx, w)
        # Sigmoid written as exp(-log(1 + exp(-z))) so that large |z| cannot
        # overflow to inf/inf = nan.
        sig = np.exp(-np.logaddexp(0, -z))
        grad = np.dot(np.transpose(tx), sig - y)
        # Update rule
        w = w - gamma * grad

    # The loss is evaluated after the last update so that it corresponds to the
    # returned w. logaddexp(0, z) is an overflow-safe log(1 + exp(z)).
    z = np.dot(tx, w)
    loss = np.sum(np.logaddexp(0, z) - y * z)
    return w, loss

In [15]:
#Regularized logistic regression using gradient descent
def reg_logistic_regression(y, tx, lambda_ , initial_w, max_iters, gamma):
    """Regularized logistic regression using gradient descent.

    Minimises sum_n [log(1 + exp(z_n)) - y_n z_n] + lambda_ * ||w||^2 with
    z = tx.w. The gradient of the penalty is 2 * lambda_ * w.

    Args:
        y: labels. NOTE(review): the loss formula assumes labels in {0, 1};
            -1/+1 labels must be remapped by the caller first - confirm what
            the data loader produces.
        tx: feature matrix of shape (N, D).
        lambda_: regularisation strength.
        initial_w: starting weights (array-like of length D); not modified.
        max_iters: number of gradient steps; 0 returns the initial weights.
        gamma: step size.

    Returns:
        (w, loss): the final weights and the penalised loss at those weights.
    """
    w = np.asarray(initial_w, dtype=float)
    for _ in range(max_iters):
        z = np.dot(tx, w)
        # Sigmoid written as exp(-log(1 + exp(-z))) so that large |z| cannot
        # overflow to inf/inf = nan.
        sig = np.exp(-np.logaddexp(0, -z))
        grad = np.dot(np.transpose(tx), sig - y) + 2 * lambda_ * w
        # Update rule
        w = w - gamma * grad

    # Loss at the returned w. The penalty is *added*, with the coefficient whose
    # derivative is the 2 * lambda_ * w term used in the update above.
    z = np.dot(tx, w)
    loss = np.sum(np.logaddexp(0, z) - y * z) + lambda_ * np.square(np.linalg.norm(w))
    return w, loss

In [16]:
# NOTE(review): absolute, machine-specific path - this cell cannot run on another machine as is.
# load_csv_data is not defined in this notebook (presumably it comes from proj1_helpers - verify).
# This rebinds `y` and `tx`, replacing the toy values assigned in the example cell earlier.
y, tx, ids = load_csv_data('/Users/abouzaid/Desktop/train.csv', sub_sample = False)

In [17]:
# NOTE(review): absolute, machine-specific path - this cell cannot run on another machine as is.
# Same loader call as for train.csv, kept under separate *_ts names.
y_ts, tx_ts, ids_ts = load_csv_data('/Users/abouzaid/Desktop/test.csv', sub_sample = False)

In [18]:
def compare_prediction(w_train, x, y):
    """Return the fraction of entries where predict_labels(w_train, x) equals y."""
    # predict_labels is not defined in this notebook (presumably from proj1_helpers).
    predicted = predict_labels(w_train, x)
    n_correct = (y == predicted).sum()
    return n_correct / len(predicted)

In [19]:
# Closed-form least-squares fit on everything loaded from train.csv (no held-out split).
w_ls, loss_ls = least_squares(y, tx)

In [20]:
# Scored on the same (tx, y) the weights were fitted on, so this is a training accuracy.
compare_prediction(w_ls, tx, y)

0.74432799999999999

In [21]:
def build_poly(x,degree):
    """Append the element-wise powers x**2 .. x**degree to x as extra columns.

    For degree < 2 nothing is appended and x itself is returned.
    """
    if degree < 2:
        return x
    higher_powers = [np.power(x, exponent) for exponent in range(2, degree + 1)]
    # Indexing np.c_ with a tuple is the same as listing the arrays separated by commas.
    return np.c_[tuple([x] + higher_powers)]

In [22]:
def ridge_regression_demo(x, y, degree, ratio, seed, lambdas=(2,)):
    """Fit ridge regression on polynomial features and keep the best lambda.

    The data is split with split_data(x, y, ratio, seed), both parts are
    expanded with build_poly(., degree), and one model is fitted per value in
    `lambdas`. The model with the lowest MSE on the held-out part wins.

    Args:
        x: feature matrix.
        y: target vector.
        degree: polynomial degree passed to build_poly.
        ratio: fraction of the data used for training.
        seed: random seed for the split.
        lambdas: iterable of regularisation strengths to try. The default
            (2,) reproduces the single hard-coded value used so far; pass
            e.g. np.logspace(-5, 0, 15) for a real search.

    Returns:
        (w, loss): weights and held-out MSE of the best model, or
        (None, inf) if `lambdas` is empty or no finite loss was obtained.
    """
    x_tr, y_tr, x_te, y_te = split_data(x, y, ratio, seed)
    tr_poly = build_poly(x_tr, degree)
    te_poly = build_poly(x_te, degree)

    # Start from +inf instead of a magic 10000 so that a model with a large
    # loss is still reported rather than silently replaced by w = 0.
    best_w, best_loss = None, np.inf
    for lambda_ in lambdas:
        weight, _ = ridge_regression(y_tr, tr_poly, lambda_)
        mse_test = compute_loss(y_te, te_poly, weight)
        if mse_test < best_loss:
            best_w, best_loss = weight, mse_test
    return best_w, best_loss

In [23]:
def split_data(x, y, ratio, seed=1):
    """
    Shuffle x and y with the same random order and cut them in two.

    The first int(len(x) * ratio) shuffled samples form the training part, the
    remaining ones the test part (ratio 0.8 -> 80% train, 20% test).
    Note that this reseeds NumPy's global random generator with `seed`.
    Returns x_train, y_train, x_test, y_test.
    """
    np.random.seed(seed)

    n_samples = len(x)
    order = list(range(n_samples))
    np.random.shuffle(order)
    cut = int(n_samples * ratio)

    # Index both arrays with the same order so that (x, y) pairs stay aligned.
    x_shuffled, y_shuffled = x[order], y[order]

    return x_shuffled[:cut], y_shuffled[:cut], x_shuffled[cut:], y_shuffled[cut:]

In [None]:
# Hyper-parameters for the experiments below.
# The iterative solvers call initial_w.copy() and update it against tx, so the
# starting point must be a weight vector with one entry per feature, not the scalar 0.
initial_w = np.zeros(tx.shape[1])
max_iters = 10
gamma = 1e-5
lambda_ = 4
# Pass the named constant rather than repeating the literal 4.
w, l = ridge_regression(y, tx, lambda_)

In [None]:
# Settings for the ridge demo: the seed fixes the random split, degree is the highest
# power added by build_poly, and split_ratio is the share of samples used for training.
seed = 2017
degree = 2
split_ratio = 0.5

# Returns the weights and the held-out MSE selected by ridge_regression_demo.
w_r, loss_ridge = ridge_regression_demo(tx, y, degree, split_ratio, seed)

In [None]:
# Training accuracy of the least-squares weights.
# (The function is named compare_prediction; the misspelt call raised NameError.)
compare_prediction(w_ls, tx, y)

In [None]:
import pandas as pd

In [36]:
# Raw test.csv as a DataFrame for the correlation analysis below (same file as the
# load_csv_data call for the test set).
# NOTE(review): absolute, machine-specific path - this cell cannot run on another machine as is.
a = pd.read_csv('/Users/abouzaid/Desktop/test.csv')

In [41]:
# Pairwise correlation between the numeric columns.
# Select the numeric (and boolean) columns explicitly: older pandas dropped other
# columns silently inside corr(), while pandas >= 2.0 raises on them by default.
c = a.select_dtypes(include=["number", "bool"]).corr()#.drop(['PRI_jet_leading_eta'], axis=0)
c

Unnamed: 0,Id,DER_mass_MMC,DER_mass_transverse_met_lep,DER_mass_vis,DER_pt_h,DER_deltaeta_jet_jet,DER_mass_jet_jet,DER_prodeta_jet_jet,DER_deltar_tau_lep,DER_pt_tot,...,PRI_met_phi,PRI_met_sumet,PRI_jet_num,PRI_jet_leading_pt,PRI_jet_leading_eta,PRI_jet_leading_phi,PRI_jet_subleading_pt,PRI_jet_subleading_eta,PRI_jet_subleading_phi,PRI_jet_all_pt
Id,1.0,0.002087,-0.002173,-0.000453,-0.000992,0.000251,0.000539,0.000252,0.000683,0.000117,...,0.001077,-0.000826,-8e-05,-0.000263,-0.000191,-0.000198,0.000282,0.000249,0.000248,-0.000181
DER_mass_MMC,0.002087,1.0,-0.454687,0.173097,0.199962,0.163885,0.161923,0.163738,0.232466,0.04519,...,0.007424,0.222026,0.221683,0.249558,0.246367,0.246365,0.164104,0.16384,0.163837,0.186204
DER_mass_transverse_met_lep,-0.002173,-0.454687,1.0,0.190261,-0.252314,-0.179995,-0.193043,-0.179558,0.039064,0.011608,...,-0.018506,-0.166341,-0.212148,-0.228561,-0.21926,-0.219251,-0.180443,-0.179847,-0.179839,-0.210731
DER_mass_vis,-0.000453,0.173097,0.190261,1.0,-0.062134,-0.029582,-0.037312,-0.029471,0.576132,-0.001481,...,-0.003474,0.057713,-0.024343,-0.015229,-0.009848,-0.009836,-0.030455,-0.029547,-0.029539,-0.050174
DER_pt_h,-0.000992,0.199962,-0.252314,-0.062134,1.0,0.522688,0.532228,0.522689,-0.543396,0.302274,...,0.009842,0.784183,0.623523,0.623694,0.566963,0.566955,0.530724,0.522747,0.522746,0.808229
DER_deltaeta_jet_jet,0.000251,0.163885,-0.179995,-0.029582,0.522688,1.0,0.945823,0.999981,-0.29904,0.282022,...,0.006433,0.617472,0.868103,0.546749,0.523594,0.523595,0.999318,0.999995,0.999996,0.711387
DER_mass_jet_jet,0.000539,0.161923,-0.193043,-0.037312,0.532228,0.945823,1.0,0.944216,-0.302572,0.259253,...,0.006817,0.614925,0.8144,0.523099,0.494948,0.494949,0.947485,0.945285,0.945289,0.718618
DER_prodeta_jet_jet,0.000252,0.163738,-0.179558,-0.029471,0.522689,0.999981,0.944216,1.0,-0.299073,0.282584,...,0.006416,0.617791,0.868327,0.546746,0.523591,0.523591,0.999319,0.999988,0.999989,0.711583
DER_deltar_tau_lep,0.000683,0.232466,0.039064,0.576132,-0.543396,-0.29904,-0.302572,-0.299073,1.0,-0.158079,...,-0.005582,-0.40643,-0.349113,-0.336464,-0.304889,-0.304867,-0.303347,-0.299085,-0.299079,-0.447847
DER_pt_tot,0.000117,0.04519,0.011608,-0.001481,0.302274,0.282022,0.259253,0.282584,-0.158079,1.0,...,-0.00042,0.466815,0.374249,0.208513,0.19128,0.191286,0.290872,0.282241,0.282247,0.420791


In [51]:
# Keep only correlations above 0.7; every other entry is shown as NaN.
c.where(c > 0.7)

Unnamed: 0,Id,DER_mass_MMC,DER_mass_transverse_met_lep,DER_mass_vis,DER_pt_h,DER_deltaeta_jet_jet,DER_mass_jet_jet,DER_prodeta_jet_jet,DER_deltar_tau_lep,DER_pt_tot,...,PRI_met_phi,PRI_met_sumet,PRI_jet_num,PRI_jet_leading_pt,PRI_jet_leading_eta,PRI_jet_leading_phi,PRI_jet_subleading_pt,PRI_jet_subleading_eta,PRI_jet_subleading_phi,PRI_jet_all_pt
Id,1.0,,,,,,,,,,...,,,,,,,,,,
DER_mass_MMC,,1.0,,,,,,,,,...,,,,,,,,,,
DER_mass_transverse_met_lep,,,1.0,,,,,,,,...,,,,,,,,,,
DER_mass_vis,,,,1.0,,,,,,,...,,,,,,,,,,
DER_pt_h,,,,,1.0,,,,,,...,,0.784183,,,,,,,,0.808229
DER_deltaeta_jet_jet,,,,,,1.0,0.945823,0.999981,,,...,,,0.868103,,,,0.999318,0.999995,0.999996,0.711387
DER_mass_jet_jet,,,,,,0.945823,1.0,0.944216,,,...,,,0.8144,,,,0.947485,0.945285,0.945289,0.718618
DER_prodeta_jet_jet,,,,,,0.999981,0.944216,1.0,,,...,,,0.868327,,,,0.999319,0.999988,0.999989,0.711583
DER_deltar_tau_lep,,,,,,,,,1.0,,...,,,,,,,,,,
DER_pt_tot,,,,,,,,,,1.0,...,,,,,,,,,,


In [54]:
# Keep only correlations below -0.5; every other entry is shown as NaN.
c.where(c < -0.5)

Unnamed: 0,Id,DER_mass_MMC,DER_mass_transverse_met_lep,DER_mass_vis,DER_pt_h,DER_deltaeta_jet_jet,DER_mass_jet_jet,DER_prodeta_jet_jet,DER_deltar_tau_lep,DER_pt_tot,...,PRI_met_phi,PRI_met_sumet,PRI_jet_num,PRI_jet_leading_pt,PRI_jet_leading_eta,PRI_jet_leading_phi,PRI_jet_subleading_pt,PRI_jet_subleading_eta,PRI_jet_subleading_phi,PRI_jet_all_pt
Id,,,,,,,,,,,...,,,,,,,,,,
DER_mass_MMC,,,,,,,,,,,...,,,,,,,,,,
DER_mass_transverse_met_lep,,,,,,,,,,,...,,,,,,,,,,
DER_mass_vis,,,,,,,,,,,...,,,,,,,,,,
DER_pt_h,,,,,,,,,-0.543396,,...,,,,,,,,,,
DER_deltaeta_jet_jet,,,,,,,,,,,...,,,,,,,,,,
DER_mass_jet_jet,,,,,,,,,,,...,,,,,,,,,,
DER_prodeta_jet_jet,,,,,,,,,,,...,,,,,,,,,,
DER_deltar_tau_lep,,,,,-0.543396,,,,,,...,,,,,,,,,,
DER_pt_tot,,,,,,,,,,,...,,,,,,,,,,
