In [80]:
# Useful starting lines
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Load the training data into feature matrix, class labels, and event ids:

In [81]:
from proj1_helpers import *
DATA_TRAIN_PATH = '../data/train.csv' 
y, tX, ids = load_csv_data(DATA_TRAIN_PATH)

In [82]:
from implementations import *

## Do your thing crazy machine learning thing here :) ...

In [83]:
# Separate the data according to the value of column 24 (PRI_jet_num) 

def separate(y, tX, ids):
    
    split_x = []
    split_y = []
    split_ids = []
    
    jet_column_nbr = 22
    
    for i in range(4):
        
        split_x.append(tX[np.where(tX[:,jet_column_nbr] == i)])
        split_y.append(y[np.where(tX[:,jet_column_nbr] == i)])
        split_ids.append(ids[np.where(tX[:,jet_column_nbr] == i)])
    
    
    
    return split_x, split_y, split_ids

In [84]:
split_x, split_y, split_ids = separate(y, tX, ids)

In [85]:
#remove the columns from each set of data given a boolean array

def removeNone(data, selection):
   
    cleaned=[]
    
    for i in range(4):
        curr_data = data[i]
        
        cleaned.append(curr_data[:,selection[i]])
      
    return cleaned
    
    

In [86]:
#print statistics about the None values (-999) for each columns
#returns a boolean array that can be used to filter the columns that have 100% of undefined values (-999)
def dataStatistics(data):
    
    stats=[]
    
    for i in range(4):
        
        print("Statistics ")
        print("Type :")
        print(i)
        
        
        nones = (data[i] == -999)
    
        mean = np.sum(nones, axis=0)/nones.shape[0]
        print(mean) 
        stats.append(mean != 1)
    
    return stats
    

In [87]:
selection = dataStatistics(split_x)

Statistics 
Type :
0
[0.26145747 0.         0.         0.         1.         1.
 1.         0.         0.         0.         0.         0.
 1.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         1.
 1.         1.         1.         1.         1.         0.        ]
Statistics 
Type :
1
[0.09751883 0.         0.         0.         1.         1.
 1.         0.         0.         0.         0.         0.
 1.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         1.         1.         1.         0.        ]
Statistics 
Type :
2
[0.05859584 0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.        ]
Statistics 
Type :
3
[0.0

In [88]:
cleaned = removeNone(split_x, selection)



# Now we can either drop the lines with residual Nones or replace the Nones by the median value

In [89]:
#replace the value of column 0 (can be None sometimes) by the median value of this column

def putMedianInsteadOfNone(cleaned):
    
    completed_data = []
    
    for i in range(4):
        #current PRI_jet_num
        current = cleaned[i]
        
        median = np.median(current[np.where(current[:,0] != -999)], axis = 0)
        
        #replace -999 by median value
        current[np.where(current[:,0] == -999)] = median
        
        completed_data.append(current)
    
    
    return completed_data
    
    

In [203]:
cleaned_with_median = putMedianInsteadOfNone(cleaned)



In [94]:
#Instead of putting the median we can simply drop the data where columns 0 == -999
def dropLineIfNone(cleaned, split_y, split_ids):
    
    res_x=[]
    res_y=[]
    res_ids=[]
    
    for i in range(4):
        
        current = cleaned[i]
        
        drop_indexes = np.where(current[:,0] != -999)
        
        res_x.append(current[drop_indexes])
        res_y.append(current[drop_indexes])
        res_ids.append(current[drop_indexes])
        
    return res_x, res_y, res_ids

In [14]:
dropped_x, dropped_y, dropped_ids = dropLineIfNone(cleaned, split_y, split_ids)


## At this point, the first values in each of the split data has a PRI_jet_num = 0, then 1 and so on. The data is clean and we can work with it.

In [91]:
initial_w = np.zeros(cleaned_with_median[0].shape[1])
median_cleaned_x_0_lsq_GD = least_squares_GD(split_y[0], normalize(cleaned_with_median[0]), initial_w, 500, 0.1)

In [92]:
print(median_cleaned_x_0_lsq_GD)

(array([ 0.2155689 , -0.53554557, -0.40401874,  0.03959931,  0.08147027,
        0.03959934,  0.16148251,  0.03462872,  0.10957412,  0.5842968 ,
        0.05218412,  0.04930115, -0.04715568,  0.05726908,  0.0508691 ,
       -0.07238437,  0.03803752, -0.00064042,  0.05024209,  0.05024209]), 0.29714565802617193)


In [93]:
median_cleaned_x_0_lsq_SGD = least_squares_SGD(split_y[0], normalize(cleaned_with_median[0]), initial_w, 500, 0.001)

In [94]:
print(median_cleaned_x_0_lsq_SGD)

(array([-0.04664514, -0.08499222, -0.03314077,  0.01911169,  0.03011799,
        0.01911165, -0.01748823,  0.02998163,  0.03474344,  0.03542557,
        0.0327547 ,  0.03212463, -0.01669425,  0.03315438,  0.03390444,
       -0.01023011,  0.03346313, -0.03834954,  0.03278724,  0.03278724]), 0.36790483932056484)


In [95]:
median_cleaned_x_0_lsq = least_squares(split_y[0], normalize(cleaned_with_median[0]))

In [96]:
print(median_cleaned_x_0_lsq)

(array([ 1.19348797e-01, -3.20910781e-01, -7.11010616e-01, -3.61745390e+03,
        2.92434098e+01,  3.61754007e+03, -7.97192079e+00, -1.48451124e+01,
       -9.21399265e-01,  8.74154018e+00,  1.22199385e-02,  9.00441921e-03,
        8.51383385e+00,  1.42330751e-01,  1.82847018e-02, -8.31032487e-02,
       -1.72379531e-01,  3.24838123e-02, -9.72704906e+00, -9.72704906e+00]), 0.2599109628362648)


In [97]:
median_cleaned_x_0_ridge = ridge_regression(split_y[0], normalize(cleaned_with_median[0]), 0.037)

In [98]:
print(median_cleaned_x_0_ridge)

(array([ 0.06227524, -0.46079671, -0.13201699,  0.0411997 ,  0.05966671,
        0.04119971,  0.1212882 ,  0.04336166,  0.07570685,  0.32090765,
        0.05564915,  0.05486386, -0.05369323,  0.05732194,  0.05516295,
       -0.06514523,  0.05211816,  0.00402923,  0.05500482,  0.05500482]), 0.3046039439677196)


In [99]:
median_cleaned_x_0_log = logistic_regression(split_y[0], normalize(cleaned_with_median[0]), initial_w, 10000, 0.01)

  return 1.0/(1 + np.exp(-z))


KeyboardInterrupt: 

In [None]:
print(median_cleaned_x_0_log)

## Features Expension

In [204]:
def build_poly(x, degree):
   
    x_extended = np.ones(shape=x.shape)

    for d in range (1, degree +1):
        x_extended = np.c_[x_extended, x**d]
        

    return x_extended

In [194]:
a = np.array([[1,2,3,4],[5,6,7,8]])
build_poly(a, 2)

array([[ 1.,  1.,  1.,  1.,  1.,  2.,  3.,  4.,  1.,  4.,  9., 16.],
       [ 1.,  1.,  1.,  1.,  5.,  6.,  7.,  8., 25., 36., 49., 64.]])

## Cross Validation

In [205]:
#method to split the training set into a (new) training set and a test set (same as in lab03)

def split_data(x, y, ratio, seed=1):
    """
    split the dataset based on the split ratio. If ratio is 0.8 
    you will have 80% of your data set dedicated to training 
    and the rest dedicated to testing
    """
    # set seed
    np.random.seed(seed)
 
    # split the data based on the given ratio

    training_nbr = int(x.shape[0] * ratio)
    indexes = np.random.choice(x.shape[0],training_nbr, replace=False)
    
    x_train = x[indexes]
    y_train = y[indexes]
    x_test = np.delete(x, indexes, axis = 0)
    y_test = np.delete(y, indexes, axis = 0)
    
    
    return x_train, y_train, x_test, y_test

In [210]:
#perform cross-validation 

def crossValidation(x, y, splitRatio, degrees, seed =1):
    
    x_train, y_train, x_test, y_test = split_data(x, y, splitRatio, seed)
    
    a_training = []
    a_testing = []
    weights = []
    degr = []
    
    # define parameter (just add more for loops if there are more parameters for the model)
    lambdas = np.arange(0,0.03,0.0005)
    
    for ind, lambda_ in enumerate(lambdas):
        
        for ind_d, d in enumerate(degrees):
            
            
            #perform polynomial feature expension
            x_test_poly = build_poly(x_test,d)
            x_train_poly = build_poly(x_train, d)
            
            #normalize data (DANGER: the test set must be normalized with the training set's mean and std)
            mean = np.mean(x_train_poly)
            std = np.std(x_train_poly)
            
            x_train_ready = (x_train_poly - mean) / std
            x_test_ready = (x_test_poly - mean) / std
           
            
            #train model with these parameters and compute the error (MSE or RMSE)
        
            #ideal : lambdas = np.arange(0,0.03,0.001)
            #w_star, e_tr = ridge_regression(y_train,x_train_ready, lambda_)
        
            #ideal : lambdas = np.arange(0,0.3,0.1)
            #w_star, e_tr = logistic_regression(y_train, x_train_ready,np.ones(x_train_ready.shape[1])  ,400, lambda_)
        
            #don't usel least squares with lambda bigger than 0.35 ideal: lambdas = np.arange(0.001,0.13,0.01)
            #w_star, e_tr = least_squares_GD(y_train, x_train,np.ones(x_train.shape[1])  ,400, lambda_)    
            #w_star, e_tr = least_squares_SGD(y_train, x_train,np.ones(x_train.shape[1])  ,400, lambda_)
        
            #DON'T REALLY NEED TO DO CROSS VALIDATION FOR THIS ONE ;) BUT PRACTICAL TO RUN IT HERE
            w_star, e_tr = least_squares(y_train, x_train_ready)  
        
            degr.append(d)
        
            #compare the prediction with the reality
            accuracy_training = np.count_nonzero(predict_labels(w_star, x_train_ready) + y_train)/len(y_train)
            accuracy_testing = np.count_nonzero(predict_labels(w_star, x_test_ready) + y_test)/len(y_test)
        
            a_training.append(accuracy_training)
            a_testing.append(accuracy_testing)
            weights.append(w_star)
            print("lambda={l:.5f},degree={deg}, Training Accuracy={tr}, Testing Accuracy={te}".format(
                   l=lambda_, tr=a_training[ind*len(degrees)+ind_d], te=a_testing[ind*len(degrees)+ind_d], deg=d))
        
            #plt.plot(lambdas, a_training,'r--' , lambdas, a_testing, 'g--')
            #plt.show
    
    return weights[np.argmax(a_testing)], degr[np.argmax(a_testing)], a_testing[np.argmax(a_testing)]

In [211]:
degrees = [1,2,3,5,7,12]
crossValidation(cleaned_with_median[0], split_y[0], 0.9, degrees ,6)

lambda=0.00000,degree=1, Training Accuracy=0.8244570233871954, Testing Accuracy=0.8282626100880705
lambda=0.00000,degree=2, Training Accuracy=0.8282492410004337, Testing Accuracy=0.8347678142514011
lambda=0.00000,degree=3, Training Accuracy=0.8011587949422271, Testing Accuracy=0.8096477181745396
lambda=0.00000,degree=5, Training Accuracy=0.7441754428887579, Testing Accuracy=0.7471977582065652
lambda=0.00000,degree=7, Training Accuracy=0.7445201899445069, Testing Accuracy=0.747898318654924
lambda=0.00000,degree=12, Training Accuracy=0.744531310817273, Testing Accuracy=0.747898318654924
lambda=0.00050,degree=1, Training Accuracy=0.8244570233871954, Testing Accuracy=0.8282626100880705
lambda=0.00050,degree=2, Training Accuracy=0.8282492410004337, Testing Accuracy=0.8347678142514011
lambda=0.00050,degree=3, Training Accuracy=0.8011587949422271, Testing Accuracy=0.8096477181745396
lambda=0.00050,degree=5, Training Accuracy=0.7441754428887579, Testing Accuracy=0.7471977582065652


KeyboardInterrupt: 

In [207]:
def put_together(labels, indices):
    
    #First build first chunk
    ids_0 = np.matrix(indices[0]).T
    lab_0 = np.matrix(labels[0]).T
    
    unsorted_res = np.concatenate((ids_0, lab_0), axis=1)
    
    for i in range(1,len(labels)):
        ids = np.matrix(indices[i]).T
        lab = np.matrix(labels[i]).T
        by_jet_num = np.concatenate((ids, lab), axis=1)
        unsorted_res = np.concatenate((unsorted_res, by_jet_num), axis=0)
        
    sorted_res = unsorted_res[unsorted_res[:,0].argsort()]
    
    return sorted_res[:,0,1]

In [135]:
a = np.matrix(split_y[0])
b = np.matrix(split_ids[0])
print(a.shape)
print(b.T.shape)
c = np.concatenate((b.T, a), axis=1)

a2 = np.matrix(split_y[1])
b2 = np.matrix(split_ids[1])
print(a2.shape)
print(b2.T.shape)
c2 = np.concatenate((b2.T, a2), axis=1)
d = np.concatenate((c, c2), axis=0)
print(d.shape)
print(d)

d = d[d[:,0].argsort()]
e = d[:,0,:]
print(e)
print(e.shape)

(99913, 1)
(99913, 1)
(77544, 1)
(77544, 1)
(177457, 2)
[[ 1.00003e+05 -1.00000e+00]
 [ 1.00004e+05 -1.00000e+00]
 [ 1.00008e+05 -1.00000e+00]
 ...
 [ 3.49989e+05 -1.00000e+00]
 [ 3.49991e+05  1.00000e+00]
 [ 3.49997e+05  1.00000e+00]]
[[ 1.00003e+05 -1.00000e+00]
 [ 1.00003e+05 -1.00000e+00]
 [ 1.00003e+05 -1.00000e+00]
 ...
 [ 1.00003e+05 -1.00000e+00]
 [ 1.00003e+05 -1.00000e+00]
 [ 1.00003e+05 -1.00000e+00]]
(177457, 2)


## Generate predictions and save ouput in csv format for submission:

In [103]:
DATA_TEST_PATH = '../data/test.csv' 
y_donotUse, tX_test, ids_test = load_csv_data(DATA_TEST_PATH)

In [208]:
#separate data with respect to column 24 and remove None
split_x_test, _, split_ids_test =  separate(y_donotUse, tX_test, ids_test)


split_x_cleaned_test = removeNone(split_x_test, dataStatistics(split_x_test))

#median instead of None
split_x_with_median = putMedianInsteadOfNone(split_x_cleaned_test)

#line dropped when None
#split_x_drop_lines, split_y_dropped_split_indexes_dropped = dropLineIfNone(split_x_cleaned_test, _, split_ids_test)

#degrees for polynomial feature expension
degrees = [1,2,3,5]

y_res = []

for i in range(4):
    #training
    w_star, d, accuracy = crossValidation(cleaned_with_median[i], split_y[i], 0.9, degrees ,6)
    
    #prediction
    y_res.append(predict_labels(w_star, normalize(build_poly(split_x_with_median[i], d))))





Statistics 
Type :
0
[0.2605448 0.        0.        0.        1.        1.        1.
 0.        0.        0.        0.        0.        1.        0.
 0.        0.        0.        0.        0.        0.        0.
 0.        0.        1.        1.        1.        1.        1.
 1.        0.       ]
Statistics 
Type :
1
[0.09834149 0.         0.         0.         1.         1.
 1.         0.         0.         0.         0.         0.
 1.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         1.         1.         1.         0.        ]
Statistics 
Type :
2
[0.05881481 0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.        ]
Statistics 
Type :
3
[0.06376737 0.         0.     

lambda=0.00850,degree=3, Training Accuracy=0.8282492410004337, Testing Accuracy=0.8347678142514011
lambda=0.00850,degree=5, Training Accuracy=0.8282492410004337, Testing Accuracy=0.8347678142514011
lambda=0.00900,degree=1, Training Accuracy=0.8011587949422271, Testing Accuracy=0.8096477181745396
lambda=0.00900,degree=2, Training Accuracy=0.8011587949422271, Testing Accuracy=0.8096477181745396
lambda=0.00900,degree=3, Training Accuracy=0.8011587949422271, Testing Accuracy=0.8096477181745396
lambda=0.00900,degree=5, Training Accuracy=0.8011587949422271, Testing Accuracy=0.8096477181745396
lambda=0.00950,degree=1, Training Accuracy=0.7441754428887579, Testing Accuracy=0.7471977582065652
lambda=0.00950,degree=2, Training Accuracy=0.7441754428887579, Testing Accuracy=0.7471977582065652
lambda=0.00950,degree=3, Training Accuracy=0.7441754428887579, Testing Accuracy=0.7471977582065652
lambda=0.00950,degree=5, Training Accuracy=0.7441754428887579, Testing Accuracy=0.7471977582065652
lambda=0.0

lambda=0.01900,degree=3, Training Accuracy=0.8011587949422271, Testing Accuracy=0.8096477181745396
lambda=0.01900,degree=5, Training Accuracy=0.8011587949422271, Testing Accuracy=0.8096477181745396
lambda=0.01950,degree=1, Training Accuracy=0.7441754428887579, Testing Accuracy=0.7471977582065652
lambda=0.01950,degree=2, Training Accuracy=0.7441754428887579, Testing Accuracy=0.7471977582065652
lambda=0.01950,degree=3, Training Accuracy=0.7441754428887579, Testing Accuracy=0.7471977582065652
lambda=0.01950,degree=5, Training Accuracy=0.7441754428887579, Testing Accuracy=0.7471977582065652
lambda=0.02000,degree=1, Training Accuracy=0.8244570233871954, Testing Accuracy=0.8282626100880705
lambda=0.02000,degree=2, Training Accuracy=0.8244570233871954, Testing Accuracy=0.8282626100880705
lambda=0.02000,degree=3, Training Accuracy=0.8244570233871954, Testing Accuracy=0.8282626100880705
lambda=0.02000,degree=5, Training Accuracy=0.8244570233871954, Testing Accuracy=0.8282626100880705
lambda=0.0

lambda=0.02950,degree=3, Training Accuracy=0.7441754428887579, Testing Accuracy=0.7471977582065652
lambda=0.02950,degree=5, Training Accuracy=0.7441754428887579, Testing Accuracy=0.7471977582065652
lambda=0.00000,degree=1, Training Accuracy=0.7147258163894024, Testing Accuracy=0.7120567375886525
lambda=0.00000,degree=2, Training Accuracy=0.7147258163894024, Testing Accuracy=0.7120567375886525
lambda=0.00000,degree=3, Training Accuracy=0.7147258163894024, Testing Accuracy=0.7120567375886525
lambda=0.00000,degree=5, Training Accuracy=0.7147258163894024, Testing Accuracy=0.7120567375886525
lambda=0.00050,degree=1, Training Accuracy=0.7607216036911261, Testing Accuracy=0.7595099935525468
lambda=0.00050,degree=2, Training Accuracy=0.7607216036911261, Testing Accuracy=0.7595099935525468
lambda=0.00050,degree=3, Training Accuracy=0.7607216036911261, Testing Accuracy=0.7595099935525468
lambda=0.00050,degree=5, Training Accuracy=0.7607216036911261, Testing Accuracy=0.7595099935525468
lambda=0.0

lambda=0.01000,degree=3, Training Accuracy=0.7147258163894024, Testing Accuracy=0.7120567375886525
lambda=0.01000,degree=5, Training Accuracy=0.7147258163894024, Testing Accuracy=0.7120567375886525
lambda=0.01050,degree=1, Training Accuracy=0.7607216036911261, Testing Accuracy=0.7595099935525468
lambda=0.01050,degree=2, Training Accuracy=0.7607216036911261, Testing Accuracy=0.7595099935525468
lambda=0.01050,degree=3, Training Accuracy=0.7607216036911261, Testing Accuracy=0.7595099935525468
lambda=0.01050,degree=5, Training Accuracy=0.7607216036911261, Testing Accuracy=0.7595099935525468
lambda=0.01100,degree=1, Training Accuracy=0.7285245525799194, Testing Accuracy=0.7218568665377176
lambda=0.01100,degree=2, Training Accuracy=0.7285245525799194, Testing Accuracy=0.7218568665377176
lambda=0.01100,degree=3, Training Accuracy=0.7285245525799194, Testing Accuracy=0.7218568665377176
lambda=0.01100,degree=5, Training Accuracy=0.7285245525799194, Testing Accuracy=0.7218568665377176
lambda=0.0

lambda=0.02050,degree=3, Training Accuracy=0.7607216036911261, Testing Accuracy=0.7595099935525468
lambda=0.02050,degree=5, Training Accuracy=0.7607216036911261, Testing Accuracy=0.7595099935525468
lambda=0.02100,degree=1, Training Accuracy=0.7285245525799194, Testing Accuracy=0.7218568665377176
lambda=0.02100,degree=2, Training Accuracy=0.7285245525799194, Testing Accuracy=0.7218568665377176
lambda=0.02100,degree=3, Training Accuracy=0.7285245525799194, Testing Accuracy=0.7218568665377176
lambda=0.02100,degree=5, Training Accuracy=0.7285245525799194, Testing Accuracy=0.7218568665377176
lambda=0.02150,degree=1, Training Accuracy=0.6457178065311152, Testing Accuracy=0.6520954223081883
lambda=0.02150,degree=2, Training Accuracy=0.6457178065311152, Testing Accuracy=0.6520954223081883
lambda=0.02150,degree=3, Training Accuracy=0.6457178065311152, Testing Accuracy=0.6520954223081883
lambda=0.02150,degree=5, Training Accuracy=0.6457178065311152, Testing Accuracy=0.6520954223081883
lambda=0.0

lambda=0.00100,degree=3, Training Accuracy=0.6787896164619219, Testing Accuracy=0.667923779277491
lambda=0.00100,degree=5, Training Accuracy=0.6787896164619219, Testing Accuracy=0.667923779277491
lambda=0.00150,degree=1, Training Accuracy=0.6182042742771443, Testing Accuracy=0.6073838824930528
lambda=0.00150,degree=2, Training Accuracy=0.6182042742771443, Testing Accuracy=0.6073838824930528
lambda=0.00150,degree=3, Training Accuracy=0.6182042742771443, Testing Accuracy=0.6073838824930528
lambda=0.00150,degree=5, Training Accuracy=0.6182042742771443, Testing Accuracy=0.6073838824930528
lambda=0.00200,degree=1, Training Accuracy=0.7369488983480735, Testing Accuracy=0.722707423580786
lambda=0.00200,degree=2, Training Accuracy=0.7369488983480735, Testing Accuracy=0.722707423580786
lambda=0.00200,degree=3, Training Accuracy=0.7369488983480735, Testing Accuracy=0.722707423580786
lambda=0.00200,degree=5, Training Accuracy=0.7369488983480735, Testing Accuracy=0.722707423580786
lambda=0.00250,d

lambda=0.01150,degree=3, Training Accuracy=0.6182042742771443, Testing Accuracy=0.6073838824930528
lambda=0.01150,degree=5, Training Accuracy=0.6182042742771443, Testing Accuracy=0.6073838824930528
lambda=0.01200,degree=1, Training Accuracy=0.7369488983480735, Testing Accuracy=0.722707423580786
lambda=0.01200,degree=2, Training Accuracy=0.7369488983480735, Testing Accuracy=0.722707423580786
lambda=0.01200,degree=3, Training Accuracy=0.7369488983480735, Testing Accuracy=0.722707423580786
lambda=0.01200,degree=5, Training Accuracy=0.7369488983480735, Testing Accuracy=0.722707423580786
lambda=0.01250,degree=1, Training Accuracy=0.7586952206612118, Testing Accuracy=0.7489082969432315
lambda=0.01250,degree=2, Training Accuracy=0.7586952206612118, Testing Accuracy=0.7489082969432315
lambda=0.01250,degree=3, Training Accuracy=0.7586952206612118, Testing Accuracy=0.7489082969432315
lambda=0.01250,degree=5, Training Accuracy=0.7586952206612118, Testing Accuracy=0.7489082969432315
lambda=0.01300

lambda=0.02200,degree=3, Training Accuracy=0.7369488983480735, Testing Accuracy=0.722707423580786
lambda=0.02200,degree=5, Training Accuracy=0.7369488983480735, Testing Accuracy=0.722707423580786
lambda=0.02250,degree=1, Training Accuracy=0.7586952206612118, Testing Accuracy=0.7489082969432315
lambda=0.02250,degree=2, Training Accuracy=0.7586952206612118, Testing Accuracy=0.7489082969432315
lambda=0.02250,degree=3, Training Accuracy=0.7586952206612118, Testing Accuracy=0.7489082969432315
lambda=0.02250,degree=5, Training Accuracy=0.7586952206612118, Testing Accuracy=0.7489082969432315
lambda=0.02300,degree=1, Training Accuracy=0.6787896164619219, Testing Accuracy=0.667923779277491
lambda=0.02300,degree=2, Training Accuracy=0.6787896164619219, Testing Accuracy=0.667923779277491
lambda=0.02300,degree=3, Training Accuracy=0.6787896164619219, Testing Accuracy=0.667923779277491
lambda=0.02300,degree=5, Training Accuracy=0.6787896164619219, Testing Accuracy=0.667923779277491
lambda=0.02350,d

lambda=0.00250,degree=5, Training Accuracy=0.761417757056199, Testing Accuracy=0.7753721244925575
lambda=0.00300,degree=1, Training Accuracy=0.7141926104176066, Testing Accuracy=0.7298150654036987
lambda=0.00300,degree=2, Training Accuracy=0.7141926104176066, Testing Accuracy=0.7298150654036987
lambda=0.00300,degree=3, Training Accuracy=0.7141926104176066, Testing Accuracy=0.7298150654036987
lambda=0.00300,degree=5, Training Accuracy=0.7141926104176066, Testing Accuracy=0.7298150654036987
lambda=0.00350,degree=1, Training Accuracy=0.7035644457813205, Testing Accuracy=0.7176364456472711
lambda=0.00350,degree=2, Training Accuracy=0.7035644457813205, Testing Accuracy=0.7176364456472711
lambda=0.00350,degree=3, Training Accuracy=0.7035644457813205, Testing Accuracy=0.7176364456472711
lambda=0.00350,degree=5, Training Accuracy=0.7035644457813205, Testing Accuracy=0.7176364456472711
lambda=0.00400,degree=1, Training Accuracy=0.7264250263197474, Testing Accuracy=0.7392873252142534
lambda=0.00

lambda=0.01300,degree=5, Training Accuracy=0.7141926104176066, Testing Accuracy=0.7298150654036987
lambda=0.01350,degree=1, Training Accuracy=0.7035644457813205, Testing Accuracy=0.7176364456472711
lambda=0.01350,degree=2, Training Accuracy=0.7035644457813205, Testing Accuracy=0.7176364456472711
lambda=0.01350,degree=3, Training Accuracy=0.7035644457813205, Testing Accuracy=0.7176364456472711
lambda=0.01350,degree=5, Training Accuracy=0.7035644457813205, Testing Accuracy=0.7176364456472711
lambda=0.01400,degree=1, Training Accuracy=0.7264250263197474, Testing Accuracy=0.7392873252142534
lambda=0.01400,degree=2, Training Accuracy=0.7264250263197474, Testing Accuracy=0.7392873252142534
lambda=0.01400,degree=3, Training Accuracy=0.7264250263197474, Testing Accuracy=0.7392873252142534
lambda=0.01400,degree=5, Training Accuracy=0.7264250263197474, Testing Accuracy=0.7392873252142534
lambda=0.01450,degree=1, Training Accuracy=0.761417757056199, Testing Accuracy=0.7753721244925575
lambda=0.01

lambda=0.02350,degree=5, Training Accuracy=0.7035644457813205, Testing Accuracy=0.7176364456472711
lambda=0.02400,degree=1, Training Accuracy=0.7264250263197474, Testing Accuracy=0.7392873252142534
lambda=0.02400,degree=2, Training Accuracy=0.7264250263197474, Testing Accuracy=0.7392873252142534
lambda=0.02400,degree=3, Training Accuracy=0.7264250263197474, Testing Accuracy=0.7392873252142534
lambda=0.02400,degree=5, Training Accuracy=0.7264250263197474, Testing Accuracy=0.7392873252142534
lambda=0.02450,degree=1, Training Accuracy=0.761417757056199, Testing Accuracy=0.7753721244925575
lambda=0.02450,degree=2, Training Accuracy=0.761417757056199, Testing Accuracy=0.7753721244925575
lambda=0.02450,degree=3, Training Accuracy=0.761417757056199, Testing Accuracy=0.7753721244925575
lambda=0.02450,degree=5, Training Accuracy=0.761417757056199, Testing Accuracy=0.7753721244925575
lambda=0.02500,degree=1, Training Accuracy=0.7141926104176066, Testing Accuracy=0.7298150654036987
lambda=0.02500

In [209]:
OUTPUT_PATH = '../data/submission.csv'
#y_pred = predict_labels(weights, tX_test)
y_pred = put_together(y_res, split_ids_test)
create_csv_submission(ids_test, y_pred, OUTPUT_PATH)