In [97]:
# Useful starting lines
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import sys
from implementations import * 
from proj1_helpers import *

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Load the training data into feature matrix, class labels, and event ids:

In [31]:
# Load the training set as three arrays: labels, feature matrix and event ids.
# The printed shapes are a quick check that all three share the same number of rows.
DATA_TRAIN_PATH = '../data/train.csv'
y_train, x_train, ids_train = load_csv_data(DATA_TRAIN_PATH)
print('y_train:\n',y_train.shape)
print('x_train:\n', x_train.shape)
print('ids_train:\n',ids_train.shape)

y_train:
 (250000,)
x_train:
 (250000, 30)
ids_train:
 (250000,)


In [19]:
# Load the test set with the same loader as the training set.
# NOTE(review): y_test presumably holds placeholder labels for the unlabelled
# test file — confirm before using it for any evaluation.
# The same file is loaded again in the submission section at the end of the notebook.
DATA_TEST_PATH = '../data/test.csv'
y_test, x_test, ids_test = load_csv_data(DATA_TEST_PATH)
print('y_test:\n', y_test.shape)
print('x_test:\n', x_test.shape)
print('ids_test:\n',ids_test.shape)

y_test:
 (568238,)
x_test:
 (568238, 30)
ids_test:
 (568238,)


## Do your crazy machine learning thing here :) ...

We noticed that some values are missing (encoded as -999). We replace each missing value with the mean of the non-missing values in its column, then standardize the data.

In [43]:
def preprocess_data(y, x):
    """Impute missing entries with column means, then standardize and build model data.

    Entries of ``x`` equal to ``MISSING_DATA`` (-999) are treated as missing and
    replaced by the mean of the non-missing entries of the same column. The
    imputed matrix is passed through ``standardize`` and the result, together
    with ``y``, through ``build_model_data``.

    Parameters
    ----------
    y : array
        Labels, forwarded unchanged to ``build_model_data``.
    x : 2-D array
        Feature matrix (rows are samples, columns are features). Not modified.

    Returns
    -------
    (new_y, new_x)
        Exactly what ``build_model_data`` returns.
        NOTE(review): ``new_x`` presumably carries an extra leading constant
        column added by ``build_model_data`` — confirm against that helper.

    Notes
    -----
    All statistics are computed from ``x`` itself, so calling this separately
    on train and test data imputes each set with its own column means.
    """
    MISSING_DATA = -999
    print("col n°1 first 6 values, before preprocess:", x[:, 0][:6])

    # Compute the mask once and reuse it for both the means and the assignment.
    missing = x == MISSING_DATA
    col_means = np.ma.getdata(np.ma.array(x, mask=missing).mean(axis=0))

    x2 = x.copy()
    # broadcast_to is a zero-copy view, so only the imputed entries are
    # materialised instead of a full tiled copy of the matrix.
    x2[missing] = np.broadcast_to(col_means, x.shape)[missing]
    print("col n°1 first 6 values, after preprocess: ", x2[:, 0][:6])

    standardized_x, _, _ = standardize(x2)
    new_y, new_x = build_model_data(y, standardized_x)
    print("col n°2 first 6 values, after standardization: ", new_x[:, 1][:6], '\n')
    return new_y, new_x

# Train and test are preprocessed independently, so each set is imputed with its
# own column means (the printed replacement values differ: 121.858... vs 121.871...).
# NOTE(review): presumably the test set should reuse the training statistics — confirm.
new_y_train, new_tx_train = preprocess_data(y_train, x_train)
new_y_test, new_tx_test = preprocess_data(y_test, x_test)

col n°1 first 6 values, before preprocess: [ 138.47   160.937 -999.     143.905  175.864   89.744]
col n°1 first 6 values, after preprocess:  [138.47       160.937      121.85852836 143.905      175.864
  89.744     ]
col n°2 first 6 values, after standardization:  [ 3.14910656e-01  7.40827026e-01 -1.00190288e-12  4.17944237e-01
  1.02380444e+00 -6.08808624e-01] 

col n°1 first 6 values, before preprocess: [-999.     106.398  117.794  135.861   74.159   95.709]
col n°1 first 6 values, after preprocess:  [121.87172934 106.398      117.794      135.861       74.159
  95.709     ]
col n°2 first 6 values, after standardization:  [-2.25397884e-12 -2.95946335e-01 -7.79895413e-02  2.67554982e-01
 -9.12540672e-01 -5.00381239e-01] 



#### Gradient descent for linear regression on polynomial features, sweeping the degree and the step size gamma

In [85]:
def build_poly(x, degree):
    """Polynomial basis expansion of ``x`` for powers j = 0 up to j = degree.

    Parameters
    ----------
    x : 1-D or 2-D array
        Input data; a 1-D input is treated as a single feature column.
    degree : int
        Highest power to include.

    Returns
    -------
    2-D array
        First column is all ones (j = 0), followed by the element-wise powers
        ``x**1, ..., x**degree``. Each power contributes as many columns as
        ``x`` has. The result is always 2-D, including for ``degree == 0``
        (shape ``(len(x), 1)``), so callers can rely on ``.shape[1]``.

    Notes
    -----
    If ``x`` already contains a constant column of ones, every power of it is
    another column of ones, so the output then has duplicated constant columns.
    """
    # Collect all blocks first and concatenate once, instead of re-copying the
    # growing matrix on every iteration.
    blocks = [np.ones(len(x))]
    blocks.extend(x**n for n in range(1, degree + 1))
    return np.column_stack(blocks)

def split_data(x, y, train_ratio, seed=1):
    """Randomly partition (x, y) into a training part and a test part.

    The global NumPy RNG is seeded with ``seed``, the row indices are shuffled,
    and the first ``int(train_ratio * len(x))`` shuffled rows form the training
    part; the remaining rows form the test part.

    Returns ``(x_tr, x_te, y_tr, y_te)``.
    """
    np.random.seed(seed)
    shuffled = np.random.permutation(len(x))
    cut = int(train_ratio * len(x))
    train_rows, test_rows = shuffled[:cut], shuffled[cut:]
    return x[train_rows], x[test_rows], y[train_rows], y[test_rows]

In [86]:
def find_best_param_GD1(y, x, gammas, degrees, seed, max_iters):
    """Grid-search the polynomial degree and the step size for gradient descent.

    Each (degree, gamma) pair is fitted with ``least_squares_GD`` on a 75%
    training split of ``(x, y)`` and scored by the loss that call returns.
    The held-out 25% is not used, so the selection is based on training loss.

    Parameters
    ----------
    y, x : arrays
        Targets and feature matrix, split with ``split_data``.
    gammas : indexable sequence
        Gamma values (step sizes) to try.
    degrees : indexable sequence
        Polynomial degrees passed to ``build_poly``.
    seed : int
        Seed forwarded to ``split_data``.
    max_iters : int
        Iteration count forwarded to ``least_squares_GD``.

    Returns
    -------
    (best_deg, best_gamma)
        The pair with the smallest loss. Diverged runs (NaN or inf loss) are
        ranked last; if every run diverged, the first degree and gamma are
        returned instead of raising.
    """
    # The split depends on neither the degree nor gamma, so do it once.
    x_tr, _, y_tr, _ = split_data(x, y, 0.75, seed)

    best_losses = []
    best_gammas = []
    for deg in degrees:
        # The expansion only depends on the degree, so hoist it out of the gamma loop.
        px_tr = build_poly(x_tr, deg)
        loss_gam_deg = []
        for gam in gammas:
            print('\n\nDeg:', deg, '  Gam:', gam)
            initial_w = np.zeros(px_tr.shape[1], dtype=np.float64)
            loss, _ = least_squares_GD(y_tr, px_tr, initial_w, max_iters, gam)
            loss_gam_deg.append(loss)

        # Large steps diverge to inf/NaN; map both to +inf so argmin ranks them
        # last and never fails on an all-NaN row.
        losses = np.asarray(loss_gam_deg, dtype=np.float64)
        losses = np.where(np.isfinite(losses), losses, np.inf)
        index = int(np.argmin(losses))
        best_gammas.append(gammas[index])
        best_losses.append(losses[index])

    best_index = int(np.argmin(best_losses))
    best_deg = degrees[best_index]
    best_gamma = best_gammas[best_index]

    return best_deg, best_gamma

# Grid-search the degree (1..3) and gamma for plain gradient descent.
best_deg, best_gamma = find_best_param_GD1(
    new_y_train,
    new_tx_train,
    gammas=[0.0001, 0.01, 0.1, 0.2, 0.3],
    degrees=range(1, 4),
    seed=1,
    max_iters=50,
)



Deg: 1   Gam: 0.0001
GD(0/49): loss=0.5, w0=-3.15679999999892e-05, w1=-3.15679999999892e-05
GD(1/49): loss=0.49993147779237457, w0=-6.312968357254305e-05, w1=-6.312968357254305e-05
GD(2/49): loss=0.4998629986351137, w0=-9.468505198042141e-05, w1=-9.468505198042141e-05
GD(3/49): loss=0.4997945624924503, w0=-0.00012623410648614222, w1=-0.00012623410648614222
GD(4/49): loss=0.4997261693286541, w0=-0.00015777684835197172, w1=-0.00015777684835197172
GD(5/49): loss=0.499657819108031, w0=-0.00018931327883992485, w1=-0.00018931327883992485
GD(6/49): loss=0.4995895117949235, w0=-0.00022084339921176429, w1=-0.00022084339921176429
GD(7/49): loss=0.4995212473537103, w0=-0.00025236721072900155, w1=-0.00025236721072900155
GD(8/49): loss=0.4994530257488068, w0=-0.00028388471465289674, w1=-0.00028388471465289674
GD(9/49): loss=0.4993848469446641, w0=-0.0003153959122444584, w1=-0.0003153959122444584
GD(10/49): loss=0.49931671090577023, w0=-0.0003469008047644434, w1=-0.0003469008047644434
GD(11/49): l

GD(48/49): loss=0.37858066565702303, w0=-0.09916122394352105, w1=-0.09916122394352105
GD(49/49): loss=0.37783904069044083, w0=-0.10033347208357879, w1=-0.10033347208357879


Deg: 1   Gam: 0.1
GD(0/49): loss=0.5, w0=-0.0315679999999892, w1=-0.0315679999999892
GD(1/49): loss=0.44223409016035103, w0=-0.05681957255423589, w1=-0.05681957255423589
GD(2/49): loss=0.41192332802929227, w0=-0.0770174876124699, w1=-0.0770174876124699
GD(3/49): loss=0.3936204369176494, w0=-0.09317271543267791, w1=-0.09317271543267791
GD(4/49): loss=0.38193128512670405, w0=-0.10609421540545805, w1=-0.10609421540545805
GD(5/49): loss=0.3742072097436376, w0=-0.11642914978538546, w1=-0.11642914978538546
GD(6/49): loss=0.3689494842041691, w0=-0.1246951901093701, w1=-0.1246951901093701
GD(7/49): loss=0.36526251119382364, w0=-0.1313064083180594, w1=-0.1313064083180594
GD(8/49): loss=0.3625957555607111, w0=-0.1365940037208164, w1=-0.1365940037208164
GD(9/49): loss=0.3606039288019567, w0=-0.14082288758448538, w1=-0.1408228

GD(0/49): loss=0.5, w0=-0.09470399999996759, w1=-0.09470399999996759
GD(1/49): loss=0.391304659584782, w0=-0.13256015298818663, w1=-0.13256015298818663
GD(2/49): loss=0.3720090242473306, w0=-0.14766324760766825, w1=-0.14766324760766825
GD(3/49): loss=0.3650733588715465, w0=-0.15371229746918344, w1=-0.15371229746918344
GD(4/49): loss=0.36114825685871604, w0=-0.1561092254726643, w1=-0.1561092254726643
GD(5/49): loss=0.3583208342156432, w0=-0.15707730532992542, w1=-0.15707730532992542
GD(6/49): loss=0.35608331066085686, w0=-0.15744679513721505, w1=-0.15744679513721505
GD(7/49): loss=0.3542421834848074, w0=-0.1576020106107618, w1=-0.1576020106107618
GD(8/49): loss=0.3526958137170244, w0=-0.15764930728090168, w1=-0.15764930728090168
GD(9/49): loss=0.3513795491658346, w0=-0.15767380519184374, w1=-0.15767380519184374
GD(10/49): loss=0.3502479706301125, w0=-0.15767116934265965, w1=-0.15767116934265965
GD(11/49): loss=0.3492673624610607, w0=-0.1576742386541346, w1=-0.1576742386541346
GD(12/49):

GD(49/49): loss=0.4827338813207113, w0=-0.0014996870647340755, w1=-0.0014996870647340755


Deg: 2   Gam: 0.01
GD(0/49): loss=0.5, w0=-0.00315679999999892, w1=-0.00315679999999892
GD(1/49): loss=0.470626250015293, w0=-0.00557795264425347, w1=-0.00557795264425347
GD(2/49): loss=1.2865716710104655, w0=-0.00828395586920652, w1=-0.00828395586920652
GD(3/49): loss=210.22852818203827, w0=-0.0026502075124915075, w1=-0.0026502075124915075
GD(4/49): loss=52689.724051394114, w0=-0.12474604426711687, w1=-0.12474604426711687
GD(5/49): loss=13233117.905555472, w0=1.7809873943338086, w1=1.7809873943338086
GD(6/49): loss=3323548021.232267, w0=-28.446737696454825, w1=-28.446737696454825
GD(7/49): loss=834721784999.7997, w0=450.5740648262121, w1=450.5740648262121
GD(8/49): loss=209643565848460.3, w0=-7140.88842558582, w1=-7140.88842558582
GD(9/49): loss=5.265278262946975e+16, w0=113167.32535143368, w1=113167.32535143368
GD(10/49): loss=1.322394754833658e+19, w0=-1793457.2107160178, w1=-1793457.210716017

GD(42/49): loss=8.599795984804636e+181, w0=-4.833294580139869e+88, w1=-4.833294580139869e+88
GD(43/49): loss=2.4121560970224703e+186, w0=8.094721203752435e+90, w1=8.094721203752435e+90
GD(44/49): loss=6.765854732697764e+190, w0=-1.3556904153063785e+93, w1=-1.3556904153063785e+93
GD(45/49): loss=1.8977540599663427e+195, w0=2.2704877115491303e+95, w1=2.2704877115491303e+95
GD(46/49): loss=5.323008864960523e+199, w0=-3.802574975888174e+97, w1=-3.802574975888174e+97
GD(47/49): loss=1.493050336404065e+204, w0=6.368489190097967e+99, w1=6.368489190097967e+99
GD(48/49): loss=4.187855710161015e+208, w0=-1.0665839548613193e+102, w1=-1.0665839548613193e+102
GD(49/49): loss=1.1746513176084556e+213, w0=1.7862970302852695e+104, w1=1.7862970302852695e+104


Deg: 2   Gam: 0.2
GD(0/49): loss=0.5, w0=-0.0631359999999784, w1=-0.0631359999999784
GD(1/49): loss=5.444909396294918, w0=0.16798694229820638, w1=0.16798694229820638
GD(2/49): loss=169796.999304659, w0=-7.47061466465756, w1=-7.47061466465756
GD(3/

GD(37/49): loss=1.3475624799697042e+195, w0=5.762623265692504e+95, w1=5.762623265692504e+95
GD(38/49): loss=3.4289356474999256e+200, w0=-2.906868865542991e+98, w1=-2.906868865542991e+98
GD(39/49): loss=8.725086850859932e+205, w0=1.466326395440285e+101, w1=1.466326395440285e+101
GD(40/49): loss=2.220139086324158e+211, w0=-7.396663549056504e+103, w1=-7.396663549056504e+103
GD(41/49): loss=5.649247562662905e+216, w0=3.7311359754601645e+106, w1=3.7311359754601645e+106
GD(42/49): loss=1.4374774184572551e+222, w0=-1.882115574818732e+109, w1=-1.882115574818732e+109
GD(43/49): loss=3.657728406578333e+227, w0=9.494049694981518e+111, w1=9.494049694981518e+111
GD(44/49): loss=9.307260708587836e+232, w0=-4.7891309554387923e+114, w1=-4.7891309554387923e+114
GD(45/49): loss=2.3682759425721934e+238, w0=2.415805272271281e+117, w1=2.415805272271281e+117
GD(46/49): loss=6.02618870984353e+243, w0=-1.2186167318949377e+120, w1=-1.2186167318949377e+120
GD(47/49): loss=1.5333918532822722e+249, w0=6.147129308

  return 1/2*np.mean(e**2)
  ret = umr_sum(arr, axis, dtype, out, keepdims)


GD(48/49): loss=inf, w0=8.562025539886536e+152, w1=8.562025539886536e+152
GD(49/49): loss=inf, w0=-2.0418557327816897e+156, w1=-2.0418557327816897e+156


Deg: 3   Gam: 0.01
GD(0/49): loss=0.5, w0=-0.00315679999999892, w1=-0.00315679999999892
GD(1/49): loss=136537.49922505795, w0=-0.01726188014891997, w1=-0.01726188014891997
GD(2/49): loss=7768955953633415.0, w0=3718.1216450737807, w1=3718.1216450737807
GD(3/49): loss=4.422015222053254e+26, w0=-887387366.5206829, w1=-887387366.5206829
GD(4/49): loss=2.5169686571333793e+37, w0=211710427219766.22, w1=211710427219766.22
GD(5/49): loss=1.4326344218349974e+48, w0=-5.050926350824964e+19, w1=-5.050926350824964e+19
GD(6/49): loss=8.154417739011835e+58, w0=1.2050354495110406e+25, w1=1.2050354495110406e+25
GD(7/49): loss=4.64141637593349e+69, w0=-2.8749388403566365e+30, w1=-2.8749388403566365e+30
GD(8/49): loss=2.6418496898583e+80, w0=6.858946215353942e+35, w1=6.858946215353942e+35
GD(9/49): loss=1.503715508049101e+91, w0=-1.6363876171807662e+41,

GD(45/49): loss=inf, w0=-6.435731001381641e+280, w1=-6.435731001381641e+280
GD(46/49): loss=inf, w0=1.535423943504913e+287, w1=1.535423943504913e+287
GD(47/49): loss=inf, w0=-3.6631840047108665e+293, w1=-3.6631840047108665e+293
GD(48/49): loss=inf, w0=8.739551775998573e+299, w1=8.739551775998573e+299
GD(49/49): loss=inf, w0=-inf, w1=-inf


Deg: 3   Gam: 0.2


  error = y-(tx@w)
  gradient = -1/len(error) * tx.T@error
  return calculate_mse(y-tx@w)


GD(0/49): loss=0.5, w0=-0.0631359999999784, w1=-0.0631359999999784
GD(1/49): loss=54615500.937744334, w0=-4.505584059568402, w1=-4.505584059568402
GD(2/49): loss=1.2430527514423305e+21, w0=29745298.355169334, w1=29745298.355169334
GD(3/49): loss=2.830157358288989e+34, w0=-141984239274489.9, w1=-141984239274489.9
GD(4/49): loss=6.44364502429661e+47, w0=6.77486855498616e+20, w1=6.77486855498616e+20
GD(5/49): loss=1.4670760647826008e+61, w0=-3.2326700972606744e+27, w1=-3.2326700972606744e+27
GD(6/49): loss=3.340209107954337e+74, w0=1.5424883693728187e+34, w1=1.5424883693728187e+34
GD(7/49): loss=7.604920530493771e+87, w0=-7.360077886277432e+40, w1=-7.360077886277432e+40
GD(8/49): loss=1.731472922979473e+101, w0=3.5119063175883764e+47, w1=3.5119063175883764e+47
GD(9/49): loss=3.9421825264182293e+114, w0=-1.675727645017492e+54, w1=-1.675727645017492e+54
GD(10/49): loss=8.975481432799173e+127, w0=7.99583726425867e+60, w1=7.99583726425867e+60
GD(11/49): loss=2.0435194568151915e+141, w0=-3.815

  error = y-(tx@w)
  return calculate_mse(y-tx@w)


GD(48/49): loss=nan, w0=nan, w1=nan
GD(49/49): loss=nan, w0=nan, w1=nan


In [110]:
# Result of the GD grid search; the pair was selected on the loss of the training split.
print('Best degree:', best_deg, '  Best gamma:', best_gamma)

Best degree: 1   Best gamma: 0.3


In [113]:
# Refit with the selected degree and gamma for 100 iterations; the 25% held-out
# part (x_te / y_te) is kept for the accuracy check in the next cell.
x_tr, x_te, y_tr, y_te = split_data(new_tx_train, new_y_train, 0.75)
px_tr, px_te = (build_poly(part, best_deg) for part in (x_tr, x_te))
initial_w = np.zeros(px_tr.shape[1])
gd_loss1, gd_weights1 = least_squares_GD(y_tr, px_tr, initial_w, 100, best_gamma)

GD(0/99): loss=0.5, w0=-0.09470399999996759, w1=-0.09470399999996759
GD(1/99): loss=0.391304659584782, w0=-0.13256015298818663, w1=-0.13256015298818663
GD(2/99): loss=0.3720090242473306, w0=-0.14766324760766825, w1=-0.14766324760766825
GD(3/99): loss=0.3650733588715465, w0=-0.15371229746918344, w1=-0.15371229746918344
GD(4/99): loss=0.36114825685871604, w0=-0.1561092254726643, w1=-0.1561092254726643
GD(5/99): loss=0.3583208342156432, w0=-0.15707730532992542, w1=-0.15707730532992542
GD(6/99): loss=0.35608331066085686, w0=-0.15744679513721505, w1=-0.15744679513721505
GD(7/99): loss=0.3542421834848074, w0=-0.1576020106107618, w1=-0.1576020106107618
GD(8/99): loss=0.3526958137170244, w0=-0.15764930728090168, w1=-0.15764930728090168
GD(9/99): loss=0.3513795491658346, w0=-0.15767380519184374, w1=-0.15767380519184374
GD(10/99): loss=0.3502479706301125, w0=-0.15767116934265965, w1=-0.15767116934265965
GD(11/99): loss=0.3492673624610607, w0=-0.1576742386541346, w1=-0.1576742386541346
GD(12/99):

In [114]:
# Fraction of held-out samples whose predicted label matches the true label.
y_validation = predict_labels(gd_weights1, px_te)
accuracy = np.mean(y_validation == y_te)
print('Accuracy for GD:', accuracy)

Accuracy for GD: 0.743184


#### SGD

In [118]:
def find_best_param_SGD(y, x, gammas, degrees, seed, max_iters, batch_size):
    """Grid-search the polynomial degree and the step size for stochastic gradient descent.

    Each (degree, gamma) pair is fitted with ``least_squares_SGD`` on a 75%
    training split of ``(x, y)`` and scored by the loss that call returns.
    The held-out 25% is not used, so the selection is based on training loss.

    Parameters
    ----------
    y, x : arrays
        Targets and feature matrix, split with ``split_data``.
    gammas : indexable sequence
        Gamma values (step sizes) to try.
    degrees : indexable sequence
        Polynomial degrees passed to ``build_poly``.
    seed : int
        Seed forwarded to ``split_data``.
    max_iters : int
        Iteration count forwarded to ``least_squares_SGD``.
    batch_size : int
        Mini-batch size forwarded to ``least_squares_SGD``.

    Returns
    -------
    (best_deg, best_gamma)
        The pair with the smallest loss. Diverged runs (NaN or inf loss) are
        ranked last; if every run diverged, the first degree and gamma are
        returned instead of raising.
    """
    best_losses = []
    best_gammas = []
    for deg in degrees:
        loss_gam_deg = []
        for gam in gammas:
            print('\n\nDeg:', deg, '  Gam:', gam)
            # The split is deliberately redone before every run: split_data
            # reseeds NumPy's global RNG, so each configuration starts from the
            # same RNG state.
            # NOTE(review): this matters only if least_squares_SGD draws its
            # batches from the global NumPy RNG — confirm.
            x_tr, _, y_tr, _ = split_data(x, y, 0.75, seed)
            px_tr = build_poly(x_tr, deg)
            initial_w = np.zeros(px_tr.shape[1], dtype=np.float64)
            loss, _ = least_squares_SGD(y_tr, px_tr, initial_w, batch_size, max_iters, gam)
            loss_gam_deg.append(loss)

        # Large steps diverge to inf/NaN; map both to +inf so argmin ranks them
        # last and never fails on an all-NaN row.
        losses = np.asarray(loss_gam_deg, dtype=np.float64)
        losses = np.where(np.isfinite(losses), losses, np.inf)
        index = int(np.argmin(losses))
        best_gammas.append(gammas[index])
        best_losses.append(losses[index])

    best_index = int(np.argmin(best_losses))
    best_deg = degrees[best_index]
    best_gamma = best_gammas[best_index]

    return best_deg, best_gamma

# Grid-search the degree (1..3) and gamma for SGD, with a mini-batch of 1% of
# the training split. x_tr is the training split created in the GD section above.
best_deg, best_gamma = find_best_param_SGD(
    new_y_train,
    new_tx_train,
    gammas=[0.0001, 0.01, 0.1, 0.2, 0.3],
    degrees=range(1, 4),
    seed=1,
    max_iters=50,
    batch_size=x_tr.shape[0] // 100,
)



Deg: 1   Gam: 0.0001
SGD(0/49): loss=0.5, w0=-3.461333333333317e-05, w1=-3.461333333333317e-05
SGD(1/49): loss=0.49992952607540336, w0=-6.569957765143918e-05, w1=-6.569957765143918e-05
SGD(2/49): loss=0.4998633965281265, w0=-9.325957175750554e-05, w1=-9.325957175750554e-05
SGD(3/49): loss=0.49979988843454465, w0=-0.00012422727379205706, w1=-0.00012422727379205706
SGD(4/49): loss=0.4997324184364033, w0=-0.00015614949246512834, w1=-0.00015614949246512834
SGD(5/49): loss=0.49966257206758935, w0=-0.00018785241613646054, w1=-0.00018785241613646054
SGD(6/49): loss=0.4995957117600503, w0=-0.00021880127481525738, w1=-0.00021880127481525738
SGD(7/49): loss=0.49952766335681564, w0=-0.000249742730892276, w1=-0.000249742730892276
SGD(8/49): loss=0.4994620986300455, w0=-0.00028174575505244246, w1=-0.00028174575505244246
SGD(9/49): loss=0.499397056599649, w0=-0.00031832672164780083, w1=-0.00031832672164780083
SGD(10/49): loss=0.49932865633319057, w0=-0.00034893444049857676, w1=-0.00034893444049857

SGD(45/49): loss=0.3807214642532077, w0=-0.09721088200799521, w1=-0.09721088200799521
SGD(46/49): loss=0.37995379410331503, w0=-0.09845364880693454, w1=-0.09845364880693454
SGD(47/49): loss=0.37914737260801584, w0=-0.09949916764168037, w1=-0.09949916764168037
SGD(48/49): loss=0.37840339580872606, w0=-0.10068088785058564, w1=-0.10068088785058564
SGD(49/49): loss=0.37769105623977695, w0=-0.10204593349468769, w1=-0.10204593349468769


Deg: 1   Gam: 0.1
SGD(0/49): loss=0.5, w0=-0.03461333333333317, w1=-0.03461333333333317
SGD(1/49): loss=0.44132316333173355, w0=-0.05861765143935462, w1=-0.05861765143935462
SGD(2/49): loss=0.41288018782268543, w0=-0.07429931809352847, w1=-0.07429931809352847
SGD(3/49): loss=0.3950751171908399, w0=-0.09040279895282781, w1=-0.09040279895282781
SGD(4/49): loss=0.3830599278569747, w0=-0.10449169456639644, w1=-0.10449169456639644
SGD(5/49): loss=0.3748598765715332, w0=-0.11550730002835491, w1=-0.11550730002835491
SGD(6/49): loss=0.36938650896021835, w0=-0.123469

SGD(45/49): loss=0.343532844619478, w0=-0.1588923524956749, w1=-0.1588923524956749
SGD(46/49): loss=0.34341302948949776, w0=-0.1586864118900948, w1=-0.1586864118900948
SGD(47/49): loss=0.34295398267303, w0=-0.15444472246151036, w1=-0.15444472246151036
SGD(48/49): loss=0.34333456743597734, w0=-0.1566510101511572, w1=-0.1566510101511572
SGD(49/49): loss=0.3426213869564804, w0=-0.16032367564032432, w1=-0.16032367564032432


Deg: 1   Gam: 0.3
SGD(0/49): loss=0.5, w0=-0.1038399999999995, w1=-0.1038399999999995
SGD(1/49): loss=0.3948228709467481, w0=-0.1333188629541926, w1=-0.1333188629541926
SGD(2/49): loss=0.3711597537474532, w0=-0.13509603559467093, w1=-0.13509603559467093
SGD(3/49): loss=0.36228395813792275, w0=-0.1470449401914815, w1=-0.1470449401914815
SGD(4/49): loss=0.35681439172139595, w0=-0.15539299095220424, w1=-0.15539299095220424
SGD(5/49): loss=0.35504825303917253, w0=-0.15698967761416793, w1=-0.15698967761416793
SGD(6/49): loss=0.3533737634607099, w0=-0.15642156064502769, w1=-

SGD(42/49): loss=0.48564227151524814, w0=-0.00131971111337866, w1=-0.00131971111337866
SGD(43/49): loss=0.4853429770256838, w0=-0.0013493565100820848, w1=-0.0013493565100820848
SGD(44/49): loss=0.4851041045364135, w0=-0.001378584317569014, w1=-0.001378584317569014
SGD(45/49): loss=0.48485250833930116, w0=-0.0014067988151803532, w1=-0.0014067988151803532
SGD(46/49): loss=0.4846231156111461, w0=-0.0014361101611029936, w1=-0.0014361101611029936
SGD(47/49): loss=0.4842826414189189, w0=-0.0014632847482626934, w1=-0.0014632847482626934
SGD(48/49): loss=0.48402717076251967, w0=-0.0014917761454702914, w1=-0.0014917761454702914
SGD(49/49): loss=0.48380425953246303, w0=-0.0015226545537140293, w1=-0.0015226545537140293


Deg: 2   Gam: 0.01
SGD(0/49): loss=0.5, w0=-0.0034613333333333167, w1=-0.0034613333333333167
SGD(1/49): loss=0.4747025369384146, w0=-0.005688940235400405, w1=-0.005688940235400405
SGD(2/49): loss=0.4565065226136758, w0=-0.007703341240198787, w1=-0.007703341240198787
SGD(3/49): lo

SGD(37/49): loss=1.0429979187580812e+76, w0=4.2688170582451634e+36, w1=4.2688170582451634e+36
SGD(38/49): loss=2.4780425896200576e+78, w0=-3.8733917585098894e+37, w1=-3.8733917585098894e+37
SGD(39/49): loss=5.400772004915543e+79, w0=2.8940485779804234e+38, w1=2.8940485779804234e+38
SGD(40/49): loss=4.308219323354366e+81, w0=-3.215979991499225e+39, w1=-3.215979991499225e+39
SGD(41/49): loss=1.4196300115343024e+85, w0=1.0808378735509703e+41, w1=1.0808378735509703e+41
SGD(42/49): loss=5.087847071165014e+87, w0=-2.2398182736823153e+42, w1=-2.2398182736823153e+42
SGD(43/49): loss=1.4453163453069313e+90, w0=3.2668193151901454e+43, w1=3.2668193151901454e+43
SGD(44/49): loss=1.2892487460738598e+92, w0=-3.7962922474202735e+44, w1=-3.7962922474202735e+44
SGD(45/49): loss=6.549012132238656e+93, w0=3.673248227276824e+45, w1=3.673248227276824e+45
SGD(46/49): loss=8.149924455482725e+95, w0=-3.5452921586923045e+46, w1=-3.5452921586923045e+46
SGD(47/49): loss=2.596522741933192e+98, w0=3.47224714594395

SGD(28/49): loss=3.825126381697725e+85, w0=-7.193783157339854e+41, w1=-7.193783157339854e+41
SGD(29/49): loss=2.0194086759984255e+88, w0=1.8278705014864487e+43, w1=1.8278705014864487e+43
SGD(30/49): loss=1.737899137883676e+91, w0=-6.9029245691550135e+44, w1=-6.9029245691550135e+44
SGD(31/49): loss=8.16950731639057e+94, w0=3.34737143732189e+46, w1=3.34737143732189e+46
SGD(32/49): loss=1.1996050050826984e+98, w0=-1.3292409174790838e+48, w1=-1.3292409174790838e+48
SGD(33/49): loss=1.677242518645547e+101, w0=5.45660155603457e+49, w1=5.45660155603457e+49
SGD(34/49): loss=1.7171370030039418e+104, w0=-1.9628785581079305e+51, w1=-1.9628785581079305e+51
SGD(35/49): loss=1.718637123070282e+107, w0=5.145457897158933e+52, w1=5.145457897158933e+52
SGD(36/49): loss=6.568507575846231e+109, w0=-1.2803110915919904e+54, w1=-1.2803110915919904e+54
SGD(37/49): loss=1.7601262834526292e+113, w0=5.554319106013675e+55, w1=5.554319106013675e+55
SGD(38/49): loss=4.05655559251056e+116, w0=-1.636009469060741e+57,

SGD(20/49): loss=5.434682410779376e+80, w0=-8.98465131315758e+36, w1=-8.98465131315758e+36
SGD(21/49): loss=3.481082356222039e+84, w0=6.774656293216368e+38, w1=6.774656293216368e+38
SGD(22/49): loss=2.130664916316772e+89, w0=-4.254418624698996e+40, w1=-4.254418624698996e+40
SGD(23/49): loss=1.6552962135329663e+92, w0=2.187322797915381e+42, w1=2.187322797915381e+42
SGD(24/49): loss=2.1460286917594522e+95, w0=-1.3693433865414866e+44, w1=-1.3693433865414866e+44
SGD(25/49): loss=1.736645570000328e+99, w0=2.2656432641873088e+46, w1=2.2656432641873088e+46
SGD(26/49): loss=1.048366401245826e+104, w0=-3.1630612792571795e+48, w1=-3.1630612792571795e+48
SGD(27/49): loss=3.027821829369603e+110, w0=6.3704939262978066e+50, w1=6.3704939262978066e+50
SGD(28/49): loss=2.3021553106521263e+112, w0=-4.199609575407422e+52, w1=-4.199609575407422e+52
SGD(29/49): loss=3.3857854680020735e+115, w0=1.6261243497970993e+54, w1=1.6261243497970993e+54
SGD(30/49): loss=7.867157121127145e+118, w0=-2.841021966565264e+

SGD(10/49): loss=8.899421401166933e+68, w0=-2.382216673223332e+32, w1=-2.382216673223332e+32
SGD(11/49): loss=5.627528924208679e+75, w0=2.172786736657963e+35, w1=2.172786736657963e+35
SGD(12/49): loss=5.3282062359341185e+81, w0=-3.436659485487353e+38, w1=-3.436659485487353e+38
SGD(13/49): loss=6.616069501986617e+87, w0=7.870319640725236e+41, w1=7.870319640725236e+41
SGD(14/49): loss=8.674007556384087e+94, w0=-8.442292555294466e+44, w1=-8.442292555294466e+44
SGD(15/49): loss=2.2411175471353205e+100, w0=1.4389505975192346e+48, w1=1.4389505975192346e+48
SGD(16/49): loss=1.9922659780781981e+108, w0=-2.6169070478570094e+51, w1=-2.6169070478570094e+51
SGD(17/49): loss=1.1610245230458045e+113, w0=4.434304753606501e+54, w1=4.434304753606501e+54
SGD(18/49): loss=3.093573929100684e+120, w0=-8.630952096881822e+57, w1=-8.630952096881822e+57
SGD(19/49): loss=3.4230489194733175e+127, w0=1.0635478143378307e+61, w1=1.0635478143378307e+61
SGD(20/49): loss=1.1857710225885281e+133, w0=-2.6181460130898332

  return 1/2*np.mean(e**2)
  ret = umr_sum(arr, axis, dtype, out, keepdims)


SGD(46/49): loss=inf, w0=-2.7234074774162136e+151, w1=-2.7234074774162136e+151
SGD(47/49): loss=inf, w0=3.1083064651059934e+154, w1=3.1083064651059934e+154
SGD(48/49): loss=inf, w0=-2.5682131710367904e+157, w1=-2.5682131710367904e+157
SGD(49/49): loss=inf, w0=2.8594738630285137e+160, w1=2.8594738630285137e+160


Deg: 3   Gam: 0.3
SGD(0/49): loss=0.5, w0=-0.1038399999999995, w1=-0.1038399999999995
SGD(1/49): loss=356917.3365911322, w0=7.6409319232067325, w1=7.6409319232067325
SGD(2/49): loss=76003678889834.55, w0=-57586.07218933175, w1=-57586.07218933175
SGD(3/49): loss=2.242027911885665e+21, w0=1630958231.396353, w1=1630958231.396353
SGD(4/49): loss=3.2099127599974185e+30, w0=-16328026369678.605, w1=-16328026369678.605
SGD(5/49): loss=6.624995691835002e+36, w0=1.3635563298698762e+16, w1=1.3635563298698762e+16
SGD(6/49): loss=1.3431002995358254e+43, w0=-1.2708134966344174e+19, w1=-1.2708134966344174e+19
SGD(7/49): loss=1.2927696793482576e+49, w0=1.7187070733982259e+22, w1=1.718707073398

In [119]:
# Result of the SGD grid search; best_deg / best_gamma now overwrite the values
# found by the GD search earlier in the notebook.
print('Best degree:', best_deg, '  Best gamma:', best_gamma)

Best degree: 1   Best gamma: 0.2


In [121]:
x_tr, x_te, y_tr, y_te = split_data(new_tx_train, new_y_train, 0.75)
px_tr = build_poly(x_tr, best_deg)
px_te = build_poly(x_te, best_deg)

# The fit below runs on x_tr, so the weight vector is sized from x_tr itself.
# Sizing it from any other array is what produces the matmul shape mismatch.
# NOTE(review): px_tr / px_te are computed but not used by this fit, and the
# evaluation cell below scores with x_te. If a degree > 1 is ever selected,
# both cells must switch to the polynomial features together.
initial_w = np.zeros(x_tr.shape[1], dtype=np.float64)
batch_size = int(np.floor(x_tr.shape[0] / 100))
sgd_loss, sgd_weights = least_squares_SGD(y_tr, x_tr, initial_w, batch_size, 100, best_gamma)

shapes: (1875,) (1875, 31) (32,)


ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 32 is different from 31)

In [116]:
# Fraction of held-out samples whose predicted label matches the true label,
# using the SGD weights (the label previously said "GD").
y_validation = predict_labels(sgd_weights, x_te)
accuracy = np.mean(y_validation == y_te)
print('Accuracy for SGD:', accuracy)

Accuracy for GD: 0.738848


In [112]:
##GD with k_fold but All-NaN slice encountered
def train_test_crossed(y, x, k_fold, k, seed):
    """Return the train/test split for fold ``k`` of a ``k_fold``-fold cross-validation.

    The rows are shuffled with NumPy's global RNG (seeded with ``seed``) and cut
    into ``k_fold`` folds of ``int(len(y) / k_fold)`` rows each; leftover rows
    that do not fill a whole fold are dropped. Fold ``k`` is the test part and
    the remaining folds, concatenated in fold order, are the training part.

    Parameters
    ----------
    y : 1-D array
        Targets.
    x : array
        Features, indexed along the first axis by the same rows as ``y``.
        Both 1-D and 2-D inputs are supported.
    k_fold : int
        Number of folds.
    k : int
        Index of the fold used as test part.
    seed : int
        Seed for the shuffle.

    Returns
    -------
    (y_tr, y_te, x_tr, x_te)
    """
    num_row = y.shape[0]
    interval = int(num_row / k_fold)
    np.random.seed(seed)
    indices = np.random.permutation(num_row)

    # One row of indices per fold; index the data directly instead of first
    # copying the whole dataset into a (k_fold, interval, ...) array.
    fold_indices = indices[:k_fold * interval].reshape(k_fold, interval)
    test_idx = fold_indices[k]
    train_idx = np.delete(fold_indices, k, axis=0).ravel()

    # The training targets are returned flat, as before.
    y_tr = y[train_idx].ravel()
    y_te = y[test_idx]
    x_tr = x[train_idx]
    x_te = x[test_idx]
    return y_tr, y_te, x_tr, x_te


# Disabled draft of a k-fold version of the GD grid search. It is kept as a bare
# string literal, so nothing in it executes (the cell only echoes the text).
# NOTE(review): issues visible in the draft that must be fixed before re-enabling it:
#   - it reads `ty` / `tx`, which are not parameters of the function;
#   - `np.array([loss, w])` packs the loss together with the weights —
#     presumably a ragged array, TODO confirm;
#   - `best_gamma = np.nanmin(best_gammas)` and `best_w = np.nanmin(best_ws)` take
#     minima instead of picking the entries that belong to the best degree.
'''def find_best_param_GD(gammas, degrees, k_fold, seed, max_iters):
    best_losses = []
    best_gammas = []
    best_ws = []
    for deg in degrees:
        lw_gam_deg = []
        for gam in gammas:
            lw_tmp = []
            print('\n\nDeg:', deg, '  Gam:', gam)
            for k in range(k_fold):
                #k-fold train
                y_tr, y_te, x_tr, x_te = train_test_crossed(ty, tx, k_fold, k, seed)
                px_tr = build_poly(x_tr, deg)
                px_te = build_poly(x_te, deg)
                initial_w = np.zeros(px_tr.shape[1], dtype=np.float64)
                loss, w = least_squares_GD(y_tr, px_tr, initial_w, max_iters, gam)
                lw_tmp.append(np.array([loss, w]))
            lw_gam_deg.append(np.mean(lw_tmp, axis=0))
            
        print('AAA: ', np.array(lw_gam_deg))
        index = np.nanargmin(np.array(lw_gam_deg)[:, 0])
        best_gam_for_deg = gammas[index]
        best_loss_for_deg = lw_gam_deg[index][0]
        best_w_for_deg = lw_gam_deg[index][1]
        best_gammas.append(best_gam_for_deg)
        best_losses.append(best_loss_for_deg)
        best_ws.append(best_w_for_deg)
    best_deg = degrees[np.nanargmin(best_losses)]
    best_gamma = np.nanmin(best_gammas)
    best_w =  np.nanmin(best_ws)
    
    return best_deg, best_gamma, best_w

best_deg, best_gamma, best_w = find_best_param_GD(
    [0.0001, 0.01, 0.1, 0.7], range(2,7), 4, 1, 50)'''

"def find_best_param_GD(gammas, degrees, k_fold, seed, max_iters):\n    best_losses = []\n    best_gammas = []\n    best_ws = []\n    for deg in degrees:\n        lw_gam_deg = []\n        for gam in gammas:\n            lw_tmp = []\n            print('\n\nDeg:', deg, '  Gam:', gam)\n            for k in range(k_fold):\n                #k-fold train\n                y_tr, y_te, x_tr, x_te = train_test_crossed(ty, tx, k_fold, k, seed)\n                px_tr = build_poly(x_tr, deg)\n                px_te = build_poly(x_te, deg)\n                initial_w = np.zeros(px_tr.shape[1], dtype=np.float64)\n                loss, w = least_squares_GD(y_tr, px_tr, initial_w, max_iters, gam)\n                lw_tmp.append(np.array([loss, w]))\n            lw_gam_deg.append(np.mean(lw_tmp, axis=0))\n            \n        print('AAA: ', np.array(lw_gam_deg))\n        index = np.nanargmin(np.array(lw_gam_deg)[:, 0])\n        best_gam_for_deg = gammas[index]\n        best_loss_for_deg = lw_gam_deg[index

## Generate predictions and save output in csv format for submission:

In [None]:
# Reload the test file for the submission; the first returned array (labels) is discarded.
# NOTE(review): tX_test is the raw feature matrix as loaded — it has not gone
# through preprocess_data, unlike the features the models above were fitted on.
DATA_TEST_PATH = '../data/test.csv'
_, tX_test, ids_test = load_csv_data(DATA_TEST_PATH)

In [None]:
OUTPUT_PATH = '../data/submission.csv'
# Predict with weights that exist in this notebook, on test features that went
# through the same preprocessing as the training data. The SGD fit above was
# trained on the preprocessed matrix without polynomial expansion, so
# sgd_weights lines up column-for-column with new_tx_test.
# NOTE(review): the choice of sgd_weights is an assumption; to submit the GD
# model instead, use gd_weights1 with build_poly(new_tx_test, <degree of the GD fit>).
y_pred = predict_labels(sgd_weights, new_tx_test)
create_csv_submission(ids_test, y_pred, OUTPUT_PATH)