# Method Comparison

In [1]:
# all imports
import numpy as np
import pandas as pd
import pandas_profiling
%matplotlib inline
import matplotlib.pyplot as plt

from proj1_helpers import load_csv_data, predict_labels
from implementations import split_data, least_square, least_squares_GD, least_squares_SGD, build_poly, build_poly_test, ridge_regression, logistic_regression

In [2]:
def predict_labels(weights, data):
    """Generates class predictions given weights, and a test data matrix"""
    y_pred = np.dot(data, weights)
    y_pred[np.where(y_pred <= 0)] = -1
    y_pred[np.where(y_pred > 0)] = 1
    
    return y_pred

def print_prediction(y, x, w):
    y_pred = predict_labels(w, x)

    good = np.sum(y_pred == y)
    wrong = len(y_pred) - good

    print(f"Good prediction: {good}/{len(y)} ({100*good/len(y)}%)")
    print(f"Wrong prediction: {wrong}/{len(y)} ({100*wrong/len(y)}%)")

    return good, wrong

def prediction_log(y, x, w):
    def sigmoid(t):
        """apply sigmoid function on t."""
        result = t
        result[t>60] = 1
        result[t<-60] = 0
        result[np.abs(t) < 60] = 1/(1+np.exp(result[np.abs(t) < 60]))

        return result
    #sigmoid = lambda t: 1/(1+np.exp(-t))

    y_pred = sigmoid(np.dot(x, w))
    y_pred[y_pred <= 0.5] = 1
    y_pred[y_pred > 0.5] = 0
    
    good = np.sum(y_pred == y)
    wrong = len(y_pred)-good   
            
    print(f"Good prediction: {good}/{len(y)} ({100*good/len(y)}%)")
    print(f"Wrong prediction: {wrong}/{len(y)} ({100*wrong/len(y)}%)")

    return good, wrong

In [3]:
y, tX, ids = load_csv_data('data/train.csv')

In [4]:
ratio = 0.8
x_train, y_train, x_test, y_test = split_data(tX, y, ratio, random_enabled=False)

## Least Squares

Least squares regression using a normal equations.

In [5]:
loss_ls, w_ls = least_square(y_train, x_train)
print(f"Loss = {loss_ls}")

Loss = 0.8237598026690033


In [6]:
print_prediction(y=y_test, x=x_test, w=w_ls)

Good prediction: 37093/50000 (74.186%)
Wrong prediction: 12907/50000 (25.814%)


(37093, 12907)

## Least Squares GD

Linear regression using gradient descent.

In [7]:
max_iters = 300
gamma = 0.1
initial_w = np.ones(tX.shape[1])

loss_lsgd, w_lsgd = least_squares_GD(y_train, x_train, initial_w, max_iters, gamma)
print(f"Loss = {loss_lsgd}")

  Iter=0, loss=6300.698795844998, diff=6300.698795844998
  Iter=100, loss=84.23727770767559, diff=-6216.461518137323
  Iter=200, loss=0.8057982278360653, diff=-83.43147947983952
  Iter=211, loss=0.7582753709415647, diff=-0.04752285689450053
Loss = 0.7582753709415647


In [8]:
print_prediction(y=y_test, x=x_test, w=w_lsgd)

Good prediction: 31405/50000 (62.81%)
Wrong prediction: 18595/50000 (37.19%)


(31405, 18595)

## Least Squares SGD

Linear regression using stochastic gradient descent.

In [9]:
max_iters = 300
gamma = 0.1
initial_w = np.ones(tX.shape[1])

loss_lssgd, w_lssgd = least_squares_SGD(y_train, x_train, initial_w, max_iters, gamma)
print(f"Loss = {loss_lssgd}")

  Iter=0, loss=6300.698795844998, diff=6300.698795844998
  Iter=100, loss=202.69265096969772, diff=-6098.0061448753
  Iter=200, loss=6.350545003478505, diff=-196.3421059662192
  Iter=264, loss=0.7582753304011594, diff=-5.592269673077346
Loss = 0.7582753304011594


In [10]:
w_lssgd

array([0.00010542, 0.00010542, 0.00010542, 0.00010542, 0.00010542,
       0.00010542, 0.00010542, 0.00010542, 0.00010542, 0.00010542,
       0.00010542, 0.00010542, 0.00010542, 0.00010542, 0.00010542,
       0.00010542, 0.00010542, 0.00010542, 0.00010542, 0.00010542,
       0.00010542, 0.00010542, 0.00010542, 0.00010542, 0.00010542,
       0.00010542, 0.00010542, 0.00010542, 0.00010542, 0.00010542])

In [11]:
prediction_log(y=y_test, x=x_test, w=loss_lssgd)

Good prediction: 0/50000 (0.0%)
Wrong prediction: 50000/50000 (100.0%)




(0, 50000)

## Ridge Regression

Ridge regression using normal equations

In [12]:
#x_train_poly = build_poly(x_train, 8, True, True,True)
#loss_rr, w_rr =  = ridge_regression(y_train, x_train_poly, 0.0001)
#loss_j0, w_pred_jet0

In [13]:
loss_rr, w_rr = ridge_regression(y_train, x_train, 0.0001)
loss_rr, w_rr

(0.3392913481234161,
 array([ 7.95640140e-05, -7.30675186e-03, -5.98167928e-03, -4.74061106e-04,
        -1.64878374e-02,  4.69506730e-04, -2.56830765e-02,  3.21131490e-01,
         2.17751032e-04,  4.29827301e-03, -2.22919788e-01,  9.50531915e-02,
         6.00933663e-02,  3.82670419e-03,  4.70605901e-04, -7.40111252e-04,
         8.75606157e-03, -6.18134956e-04,  2.06202775e-03,  3.73446452e-03,
        -1.52185508e-05, -5.84149473e-04, -3.25102896e-01, -1.26543580e-03,
         7.17071976e-04,  9.92193723e-04, -1.29782335e-03, -5.36689228e-03,
        -1.12833292e-02, -4.07460235e-03]))

In [14]:
print_prediction(y=y_test, x=x_test, w=w_rr)

Good prediction: 37074/50000 (74.148%)
Wrong prediction: 12926/50000 (25.852%)


(37074, 12926)

## Logistic Regression

Logistic regression using gradient descent or SGD.

In [26]:
max_iters = 2000
gamma = 1e-5
lamb = 1
initial_w = np.ones(x_train.shape[1])

loss_lr, w_lr = logistic_regression(y_train, x_train, initial_w, max_iters, gamma)
loss_lr
#w_star, min_loss = get_best_model(losses, ws)
#loss_lr, w_lr = logistic_regression(y_train, tX_train, gamma, max_iters, False)
#w_star, min_loss = get_best_model(losses, ws)

  Iter=0, loss=-395568887.07290137, diff=-395568887.07290137
  Iter=100, loss=-58628137731824.266, diff=-58627742162937.195
  Iter=200, loss=-117256014124142.34, diff=-58627876392318.08
  Iter=300, loss=-175883890516460.5, diff=-58627876392318.16
  Iter=400, loss=-234511766908778.66, diff=-58627876392318.16
  Iter=500, loss=-293139644984082.25, diff=-58627878075303.59
  Iter=600, loss=-351767525141671.2, diff=-58627880157588.94
  Iter=700, loss=-410395405299260.0, diff=-58627880157588.81
  Iter=800, loss=-469023285456848.56, diff=-58627880157588.56
  Iter=900, loss=-527651165614436.7, diff=-58627880157588.125
  Iter=1000, loss=-586279045772024.6, diff=-58627880157587.94
  Iter=1100, loss=-644906925929612.6, diff=-58627880157588.0
  Iter=1200, loss=-703534806087200.8, diff=-58627880157588.125
  Iter=1300, loss=-762162686244788.8, diff=-58627880157588.0
  Iter=1400, loss=-820790566402377.2, diff=-58627880157588.5
  Iter=1500, loss=-879418446559966.0, diff=-58627880157588.75
  Iter=1600, 

-1171971568546331.2

In [29]:
print_prediction(y_test, x_train, w_lr)

Good prediction: 10661/14758 (72.238786%)
Wrong prediction: 4097/14758 (27.761214%)
