## Learning from Data Week 6

### Regularization with Weight Decay

In [2]:
from __future__ import division
%matplotlib inline
import pandas as pd
import numpy as np

In [134]:
# Read the training and test sets with identical parser settings (fields
# separated by runs of spaces, no header row), then name the three columns.
train, test = [
    pd.read_table(path, sep=" +", header=None, engine='python')
    for path in ("in.dta", "out.dta")
]
train.columns = ["x1", "x2", "y"]
test.columns = ["x1", "x2", "y"]

In [49]:
# Preview the first rows of the freshly loaded training data.
train.head()

Unnamed: 0,x1,x2,y
0,-0.77947,0.838221,1
1,0.155635,0.895377,1
2,-0.059908,-0.71778,1
3,0.207596,0.758933,1
4,-0.195983,-0.375487,-1


In [135]:
# Extend both data sets in place with the constant term and the nonlinear
# features: squares, cross term, and absolute difference / sum of x1 and x2.
for df in (train, test):
    df['one'] = 1
    df['x1^2'] = df['x1'].pow(2)
    df['x2^2'] = df['x2'].pow(2)
    df['x1x2'] = df['x1'].mul(df['x2'])
    df['|x1-x2|'] = df['x1'].sub(df['x2']).abs()
    df['|x1+x2|'] = df['x1'].add(df['x2']).abs()

In [136]:
# Put the features in a fixed order with the label column 'y' last, using the
# same column order for both data sets.
train, test = [
    frame.reindex(columns=['one', 'x1', 'x2', 'x1^2', 'x2^2', 'x1x2', '|x1-x2|', '|x1+x2|', 'y'])
    for frame in (train, test)
]

In [137]:
# Preview the training data again to check the added feature columns and their order.
train.head()

Unnamed: 0,one,x1,x2,x1^2,x2^2,x1x2,|x1-x2|,|x1+x2|,y
0,1,-0.77947,0.838221,0.607574,0.702615,-0.653369,1.617692,0.058751,1
1,1,0.155635,0.895377,0.024222,0.801701,0.139352,0.739743,1.051012,1
2,1,-0.059908,-0.71778,0.003589,0.515208,0.043001,0.657872,0.777688,1
3,1,0.207596,0.758933,0.043096,0.57598,0.157552,0.551337,0.96653,1
4,1,-0.195983,-0.375487,0.038409,0.140991,0.073589,0.179504,0.57147,-1


In [105]:
def lin_reg(points, labels):
    """
    Fit linear regression by least squares.

    Input: training points (one row per sample) and labels
    Output: weight vector w = pinv(X) . y as a NumPy array
    """
    # Work on plain arrays so the result is an ndarray whether the caller
    # passes NumPy arrays or pandas objects.
    X = np.asarray(points)
    y = np.asarray(labels)
    # Pseudo-inverse of X times y
    return np.dot(np.linalg.pinv(X), y)

# Fit on every column except the last one ('y'), which holds the labels.
# .iloc performs the positional slice that .ix was used for; .ix has been
# removed from current pandas.
w_lin = lin_reg(train.iloc[:, :-1], train['y'])
w_lin

array([-1.64706706, -0.14505927,  0.10154121, -2.03296844, -1.82804373,
        2.48152945,  4.15893861,  0.31651714])

In [88]:
def classification_error(w, points, labels):
    """
    Fraction of points that the linear classifier sign(points . w) gets wrong.

    Input: weight vector w, points (one row per sample), labels
    Output: mean of the element-wise mismatch between predicted sign and label
    """
    scores = np.dot(points, w)
    predicted = np.sign(scores)
    mismatches = predicted != labels
    return np.mean(mismatches)

In [89]:
# .iloc[:, :-1] selects every column except the last ('y') by position; it
# replaces the .ix indexer, which has been removed from current pandas.

# In-sample classification error
print (classification_error(w_lin, train.iloc[:, :-1], train['y']))

# Out-of-sample classification error
print (classification_error(w_lin, test.iloc[:, :-1], test['y']))

0.0285714285714
0.084


In [107]:
# With regularization
def LR_reg(points, labels, k):
    """
    Linear regression with weight decay.

    Solves (X^T X + lambda * I) w = X^T y for w, with lambda = 10**k.

    Input: training points (one row per sample), labels, and the exponent k
           of the regularization strength
    Output: weight vector as a NumPy array
    """
    # Work on plain arrays so the result is an ndarray whether the caller
    # passes NumPy arrays or pandas objects.
    X = np.asarray(points)
    y = np.asarray(labels)
    # Float base: an integer base raised to a negative NumPy integer (e.g. a k
    # taken from np.arange) raises ValueError instead of giving a fraction.
    lambda_param = 10.0**k
    regularized_gram = np.dot(X.T, X) + np.eye(X.shape[1])*lambda_param
    # Solve the linear system directly rather than forming the explicit
    # inverse; this is the numerically preferred way to apply A^-1 to a vector.
    return np.linalg.solve(regularized_gram, np.dot(X.T, y))

# Weak regularization: k = -3. Features are every column except the last
# ('y'); .iloc replaces the .ix indexer, which has been removed from current pandas.
w_small_k = LR_reg(train.iloc[:, :-1], train['y'], -3)
w_small_k

array([-1.6432827 , -0.14333537,  0.10144329, -2.02456533, -1.81721505,
        2.45550685,  4.14009201,  0.31960135])

In [108]:
# .iloc[:, :-1] selects every column except the last ('y') by position; it
# replaces the .ix indexer, which has been removed from current pandas.

# In-sample classification error
print (classification_error(w_small_k, train.iloc[:, :-1], train['y']))

# Out-of-sample classification error
print (classification_error(w_small_k, test.iloc[:, :-1], test['y']))

0.0285714285714
0.08


In [109]:
# With large k
# .iloc replaces the .ix indexer, which has been removed from current pandas.
w_large_k = LR_reg(train.iloc[:, :-1], train['y'], 3)
# Parenthesized print, as in the other cells, so the line is valid in both
# Python 2 and Python 3.
print (w_large_k)

[ 0.00435688 -0.00134416  0.0024939   0.00328695  0.00484127 -0.00862023
  0.01786706 -0.00490192]


In [110]:
# .iloc[:, :-1] selects every column except the last ('y') by position; it
# replaces the .ix indexer, which has been removed from current pandas.

# In-sample classification error
print (classification_error(w_large_k, train.iloc[:, :-1], train['y']))

# Out-of-sample classification error
print (classification_error(w_large_k, test.iloc[:, :-1], test['y']))

0.371428571429
0.436


In [143]:
# Find minimum E_out
def min_E_out(ks):
    """
    Report which regularization exponent gives the lowest out-of-sample error.

    For every k in ks, fits LR_reg on the module-level `train` frame, scores
    the weights on the module-level `test` frame with classification_error,
    then prints the list of errors followed by the smallest error and its k.
    On ties np.argmin picks the first k in ks.

    Input: indexable sequence of exponents k (must not be empty)
    Output: None (results are printed)
    """
    # The feature/label split does not depend on k, so do it once.
    # .iloc[:, :-1] selects every column except the last ('y') by position; it
    # replaces the .ix indexer, which has been removed from current pandas.
    train_points, train_labels = train.iloc[:, :-1], train['y']
    test_points, test_labels = test.iloc[:, :-1], test['y']

    errors = [
        classification_error(LR_reg(train_points, train_labels, k), test_points, test_labels)
        for k in ks
    ]
    min_k = np.argmin(errors)
    print (errors)
    # Format the message before printing it: calling .format on the result of
    # print(...) fails under Python 3, where print returns None.
    print ("The minimum error is {}, with k={}.".format(errors[min_k], ks[min_k]))

# Compare the candidate exponents k = 2, 1, 0, -1, -2.
min_E_out([2, 1, 0, -1, -2])

[0.22800000000000001, 0.124, 0.091999999999999998, 0.056000000000000001, 0.084000000000000005]
The minimum error is 0.056, with k=-1.


In [144]:
# Sweep every integer exponent from -10 through 10.
min_E_out(range(-10,11))

[0.084000000000000005, 0.084000000000000005, 0.084000000000000005, 0.084000000000000005, 0.084000000000000005, 0.084000000000000005, 0.084000000000000005, 0.080000000000000002, 0.084000000000000005, 0.056000000000000001, 0.091999999999999998, 0.124, 0.22800000000000001, 0.436, 0.45200000000000001, 0.45600000000000002, 0.45600000000000002, 0.45600000000000002, 0.45600000000000002, 0.45600000000000002, 0.45600000000000002]
The minimum error is 0.056, with k=-1.
