### CS 156a, Final, P7-10
Author: Liting Xiao

In [1]:
import numpy as np
from sklearn.linear_model import Ridge

In [2]:
# Load the train/test sets: column 0 is used as the label, columns 1-2 as the
# two inputs. Inputs are transposed so that x[0] and x[1] are whole columns.
train_y = np.loadtxt('features.train', usecols=0)
train_x = np.loadtxt('features.train', usecols=(1, 2)).T
test_y = np.loadtxt('features.test', usecols=0)
test_x = np.loadtxt('features.test', usecols=(1, 2)).T

In [3]:
def reg_lin(train_x, dtrain_y, test_x,
            dtest_y, lamb=1.):
    """Regularized linear regression used as a binary classifier.

    The weights minimize the squared error plus the weight-decay penalty
    ``lamb * ||w||^2``, i.e. they solve ``(Z^T Z + lamb*I) w = Z^T y``, and
    a point is classified by ``sign(z . w)``.

    Parameters
    ----------
    train_x : array_like, shape (N, d)
        Training points, one row per point, already in feature space.
    dtrain_y : array_like, shape (N,)
        Training targets (+1 / -1).
    test_x : array_like, shape (M, d)
        Test points in the same feature space.
    dtest_y : array_like, shape (M,)
        Test targets (+1 / -1).
    lamb : float, optional
        Weight-decay strength. ``lamb=0`` gives plain least squares as long
        as ``train_x`` has full column rank.

    Returns
    -------
    E_in, E_out : float
        Fraction of misclassified training and test points. A prediction of
        exactly 0 never equals a +1/-1 target, so it counts as an error.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``Z^T Z + lamb*I`` is singular.
    """
    Z_train = np.asarray(train_x, dtype=float)
    y_train = np.asarray(dtrain_y, dtype=float)
    Z_test = np.asarray(test_x, dtype=float)
    y_test = np.asarray(dtest_y)

    # Solve the d x d normal equations. This yields the same weights as
    # Z^T (Z Z^T + lamb*I)^-1 y but avoids building and inverting an N x N
    # matrix, which is cubic in the number of training points.
    n_features = Z_train.shape[1]
    lhs = np.dot(Z_train.T, Z_train) + lamb * np.eye(n_features)
    rhs = np.dot(Z_train.T, y_train)
    w_reg = np.linalg.solve(lhs, rhs)

    # make predictions
    pred_train_y = np.sign(np.dot(Z_train, w_reg))
    pred_test_y = np.sign(np.dot(Z_test, w_reg))

    # in/out of sample error
    E_in = np.mean(pred_train_y != y_train)
    E_out = np.mean(pred_test_y != y_test)
    return E_in, E_out

In [4]:
def _linear_features(x):
    """Stack a constant 1 with rows x[0], x[1] and return one point per row."""
    x1, x2 = x[0], x[1]
    return np.vstack((np.ones_like(x1), x1, x2)).T


def _quadratic_features(z):
    """Append the columns x1*x2, x1^2, x2^2 to a matrix with rows (1, x1, x2)."""
    x1, x2 = z[:, 1], z[:, 2]
    return np.vstack((z.T, x1 * x2, x1 ** 2, x2 ** 2)).T


# z = x = (1, x1, x2)
train_x_trans = _linear_features(train_x)
test_x_trans = _linear_features(test_x)

# z = (1, x1, x2, x1x2, x1^2, x2^2)
train_x_trans2 = _quadratic_features(train_x_trans)
test_x_trans2 = _quadratic_features(test_x_trans)

#### P7-9

In [5]:
for d in range(10):
    # one-vs-all targets: +1 where the label equals d, -1 everywhere else
    dtrain_y = np.where(train_y != d, -1, 1).astype(train_y.dtype)
    dtest_y = np.where(test_y != d, -1, 1).astype(test_y.dtype)

    # same targets, fitted once on (1, x1, x2) and once on the 2nd-order features
    E_in1, E_out1 = reg_lin(train_x_trans, dtrain_y, test_x_trans, dtest_y)
    E_in2, E_out2 = reg_lin(train_x_trans2, dtrain_y, test_x_trans2, dtest_y)

    print('"{} vs all":'.format(d))
    print('    No transformation: E_in = {:.3f}; E_out = {:.3f};'
          .format(E_in1, E_out1))
    print('    With transformation: E_in = {:.3f}; E_out = {:.3f};'
          .format(E_in2, E_out2))

    # relative drop in E_out obtained by switching to the transformed features
    rel_gain = (E_out1 - E_out2) / E_out1
    print('    Transformation improvement: {:.3f}\n'
          .format(rel_gain))

"0 vs all":
    No transformation: E_in = 0.109; E_out = 0.115;
    With transformation: E_in = 0.102; E_out = 0.107;
    Transformation improvement: 0.074

"1 vs all":
    No transformation: E_in = 0.015; E_out = 0.022;
    With transformation: E_in = 0.012; E_out = 0.022;
    Transformation improvement: 0.022

"2 vs all":
    No transformation: E_in = 0.100; E_out = 0.099;
    With transformation: E_in = 0.100; E_out = 0.099;
    Transformation improvement: 0.000

"3 vs all":
    No transformation: E_in = 0.090; E_out = 0.083;
    With transformation: E_in = 0.090; E_out = 0.083;
    Transformation improvement: 0.000

"4 vs all":
    No transformation: E_in = 0.089; E_out = 0.100;
    With transformation: E_in = 0.089; E_out = 0.100;
    Transformation improvement: 0.000

"5 vs all":
    No transformation: E_in = 0.076; E_out = 0.080;
    With transformation: E_in = 0.076; E_out = 0.079;
    Transformation improvement: 0.006

"6 vs all":
    No transformation: E_in = 0.091; E_out = 0

#### P10

In [6]:
# build a two-class (one-vs-one) data set from the multi-class labels
def data_for_1v1(x, y, digit1, digit2):
    """Keep only the rows labelled digit1 or digit2 and relabel them.

    Rows of ``x`` and entries of ``y`` are selected with the same boolean
    mask. In the returned labels, ``digit2`` becomes -1 and every other
    kept entry is +1; the labels keep the dtype of ``y``.

    Returns
    -------
    new_x, new_y
        The selected rows of ``x`` and their +1/-1 labels.
    """
    keep = (y == digit1) | (y == digit2)
    kept_y = y[keep]

    new_y = np.ones_like(kept_y)
    new_y[kept_y == digit2] = -1
    return x[keep], new_y

# restrict the 2nd-order feature sets to digits 1 (+1) and 5 (-1)
train_x_1v5, train_y_1v5 = data_for_1v1(
    train_x_trans2, train_y, digit1=1, digit2=5)
test_x_1v5, test_y_1v5 = data_for_1v1(
    test_x_trans2, test_y, digit1=1, digit2=5)

# same data, two regularization strengths
E_in1, E_out1 = reg_lin(
    train_x_1v5, train_y_1v5, test_x_1v5, test_y_1v5, lamb=1.)
E_in2, E_out2 = reg_lin(
    train_x_1v5, train_y_1v5, test_x_1v5, test_y_1v5, lamb=0.01)

print('"1 vs. 5":')
print('    Lambda = 1: E_in = {:.3f}; E_out = {:.3f}'
      .format(E_in1, E_out1))
print('    Lambda = 0.01: E_in = {:.3f}; E_out = {:.3f}'
      .format(E_in2, E_out2))

"1 vs. 5":
    Lambda = 1: E_in = 0.005; E_out = 0.026
    Lambda = 0.01: E_in = 0.004; E_out = 0.028
