### CS 156a, HW2, Problems 5-10
**Author: Liting Xiao**

In [1]:
import numpy as np

In [2]:
# create target func y = mx + b
def create_target_func():
    p1 = np.random.rand(2, 1) * 2 - 1
    p2 = np.random.rand(2, 1) * 2 - 1
    m = (p2[1] - p1[1]) / (p2[0] - p1[0])
    b = p2[1] - m * p2[0]
    return m, b

In [3]:
def create_data_set(N, m, b):
    X = np.random.rand(2, N) * 2 - 1
    # defining point x_n above the line having y_n = 1
    # while below the line having y_n = -1
    Y = np.sign(X[1, :] - m * X[0, :] - b)
    return X, Y

In [4]:
def linear_regression(X, Y):
    X = np.vstack((np.ones(np.size(X, 1)), X))
    pinv_X = np.matmul(np.linalg.pinv(np.matmul(X, X.T)), X)
    return np.matmul(pinv_X, Y)

In [5]:
def eval_err(w, X, Y):
    xcheck = np.vstack((np.ones(np.size(X, 1)), X))
    ycheck = np.sign(np.sum(w * xcheck.T, axis=1))
    frac = sum(np.not_equal(Y, ycheck)) / np.size(X, 1)
    return frac

**Problems 5-6**

In [6]:
N_ein, N_eout = 100, 1000
Ein_list, Eout_list = [], []
N_exp = 1000

for i in range(N_exp):
    m_true, b_true = create_target_func()
    
    # linear regression and find in sample error
    X_ein, Y_ein = create_data_set(N_ein, m_true, b_true)
    init_w = linear_regression(X_ein, Y_ein)
    Ein_list.append(eval_err(init_w, X_ein, Y_ein))
    
    # find out of sample error
    X_eout, Y_eout = create_data_set(N_eout, m_true, b_true)
    Eout_list.append(eval_err(init_w, X_eout, Y_eout)) 
    
print('The average initial E_in is {}'.format(np.mean(Ein_list)))
print('The average initial E_out is {}'.format(np.mean(Eout_list)))

The average initial E_in is 0.04033
The average initial E_out is 0.049242


**Problem 7**

In [7]:
# create PLA for training
def pla(w, X, Y):
    # shuffle the data set
    shuffle_idx = np.random.permutation(len(Y))
    X, Y = X[:, shuffle_idx], Y[shuffle_idx]
        
    # add x0 = 1 term
    xcheck = np.vstack((np.ones(np.size(X, 1)), X)).T
    ycheck = np.sign(np.sum(w * xcheck, axis=1))
        
    # train
    conv = 0
    while sum(np.not_equal(Y, ycheck)) != 0:
        idx = np.not_equal(Y, ycheck)
        x = xcheck[idx][np.random.randint(sum(idx))]
        y = Y[idx][np.random.randint(sum(idx))]
                    
        w += y * x
        ycheck = np.sign(np.sum(w * xcheck, axis=1))
        conv += 1

    return conv

In [8]:
# training
# num of data points
N_pt = 10
conv_list = []

for i in range(N_exp):
    # create target function and data set
    m, b = create_target_func()
    X, Y = create_data_set(N_pt, m, b)

    # linear regression to get initial weight
    init_w = linear_regression(X, Y)

    # PLA
    conv_list.append(pla(init_w, X, Y))
    
print('Average number of iterations to converge for '
      '{} training points: {}'.format(N_pt, np.mean(conv_list)))

Average number of iterations to converge for 10 training points: 4.967


**Problem 8-10**

In [9]:
def create_data_set_nonlinear(N):
    X = np.random.rand(2, N) * 2 - 1
    Y = np.sign(np.power(X[0, :], 2) + np.power(X[1, :], 2) - 0.6)
    # flip 10% of the data set's Y to add in noise
    idx = np.random.permutation(len(Y))[:int(0.1*N)]
    Y[idx] *= -1
    return X, Y

In [10]:
def nonlinear_transformation(X):
    X_tr = np.vstack((X, X[0, :]*X[1, :]))
    X_tr = np.vstack((X_tr, np.power(X[0, :], 2)))
    X_tr = np.vstack((X_tr, np.power(X[1, :], 2)))
    return X_tr

In [11]:
N_pt = 1000
Ein_no_tr, w_tr, Eout_test = [], [], []
for i in range(N_exp):
    X, Y = create_data_set_nonlinear(N_pt)

    # linear regression without nonlinear transformation
    w_no_tr = linear_regression(X, Y)
    Ein_no_tr.append(eval_err(w_no_tr, X, Y))
    
    # nonlinear transformation to get new features
    X_tr = nonlinear_transformation(X)
    
    # linear regression with nonlinear transformation
    w_tr.append(linear_regression(X_tr, Y))
    
    # use a test set to get out of sample error
    X_test, Y_test = create_data_set_nonlinear(N_pt)
    X_test_tr = nonlinear_transformation(X_test)
    w_test = linear_regression(X_test_tr, Y_test)
    Eout_test.append(eval_err(w_test, X_test_tr, Y_test))
    
print('The average E_in without '
      'nonlinear transformation is {}\n'.format(np.mean(Ein_no_tr)))

best_w = np.mean(w_tr, axis=0)
print('The hypothesis found after nonlinear transformation is: \n'
      'g = sign({} + {}*x1 + {}*x2 + {}*x1*x2 + {}*x1^2 + {}*x2^2) \n'
      .format(best_w[0], best_w[1], best_w[2],
              best_w[3], best_w[4], best_w[5]))

print('The average E_out on test sets after '
      'nonlinear transformation is {}\n'.format(np.mean(Eout_test)))

The average E_in without nonlinear transformation is 0.5049859999999999

The hypothesis found after nonlinear transformation is: 
g = sign(-0.9926073428076996 + 0.0005753865673439148*x1 + -0.003116924795875*x2 + -0.0019579603156029703*x1*x2 + 1.5563611193118378*x1^2 + 1.5596762408388483*x2^2) 

The average E_out on test sets after nonlinear transformation is 0.12390000000000001

