In [1]:
import numpy as np
import pandas as pd
import scipy.linalg as lin

In [2]:
#load data
def loaddata(filename):
    """Read a whitespace-delimited, header-less numeric table and split it.

    Parameters
    ----------
    filename : str
        Path handed to ``pandas.read_csv``.

    Returns
    -------
    X : ndarray, shape (n_rows, n_cols - 1)
        Every column except the last. No constant x0 column is prepended
        (the decision stump does not need one).
    Y : ndarray, shape (n_rows, 1)
        The last column, kept two-dimensional.
    """
    # Raw string: '\s' inside a plain literal is an invalid escape sequence.
    table = pd.read_csv(filename, sep=r'\s+', header=None)
    # DataFrame.as_matrix() was deprecated and then removed from pandas;
    # .values yields the same ndarray on old and new versions alike.
    data = table.values
    n_cols = data.shape[1]
    X = data[:, 0:n_cols-1]
    Y = data[:, n_cols-1:n_cols]
    return X, Y

In [3]:
# decision stump
def decision_stump(X, Y, thres, U):
    """Find the best weighted decision stump across all features.

    A stump predicts ``s * sign(x[index] - theta)``. For every feature and
    every candidate threshold, the weighted error of both orientations
    (s = +1 and s = -1) is computed, and the overall minimum is kept.

    Parameters
    ----------
    X : ndarray, shape (m, d)
        Samples.
    Y : ndarray, shape (m, 1)
        Labels, compared element-wise against the stump outputs.
    thres : ndarray, shape (r, d)
        Candidate thresholds; column j is used for feature j. Built by the
        caller so it is not recomputed on every call.
    U : ndarray, shape (m, 1)
        Per-sample weights.

    Returns
    -------
    (besterr, btheta, bs, index)
        Lowest weighted error found, its threshold, its orientation (+1/-1)
        and the feature column it applies to. If no candidate has error
        below 1 the initial values ``(1, 0, 0, 0)`` are returned.
    """
    _, n_features = X.shape
    n_thres, _ = thres.shape
    best_err, best_theta, best_sign, best_feature = 1, 0, 0, 0
    for feat in range(n_features):
        column = X[:, feat:feat+1]
        # outputs[k, j] = sign(X[k, feat] - thres[j, feat])
        outputs = np.sign(np.tile(column, (1, n_thres)).T-thres[:, feat:feat+1]).T
        # Y is (m, 1), so the comparison broadcasts over thresholds; the
        # transposed product with U gives one weighted error per threshold.
        err_pos = (outputs != Y).T.dot(U)
        err_neg = (-1*outputs != Y).T.dot(U)
        low_pos = np.min(err_pos)
        low_neg = np.min(err_neg)
        # Orientation +1 wins only on a strictly smaller error.
        if low_pos < low_neg:
            cand_err, cand_sign, cand_errs = low_pos, 1, err_pos
        else:
            cand_err, cand_sign, cand_errs = low_neg, -1, err_neg
        # Strict '<' keeps the earliest feature on ties.
        if cand_err < best_err:
            best_err = cand_err
            best_sign = cand_sign
            best_feature = feat
            best_theta = thres[np.argmin(cand_errs), feat]
    return best_err, best_theta, best_sign, best_feature

In [5]:
def ada_boost(X, Y, T):
    """Run T rounds of AdaBoost with decision stumps as the base learner.

    Parameters
    ----------
    X : ndarray, shape (m, d)
        Training samples.
    Y : ndarray, shape (m, 1)
        Training labels.
    T : int
        Number of boosting rounds.

    Returns
    -------
    theta, index, s, alpha : ndarray, each of shape (T,)
        Threshold, feature column, orientation and vote weight of the stump
        chosen in each round.
    """
    row, col = X.shape
    U = np.ones((row, 1))/row  # uniform initial sample weights
    Xsort = np.sort(X, 0)
    # Per-feature candidate thresholds: one just below the smallest value
    # (min - 0.05) followed by midpoints of consecutive sorted values.
    thres = (np.r_[Xsort[0:1,:]-0.1, Xsort[0:row-1,:]] + Xsort)/2
    theta = np.zeros((T,)); s = np.zeros((T,))
    index = np.zeros((T,)).astype(int); alpha = np.zeros((T,))
    err = np.zeros((T,)) # each iteration has a theta, index, alpha, err
    tiny = np.finfo(float).eps
    for i in range(T):
        err[i], theta[i], s[i], index[i] = decision_stump(X, Y, thres, U)
        yhat = s[i]*np.sign(X[:, index[i]:index[i]+1] - theta[i])
        # A stump with weighted error exactly 0 (or 1) would make delta
        # inf (or 0) and turn U and alpha into inf/NaN. Clipping leaves
        # every error strictly inside (0, 1) untouched.
        eps_i = np.clip(err[i], tiny, 1 - tiny)
        delta = np.sqrt((1-eps_i)/eps_i)
        U[yhat == Y] /= delta  # shrink weights of correctly classified samples
        U[yhat != Y] *= delta  # grow weights of misclassified samples
        alpha[i] = np.log(delta)
        # Keep U summing to 1 so the stump's weighted error is a rate.
        U /= np.sum(U)
    return theta, index, s, alpha

In [6]:
# generate prediction
def predict(X, theta, index, s, alpha):
    """Combine the boosted stumps into a final prediction for each row of X.

    Parameters
    ----------
    X : ndarray, shape (m, d)
        Samples to classify.
    theta, index, s, alpha : ndarray, each of length T
        Threshold, feature column, orientation and vote weight per stump.

    Returns
    -------
    ndarray, shape (m, 1)
        Sign of the alpha-weighted sum of the stump outputs.
    """
    row, col = X.shape
    num = len(theta)
    # Orientation of every stump repeated for every sample: (m, T).
    orientations = np.tile(s.reshape((1, num)), (row, 1))
    # X[:, index] picks, for each stump, the feature column it splits on.
    sides = np.sign(X[:, index]-theta.reshape((1, num)))
    stump_out = orientations*sides
    weighted_vote = stump_out.dot(alpha.reshape(num, 1))
    return np.sign(weighted_vote)

In [7]:
#load data
# AdaBoost train / test sets; loaddata returns (features, labels) with the
# labels kept as a single column.
X, Y = loaddata('hw2_adaboost_train.dat')
Xtest, Ytest = loaddata('hw2_adaboost_test.dat')
row, col = X.shape  # row: number of training samples (denominator for Ein below)
r, c = Xtest.shape  # r: number of test samples (denominator for Eout below)


In [8]:
#Q12
# Train a single stump (T=1), then print its number of training
# misclassifications and the corresponding in-sample error rate.
theta, index, s, alpha = ada_boost(X, Y, 1)
Ypred = predict(X, theta, index, s, alpha)
print np.sum(Ypred != Y)
print 'Ein(g1)：', float(np.sum(Ypred !=Y))/row

24
Ein(g1)： 0.24


In [9]:
# Q13
# Train 300 rounds, then print the ensemble's number of training
# misclassifications and the corresponding in-sample error rate.
theta, index, s, alpha = ada_boost(X, Y, 300)
Ypred = predict(X, theta, index, s, alpha)
print np.sum(Ypred != Y)
print 'Ein(G)：', float(np.sum(Ypred!=Y))/row

0
Ein(G)： 0.0


In [49]:
# Q14
# NOTE(review): the output recorded under this cell (array shapes and
# 'sum(U): ...') is not produced by the ada_boost definition shown in this
# notebook, which prints nothing -- presumably an instrumented version was
# run; re-run before relying on those numbers.
theta, index, s, alpha = ada_boost(X, Y, 1)

(100L, 2L)
(100L, 2L)
('sum(U): ', 0.85416626016250508)


In [50]:
# Q16
# NOTE(review): the output recorded under this cell (array shapes and
# 'sum(U): ...') is not produced by the ada_boost definition shown in this
# notebook, which prints nothing -- presumably an instrumented version was
# run; re-run before relying on those numbers.
theta, index, s, alpha = ada_boost(X, Y, 300)

(100L, 2L)
(100L, 2L)
('sum(U): ', 0.98835570147308793)


In [59]:
# Q17
# Train a single stump (T=1) on the training set and print its error rate
# on the test set (r is the test-set row count from the data-loading cell).
theta, index, s, alpha = ada_boost(X, Y, 1)
Ypred = predict(Xtest, theta, index, s, alpha)
print 'Eout(g1)：', float(np.sum(Ypred!=Ytest))/r

Eout(g1)： 0.29


In [60]:
# Q18
# Train 300 rounds on the training set and print the ensemble's error rate
# on the test set (r is the test-set row count from the data-loading cell).
theta, index, s, alpha = ada_boost(X, Y, 300)
Ypred = predict(Xtest, theta, index, s, alpha)
print 'Eout(G):', float(np.sum(Ypred!=Ytest))/r

Eout(G): 0.132


In [17]:
# Q19 and Q20
# define kernel
def matK(X1, X2, gamma):
    """Gaussian (RBF) kernel matrix between the rows of X1 and the rows of X2.

    Entry [a, b] equals ``exp(-gamma * ||X1[a] - X2[b]||**2)``; the result
    has shape (rows of X1, rows of X2).
    """
    n_left, _ = X1.shape
    n_right, _ = X2.shape
    sq_dist = np.zeros((n_left, n_right))
    # One column per row of X2: squared Euclidean distance from every row
    # of X1 to that point.
    for j, point in enumerate(X2):
        sq_dist[:, j] = ((X1-point)**2).sum(axis=1)
    return np.exp(-gamma*sq_dist)

In [64]:
# define kernel
def matK(X1, X2, gamma):
    """Gaussian (RBF) kernel matrix between the rows of X1 and the rows of X2.

    Entry [a, b] equals ``exp(-gamma * ||X1[a] - X2[b]||**2)``; the result
    has shape (rows of X1, rows of X2).

    NOTE: an earlier cell of this notebook already defines a function with
    this name and signature; running this cell rebinds ``matK`` to the
    definition here.
    """
    n_left, _ = X1.shape
    n_right, _ = X2.shape
    sq_dist = np.zeros((n_left, n_right))
    # One column per row of X2: squared Euclidean distance from every row
    # of X1 to that point.
    for j, point in enumerate(X2):
        sq_dist[:, j] = ((X1-point)**2).sum(axis=1)
    return np.exp(-gamma*sq_dist)

In [65]:
#load data
# NOTE: this rebinds X, Y, Xtest, Ytest and row, which the AdaBoost cells
# above bound to a different data set.
X, Y = loaddata('hw2_lssvm_all.dat')
# The first 400 rows form the training split; the remaining rows are the test split.
Xtrain = X[0:400, :]; Ytrain = Y[0:400, :]
Xtest = X[400:, :]; Ytest = Y[400:, :]
row, col = Xtest.shape  # row is now the number of test samples

In [66]:
#Q19 and Q20 solution:
gamma = [32, 2, 0.125]
lamb = [0.001, 1, 1000]
Ein = np.zeros((len(gamma), len(lamb)))
Eout = np.zeros((len(gamma), len(lamb)))
for i in range(len(gamma)):
    K = matK(Xtrain, Xtrain, gamma[i])
    Ktest = matK(Xtrain, Xtest, gamma[i])
    for j in range(len(lamb)):
        beta = lin.pinv(lamb[j]*np.eye(400)+K).dot(Ytrain)
        yhat = np.sign(K.dot(beta))
        Ein[i, j] = float(np.sum(yhat != Ytrain))/400
        yhat2 = np.sign(Ktest.T.dot(beta))
        Eout[i, j] = float(np.sum(yhat2 != Ytest))/row
print 'minimum Ein:',np.min(Ein)
print 'minimum Eout:', np.min(Eout)

minimum Ein: 0.0
minimum Eout: 0.39
