In [1]:
import numpy as np

In [2]:
def loadSimpData():
    dataMat = np.matrix([[1 ,2.1],
                         [2 ,1.1],
                         [1.3, 1],
                         [1, 1],
                         [2, 1]
                        ])   
    classLabels = [1.0, 1.0, -1.0, -1.0, 1.0]
    return dataMat,classLabels

In [3]:
dataMat,classLabels = loadSimpData()

In [13]:
def stumpClassify(dataMatrix,dimen,threshVal,threshIneq):
    retArray = np.ones((np.shape(dataMatrix)[0],1))
    if threshIneq =='lt':
        retArray[dataMatrix[:,dimen]<=threshVal] = -1.0
    else:
        retArray[dataMatrix[:,dimen]>=threshVal] = -1.0
    return retArray

In [14]:
def buildStump(dataArr, classLabels, D):
    dataMatrix = np.mat(dataArr)
    labelMat = np.mat(classLabels).T
    m,n = np.shape(dataMatrix)
    numstep = 10
    bestStump = {}
    bestClassEst = np.mat(np.zeros((m,1)))
    minError = np.inf
    for i in range(n):
        rangeMin = dataMatrix[:,i].min()
        rangeMax = dataMatrix[:,i].max()
        stepSize = (rangeMax - rangeMin)/numstep
        for j in range(-1,int(numstep)+1):
            for inequal in ['lt','gt']:
                threshVal = (rangeMin+float(j)*stepSize)
                predictedVals = stumpClassify(dataMatrix,i,threshVal,inequal)
                errArr = np.mat(np.ones((m,1)))
                errArr[predictedVals == labelMat] = 0
                weightedError = D.T*errArr
                print("split:dim%d,thresh %.2f,thresh ineqal:%s the weighted error is %.3f"%(1,threshVal,inequal,weightedError))
                if weightedError<minError:
                    minError = weightedError
                    bestClassEst = predictedVals.copy()
                    bestStump['dim'] = i
                    bestStump['thresh'] = threshVal
                    bestStump['ineq'] = inequal
    return bestStump,minError,bestClassEst

In [17]:
def adaBoostTrainDS(dataArr,classLabels,numIt=40):
    weakClassArr = []
    m = np.shape(dataArr)[0]
    D = np.mat(np.ones((m,1))/m)
    aggClassEst = np.mat(np.zeros((m,1)))
    for i in range(numIt):
        bestStump,error,classEst = buildStump(dataArr,classLabels,D)
        print("D:",D.T)
        alpha = float(0.5*np.log((1-error)/max(error,1e-16)))
        bestStump['alpha'] = alpha
        weakClassArr.append(bestStump)
        print("classEst:",classEst.T)
        expon = np.multiply(-1*alpha*np.mat(classLabels).T,classEst)
        D = np.multiply(D,np.exp(expon))
        D = D/D.sum()
        aggClassEst += alpha*classEst
        print("aggClassEst:",aggClassEst.T)
        aggErrors = np.multiply(np.sign(aggClassEst)!=np.mat(classLabels).T,np.ones((m,1)))
        errorRate = aggErrors.sum()/m
        print("total error:",errorRate,"\n")
        if errorRate == 0:
            break
    return weakClassArr

In [18]:
classifierArray =adaBoostTrainDS(dataMat,classLabels,9)

split:dim1,thresh 0.90,thresh ineqal:lt the weighted error is 0.400
split:dim1,thresh 0.90,thresh ineqal:gt the weighted error is 0.600
split:dim1,thresh 1.00,thresh ineqal:lt the weighted error is 0.400
split:dim1,thresh 1.00,thresh ineqal:gt the weighted error is 0.600
split:dim1,thresh 1.10,thresh ineqal:lt the weighted error is 0.400
split:dim1,thresh 1.10,thresh ineqal:gt the weighted error is 0.600
split:dim1,thresh 1.20,thresh ineqal:lt the weighted error is 0.400
split:dim1,thresh 1.20,thresh ineqal:gt the weighted error is 0.600
split:dim1,thresh 1.30,thresh ineqal:lt the weighted error is 0.200
split:dim1,thresh 1.30,thresh ineqal:gt the weighted error is 0.600
split:dim1,thresh 1.40,thresh ineqal:lt the weighted error is 0.200
split:dim1,thresh 1.40,thresh ineqal:gt the weighted error is 0.800
split:dim1,thresh 1.50,thresh ineqal:lt the weighted error is 0.200
split:dim1,thresh 1.50,thresh ineqal:gt the weighted error is 0.800
split:dim1,thresh 1.60,thresh ineqal:lt the weig

In [19]:
classifierArray

[{'alpha': 0.6931471805599453, 'dim': 0, 'ineq': 'lt', 'thresh': 1.3},
 {'alpha': 0.9729550745276565, 'dim': 1, 'ineq': 'lt', 'thresh': 1.0},
 {'alpha': 0.8958797346140273, 'dim': 0, 'ineq': 'lt', 'thresh': 0.9}]

In [20]:
def adaClassify(datToClass,classifierArray):
    dataMatrix = np.mat(datToClass)
    m = np.shape(dataMatrix)[0]
    aggClassEst = np.mat(np.zeros((m,1)))
    for i in range(len(classifierArray)):
        classEst = stumpClassify(dataMatrix,classifierArray[i]['dim'],
                                classifierArray[i]['thresh'],
                                classifierArray[i]['ineq'])
        aggClassEst += classifierArray[i]['alpha']*classEst
        print(aggClassEst)
    return np.sign(aggClassEst)

In [21]:
adaClassify([0,0],classifierArray)

[[-0.69314718]]
[[-1.66610226]]
[[-2.56198199]]


matrix([[-1.]])