# Boosting 提升方法
- 1 boosting是将弱学习算法提升为强学习算法的统计学习方。在分类学习中通过反复更新**训练数据的权重分布**，构建一些弱分类器，并将其线性组合，构造强分类器。

- 2 Boosting和Bagging是集成学习的两种方式 
     + Boosting 是每个基本模型训练时采用不同权重，针对上个一个弱分类模型修改样本权重值，序列式
     + Bagging 是每个模型由总体样本中随机抽样得到的不同数据集训练得到的，并行式

- 3 AdaBoost 模型
$$
f(x) = \sum_{m=1}^{M} \alpha_{m} G_{m} (x)
$$

- 4 AdaBoost 算法
    + 1) 输入

# 例8.1 

In [1]:
import numpy as np

In [2]:
X = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
y = np.array([1, 1, 1, -1, -1, -1, 1, 1, 1, -1])
print(X.shape, y.shape)

(10,) (10,)


In [3]:
# 

class AdaBoost():
    """
    AdaBoost
    """
    
    def calc_e_Gx(self, trainDataArr, trainLabelArr, n, div, rule, D):
        """
        计算分类错误率
        """
        
        e = 0
        
        x = trainDataArr[:, n]
        y = trainLabelArr
        
        predict = []
        
        if rule == 'LisOne':
            L = 1
            H = -1
        else:
            L = -1
            H = 1
            
        for i in range(trainDataArr.shape[0]):
            if x[i] < div:
                predict.append(L)
                if y[i] != L:
                    e += D[i]
            elif x[i] >= div:
                predict.append(H)
                if y[i] != H:
                    e += D[i]
                    
        return np.array(predict), e
    
    def createSigleBoostingTree(self, trainDataArr, trainLabelArr, D):
        """
        创建单层提升树
        """
        
        m, n = np.shape(trainDataArr)
        
        sigleBoostTree = {}
        
        sigleBoostTree['e'] = 1
        
        for i in range(n):
            for div in [-0.5, 0.5, 1.5]:
                for rule in ['LisOne', 'HisOne']:
                    Gx, e = self.calc_e_Gx(trainDataArr, trainLabelArr, i, div, rule, D)
                    if e < sigleBoostTree['e']:
                        
                        sigleBoostTree['e'] = e
                        sigleBoostTree['div'] = div
                        sigleBoostTree['rule'] = rule
                        sigleBoostTree['Gx'] = Gx
                        sigleBoostTree['feature'] = i
                        
        return sigleBoostTree
    
    
    def createBoostingTree(self, trainDataList, trainLabelList, treeNum = 50):
        """
        创建提升树
        """
        
        trainDataArr = np.array(trainDataList)
        trainLabelArr = np.array(trainLabelList)
        
        finallpredict = [0] * len(trainLabelArr)
        
        m, n = np.shape(trainDataArr)
        
        D = [1 / m] * m
        
        tree = []
        
        for i in range(treeNum):
            curTree = self.createSigleBoostingTree(trainDataArr, trainLabelArr, D)
            alpha = 1/2 * np.log((1 - curTree['e']) / curTree['e'])
            Gx = curTree['Gx']

            D = np.multiply(D, np.exp(-1 * alpha * np.multiply(trainLabelArr, Gx))) / sum(D)
            
            curTree['alpha'] = alpha
            tree.append(curTree)
            
            finallpredict += alpha * Gx
            
            error = sum([1 for i in range(len(trainDataList)) if np.sign(finallpredict[i]) != trainLabelArr[i]])
            
            finallError = error / len(trainDataList)
            
            if finallError == 0:
                return tree
            
            print("iter : %d: %d, sigle error: %.4f, finall error: %.4f" % (i, treeNum, curTree['e'], finallError))
        
        return tree
        
    def predict(self, x, div, rule, feature):
        """
        预测标签
        """
        if rule == 'LisOne':
            L = 1
            H = -1
        else:
            L = -1
            H = 1
            
        if x[feature] < div:
            return L 
        else:
            return H
        
    def model_test(testDataList, testLabelList, tree):
        """
        测试
        """
        error_count = 0
        for i in range(len(testDataList)):
            result = 0
            for curTree in tree:
                div = curTree['div']
                rule = curTree['rule']
                feature = curTree['feature']
                alpha = curTree['alpha']
                result += alpha * self.predict(testDataList[i], div, rule, feature)
            
            if np.sign(result) != testLabelList[i]:
                error_count += 1
        
        return 1 - error_count / len(testDataList)

    

In [5]:
clf = AdaBoost()
tree = clf.createBoostingTree(X, y, 40)

ValueError: not enough values to unpack (expected 2, got 1)