# Capter8 提升方法
## Adaboost模型

In [1]:
import numpy as np
import matplotlib.pyplot as plt
class AdaBoost():
    def __init__(self, G_num = 5):
        self.G_num = G_num

    #使用LR作为基学习器
    def G(self, train_x, train_y, Dw, epoch, lr):
        loss_lst = []
        w = np.zeros(train_x.shape[1])
        b = 0
        for i in range(epoch):
            h = 1 / (1 + np.exp(-1 * (np.dot(train_x, w.T) + b)))
            L = -1 * np.dot((train_y * np.log(h) + (1 - train_y) * np.log(1 - h)).T, Dw)
            loss_lst.append(L)
            dL_dw = np.dot(((h - train_y) * Dw).T, train_x)
            dL_db = np.dot((h - train_y).T, Dw)
            w = w - lr * dL_dw
            b = b - lr * dL_db
        print("G loss is " + ' '.join([str(round(x,2)) for x in loss_lst]))
        return (w, b)

    def G_pre(self, x, w, b):
        pre = 1 / (1 + np.exp(-1 * (np.dot(x, w.T) + b)))
        return np.where(pre<=0.5, -1, 1)

    def fit(self, train_x, train_y, G_epoch=10, lr = 0.05):
        self.G_lst = []
        step_loss = []
        n = train_x.shape[0]
        Dw = np.zeros(n) + 1/n
        for i in range(self.G_num):
            w, b = self.G(train_x, train_y, Dw, G_epoch, lr)
            pre = self.G_pre(train_x, w, b)
            err_idx = np.nonzero(pre-train_y)[0]
            e = np.sum(Dw[err_idx])
            step_loss.append(str(len(err_idx)))
            a = 1/2 * np.log((1-e)/e)
            self.G_lst.append({'a': a, 'w' : w, 'b' : b})
            Dw_raw = Dw * np.exp(-1*a*train_y*pre)
            Dw = Dw_raw/sum(Dw_raw)
        print('%d model error sample num is %s' % (self.G_num, ' '.join(step_loss)))

    def predict(self, test_x):
        val = np.zeros(test_x.shape[0])
        for arg in self.G_lst:
            val = val + arg['a'] * self.G_pre(test_x, arg['w'], arg['b'])
        return np.where(val>0, 1, -1)

    def get_accuracy(self, gt_y, pre_y):
        diff = np.subtract(gt_y, pre_y)
        err = diff[diff!=0].shape[0]
        return 1 - err/gt_y.shape[0]

    def plot_decision_boundary(self, fea, label, ax, title=''):
        fea1_min, fea1_max = fea[:, 0].min() - 0.5, fea[:, 0].max() + 0.5
        fea2_min, fea2_max = fea[:, 1].min() - 0.5, fea[:, 1].max() + 0.5
        s = 0.05
        g_fea1, g_fea2 = np.meshgrid(np.arange(fea1_min, fea1_max, s), np.arange(fea2_min, fea2_max, s))
        pre_y = self.predict(np.c_[g_fea1.ravel(), g_fea2.ravel()])
        pre_y = pre_y.reshape(g_fea1.shape)
        ax.contourf(g_fea1, g_fea2, pre_y, cmap=plt.cm.Spectral)
        ax.scatter(fea[:, 0], fea[:, 1], c = label, s = 20)
        if len(title) > 0:
            ax.set_title(title)

## 模型训练与验证

In [2]:
%matplotlib widget
from sklearn import datasets
sample_num = 200
inputs, labels = datasets.make_classification(n_samples=sample_num, n_features=2, n_classes=2, n_redundant=0)
labels = np.where(labels == 0, -1, labels)
train_num = sample_num//2
train_x = inputs[:train_num]
train_y = labels[:train_num]
test_x = inputs[train_num:]
test_y = labels[train_num:]

model = AdaBoost(5)
model.fit(train_x, train_y)
pre_y = model.predict(test_x)
print('accuracy: %.2f' % model.get_accuracy(test_y, pre_y))
model.plot_decision_boundary(train_x, train_y, plt)

G loss is 0.69 0.63 0.56 0.5 0.45 0.39 0.34 0.29 0.24 0.19
G loss is 0.69 0.68 0.66 0.64 0.62 0.61 0.59 0.58 0.56 0.55
G loss is 0.69 0.68 0.68 0.67 0.66 0.65 0.65 0.64 0.63 0.62
G loss is 0.69 0.69 0.68 0.67 0.66 0.66 0.65 0.64 0.64 0.63
G loss is 0.69 0.69 0.68 0.67 0.67 0.66 0.66 0.65 0.65 0.64
5 model error sample num is 20 16 19 20 20
accuracy: 0.73


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …