In [9]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
# Toy training set: the ten values 0..9 as a single-column matrix, each with a
# +1 / -1 class label.
X = np.arange(10)[:, np.newaxis]
y = np.array([1] * 3 + [-1] * 3 + [1] * 3 + [-1])

In [12]:
# 李航统计学习方法代码实现
import copy
class SimpleClf:
    """Weighted decision stump (one threshold) on a single numeric feature.

    The stump labels samples ``+1`` on one side of a threshold and ``-1`` on
    the other.  ``fit`` tries the midpoint between every pair of consecutive
    rows of ``X`` (rows are taken in the order given; they are not sorted
    here), in both orientations, and keeps the candidate with the smallest
    weighted misclassification loss.

    Attributes
    ----------
    split_x : list of float
        Candidate thresholds examined by the most recent ``fit``.
    best_split_x : float or None
        Selected threshold; ``None`` until a fit has found one.
    reverse : bool
        ``False``: ``x < best_split_x`` -> +1, otherwise -1.
        ``True``:  ``x < best_split_x`` -> -1, otherwise +1.
    """

    def __init__(self):
        self.split_x=[]
        self.best_split_x=None
        self.reverse=False

    @staticmethod
    def __feature_column(X):
        """Return ``X`` as a flat float array; accepts shape (n,) or (n, 1)."""
        x=np.asarray(X,dtype=float)
        if x.ndim==1:
            return x
        if x.ndim==2 and x.shape[1]==1:
            return x[:,0]
        raise ValueError(
            "SimpleClf supports exactly one feature; got array of shape %s" % (x.shape,)
        )

    @staticmethod
    def __predict_by_split_x(x,split_x,reverse):
        """Label a flat feature array with the stump ``(split_x, reverse)``."""
        below,other=(-1,1) if reverse else (1,-1)
        return np.where(x<split_x,below,other)

    def fit(self,X,y,w):
        """Select the threshold/orientation minimising the weighted error.

        Parameters
        ----------
        X : array-like, shape (n,) or (n, 1)
            Feature values.
        y : array-like, shape (n,)
            Labels in {+1, -1}.
        w : array-like, shape (n,)
            Non-negative sample weights.

        With fewer than two samples there is no candidate threshold and
        ``best_split_x`` stays ``None``.
        """
        x=self.__feature_column(X)
        y=np.asarray(y)
        w=np.asarray(w,dtype=float)

        # Rebuild all fitted state so a refit never inherits candidates or the
        # orientation flag from a previous call.
        self.split_x=[float(s) for s in (x[:-1]+x[1:])/2]
        self.best_split_x=None
        self.reverse=False

        best_loss=np.inf
        # Normal orientation is scanned first and only a strictly smaller loss
        # replaces the incumbent, so ties resolve to the earliest candidate.
        for reverse in (False,True):
            for split_x in self.split_x:
                # Same rule as predict(), so the loss scored here is exactly
                # the loss the fitted stump will incur.
                pre_y=self.__predict_by_split_x(x,split_x,reverse)
                loss=w.dot(pre_y!=y)
                if best_loss>loss:
                    best_loss=loss
                    self.best_split_x=split_x
                    self.reverse=reverse
        print("---  simpleClf-----",self.best_split_x,self.reverse)

    def score(self,X,y):
        """Return the fraction of samples in ``X`` whose prediction equals ``y``."""
        return np.mean(self.predict(X)==np.asarray(y))

    def predict(self,X):
        """Return +1/-1 labels for ``X`` using the fitted threshold.

        Raises
        ------
        ValueError
            If no threshold has been fitted yet.
        """
        if self.best_split_x is None:
            raise ValueError(
                "SimpleClf has no threshold yet; call fit() with at least two samples first"
            )
        return self.__predict_by_split_x(
            self.__feature_column(X),self.best_split_x,self.reverse
        )
    
class AdaBoost:
    """Binary AdaBoost for labels in {+1, -1}.

    Parameters
    ----------
    base_clf : object
        Prototype weak learner.  It must provide ``fit(X, y, w)`` (``w`` being
        the per-sample weights) and ``predict(X)`` returning +1/-1 labels.
        Every boosting round trains a fresh deep copy; the prototype itself is
        never fitted.
    max_iter : int, default 10
        Maximum number of boosting rounds.
    learning_rate : float, default 1
        Factor applied to every learner's vote when predictions are combined.
        It is not used in the weight update, so a positive value does not
        change the predicted labels.
    verbose : bool, default True
        Print the sample weights and the weighted error of every round.

    Attributes
    ----------
    clfs : list
        Fitted weak learners, one per completed round.
    alphas : list of float
        Vote weight of each learner in ``clfs``.
    """

    # Bound keeping the weighted error away from 0 and 1 so that
    # log((1 - e) / e) stays finite.
    _EPS=1e-10

    def __init__(self,base_clf,max_iter=10,learning_rate=1,verbose=True):
        self.max_iter=max_iter
        self.base_clf=base_clf
        self.clfs=[]
        self.alphas=[]
        self.f=None
        self.learning_rate=learning_rate
        self.verbose=verbose

    def fit(self,X,y):
        """Train up to ``max_iter`` weak learners on ``(X, y)``.

        Stops early once the ensemble classifies the training set perfectly.

        Raises
        ------
        ValueError
            If ``y`` is empty.
        """
        y=np.asarray(y)
        if len(y)==0:
            raise ValueError("cannot fit AdaBoost on an empty training set")

        # Start from scratch so a second fit() does not stack onto the
        # ensemble of a previous one.
        self.clfs=[]
        self.alphas=[]

        w=np.ones(len(y))/len(y)
        for _ in range(self.max_iter):
            # Train a private copy of the prototype supplied by the caller.
            base_clf=copy.deepcopy(self.base_clf)
            if self.verbose:
                print(w)
            base_clf.fit(X,y,w)
            pre_y=np.asarray(base_clf.predict(X))

            e_i=w.dot((pre_y!=y).astype('int'))
            if self.verbose:
                # Emitted twice on purpose: keeps the log identical to the
                # format this notebook has always produced.
                print(e_i)
                print(e_i)

            # A perfect (or perfectly wrong) learner would give an infinite
            # alpha and NaN weights; clamp the error instead.
            e_i=min(max(e_i,self._EPS),1-self._EPS)
            alpha_i=0.5*np.log((1-e_i)/e_i)

            # Raise the weight of misclassified samples, lower the rest,
            # then renormalise to a distribution.
            w=w*np.exp(-alpha_i*y*pre_y)
            w=1/sum(w) *w

            self.clfs.append(base_clf)
            self.alphas.append(alpha_i)
            if self.__train_score(X,y)==1:
                break
            if self.verbose:
                print()

    def score(self,X,y):
        """Return the fraction of samples in ``X`` classified as in ``y``."""
        return self.__train_score(X,y)

    def __train_score(self,X,y):
        """Accuracy of the current ensemble on ``(X, y)``."""
        return np.mean(self.__train_predict(X)==np.asarray(y))

    def __train_score_error(self,X,y):
        """Misclassification rate of the current ensemble on ``(X, y)``."""
        return 1-self.__train_score(X,y)

    def __train_predict(self,X):
        """Sign of the weighted vote; a vote of exactly 0 maps to -1."""
        pre_y=np.zeros(len(X))
        for alpha_i,clf in zip(self.alphas,self.clfs):
            pre_y+=self.learning_rate*clf.predict(X)*alpha_i
        return np.where(pre_y>0,1.0,-1.0)

    def predict(self,X):
        """Return +1.0/-1.0 labels for ``X`` from the weighted ensemble vote."""
        return self.__train_predict(X)
#         for x in X:
#             for clf in self.clfs:
#                 print(clf.split_x)
        
        
            
    
    
# clf = SimpleClf();
# w=np.ones(len(y))/len(y)
# clf.fit(X,y,w)
# clf.score(X,y)   
# Boost decision stumps on the toy data, then show the ensemble's predictions
# for the training samples followed by its training accuracy.
ada = AdaBoost(base_clf=SimpleClf(), learning_rate=0.1)
ada.fit(X, y)
print(ada.predict(X))
print(ada.score(X, y))

[0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1]
---  simpleClf----- 2.5 False
0.30000000000000004
0.30000000000000004

[0.07142857 0.07142857 0.07142857 0.07142857 0.07142857 0.07142857
 0.16666667 0.16666667 0.16666667 0.07142857]
---  simpleClf----- 8.5 False
0.21428571428571427
0.21428571428571427

[0.04545455 0.04545455 0.04545455 0.16666667 0.16666667 0.16666667
 0.10606061 0.10606061 0.10606061 0.04545455]
---  simpleClf----- 5.5 True
0.18181818181818188
0.18181818181818188
[ 1.  1.  1. -1. -1. -1.  1.  1.  1. -1.]
1.0


In [10]:
# sklearn
# Reference run: scikit-learn's AdaBoost (SAMME) over depth-1 decision trees on
# the same data; the cell's value is the accuracy on the training set.
bdt = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=1),
    n_estimators=10,
    algorithm="SAMME",
).fit(X, y)
bdt.score(X, y)

1.0