In [1]:
import numpy as np

In [2]:
class Adaboost:
    """
    AdaBoost for one-dimensional data using threshold ("decision stump") weak classifiers.

    Each weak classifier is a pair (threshold, left): values <= threshold get the
    label named by `left` ('pos' -> +1, anything else -> -1) and values above the
    threshold get the opposite label. Labels are expected to be +1 / -1.
    """

    # The weighted error is clipped to [eps, 1 - eps] before computing alpha so that a
    # perfect (or perfectly wrong) weak classifier yields a large finite alpha instead
    # of +/-inf, which would turn every sample weight into NaN.
    _ERROR_EPSILON = 1e-10

    def __init__(self,x,y,nums_of_estimators = 10,thresholds = None,random_state = None):
        """
        Class Initializer
        x : features of the training data (sequence of scalars)
        y : labels of the training data (+1 / -1), same length as x
        nums_of_estimators : number of boosting rounds run by fit()
        thresholds : candidate thresholds tried in every round;
                     defaults to np.arange(-0.5,11.5,0.5)
        random_state : optional seed for the resampling step; when None the
                       global np.random generator is used

        RAISES : ValueError if x and y differ in length or are empty
        """
        if len(x) != len(y):
            raise ValueError("x and y must have the same length")
        if len(x) == 0:
            raise ValueError("x and y must not be empty")
        self.x = x
        self.y = y
        self.nums = len(x) #Number of data
        self.dict = dict(zip(x,y)) #feature -> label lookup; kept as a public attribute, not used internally
        self.nums_of_estimators = nums_of_estimators
        if thresholds is None:
            self.thresholds = np.arange(-0.5,11.5,0.5)
        else:
            self.thresholds = np.asarray(thresholds,dtype=float)
        #np.random (module) and RandomState both expose choice(), so either can serve as the generator
        self._rng = np.random if random_state is None else np.random.RandomState(random_state)
        self.weight_of_x = np.full(self.nums,1.0/self.nums) #record weight of training data
        self.threshold_of_weak_classifier = [] #record the (threshold,left) chosen in each round
        self.alpha_rate_of_weak_classifier = [] #record the alpha of weak classifier in each round

    @staticmethod
    def _stump_predict(value,threshold,left):
        """
        Label assigned to `value` by the threshold classifier (threshold,left)

        RETURN : 1 / -1
        """
        #+1 when the value is on the left side and left is 'pos', or on the right side and left is not 'pos'
        return 1 if (value <= threshold) == (left == 'pos') else -1

    def _sample_indices(self):
        """
        Draw self.nums training indices with replacement, each index picked with
        probability proportional to its current weight

        RETURN : array of sampled indices
        """
        weights = np.asarray(self.weight_of_x,dtype=float)
        probabilities = weights / weights.sum() #choice() requires probabilities that sum to 1
        return self._rng.choice(self.nums,size=self.nums,replace=True,p=probabilities)

    def sample_data(self):
        """
        To sample the training data according to its weight

        RETURN : Sampled data (list of self.nums feature values)
        """
        return [self.x[i] for i in self._sample_indices()]

    def compute_accuracy(self,y_true,y_pred):
        """
        Compute the accuracy based on ground truth and prediction
        y_true : ground truth
        y_pred : prediction

        RETURN : fraction of positions where y_true equals y_pred (0.0 for empty input)
        """
        truth = np.asarray(y_true)
        if truth.size == 0:
            return 0.0
        #divide by the number of labels actually scored, not by the training-set size
        return np.sum(truth == np.asarray(y_pred)) / truth.size

    def examine_classifier(self,Train_X,Train_Y,threshold,left):
        """
        Compute the accuracy of a weak classifier if using the threshold and sign
        Train_X : Features of Training data
        Train_Y : Labels of Training data
        threshold : A threshold to classify data
        left : indicate the label of value <= threshold. ['pos','neg']

        RETURN : accuracy of prediction
        """
        pred = [self._stump_predict(value,threshold,left) for value in Train_X]
        return self.compute_accuracy(Train_Y,pred)

    def compute_error_rate(self,threshold,left):
        """
        Compute the weighted error rate on the full training set using threshold and left
        threshold : A threshold to classify data
        left : indicate the label of value <= threshold. ['pos','neg']

        RETURN : error rate of the weak classifier and indices of data which are not correctly classified
        """
        #compare against self.y[i] directly so duplicate feature values keep their own labels
        error_index = [i for i in range(self.nums)
                       if self._stump_predict(self.x[i],threshold,left) != self.y[i]]
        error_rate = np.sum([self.weight_of_x[i] for i in error_index]) #sum of weights of misclassified data
        return error_rate,error_index

    def update_weights(self,alpha,error_index,error_rate):
        """
        Update the weight of training data
        alpha : the weight of weak classifier in this round
        error_index : index of training data which are not correctly classified in this round
        error_rate : error rate of classifier (accepted for interface compatibility; not used)

        RETURN : NONE
        """
        wrong = set(error_index)
        #+1 for misclassified data (weight grows by exp(alpha)), -1 otherwise (weight shrinks by exp(-alpha))
        direction = np.array([1.0 if i in wrong else -1.0 for i in range(self.nums)])
        updated_weights = np.asarray(self.weight_of_x,dtype=float) * np.exp(alpha * direction)
        total = updated_weights.sum()
        if not np.isfinite(total) or total <= 0:
            return #cannot normalise (e.g. infinite alpha passed in); keep the previous weights
        self.weight_of_x = updated_weights / total #normalise so the weights sum to 1
        return

    def weak_classifier(self,index):
        """
        Fit one threshold classifier, then record it and update the data weights
        index : indicate the number of current round

        RETURN : None
        """
        if index == 0: #if it is the first round, do not use the sampling method
            Train_X,Train_Y = self.x,self.y
        else:
            sampled = self._sample_indices()
            Train_X = [self.x[i] for i in sampled]
            Train_Y = [self.y[i] for i in sampled]
        best_acc = -1 #record the best accuracy
        best_threshold = (-0.5,'pos') #record the best classify method
        for threshold in self.thresholds: #try each candidate threshold
            for left in ['pos','neg']: #for each threshold, two label methods
                acc = self.examine_classifier(Train_X,Train_Y,threshold,left)
                if acc > best_acc: #strictly better only, so the earliest candidate wins ties
                    best_threshold = (threshold,left)
                    best_acc = acc
        error_rate,error_index = self.compute_error_rate(best_threshold[0],best_threshold[1])
        #keep the ratio finite when the classifier is perfect (error 0) or always wrong (error 1)
        clipped_error = min(max(error_rate,self._ERROR_EPSILON),1 - self._ERROR_EPSILON)
        alpha = 0.5 * np.log((1 - clipped_error)/clipped_error)
        self.update_weights(alpha,error_index,error_rate) #update weights of data
        self.threshold_of_weak_classifier.append((best_threshold[0],best_threshold[1])) #record the classify method
        self.alpha_rate_of_weak_classifier.append(alpha) #record the weight of weak classifier
        return

    def fit(self):
        """
        Entry to start the algorithm. Any previously fitted state is discarded first,
        so calling fit() again retrains from uniform weights.

        RETURN : None
        """
        self.weight_of_x = np.full(self.nums,1.0/self.nums)
        self.threshold_of_weak_classifier = []
        self.alpha_rate_of_weak_classifier = []
        for round_index in range(self.nums_of_estimators):
            self.weak_classifier(round_index)

    def predict(self,x):
        """
        Predict the given data using aggregate classifier (alpha-weighted vote of the
        first nums_of_estimators recorded weak classifiers)

        RETURN : -1 / +1 (0 if the weighted vote is exactly tied)
        """
        ans = 0
        for i in range(self.nums_of_estimators):
            threshold,left = self.threshold_of_weak_classifier[i]
            ans += self.alpha_rate_of_weak_classifier[i] * self._stump_predict(x,threshold,left)
        return int(np.sign(ans))

    def display_function(self):
        """
        Display the information of each weak Classifier

        RETURN : None
        """
        report = """The weight of the weak classifier in Round {} is {:.4f}, The threshold is {:.1f},
                    And the label of left side of the threshold is {}"""
        for i in range(self.nums_of_estimators):
            threshold,left = self.threshold_of_weak_classifier[i]
            print(report.format(i+1,self.alpha_rate_of_weak_classifier[i],threshold,left))

In [3]:
# Toy 1-D training set: ten integer features (0..9) and their +1 / -1 labels.
x = list(range(10))
y = [1, 1, -1, -1, -1, 1, 1, -1, -1, 1]

In [4]:
# Rounds after the first resample the training data at random, so seed NumPy's
# global generator to make the printed report reproducible on a fresh kernel.
np.random.seed(0)

adaboost = Adaboost(x,y,nums_of_estimators=8)
adaboost.fit()

# Score the boosted classifier on the data it was trained on, then list every round.
train_predictions = [adaboost.predict(i) for i in x]
print('The Accuracy on Training Data is {}'.format(adaboost.compute_accuracy(y,train_predictions)))
adaboost.display_function()

The Accuracy on Training Data is 1.0
The weight of the weak classifier in Round 1 is 0.4236, The threshold is 1.0,
                    And the label of left side of the threshold is pos
The weight of the weak classifier in Round 2 is 0.2428, The threshold is 6.0,
                    And the label of left side of the threshold is pos
The weight of the weak classifier in Round 3 is 0.2098, The threshold is -0.5,
                    And the label of left side of the threshold is neg
The weight of the weak classifier in Round 4 is 0.5734, The threshold is 4.0,
                    And the label of left side of the threshold is neg
The weight of the weak classifier in Round 5 is 0.5064, The threshold is 1.0,
                    And the label of left side of the threshold is pos
The weight of the weak classifier in Round 6 is 0.2772, The threshold is -0.5,
                    And the label of left side of the threshold is neg
The weight of the weak classifier in Round 7 is 0.3687, The thresho