In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np  
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler 
# from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels
from sklearn.ensemble import AdaBoostClassifier

In [3]:
# Read the dataset from disk; the header row is inferred from the file's first line.
dataset = pd.read_csv('dataset.csv', header='infer')

In [4]:
# Features are every column except the last one; the last column is the target.
X, y = dataset.iloc[:, :-1], dataset.iloc[:, -1]

In [5]:
# Hold out 20% of the rows for testing. The fixed random_state makes the split,
# and therefore every score computed from it, reproducible across kernel restarts.
XTrain, XTest, yTrain, yTest = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Convert the features to float64 so StandardScaler does not complain about the dtype.
XTrain = XTrain.astype(np.float64)
XTest = XTest.astype(np.float64)

In [6]:
# Fit the scaler on the training split only, then apply that same fitted
# transform to both splits so the test rows never influence the scaling.
scaler = StandardScaler().fit(XTrain)
XTrain, XTest = scaler.transform(XTrain), scaler.transform(XTest)

In [1]:
# Accuracy per ensemble size: entry k-1 holds the score obtained with k boosting rounds.
N_ROUNDS = 100
SEED = 42  # fixed so the curves are reproducible across kernel restarts

# Fit AdaBoost once with the largest ensemble. Boosting adds trees one at a
# time, so staged_predict() can replay the predictions after 1, 2, ... rounds
# instead of refitting 1 + 2 + ... + N_ROUNDS trees from scratch.
# NOTE(review): recent scikit-learn releases deprecate algorithm="SAMME.R" --
# confirm against the installed version.
ADBC = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=3),
    n_estimators=N_ROUNDS,
    learning_rate=1.5,
    algorithm="SAMME.R",
    random_state=SEED)
ADBC.fit(XTrain, yTrain)

Acc_Sc_Ad_Train = []
Acc_Sc_Ad_Test = []
for predTrainAda, predTestAda in zip(ADBC.staged_predict(XTrain),
                                     ADBC.staged_predict(XTest)):
    Acc_Sc_Ad_Train.append(accuracy_score(yTrain, predTrainAda))
    Acc_Sc_Ad_Test.append(accuracy_score(yTest, predTestAda))

# Boosting can terminate before N_ROUNDS (e.g. after a perfect fit). Asking for
# more rounds would then return the same model, so hold the last score flat and
# keep one entry per requested ensemble size.
missing_rounds = N_ROUNDS - len(Acc_Sc_Ad_Train)
if missing_rounds > 0 and Acc_Sc_Ad_Train:
    Acc_Sc_Ad_Train.extend([Acc_Sc_Ad_Train[-1]] * missing_rounds)
    Acc_Sc_Ad_Test.extend([Acc_Sc_Ad_Test[-1]] * missing_rounds)

# Baseline: the base classifier on its own. It does not depend on the number of
# boosting rounds, so it is fitted once and its score repeated to line up with
# the AdaBoost curves.
BL = DecisionTreeClassifier(max_depth=3, random_state=SEED)
BL.fit(XTrain, yTrain)

predTrainBL = BL.predict(XTrain)
predTestBL = BL.predict(XTest)

Acc_Sc_Bl_Train = [accuracy_score(yTrain, predTrainBL)] * N_ROUNDS
Acc_Sc_Bl_Test = [accuracy_score(yTest, predTestBL)] * N_ROUNDS

NameError: name 'AdaBoostClassifier' is not defined

In [None]:
# Sanity check: number of baseline test-accuracy scores collected above
# (one per ensemble size evaluated).
len(Acc_Sc_Bl_Test)

In [None]:
# Learning curves: AdaBoost accuracy against the number of boosting rounds.
# The x-axis is derived from the data so it always matches the score lists.
rounds = range(1, len(Acc_Sc_Ad_Train) + 1)

fig, ax = plt.subplots(figsize=(18, 6), dpi=80, facecolor='w', edgecolor='k')
# No explicit colour in the format strings: the default colour cycle keeps the
# train and test curves visually distinct.
ax.plot(rounds, Acc_Sc_Ad_Train, '*-', label='AdaBoost (train)')
ax.plot(rounds, Acc_Sc_Ad_Test, '*-', label='AdaBoost (test)')
# The single-tree baseline (Acc_Sc_Bl_Train / Acc_Sc_Bl_Test) is not drawn;
# add two more ax.plot calls to overlay it.

ax.set_ylim(0, 1.2)
ax.grid(which="major", alpha=0.2)
ax.grid(which="minor", alpha=0.5)
ax.set_xlabel('Number of boosting rounds (n_estimators)')
ax.set_ylabel('Accuracy')
ax.set_title('AdaBoost accuracy vs. number of estimators')
ax.legend(loc='lower right')
plt.show()

In [None]:
def AdaBoostFromScratch(X,y, M=10, learning_rate = 1.5):
    """Train a boosted ensemble of decision stumps and print its training accuracy.

    Each round fits a depth-1 ``DecisionTreeClassifier`` on ``(X, y)`` using the
    current sample weights, turns its weighted error into an estimator weight
    ``learning_rate * log((1 - err) / err)``, and multiplies the weights of the
    misclassified samples by ``exp(estimator_weight)``. Sample weights are not
    renormalised between rounds.

    The final prediction for each sample is the label with the largest total
    estimator weight among the stumps that predicted it. For labels in
    ``{-1, +1}`` this is the sign of the weighted sum of the stump predictions.

    Parameters
    ----------
    X : array-like
        Training features, passed unchanged to the stumps' ``fit``/``predict``.
    y : array-like of length N
        Training labels. Converted to a NumPy array so comparisons are
        positional (a pandas index is ignored).
    M : int, default 10
        Number of boosting rounds; must be at least 1.
    learning_rate : float, default 1.5
        Multiplier applied to every estimator weight.

    Returns
    -------
    estimator_list : ndarray of shape (M,)
        The fitted stumps, in training order.
    estimator_weight_list : ndarray of shape (M,)
        The weight given to each stump.
    sample_weight_list : ndarray of shape (M + 1, N)
        The sample weights before the first round and after every round.

    Raises
    ------
    ValueError
        If ``M`` is smaller than 1 or ``y`` is empty.
    """
    if M < 1:
        raise ValueError("M must be at least 1")

    # Plain ndarray: keeps `y_predict != y` and the in-place weight update
    # purely positional, whatever container the caller passed in.
    y = np.asarray(y)
    N = len(y)
    if N == 0:
        raise ValueError("y must contain at least one sample")

    estimator_list, y_predict_list, estimator_weight_list = [], [], []

    #Initialize the sample weights uniformly
    sample_weight = np.ones(N) / N
    sample_weight_list = [sample_weight.copy()]

    # Smallest error we allow; keeps the log-odds below finite.
    eps = np.finfo(float).eps

    #For m = 1 to M
    for _ in range(M):

        #Fit a classifier (a decision stump) on the weighted sample
        estimator = DecisionTreeClassifier(max_depth = 1, max_leaf_nodes=2)
        estimator.fit(X, y, sample_weight=sample_weight)
        y_predict = np.asarray(estimator.predict(X))

        #Misclassifications
        incorrect = (y_predict != y)

        #Estimator error: weighted fraction of misclassified samples
        estimator_error = np.average(incorrect, weights=sample_weight)
        # A perfect (or perfectly wrong) stump would otherwise divide by zero /
        # take log(0), giving an infinite weight and NaN sample weights.
        estimator_error = np.clip(estimator_error, eps, 1.0 - eps)

        #Boost estimator weights
        estimator_weight = learning_rate * np.log((1. - estimator_error) / estimator_error)

        #Boost sample weights: only misclassified samples are rescaled
        sample_weight *= np.exp(estimator_weight * incorrect * ((sample_weight > 0) | (estimator_weight < 0)))

        #Save iteration values
        estimator_list.append(estimator)
        y_predict_list.append(y_predict.copy())
        estimator_weight_list.append(float(estimator_weight))
        sample_weight_list.append(sample_weight.copy())

    #Convert to np array for convenience
    estimator_list = np.asarray(estimator_list)
    y_predict_list = np.asarray(y_predict_list)
    estimator_weight_list = np.asarray(estimator_weight_list)
    sample_weight_list = np.asarray(sample_weight_list)

    #Predictions: weighted vote over the labels seen in y
    classes = np.unique(y)
    votes = np.stack([estimator_weight_list @ (y_predict_list == label)
                      for label in classes])
    preds = classes[votes.argmax(axis=0)]
    print('Accuracy = ', (preds == y).sum() / N)

    return estimator_list, estimator_weight_list, sample_weight_list