# Ada Boost

In [1]:
import pandas as pd
train = pd.read_csv('prep_train.csv').filter(items=['Reason for absence', 'Month of absence', 'Day of the week', 'Seasons', 'Transportation expense', 'Distance from Residence to Work', 'Age', 'Work load Average/day ', 'Hit target', 'Height', 'Body mass index', 'Absent'])
train.head(10)

Unnamed: 0,Reason for absence,Month of absence,Day of the week,Seasons,Transportation expense,Distance from Residence to Work,Age,Work load Average/day,Hit target,Height,Body mass index,Absent
0,1.0,0.545455,0.25,0.0,0.633333,0.659574,0.26087,0.194471,0.769231,0.272727,0.578947,1.0
1,0.0,0.545455,0.25,0.0,0.0,0.170213,1.0,0.194471,0.769231,0.454545,0.631579,0.0
2,0.5,0.545455,0.5,0.0,0.225926,0.978723,0.478261,0.194471,0.769231,0.212121,0.631579,1.0
3,1.0,0.545455,0.75,0.0,0.596296,0.0,0.521739,0.194471,0.769231,0.151515,0.263158,1.0
4,0.5,0.545455,0.75,0.0,0.633333,0.659574,0.26087,0.194471,0.769231,0.272727,0.578947,1.0
5,0.5,0.545455,1.0,0.0,0.225926,0.978723,0.478261,0.194471,0.769231,0.212121,0.631579,1.0
6,1.0,0.545455,1.0,0.0,0.9,1.0,0.043478,0.194471,0.769231,0.272727,0.421053,1.0
7,0.5,0.545455,1.0,0.0,0.525926,0.957447,0.391304,0.194471,0.769231,0.151515,0.210526,1.0
8,1.0,0.545455,0.0,0.0,0.137037,0.148936,0.304348,0.194471,0.769231,1.0,0.315789,1.0
9,1.0,0.545455,0.0,0.0,0.433333,0.12766,0.434783,0.194471,0.769231,0.272727,0.526316,1.0


In [2]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

param_grid = {
    'base_estimator'        : [DecisionTreeClassifier(max_depth=1), DecisionTreeClassifier(max_depth=4), DecisionTreeClassifier(max_depth=16)],
    'n_estimators'          : [5,10,20, 50, 100],
    'learning_rate'         : [0.1, 0.5, 1, 2, 10]
}


X_train, X_test, y_train, y_test = train_test_split(train.drop(['Absent'],axis=1), 
                                                    train['Absent'], test_size=0.30, 
                                                    random_state=50)

grid = GridSearchCV(AdaBoostClassifier(),param_grid,refit=True,verbose=3, scoring='balanced_accuracy')
grid.fit(X_train,y_train)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s


Fitting 3 folds for each of 75 candidates, totalling 225 fits
[CV] base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=1,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best'), learning_rate=0.1, n_estimators=5 
[CV]  base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=1,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best'), learning_rate=0.1, n_estimators=5, score=0.660, total=  

[CV]  base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=1,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best'), learning_rate=0.1, n_estimators=50, score=0.660, total=   0.1s
[CV] base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=1,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best'), learning_rate=0.1, n_estimators=50 
[CV]  base_estimator=DecisionTreeClassifier(class_weig

[CV]  base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=1,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best'), learning_rate=0.5, n_estimators=20, score=0.660, total=   0.0s
[CV] base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=1,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best'), learning_rate=0.5, n_estimators=50 
[CV]  base_estimator=DecisionTreeClassifier(class_weig

[CV]  base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=1,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best'), learning_rate=1, n_estimators=10, score=0.718, total=   0.0s
[CV] base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=1,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best'), learning_rate=1, n_estimators=10 
[CV]  base_estimator=DecisionTreeClassifier(class_weight=N

[CV]  base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=1,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best'), learning_rate=1, n_estimators=100, score=0.742, total=   0.2s
[CV] base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=1,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best'), learning_rate=1, n_estimators=100 
[CV]  base_estimator=DecisionTreeClassifier(class_weight

[CV]  base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=1,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best'), learning_rate=2, n_estimators=50, score=0.806, total=   0.1s
[CV] base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=1,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best'), learning_rate=2, n_estimators=50 
[CV]  base_estimator=DecisionTreeClassifier(class_weight=N

[CV]  base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=1,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best'), learning_rate=10, n_estimators=50, score=0.522, total=   0.1s
[CV] base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=1,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best'), learning_rate=10, n_estimators=50 
[CV]  base_estimator=DecisionTreeClassifier(class_weight

[CV]  base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best'), learning_rate=0.1, n_estimators=20, score=0.644, total=   0.0s
[CV] base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best'), learning_rate=0.1, n_estimators=50 
[CV]  base_estimator=DecisionTreeClassifier(class_weig

[CV]  base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best'), learning_rate=0.5, n_estimators=20, score=0.582, total=   0.0s
[CV] base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best'), learning_rate=0.5, n_estimators=20 
[CV]  base_estimator=DecisionTreeClassifier(class_weig

[CV]  base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best'), learning_rate=1, n_estimators=20, score=0.666, total=   0.0s
[CV] base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best'), learning_rate=1, n_estimators=20 
[CV]  base_estimator=DecisionTreeClassifier(class_weight=N

[CV]  base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best'), learning_rate=2, n_estimators=20, score=0.582, total=   0.0s
[CV] base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best'), learning_rate=2, n_estimators=20 
[CV]  base_estimator=DecisionTreeClassifier(class_weight=N

[CV]  base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best'), learning_rate=10, n_estimators=20, score=0.623, total=   0.0s
[CV] base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best'), learning_rate=10, n_estimators=50 
[CV]  base_estimator=DecisionTreeClassifier(class_weight

[CV]  base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=16,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best'), learning_rate=0.1, n_estimators=20, score=0.684, total=   0.1s
[CV] base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=16,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best'), learning_rate=0.1, n_estimators=20 
[CV]  base_estimator=DecisionTreeClassifier(class_we

[CV]  base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=16,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best'), learning_rate=0.5, n_estimators=20, score=0.667, total=   0.0s
[CV] base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=16,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best'), learning_rate=0.5, n_estimators=50 
[CV]  base_estimator=DecisionTreeClassifier(class_we

[CV]  base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=16,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best'), learning_rate=1, n_estimators=20, score=0.662, total=   0.0s
[CV] base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=16,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best'), learning_rate=1, n_estimators=20 
[CV]  base_estimator=DecisionTreeClassifier(class_weight

[CV]  base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=16,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best'), learning_rate=2, n_estimators=20, score=0.653, total=   0.0s
[CV] base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=16,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best'), learning_rate=2, n_estimators=50 
[CV]  base_estimator=DecisionTreeClassifier(class_weight

[CV]  base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=16,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best'), learning_rate=10, n_estimators=20, score=0.682, total=   0.0s
[CV] base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=16,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best'), learning_rate=10, n_estimators=20 
[CV]  base_estimator=DecisionTreeClassifier(class_weig

[Parallel(n_jobs=1)]: Done 225 out of 225 | elapsed:   19.4s finished


GridSearchCV(cv='warn', error_score='raise-deprecating',
             estimator=AdaBoostClassifier(algorithm='SAMME.R',
                                          base_estimator=None,
                                          learning_rate=1.0, n_estimators=50,
                                          random_state=None),
             iid='warn', n_jobs=None,
             param_grid={'base_estimator': [DecisionTreeClassifier(class_weight=None,
                                                                   criterion='gini',
                                                                   max_depth=1,
                                                                   max_features=None,
                                                                   max_leaf_nodes=None,
                                                                   min_impurity...
                                                                   max_leaf_nodes=None,
                                           

In [3]:
print(grid.best_params_)
predictions = grid.predict(X_test)
print(confusion_matrix(y_test,predictions))
print(accuracy_score(y_test,predictions))

{'base_estimator': DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=1,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best'), 'learning_rate': 1, 'n_estimators': 100}
[[ 16  14]
 [ 13 107]]
0.82
