# AdaBoost
---
Adaboost is another algoritm that can be used fro regression as well as classification. Like Random Forest we use CART as base estimator inside the adaptive boosting algorithm.
The principle of AdaBoost is to fit a sequence of weak learners, such as decision stumps, on repeatedly modified versions of data.

The predictions from all the weak learners are combined through a weighted majority vote to produce the final prediction.

Boosting iterations consist of applying weights to each of the training samples.

### Performance
While the performance can be slightly improved with some additional hyperparameter optimization.

### AdaBoost Limitation
The resulting "flat" probability distribution of AdaBoost is its main limitation.

It depends upon our case that if our main concern is if we want to assign the correct class, then the prediction probability is less relevant, else we might want to use Random Forests.

In [8]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

import plotly.express as px
import plotly.graph_objects as go

In [4]:
df = pd.read_csv('../data/weatherAUS.csv')

df = df[pd.isnull(df['RainTomorrow'])==False]
df = df.fillna(df.mean())

df['RainTodayFlag'] = df['RainToday'].apply(lambda x: 1 if x=='Yes' else 0)
df['RainTomorrowFlag'] = df['RainTomorrow'].apply( lambda x: 1 if x =='Yes' else 0)

In [9]:
X = df[['WindGustSpeed', 'Humidity3pm']]
y = df['RainTomorrowFlag'].values

X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.2, random_state=0)

model = AdaBoostClassifier(base_estimator=DecisionTreeClassifier(min_samples_leaf=1000, max_depth=1),
                           n_estimators=50, # default=50
                           learning_rate=1.0, # default=1. 
                           algorithm='SAMME', # SAMME' - discreate, 'SAMME.R' - real
                           random_state=0, # random state for reproducibility
                          )
clf = model.fit(X_train, y_train)

pred_labels_tr = model.predict(X_train)
pred_labels_te = model.predict(X_test)

In [10]:
print('*************** Tree Summary ***************')
print('No. of classes: ', clf.n_classes_)
print('Classes: ', clf.classes_)
print('No. of Estimators: ', len(clf.estimators_))
print('Base Estimator: ', clf.base_estimator_)
print('--------------------------------------------------------')
print("")

print('*************** Evaluation on Test Data ***************')
score_te = model.score(X_test, y_test)
print('Accuracy Score: ', score_te)
print(classification_report(y_test, pred_labels_te))
print('--------------------------------------------------------')
print("")

print('*************** Evaluation on Training Data ***************')
score_tr = model.score(X_train, y_train)
print('Accuracy Score: ', score_tr)
print(classification_report(y_train, pred_labels_tr))
print('--------------------------------------------------------')

*************** Tree Summary ***************
No. of classes:  2
Classes:  [0 1]
No. of Estimators:  50
Base Estimator:  DecisionTreeClassifier(max_depth=1, min_samples_leaf=1000)
--------------------------------------------------------

*************** Evaluation on Test Data ***************
Accuracy Score:  0.830338619501389
              precision    recall  f1-score   support

           0       0.85      0.95      0.90     22067
           1       0.70      0.43      0.53      6372

    accuracy                           0.83     28439
   macro avg       0.77      0.69      0.71     28439
weighted avg       0.82      0.83      0.81     28439

--------------------------------------------------------

*************** Evaluation on Training Data ***************
Accuracy Score:  0.8288939290046944
              precision    recall  f1-score   support

           0       0.85      0.95      0.90     88249
           1       0.69      0.42      0.53     25505

    accuracy               

In [11]:
def Plot_3D(X, X_test, y_test, clf, x1, x2, mesh_size, margin):
            
    mesh_size=mesh_size
    margin=margin

    x_min, x_max = X.iloc[:, 0].fillna(X.mean()).min() - margin, X.iloc[:, 0].fillna(X.mean()).max() + margin
    y_min, y_max = X.iloc[:, 1].fillna(X.mean()).min() - margin, X.iloc[:, 1].fillna(X.mean()).max() + margin
    xrange = np.arange(x_min, x_max, mesh_size)
    yrange = np.arange(y_min, y_max, mesh_size)
    xx, yy = np.meshgrid(xrange, yrange)
            
    Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]
    Z = Z.reshape(xx.shape)

    fig = px.scatter_3d(x=[], y=[], z=[],
                     opacity=0.8, color_discrete_sequence=['black'])

    fig.update_layout(paper_bgcolor = 'white',
                      scene = dict(xaxis=dict(title=x1,
                                              backgroundcolor='white',
                                              color='black',
                                              gridcolor='#f0f0f0'),
                                   yaxis=dict(title=x2,
                                              backgroundcolor='white',
                                              color='black',
                                              gridcolor='#f0f0f0'
                                              ),
                                   zaxis=dict(title='Probability of Rain Tomorrow',
                                              backgroundcolor='lightgrey',
                                              color='black', 
                                              gridcolor='#f0f0f0',
                                              range=[0, 1],
                                              tickmode = 'linear',
                                              tick0 = 0,
                                              dtick = 0.2
                                              )))
    
    fig.update_traces(marker=dict(size=1))

    fig.add_traces(go.Surface(x=xrange, y=yrange, z=Z, name='AdaBoost Prediction',
                              colorscale='Bluered',
                              reversescale=True,
                              showscale=False, 
                              contours = {"z": {"show": True, "start": 0.5, "end": 0.9, 
                                                "size": 0.5, "color":"white"}}))
    fig.show()
    return fig
  
fig = Plot_3D(X, X_test, y_test, clf, x1='WindGustSpeed', x2='Humidity3pm', mesh_size=1, margin=1)



X does not have valid feature names, but AdaBoostClassifier was fitted with feature names

