In [5]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import make_gaussian_quantiles

import pandas as pd



In [8]:
# Build the toy dataset out of two Gaussian-quantile clusters.
# Cluster one: 200 two-feature samples, covariance scale 2, mean left at
# the library default.
X1, y1 = make_gaussian_quantiles(
    cov=2.,
    n_samples=200,
    n_features=2,
    n_classes=2,
    random_state=1,
)
# Cluster two: 300 two-feature samples centred on (3, 3), covariance scale 1.5.
X2, y2 = make_gaussian_quantiles(
    mean=(3, 3),
    cov=1.5,
    n_samples=300,
    n_features=2,
    n_classes=2,
    random_state=1,
)
# Stack the feature rows; the second cluster's labels are mapped through
# 1 - y2 before being appended to the first cluster's labels.
X = np.vstack((X1, X2))
y = np.hstack((y1, 1 - y2))


In [13]:
# Echo the stacked feature matrix as this cell's output
# (built above from 200 + 300 samples with 2 features each).
# NOTE(review): a bare `X` prints the whole array; `X[:5]` or `X.shape`
# would keep the recorded output short.
X

array([[ 7.00334571e-01, -2.47067578e-01],
       [-3.95001869e+00,  2.74007953e+00],
       [ 1.50221617e-01, -2.15763780e+00],
       [-1.67205033e+00, -9.41519069e-01],
       [ 2.56048303e+00, -1.84657672e+00],
       [-1.72497907e+00,  3.46393036e+00],
       [ 1.89572778e-01,  1.69996229e+00],
       [-3.34372344e-01,  1.02928372e+00],
       [ 3.09811727e+00, -2.68185934e+00],
       [-1.24066342e+00, -1.22723646e+00],
       [-3.78850955e-01,  7.50035894e-01],
       [ 2.92145879e+00, -2.08053008e+00],
       [-2.67496068e-03, -1.97511955e+00],
       [ 7.48777043e-01,  1.94738919e-01],
       [ 1.60339212e+00, -1.55548115e+00],
       [ 4.95746877e-01, -1.85584900e+00],
       [ 2.06773287e+00, -2.91347893e+00],
       [-1.02614929e+00, -1.22721023e+00],
       [-1.05664139e+00,  2.39349225e+00],
       [-1.55651057e+00,  1.61888380e+00],
       [-1.44516496e+00,  1.12363261e+00],
       [ 8.59137508e-02,  2.98575492e-01],
       [ 1.72281294e-01,  1.59733146e+00],
       [ 3.

In [None]:
# Create and fit an AdaBoost ensemble of depth-1 decision trees (stumps).
# NOTE(review): recent scikit-learn releases deprecate the `algorithm`
# argument -- confirm against the installed version before removing it.

bdt = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1),
                         algorithm="SAMME",
                         n_estimators=200,
                         # Fixed seed so a Restart & Run All rebuilds the
                         # same ensemble (same value as the dataset seed).
                         random_state=1)

bdt.fit(X, y)

# Plot settings read by the plotting cells that follow:
plot_colors = "br"   # one Matplotlib colour code per class
plot_step = 0.02     # spacing of the decision-boundary evaluation grid
class_names = "AB"   # one display letter per class

# NOTE(review): the figure is created here but the subplots are drawn in
# later cells; with an inline backend each cell may render its own figure,
# so this size may not carry over -- confirm with the backend in use.
plt.figure(figsize=(10, 5))



In [None]:
# Draw the classifier's decision regions in the left-hand panel
plt.subplot(121)

# Data bounding box, padded by one unit on every side
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1

# Evaluation grid over the padded box, spaced `plot_step` apart
grid_x = np.arange(x_min, x_max, plot_step)
grid_y = np.arange(y_min, y_max, plot_step)
xx, yy = np.meshgrid(grid_x, grid_y)

# Predict a class for every grid node, then fold the flat prediction
# vector back into the grid's shape for contouring
grid_points = np.column_stack((xx.ravel(), yy.ravel()))
Z = bdt.predict(grid_points).reshape(xx.shape)
cs = plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)
plt.axis("tight")



In [None]:
# Overlay the training points: one scatter call per class so that each
# class gets its own colour and legend entry.
for class_label, class_name, color in zip(range(2), class_names, plot_colors):
    # A boolean mask selects this class's rows directly and keeps the
    # coordinate arrays one-dimensional (the tuple returned by np.where
    # would add a leading axis when used inside X[..., 0]).
    in_class = y == class_label
    # `color` is a literal colour code, so no colormap is passed:
    # Matplotlib ignores `cmap` when `c` is not numeric data to be mapped.
    plt.scatter(X[in_class, 0], X[in_class, 1],
                c=color,
                s=20, edgecolor='k',
                label="Class %s" % class_name)
# Reuse the padded data limits computed for the decision-boundary grid
plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)
plt.legend(loc='upper right')
plt.xlabel('x')
plt.ylabel('y')
plt.title('Decision Boundary')



In [None]:
# Plot the two-class decision scores as per-class histograms in the
# right-hand panel.
twoclass_output = bdt.decision_function(X)
# Shared range so both classes are binned over the same score interval
plot_range = (twoclass_output.min(), twoclass_output.max())
plt.subplot(122)
for i, n, c in zip(range(2), class_names, plot_colors):
    plt.hist(twoclass_output[y == i],
             bins=10,
             range=plot_range,
             facecolor=c,
             label='Class %s' % n,
             alpha=.5,
             edgecolor='k')
# Dedicated names for the axis limits: unpacking into y1/y2 would rebind
# the class-label arrays created in the dataset cell.
ax_xmin, ax_xmax, ax_ymin, ax_ymax = plt.axis()
# Raise the upper y-limit by 20%
plt.axis((ax_xmin, ax_xmax, ax_ymin, ax_ymax * 1.2))
plt.legend(loc='upper right')
plt.ylabel('Samples')
plt.xlabel('Score')
plt.title('Decision Scores')

plt.tight_layout()
plt.subplots_adjust(wspace=0.35)
plt.show()