In [None]:
import numpy as np
from __future__ import division
%matplotlib inline
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.cross_validation import train_test_split
from sklearn.ensemble import BaggingClassifier, GradientBoostingClassifier, RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import roc_curve, classification_report

## Data generation

In [None]:
# Synthetic two-class "moons" dataset shared by the model cells that follow.
N = 2000         # total number of generated samples
noise_std = 0.3  # standard deviation of the Gaussian noise added to the points
SEED = 0         # fixed seed: the data and the split are identical on every run

X, Y = make_moons(n_samples=N, noise=noise_std, random_state=SEED)
# Alternative dataset, kept for experimentation:
#X, Y = make_classification(n_samples=N, n_features=2, n_redundant=0, n_informative=2, n_clusters_per_class=2)

# Scatter of the raw points, coloured by class label.
fig = plt.figure(figsize=(6, 4))
ax = fig.add_subplot(1, 1, 1)
ax.scatter(X[:, 0], X[:, 1], c=Y, linewidth=0, alpha=0.5, s=20)
ax.grid()

# 50/50 hold-out split. Both sizes are spelled out so the split does not depend
# on the library's default for the complementary part, and the seed makes the
# train/test membership reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, train_size=0.5, test_size=0.5, random_state=SEED)

## Bagging Classifier

In [None]:
# Bagged ensemble of 100 decision trees (each limited to depth 10), trained on
# bootstrap resamples of the training half.
# The base tree is passed positionally on purpose: its keyword was
# `base_estimator` before scikit-learn 1.2, was renamed to `estimator` in 1.2,
# and the old name was removed in 1.4. The first positional slot is accepted by
# all of these releases.
model = BaggingClassifier(DecisionTreeClassifier(max_depth=10),
                          n_jobs=2, max_features=1.0, n_estimators=100, bootstrap=True, random_state=0)

model.fit(X_train, Y_train)

# Evaluate the fitted model on a regular grid that covers the data with a
# 0.5 margin, to draw the decision regions.
x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.05), np.arange(y_min, y_max, 0.05))
Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

# Per-class precision / recall / F1 on the held-out half.
print(classification_report(Y_test, model.predict(X_test), digits=3))

fig = plt.figure(figsize=(14, 5))

# Left panel: predicted class regions with all samples on top
# (class 0 drawn as circles, class 1 as crosses).
ax = fig.add_subplot(1, 2, 1)
ax.contourf(xx, yy, Z, cmap=plt.cm.jet, alpha=0.5)
ax.scatter(X[Y==0, 0], X[Y==0, 1], color='k', marker='o', alpha=0.5)
ax.scatter(X[Y==1, 0], X[Y==1, 1], color='k', marker='x', alpha=0.5)

# Right panel: ROC curves from the predicted probability of class 1, on the
# training and test halves. Decorations go through the Axes object itself
# rather than pyplot's implicit "current axes".
ax = fig.add_subplot(1, 2, 2)
fpr, tpr, th = roc_curve(Y_train, model.predict_proba(X_train)[:, 1])
ax.plot(fpr, tpr, label='Train', linewidth=4)
fpr, tpr, th = roc_curve(Y_test, model.predict_proba(X_test)[:, 1])
ax.plot(fpr, tpr, label='Test', linewidth=4)
ax.grid()
ax.legend(loc=4)
ax.set_xlabel('FPR')
ax.set_ylabel('TPR')
ax.set_ylim([0.0, 1.05])

## Random Forest

In [None]:
# Random forest of 100 gini trees, depth-limited to 10. max_features=None lets
# every split consider all features.
model = RandomForestClassifier(
    n_estimators=100,
    criterion='gini',
    max_depth=10,
    max_features=None,
    n_jobs=2,
    random_state=0,
)
model.fit(X_train, Y_train)

# Grid over the data's bounding box (padded by `pad`, sampled every `step`)
# on which the fitted forest is evaluated to draw its decision regions.
pad, step = 0.5, 0.05
feat_x, feat_y = X[:, 0], X[:, 1]
x_min, x_max = feat_x.min() - pad, feat_x.max() + pad
y_min, y_max = feat_y.min() - pad, feat_y.max() + pad
xx, yy = np.meshgrid(np.arange(x_min, x_max, step),
                     np.arange(y_min, y_max, step))
Z = model.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

# Held-out classification metrics.
print(classification_report(Y_test, model.predict(X_test), digits=3))

fig = plt.figure(figsize=(14, 5))

# Left panel: decision regions with every sample overlaid, one marker per class.
ax = fig.add_subplot(1, 2, 1)
ax.contourf(xx, yy, Z, cmap=plt.cm.jet, alpha=0.5)
for cls, marker in ((0, 'o'), (1, 'x')):
    in_cls = Y == cls
    ax.scatter(feat_x[in_cls], feat_y[in_cls], color='k', marker=marker, alpha=0.5)

# Right panel: ROC curves from the class-1 probability, train first and then
# test, so fpr/tpr/th end up holding the test-set curve.
ax = fig.add_subplot(1, 2, 2)
for label, feats, target in (('Train', X_train, Y_train),
                             ('Test', X_test, Y_test)):
    fpr, tpr, th = roc_curve(target, model.predict_proba(feats)[:, 1])
    ax.plot(fpr, tpr, label=label, linewidth=4)
ax.grid()
ax.legend(loc=4)
ax.set_xlabel('FPR')
ax.set_ylabel('TPR')
ax.set_ylim([0.0, 1.05])