In [30]:
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split

x, y = make_moons(n_samples=10000,
                  shuffle = True,
                  noise = 0.2,
                  random_state = True,)

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

log_clf = LogisticRegression()
rnd_clf = RandomForestClassifier()
svm_clf = SVC()

voting_clf = VotingClassifier(
    estimators=[("lr", log_clf),("rf", rnd_clf), ("svc", svm_clf)],
    voting="hard"
)
voting_clf.fit(x_train, y_train)

In [11]:
from sklearn.metrics import accuracy_score
for clf in (log_clf, rnd_clf, svm_clf, voting_clf):
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)
    print(clf.__class__.__name__, accuracy_score(y_pred, y_test))

LogisticRegression 0.864
RandomForestClassifier 0.9645
SVC 0.968
VotingClassifier 0.9625


In [12]:
svm_clf = SVC(probability=True)
voting_clf = VotingClassifier(
    estimators=[("lr", log_clf),("rf", rnd_clf), ("svc", svm_clf)],
    voting="soft"
)
voting_clf.fit(x_train, y_train)

from sklearn.metrics import accuracy_score
for clf in (log_clf, rnd_clf, svm_clf, voting_clf):
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)
    print(clf.__class__.__name__, accuracy_score(y_pred, y_test))

LogisticRegression 0.864
RandomForestClassifier 0.964
SVC 0.968
VotingClassifier 0.961


In [17]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

bag_clf = BaggingClassifier(
    DecisionTreeClassifier(), n_estimators=500,max_samples=100, bootstrap=True, n_jobs=-1, oob_score=True
)
bag_clf.fit(x_train, y_train)
y_pred = bag_clf.predict(x_test)
accuracy_score(y_pred, y_test)

0.956

In [18]:
bag_clf.oob_score_

0.960375

In [21]:
from sklearn.ensemble import RandomForestClassifier

rnd_clf = RandomForestClassifier(n_estimators=500, max_leaf_nodes=16, n_jobs=-1)
rnd_clf.fit(x_train, y_train)
y_pred = rnd_clf.predict(x_test)
accuracy_score(y_pred, y_test)

0.9605

In [22]:
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(splitter="random", max_leaf_nodes=16),
    n_estimators=500, max_samples=1.0, bootstrap=True, n_jobs=-1
)
bag_clf.fit(x_train, y_train)
y_pred = bag_clf.predict(x_test)
accuracy_score(y_pred, y_test)

0.9535

In [27]:
from sklearn.tree import ExtraTreeClassifier

ext_clf = ExtraTreeClassifier()
ext_clf.fit(x_train, y_train)
y_pred = ext_clf.predict(x_test)
accuracy_score(y_pred, y_test)

0.944

In [29]:
from sklearn.datasets import load_iris
iris = load_iris()
rnd_clf = RandomForestClassifier(n_estimators=500, n_jobs=-1)
rnd_clf.fit(iris.data, iris.target)
for name, score in zip(iris.feature_names, rnd_clf.feature_importances_):
    print(name, score)

sepal length (cm) 0.09237851127445557
sepal width (cm) 0.024414356325868955
petal length (cm) 0.45686816123880664
petal width (cm) 0.42633897116086883


In [33]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

ada_clf = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=1), n_estimators=200, algorithm="SAMME.R", learning_rate=0.5
)

ada_clf.fit(x_train, y_train)
y_pred = ada_clf.predict(x_test)
accuracy_score(y_pred, y_test)

0.969

In [34]:
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split

x, y = make_moons(n_samples=1000,
                  shuffle = True,
                  noise = None,
                  random_state = True,)

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [20]:
from sklearn.tree import DecisionTreeRegressor

tree_reg1 = DecisionTreeRegressor(max_depth=2)
tree_reg1.fit(x_train, y_train)

y2 = y_train - tree_reg1.predict(x_train)
tree_reg2 = DecisionTreeRegressor(max_depth=2)
tree_reg2.fit(x_train, y2)

y3 = y_train - tree_reg2.predict(x_train)
tree_reg3 = DecisionTreeRegressor(max_depth=2)
tree_reg3.fit(x_train, y3)

y_pred = sum(tree.predict(x_test) for tree in (tree_reg1, tree_reg2, tree_reg3))

In [26]:
from sklearn.ensemble import GradientBoostingRegressor
gbrt = GradientBoostingRegressor(max_depth=2, n_estimators=3, learning_rate=1.0)
gbrt.fit(x_train, y_train)

GradientBoostingRegressor(learning_rate=1.0, max_depth=2, n_estimators=3)

In [27]:
gbrt.predict(x_test)

array([7.42036817e-17, 1.13513514e-01, 6.28985507e-01, 0.00000000e+00,
       9.73513514e-01, 1.62318841e-01, 9.73513514e-01, 1.62318841e-01,
       9.73513514e-01, 9.73513514e-01, 9.73513514e-01, 9.73513514e-01,
       9.73513514e-01, 9.73513514e-01, 1.13513514e-01, 9.73513514e-01,
       7.42036817e-17, 9.73513514e-01, 6.28985507e-01, 9.73513514e-01])

In [29]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

gbrt = GradientBoostingRegressor(max_depth=2, n_estimators=120)
gbrt.fit(x_train, y_train)

errors = [mean_squared_error(y_test, y_pred) for y_pred in gbrt.staged_predict(x_test)]
bst_n_estimators = np.argmin(errors)

gbrt_best = GradientBoostingRegressor(max_depth=2, n_estimators=bst_n_estimators)
gbrt_best.fit(x_train, y_train)

GradientBoostingRegressor(max_depth=2, n_estimators=119)

In [35]:
gbrt = GradientBoostingRegressor(max_depth=2, warm_start=True)

min_val_error = float("inf")
error_going_up = 0
for n_estimators in range(1, 120):
    gbrt.n_estimators = n_estimators
    gbrt.fit(x_train, y_train)
    y_pred = gbrt.predict(x_test)
    val_error = mean_squared_error(y_test, y_pred)
    if val_error < min_val_error:
        min_val_error = val_error
        error_going_up = 0
    else:
        error_going_up += 1
    if error_going_up == 5:
        break

In [36]:
gbrt_best.fit(x_train, y_train)

GradientBoostingRegressor(max_depth=2, n_estimators=119)