In [16]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split

X, y = make_moons(n_samples=500, random_state=42,noise=0.30)
X_train, X_test,y_train, y_test = train_test_split(X, y, random_state = 42)


log_clf = LogisticRegression(solver="lbfgs", random_state=42)
rf_clf = RandomForestClassifier(n_estimators=100, random_state=42)
svm_clf = SVC(gamma="scale", random_state=42,probability=True)

vot_clf = VotingClassifier(estimators=[("log",log_clf), ("rf", rf_clf), ("svm",  svm_clf)],voting='soft')



In [17]:
from sklearn.metrics import accuracy_score

for clf in (log_clf, rf_clf, svm_clf, vot_clf):
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(clf.__class__.__name__, accuracy_score(y_test,y_pred))

LogisticRegression 0.864
RandomForestClassifier 0.896
SVC 0.896
VotingClassifier 0.92


In [18]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

bag_clf = BaggingClassifier(DecisionTreeClassifier(), n_estimators=500, max_samples=100, bootstrap=True, n_jobs= -1,oob_score=True)
bag_clf.fit(X_train, y_train)
bag_clf.oob_score_

0.9226666666666666

In [19]:
from sklearn.metrics import accuracy_score

y_pred = bag_clf.predict(X_test)
accuracy_score(y_test, y_pred)

0.904

In [20]:
bag_clf.oob_decision_function_

array([[0.34673367, 0.65326633],
       [0.36387435, 0.63612565],
       [1.        , 0.        ],
       [0.0078125 , 0.9921875 ],
       [0.02040816, 0.97959184],
       [0.11166253, 0.88833747],
       [0.40540541, 0.59459459],
       [0.06701031, 0.93298969],
       [0.92913386, 0.07086614],
       [0.84210526, 0.15789474],
       [0.51937984, 0.48062016],
       [0.04726368, 0.95273632],
       [0.7154047 , 0.2845953 ],
       [0.84236453, 0.15763547],
       [0.92947103, 0.07052897],
       [0.09448819, 0.90551181],
       [0.03856041, 0.96143959],
       [0.92328767, 0.07671233],
       [0.66842105, 0.33157895],
       [0.95979899, 0.04020101],
       [0.03208556, 0.96791444],
       [0.23243243, 0.76756757],
       [0.88917526, 0.11082474],
       [0.9822335 , 0.0177665 ],
       [0.96410256, 0.03589744],
       [0.00258398, 0.99741602],
       [0.97540984, 0.02459016],
       [1.        , 0.        ],
       [0.03084833, 0.96915167],
       [0.71891892, 0.28108108],
       [0.

In [21]:
rnd_clf = RandomForestClassifier(n_estimators=500, max_leaf_nodes=16, n_jobs=-1)
rnd_clf.fit(X_train, y_train)
y_pred_rf = rnd_clf.predict(X_test)

In [22]:
from sklearn.datasets import load_iris

iris = load_iris()

rnd_clf = RandomForestClassifier(n_estimators = 500, n_jobs=-1)
rnd_clf = rnd_clf.fit(iris["data"], iris["target"])
for name, score in zip(iris["feature_names"],rnd_clf.feature_importances_):
    print(name, score)

sepal length (cm) 0.10482673067583043
sepal width (cm) 0.026375756577450577
petal length (cm) 0.4233338439029198
petal width (cm) 0.4454636688437992


In [23]:
from sklearn.ensemble import AdaBoostClassifier

ada_clf = AdaBoostClassifier(
DecisionTreeClassifier(max_depth = 1), n_estimators=200,algorithm="SAMME.R",learning_rate=0.5)
ada_clf.fit(X_train,y_train)

AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=1),
                   learning_rate=0.5, n_estimators=200)

In [24]:
import numpy as np
np.random.seed(42)
X = np.random.rand(100, 1) - 0.5
y = 3*X[:, 0]**2 + 0.05 * np.random.randn(100)

In [25]:
from sklearn.tree import DecisionTreeRegressor

tree_reg1 = DecisionTreeRegressor(max_depth=2)
tree_reg1.fit(X,y)

DecisionTreeRegressor(max_depth=2)

In [26]:
y2 = y - tree_reg1.predict(X)

In [27]:
tree_reg2 = DecisionTreeRegressor(max_depth=2)
tree_reg2.fit(X,y2)

DecisionTreeRegressor(max_depth=2)

In [29]:
y3 = y2 - tree_reg2.predict(X)
tree_reg3 = DecisionTreeRegressor(max_depth=2)
tree_reg3.fit(X,y3)

DecisionTreeRegressor(max_depth=2)

In [31]:
X_new = np.array([[0.8]])
y_pred = sum(tree.predict(X_new) for tree in (tree_reg1, tree_reg2, tree_reg3))
y_pred

array([0.75026781])

In [32]:
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import GradientBoostingRegressor


X_train, X_val, y_train, y_val = train_test_split(X, y)

gbtr = GradientBoostingRegressor(max_depth = 2,n_estimators=120)
gbtr.fit(X_train, y_train)

erros = [mean_squared_error(y_val, y_pred) for y_pred in gbtr.staged_predict(X_val)]
best_n_estimators = np.argmin(erros) + 1

gbtr_best = GradientBoostingRegressor(max_depth = 2, n_estimators = best_n_estimators)
gbtr_best.fit(X_train, y_train)

GradientBoostingRegressor(max_depth=2, n_estimators=80)

In [34]:
gbtr = GradientBoostingRegressor(max_depth = 2, warm_start=True)

min_val_error = float("inf")
errors_going_up = 0

for n_estimator in range (1,120):
    gbtr.n_estimators = n_estimator
    gbtr.fit(X_train, y_train)
    y_pred = gbtr.predict(X_val)
    val_error = mean_squared_error(y_val, y_pred)
    if val_error < min_val_error:
        min_val_error = val_error
        errors_going_up = 0 
    else:
        errors_going_up += 1
        if errors_going_up == 5:
            break 