In [44]:
from sklearn.datasets import make_moons
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

In [45]:
X, y = make_moons(n_samples=500, noise=0.3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

voting_clf = VotingClassifier( 
    estimators=[ 
        ('lr', LogisticRegression(random_state=42)),
        ('rf', RandomForestClassifier(random_state=42)),
        ('svc', SVC(random_state=42)) 
    ]
)

voting_clf.fit(X_train, y_train)

0,1,2
,estimators,"[('lr', ...), ('rf', ...), ...]"
,voting,'hard'
,weights,
,n_jobs,
,flatten_transform,True
,verbose,False

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,42
,solver,'lbfgs'
,max_iter,100

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True

0,1,2
,C,1.0
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,probability,False
,tol,0.001
,cache_size,200
,class_weight,


In [46]:
for name, clf in voting_clf.named_estimators_.items():
    print(name, "=", clf.score(X_test, y_test))

lr = 0.864
rf = 0.896
svc = 0.896


In [47]:
voting_clf.predict(X_test[:1])

array([1])

In [48]:
[clf.predict(X_test[:1]) for clf in voting_clf.estimators_]

[array([1]), array([1]), array([0])]

In [49]:
voting_clf.score(X_test, y_test)

0.912

In [50]:
voting_clf.voting = "soft"
voting_clf.named_estimators["svc"].probability = True 
voting_clf.fit(X_train, y_train)
voting_clf.score(X_test,y_test)

0.92

In [51]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

In [52]:
bag_clf = BaggingClassifier(DecisionTreeClassifier(), n_estimators=500, n_jobs=-1, max_samples=100, random_state=42) 
bag_clf.fit(X_train,y_train)

0,1,2
,estimator,DecisionTreeClassifier()
,n_estimators,500
,max_samples,100
,max_features,1.0
,bootstrap,True
,bootstrap_features,False
,oob_score,False
,warm_start,False
,n_jobs,-1
,random_state,42

0,1,2
,criterion,'gini'
,splitter,'best'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [53]:
bag_clf = BaggingClassifier(DecisionTreeClassifier(), n_estimators=500, n_jobs=-1, oob_score=True, random_state=42) 
bag_clf.fit(X_train, y_train)
bag_clf.oob_score_

0.896

In [54]:
from sklearn.metrics import accuracy_score
y_pred = bag_clf.predict(X_test)
accuracy_score(y_test, y_pred)

0.92

In [55]:
from sklearn.ensemble import RandomForestClassifier

In [56]:
rnd_clf = RandomForestClassifier(n_estimators=500, max_leaf_nodes=16, random_state=42, n_jobs=-1)

In [57]:
rnd_clf.fit(X_train, y_train)

0,1,2
,n_estimators,500
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,16
,min_impurity_decrease,0.0
,bootstrap,True


In [58]:
y_pred_rf = rnd_clf.predict(X_test)

In [59]:
bag_clf = BaggingClassifier(DecisionTreeClassifier(max_features="sqrt", max_leaf_nodes=16), n_estimators=500, n_jobs=-1, random_state=42)

In [60]:
from sklearn.datasets import load_iris 
iris = load_iris(as_frame=True)
rnd_clf = RandomForestClassifier(n_estimators=500, random_state=42)
rnd_clf.fit(iris.data, iris.target)
for score, name in zip(rnd_clf.feature_importances_, iris.data.columns):
    print(round(score,2), name)

0.11 sepal length (cm)
0.02 sepal width (cm)
0.44 petal length (cm)
0.42 petal width (cm)


In [61]:
from sklearn.ensemble import AdaBoostClassifier

In [62]:
ada_clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), n_estimators=30, learning_rate=0.5, random_state=42) 
ada_clf.fit(X_train, y_train)

0,1,2
,estimator,DecisionTreeC...r(max_depth=1)
,n_estimators,30
,learning_rate,0.5
,algorithm,'deprecated'
,random_state,42

0,1,2
,criterion,'gini'
,splitter,'best'
,max_depth,1
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [63]:
import numpy as np 
from sklearn.tree import DecisionTreeRegressor

In [64]:
np.random.seed(42) 
X = np.random.rand(100, 1) - 0.5
y = 3 * X[:,0] ** 2 + 0.5 * np.random.rand(100) # y = 3x^2 + Gaussian nosie

In [65]:
tree_reg1 = DecisionTreeRegressor(max_depth=2, random_state=42)
tree_reg1.fit(X,y)

0,1,2
,criterion,'squared_error'
,splitter,'best'
,max_depth,2
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,42
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [66]:
y2 = y - tree_reg1.predict(X) 
tree_reg2 = DecisionTreeRegressor(max_depth=2, random_state=42) 
tree_reg2.fit(X,y2)

0,1,2
,criterion,'squared_error'
,splitter,'best'
,max_depth,2
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,42
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [67]:
y3 = y2 - tree_reg2.predict(X) 
tree_reg3 = DecisionTreeRegressor(max_depth=2, random_state=42) 
tree_reg3.fit(X,y3)

0,1,2
,criterion,'squared_error'
,splitter,'best'
,max_depth,2
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,42
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [68]:
X_new = np.array([[-0.4], [0.], [0.5]])
sum(tree.predict(X_new) for tree in (tree_reg1,tree_reg2,tree_reg3))

array([0.70741604, 0.26121825, 0.87483979])

In [69]:
from sklearn.ensemble import GradientBoostingRegressor

In [70]:
gbrt = GradientBoostingRegressor(max_depth=2, random_state=42, learning_rate=1.0, n_estimators=3) 
gbrt.fit(X,y)

0,1,2
,loss,'squared_error'
,learning_rate,1.0
,n_estimators,3
,subsample,1.0
,criterion,'friedman_mse'
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_depth,2
,min_impurity_decrease,0.0


In [71]:
gbrt_best = GradientBoostingRegressor(max_depth=2, learning_rate=0.05, n_estimators=500, n_iter_no_change=10, random_state=42)
gbrt_best.fit(X,y)

0,1,2
,loss,'squared_error'
,learning_rate,0.05
,n_estimators,500
,subsample,1.0
,criterion,'friedman_mse'
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_depth,2
,min_impurity_decrease,0.0


In [72]:
gbrt_best.n_estimators_

73

In [73]:
from sklearn.ensemble import StackingClassifier

In [74]:
stacking_clf = StackingClassifier(estimators=[
    ('lr', LogisticRegression(random_state=42)), 
    ('rf', RandomForestClassifier(random_state=42,)), 
    ('svc', SVC(probability=True, random_state=42))
], 
    final_estimator=RandomForestClassifier(random_state=43),
    cv=5 # number of cross-validation folds
                                 )

In [75]:
stacking_clf.fit(X_train, y_train)

0,1,2
,estimators,"[('lr', ...), ('rf', ...), ...]"
,final_estimator,RandomForestC...ndom_state=43)
,cv,5
,stack_method,'auto'
,n_jobs,
,passthrough,False
,verbose,0

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,42
,solver,'lbfgs'
,max_iter,100

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True

0,1,2
,C,1.0
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,probability,True
,tol,0.001
,cache_size,200
,class_weight,

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [76]:
stacking_clf.score(X_test, y_test)

0.928

In [77]:
from sklearn.datasets import load_iris

In [78]:
iris = load_iris(as_frame=True)

In [79]:
rnd_clf = RandomForestClassifier(n_estimators=500, random_state=42)
rnd_clf.fit(iris.data, iris.target)

0,1,2
,n_estimators,500
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [80]:
for score, name in zip(rnd_clf.feature_importances_, iris.data.columns):
    print(round(score,2), name)

0.11 sepal length (cm)
0.02 sepal width (cm)
0.44 petal length (cm)
0.42 petal width (cm)


In [81]:
# Load MINST dataset 
from sklearn.datasets import fetch_openml

In [82]:
mnist = fetch_openml('mnist_784', version=1)
X, y = mnist['data'], mnist['target']

In [83]:
len(X)

70000

In [84]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.14285)

In [85]:
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.166666)

In [86]:
print('train:', len(X_train), 'validate:' , len(X_val), 'test :', len(X_test))

train: 50000 validate: 10000 test : 10000


In [88]:
rf = RandomForestClassifier(n_estimators=500, max_leaf_nodes=24, max_depth=8, n_jobs=-1, random_state=42)
rf.fit(X_train, y_train)
rf.score(X_val, y_val)

0.8562

In [95]:
from sklearn.ensemble import ExtraTreesClassifier

In [97]:
et = ExtraTreesClassifier(n_estimators=500,max_depth = 8, random_state=42, n_jobs = -1)
et.fit(X_train,y_train)
et.score(X_val, y_val)

0.9175

In [101]:
svc = SVC(C=0.7, degree=3, random_state=42, max_iter=100)
svc.fit(X_train, y_train)
svc.score(X_val, y_val)



0.908

In [102]:
stack_clf = VotingClassifier(estimators=[ 
    ('rf', rf), 
    ('et', et),
    ('svc', svc) 
]) 
voting_clf.fit(X_train, y_train)

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT

Increase the number of iterations to improve the convergence (max_iter=100).
You might also want to scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0,1,2
,estimators,"[('lr', ...), ('rf', ...), ...]"
,voting,'soft'
,weights,
,n_jobs,
,flatten_transform,True
,verbose,False

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,42
,solver,'lbfgs'
,max_iter,100

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True

0,1,2
,C,1.0
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,probability,True
,tol,0.001
,cache_size,200
,class_weight,


In [103]:
voting_clf.score(X_val, y_val)

0.9672

In [105]:
pred_1 = rf.predict(X_val)
pred_2 = et.predict(X_val)
pred_3 = svc.predict(X_val)

In [106]:
pred_1

array(['6', '6', '2', ..., '9', '6', '9'], shape=(10000,), dtype=object)

In [124]:
blended_test = np.array([pred_1,pred_2,pred_3]).T

In [125]:
blended_test

array([['6', '6', '6'],
       ['6', '6', '6'],
       ['2', '6', '2'],
       ...,
       ['9', '9', '9'],
       ['6', '6', '6'],
       ['9', '9', '9']], shape=(10000, 3), dtype=object)

In [135]:
blender_rf = RandomForestClassifier(n_estimators=500, max_depth=10, max_leaf_nodes=20, random_state=42, n_jobs=-1, oob_score=True)
blender_rf.fit(blended_test, y_val)

0,1,2
,n_estimators,500
,criterion,'gini'
,max_depth,10
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,20
,min_impurity_decrease,0.0
,bootstrap,True


In [136]:
test_1, test_2, test_3 = rf.predict(X_test), et.predict(X_test), svc.predict(X_test)

In [137]:
bt_2 = np.array([test_1,test_2,test_3]).T
blender_rf.score(bt_2, y_test)

0.9257

In [139]:
blender_rf.oob_score_

0.9281

In [130]:
stacked_class = StackingClassifier(estimators=[
    ('rf', rf), 
    ('et', et),
    ('svc', svc)],
    final_estimator=RandomForestClassifier(random_state=43))
    

In [131]:
stacked_class.fit(X_train, y_train)



0,1,2
,estimators,"[('rf', ...), ('et', ...), ...]"
,final_estimator,RandomForestC...ndom_state=43)
,cv,
,stack_method,'auto'
,n_jobs,
,passthrough,False
,verbose,0

0,1,2
,n_estimators,500
,criterion,'gini'
,max_depth,8
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,24
,min_impurity_decrease,0.0
,bootstrap,True

0,1,2
,n_estimators,500
,criterion,'gini'
,max_depth,8
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,False

0,1,2
,C,0.7
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,probability,False
,tol,0.001
,cache_size,200
,class_weight,

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [132]:
stacked_class.score(X_test, y_test)

0.9608