In [1]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

In [2]:
from sklearn.datasets import make_moons
# Generate a noisy two-class "moons" dataset. The fixed seed makes the data
# (and every score derived from it) identical on each Restart & Run All.
X, y = make_moons(n_samples=10000, noise=0.4, random_state=42)

In [3]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples for testing. The fixed seed keeps the split
# stable between runs so the reported accuracies stay comparable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [4]:
# Three different base learners, each left at its default settings.
svc_clf = SVC()
lin_clf = LogisticRegression()
ran_clf = RandomForestClassifier()

# Hard voting: the ensemble predicts the class picked by most base learners.
vot_clf = VotingClassifier(
    estimators=[
        ('lr', lin_clf),
        ('rf', ran_clf),
        ('sv', svc_clf),
    ],
    voting='hard',
)

In [5]:
# Train the hard-voting ensemble on the training split; the fitted estimator
# returned by fit() is what the cell displays.
vot_clf.fit(X_train ,y_train)

VotingClassifier(estimators=[('lr', LogisticRegression()),
                             ('rf', RandomForestClassifier()), ('sv', SVC())])

##### Let’s look at each classifier’s accuracy on the test set:

In [6]:
from sklearn.metrics import accuracy_score
# Fit each individual model and the voting ensemble on the training split,
# then print its accuracy on the held-out test set.
for model in (lin_clf, ran_clf, svc_clf, vot_clf):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    test_accuracy = accuracy_score(y_test, y_pred)
    print(type(model).__name__, "=", test_accuracy)

LogisticRegression = 0.832
RandomForestClassifier = 0.8445
SVC = 0.859
VotingClassifier = 0.855


### Testing soft voting

In [7]:
# Soft voting averages predicted class probabilities, so SVC must be built
# with probability=True to expose predict_proba.
svc_clf = SVC(probability=True)
lin_clf = LogisticRegression()
ran_clf = RandomForestClassifier()

vot_clf = VotingClassifier(
    estimators=[
        ('lr', lin_clf),
        ('rf', ran_clf),
        ('sv', svc_clf),
    ],
    voting='soft',
)

In [8]:
# Train the soft-voting ensemble on the training split; the fitted estimator
# returned by fit() is what the cell displays.
vot_clf.fit(X_train ,y_train)

VotingClassifier(estimators=[('lr', LogisticRegression()),
                             ('rf', RandomForestClassifier()),
                             ('sv', SVC(probability=True))],
                 voting='soft')

In [9]:
from sklearn.metrics import accuracy_score
# Same comparison as for hard voting: fit every model on the training split
# and print its accuracy on the held-out test set.
for model in (lin_clf, ran_clf, svc_clf, vot_clf):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    test_accuracy = accuracy_score(y_test, y_pred)
    print(type(model).__name__, "=", test_accuracy)

LogisticRegression = 0.832
RandomForestClassifier = 0.8445
SVC = 0.859
VotingClassifier = 0.8575


## Bagging and Pasting

##### a)  Bagging

In [10]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

In [11]:
# Bagging: 500 decision trees, each trained on 100 training instances drawn
# with replacement (bootstrap=True). n_jobs=-1 uses every available CPU core.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=100,
    bootstrap=True,
    n_jobs=-1,
)

In [12]:
# Train the bagging ensemble on the training split.
bag_clf.fit(X_train,y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(), max_samples=100,
                  n_estimators=500, n_jobs=-1)

In [13]:
# Predict class labels for the held-out test set with the bagging ensemble.
y_pred = bag_clf.predict(X_test)

In [14]:
# Display the predicted labels (NumPy abbreviates the long array).
y_pred

array([1, 1, 0, ..., 0, 0, 0], dtype=int64)

##### b)  Pasting

In [15]:
# Pasting: same ensemble as bagging, but bootstrap=False draws each tree's
# 100 training instances without replacement.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=100,
    bootstrap=False,
    n_jobs=-1,
)

In [16]:
# fit() returns the estimator itself, so training and prediction can be
# chained; the last line displays the predicted test labels.
y_pred = bag_clf.fit(X_train, y_train).predict(X_test)
y_pred

array([1, 1, 0, ..., 0, 0, 0], dtype=int64)

In [17]:
# Show the base estimator the ensemble was built from. `base_estimator` was
# renamed to `estimator` in scikit-learn 1.2 and the old name was removed in
# 1.4, so read whichever attribute the installed version provides.
bag_clf.estimator if hasattr(bag_clf, "estimator") else bag_clf.base_estimator

DecisionTreeClassifier()

In [18]:
# Inspect the sampling mode of the current ensemble: False here, because the
# pasting variant above was constructed with bootstrap=False.
bag_clf.bootstrap

False

In [19]:
# Class labels seen during fit; their order defines the column order of
# predict_proba().
bag_clf.classes_

array([0, 1], dtype=int64)

In [20]:
# Estimated class probabilities for every test sample: one row per sample,
# one column per entry of bag_clf.classes_.
bag_clf.predict_proba(X_test)

array([[0.462, 0.538],
       [0.16 , 0.84 ],
       [0.958, 0.042],
       ...,
       [0.95 , 0.05 ],
       [0.96 , 0.04 ],
       [0.868, 0.132]])

# Out-of-Bag 

In [21]:
# Bagging with out-of-bag evaluation: oob_score=True scores each training
# instance using only the trees that did not see it during training.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=100,
    bootstrap=True,
    oob_score=True,
    n_jobs=-1,
)

In [22]:
# Train the ensemble; because oob_score=True, the out-of-bag estimate is
# computed as part of this fit.
bag_clf.fit(X_train,y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(), max_samples=100,
                  n_estimators=500, n_jobs=-1, oob_score=True)

In [23]:
# Out-of-bag accuracy estimate obtained from the training data alone; the
# next cell measures test-set accuracy for comparison.
bag_clf.oob_score_

0.864625

In [24]:
from sklearn.metrics import accuracy_score
# Measure accuracy on the held-out test set to compare with the OOB estimate.
y_pred = bag_clf.predict(X_test)
accuracy_score(y_true=y_test, y_pred=y_pred)

0.8595

In [25]:
# Out-of-bag decision function for the training set: one row per training
# instance holding its estimated class probabilities.
bag_clf.oob_decision_function_

array([[0.73939394, 0.26060606],
       [0.90554415, 0.09445585],
       [0.18016194, 0.81983806],
       ...,
       [0.02839757, 0.97160243],
       [0.88306452, 0.11693548],
       [0.04868154, 0.95131846]])

### RandomForestClassifier

In [26]:
from sklearn.ensemble import RandomForestClassifier
# Random forest of 500 trees, each limited to 16 leaf nodes, trained on all
# available cores; fit() returns the estimator so predict() can be chained.
rnd_clf = RandomForestClassifier(
    n_estimators=500,
    max_leaf_nodes=16,
    n_jobs=-1,
)
y_pred_rf = rnd_clf.fit(X_train, y_train).predict(X_test)

In [27]:
# Display the random forest's predicted test labels.
y_pred_rf

array([1, 1, 0, ..., 0, 0, 0], dtype=int64)

In [28]:
from sklearn.metrics import accuracy_score
# Test-set accuracy of the random forest. accuracy_score expects
# (y_true, y_pred); the arguments were previously passed in reverse order.
accuracy_score(y_test, y_pred_rf)

0.856

###### The following BaggingClassifier is roughly equivalent to the previous RandomForestClassifier:


In [29]:
# A random forest trains each tree on a bootstrap sample and, at every split,
# searches for the best threshold among a random subset of features (sqrt of
# the feature count for classifiers). max_features="sqrt" reproduces that.
# The earlier splitter="random" drew random thresholds instead, which mimics
# Extra-Trees rather than a random forest.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(max_features="sqrt", max_leaf_nodes=16),
    n_estimators=500,
    max_samples=1.0,
    bootstrap=True,
    n_jobs=-1,
)

In [30]:
# Train the forest-like bagging ensemble and score it on the test set.
# accuracy_score expects (y_true, y_pred); the arguments were previously
# passed in reverse order.
bag_clf.fit(X_train, y_train)
y_pred = bag_clf.predict(X_test)
accuracy_score(y_test, y_pred)

0.857

In [31]:
from sklearn.ensemble import ExtraTreesClassifier
# Extra-Trees ensemble of 500 trees trained on all cores, scored on the test
# set. accuracy_score expects (y_true, y_pred); the arguments were previously
# passed in reverse order.
ext_clf = ExtraTreesClassifier(n_estimators=500, n_jobs=-1)
ext_clf.fit(X_train, y_train)
accuracy_score(y_test, ext_clf.predict(X_test))

0.8405

In [32]:
from sklearn.ensemble import RandomForestClassifier
# Unconstrained random forest of 500 trees for comparison with Extra-Trees.
# accuracy_score expects (y_true, y_pred); the arguments were previously
# passed in reverse order.
ran_clf = RandomForestClassifier(n_estimators=500, n_jobs=-1)
ran_clf.fit(X_train, y_train)
accuracy_score(y_test, ran_clf.predict(X_test))

0.8465

In [33]:
from sklearn.tree import DecisionTreeClassifier
# Single depth-limited decision tree as a baseline for the ensembles above.
# accuracy_score expects (y_true, y_pred); the arguments were previously
# passed in reverse order.
dec_clf = DecisionTreeClassifier(max_depth=5)
dec_clf.fit(X_train, y_train)
accuracy_score(y_test, dec_clf.predict(X_test))

0.856

In [34]:
from sklearn.model_selection import GridSearchCV
# Search tree depths 1 through 10 for the 500-tree random forest using
# 3-fold cross-validation.
param = {'max_depth': list(range(1, 11))}
grid_se = GridSearchCV(
    estimator=ran_clf,
    param_grid=param,
    cv=3,
)

In [None]:
# Run the grid search on the training split: 10 max_depth candidates x 3
# folds, each fitting a 500-tree forest, so this cell can take a while.
grid_se.fit(X_train,y_train)