# Random Forest

## Voting Classifier

In [15]:
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split

# Toy two-class dataset: 500 noisy "moons" samples. Both the data generation
# and the train/test split are seeded so they are identical on every run.
X, y = make_moons(n_samples=500, noise=0.30, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Three base classifiers. All of them are seeded (previously only the SVC
# was), so the accuracies reported for them are reproducible after a
# Restart & Run All.
log_clf = LogisticRegression(solver='liblinear', random_state=42)
rnd_clf = RandomForestClassifier(n_estimators=10, random_state=42)
svm_clf = SVC(gamma='auto', probability=True, random_state=42)

# Soft voting averages the class probabilities predicted by the estimators,
# which is why the SVC is created with probability=True.
voting_clf = VotingClassifier(
    estimators=[('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)],
    voting='soft'
)

### Test accuracy of each classifier

In [16]:
from sklearn.metrics import accuracy_score

# Fit every base classifier and the voting ensemble on the training split,
# then print its class name next to its accuracy on the test split.
for clf in (log_clf, rnd_clf, svm_clf, voting_clf):
    # fit() returns the fitted estimator itself, so the calls can be chained.
    y_pred = clf.fit(X_train, y_train).predict(X_test)
    print(type(clf).__name__, accuracy_score(y_test, y_pred))

LogisticRegression 0.864
RandomForestClassifier 0.864
SVC 0.888
VotingClassifier 0.912


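## Bagging & Pasting

Bagging draws each predictor's training subset *with* replacement (`bootstrap=True`, shown below); pasting draws it *without* replacement (`bootstrap=False`).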

In [9]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

# Bagging ensemble: 500 decision trees, each trained on 100 training
# instances drawn with replacement (bootstrap=True). n_jobs=-1 uses all
# available CPU cores. random_state pins the bootstrap draws and the
# per-tree seeds so the printed accuracy is reproducible across runs.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(), n_estimators=500,
    max_samples=100, bootstrap=True, n_jobs=-1, random_state=42
)
bag_clf.fit(X_train, y_train)

# Accuracy of the ensemble on the held-out test split.
y_pred = bag_clf.predict(X_test)
print(accuracy_score(y_test, y_pred))

0.904


## Out-of-Bag Evaluation

In [10]:
# Same bagging setup, but with oob_score=True: after fitting, each training
# instance is scored using only the trees that did not see it in their
# bootstrap sample. random_state fixes the bootstrap draws so the OOB score
# (and the OOB decision function) are the same on every run.
oob_clf = BaggingClassifier(
    DecisionTreeClassifier(), n_estimators=500,
    bootstrap=True, n_jobs=-1, oob_score=True, random_state=42
)

oob_clf.fit(X_train, y_train)
# Out-of-bag accuracy estimate, obtained without touching the test set.
oob_clf.oob_score_

0.896

In [11]:
# Accuracy on the held-out test split, for comparison with the
# out-of-bag estimate reported by oob_clf.oob_score_.
y_pred = oob_clf.predict(X_test)
accuracy_score(y_true=y_test, y_pred=y_pred)

0.896

In [12]:
# Out-of-bag decision function of the fitted ensemble: one row per training
# instance and one column per class (each row in the output below sums to 1).
oob_clf.oob_decision_function_

array([[0.42931937, 0.57068063],
       [0.35204082, 0.64795918],
       [1.        , 0.        ],
       [0.        , 1.        ],
       [0.        , 1.        ],
       [0.08108108, 0.91891892],
       [0.3908046 , 0.6091954 ],
       [0.02285714, 0.97714286],
       [0.98989899, 0.01010101],
       [0.97849462, 0.02150538],
       [0.79569892, 0.20430108],
       [0.00568182, 0.99431818],
       [0.79651163, 0.20348837],
       [0.8021978 , 0.1978022 ],
       [0.95698925, 0.04301075],
       [0.0625    , 0.9375    ],
       [0.        , 1.        ],
       [0.98930481, 0.01069519],
       [0.92432432, 0.07567568],
       [0.99453552, 0.00546448],
       [0.02906977, 0.97093023],
       [0.3038674 , 0.6961326 ],
       [0.9127907 , 0.0872093 ],
       [1.        , 0.        ],
       [0.97687861, 0.02312139],
       [0.        , 1.        ],
       [1.        , 0.        ],
       [1.        , 0.        ],
       [0.        , 1.        ],
       [0.59375   , 0.40625   ],
       [0.

## Random Forest

In [17]:
from sklearn.ensemble import RandomForestClassifier

# Random forest of 500 trees, each limited to at most 16 leaf nodes and
# trained using all available CPU cores (n_jobs=-1). random_state seeds the
# bootstrap sampling and the per-split feature selection so the printed
# accuracy is reproducible across runs.
rnd_clf = RandomForestClassifier(
    n_estimators=500, max_leaf_nodes=16, n_jobs=-1, random_state=42
)
rnd_clf.fit(X_train, y_train)

# Accuracy of the forest on the held-out test split.
y_pred_rf = rnd_clf.predict(X_test)
print(accuracy_score(y_test, y_pred_rf))

0.92
