In [11]:
# create and train a voiting classifier in Sikit-Learn composed of randomForsest, VOtingClassifier, and Logisitic Regression

from sklearn import datasets
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

X, y = datasets.make_moons(n_samples=500, noise=.3, random_state=42);
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

log_clf = LogisticRegression(solver="lbfgs", random_state=42)
rnd_clf = RandomForestClassifier(n_estimators=100, random_state=42)
svm_clf = SVC(gamma="scale", random_state=42)

voting_clf = VotingClassifier(
    estimators=[
        ("lr", log_clf),
        ("rd", rnd_clf),
        ("sv", svm_clf)
    ],
    voting="hard"
);

voting_clf.fit(X_train, y_train);

In [12]:
# view the accuracy of each model
from sklearn.metrics import accuracy_score;

for clf in (log_clf, rnd_clf, svm_clf, voting_clf):
  clf.fit(X_train, y_train);
  y_pred = clf.predict(X_test)
  print(clf.__class__.__name__, accuracy_score(y_test, y_pred));

LogisticRegression 0.8666666666666667
RandomForestClassifier 0.9090909090909091
SVC 0.9090909090909091
VotingClassifier 0.9151515151515152


In [15]:
#Train an ensemble of 500 Decision Tree classifiers: each is trained on 100 training instances randomly sampled from the training set with replacement.

from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=100,
    bootstrap=True,
    n_jobs=-1
);

bag_clf.fit(X_train, y_train);
y_pred = bag_clf.predict(X_test);
print("Accuracy:", accuracy_score(y_test, y_pred));

Accuracy: 0.9272727272727272


In [16]:
# Train a bagging classifer with out-of-bag evaluation

bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=100,
    bootstrap=True,
    n_jobs=-1,
    oob_score=True
);
bag_clf.fit(X_train, y_train);
print("OOB Score:", bag_clf.oob_score_)

OOB Score: 0.9164179104477612


In [17]:
y_pred = bag_clf.predict(X_test);
print("Prediction:", accuracy_score(y_test, y_pred))

Prediction: 0.9212121212121213


In [21]:
# Train a random forest classifier (Baggling classifier with a decision tree classifier)
from sklearn.ensemble import RandomForestClassifier

rnd_clf = RandomForestClassifier(n_estimators=500, max_leaf_nodes=16, n_jobs=-1);
rnd_clf.fit(X_train, y_train);
y_pred_rf = rnd_clf.predict(X_test);
print("Accuracy:", accuracy_score(y_test, y_pred_rf));

'''
similar bagging classifier as the RandomForestClassifier above

bag_clf = BaggingClassifier(
    DecisionTreeClassifier(
        splitter="random",
        max_leaf_nodes=16
    ),
    n_estimators=500,
    max_samples=1.0,
    bootstrap=True,
    n_jobs=-1
);

'''

Accuracy: 0.9212121212121213


'\nsimilar bagging classifier as the RandomForestClassifier above\n\nbag_clf = BaggingClassifier(\n    DecisionTreeClassifier(\n        splitter="random",\n        max_leaf_nodes=16\n    ),\n    n_estimators=500,\n    max_samples=1.0,\n    bootstrap=True,\n    n_jobs=-1\n);\n\n'

In [23]:
# define the importance of each feature using a RandomForestClassifier

from sklearn.datasets import load_iris

iris = load_iris();
rnd_clf = RandomForestClassifier(n_estimators=500, n_jobs=-1);
rnd_clf.fit(iris["data"], iris["target"]);
for name, score in zip(iris["feature_names"], rnd_clf.feature_importances_):
  print(name, score)

sepal length (cm) 0.09571711034942516
sepal width (cm) 0.023808353529291677
petal length (cm) 0.45641312029477576
petal width (cm) 0.42406141582650747


In [None]:
# 