Name:

Homework: Commenting Ensemble Methods Code

In [14]:
from sklearn.datasets import make_moons # imports make moons which is a toy dataset
from sklearn.ensemble import RandomForestClassifier, VotingClassifier # Imports ensemble classifiers
from sklearn.linear_model import LogisticRegression # imports logistic regression for classification
from sklearn.model_selection import train_test_split # imports function to split data into training and testing data
from sklearn.svm import SVC # imports support vector classifier

# Toy two-class "moons" dataset: 500 noisy samples, seeded for reproducibility.
X, y = make_moons(n_samples=500, noise=0.30, random_state=42)

# Hold out a test split; no size is given, so train_test_split's default applies.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Ensemble of three different model families. No `voting` argument is passed,
# so VotingClassifier's default voting mode is used.
voting_clf = VotingClassifier(estimators=[
    ('lr', LogisticRegression(random_state=42)),    # logistic regression
    ('rf', RandomForestClassifier(random_state=42)),  # random forest
    ('svc', SVC(random_state=42)),                  # support vector classifier
])

# Train every member of the ensemble on the training split.
voting_clf.fit(X_train, y_train)

In [15]:
# Print the test-set accuracy of each fitted member of the voting ensemble,
# one "<name> = <accuracy>" line per estimator.
for name, clf in voting_clf.named_estimators_.items():
    print(name, "=", clf.score(X_test, y_test))

lr = 0.864
rf = 0.896
svc = 0.896


In [16]:
# Ensemble prediction for the first test sample only (X_test[:1] is a one-row slice).
voting_clf.predict(X_test[:1])

array([1])

In [17]:
# Ask each fitted member of the ensemble for its own prediction on the first test sample.
[member.predict(X_test[:1]) for member in voting_clf.estimators_]

[array([1]), array([1]), array([0])]

In [18]:
# Accuracy of the combined voting ensemble on the held-out test split.
voting_clf.score(X_test, y_test)

0.912

In [19]:
# Switch the existing ensemble to soft voting (votes are based on predicted
# probabilities) and turn on probability estimates for the SVC member, which
# was constructed without them. `svc__probability` addresses the estimator
# registered under the name 'svc'.
voting_clf.set_params(voting="soft", svc__probability=True)

# The new settings only take effect after refitting on the training split.
voting_clf.fit(X_train, y_train)

# Test-set accuracy of the soft-voting ensemble (displayed as the cell output).
voting_clf.score(X_test, y_test)

0.92

In [20]:
from sklearn.ensemble import BaggingClassifier # imports bagging classifier
from sklearn.tree import DecisionTreeClassifier # imports decisiontreeclassifier for individual base learners

# Bagging ensemble of 500 decision trees, each fitted on 100 sampled training
# instances. n_jobs=-1 follows scikit-learn's convention for "use all processors".
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=100,
    n_jobs=-1,
    random_state=42,
)

# Train on the training split, then report accuracy on the held-out test split.
bag_clf.fit(X_train, y_train)
bag_clf.score(X_test, y_test)

0.904

In [21]:
from sklearn.ensemble import RandomForestClassifier # imports randomforestclassifier for random forest model

# Random forest of 500 trees, each capped at 16 leaf nodes.
rnd_clf = RandomForestClassifier(
    n_estimators=500,
    max_leaf_nodes=16,
    n_jobs=-1,
    random_state=42,
)

# Fit on the training split.
rnd_clf.fit(X_train, y_train)

# Class predictions of the fitted forest for every test sample.
y_pred_rf = rnd_clf.predict(X_test)

In [22]:
from sklearn.datasets import load_iris # imports iris dataset
# Load iris with as_frame=True so the feature table exposes named columns.
iris = load_iris(as_frame=True)

# Build a fresh 500-tree forest and fit it on the complete iris data.
# Note: this rebinds `rnd_clf`, replacing the forest fitted on the moons data.
rnd_clf = RandomForestClassifier(n_estimators=500, random_state=42).fit(
    iris.data, iris.target
)

# Pair every importance score with its column name and print it rounded to 2 decimals.
for score, name in zip(rnd_clf.feature_importances_, iris.data.columns):
    print(round(score, 2), name)

0.11 sepal length (cm)
0.02 sepal width (cm)
0.44 petal length (cm)
0.42 petal width (cm)


In [23]:
from sklearn.ensemble import AdaBoostClassifier # imports adaboostclassifier for boosting ensemble method

# AdaBoost ensemble of 30 depth-1 decision trees (stumps) with learning rate 0.5.
ada_clf = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=1),
    n_estimators=30,
    learning_rate=0.5,
    random_state=42,
)

# Train the boosted ensemble on the training split.
ada_clf.fit(X_train, y_train)



In [24]:
from sklearn.ensemble import GradientBoostingRegressor #imports gradient boost regressor

# Gradient-boosted regression ensemble: 3 trees of maximum depth 2, learning rate 1.0.
gbrt = GradientBoostingRegressor(
    max_depth=2,
    n_estimators=3,
    learning_rate=1.0,
    random_state=42,
)

# NOTE(review): in the cells visible here, X and y are the make_moons features
# and 0/1 class labels, so this fits a *regressor* on class labels and on the
# full dataset (training and test samples together) — confirm this is intended.
gbrt.fit(X, y)