# Random Forest Classifier

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import accuracy_score, classification_report
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

from mlxtend.plotting import plot_decision_regions

In [None]:
from sklearn.datasets import make_moons

# Generate a noisy two-class "moons" data set (seeded for reproducibility).
X, y = make_moons(n_samples=1000, noise=0.3, random_state=42)

# One color per sample: class 0 is drawn in blue, every other label in red.
colors = ["red" if label != 0 else "blue" for label in y]

# Scatter the two feature columns against each other.
x0, x1 = X[:, 0], X[:, 1]

plt.figure(figsize=(8, 6))
plt.scatter(x0, x1, color=colors)
plt.title("Scatter Plot", fontsize=14)
plt.xlabel("x0", fontsize=14)
plt.ylabel("x1", fontsize=14)
plt.grid();

In [None]:
# Hold out 30% of the samples as a test set; the seed keeps the split stable.
split = train_test_split(X, y, test_size=0.3, random_state=42)
X_train, X_test, y_train, y_test = split

In [None]:
# Baseline: a single decision tree limited to depth 15.
# random_state is fixed so the reported metrics are reproducible across runs,
# matching the seeded data generation and train/test split.
tree = DecisionTreeClassifier(max_depth=15, random_state=42)
tree.fit(X_train, y_train)
tree_y_pred = tree.predict(X_test)
print("Decision Tree Classification Report")
print(classification_report(y_test, tree_y_pred), "\n")


# Ensemble: 500 bootstrapped trees with the same depth limit, seeded the same
# way so the tree-vs-forest comparison does not change from run to run.
forest = RandomForestClassifier(max_depth=15,
                                n_estimators=500,
                                bootstrap=True,
                                random_state=42)
forest.fit(X_train, y_train)
forest_y_pred = forest.predict(X_test)
print("Random Forest Classification Report")
print(classification_report(y_test, forest_y_pred), "\n")

In [None]:
# Decision regions of the single tree, drawn over the training samples.
plt.figure(figsize=(8, 6))
plot_decision_regions(X=X_train, y=y_train, clf=tree);

In [None]:
# Decision regions of the random forest, drawn over the training samples.
plt.figure(figsize=(8, 6))
plot_decision_regions(X=X_train, y=y_train, clf=forest);

In [None]:
# Train/test accuracy as the forest grows from 10 to 200 trees (step 10).
# Every forest uses the same random_state so that differences along the curve
# come from the number of trees rather than from a different random seed.
n_estimators = list(range(10, 201, 10))
acc_train = []
acc_test = []

for n_trees in n_estimators:
    rfc = RandomForestClassifier(n_estimators=n_trees, random_state=42)
    rfc.fit(X_train, y_train)
    acc_train.append(accuracy_score(y_train, rfc.predict(X_train)))
    acc_test.append(accuracy_score(y_test, rfc.predict(X_test)))

plt.figure(figsize=(8, 6))

plt.plot(n_estimators, acc_train, label="training data")
plt.plot(n_estimators, acc_test, label="test data")
plt.xlabel("Number of trees", fontsize=14)
plt.ylabel("Accuracy", fontsize=14)
plt.legend(fontsize=14)
plt.grid();

In [None]:
# Randomized hyper-parameter search over the forest, scored by 5-fold
# cross-validated accuracy on the training split.
# Both the base estimator and the search are seeded: without random_state the
# sampled candidates and the fitted forests differ on every run, so
# best_params_ / best_score_ would not be reproducible.
rfc = RandomForestClassifier(random_state=42)

# NOTE(review): the data has only two features (x0, x1), in which case "sqrt"
# and "log2" should both resolve to a single feature per split, so the
# max_features dimension likely does not vary the model here -- confirm
# against the scikit-learn max_features documentation.
random_grid = {"n_estimators": range(10, 201, 10),
               "max_features": ["sqrt", "log2"],
               "max_depth": range(10, 101, 10)
              }

rfc_rs = RandomizedSearchCV(rfc,
                            param_distributions=random_grid,
                            cv=5,
                            n_iter=100,
                            scoring="accuracy",
                            random_state=42)

rfc_rs.fit(X_train, y_train)

# Best sampled parameter combination and its mean cross-validated accuracy.
print(rfc_rs.best_params_)

print(rfc_rs.best_score_)

In [None]:
# Display the estimator selected by the randomized search (cell output).
rfc_rs.best_estimator_

In [None]:
# Accuracy of the search's best estimator on the held-out test set.
best_test_pred = rfc_rs.best_estimator_.predict(X_test)
accuracy_score(y_test, best_test_pred)

In [None]:
# Print the importance score of each input feature for the best estimator.
best_forest = rfc_rs.best_estimator_
importance = best_forest.feature_importances_

feature_names = ["x0", "x1"]

for idx, name in enumerate(feature_names):
    print(name, importance[idx])

In [None]:
# Small forest with out-of-bag scoring enabled.
# random_state is fixed so the OOB score and the test accuracy computed from
# this model are reproducible, matching the seeded data and split.
model = RandomForestClassifier(n_estimators=20,
                               max_features="sqrt",
                               max_depth=10,
                               oob_score=True,
                               random_state=42)

model.fit(X_train, y_train)

In [None]:
# Display the out-of-bag score of the fitted forest (cell output).
model.oob_score_

In [None]:
# Accuracy of the fitted forest on the held-out test set.
model_test_pred = model.predict(X_test)
accuracy_score(y_test, model_test_pred)