# Random Forest Classifier

### Import Library Required

In [1]:
%matplotlib notebook
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

### Load Data

In [2]:
data = load_iris(as_frame=True)
X = data.data
y = data.target

data.data.assign(spacies=data.target_names[y])

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),spacies
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


### Split Data and Make Model

In [3]:
X_train, X_test, y_train, y_test = train_test_split(X,y)

clf = RandomForestClassifier(random_state=0, n_estimators=13)
clf.fit(X_train, y_train)

clf.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 'auto',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 13,
 'n_jobs': None,
 'oob_score': False,
 'random_state': 0,
 'verbose': 0,
 'warm_start': False}

### Predict Test Data

In [5]:
y_pred = clf.predict(X_test)
X_test.assign(true_spacies=data.target_names[y_test], predicted_species=data.target_names[y_pred])

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),true_spacies,predicted_species
60,5.0,2.0,3.5,1.0,versicolor,versicolor
45,4.8,3.0,1.4,0.3,setosa,setosa
34,4.9,3.1,1.5,0.2,setosa,setosa
104,6.5,3.0,5.8,2.2,virginica,virginica
109,7.2,3.6,6.1,2.5,virginica,virginica
134,6.1,2.6,5.6,1.4,virginica,virginica
83,6.0,2.7,5.1,1.6,versicolor,versicolor
128,6.4,2.8,5.6,2.1,virginica,virginica
79,5.7,2.6,3.5,1.0,versicolor,versicolor
68,6.2,2.2,4.5,1.5,versicolor,versicolor


### Predicted Probability

In [6]:
clf.predict_proba(X_test)

array([[0.        , 0.92307692, 0.07692308],
       [1.        , 0.        , 0.        ],
       [1.        , 0.        , 0.        ],
       [0.        , 0.        , 1.        ],
       [0.        , 0.        , 1.        ],
       [0.        , 0.23076923, 0.76923077],
       [0.        , 0.53846154, 0.46153846],
       [0.        , 0.        , 1.        ],
       [0.        , 1.        , 0.        ],
       [0.        , 0.84615385, 0.15384615],
       [1.        , 0.        , 0.        ],
       [1.        , 0.        , 0.        ],
       [1.        , 0.        , 0.        ],
       [1.        , 0.        , 0.        ],
       [0.        , 1.        , 0.        ],
       [0.        , 1.        , 0.        ],
       [0.        , 0.53846154, 0.46153846],
       [0.        , 1.        , 0.        ],
       [0.        , 0.        , 1.        ],
       [0.        , 0.        , 1.        ],
       [0.        , 0.15384615, 0.84615385],
       [0.        , 0.        , 1.        ],
       [1.

### Check Accuracy and Confusion Matrix

In [8]:
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

plt.figure()
sns.heatmap(cm, annot=True, square=True, cbar=False, xticklabels=data.target_names, yticklabels=data.target_names)

plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Accuracy Score: {:.3}'.format(accuracy));

<IPython.core.display.Javascript object>