<a href="https://colab.research.google.com/github/rolitotiuremolador/WatershedAlgorithm/blob/master/RandomForestClassifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Random Forest Classifier
[Source](https://www.youtube.com/watch?v=eM4uJ6XGnSM)

In [None]:
# Organize Data
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris

# Seed NumPy's global RNG so the random train/test split drawn later is repeatable
np.random.seed(0)
iris = load_iris()

# Measurements as a DataFrame, using the dataset's own feature names as column labels
df = pd.DataFrame(iris.data, columns=iris.feature_names)
# Only the last expression of a cell is rendered automatically, so an
# intermediate preview needs an explicit display() (provided by the Jupyter kernel)
display(df.head())

# Attach the label as a categorical column. The categories are iris.target_names
# and the codes are iris.target, so each category code equals the original target code.
df['species'] = pd.Categorical.from_codes(iris.target, iris.target_names)
df.head()

In [None]:
# Create Train and Test

# Re-seed right before drawing the random mask so that re-running this cell on
# its own reproduces the same split (on a fresh run the result is unchanged,
# because nothing else consumes the global RNG between the first seed and here).
np.random.seed(0)

# Flag each row as training data with probability 0.75
df['is_train'] = np.random.uniform(0, 1, len(df)) <= .75
display(df.head())

# Split into training rows and test rows by masking with the boolean column directly
train, test = df[df['is_train']], df[~df['is_train']]
# Sanity check: every row landed in exactly one of the two partitions
assert len(train) + len(test) == len(df)

# The first 4 columns are the measurement features
features = df.columns[:4]
display(features)

# Encode each species as an integer using the categorical's own codes.
# Unlike pd.factorize (which numbers values by order of first appearance in
# `train`), these codes always follow the category order of df['species'],
# so predicted codes can be mapped back through that same order safely.
y = train['species'].cat.codes.to_numpy()
# Viewing target
y

In [None]:
# Build and train the random forest in one step
from sklearn.ensemble import RandomForestClassifier

# random_state=0 pins the forest's own randomness; n_jobs=2 requests two parallel jobs.
# fit() hands back the estimator itself, so construction and training can be chained.
clf = RandomForestClassifier(random_state=0, n_jobs=2).fit(train[features], y)
# Echo the fitted estimator as the cell's output
clf

In [None]:
# Make prediction
# Apply the trained classifier to the held-out test rows.
# Only a cell's last expression is rendered automatically, so every
# intermediate view below goes through display() (provided by the Jupyter kernel).
test_features = test[features]
# Predict once and reuse the integer class codes for the name mapping below
test_pred_codes = clf.predict(test_features)
display(test_pred_codes)

# Predicted class probabilities for test rows at positions 15 through 24 (10 rows)
display(clf.predict_proba(test_features)[15:25])

# Map each predicted class code to its species name
preds = iris.target_names[test_pred_codes]

# Predicted species for test rows at positions 17 through 31
display(preds[17:32])

# ACTUAL species for the last 15 test rows
display(test['species'].tail(15))

# Confusion matrix: actual species (rows) vs. predicted species (columns)
display(pd.crosstab(test['species'], preds, rownames=['Actual Species'], colnames=['Predicted Species']))

# Predict on new data.
# The classifier was fitted on a DataFrame, so the new samples are wrapped in a
# DataFrame with the same feature columns; this keeps column order explicit and
# avoids scikit-learn's "X does not have valid feature names" warning.
new_samples = pd.DataFrame([[13, 13, 3, 3], [45, 0.2, 0.5, 45]], columns=features)
preds = iris.target_names[clf.predict(new_samples)]
preds

#### 5. Visualize data (Plotting) [Source](https://machinelearningmastery.com/a-guide-to-getting-datasets-for-machine-learning-in-python/)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Reload iris as pandas objects: a feature DataFrame and a target Series
data, target = load_iris(as_frame=True, return_X_y=True)
# Append the class label as an extra column so it can drive the plot colouring
data = data.assign(target=target)

# Pairwise scatter plots of all columns, KDE curves on the diagonal,
# points coloured by the 'target' column and drawn semi-transparent
sns.pairplot(
    data,
    hue="target",
    kind="scatter",
    diag_kind="kde",
    palette="muted",
    plot_kws={'alpha': 0.7},
)
plt.show()