# Chapter 6 - Other Popular Machine Learning Methods
## Segment 6 - Ensemble methods with random forest

This is a classification problem, where in we will be estimating the species label for iris flowers.

In [13]:
import numpy as np
import pandas as pd

import sklearn.datasets as datasets
from sklearn.model_selection import train_test_split 
from sklearn import metrics

In [14]:
from sklearn.ensemble import RandomForestClassifier

In [15]:
iris = datasets.load_iris()

df = pd.DataFrame(iris.data, columns=iris.feature_names)
y = pd.DataFrame(iris.target)

y.columns = ['labels']

df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [16]:
y[0:5]

Unnamed: 0,labels
0,0
1,0
2,0
3,0
4,0


The data set contains information on the:
- sepal length (cm)
- sepal width (cm)  
- petal length (cm)  
- petal width (cm)
- species type

In [17]:
df.isnull().any()==True

sepal length (cm)    False
sepal width (cm)     False
petal length (cm)    False
petal width (cm)     False
dtype: bool

In [18]:
print(y.labels.value_counts())

2    50
1    50
0    50
Name: labels, dtype: int64


# Preparing the data for training the model

In [32]:
X_train, X_test, y_train, y_test = train_test_split(df, y, test_size=.2)

# Build a Random Forest model

In [33]:
classifier = RandomForestClassifier(n_estimators=200, random_state=0)

y_train_array = np.ravel(y_train)

classifier.fit(X_train, y_train_array)

y_pred = classifier.predict(X_test)

# Evaluating the model on the test data

In [34]:
print(metrics.classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         8
           1       1.00      0.89      0.94         9
           2       0.93      1.00      0.96        13

    accuracy                           0.97        30
   macro avg       0.98      0.96      0.97        30
weighted avg       0.97      0.97      0.97        30



In [35]:
y_test_array = np.ravel(y_test)
print(y_test_array)

[0 2 1 0 2 2 2 0 1 2 0 2 2 1 2 0 0 2 0 1 2 2 1 1 1 1 2 2 0 1]


In [36]:
print(y_pred)

[0 2 1 0 2 2 2 0 1 2 0 2 2 1 2 0 0 2 0 1 2 2 1 1 1 1 2 2 0 2]
