## Random Forest Implementation

In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split 
import warnings 
warnings.filterwarnings('ignore')

In [2]:
# Load the Iris dataset through scikit-learn's `load_iris` helper.
# The returned object exposes `data`, `target`, `DESCR` and the other
# fields listed by `dataset.keys()`.
dataset = load_iris()

In [3]:
# Print the dataset's built-in description text.
print(dataset.DESCR)

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

In [4]:
# List the fields available on the loaded dataset object.
dataset.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])

In [5]:
# Show the feature matrix: one row per sample, four measurements per row.
dataset.data

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3

In [6]:
# Show the class labels; they are encoded as the integers 0, 1 and 2.
dataset.target 

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [7]:
# Independent features (model inputs) for the classification task.
x_clf = dataset.data

In [8]:
# Dependent variable (what the classifier predicts): the integer class labels.
y_clf = dataset.target 

In [9]:
# Hold out 20% of the samples for evaluation; the fixed seed keeps the
# split reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x_clf,
    y_clf,
    test_size=0.2,
    random_state=42,
)

### Random Forest Classifier 

In [10]:
from sklearn.ensemble import RandomForestClassifier

In [11]:
# Instantiate the classifier (training happens in the later `fit` call):
# 100 trees, with a fixed seed so results are reproducible.
rf_clf = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)

In [12]:
# Echo the estimator to inspect its configuration before fitting.
rf_clf

In [13]:
# Train the random forest classifier on the training split.
rf_clf.fit(x_train, y_train)

In [14]:
# Predict class labels for the held-out test split.
y_pred = rf_clf.predict(x_test)


In [15]:
from sklearn.metrics import accuracy_score , classification_report , confusion_matrix

In [16]:
# Report, in order: overall accuracy, the confusion matrix, and the
# per-class precision/recall/F1 report for the test-set predictions.
for metric in (accuracy_score, confusion_matrix, classification_report):
    print(metric(y_test, y_pred))

1.0
[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



### Random Forest Regressor

In [18]:
    import pandas as pd
    import numpy as np
    import warnings 
    warnings.filterwarnings('ignore')

    # Boston housing data, fetched as whitespace-separated text.
    data_url = "http://lib.stat.cmu.edu/datasets/boston"
    # Raw string so `\s+` is passed to pandas as a regex rather than being
    # treated as an (invalid) Python escape sequence. The first 22 lines of
    # the file are skipped before parsing.
    df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)
    # Each record spans two physical rows of the file: the even rows carry
    # 11 values and the odd rows carry the remaining 3. Stitch the even rows
    # to the first two values of the odd rows to form the feature matrix;
    # the third value of each odd row is the regression target.
    # (Previously referenced an undefined `raw_df`, which raised NameError.)
    data = np.hstack([df.values[::2, :], df.values[1::2, :2]])
    target = df.values[1::2, 2]

In [19]:
# Display the raw parsed frame; note that every record occupies two rows,
# with the second row padded by NaN after its first three values.
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,0.00632,18.00,2.31,0.0,0.538,6.575,65.2,4.0900,1.0,296.0,15.3
1,396.90000,4.98,24.00,,,,,,,,
2,0.02731,0.00,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8
3,396.90000,9.14,21.60,,,,,,,,
4,0.02729,0.00,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8
...,...,...,...,...,...,...,...,...,...,...,...
1007,396.90000,5.64,23.90,,,,,,,,
1008,0.10959,0.00,11.93,0.0,0.573,6.794,89.3,2.3889,1.0,273.0,21.0
1009,393.45000,6.48,22.00,,,,,,,,
1010,0.04741,0.00,11.93,0.0,0.573,6.030,80.8,2.5050,1.0,273.0,21.0


In [21]:
# Feature matrix for the regression task.
x = data

In [22]:
# Inspect the assembled feature matrix.
x

array([[6.3200e-03, 1.8000e+01, 2.3100e+00, ..., 1.5300e+01, 3.9690e+02,
        4.9800e+00],
       [2.7310e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9690e+02,
        9.1400e+00],
       [2.7290e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9283e+02,
        4.0300e+00],
       ...,
       [6.0760e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,
        5.6400e+00],
       [1.0959e-01, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9345e+02,
        6.4800e+00],
       [4.7410e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,
        7.8800e+00]])

In [23]:
# Target values for the regression task.
y = target

In [24]:
# Inspect the regression target values.
y

array([24. , 21.6, 34.7, 33.4, 36.2, 28.7, 22.9, 27.1, 16.5, 18.9, 15. ,
       18.9, 21.7, 20.4, 18.2, 19.9, 23.1, 17.5, 20.2, 18.2, 13.6, 19.6,
       15.2, 14.5, 15.6, 13.9, 16.6, 14.8, 18.4, 21. , 12.7, 14.5, 13.2,
       13.1, 13.5, 18.9, 20. , 21. , 24.7, 30.8, 34.9, 26.6, 25.3, 24.7,
       21.2, 19.3, 20. , 16.6, 14.4, 19.4, 19.7, 20.5, 25. , 23.4, 18.9,
       35.4, 24.7, 31.6, 23.3, 19.6, 18.7, 16. , 22.2, 25. , 33. , 23.5,
       19.4, 22. , 17.4, 20.9, 24.2, 21.7, 22.8, 23.4, 24.1, 21.4, 20. ,
       20.8, 21.2, 20.3, 28. , 23.9, 24.8, 22.9, 23.9, 26.6, 22.5, 22.2,
       23.6, 28.7, 22.6, 22. , 22.9, 25. , 20.6, 28.4, 21.4, 38.7, 43.8,
       33.2, 27.5, 26.5, 18.6, 19.3, 20.1, 19.5, 19.5, 20.4, 19.8, 19.4,
       21.7, 22.8, 18.8, 18.7, 18.5, 18.3, 21.2, 19.2, 20.4, 19.3, 22. ,
       20.3, 20.5, 17.3, 18.8, 21.4, 15.7, 16.2, 18. , 14.3, 19.2, 19.6,
       23. , 18.4, 15.6, 18.1, 17.4, 17.1, 13.3, 17.8, 14. , 14.4, 13.4,
       15.6, 11.8, 13.8, 15.6, 14.6, 17.8, 15.4, 21

In [25]:
# Data splitting: hold out 20% of the samples for evaluation, with a fixed
# seed for reproducibility. Note that this rebinds x_train/x_test/
# y_train/y_test, replacing the classification split created earlier.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [26]:
# Random Forest Regressor 
from sklearn.ensemble import RandomForestRegressor

In [27]:
# Instantiate the regressor: 100 trees, fixed seed for reproducibility.
rf_reg = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
)

In [28]:
# Train the random forest regressor on the training split.
rf_reg.fit(x_train, y_train)

In [31]:
# Predict target values for the held-out test split.
y_pred = rf_reg.predict(x_test)

In [32]:
# Inspect the regressor's predictions for the test split.
y_pred 

array([22.839, 30.676, 16.317, 23.51 , 16.819, 21.374, 19.358, 15.62 ,
       21.091, 21.073, 20.047, 19.297,  8.611, 21.398, 19.378, 25.453,
       19.187,  8.538, 46.132, 14.536, 24.728, 23.996, 14.509, 23.847,
       14.363, 14.796, 21.126, 13.663, 19.535, 21.29 , 19.449, 23.393,
       29.3  , 20.338, 14.596, 15.594, 33.835, 19.123, 20.915, 24.376,
       19.286, 29.61 , 46.108, 19.428, 22.653, 13.676, 15.035, 24.321,
       18.689, 28.821, 21.107, 33.811, 16.502, 25.779, 44.922, 21.982,
       15.416, 32.032, 22.596, 20.296, 25.611, 33.916, 28.134, 18.551,
       26.745, 17.568, 13.992, 23.195, 29.022, 15.663, 21.074, 27.426,
       10.06 , 21.569, 21.952,  7.084, 19.905, 46.154, 11.274, 12.981,
       21.288, 12.562, 19.561,  9.392, 20.76 , 27.283, 15.383, 23.399,
       23.628, 17.617, 21.68 ,  8.019, 19.616, 18.714, 22.592, 19.786,
       41.733, 12.79 , 12.726, 13.119, 20.603, 23.902])

In [35]:
from sklearn.metrics import r2_score , mean_squared_error 

In [36]:
# Report, in order: the R^2 score and the mean squared error of the
# test-set predictions.
for metric in (r2_score, mean_squared_error):
    print(metric(y_test, y_pred))

0.8922527442109116
7.901513892156864
