# Random Forest
[Random Forest](https://github.com/kyopark2014/ML-Algorithms/blob/main/random-forest.md)

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Download the wine CSV into a DataFrame; later cells read the
# 'alcohol', 'sugar', 'pH' and 'class' columns from it.
WINE_CSV_URL = 'https://bit.ly/wine_csv_data'
wine = pd.read_csv(WINE_CSV_URL)

In [2]:
# Preview the first five rows to sanity-check the loaded columns.
wine.head(n=5)

Unnamed: 0,alcohol,sugar,pH,class
0,9.4,1.9,3.51,0.0
1,9.8,2.6,3.2,0.0
2,9.8,2.3,3.26,0.0
3,9.8,1.9,3.16,0.0
4,9.4,1.9,3.51,0.0


In [3]:
# List the distinct label values found in the 'class' column.
class_labels = wine['class'].unique()
print(class_labels)

[0. 1.]


### Split train/test dataset

In [4]:
# Feature matrix (three numeric columns) and label vector as NumPy arrays.
feature_columns = ['alcohol', 'sugar', 'pH']
data = wine[feature_columns].to_numpy()
target = wine['class'].to_numpy()

# Hold out 20% of the rows as the test set; the fixed seed keeps the
# split identical across re-runs.
train_input, test_input, train_target, test_target = train_test_split(
    data,
    target,
    test_size=0.2,
    random_state=42,
)

### Random Forest Classifier

In [5]:
from sklearn.model_selection import cross_validate
from sklearn.ensemble import RandomForestClassifier

# Random forest with a fixed seed, using all available cores.
rf = RandomForestClassifier(random_state=42, n_jobs=-1)

# Cross-validate on the training split only, keeping the per-fold training
# scores so they can be compared against the validation scores.
scores = cross_validate(
    estimator=rf,
    X=train_input,
    y=train_target,
    n_jobs=-1,
    return_train_score=True,
)

# Mean training accuracy vs. mean validation accuracy across the folds;
# a large gap between the two indicates overfitting.
train_mean = np.mean(scores['train_score'])
validation_mean = np.mean(scores['test_score'])
print(train_mean, validation_mean)

0.9973541965122431 0.8905151032797809


In [6]:
# Dump the raw cross-validation result: a dict of per-fold arrays
# ('fit_time', 'score_time', 'test_score', 'train_score').
print(scores)

{'fit_time': array([0.29513288, 0.29767966, 0.2947762 , 0.29233813, 0.3056531 ]), 'score_time': array([0.03622174, 0.0371933 , 0.03806829, 0.03780055, 0.03945041]), 'test_score': array([0.88461538, 0.88942308, 0.90279115, 0.88931665, 0.88642926]), 'train_score': array([0.9971133 , 0.99663219, 0.9978355 , 0.9973545 , 0.9978355 ])}


In [7]:
# cross_validate fits clones of the estimator, so `rf` itself must be fitted
# on the full training split before it can be inspected or used to predict.
rf.fit(X=train_input, y=train_target)

# One importance value per feature, in the column order used to build the
# feature matrix: alcohol, sugar, pH.
importances = rf.feature_importances_
print(importances)

[0.23167441 0.50039841 0.26792718]


### Evaluation

In [8]:
from sklearn.metrics import classification_report

# Predict labels for the held-out test split with the fitted forest.
y_pred = rf.predict(test_input)

# Per-class precision / recall / F1 plus overall accuracy on the test split.
report = classification_report(test_target, y_pred)
print(report)

              precision    recall  f1-score   support

         0.0       0.79      0.78      0.79       341
         1.0       0.92      0.93      0.93       959

    accuracy                           0.89      1300
   macro avg       0.86      0.85      0.86      1300
weighted avg       0.89      0.89      0.89      1300



In [9]:
# Re-create the forest with out-of-bag scoring enabled and fit it on the
# training split (fit returns the estimator itself, so the call can be chained).
rf = RandomForestClassifier(
    random_state=42,
    n_jobs=-1,
    oob_score=True,
).fit(train_input, train_target)

# Accuracy estimated from the out-of-bag samples of the training data.
print(rf.oob_score_)

0.8934000384837406
