# [Extra Trees](https://github.com/kyopark2014/ML-Algorithms/blob/main/extra-trees.md)

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the wine dataset from its hosted CSV into a DataFrame.
wine_csv_url = 'https://bit.ly/wine_csv_data'
wine = pd.read_csv(wine_csv_url)

In [2]:
# Preview the first five rows to check which columns were loaded.
wine.head(n=5)

Unnamed: 0,alcohol,sugar,pH,class
0,9.4,1.9,3.51,0.0
1,9.8,2.6,3.2,0.0
2,9.8,2.3,3.26,0.0
3,9.8,1.9,3.16,0.0
4,9.4,1.9,3.51,0.0


In [3]:
# List the distinct label values found in the 'class' column.
class_labels = wine['class'].unique()
print(class_labels)

[0. 1.]


In [4]:
# Separate the three feature columns from the label column as NumPy arrays.
feature_columns = ['alcohol', 'sugar', 'pH']
data = wine[feature_columns].to_numpy()
target = wine['class'].to_numpy()

# Hold out 20% of the rows as a test set; the fixed seed keeps the split repeatable.
train_input, test_input, train_target, test_target = train_test_split(
    data,
    target,
    test_size=0.2,
    random_state=42,
)

In [5]:
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import cross_validate

# Extra Trees classifier, seeded so repeated runs give the same result.
et = ExtraTreesClassifier(random_state=42, n_jobs=-1)

# Cross-validate on the training split only, also keeping the scores
# measured on the training folds so both can be compared.
scores = cross_validate(
    estimator=et,
    X=train_input,
    y=train_target,
    return_train_score=True,
    n_jobs=-1,
)

# Mean training-fold score first, then mean validation-fold score.
mean_train_score = np.mean(scores['train_score'])
mean_test_score = np.mean(scores['test_score'])
print(mean_train_score, mean_test_score)

0.9974503966084433 0.8887848893166506


### Evaluation

In [6]:
from sklearn.metrics import classification_report

# Fit the classifier on the whole training split.
et.fit(train_input, train_target)

# Predict labels for the held-out test split.
y_pred = et.predict(test_input)

# Print the classification report comparing true and predicted test labels.
print(classification_report(test_target, y_pred))

              precision    recall  f1-score   support

         0.0       0.80      0.76      0.78       341
         1.0       0.92      0.93      0.92       959

    accuracy                           0.89      1300
   macro avg       0.86      0.85      0.85      1300
weighted avg       0.88      0.89      0.89      1300

