## 46 - Modeling: Extra Trees

> One of the actual modeling notebooks: training and evaluating an Extra Trees classifier.


In [None]:
# import needed packages
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import ExtraTreesClassifier
from sklearn import metrics

In [None]:
# load the three feature-engineered CSV files
full_data = pd.read_csv('data/cumulative_data_fe.csv')
train_data = pd.read_csv('data/train_data_fe.csv')
test_data = pd.read_csv('data/test_data_fe.csv')

# training split: keep 'stone_soil_enc' as a one-column frame for the target,
# and remove it together with 'id', 'img_id' and 'stone_soil' from the features
y_train_data = train_data['stone_soil_enc'].to_frame()
x_train_data = train_data.drop(columns=['id', 'img_id', 'stone_soil', 'stone_soil_enc'])

# test split: same target / feature separation as for the training split
y_test_data = test_data['stone_soil_enc'].to_frame()
x_test_data = test_data.drop(columns=['id', 'img_id', 'stone_soil', 'stone_soil_enc'])

In [None]:
# scoring helper shared by the evaluation cells
def pred_metrics(real, pred) -> None:
    """Print accuracy, precision, recall and F1 for a set of predictions.

    Parameters
    ----------
    real
        Ground-truth labels; passed as the first argument to every scorer.
    pred
        Predicted labels; passed as the second argument to every scorer.

    Returns
    -------
    None
        The four scores are written to stdout, one tab-aligned line each.
    """
    report = (
        ("Accuracy:\t{}", metrics.accuracy_score),
        ("Precision:\t{}", metrics.precision_score),
        ("Recall:\t\t{}", metrics.recall_score),
        ("F1:\t\t{}", metrics.f1_score),
    )
    # each score is computed and printed in turn, in the order listed above
    for template, scorer in report:
        print(template.format(scorer(real, pred)))

In [None]:
# build an Extra Trees classifier with a fixed seed and fit it on the training split;
# the one-column target frame is flattened to a 1-D array for fit()
et = ExtraTreesClassifier(random_state=10)
et.fit(X=x_train_data, y=y_train_data.to_numpy().ravel())

ExtraTreesClassifier(random_state=10)

In [None]:
# score the fitted model on the same data it was trained on
y_train_pred = et.predict(X=x_train_data)
pred_metrics(real=y_train_data, pred=y_train_pred)

Accuracy:	0.9982032119573859
Precision:	1.0
Recall:		0.9731846226862838
F1:		0.9864101022248948


In [None]:
# score the fitted model on the test split
y_test_pred = et.predict(X=x_test_data)
pred_metrics(real=y_test_data, pred=y_test_pred)

Accuracy:	0.9620277318407328
Precision:	0.8218997361477572
Recall:		0.5741935483870968
F1:		0.6760716223548563


In [None]:
# cross-validated F1 scores on the training split (cv=10)
cross_val_score(estimator=et, X=x_train_data, y=y_train_data.to_numpy().ravel(), scoring='f1', cv=10)

array([0.69489051, 0.7       , 0.69121813, 0.6751773 , 0.71966527,
       0.68258427, 0.67821068, 0.68097282, 0.63976945, 0.71468144])

In [None]:
# rebind `et` to a new classifier with bootstrap sampling and out-of-bag scoring
# switched on (same seed as before), then fit it on the training split
et = ExtraTreesClassifier(bootstrap=True, oob_score=True, random_state=10)
et.fit(X=x_train_data, y=y_train_data.to_numpy().ravel())

ExtraTreesClassifier(bootstrap=True, oob_score=True, random_state=10)

In [None]:
# score the bootstrap-enabled model on the training split
y_train_pred = et.predict(X=x_train_data)
pred_metrics(real=y_train_data, pred=y_train_pred)

Accuracy:	0.9981714103991095
Precision:	0.9995125517913722
Recall:		0.9731846226862838
F1:		0.9861728988818084


In [None]:
# score the bootstrap-enabled model on the test split
y_test_pred = et.predict(X=x_test_data)
pred_metrics(real=y_test_data, pred=y_test_pred)

Accuracy:	0.9612008650298944
Precision:	0.8397711015736766
Recall:		0.5410138248847927
F1:		0.6580717488789238
