# Extra Trees Classifier

In [None]:
#imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
import pickle
import time
import seaborn as sns

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

pd.set_option('display.max_columns', None)

In [None]:
# Load the pickled DataFrame from disk; every later cell works from `df`.
pickle_path = 'pickles/df.pkl'
with open(pickle_path, mode='rb') as handle:
    df = pickle.load(handle)

# Creating a model. 

In [None]:
# Target is the STAT_CAUSE_DESCR column. Features are every other column
# except STATE, with the listed categorical columns one-hot encoded.
target_column = "STAT_CAUSE_DESCR"
one_hot_columns = [
    'SOURCE_SYSTEM_TYPE',
    'FIRE_SIZE_CLASS',
    'SOURCE_REPORTING_UNIT_NAME',
]

y = df[target_column]

feature_frame = df.drop(columns=[target_column, 'STATE'])
# drop_first=True omits the first level of each encoded column.
X = pd.get_dummies(feature_frame, columns=one_hot_columns, drop_first=True)

In [None]:
# Baseline ("null model"): the share of rows belonging to each class in y.
# The largest share is the accuracy of always predicting the most common class.
null_model = y.value_counts(normalize= True)
null_model

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2023,
)

In [None]:
# Fit the scaler on the training rows only, then apply that same fitted
# scaler to both splits.
sc = StandardScaler().fit(X_train)
Z_train = sc.transform(X_train)
Z_test = sc.transform(X_test)

In [None]:
# Baseline Extra Trees model with default hyper-parameters, trained on the
# scaled training split and scored on the scaled test split.
# The forest is seeded with the same value used for the train/test split so
# the reported score is identical on every run instead of drifting.
model_et = ExtraTreesClassifier(random_state=2023)
model_et.fit(Z_train, y_train)
model_et.score(Z_test, y_test)

In [None]:
# Confusion matrix for model_et on the scaled test split.
#
# Styling is configured BEFORE the figure is created: rcParams such as
# figure.facecolor are read when a figure is constructed, so setting them
# afterwards does not affect the figure being drawn.
sns.set(font_scale=.9)
plt.rcParams["figure.edgecolor"] = "#FAEEDA"
plt.rcParams["figure.facecolor"] = "#FAEEDA"

f, ax = plt.subplots(1, 1, figsize=(7, 7))
cm = ConfusionMatrixDisplay.from_estimator(
    model_et, Z_test, y_test,
    cmap='Greens',
    normalize='true',            # normalise over the true-label rows
    xticks_rotation='vertical',
    values_format='.1g',
    ax=ax,
)
ax.set_title("Confusion Matrix")
# grid(None) toggles the grid rather than switching it off, so pass False.
# (grid() returns None, so no trailing ';' is needed to silence cell output;
# a ';' on a line of its own is a syntax error.)
ax.grid(False)

In [None]:
## Tweaking the model: first hyper-parameter grid, crossing the number of
## trees with the maximum tree depth.
rf_params = dict(
    n_estimators=[50, 100, 150],
    max_depth=[None, 4, 8],
)

In [None]:
# 5-fold cross-validated grid search over rf_params, using model_et as the
# estimator to tune; n_jobs=-2 lets the search run in parallel.
gs = GridSearchCV(
    estimator=model_et,
    param_grid=rf_params,
    cv=5,
    n_jobs=-2,
)

In [None]:
# Run the first grid search on the training split.
# NOTE(review): this fits on the unscaled X_train, whereas model_et above was
# fit on the scaled Z_train — score this search on X_test, not Z_test.
gs.fit(X_train, y_train)

In [None]:
# Best cross-validated score found by the first grid search.
gs.best_score_

In [None]:
# Parameter combination from rf_params that achieved that best score.
gs.best_params_

In [None]:
# Show the parameters the GridSearchCV object itself was configured with.
gs.get_params()

In [None]:
# Second hyper-parameter grid: larger forests, max_depth fixed to None,
# and two candidate values for min_samples_leaf.
rf_params2 = dict(
    n_estimators=[200, 500],
    max_depth=[None],
    min_samples_leaf=[1, 2],
)

In [None]:
# Second 5-fold cross-validated grid search, this time over rf_params2.
gs2 = GridSearchCV(
    estimator=model_et,
    param_grid=rf_params2,
    cv=5,
    n_jobs=-2,
)

In [None]:
# Run the second grid search on the training split.
# NOTE(review): fits on the unscaled X_train (model_et itself was fit on
# Z_train) — score this search on X_test, not Z_test.
gs2.fit(X_train, y_train)

# BRF

# Trim the fat: refit using a reduced set of features

In [None]:
# Preview the first rows of df before picking a reduced feature set.
df.head()

In [None]:
# Encode each cause label as an integer so it can take part in a correlation.
# Any STAT_CAUSE_DESCR value missing from this map becomes NaN in class_number.
# NOTE(review): the integer codes are arbitrary labels, so the correlations
# below are only a rough screening aid — confirm this is the intended use.
cause_map = {
    'Lightning': 0, 'Miscellaneous': 1, 'Campfire': 2,
    'Equipment Use': 3, 'Arson': 4, 'Debris Burning': 5, 'Smoking': 6,
    'Children': 7, 'Railroad': 8,
}
df['class_number'] = df['STAT_CAUSE_DESCR'].map(cause_map)

plt.figure(figsize=(8, 8))

# numeric_only=True restricts the correlation to numeric columns. Without it,
# pandas >= 2.0 raises because df still contains text columns
# (STAT_CAUSE_DESCR at the very least).
corr = (
    df.corr(numeric_only=True)[['class_number']]
    .sort_values('class_number', ascending=False)
)

# corr has a single column, so its "upper triangle" is only the top cell.
# After the descending sort that is normally class_number's correlation with
# itself, so the mask simply blanks that trivial 1.0.
mask = np.zeros_like(corr, dtype=bool)
mask[np.triu_indices_from(mask)] = True

sns.set(font_scale=1)
# One-column heatmap: correlation of each numeric column with class_number.
sns.heatmap(corr, mask=mask, square=True, cmap='twilight_shifted_r', annot=True, vmin=-1, vmax=1);

In [None]:
# Reduced feature set: seven columns used as-is plus OWNER_DESCR, which is
# one-hot encoded (drop_first=True omits its first level).
passthrough_columns = ['duration', 'FIRE_SIZE', 'bdod', 'phh2o', 'clay', 'soc', 'sand']
encoded_columns = ['OWNER_DESCR']

X = pd.get_dummies(
    df[passthrough_columns + encoded_columns],
    columns=encoded_columns,
    drop_first=True,
)

# Target is unchanged: the STAT_CAUSE_DESCR column.
y = df['STAT_CAUSE_DESCR']

In [None]:
# Preview df again; it now also carries the class_number column added above.
df.head()

In [None]:
# Re-split using the reduced feature matrix: 20% held out, same fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2023,
)

In [None]:
# Refit the scaler on the new training rows only, then transform both splits
# with that same fitted scaler.
sc = StandardScaler().fit(X_train)
Z_train = sc.transform(X_train)
Z_test = sc.transform(X_test)


In [None]:
# Extra Trees with default hyper-parameters, retrained on the current scaled
# training split and scored on the matching scaled test split.
# The forest is seeded with the same value used for the train/test split so
# the reported score is identical on every run instead of drifting.
model_et = ExtraTreesClassifier(random_state=2023)
model_et.fit(Z_train, y_train)
model_et.score(Z_test, y_test)