In [422]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier

In [423]:
# Load the dataset; the relative path means the CSV must sit in the working directory.
data = pd.read_csv('forestfires.csv')

In [424]:
# Preview the first rows to check that the CSV parsed as expected.
data.head()

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
0,7,5,3,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.0
1,7,4,10,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.0
2,7,4,10,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.0
3,8,6,3,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.0
4,8,6,3,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.0


In [425]:
# Boolean target: True for rows whose `area` value is strictly positive.
data['burned'] = data['area'].gt(0)

In [426]:
# Discard the raw `area` value (now encoded in `burned`) and every other column
# except temp, RH, wind and rain.
# Rebinding `data` instead of using inplace=True, together with errors='ignore',
# keeps this cell safe to re-run: the in-place version raised KeyError on a second
# execution because the columns had already been removed.
# Trade-off: a misspelled column name here is silently ignored, so check the
# result with data.info().
data = data.drop(
    columns=['area', 'day', 'FFMC', 'DMC', 'DC', 'ISI', 'X', 'Y', 'month'],
    errors='ignore',
)

In [427]:
# Show the remaining columns with their dtypes and non-null counts.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 517 entries, 0 to 516
Data columns (total 5 columns):
temp      517 non-null float64
RH        517 non-null int64
wind      517 non-null float64
rain      517 non-null float64
burned    517 non-null bool
dtypes: bool(1), float64(3), int64(1)
memory usage: 16.7 KB


In [428]:
# Target vector: the boolean `burned` column.
y = data.loc[:, 'burned']

In [429]:
# Feature matrix: every column except the target.
x = data.drop(columns='burned')

In [430]:
from sklearn.model_selection import train_test_split, cross_val_score
import numpy as np

In [431]:
# Hold out 30% of the rows for validation; the fixed random_state makes the
# split repeatable across runs.
X_train, X_valid, y_train, y_valid = train_test_split(
    x, y, test_size=0.3, random_state=4
)

In [432]:
# Baseline tree: only the seed is set, every other hyper-parameter is left at its default.
first_tree = DecisionTreeClassifier(random_state=4)

In [433]:
# Mean score of the baseline tree over 5 cross-validation folds of the training set.
cross_val_score(first_tree, X_train, y_train, cv=5).mean()

0.5403729071537291

In [434]:
from sklearn.model_selection import GridSearchCV

In [435]:
# Candidate tree depths to search: the integers 1 through 10.
tree_params = dict(max_depth=np.arange(1, 11))

In [436]:
# 5-fold grid search over the candidate depths, using all available cores (n_jobs=-1).
tree_grid = GridSearchCV(
    estimator=first_tree,
    param_grid=tree_params,
    cv=5,
    n_jobs=-1,
)

In [437]:
# Run the search on the training split; the trailing `;` hides the estimator repr.
tree_grid.fit(X_train, y_train);



In [438]:
# Best cross-validated score found by the search and the parameters that achieved it.
tree_grid.best_score_, tree_grid.best_params_

(0.556786703601108, {'max_depth': 8})

In [439]:
# Predict on the held-out validation rows using the fitted search object.
# NOTE(review): predict() here depends on GridSearchCV's default refit=True — confirm if refit is ever changed.
tree_valid_pred = tree_grid.predict(X_valid)

In [440]:
from sklearn.metrics import accuracy_score

In [441]:
# Accuracy of the tuned tree on the validation split.
accuracy_score(y_valid, tree_valid_pred)

0.5833333333333334

In [442]:
from sklearn.tree import export_graphviz

In [443]:
# Write the tuned tree to a Graphviz DOT file in the working directory.
# feature_names is passed as a plain list rather than a pandas Index: some
# scikit-learn releases reject an Index here during parameter validation.
export_graphviz(
    tree_grid.best_estimator_,
    out_file='forestfires.dot',
    feature_names=list(x.columns),
    filled=True,
)