In [11]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier

In [12]:
df = pd.read_csv('https://raw.githubusercontent.com/esnt/Data/main/Fires/utah_fires.csv')
df.head()

Unnamed: 0,FIRE_YEAR,DISCOVERY_DOY,DISCOVERY_TIME,LATITUDE,LONGITUDE,FIRE_SIZE,NWCG_CAUSE_CLASSIFICATION
0,2005,179,1654.0,40.088889,-111.378333,0.1,Natural
1,2005,199,1600.0,41.264167,-111.795833,0.1,Human
2,2005,223,600.0,41.258333,-111.849722,0.1,Natural
3,2005,151,1355.0,39.316667,-111.228333,0.1,Human
4,2005,145,2115.0,37.871944,-111.658056,0.1,Natural


Data Preprocessing

In [13]:
X = df.iloc[:,0:6] # Seperating features
y = df['NWCG_CAUSE_CLASSIFICATION']
y = np.where(y == 'Human', 1, 0) # Converting output to binary output where 1 represents human
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.25, random_state=307)

preprocessor = Pipeline([
    ('imputer', SimpleImputer(strategy='mean', missing_values=np.nan)),
    ('scaler', StandardScaler())
])

X_train = preprocessor.fit_transform(X_train)
X_test = preprocessor.transform(X_test)

Random Forest

In [21]:
def evaluate(model, X,y):
  preds = model.predict(X)
  accuracy = round(accuracy_score(y, preds),3)

  print(f'Accuracy: {accuracy}')

In [15]:
params = {
    'n_estimators' : np.arange(100,600,100),
    'max_depth' : [5,10,15,20],
    'min_samples_split' : np.linspace(.1,1,10,endpoint=True)
}

clf = GridSearchCV(RandomForestClassifier(criterion='entropy'), param_grid=params, n_jobs=-1, scoring='accuracy')
clf.fit(X_train, y_train)

In [22]:
evaluate(clf, X_test, y_test) # Test accuracy

Accuracy: 0.774


In [26]:
evaluate(clf, X_train, y_train) # Train accuracy

Accuracy: 0.771


Artificial Neural Network

In [23]:
tf.random.set_seed(42)
ann = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape = [X_train.shape[1],]),

    tf.keras.layers.Dense(150, activation = 'relu'),

    tf.keras.layers.Dense(100, activation = 'relu'),
    
    tf.keras.layers.Dense(1, activation = 'sigmoid'),
])

In [24]:
ann.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=.002),
             loss='binary_crossentropy',
             metrics=["accuracy"])

In [25]:
early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=10, 
                                                     restore_best_weights=True)

history = ann.fit(X_train, y_train, 
                   epochs = 100, 
                   validation_data = (X_test, y_test),
                   callbacks=early_stopping_cb)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100


In [27]:
ann.evaluate(X_test, y_test) # Test Accuracy



[0.4224282205104828, 0.8154550194740295]

In [28]:
ann.evaluate(X_train, y_train) # Train accuracy



[0.38852164149284363, 0.8247182369232178]