In [4]:
pip install tensorflow

Note: you may need to restart the kernel to use updated packages.


In [3]:
pip install keras-tuner

Collecting keras-tuner
  Using cached keras_tuner-1.4.7-py3-none-any.whl (129 kB)
Collecting kt-legacy
  Using cached kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5
Note: you may need to restart the kernel to use updated packages.


In [23]:
# import neccessary tools
import pandas as pd
import numpy as np
from keras_tuner import HyperModel
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l1, l2, l1_l2
from keras_tuner.tuners import RandomSearch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [9]:
# Load the dataset
data = pd.read_csv('FireData.csv')

# The first few rows of the dataset
data.head()

Unnamed: 0,OBJECTID,Shape,FOD_ID,FPA_ID,SOURCE_SYSTEM_TYPE,SOURCE_SYSTEM,NWCG_REPORTING_AGENCY,NWCG_REPORTING_UNIT_ID,NWCG_REPORTING_UNIT_NAME,SOURCE_REPORTING_UNIT,...,LATITUDE,LONGITUDE,OWNER_DESCR,STATE,COUNTY,FIPS_CODE,FIPS_NAME,DURATION_HOURS,Precipitation_In_Month,Avg_Temp_In_Month
0,1,b'\x00\x01\xad\x10\x00\x00\xc8\xce\n[_@^\xc0\x...,1,FS-1418826,FED,FS-FIRESTAT,FS,USCAPNF,Plumas National Forest,511,...,40.036944,-121.005833,USFS,CA,63.0,6063.0,Plumas County,4.5,3.69,45.6
1,2,b'\x00\x01\xad\x10\x00\x00\xc8\xe594\xe2\x19^\...,2,FS-1418827,FED,FS-FIRESTAT,FS,USCAENF,Eldorado National Forest,503,...,38.933056,-120.404444,USFS,CA,61.0,6061.0,Placer County,6.75,0.08,60.2
2,3,b'\x00\x01\xad\x10\x00\x00x{\xac \x13/^\xc0@\x...,3,FS-1418835,FED,FS-FIRESTAT,FS,USCAENF,Eldorado National Forest,503,...,38.984167,-120.735556,STATE OR PRIVATE,CA,17.0,6017.0,El Dorado County,1.05,0.08,60.2
3,4,b'\x00\x01\xad\x10\x00\x00\xc8\x13u\xd7s\xfa]\...,4,FS-1418845,FED,FS-FIRESTAT,FS,USCAENF,Eldorado National Forest,503,...,38.559167,-119.913333,USFS,CA,3.0,6003.0,Alpine County,118.0,0.06,66.8
4,5,b'\x00\x01\xad\x10\x00\x00\xd0\x11y\xf8\xb6\xf...,5,FS-1418847,FED,FS-FIRESTAT,FS,USCAENF,Eldorado National Forest,503,...,38.559167,-119.933056,USFS,CA,3.0,6003.0,Alpine County,116.0,0.06,66.8


In [58]:
# Select relevant columns including the target variable
data_of_interest = ['FIRE_YEAR', 'FIRE_SIZE', 'LATITUDE', 'LONGITUDE', 'Avg_Temp_In_Month', 'Precipitation_In_Month', 'NWCG_GENERAL_CAUSE']
data_selected = data[data_of_interest]

X = data_selected.drop('NWCG_GENERAL_CAUSE', axis=1)
y = data_selected['NWCG_GENERAL_CAUSE']

# One-hot encoder
onehot_encoder = OneHotEncoder(sparse_output=False)
y_encoded = onehot_encoder.fit_transform(y.values.reshape(-1, 1))
print(onehot_encoder.categories_)

# Split the data into training, validation, and test sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y_encoded, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Normalize the features
numeric_features = ['FIRE_YEAR', 'FIRE_SIZE', 'LATITUDE', 'LONGITUDE', 'Avg_Temp_In_Month', 'Precipitation_In_Month']
numeric_transformer = Pipeline(steps=[('scaler', StandardScaler())])

preprocessor = ColumnTransformer(transformers=[('num', numeric_transformer, numeric_features)])

X_train_transformed = preprocessor.fit_transform(X_train)
X_val_transformed = preprocessor.transform(X_val)
X_test_transformed = preprocessor.transform(X_test)

[array(['Arson/incendiarism', 'Debris and open burning',
       'Equipment and vehicle use', 'Firearms and explosives use',
       'Fireworks', 'Missing data/not specified/undetermined',
       'Misuse of fire by a minor', 'Natural', 'Other causes',
       'Power generation/transmission/distribution',
       'Railroad operations and maintenance', 'Recreation and ceremony',
       'Smoking'], dtype=object)]


In [50]:
# Define a class that facilitates finding the best amount of neural nodes and learning rates for the model

class MyHyperModel(HyperModel):
    def __init__(self, input_shape, num_classes):
        self.input_shape = input_shape
        self.num_classes = num_classes

    def build(self, hp):
        model = Sequential()
        model.add(Dense(units=hp.Int('units',
                                     min_value=32,
                                     max_value=512,
                                     step=16),
                        activation='relu',
                        input_shape=self.input_shape))
        model.add(Dense(self.num_classes, activation='softmax'))
        model.compile(optimizer=Adam(hp.Float('learning_rate', min_value=1e-5, max_value=1e-2, sampling='log')),
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
        return model


In [51]:
# Find the best parameters through 100 trials

tuner = RandomSearch(
    MyHyperModel(input_shape=X_train_transformed.shape[1:], num_classes=y_train.shape[1]),
    objective='val_accuracy',
    max_trials=100,
    executions_per_trial=1,
    )

tuner.search(X_train_transformed, y_train, epochs=10, validation_data=(X_val_transformed, y_val))

Reloading Tuner from ./untitled_project/tuner0.json


In [52]:
# Get the best hyperparameters.
best_hyperparameters = tuner.get_best_hyperparameters()[0]  # This returns the best set of hyperparameters

print('Best hyperparameters:', best_hyperparameters.values)
# Retrieve the best model.
best_model = tuner.get_best_models(num_models=1)[0]

Best hyperparameters: {'units': 416, 'learning_rate': 0.0054654394957180845}


In [53]:
   """
    build_model:
    
    Builds a neural network model with L1/L2 regularization.

    Parameters:
    - n_units: Number of units in the hidden layer.
    - learning_rate: Learning rate for the optimizer.

    Returns:
    - A compiled Keras model.
    """
# Define L1, L2
l1_reg = 0.0005
l2_reg = 0.0005

def build_model(n_units, learning_rate):
    model = Sequential([
        Dense(n_units, activation='relu', input_shape=(X_train_transformed.shape[1],)),
        Dense(n_units, activation='relu', kernel_regularizer=l2(l2_reg)),
        Dense(y_train.shape[1], activation='softmax') 
    ])

    
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model


In [54]:
# Recombine the training and validation sets for this final training step to maximize the data the model learns from
X_full_train = np.concatenate((X_train_transformed, X_val_transformed), axis=0)
y_full_train = np.concatenate((y_train, y_val), axis=0)

# Extract parameters from the best hyperparameters found earlier
best_n_units = best_hyperparameters.get('units')
best_learning_rate = best_hyperparameters.get('learning_rate')

# Rebuild the model using the best hyperparameters
best_model = build_model(best_n_units, best_learning_rate)

# Retrain the model on the full training dataset
history = best_model.fit(X_full_train, y_full_train, epochs=10, batch_size=16, verbose=2)

Epoch 1/10
530/530 - 6s - loss: 1.4729 - accuracy: 0.5663 - 6s/epoch - 11ms/step
Epoch 2/10
530/530 - 5s - loss: 1.3703 - accuracy: 0.5856 - 5s/epoch - 10ms/step
Epoch 3/10
530/530 - 5s - loss: 1.3474 - accuracy: 0.5910 - 5s/epoch - 10ms/step
Epoch 4/10
530/530 - 5s - loss: 1.3299 - accuracy: 0.5938 - 5s/epoch - 10ms/step
Epoch 5/10
530/530 - 5s - loss: 1.3227 - accuracy: 0.5941 - 5s/epoch - 10ms/step
Epoch 6/10
530/530 - 5s - loss: 1.3117 - accuracy: 0.5972 - 5s/epoch - 10ms/step
Epoch 7/10
530/530 - 5s - loss: 1.3053 - accuracy: 0.6019 - 5s/epoch - 10ms/step
Epoch 8/10
530/530 - 5s - loss: 1.2977 - accuracy: 0.6054 - 5s/epoch - 10ms/step
Epoch 9/10
530/530 - 5s - loss: 1.2922 - accuracy: 0.6041 - 5s/epoch - 10ms/step
Epoch 10/10
530/530 - 5s - loss: 1.2893 - accuracy: 0.6091 - 5s/epoch - 10ms/step


In [61]:
# Calculate the average accuracy
accuracies = history.history['accuracy']
average_accuracy = sum(accuracies) / len(accuracies)

print(f"Average accuracy over {len(accuracies)} epochs: {average_accuracy}")

Average accuracy over 10 epochs: 0.5948499858379364


# Prediction Demo

In [68]:
# Example input data (replace with your actual data)
new_observation = {
    'FIRE_YEAR': 2004,
    'FIRE_SIZE': 0.1,
    'LATITUDE': 39.69,
    'LONGITUDE': -120.0,
    'Avg_Temp_In_Month': 73.2,
    'Precipitation_In_Month': 0.19
}

input_data = np.array([[new_observation[key] for key in sorted(new_observation)]])

# Ensure the input data shape matches what the model expects
# Example: input_data might need to be reshaped if your model expects a specific dimensionality

predictions = best_model.predict(input_data)

# If your model performs classification, you'll get probabilities for each class
# Convert probabilities to class labels if necessary
predicted_class = np.argmax(predictions, axis=1)  # For classification tasks

predicted_class = np.argmax(predictions, axis=1)
class_names = ['Arson/incendiarism', 'Debris and open burning',
       'Equipment and vehicle use', 'Firearms and explosives use',
       'Fireworks', 'Missing data/not specified/undetermined',
       'Misuse of fire by a minor', 'Natural', 'Other causes',
       'Power generation/transmission/distribution',
       'Railroad operations and maintenance', 'Recreation and ceremony',
       'Smoking']
predicted_labels = [class_names[i] for i in predicted_class]
print('The cause of the fire is most likely:')
print(predicted_labels)


The cause of the fire is most likely:
['Natural']
