# Predicting Wildfire Magnitude

Aim: We want to predict the magnitude of forest fires using a regression neural network, based on factors such as location, physical conditions (temperature, wind, humidity, precipitation) leading up to and during the fire, and the cause of the fire.

Purpose: With this model it will be easier for affected parties (e.g. insurance companies, councils and families) to deal with the aftermath of a fire.

Extension: We could try to estimate potential economic loss using the magnitude prediction.

In [19]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import Model
from tensorflow.keras import Sequential
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.losses import MeanSquaredLogarithmicError

In [30]:
# import data
df = pd.read_csv("./FW_Veg_Rem_Combined.csv")

# clean data - keep only the predictor columns and the target ("fire_mag")

columns_to_keep = ["latitude", "longitude", "stat_cause_descr", "Vegetation", "Temp_pre_7", "Temp_cont",
                   "Wind_pre_7", "Wind_cont", "Hum_pre_7", "Hum_cont", "Prec_pre_7", "Prec_cont", "fire_mag"]

df_clean = df[columns_to_keep]

# randomise which datapoints end up in the train set and which in the test set.
# Row *positions* are permuted (not index labels) because they are fed to
# .iloc, which is positional.

shuffled_indices = np.random.permutation(len(df_clean))
df_shuffled = df_clean.iloc[shuffled_indices]
cat_columns = df_shuffled.select_dtypes(include=["object"]).columns

# one-hot-encode the categorical (object-dtype) columns, e.g. "stat_cause_descr".
# This is done BEFORE splitting so that train and test always share exactly the
# same dummy columns, even if a category occurs in only one of the two splits.
df_encoded = pd.get_dummies(df_shuffled, columns=cat_columns)

train_perc = 0.8
split_at = int(train_perc * len(df_shuffled))

train_data = df_shuffled[:split_at]  # first 80% of the shuffled rows: train data
test_data = df_shuffled[split_at:]   # remaining 20%: test data

train_encoded = df_encoded[:split_at]
test_encoded = df_encoded[split_at:]

# separate the predictors from the regression target
x_train = train_encoded.drop(columns="fire_mag")
x_test = test_encoded.drop(columns="fire_mag")

y_train = train_encoded["fire_mag"]
y_test = test_encoded["fire_mag"]

In [None]:
# scale train and test datasets for faster convergence
def scale_datasets(x_train, x_test):
    """
    Standard-scale (z-score normalise) the train and test features.

    The scaler is fitted on ``x_train`` only and then applied to both
    frames, so no statistics from the test set are used for scaling.

    Parameters
    ----------
    x_train, x_test : pandas.DataFrame
        Feature frames with identical columns.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(x_train_scaled, x_test_scaled)`` with the same columns and the
        same index as the corresponding inputs.
    """
    standard_scaler = StandardScaler()

    # Keep the original index so the scaled features stay aligned with the
    # target Series taken from the same rows.
    x_train_scaled = pd.DataFrame(
        standard_scaler.fit_transform(x_train),
        columns=x_train.columns,
        index=x_train.index,
    )
    x_test_scaled = pd.DataFrame(
        standard_scaler.transform(x_test),
        columns=x_test.columns,
        index=x_test.index,
    )
    # Return the frame itself; calling it (``x_test_scaled()``) raises TypeError.
    return x_train_scaled, x_test_scaled

x_train_final, x_test_final = scale_datasets(x_train, x_test)

In [None]:
# may have to experiment with parameters
# Hidden-layer width: about two thirds of the number of input features
# (shape[1]). Dense layers need an integer unit count, so the value is
# truncated to int and floored at 1.
# NOTE(review): the original used shape[0] (the number of training rows),
# which is assumed to be a slip - confirm the intended sizing rule.
n_features = x_train_final.shape[1]
hidden_units1 = max(1, int(2 / 3 * n_features))
hidden_units2 = max(1, int(2 / 3 * n_features))
hidden_units3 = max(1, int(2 / 3 * n_features))
learning_rate = 0.01

# create model using the Sequential in tensorflow
def build_model_using_sequential():
    """
    Build the (uncompiled) regression network.

    The stack is three softplus Dense layers sized by the module-level
    ``hidden_units1`` / ``hidden_units2`` / ``hidden_units3``, with a
    Dropout layer after each of the first two, followed by a single
    linear output unit.
    """
    hidden_kwargs = dict(kernel_initializer='normal', activation='softplus')

    # NOTE(review): Dropout(0.8) is kept exactly as written; confirm this
    # rate is intended.
    stack = [
        Dense(hidden_units1, **hidden_kwargs),
        Dropout(0.8),
        Dense(hidden_units2, **hidden_kwargs),
        Dropout(0.8),
        Dense(hidden_units3, **hidden_kwargs),
        # single linear unit: the regression output
        Dense(1, kernel_initializer='normal', activation='linear'),
    ]
    return Sequential(stack)

model = build_model_using_sequential()

In [None]:
# loss function: mean squared logarithmic error, used both as the training
# loss and as the reported metric
msle = MeanSquaredLogarithmicError()
model.compile(
    optimizer=Adam(learning_rate=learning_rate),
    loss=msle,
    metrics=[msle],
)

# train the model on the scaled features; 20% of the training data is held
# out for validation
history = model.fit(
    x_train_final.values,
    y_train.values,
    batch_size=64,
    epochs=25,
    validation_split=0.2,
)

In [None]:
def plot_history(history, key):
    """
    Plot one recorded metric and its validation counterpart against epochs.

    ``history`` is the object returned by ``model.fit``; ``key`` names an
    entry of ``history.history``. The entry ``'val_' + key`` is plotted on
    the same axes.
    """
    val_key = 'val_' + key
    for series_name in (key, val_key):
        plt.plot(history.history[series_name])
    plt.xlabel("Epochs")
    plt.ylabel(key)
    plt.legend([key, val_key])
    plt.show()
# Plot the history
plot_history(history, 'mean_squared_logarithmic_error')

In [None]:
# predict data: run the trained model on the scaled test features;
# pred_values holds the model output for the rows of x_test_final
pred_values = model.predict(x_test_final)