# 🔥 Fire...or No Fire?

## **Step 1:** Setup
We need to set up the dataframe in its initial state.
Then we can manipulate it further in later steps by cleaning it, digesting it, etc.

### Import Packages
> "If I have seen further, it is by standing on the shoulders of Giants."<br />
> &mdash; Isaac Newton


In [None]:
# glob crawler for reading files and folders
import glob

# regex parser for advanced string-parsing
import re

# pandas used for dataframe manipulation
import pandas

# numpy is used for odds and ends, like high-efficiency arrays
import numpy

# python imaging library; used for deconstructing images
from PIL import Image

# other imports are described as they are used
from keras.models import Sequential
from keras.losses import binary_crossentropy
from keras.optimizers import Adadelta
from keras.preprocessing.image import img_to_array
from keras.tensorflow_backend import _get_available_gpus
from sklearn.model_selection import train_test_split

In [None]:
# ask the keras backend which GPUs it can see; the cell output shows the result.
# NOTE(review): `_get_available_gpus` is a private keras helper. In Keras 2.x it
# appears to live in `keras.backend.tensorflow_backend` - confirm that the
# import path used at the top of the notebook matches the installed version.
_get_available_gpus()

### Crawl Datafiles
The **glob** package is great and sets up our project for success.
Be sure you have your directory-structure set up correctly as stated in the README&hellip;
Otherwise, I cannot guarantee that this project will work for you!

In [None]:
# get all files that sit exactly one folder below the data dir.
# "glob" returns its matches in arbitrary order, so the list is
# sorted to give every file the same position on every run.
files = sorted(glob.glob('data/*/*.*'))

In [None]:
# rows of [folder, filename, fire]; one row per file that "glob" found
dataset = []

for filepath in files:
	# split on runs of "\" or "/" (Windows/MacOS compatibility) and
	# discard the leading "data" entry; it is not needed
	filecrawl = re.split(r'\\+|/+', filepath)[1:]

	# label the row: 1 for images from the fire-images folder, else 0
	filecrawl.append(1 if filecrawl[0] == 'Fire images' else 0)

	# add the finished row to the dataset
	dataset.append(filecrawl)
	# ==NOTE==
	# Row order mirrors the order of "files". The "glob" package itself
	# returns matches in arbitrary order, so the index of an item is
	# only stable between runs if the order of "files" is.

### Create Dataframe

In [None]:
# build the project's main dataframe: one row per image file
dataframe = pandas.DataFrame(
	data=dataset,
	columns=['folder', 'filename', 'fire'],
)

# preview both ends of the table; "display" renders them neatly
display(dataframe.head())
display(dataframe.tail())

## Step 2: Data Manipulation

### Check for Duplicates

In [None]:
# flag every row whose filename occurs more than once;
# keep=False marks all of the copies, not only the repeats
duplicates = dataframe['filename'].duplicated(keep=False)

# show the flagged rows (the table is empty when nothing is duplicated)
duplicate_rows = dataframe.loc[duplicates]
display(duplicate_rows)

# raise attention with a warning when at least one row was flagged
if duplicates.any():
	warning = Warning(
		f'There are {duplicates.sum()}'
		' duplicated filenames in the dataframe.'
		' proceed with caution.'
	)
	display(warning)

### Rebalance Datapoints
I used [this article][rebalancing] to help figure things out.

[rebalancing]: https://towardsdatascience.com/having-an-imbalanced-dataset-here-is-how-you-can-solve-it-1640568947eb

In [None]:
# == FIXME ==
# this does not seem to be working properly at the moment.
#
# The experiment is therefore disabled: everything between the triple
# double-quotes below is a bare string literal, so none of it is executed.
# Note that the disabled fit/predict calls refer to x_train and y_train,
# which are only created further down, in the Train/Test Split cell.

"""
from imblearn.ensemble import BalancedBaggingClassifier
from sklearn.tree import DecisionTreeClassifier

# create an object of the classifier, called "rebalancinator"
rebalancinator = BalancedBaggingClassifier(
	base_estimator=DecisionTreeClassifier(),
	sampling_strategy='auto',
	replacement=False,
	random_state=0
)

'''
# train the classifier.
rebalancinator.fit(x_train, y_train)
preds = rebalancinator.predict(x_train)
'''
"""

pass

### Train/Test Split
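*Why do we need train/test split?*
*What does it do?*

Holding back part of the data lets us measure how the model performs on images it never saw during training.
The split divides the rows into a training portion, used to fit the model, and a testing portion, used only to check it.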

In [None]:
# x holds the features: every column except the "fire" label
x = dataframe.drop('fire', axis='columns')

# y holds the label: the "fire" column on its own, still as a dataframe
y = dataframe[['fire']]

In [None]:
# Use the built-in "train test split" function
# to generate the four desirable segments of data;
# 35% of the rows are held out for testing.
x_train, x_test, y_train, y_test = train_test_split(
	x, y, test_size=0.35)

# The split keeps each row's original index label, but the later cells
# look rows up by 0, 1, 2, ..., so every segment is renumbered here.
# reset_index(drop=True) discards the old labels in one step; the earlier
# `.reset_index().drop('index', 1)` passed `axis` positionally, which
# pandas 2.0 no longer accepts.
x_train = x_train.reset_index(drop=True)
y_train = y_train.reset_index(drop=True)
x_test = x_test.reset_index(drop=True)
y_test = y_test.reset_index(drop=True)

display(x_test.head())
display(y_test.head())

### Create Image Vectors

In [None]:
# side length, in pixels, of the square that every image is resized to;
# the same value sets the height and width of the model's input_shape
image_length = 32

In [None]:
def get_img_vector(x, index):
	"""Load one image listed in `x` and return it as a pixel array.

	Parameters:
		x: dataframe with 'folder' and 'filename' columns.
		index: index label of the row whose image should be loaded.

	Returns:
		The result of `img_to_array` for the image after it has been
		converted to RGB and resized to image_length x image_length.
	"""
	# find filepath
	filename = x['filename'][index]
	folder = x['folder'][index]
	filepath = f'data/{folder}/{filename}'

	# open the image via its filepath; the "with" block makes sure the
	# underlying file is closed again instead of leaking one open
	# handle per image
	# ==NOTE==
	# the Image class was imported from PIL
	# (python image library)
	with Image.open(filepath) as source:
		# remove transparency layer; the conversion produces a new
		# in-memory image, so it stays usable after the file is closed
		img = source.convert('RGB')

	# resize the image
	img = img.resize((image_length, image_length))

	# return the image vector
	return img_to_array(img)

In [None]:
def data_gen(x, y, batch_size):
	"""Yield the images and labels of `x` / `y` in consecutive batches.

	Parameters:
		x: dataframe with 'folder' and 'filename' columns, indexed
			0 .. len(x) - 1.
		y: dataframe with a 'fire' column, indexed like `x`.
		batch_size: number of rows per batch.

	Yields:
		(x_batch, y_batch) tuples. x_batch has the RGB values for each
		pixel's coordinate, shaped (rows, image_length, image_length, 3);
		y_batch represents whether there is fire or not (0/1), shaped
		(rows, 1). `rows` equals batch_size, except for the last batch,
		which only holds the rows that are left over.
	"""
	# the end of the data is measured against `x` itself. Comparing with
	# the global dataframe never matched for a train or test split, so
	# the final, smaller batch was silently dropped.
	total = len(x)

	for start in range(0, total, batch_size):
		stop = min(start + batch_size, total)
		rows = stop - start

		# fresh arrays for every batch: a consumer that keeps a batch
		# around is not overwritten by the next one, and a short final
		# batch carries no stale rows from the batch before it.
		x_batch = numpy.zeros((rows, image_length, image_length, 3))
		y_batch = numpy.zeros((rows, 1))

		for offset, index in enumerate(range(start, stop)):
			x_batch[offset] = get_img_vector(x, index)
			y_batch[offset] = y['fire'][index]

		yield (x_batch, y_batch)

## Step 3: Train Model

In [None]:
# use "sequential" mode from keras module
# see https://jovianlin.io/keras-models-sequential-vs-functional/
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# model = Sequential()
# model.add(Conv2D(16, (4,4), activation='relu', input_shape=(image_length, image_length, 3)))
# model.add(Flatten())
# model.add(Dense(128, activation='relu'))
# model.add(Dense(16, activation='relu'))
# model.add(Dense(1, activation='sigmoid'))

In [None]:
# two convolutions -> pooling -> flatten -> small dense head;
# the single sigmoid unit at the end outputs the fire score
model = Sequential([
	Conv2D(128, (5, 5), activation='relu', input_shape=(image_length, image_length, 3)),
	Conv2D(64, (3, 3), activation='relu'),
	MaxPooling2D(pool_size=(2, 2)),
	Dropout(0.25),
	Flatten(),
	Dense(32, activation='relu'),
	Dropout(0.15),
	Dense(1, activation='sigmoid'),
])

In [None]:
# binary cross-entropy fits the single 0/1 "fire" output;
# accuracy is tracked so that progress can be printed while training
model.compile(optimizer=Adadelta(), loss=binary_crossentropy, metrics=['accuracy'])

In [None]:
# training settings
batch_size = 64
epochs = 5

for epoch in range(epochs):
	print(f"Epoch {epoch+1} / {epochs}")

	# stream the training set one batch at a time
	batches = data_gen(x_train, y_train, batch_size)
	for x_batch, y_batch in batches:
		# train on this batch ...
		model.train_on_batch(x_batch, y_batch)

		# ... then score the very same batch to watch the progress
		loss, accuracy = model.evaluate(x_batch, y_batch)
		print('accuracy:', accuracy)

## Step 4: Affirm Model Accuracy

In [None]:
def get_y_value(index):
	"""Return the true "fire" label stored in y_test at `index`."""
	labels = y_test['fire']
	return labels[index]

def get_y_guess(index, threshold=0.5):
	"""Return the model's 0/1 fire guess for the test image at `index`.

	Parameters:
		index: index label of the row in x_test to classify.
		threshold: scores strictly above this value count as fire (1).

	Returns:
		1 when the model's score exceeds `threshold`, otherwise 0.
	"""
	# the model expects a batch, so the single image is reshaped to
	# (1, image_length, image_length, 3)
	batch = get_img_vector(x_test, index).reshape(-1, image_length, image_length, 3)

	# one image in, one score out: take the only value of the prediction
	score = float(model.predict(batch)[0][0])

	# the output layer is a sigmoid, so the score lies between 0 and 1.
	# Comparing it with 0.0 labelled practically every image as fire;
	# the midpoint is the meaningful decision boundary.
	return 1 if score > threshold else 0

def get_confusion_matrix_objects():
	"""Collect the true labels and the model's guesses for all of x_test.

	Returns:
		A (y_values, y_guesses) tuple of two equally long lists, ordered
		by test index.
	"""
	indices = range(len(x_test))
	y_values = [get_y_value(index) for index in indices]
	y_guesses = [get_y_guess(index) for index in indices]
	return (y_values, y_guesses)

In [None]:
from sklearn.metrics import confusion_matrix

# labels=[0, 1] pins the matrix to 2x2. Without it, the matrix shrinks to
# 1x1 whenever only one class shows up in both the true labels and the
# guesses, and unpacking into four names fails.
y_values, y_guesses = get_confusion_matrix_objects()
tn, fp, fn, tp = confusion_matrix(y_values, y_guesses, labels=[0, 1]).ravel()

In [None]:
# one line per cell of the confusion matrix, followed by a blank line
print(''.join([
	f' true positive {tp}\n',
	f' true negative {tn}\n',
	f'false positive {fp}\n',
	f'false negative {fn}\n',
]))

In [None]:
# print the raw prediction together with an <img> tag for the image it
# belongs to, for at most the first 50 test images.
# The tag is built from x_test, the same table the prediction is made on;
# it used to be built from x_train, which paired each score with an
# unrelated picture. The min() keeps the loop inside x_test when it has
# fewer than 50 rows.
for index in range(min(50, len(x_test))):
	predict = model.predict(get_img_vector(x_test, index).reshape(-1, image_length, image_length, 3))
	print(
		f'\n{predict[0][0]} \n'
		'<img src="./data/'
		f'{x_test["folder"][index]}'
		'/'
		f'{x_test["filename"][index]}">'
	)

<img src="./data/Fire images/images.jpg">
<img src="./data/Fire images/11_10_19-mjs_ft_hotel-fire_19183862.jpg">
<img src="./data/Fire images/FlamesEverett House Fire.transfer_1461845684828_4041471_ver1.0_640_360.jpg">
