In [1]:
import os
import sys
import pandas as pd
import numpy as np

# Image Generator
from keras.preprocessing.image import ImageDataGenerator

# Plotting
import matplotlib.pyplot as plt

# Modelling CNN
import tensorflow as tf
from numba import cuda
import glob 

In [2]:
# folder path or file path constants that will be used in this project

# Root Directory
D_DIR = "D:"
F_DIR = "F:"


# Folder inside D:\\wildfire-sumatera-dataset
WILDFIRE_SUMATERA_DATASET_FOLDER_PATH = f"{D_DIR}\\wildfire-sumatera-dataset"


# Folders and metadatas inside D:\\wildfire-sumatera-dataset
WILDFIRE_SUMATERA_GEOTIFF_FOLDER_PATH        = f"{WILDFIRE_SUMATERA_DATASET_FOLDER_PATH}\\wildfire-sumatera-geotiff"
WILDFIRE_SUMATERA_JPEG_FOLDER_PATH           = f"{WILDFIRE_SUMATERA_DATASET_FOLDER_PATH}\\wildfire-sumatera-jpeg"
# Files (.csv) and metadatas inside D:\\wildfire-sumatera-dataset
METADATA_LANDSAT_8_FILE_PATH  = f"{WILDFIRE_SUMATERA_DATASET_FOLDER_PATH}\\metadata_landsat_8.csv"
METADATA_SENTINEL_2_FILE_PATH = f"{WILDFIRE_SUMATERA_DATASET_FOLDER_PATH}\\metadata_sentinel_2.csv"



# Folders inside D:\\wildfire-sumatera-dataset\\wildfire-sumatera-geotiff
SENTINEL_2_GEOTIFF_FOLDER_PATH = f"{WILDFIRE_SUMATERA_GEOTIFF_FOLDER_PATH}\\sentinel-2"
LANDSAT_8_GEOTIFF_FOLDER_PATH  = f"{WILDFIRE_SUMATERA_GEOTIFF_FOLDER_PATH}\\landsat-8"


# Folders inside D:\\wildfire-sumatera-dataset\\wildfire-sumatera-jpeg
SENTINEL_2_JPEG_FOLDER_PATH = f"{WILDFIRE_SUMATERA_JPEG_FOLDER_PATH}\\sentinel-2"
LANDSAT_8_JPEG_FOLDER_PATH  = f"{WILDFIRE_SUMATERA_JPEG_FOLDER_PATH}\\landsat-8"



# Folders inside D:\\wildfire-sumatera-dataset\\wildfire-sumatera-geotiff\\landsat-8
LANDSAT_8_PREFIRE_GEOTIFF_FOLDER_PATH  = f"{LANDSAT_8_GEOTIFF_FOLDER_PATH}\\prefire"
LANDSAT_8_POSTFIRE_GEOTIFF_FOLDER_PATH = f"{LANDSAT_8_GEOTIFF_FOLDER_PATH}\\postfire"

# Folders inside D:\\wildfire-sumatera-dataset\\wildfire-sumatera-geotiff\\sentinel-2
SENTINEL_2_PREFIRE_GEOTIFF_FOLDER_PATH  = f"{SENTINEL_2_GEOTIFF_FOLDER_PATH}\\prefire"
SENTINEL_2_POSTFIRE_GEOTIFF_FOLDER_PATH = f"{SENTINEL_2_GEOTIFF_FOLDER_PATH}\\postfire"

dirs = [
    WILDFIRE_SUMATERA_DATASET_FOLDER_PATH,
    WILDFIRE_SUMATERA_GEOTIFF_FOLDER_PATH, 
    WILDFIRE_SUMATERA_JPEG_FOLDER_PATH,
    
    SENTINEL_2_GEOTIFF_FOLDER_PATH, 
    LANDSAT_8_GEOTIFF_FOLDER_PATH,
    SENTINEL_2_JPEG_FOLDER_PATH,
    LANDSAT_8_JPEG_FOLDER_PATH,
    
    LANDSAT_8_PREFIRE_GEOTIFF_FOLDER_PATH,
    LANDSAT_8_POSTFIRE_GEOTIFF_FOLDER_PATH,
    SENTINEL_2_PREFIRE_GEOTIFF_FOLDER_PATH,
    SENTINEL_2_POSTFIRE_GEOTIFF_FOLDER_PATH,
]

for dir_ in dirs:
    if not os.path.exists(dir_):
        os.mkdir(dir_)
        print(f"{dir_} has been created")
    else:
        print(f"{dir_} already exist")

D:\wildfire-sumatera-dataset already exist
D:\wildfire-sumatera-dataset\wildfire-sumatera-geotiff already exist
D:\wildfire-sumatera-dataset\wildfire-sumatera-jpeg already exist
D:\wildfire-sumatera-dataset\wildfire-sumatera-geotiff\sentinel-2 already exist
D:\wildfire-sumatera-dataset\wildfire-sumatera-geotiff\landsat-8 already exist
D:\wildfire-sumatera-dataset\wildfire-sumatera-jpeg\sentinel-2 already exist
D:\wildfire-sumatera-dataset\wildfire-sumatera-jpeg\landsat-8 already exist
D:\wildfire-sumatera-dataset\wildfire-sumatera-geotiff\landsat-8\prefire already exist
D:\wildfire-sumatera-dataset\wildfire-sumatera-geotiff\landsat-8\postfire already exist
D:\wildfire-sumatera-dataset\wildfire-sumatera-geotiff\sentinel-2\prefire already exist
D:\wildfire-sumatera-dataset\wildfire-sumatera-geotiff\sentinel-2\postfire already exist


# Constants

In [3]:
RANDOM_STATE = 42
BATCH_SIZE = 32
SEED = RANDOM_STATE
WIDTH = 256
HEIGHT = 256
CHANNEL = 3
INPUT_SIZE = (WIDTH, HEIGHT, CHANNEL)

# Prepare Data

In [4]:
# landsat-8
landsat_df = pd.read_csv(METADATA_LANDSAT_8_FILE_PATH)
landsat_df_backup = landsat_df.copy()

In [5]:
landsat_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7839 entries, 0 to 7838
Data columns (total 6 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   folder_paths_geotiff  7839 non-null   object 
 1   mean_dnbr             7839 non-null   float64
 2   median_dnbr           7839 non-null   float64
 3   image_condition       7839 non-null   int64  
 4   file_paths_jpeg       7839 non-null   object 
 5   class                 7839 non-null   int64  
dtypes: float64(2), int64(2), object(2)
memory usage: 367.6+ KB


In [6]:
# convert column 'class' type to string
landsat_df.loc[:, 'class'] = landsat_df.loc[:, 'class'].astype(str)
# filter image that is in a good condition
landsat_df = landsat_df[landsat_df['image_condition'] == 1]
landsat_df.reset_index()

Unnamed: 0,index,folder_paths_geotiff,mean_dnbr,median_dnbr,image_condition,file_paths_jpeg,class
0,0,D:\wildfire-sumatera-dataset\wildfire-sumatera...,0.050867,0.028118,1,D:\wildfire-sumatera-dataset\wildfire-sumatera...,1
1,1,D:\wildfire-sumatera-dataset\wildfire-sumatera...,0.050547,0.027762,1,D:\wildfire-sumatera-dataset\wildfire-sumatera...,1
2,2,D:\wildfire-sumatera-dataset\wildfire-sumatera...,0.054626,0.029420,1,D:\wildfire-sumatera-dataset\wildfire-sumatera...,1
3,3,D:\wildfire-sumatera-dataset\wildfire-sumatera...,0.027477,0.017275,1,D:\wildfire-sumatera-dataset\wildfire-sumatera...,0
4,4,D:\wildfire-sumatera-dataset\wildfire-sumatera...,0.050742,0.028028,1,D:\wildfire-sumatera-dataset\wildfire-sumatera...,1
...,...,...,...,...,...,...,...
7831,7834,D:\wildfire-sumatera-dataset\wildfire-sumatera...,0.046115,0.033393,1,D:\wildfire-sumatera-dataset\wildfire-sumatera...,0
7832,7835,D:\wildfire-sumatera-dataset\wildfire-sumatera...,0.033706,0.023961,1,D:\wildfire-sumatera-dataset\wildfire-sumatera...,0
7833,7836,D:\wildfire-sumatera-dataset\wildfire-sumatera...,0.079479,0.063010,1,D:\wildfire-sumatera-dataset\wildfire-sumatera...,1
7834,7837,D:\wildfire-sumatera-dataset\wildfire-sumatera...,0.051759,0.036960,1,D:\wildfire-sumatera-dataset\wildfire-sumatera...,1


In [7]:
# spliting data for testing and training
from sklearn.model_selection import train_test_split

# Spliting based on class
# split between train and val_test with 20% split ratio
x_train, x_val_test, y_train, y_val_test, = train_test_split(
    landsat_df['file_paths_jpeg'], 
    landsat_df['class'], 
    stratify = landsat_df['class'],
    test_size = 0.2,
    random_state = RANDOM_STATE
)

# split val_test to val and test with 50% split ratio
x_val, x_test, y_val, y_test = train_test_split(
    x_val_test, 
    y_val_test, 
    stratify = y_val_test,
    test_size = 0.5,
    random_state = RANDOM_STATE
)

In [8]:
print("train image = {} and label = {}".format(len(x_train), len(y_train)))
print('Class quantity in train')
display(y_train.value_counts())
print("val image = {} and label = {}".format(len(x_val), len(y_val)))
print('Class quantity in val')
display(y_val.value_counts())
print("test image = {} and label = {}".format(len(x_test), len(y_test)))
print('Class quantity in test')
display(y_test.value_counts())

train image = 6268 and label = 6268
Class quantity in train


1    3911
2    1282
0    1075
Name: class, dtype: int64

val image = 784 and label = 784
Class quantity in val


1    489
2    161
0    134
Name: class, dtype: int64

test image = 784 and label = 784
Class quantity in test


1    489
2    160
0    135
Name: class, dtype: int64

In [9]:
y_train.values

array(['1', '0', '1', ..., '1', '2', '1'], dtype=object)

In [10]:
train_dict = {
    'file_paths_jpeg': x_train.values,
    'class': y_train.values,
}
train_df = pd.DataFrame(data=train_dict)

val_dict = {
    'file_paths_jpeg': x_val.values,
    'class': y_val.values,
}
val_df = pd.DataFrame(data=val_dict)

test_dict = {
    'file_paths_jpeg': x_test.values,
    'class': y_test.values,
}
test_df = pd.DataFrame(data=test_dict)

In [11]:
display(train_df.head())
display(val_df.head())
display(test_df.head())

Unnamed: 0,file_paths_jpeg,class
0,D:\wildfire-sumatera-dataset\wildfire-sumatera...,1
1,D:\wildfire-sumatera-dataset\wildfire-sumatera...,0
2,D:\wildfire-sumatera-dataset\wildfire-sumatera...,1
3,D:\wildfire-sumatera-dataset\wildfire-sumatera...,2
4,D:\wildfire-sumatera-dataset\wildfire-sumatera...,1


Unnamed: 0,file_paths_jpeg,class
0,D:\wildfire-sumatera-dataset\wildfire-sumatera...,1
1,D:\wildfire-sumatera-dataset\wildfire-sumatera...,2
2,D:\wildfire-sumatera-dataset\wildfire-sumatera...,0
3,D:\wildfire-sumatera-dataset\wildfire-sumatera...,1
4,D:\wildfire-sumatera-dataset\wildfire-sumatera...,1


Unnamed: 0,file_paths_jpeg,class
0,D:\wildfire-sumatera-dataset\wildfire-sumatera...,1
1,D:\wildfire-sumatera-dataset\wildfire-sumatera...,1
2,D:\wildfire-sumatera-dataset\wildfire-sumatera...,1
3,D:\wildfire-sumatera-dataset\wildfire-sumatera...,1
4,D:\wildfire-sumatera-dataset\wildfire-sumatera...,1


# Create generator (Image Augmentation)

In [12]:
train_generator = ImageDataGenerator(
    rescale= 1./255,
    rotation_range = 20,
    width_shift_range = 0.2,
    height_shift_range= 0.2,
    shear_range= 0.2,
)

val_generator = ImageDataGenerator(
    rescale= 1./255,
)

test_generator = ImageDataGenerator(
    rescale= 1./255,
)

In [13]:
# Applying Generator by using data from dataframe
train_data = train_generator.flow_from_dataframe(
    dataframe = train_df,
    directory = LANDSAT_8_JPEG_FOLDER_PATH,
    x_col = 'file_paths_jpeg',
    y_col = 'class',
    batch_size = BATCH_SIZE,
    target_size = (WIDTH,HEIGHT),
    class_mode = "categorical",
)

validation_data = val_generator.flow_from_dataframe(
    dataframe = val_df,
    directory = LANDSAT_8_JPEG_FOLDER_PATH,
    x_col = 'file_paths_jpeg',
    y_col = 'class',
    batch_size = BATCH_SIZE,
    seed = SEED,
    target_size = (WIDTH,HEIGHT),
    class_mode = "categorical",
)

test_data = test_generator.flow_from_dataframe(
    dataframe = test_df,
    directory = LANDSAT_8_JPEG_FOLDER_PATH,
    x_col = 'file_paths_jpeg',
    y_col = 'class',
    batch_size = len(test_df.index),
    seed = SEED,
    target_size = (WIDTH,HEIGHT),
    class_mode = "categorical")

Found 6268 validated image filenames belonging to 3 classes.
Found 784 validated image filenames belonging to 3 classes.
Found 784 validated image filenames belonging to 3 classes.


### Create Model Prototype version 1

In [42]:
import tensorflow as tf
from keras.models import Sequential 
from keras.layers import Dense, Flatten, Dropout, Conv2D, MaxPooling2D, Activation


def cnn_model(input_shape):
    # Create Sequential model
    model = tf.keras.Sequential()
    
    # Convolution 2D Layer with kernel size 3x3 (filters=32)
    model.add(tf.keras.layers.Conv2D(
        filters=16, 
        kernel_size=(3, 3), 
        input_shape=input_shape, 
        activation='relu',
    ))
    # Max Pooling layer (2D) with pool size of 2x2 (default)
    model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
    
    # Convolution 2D Layer with kernel size 3x3 (filters=64)
    model.add(tf.keras.layers.Conv2D(
        filters=32, 
        kernel_size=(3, 3), 
        activation='relu',
    ))
    # Max Pooling layer (2D) with pool size of 2x2 (default)
    model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
    
    # Convolution 2D Layer with kernel size 3x3 (filters=128)
    model.add(tf.keras.layers.Conv2D(
        filters=64, 
        kernel_size=(3, 3),
        activation='relu',
    ))
    # Max Pooling layer (2D) with pool size of 2x2 (default)
    model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
    
    # Flatten layer
    model.add(tf.keras.layers.Flatten())
    # Fully Connected Layer
    model.add(tf.keras.layers.Dense(units=128, activation='relu'))
    # Drop Out Layer
    model.add(tf.keras.layers.Dropout(0.5))
    
    # Output model (multiclass)
    model.add(tf.keras.layers.Dense(units=3, activation='softmax'))
    
    return model

In [43]:
# check if gpu is detected and ready to be used by tensorflow for neural network training process
print("GPU is available, Device = {}".format(tf.config.list_physical_devices("GPU"))
      if tf.config.list_physical_devices("GPU") else "GPU is not available")

GPU is available, Device = [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [44]:
# Reset Device and Free Memory from GPU
from numba import cuda
device = cuda.get_current_device()
device.reset()

In [None]:
model = cnn_model(INPUT_SIZE)
model.summary()

In [None]:
imgs, labels = next(val_df)

# Implement callbacks

In [None]:
index = len(glob.glob('./best_cnn_model*')) + 1
print(index)
callbacks_softmax = [
    tf.keras.callbacks.EarlyStopping(monitor = 'val_loss', patience = 5, verbose = 2),
    tf.keras.callbacks.ModelCheckpoint(
        filepath = './best_cnn_model_{}'.format(), 
        monitor = 'val_loss', 
        save_best_only = True,
        mode = 'min', 
        verbose = 2,
    )
]

model.compile(
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss= tf.keras.'categorical_crossentropy',
    metrics=['accuracy', 'precision', 'recall'],
)

In [None]:
with tf.device('/GPU:0'):
    history_softmax = model.fit(
    train_data,
    validation_data=validation_data,
    epochs=50,
    steps_per_epoch=train_data.samples//BATCH_SIZE,
    validation_steps=valid_data.samples//BATCH_SIZE,
    workers=8,
    callbacks=callbacks_softmax) 