# CNN Baseline Model

## Load Data

In [79]:
import numpy as np
import os
from PIL import Image
import csv

### Small Dataset Processing

In [80]:
def data_processing():

    path = os.path.abspath("toydata")

    tiff_files = []

    for f in os.listdir(path):
        if f.endswith('.tif'):
            tiff_files.append(path + '/' + f)

    tiff_dict = {}

    # dictionary of tiff files
    for f in tiff_files:
        k = f.split('/')[-1].split('.tif')[0]
        tiff_dict[k] = f

    # convert to np array
    fire_id = Image.open(tiff_dict['fireid'])
    fire_id = np.array(fire_id)
    fire_id[fire_id == -9999] = 0

    fireline = Image.open(tiff_dict['Global_fire_atlas_firelinecrop'])
    fireline = np.array(fireline)
    fireline[fireline == -9999] = 0

    # get list of unique fire_ids
    fire_ids = set()

    for row in fire_id:
        for val in row:
            fire_ids.add(val)

    # remove 0 from fire_ids set because it does not denote a fire
    fire_ids.remove(0)

    # get dict with key value pairs of fire_id and an empty dict
    fire_data_dict = {}

    for id in fire_ids:
        id = str(id)
        fire_data_dict[id] = {}

    for id in fire_ids:
        indices = np.where(fire_id == id, 1, 0)
        fire_data_dict[str(id)] = indices
        
    return fire_data_dict, fireline

In [81]:
fire_data_dict, fireline = data_processing()

### Toy Dataset

In [82]:
# create a toy dataset to test create_one_matrices

toy_fire_data_dict = {}
toy_fire_data_dict['1'] = np.mat('0 0 0 0 0; 0 1 1 1 0; 0 1 1 0 0; 0 0 1 1 0; 0 1 1 1 1; 0 0 0 1 1')
toy_fireline = np.mat('0 0 0 0 0; 0 2 2 2 0; 0 2 2 0 0; 0 0 3 3 0; 0 3 3 3 3; 0 0 0 4 4')

### Create One-Hot Fire-Day Matrices

In [83]:
def create_one_hot_matrices(data_dict, fireline):
    '''
    This returns a dictionary with the structure:
    {
        "fire_id": {
            "day_of_year": one-hot encoded 2D array of fire spread on that day,
            "day_of_year": one-hot encoded 2D array of fire spread on that day
        }

    }
    '''
    
    fire_data_dict = {}

    for key, val in data_dict.items():
        data = {}
                
        for y in range(1, 366):
            mask = ((fireline == y) & (val == 1))
            mask = mask.astype(int)
        
            if np.sum(mask) > 0:
                data[str(y)] = mask
        
        fire_data_dict[key] = data
        
    return fire_data_dict

In [84]:
# Test on toy dataset
toy_fire_data_dict = create_one_hot_matrices(toy_fire_data_dict, toy_fireline)
print(toy_fire_data_dict)

{'1': {'2': matrix([[0, 0, 0, 0, 0],
        [0, 1, 1, 1, 0],
        [0, 1, 1, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]]), '3': matrix([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 1, 1, 0],
        [0, 1, 1, 1, 1],
        [0, 0, 0, 0, 0]]), '4': matrix([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 1, 1]])}}


In [85]:
def create_supervised_data(fire_data_dict):
    '''
    Turn this dictionary:
    {
        "fire_id": {
            "day_of_year": one-hot encoded 2D array of fire spread on that day,
            "day_of_year": one-hot encoded 2D array of fire spread on that day
        }

    }
    
    into:
    [
        (one-hot encoded 2D array of fire spread on that day_1, one-hot encoded 2D array of fire spread on day_2),
        (one-hot encoded 2D array of fire spread on that day_2, one-hot encoded 2D array of fire spread on day_3),
    ]
    '''
    
    train_labels = []

    for key, value in fire_data_dict.items():
        burn_matrices = list(value.values())
        
        for index, day in enumerate(burn_matrices):

            if index < len(burn_matrices) - 1:
                day_1 = burn_matrices[index]
                day_2_index = index + 1
                day_2 = burn_matrices[day_2_index]
                
                pair = (day_1, day_2)
                train_labels.append(pair)

    return train_labels

toy = create_supervised_data(toy_fire_data_dict)
print(toy)

[(matrix([[0, 0, 0, 0, 0],
        [0, 1, 1, 1, 0],
        [0, 1, 1, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]]), matrix([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 1, 1, 0],
        [0, 1, 1, 1, 1],
        [0, 0, 0, 0, 0]])), (matrix([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 1, 1, 0],
        [0, 1, 1, 1, 1],
        [0, 0, 0, 0, 0]]), matrix([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 1, 1]]))]


In [86]:
### tuning variables ###
matrix_dim = 4
side_1 = int(matrix_dim/2)
side_2 = side_1 - 1

########################

data = []

for (x, y) in toy:    
    (x_len, y_len) = x.shape

    x = np.pad(x, matrix_dim, 'constant', constant_values=0)
    y = np.pad(y, matrix_dim, 'constant', constant_values=0)
    
    vals = np.where(y == 1)
    vals = list(zip(vals[0], vals[1]))
    
    for (xi, yi) in vals:
        xi_r = xi + side_1
        xi_l = xi - side_2
        yi_b = yi + side_1
        yi_t = yi - side_2
        
        m = x[xi_l:xi_r, yi_t:yi_b]

        data.append((m, 1))

print(data)

[(array([[1, 1, 0],
       [0, 0, 0],
       [0, 0, 0]]), 1), (array([[1, 0, 0],
       [0, 0, 0],
       [0, 0, 0]]), 1), (array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]]), 1), (array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]]), 1), (array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]]), 1), (array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]]), 1), (array([[1, 1, 1],
       [0, 0, 0],
       [0, 0, 0]]), 1), (array([[1, 1, 0],
       [0, 0, 0],
       [0, 0, 0]]), 1)]


### Write to disk

In [87]:
def write_to_disk(data, name):
    
    with open(name, "w") as f:
        writer = csv.writer(f)
        writer.writerows(data)
        
write_to_disk(toy, "toy_data.csv")

In [88]:
# Run on small dataset
small_fire_data_dict = create_one_hot_matrices(fire_data_dict, fireline)
small_dataset = create_supervised_data(small_fire_data_dict)
write_to_disk(small_dataset, "small_data.csv")

### Read from disk

In [89]:
# TO-DO

### Baseline CNN

In [90]:
from __future__ import print_function
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, Conv1D, LSTM, UpSampling2D, Conv2DTranspose
from keras import backend as K

Using TensorFlow backend.


In [91]:
# get data into (X, 470, 456) shape
X = []
Y = []

for (x, y) in small_dataset:
    x = np.asarray(x)
    y = np.asarray(y)
    X.append(x)
    Y.append(y)
    
X = np.asarray(X)
Y = np.asarray(Y)

In [92]:
# reshape data for CNN
X = X.reshape(135, 470, 456, 1)
Y = Y.reshape(135, 470, 456, 1)

In [93]:
# split into train/test
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2)

# take a look at the shapes
print(X_train.shape)
print(X_test.shape)
print(Y_train.shape)
print(Y_test.shape)

(108, 470, 456, 1)
(27, 470, 456, 1)
(108, 470, 456, 1)
(27, 470, 456, 1)


In [94]:
# Create Model
model = Sequential()

# Add Model Layers
model.add(AveragePooling2D(pool_size=(2,2), padding='valid', input_shape=(470, 456, 1)))
model.add(Conv2D(64, kernel_size=(1,1), padding='valid', activation='relu'))

model.add(Dropout(0.5))
model.add(Conv2D(32, kernel_size=(1,1), padding='valid', activation='relu'))

model.add(Conv2DTranspose(1, kernel_size=(2,2), strides=(2,2)))


# Look At Summary
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
average_pooling2d_1 (Average (None, 235, 228, 1)       0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 235, 228, 64)      128       
_________________________________________________________________
dropout_1 (Dropout)          (None, 235, 228, 64)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 235, 228, 32)      2080      
_________________________________________________________________
conv2d_transpose_1 (Conv2DTr (None, 470, 456, 1)       129       
Total params: 2,337
Trainable params: 2,337
Non-trainable params: 0
_________________________________________________________________


In [95]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [96]:
model.fit(X_train, Y_train,validation_data=(X_test, Y_test), epochs=3)

Train on 108 samples, validate on 27 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.callbacks.History at 0x7fcfc2ef3d10>