# CNN Baseline Model

## Load Data

In [1]:
import numpy as np
import os
from PIL import Image
import csv

### Small Dataset Processing

In [2]:
def data_processing():

    path = os.path.abspath("toydata")

    tiff_files = []

    for f in os.listdir(path):
        if f.endswith('.tif'):
            tiff_files.append(path + '/' + f)

    tiff_dict = {}

    # dictionary of tiff files
    for f in tiff_files:
        k = f.split('/')[-1].split('.tif')[0]
        tiff_dict[k] = f

    # convert to np array
    fire_id = Image.open(tiff_dict['fireid'])
    fire_id = np.array(fire_id)
    fire_id[fire_id == -9999] = 0

    fireline = Image.open(tiff_dict['Global_fire_atlas_firelinecrop'])
    fireline = np.array(fireline)
    fireline[fireline == -9999] = 0

    # get list of unique fire_ids
    fire_ids = set()

    for row in fire_id:
        for val in row:
            fire_ids.add(val)

    # remove 0 from fire_ids set because it does not denote a fire
    fire_ids.remove(0)

    # get dict with key value pairs of fire_id and an empty dict
    fire_data_dict = {}

    for id in fire_ids:
        id = str(id)
        fire_data_dict[id] = {}

    for id in fire_ids:
        indices = np.where(fire_id == id, 1, 0)
        fire_data_dict[str(id)] = indices
        
    return fire_data_dict, fireline

In [3]:
fire_data_dict, fireline = data_processing()

### Toy Dataset

In [4]:
# create a toy dataset to test create_one_matrices

toy_fire_data_dict = {}
toy_fire_data_dict['1'] = np.mat('0 0 0 0 0; 0 1 1 1 0; 0 1 1 0 0; 0 0 1 1 0; 0 1 1 1 1; 0 0 0 1 1')
toy_fireline = np.mat('0 0 0 0 0; 0 2 2 2 0; 0 2 2 0 0; 0 0 3 3 0; 0 3 3 3 3; 0 0 0 4 4')

### Create One-Hot Fire-Day Matrices

In [5]:
def create_one_hot_matrices(data_dict, fireline):
    '''
    This returns a dictionary with the structure:
    {
        "fire_id": {
            "day_of_year": one-hot encoded 2D array of fire spread on that day,
            "day_of_year": one-hot encoded 2D array of fire spread on that day
        }

    }
    '''
    
    fire_data_dict = {}

    for key, val in data_dict.items():
        data = {}
                
        for y in range(1, 366):
            mask = ((fireline == y) & (val == 1))
            mask = mask.astype(int)
        
            if np.sum(mask) > 0:
                data[str(y)] = mask
        
        fire_data_dict[key] = data
        
    return fire_data_dict

In [6]:
# Test on toy dataset
toy_fire_data_dict = create_one_hot_matrices(toy_fire_data_dict, toy_fireline)
print(toy_fire_data_dict)

{'1': {'2': matrix([[0, 0, 0, 0, 0],
        [0, 1, 1, 1, 0],
        [0, 1, 1, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0]]), '3': matrix([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 1, 1, 0],
        [0, 1, 1, 1, 1],
        [0, 0, 0, 0, 0]]), '4': matrix([[0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0],
        [0, 0, 0, 1, 1]])}}


In [7]:
def create_supervised_data(fire_data_dict):
    '''
    Turn this dictionary:
    {
        "fire_id": {
            "day_of_year": one-hot encoded 2D array of fire spread on that day,
            "day_of_year": one-hot encoded 2D array of fire spread on that day
        }

    }
    
    into:
    [
        (one-hot encoded 2D array of fire spread on that day_1, one-hot encoded 2D array of fire spread on day_2),
        (one-hot encoded 2D array of fire spread on that day_2, one-hot encoded 2D array of fire spread on day_3),
    ]
    '''
    
    train_labels = []

    for key, value in fire_data_dict.items():
        burn_matrices = list(value.values())
        
        for index, day in enumerate(burn_matrices):

            if index < len(burn_matrices) - 1:
                day_1 = burn_matrices[index]
                day_2_index = index + 1
                day_2 = burn_matrices[day_2_index]
                
                pair = (day_1, day_2)
                train_labels.append(pair)

    return train_labels

toy = create_supervised_data(toy_fire_data_dict)

### Write to disk

In [8]:
def write_to_disk(data, name):
    
    with open(name, "w") as f:
        writer = csv.writer(f)
        writer.writerows(data)
        
write_to_disk(toy, "toy_data.csv")

In [9]:
# Run on small dataset
small_fire_data_dict = create_one_hot_matrices(fire_data_dict, fireline)
small_dataset = create_supervised_data(small_fire_data_dict)
write_to_disk(small_dataset, "small_data.csv")

### Read from disk

In [80]:
def read_from_disk(data):
    
    with open(data, 'r') as file:
        reader = csv.reader(file, delimiter=',', quotechar='"')
        
        for row in reader:
            for x in row:
                x = np.ndarray(x)
                print(x)
            
    
    return data

r = read_from_disk("toy_data.csv")

ValueError: maximum supported dimension for an ndarray is 32, found 78

### Baseline CNN

In [60]:
from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K

In [114]:
# get data into (X, 470, 456) shape
X = []
Y = []

for (x, y) in small_dataset:
    x = np.asarray(x)
    y = np.asarray(y)
    X.append(x)
    Y.append(x)
    
X = np.asarray(X)
Y = np.asarray(Y)

print(X.shape)
print(Y.shape)

(135, 470, 456)

In [115]:
# reshape data for CNN
X = X.reshape(135, 470, 456, 1)
Y = Y.reshape(135, 470, 456, 1)

In [121]:
# split into train/test
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2)

# take a look at the shapes
print(X_train.shape)
print(X_test.shape)
print(Y_train.shape)
print(Y_test.shape)

(108, 470, 456, 1)
(27, 470, 456, 1)
(108, 470, 456, 1)
(27, 470, 456, 1)


In [None]:
# Build Model
