# CNN Model - Previous Fire Data and Weather

This notebook takes data from S3, trains a CNN model using Keras and Tensorflow, and saves the model to S3.

In [1]:
# Load packages
import boto3
import csv
import io
import json
import math
import numpy as np
import os
import pandas as pd
import pickle
import random

from datetime import datetime as dt
from keras.models import model_from_json
from matplotlib import pyplot as plt
from PIL import Image

Using TensorFlow backend.


### Variables and Hyperparameters

In [2]:
# s3 config
s3_client = boto3.client('s3')
bucket_name = 'hotzone'

# CNN config

# the desired height and width (in pixels) of the matrix to feed into the CNN
# 1 pixel side = 500 meters = 0.310686 miles
matrix_dim = 32

# test size for train/test split
test_size = 0.2

# training epochs
epoc = 10

## Pull Data from S3

In [3]:
def pull_data_from_s3(s3_client, bucket_name, key_name):
    '''
    Pulls pre-processed data from S3.

    Args:
        - s3_client: boto3 s3 client
        - bucket_name: name of bucket on s3 to pull data from
        - key_name: directory/file_name to pull data from
    Returns:
        - Nothing
    
    https://stackoverflow.com/questions/48049557/how-to-write-npy-file-to-s3-directly
    '''
    
    array_data = io.BytesIO()
    s3_client.download_fileobj(bucket_name, key_name, array_data)
    
    array_data.seek(0)
    array = pickle.load(array_data)

    return array

In [4]:
years = [2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2012, 2013, 2014, 2015, 2016]

fire = []
Y = []

for y in years:
    fire_key_name = "input_fire/fire_{}.pickle".format(str(y))
    label_key_name = "labels/label_{}.pickle".format(str(y))
    
    fire_data = pull_data_from_s3(s3_client, bucket_name, fire_key_name)
    labels = pull_data_from_s3(s3_client, bucket_name, label_key_name)
    
    fire.append(fire_data)
    Y.append(labels)

In [5]:
fire = np.concatenate(fire)
Y = np.concatenate(Y)

print(fire.shape)
print(Y.shape)

(145155, 32, 32, 1)
(145155,)


## Build CNN

In [6]:
# import packages

from __future__ import print_function

import tensorflow as tf

import keras
import keras.backend as K

from keras.models import Sequential, Model
from keras.layers import AveragePooling2D, Conv1D, Conv2D, MaxPooling2D, Dropout, Flatten, Dense, Input, concatenate

In [7]:
# compute f1 score manually - taken from https://datascience.stackexchange.com/a/45166

def recall_m(y_true, y_pred):
    '''
    Computes recall.
    
    Args:
        - y_true: true values of target variable.
        - y_pred: predicted values of target variable.
    Returns:
        - recall: true positives / actual results
    '''
    
    true_pos = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_pos = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_pos / (possible_pos + K.epsilon())

    return recall


def precision_m(y_true, y_pred):
    '''
    Computes precision.
    
    Args:
        - y_true: true values of target variable.
        - y_pred: predicted values of target variable.
    Returns:
        - precision: true positives / predicted results
    '''
    
    true_pos = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_pos = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_pos / (predicted_pos + K.epsilon())
    
    return precision


def f1_score(y_true, y_pred):
    '''
    Args:
        - y_true: true values of target variable.
        - y_pred: predicted values of target variable.
    Returns:
        - score: f1 score
    '''
    
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    score = 2*((precision*recall)/(precision+recall+K.epsilon()))
    
    return score

In [8]:
# Create model_2: image data, weather data, and fire speed/direction data with functional API

# Define image inputs shape
image_shape = fire[0].shape
image_inputs = Input(shape = image_shape)

# Define weather inputs shape
# weather_shape = weather[0].shape
# weather_inputs = Input(shape = weather_shape)

# Add layers for fire image interpretation
fire_1 = AveragePooling2D(pool_size=(2, 2), strides=None, padding='valid')(image_inputs)

fire_2 = Conv2D(32, kernel_size=(3, 3), activation='sigmoid')(fire_1)
fire_3 = MaxPooling2D(pool_size=(2,2), strides=None, padding='valid')(fire_2)
fire_4 = Dropout(0.2)(fire_3)

fire_5 = Conv2D(64, kernel_size=(3, 3), activation='sigmoid')(fire_4)
fire_6 = MaxPooling2D(pool_size=(2,2), strides=None, padding='valid')(fire_5)
fire_7 = Dropout(0.2)(fire_6)

fire_8 = Flatten()(fire_7)
fire_9 = Dense(128, activation='sigmoid')(fire_8)

# Combine the layers
# concat = concatenate([fire_9, weather_inputs])

# Final dense layer 
# predictions = Dense(1, activation='sigmoid')(concat)
predictions = Dense(1, activation='sigmoid')(fire_9)

# Define the model
# model_2 = Model(inputs=[image_inputs, weather_inputs], outputs=predictions)
model_2 = Model(inputs=image_inputs, outputs=predictions)

In [9]:
%%time
# compile the model
model_2.compile(
    optimizer='adam', 
    loss='binary_crossentropy', 
    metrics=['accuracy', f1_score, tf.keras.metrics.AUC()]
)

CPU times: user 160 ms, sys: 0 ns, total: 160 ms
Wall time: 157 ms


In [10]:
%%time
# fit the model
# model_2.fit(
#     x = [fire, weather], 
#     y = Y,
#     validation_split = test_size, 
#     epochs=epoc
# )

model_2.fit(
    x = fire, 
    y = Y,
    validation_split = test_size, 
    epochs=epoc
)

Train on 116124 samples, validate on 29031 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
CPU times: user 20min 35s, sys: 17min 15s, total: 37min 51s
Wall time: 9min 35s


<keras.callbacks.callbacks.History at 0x7f541c0adf98>

## Save CNN to S3

In [11]:
def save_array_to_s3(s3_client, array, bucket_name, key_name):
    '''
    Uploads pre-processed data to S3.

    Args:
        - s3_client: boto3 s3 client
        - array: numpy array to save to s3
        - bucket_name: name of bucket on s3 to save array to
        - key_name: directory/file_name to save data to
    Returns:
        - Nothing
    
    https://stackoverflow.com/questions/48049557/how-to-write-npy-file-to-s3-directly
    '''
    
    array_data = io.BytesIO()
    pickle.dump(array, array_data)
    array_data.seek(0)
    
    s3_client.upload_fileobj(array_data, bucket_name, key_name)

In [12]:
# get model config and model weights

config = model_2.get_config()
weights = model_2.get_weights()

In [13]:
# save model config and model weights to s3

save_array_to_s3(s3_client, config, bucket_name, 'models/model_config.pickle')
save_array_to_s3(s3_client, weights, bucket_name, 'models/model_weights.pickle')

## Load CNN from S3

In [14]:
new_config = pull_data_from_s3(s3_client, bucket_name, 'models/model_config.pickle')
new_weights = pull_data_from_s3(s3_client, bucket_name, 'models/model_weights.pickle')


new_model = keras.Model.from_config(new_config)
new_model.set_weights(new_weights)