In [1]:
import datetime
import time

import pandas as pd
import tensorflow as tf
import keras as ks
import numpy as np
import matplotlib.pylab as plt
import seaborn as sns
from keras import backend as K
from keras.models import Sequential
from keras.layers import Dense, Dropout, Conv2D, Flatten, Activation, MaxPooling2D
from sklearn.metrics import recall_score, precision_score, confusion_matrix
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

from scripts.model_functions import create_model
from scripts.helper_functions import *


Using TensorFlow backend.


In [2]:
# Constants shared by the Keras CNN cells further down.

# Batch size handed to model.fit (samples processed per gradient update).
BATCH_SIZE = 400

# Number of target classes: bloom (1) and no bloom (0).
NUM_CLASSES = 2

# Number of epochs handed to model.fit (full passes over the training data).
EPOCHS = 100

In [3]:

# Read the cleaned site1_vineyard CSV into a DataFrame and preview the first rows.
csv_path = '../../data/cleaned/site1_vineyard.csv'
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,Date (mm.dd.yyyy),Time 24hr,Temp C,Sp Cond (uS/cm),pH (mV),pH,Turbidity (NTU),Chlorophyll (ug/L),Chlorophyll RFU,ODOSat%,ODO (mg/L),BGA-Phycocyanin RFU
0,5/5/2017,0:00,15.02,1848,-100.1,8.36,16.84,4.4,1.3,90.2,9.04,0.4
1,5/5/2017,0:15,14.99,1847,-100.1,8.36,16.76,4.2,1.2,90.2,9.04,0.4
2,5/5/2017,0:30,14.96,1847,-100.1,8.36,16.82,4.3,1.3,90.1,9.04,0.4
3,5/5/2017,0:45,14.95,1848,-100.1,8.36,17.19,4.5,1.3,90.0,9.03,0.4
4,5/5/2017,1:00,14.92,1848,-100.0,8.36,16.85,4.5,1.3,89.8,9.02,0.4


In [4]:
# Scale the phycocyanin RFU reading into the 'BGA (ug/L)' column.
# NOTE(review): 0.2334 is presumably the RFU -> ug/L calibration factor; confirm its source.
target = df['BGA-Phycocyanin RFU'] / 0.2334

# Build the working frame: the raw data without the chlorophyll columns, plus
# the derived BGA column appended at the end.
dataset = (
    df
    .drop(columns=['Chlorophyll (ug/L)', 'Chlorophyll RFU'])
    .assign(**{'BGA (ug/L)': target})
)
dataset.head(5)

Unnamed: 0,Date (mm.dd.yyyy),Time 24hr,Temp C,Sp Cond (uS/cm),pH (mV),pH,Turbidity (NTU),ODOSat%,ODO (mg/L),BGA-Phycocyanin RFU,BGA (ug/L)
0,5/5/2017,0:00,15.02,1848,-100.1,8.36,16.84,90.2,9.04,0.4,1.713796
1,5/5/2017,0:15,14.99,1847,-100.1,8.36,16.76,90.2,9.04,0.4,1.713796
2,5/5/2017,0:30,14.96,1847,-100.1,8.36,16.82,90.1,9.04,0.4,1.713796
3,5/5/2017,0:45,14.95,1848,-100.1,8.36,17.19,90.0,9.03,0.4,1.713796
4,5/5/2017,1:00,14.92,1848,-100.0,8.36,16.85,89.8,9.02,0.4,1.713796


In [None]:

# Join the date and time text columns with a space and parse the result into datetimes.
date_text = dataset['Date (mm.dd.yyyy)']
time_text = dataset['Time 24hr']
timestamp = pd.to_datetime(date_text + ' ' + time_text)
dataset['Timestamp'] = timestamp
# Not the last expression of the cell, so this preview is never rendered.
dataset.head()
# NOTE(review): segmentation is run again in a later cell, which overwrites
# `segments`, and `targets` is not read anywhere in this notebook; confirm
# whether this early call is still needed.
segments, targets = segment_dataset(dataset, "Timestamp")


WINDOW SIZE 2
LOOKING AHEAD 1 day(s)
days ahead 2 window size 2

elapsed 1 window size 1

Starting the window
Window Segmentation 2.05% done
Window Segmentation 2.58% done
Window Segmentation 3.10% done
Window Segmentation 3.63% done
Window Segmentation 4.16% done
Window Segmentation 4.69% done
Window Segmentation 5.21% done
Window Segmentation 5.74% done
Window Segmentation 6.27% done
Window Segmentation 6.80% done
Window Segmentation 7.33% done
Window Segmentation 7.85% done
Window Segmentation 8.38% done
Window Segmentation 8.91% done
Window Segmentation 9.44% done
Window Segmentation 9.96% done
Window Segmentation 10.49% done
Window Segmentation 11.02% done
Window Segmentation 11.55% done
Window Segmentation 12.08% done
Window Segmentation 12.60% done
Window Segmentation 13.13% done
Window Segmentation 13.66% done
Window Segmentation 14.19% done
Window Segmentation 14.71% done
Window Segmentation 15.24% done
Window Segmentation 15.77% done
Window Segmentation 16.30% done
Window Seg

In [None]:
# The combined Timestamp column replaces the separate date and time columns,
# so those two can be dropped.
redundant_columns = ['Date (mm.dd.yyyy)', 'Time 24hr']
dataset = dataset.drop(columns=redundant_columns)
dataset.head(10)


In [None]:
# Binary bloom label: 1 where 'BGA (ug/L)' is strictly above 20, otherwise 0.
target = (dataset['BGA (ug/L)'] > 20).astype('int64')
dataset['Bloom'] = target
dataset.dtypes

In [None]:
# lets try to normalize this now....
#dataset_columns = ['Temp C','Sp Cond (uS/cm)', 'pH (mV)','pH', 'Turbidity (NTU)', 'ODOSat%','ODO (mg/L)', 'Bloom']
#scaler = MinMaxScaler()
#ds_scaled = scaler.fit_transform(dataset[dataset_columns])
#dataset = pd.DataFrame(ds_scaled,columns=dataset_columns)
#dataset['Timestamp']= timestamp
#dataset.describe()


# Functions to take a moving window of the data of 10 time stamps

In [None]:
# Sensor columns followed by the 'Bloom' label. Defined here explicitly because
# the only other definition in this notebook sits in commented-out code, which
# made this cell fail with NameError on a fresh kernel.
dataset_columns = ['Temp C', 'Sp Cond (uS/cm)', 'pH (mV)', 'pH', 'Turbidity (NTU)',
                   'ODOSat%', 'ODO (mg/L)', 'Bloom']
# Every column except the trailing label is a feature.
# NOTE(review): feature_columns is not passed to segment_dataset; confirm which
# columns the helper actually uses.
feature_columns = dataset_columns[:-1]
# Split the frame into windows keyed on the Timestamp column.
(segments, labels) = segment_dataset(dataset, "Timestamp")
print('done')

In [None]:
# Show the shape of the segments returned by segment_dataset.
segments.shape

In [None]:
# Show the shape of the labels returned by segment_dataset.
labels.shape

In [None]:
# Display the label array itself.
labels

# Shaping the data to be used in the model.

In [None]:
# Give every segment the (9, 7, 1) layout that the Conv2D input_shape expects.
# NOTE(review): presumably 9 time steps x 7 features plus one channel axis; confirm.
n_segments = len(segments)
segments = segments.reshape(n_segments, 9, 7, 1)
segments.shape

In [None]:
# Visual sanity check: draw a slice of each of the first ten segments in grayscale.
X = 5   # figure width passed to figsize
y = 40  # figure height passed to figsize
plt.figure(figsize=(X,y))
columns = 10
# plt.subplot needs whole-number grid dimensions; `/` yields a float in
# Python 3, so use floor division. The value is loop-invariant, so compute it once.
rows = len(segments) // columns + 1
for x in range(0, 10):
    plt.subplot(rows, columns, x + 1)
    # NOTE(review): segments[x][0] selects only index 0 along the first axis of
    # a segment, not the whole window; confirm this is the intended slice.
    plt.imshow(segments[x][0]*255,cmap='gray')
#plt.imshow(segments[0][0] * 255, cmap='gray')

In [None]:
# Check the label shape again before it is reshaped into a column.
labels.shape

In [None]:
# Reshape the labels into a single column with one row per label.
n_labels = len(labels)
labels = labels.reshape(n_labels, 1)
labels.shape

# Breaking apart training and test data

In [None]:

# Hold out 10% of the segments for testing; the fixed random_state makes the
# shuffle repeatable across runs.
x_train, x_test, y_train, y_test = train_test_split(
    segments,
    labels,
    test_size=0.1,
    random_state=42,
)

In [None]:
# Print the shape of each split, one line per array.
for shape_label, split_array in (
    ("x_train shape:", x_train),
    ("x_test shape:", x_test),
    ("y_train shape:", y_train),
    ("y_test shape:", y_test),
):
    print(shape_label, split_array.shape)

In [None]:
# One-hot encode both label arrays into NUM_CLASSES columns for the
# categorical cross-entropy loss.
y_train_mod, y_test_mod = (
    ks.utils.to_categorical(split_labels, NUM_CLASSES)
    for split_labels in (y_train, y_test)
)
# Shape of one segment as given to the first Conv2D layer.
input_shape = (9, 7, 1)


In [None]:

# Converts per-class scores into predicted class indices.
def create_class_predictions(pred):
    """Return the index of the highest score in each row of ``pred``.

    Args:
        pred: 2-D array-like of shape (n_samples, n_classes) holding one score
            per class, e.g. the output of ``model.predict``.

    Returns:
        1-D float64 numpy array of length n_samples with the winning class
        index of each row. The float dtype is kept for backward compatibility
        with existing callers. Ties resolve to the first (lowest) index.
        Empty input yields an empty array. Rows containing NaN follow
        ``np.argmax`` semantics.
    """
    scores = np.asarray(pred)
    # Nothing to classify; also avoids an axis error on a 1-D empty input.
    if scores.size == 0:
        return np.array([])
    # argmax works for any score range (including all-negative rows) and
    # avoids rebuilding the result array on every row.
    return np.argmax(scores, axis=1).astype(np.float64)


def create_layers(num_layers):
    """Build a list of Keras layers: ``num_layers`` Conv2D layers, then a fixed head.

    Args:
        num_layers: number of identical Conv2D(44, 7) layers to place in front
            of the Flatten / Dropout(0.2) / softmax Dense head.

    Returns:
        A list of layer instances, convolutions first.
    """
    # Classification head shared by every configuration.
    # NOTE(review): input_dim=2 on this last Dense does not match the flattened
    # convolution output it would receive; confirm whether it can be dropped.
    stack = [Flatten(), Dropout(0.2), Dense(NUM_CLASSES, activation='softmax', input_dim=2)]
    # Each new convolution is placed at the front of the list.
    for _ in range(num_layers):
        stack = [Conv2D(44, 7, input_shape=input_shape, activation='relu', padding='same')] + stack
    return stack

# Come on, let's create the model already!

In [None]:
# The layer/node investigation found this stack to work best:
#   Conv2D 44, 4
#   Conv2D 44, 7
#   Conv2D 44, 4
#   Conv2D 44, 7
#   Flatten()
#   Dropout(0.2)
#   Dense(44)
#   Dense(2)
# NOTE(review): the model below uses kernel sizes 7, 4, 4, 7, which differs
# from the 4, 7, 4, 7 order listed above; confirm which one is intended.

# Train a model with that architecture to see whether the results are similar.
model = Sequential([
    Conv2D(44, 7, input_shape=input_shape, activation='relu', padding='same'),
    Conv2D(44, 4, activation='relu', padding='same'),
    Conv2D(44, 4, activation='relu', padding='same'),
    Conv2D(44, 7, activation='relu', padding='same'),
    Flatten(),
    Dropout(0.2),
    Dense(44),
    Dense(NUM_CLASSES, activation='softmax'),
])
model.compile(
    loss=ks.losses.categorical_crossentropy,
    optimizer=ks.optimizers.Adam(lr=0.0001),
    metrics=['accuracy'],
)
model.fit(x=x_train, y=y_train_mod, batch_size=BATCH_SIZE, epochs=EPOCHS, verbose=1)
# Loss and accuracy on the training split; the held-out split is scored in the next cell.
score = model.evaluate(x_train, y_train_mod, verbose=1)


In [None]:
# Score the held-out split: turn the predicted class scores into class indices
# and compare them with the true labels.
predictions = model.predict(x_test)
predict = create_class_predictions(predictions)
# Flatten the (n, 1) label column to 1-D for the sklearn metrics.
y = y_test.ravel()
recall, precision = recall_score(y, predict), precision_score(y, predict)
print("Recall:",recall)
print("Precision:", precision)
print(confusion_matrix(y, predict))

## Save the model for deployment

In [None]:
# Export the trained model in TensorFlow SavedModel format for deployment.
# `K` is the Keras backend module (`from keras import backend as K` in the imports cell).

# ignoring dropout for deployment
K.set_learning_phase(0)

# Set a file path to save the model in.
model_name = "cnn_model"
model_version = "1"
tf_path = "./../../saved_models/{}/{}".format(model_name, model_version)

# Get the session from the Keras back-end to save the model in TF format.
# Using the session object itself as a context manager would close it on exit
# and break any later use of `model` in this kernel. The as_default() contexts
# give the same default session and graph without closing it.
sess = K.get_session()
with sess.as_default(), sess.graph.as_default():
    tf.saved_model.simple_save(sess, tf_path, inputs={'input': model.input}, outputs={t.name: t for t in model.outputs})
