In [74]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from datetime import datetime
from scipy import stats

%matplotlib inline
# plt.style.use('ggplot')

In [75]:
def read_data(file_path, column_names):
    data = pd.read_csv(file_path, header = 0, names = column_names)
    return data

def feature_normalize(dataset):
    mu = np.mean(dataset, axis = 0)
    sigma = np.std(dataset, axis = 0)
    return (dataset - mu) / sigma

def convert_timestamp(dataset):
    dataset['Timestamp'] = dataset['Timestamp'].apply(lambda x:
                                                     datetime.strptime(x, '%m/%d/%y %H:%M').timestamp())
    return dataset

def convert_BGA_RFU(dataset, threshold):
    dataset['BGA_Phycocyanin_RFU'] = dataset['BGA_Phycocyanin_RFU'].apply(lambda x: 1 if x >= threshold else 0)
    return dataset

def plot_axis(ax, x, y, title):
    ax.plot(x, y)
    ax.set_title(title)
    ax.xaxis.set_visible(false)
    ax.set_ylim([min(y) - np.std(y), max(y) + np.std(y)])
    ax.set_xlim([min(y), max(x)])
    ax.grid(true)
    
def plot_activity(activity, data):
    fig, (ax0, ax1, ax2) = plt.subplots(nrows = 3, figsize = (15, 10), sharex = True)
    plot_axis(ax0, data['timestamp'], data['x-axis'], 'x-axis')
    plot_axis(ax1, data['timestamp'], data['y-axis'], 'y-axis')
    plot_axis(ax2, data['timestamp'], data['z-axis'], 'z-axis')
    plt.subplots_adjust(hspace=0.2)
    fig.suptitle(activity)
    plt.subplots_adjust(top=0.90)
    plt.show()
    
# We step 50% down based on window size
def windows(data, size):
    counter_output = 0
    start = 0
    while start < data.count():
        yield int(start), int(start + size)
        start += (size / 2)
        counter_output += 1
        if counter_output % 10 == 0:
            print('Window Segmentation {0:.2f}% done'.format((start / data.count()) * 100))
    
# 32 chosen for 8 hours of 15 minute intervals
# TODO: NEED TO PASS IN COLUMNS AND DETERMINE FROM THAT
def segment_signal(dataset, window_size = 90, columns = 5):
    segments = np.empty((0, window_size, columns))
    labels = np.empty((0))
    count = 0;
    for (start, end) in windows(dataset["Timestamp"], window_size):
        temperature = dataset['Temperature'][start:end]
        conductivity = dataset['Sp_Cond'][start:end]
        turbidity = dataset['Turbidity'][start:end]
        ph = dataset['pH'][start:end]
        odo = dataset['ODO'][start:end]
        if(len(dataset['Timestamp'][start:end]) == window_size):
            segments = np.vstack([segments, np.dstack([temperature, conductivity, turbidity, ph, odo])])
            labels = np.append(labels, stats.mode(dataset['BGA_Phycocyanin_RFU'][start:end])[0][0])
    return segments, labels

In [76]:
cols = ['Timestamp', 'Temperature', 'Sp_Cond', 'pH_mV', 'pH', 'Turbidity', 'Chlorophyll', 'Chlorophyll_RFU',
        'ODOSat', 'ODO', 'BGA_Phycocyanin_RFU']

cols_to_keep = []
cols_to_keep.append('Timestamp')
cols_to_keep.append('Temperature')
cols_to_keep.append('Sp_Cond')
# cols_to_keep.append('pH_mV')
cols_to_keep.append('pH')
cols_to_keep.append('Turbidity')
# cols_to_keep.append('Chlorophyll')
# cols_to_keep.append('Chlorophyll_RFU')
# cols_to_keep.append('ODOSat')
cols_to_keep.append('ODO')
cols_to_keep.append('BGA_Phycocyanin_RFU')

lake_dataset = read_data('./data/cleaned/utah_lake_vineyard.csv', cols) 

for col in cols:
    if col not in cols_to_keep:
        lake_dataset = lake_dataset.drop(col, axis=1)
        
lake_dataset = convert_timestamp(lake_dataset)
lake_dataset = convert_BGA_RFU(lake_dataset, 1.0)
lake_dataset.head()

Unnamed: 0,Timestamp,Temperature,Sp_Cond,pH,Turbidity,ODO,BGA_Phycocyanin_RFU
0,1493960000.0,15.02,1848,8.36,16.84,9.04,0
1,1493961000.0,14.99,1847,8.36,16.76,9.04,0
2,1493962000.0,14.96,1847,8.36,16.82,9.04,0
3,1493963000.0,14.95,1848,8.36,17.19,9.03,0
4,1493964000.0,14.92,1848,8.36,16.85,9.02,0


In [77]:
window_size = 90

In [78]:
segments, labels = segment_signal(lake_dataset)

Window Segmentation 2.38% done
Window Segmentation 4.75% done
Window Segmentation 7.13% done
Window Segmentation 9.50% done
Window Segmentation 11.88% done
Window Segmentation 14.25% done
Window Segmentation 16.63% done
Window Segmentation 19.00% done
Window Segmentation 21.38% done
Window Segmentation 23.75% done
Window Segmentation 26.13% done
Window Segmentation 28.50% done
Window Segmentation 30.88% done
Window Segmentation 33.25% done
Window Segmentation 35.63% done
Window Segmentation 38.00% done
Window Segmentation 40.38% done
Window Segmentation 42.75% done
Window Segmentation 45.13% done
Window Segmentation 47.50% done
Window Segmentation 49.88% done
Window Segmentation 52.25% done
Window Segmentation 54.63% done
Window Segmentation 57.00% done
Window Segmentation 59.38% done
Window Segmentation 61.75% done
Window Segmentation 64.13% done
Window Segmentation 66.50% done
Window Segmentation 68.88% done
Window Segmentation 71.25% done
Window Segmentation 73.63% done
Window Segme

In [79]:
labels = np.asarray(pd.get_dummies(labels), dtype = np.int8)

In [80]:
labels.shape

(420, 2)

In [81]:
labels

array([[1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1, 0],
       [1,

In [82]:
lake_dataset.shape

(18947, 7)

In [83]:
segments.shape

(420, 90, 5)

In [84]:
# Args: 
reshaped_segments = segments.reshape(len(segments), 1, window_size, 5)

In [85]:
reshaped_segments.shape

(420, 1, 90, 5)

In [86]:
train_test_split = np.random.rand(len(reshaped_segments)) < 0.70
train_x = reshaped_segments[train_test_split]
train_y = labels[train_test_split]
test_x = reshaped_segments[~train_test_split]
test_y = labels[~train_test_split]

In [87]:
input_height = 1
input_width = window_size
num_labels = 2
num_channels = 5

batch_size = 10
kernel_size = 60
depth = 60
num_hidden = 500

learning_rate = 0.0001
training_epochs = 100

total_batchs = train_x.shape[0] // batch_size

def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev = 0.1)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.0, shape = shape)
    return tf.Variable(initial)

def depthwise_conv2d(x, W):
    return tf.nn.depthwise_conv2d(x,W, [1, 1, 1, 1], padding='VALID')

def apply_depthwise_conv(x,kernel_size,num_channels,depth):
    weights = weight_variable([1, kernel_size, num_channels, depth])
    biases = bias_variable([depth * num_channels])
    return tf.nn.relu(tf.add(depthwise_conv2d(x, weights),biases))
    
def apply_max_pool(x,kernel_size,stride_size):
    return tf.nn.max_pool(x, ksize=[1, 1, kernel_size, 1], 
                          strides=[1, 1, stride_size, 1], padding='VALID')

In [89]:
X = tf.placeholder(tf.float32, shape=[None,input_height,input_width,num_channels])
Y = tf.placeholder(tf.float32, shape=[None,num_labels])

c = apply_depthwise_conv(X,kernel_size,num_channels,depth)
p = apply_max_pool(c,20,2)
c = apply_depthwise_conv(p,6,depth*num_channels,depth//10)

shape = c.get_shape().as_list()
c_flat = tf.reshape(c, [-1, shape[1] * shape[2] * shape[3]])

f_weights_l1 = weight_variable([shape[1] * shape[2] * depth * num_channels * (depth//10), num_hidden])
f_biases_l1 = bias_variable([num_hidden])
f = tf.nn.tanh(tf.add(tf.matmul(c_flat, f_weights_l1),f_biases_l1))

out_weights = weight_variable([num_hidden, num_labels])
out_biases = bias_variable([num_labels])
y_ = tf.nn.softmax(tf.matmul(f, out_weights) + out_biases)

In [90]:
loss = -tf.reduce_sum(Y * tf.log(y_))
optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate).minimize(loss)

correct_prediction = tf.equal(tf.argmax(y_,1), tf.argmax(Y,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [91]:
with tf.Session() as session:
    tf.global_variables_initializer().run()
    for epoch in range(training_epochs):
        cost_history = np.empty(shape=[1],dtype=float)
        for b in range(total_batchs):    
            offset = (b * batch_size) % (train_y.shape[0] - batch_size)
            batch_x = train_x[offset:(offset + batch_size), :, :, :]
            batch_y = train_y[offset:(offset + batch_size), :]
            _, c = session.run([optimizer, loss],feed_dict={X: batch_x, Y : batch_y})
            cost_history = np.append(cost_history,c)
        print ("Epoch: ",epoch," Training Loss: ",np.mean(cost_history)," Training Accuracy: ",
              session.run(accuracy, feed_dict={X: train_x, Y: train_y}))
    
    print ("Testing Accuracy:", session.run(accuracy, feed_dict={X: test_x, Y: test_y}))

Epoch:  0  Training Loss:  8.62618238603  Training Accuracy:  0.602694
Epoch:  1  Training Loss:  15.8260535109  Training Accuracy:  0.606061
Epoch:  2  Training Loss:  22.5664296853  Training Accuracy:  0.606061
Epoch:  3  Training Loss:  29.3884073404  Training Accuracy:  0.606061
Epoch:  4  Training Loss:  36.7286204644  Training Accuracy:  0.609428
Epoch:  5  Training Loss:  43.3233490693  Training Accuracy:  0.606061
Epoch:  6  Training Loss:  49.8753967789  Training Accuracy:  0.609428
Epoch:  7  Training Loss:  56.421941295  Training Accuracy:  0.609428
Epoch:  8  Training Loss:  62.9769350039  Training Accuracy:  0.609428
Epoch:  9  Training Loss:  69.5196790564  Training Accuracy:  0.609428
Epoch:  10  Training Loss:  76.0700707304  Training Accuracy:  0.609428
Epoch:  11  Training Loss:  82.7381804176  Training Accuracy:  0.606061
Epoch:  12  Training Loss:  89.2946051426  Training Accuracy:  0.606061
Epoch:  13  Training Loss:  95.8508860655  Training Accuracy:  0.606061
Epo

KeyboardInterrupt: 

In [None]:
cost_history.shape

# PLAYGROUND

In [None]:
lake_dataset.head()

In [None]:
lake_dataset.iloc[0]['Timestamp']

In [None]:
lake_dataset.head()

In [None]:
print(datetime.strptime('5/5/17 13:15', '%m/%d/%y %H:%M').timestamp())