In [42]:
import json
import numpy as np
import pandas as pd
import datetime
import time
import matplotlib.pyplot as plt

%matplotlib inline

In [43]:
import os
import tensorflow as tf
#from keras.utils import to_categorical
from sklearn.model_selection import train_test_split

## Load Data

In [44]:
fpath = "/home/b/Downloads/plotholes-4daba-export(7).json"
with open (fpath) as f:
    json_content = json.loads(f.read())

In [45]:
WINDOW_SIZE = 16
STRIDE = 1

In [46]:
start_time_list = []
all_acc_df = {}
all_gyro_df = {}

for key in json_content:
    val = json_content[key]
    acc_readings = json.loads(val.get('acc',[]))    
    if not acc_readings: 
        # Skip logs without any accelerometer readings
        continue 
        
    start_epoch = float(acc_readings[0]['timestamp'])    
    start_time = time.strftime('%m-%d %H:%M:%S', time.localtime(start_epoch))
    gyro_readings = json.loads(val.get('gyro',[]))
    start_time_list.append([start_time,len(acc_readings),len(gyro_readings)])
    acc_df = pd.DataFrame(acc_readings).astype(np.float32)
    #acc_df[['acc_x','acc_y','acc_z']] -= acc_df[['acc_x','acc_y','acc_z']][:10].mean()
    if gyro_readings:
        gyro_df = pd.DataFrame(gyro_readings).astype(np.float32)
        #gyro_df[['gyro_x','gyro_y','gyro_z']] -= gyro_df[['gyro_x','gyro_y','gyro_z']][:10].mean()
    else:
        gyro_df = None
    # Basic Viz
    # acc_df.drop(['pothhole', 'speedbreaker', 'timestamp'], axis=1, inplace=True)
    # acc_df = acc_df.astype(np.float32)
    # gyro_df = gyro_df.astype(np.float32)
    all_acc_df[start_time] = acc_df
    all_gyro_df[start_time] = gyro_df
    
start_time_list.sort(key=lambda x : x[0])
#print (len(all_readings))

In [47]:
def extract_bad_samples(data, output_list, class_type='pothhole', window=7, stride=1):
    
    single_class = data.loc[data[class_type] == 1.0]
    diff  = np.diff(single_class.index)
    pos = np.where(diff > 1)[0] + 1
    list_of_valid_values = np.split(single_class.index, pos)
    #print(list_of_valid_values)
    try:
        data = data.drop(['pothhole', 'speedbreaker', 'timestamp', 'accident'],axis=1)
    except:
        data = data.drop(['pothhole', 'speedbreaker', 'timestamp'],axis=1)
    # Splitting into continguous sections
    
    for valid_values in list_of_valid_values:
        all_frames = data.loc[valid_values]
        end_idx = len(all_frames)
        for i in range(0, end_idx, stride):
            if i+window < end_idx:
                output_list.append(np.array(all_frames[i:i+window]))
                
    #print(len(output_list))
    
    return output_list

def extract_good_samples(data, output_list, window=5, stride=2):
    
    single_class = data.loc[(data['pothhole'] == 0.0) & (data['speedbreaker'] == 0.0)]
    diff  = np.diff(single_class.index)
    pos = np.where(diff > 1)[0] + 1
    list_of_valid_values = np.split(single_class.index, pos)
    #print(list_of_valid_values)
    try:
        data = data.drop(['pothhole', 'speedbreaker', 'timestamp', 'accident'],axis=1)
    except:
        data = data.drop(['pothhole', 'speedbreaker', 'timestamp'],axis=1)
    # Splitting into continguous sections
    
    for valid_values in list_of_valid_values:
        all_frames = data.loc[valid_values]
        end_idx = len(all_frames)
        for i in range(0, end_idx, stride):
            if i+window < end_idx:
                output_list.append(np.array(all_frames[i:i+window]))
    
    return output_list

In [48]:
# Accelerometer
output_list = []
start_idxs = list(range(53, 84)) + [87]

for idx in start_idxs:
    start_time = start_time_list[idx][0]
    current_acc_df = all_acc_df[start_time]
    output_list = extract_good_samples(current_acc_df, output_list, window = WINDOW_SIZE, 
                                       stride = 1)
    #print(len(output_list))

acc_good_road_data = np.array(output_list)

output_list = []
#start_idxs = [-13, -12, -8, -6, -4, -3]

for idx in start_idxs:
    start_time = start_time_list[idx][0]
    current_acc_df = all_acc_df[start_time]
    output_list = extract_bad_samples(current_acc_df, output_list, 
                                      class_type = 'pothhole', window = WINDOW_SIZE, 
                                      stride = 1)
    output_list = extract_bad_samples(current_acc_df, output_list, 
                                      class_type = 'speedbreaker', window = WINDOW_SIZE,
                                      stride = 1)
    
acc_bad_road_data = np.array(output_list)

np.random.seed(42)
perm = np.random.permutation(acc_good_road_data.shape[0])
acc_good_road_data = acc_good_road_data[perm][:acc_bad_road_data.shape[0]]

In [49]:
## Get natural data
NATURAL_WALK_STRIDE = 3 
natural_walk_idxs = [-2, -1]
natural_walk_data = []
for idx in natural_walk_idxs:
    start_time = start_time_list[idx][0]
    current_acc_df = all_acc_df[start_time]
    
    try:
        current_acc_df = current_acc_df.drop(['pothhole', 'speedbreaker', 
                                              'timestamp', 'accident'],axis=1)
    except:
        current_acc_df = current_acc_df.drop(['pothhole', 'speedbreaker', 
                                              'timestamp'],axis=1)
        
    end_idx = len(current_acc_df)
    output_list = []
    
    for i in range(0, end_idx, NATURAL_WALK_STRIDE):
        if i+WINDOW_SIZE < end_idx:
            values = current_acc_df.loc[i:i+WINDOW_SIZE-1]
            output_list.append(np.array(values))
            #print(len(output_list))
    
    natural_walk_data.append(np.array(output_list))

natural_walk_data = np.concatenate((natural_walk_data[0], natural_walk_data[1]), axis=0)
print(natural_walk_data.shape)

(514, 16, 3)


In [50]:
def diff_per_sample(good_road_data, bad_road_data):
    
    diff_good_road_data = []
    for sample in good_road_data:
        values = []
        for i in range(1, sample.shape[0]):
            values.append(sample[i] - sample[i-1])
        diff_good_road_data.append(np.array(values))

    diff_bad_road_data = []
    for sample in bad_road_data:
        values = []
        for i in range(1, sample.shape[0]):
            values.append(sample[i] - sample[i-1])
        diff_bad_road_data.append(np.array(values))

    diff_bad_road_data  = np.array(diff_bad_road_data)
    diff_good_road_data = np.array(diff_good_road_data)

    print(diff_good_road_data.shape)
    print(diff_bad_road_data.shape)
    
    return diff_good_road_data, diff_bad_road_data

In [51]:
def reverse_readings(data):
    reversed = []
    for sample in data:
        reversed.append(np.flipud(sample))
    
    return np.array(reversed)

In [52]:
# ONLY FOR NATURAL WALK.
np.random.seed(42)
ratio = 2
perm = np.random.permutation(natural_walk_data.shape[0])
natural_walk_data = natural_walk_data[perm][:(acc_bad_road_data.shape[0] // 2)]

diff_acc_good_road_data, diff_acc_bad_road_data = diff_per_sample(natural_walk_data, 
                                                                  acc_bad_road_data)

(96, 15, 3)
(192, 15, 3)


In [31]:
# Diff per sample FOR ONLY ACC WITHOUT NATURAL WALK
diff_acc_good_road_data, diff_acc_bad_road_data = diff_per_sample(acc_good_road_data, 
                                                                  acc_bad_road_data)

(192, 15, 3)
(192, 15, 3)


In [53]:
# USE ONLY FOR DIFF - ACC

good_labels = [[1.0, 0.0] for _ in range(diff_acc_good_road_data.shape[0])]
bad_labels = [[0.0, 1.0] for _ in range(diff_acc_bad_road_data.shape[0])]

labels = np.array(good_labels + bad_labels)
data = np.concatenate((diff_acc_good_road_data, diff_acc_bad_road_data), axis=0)

In [54]:
## Noise for reducing sensitivity

data = data + np.random.normal(loc = 0.0, scale = 0.25, size = (data.shape))

In [55]:
trainX_, testX, trainY_, testY = train_test_split(data, labels, test_size=0.2, random_state=42)
trainX, valX, trainY, valY = train_test_split(trainX_, trainY_, test_size=0.1, random_state=42)

In [56]:
## Data Augmentation

trainX = np.concatenate((reverse_readings(trainX), trainX), axis = 0)
trainY = np.concatenate((trainY, trainY), axis = 0)

In [57]:
trainX.shape

(414, 15, 3)

## TF Core

In [59]:
N_TIME_STEPS = WINDOW_SIZE - 1 # -1 for diff
N_FEATURES = 3 # 3 for only acc, 6 for acc + gyro
N_CLASSES = 2
N_HIDDEN_UNITS = 64
L2_LOSS = 0.0015
LEARNING_RATE = 0.0025
N_EPOCHS = 100
BATCH_SIZE = 20
MODEL_NAME = 'diff_natural_W15_N025'

In [60]:
def create_LSTM_model(inputs):
    W = {
        'hidden': tf.Variable(tf.random_normal([N_FEATURES, N_HIDDEN_UNITS])),
        'output': tf.Variable(tf.random_normal([N_HIDDEN_UNITS, N_CLASSES]))
    }
    biases = {
        'hidden': tf.Variable(tf.random_normal([N_HIDDEN_UNITS], mean=1.0)),
        'output': tf.Variable(tf.random_normal([N_CLASSES]))
    }
    
    X = tf.transpose(inputs, [1, 0, 2])
    X = tf.reshape(X, [-1, N_FEATURES])
    hidden = tf.nn.relu(tf.matmul(X, W['hidden']) + biases['hidden'])
    hidden = tf.split(hidden, N_TIME_STEPS, 0)

    # Stack 2 LSTM layers
    lstm_layers = [tf.contrib.rnn.BasicLSTMCell(N_HIDDEN_UNITS, forget_bias=1.0) for _ in range(2)]
    lstm_layers = tf.contrib.rnn.MultiRNNCell(lstm_layers)

    outputs, _ = tf.contrib.rnn.static_rnn(lstm_layers, hidden, dtype=tf.float32)

    # Get output for the last time step
    lstm_last_output = outputs[-1]

    return tf.matmul(lstm_last_output, W['output']) + biases['output']

In [61]:
tf.reset_default_graph()

X = tf.placeholder(tf.float32, [None, N_TIME_STEPS, N_FEATURES], name="input")
Y = tf.placeholder(tf.float32, [None, N_CLASSES])

pred_Y = create_LSTM_model(X)
pred_softmax = tf.nn.softmax(pred_Y, name="y_")

l2 = L2_LOSS * sum(tf.nn.l2_loss(tf_var) for tf_var in tf.trainable_variables())
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = pred_Y, labels = Y)) + l2

optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE).minimize(loss)
correct_pred = tf.equal(tf.argmax(pred_softmax, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, dtype=tf.float32))

saver = tf.train.Saver()

history = dict(train_loss=[], train_acc=[], test_loss=[], test_acc=[])

In [62]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    train_count = len(trainX)
    
    for i in range(1, N_EPOCHS + 1):
        for start, end in zip(range(0, train_count, BATCH_SIZE),
                              range(BATCH_SIZE, train_count + 1, BATCH_SIZE)):
            sess.run(optimizer, feed_dict={X: trainX[start:end],
                                           Y: trainY[start:end]})

        _, acc_train, loss_train = sess.run([pred_softmax, accuracy, loss], feed_dict={
                                                X: trainX, Y: trainY})

        _, acc_test, loss_test = sess.run([pred_softmax, accuracy, loss], feed_dict={
                                                X: valX, Y: valY})

        history['train_loss'].append(loss_train)
        history['train_acc'].append(acc_train)
        history['test_loss'].append(loss_test)
        history['test_acc'].append(acc_test)

        print('epoch: %d test accuracy: %.3f loss: %.3f' % 
              (i, acc_test, loss_test))
    
    predictions, acc_final, loss_final = sess.run([pred_softmax, accuracy, loss], 
                                              feed_dict={X: testX, Y: testY})
    
    tf.train.write_graph(sess.graph_def, '', './checkpoint/' + MODEL_NAME + '.pbtxt')  
    saver.save(sess, save_path = "./checkpoint/" + MODEL_NAME + ".ckpt")

print()
print('final results: accuracy: %.3f loss: %.3f' %(acc_final, loss_final))

epoch: 1 test accuracy: 0.913 loss: 0.811
epoch: 2 test accuracy: 0.957 loss: 0.699
epoch: 3 test accuracy: 1.000 loss: 0.579
epoch: 4 test accuracy: 1.000 loss: 0.540
epoch: 5 test accuracy: 1.000 loss: 0.510
epoch: 6 test accuracy: 1.000 loss: 0.486
epoch: 7 test accuracy: 1.000 loss: 0.467
epoch: 8 test accuracy: 1.000 loss: 0.450
epoch: 9 test accuracy: 1.000 loss: 0.435
epoch: 10 test accuracy: 1.000 loss: 0.421
epoch: 11 test accuracy: 1.000 loss: 0.409
epoch: 12 test accuracy: 1.000 loss: 0.395
epoch: 13 test accuracy: 1.000 loss: 0.385
epoch: 14 test accuracy: 1.000 loss: 0.374
epoch: 15 test accuracy: 1.000 loss: 0.368
epoch: 16 test accuracy: 1.000 loss: 0.365
epoch: 17 test accuracy: 1.000 loss: 0.351
epoch: 18 test accuracy: 1.000 loss: 0.341
epoch: 19 test accuracy: 1.000 loss: 0.338
epoch: 20 test accuracy: 1.000 loss: 0.334
epoch: 21 test accuracy: 1.000 loss: 0.330
epoch: 22 test accuracy: 1.000 loss: 0.321
epoch: 23 test accuracy: 1.000 loss: 0.314
epoch: 24 test accur

KeyboardInterrupt: 

In [None]:
from tensorflow.python.tools import freeze_graph

In [None]:
input_graph_path = './checkpoint/' + MODEL_NAME+'.pbtxt'
checkpoint_path = './checkpoint/' +MODEL_NAME+'.ckpt'
restore_op_name = "save/restore_all"
filename_tensor_name = "save/Const:0"
output_frozen_graph_name = './checkpoint/'+MODEL_NAME+'.pb'

freeze_graph.freeze_graph(input_graph_path, input_saver="",
                          input_binary=False, input_checkpoint=checkpoint_path, 
                          output_node_names="y_", restore_op_name="save/restore_all",
                          filename_tensor_name="save/Const:0", 
                          output_graph=output_frozen_graph_name, 
                          clear_devices=True, initializer_nodes="")

In [34]:
!ls checkpoint/*.pb -lt
# !mv checkpoint/mean.pb checkpoint/mean_calib.pb

-rw-rw-r-- 1 b b 309385 Jan 20 02:25 checkpoint/mean_calib.pb
-rw-rw-r-- 1 b b 309385 Jan 19 23:54 checkpoint/diff2.pb
-rw-rw-r-- 1 b b 309385 Jan 19 23:44 checkpoint/diff.pb
-rw-rw-r-- 1 b b 309385 Jan 19 23:10 checkpoint/trials.pb
-rw-rw-r-- 1 b b 298369 Jan 19 20:15 checkpoint/activity_contrib_1.pb
-rw-rw-r-- 1 b b 300367 Jan 19 19:26 checkpoint/activity_contrib.pb


In [25]:
print(tf.get_default_graph().get_operations())

[<tf.Operation 'input' type=Placeholder>, <tf.Operation 'Placeholder' type=Placeholder>, <tf.Operation 'random_normal/shape' type=Const>, <tf.Operation 'random_normal/mean' type=Const>, <tf.Operation 'random_normal/stddev' type=Const>, <tf.Operation 'random_normal/RandomStandardNormal' type=RandomStandardNormal>, <tf.Operation 'random_normal/mul' type=Mul>, <tf.Operation 'random_normal' type=Add>, <tf.Operation 'Variable' type=VariableV2>, <tf.Operation 'Variable/Assign' type=Assign>, <tf.Operation 'Variable/read' type=Identity>, <tf.Operation 'random_normal_1/shape' type=Const>, <tf.Operation 'random_normal_1/mean' type=Const>, <tf.Operation 'random_normal_1/stddev' type=Const>, <tf.Operation 'random_normal_1/RandomStandardNormal' type=RandomStandardNormal>, <tf.Operation 'random_normal_1/mul' type=Mul>, <tf.Operation 'random_normal_1' type=Add>, <tf.Operation 'Variable_1' type=VariableV2>, <tf.Operation 'Variable_1/Assign' type=Assign>, <tf.Operation 'Variable_1/read' type=Identity>,