In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv1D, MaxPooling1D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.metrics import categorical_accuracy
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import plot_confusion_matrix
from sklearn import metrics
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import os
import datetime

In [2]:
%matplotlib widget
plt.style.use('seaborn-whitegrid')

In [3]:
### CNN ###
DATA_SPLIT_PERC = 0.9
CHANNELS_CNN = 1
BATCH_SIZE = 4
EPOCHS = 10

### System ###
checkpoint_str = f"test/cp_ep{EPOCHS}_bsize{BATCH_SIZE}_adam.ckpt"
checkpoint_path = Path(checkpoint_str)
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

# Make sure not to overwrite existing models
try:
    if len(os.listdir(checkpoint_path)) > 0:
        checkpoint_str += '_'
except FileNotFoundError:
    pass

In [4]:
# Load and shape the dataset
DATASETS = {0: ('sensor_readings_2.data', 2), 1: ('sensor_readings_4.data', 4), 2: ('sensor_readings_24.data', 24)}
for data in DATASETS:
    print(data, ':', DATASETS[data][0])
DATA_ID = int(input('Select dataset: '))
NUM_SENSORS = DATASETS[DATA_ID][1]
NUM_CLASSES = 4
input_shape = (1, NUM_CLASSES)
df = pd.read_csv(f'Data/{DATASETS[DATA_ID][0]}', header=None)
df

0 : sensor_readings_2.data
1 : sensor_readings_4.data
2 : sensor_readings_24.data
Select dataset: 2


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,15,16,17,18,19,20,21,22,23,24
0,0.438,0.498,3.625,3.645,5.000,2.918,5.000,2.351,2.332,2.643,...,0.593,0.502,0.493,0.504,0.445,0.431,0.444,0.440,0.429,Slight-Right-Turn
1,0.438,0.498,3.625,3.648,5.000,2.918,5.000,2.637,2.332,2.649,...,0.592,0.502,0.493,0.504,0.449,0.431,0.444,0.443,0.429,Slight-Right-Turn
2,0.438,0.498,3.625,3.629,5.000,2.918,5.000,2.637,2.334,2.643,...,0.593,0.502,0.493,0.504,0.449,0.431,0.444,0.446,0.429,Slight-Right-Turn
3,0.437,0.501,3.625,3.626,5.000,2.918,5.000,2.353,2.334,2.642,...,0.593,0.502,0.493,0.504,0.449,0.431,0.444,0.444,0.429,Slight-Right-Turn
4,0.438,0.498,3.626,3.629,5.000,2.918,5.000,2.640,2.334,2.639,...,0.592,0.502,0.493,0.504,0.449,0.431,0.444,0.441,0.429,Slight-Right-Turn
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5451,0.910,5.000,3.997,2.785,2.770,2.572,2.433,1.087,1.772,1.040,...,0.660,0.648,0.657,0.686,5.000,1.045,5.000,5.000,1.562,Move-Forward
5452,0.926,5.000,4.015,2.792,2.777,2.571,1.768,1.071,1.762,1.021,...,0.652,0.640,0.649,1.593,1.616,1.058,5.000,5.000,1.085,Sharp-Right-Turn
5453,0.937,5.000,4.034,2.799,2.784,2.571,1.754,1.053,1.752,1.002,...,0.648,0.633,0.642,0.741,5.000,1.065,5.000,5.000,1.105,Sharp-Right-Turn
5454,0.945,4.052,4.052,2.809,2.791,2.441,1.757,1.034,1.743,0.983,...,0.641,0.626,0.635,0.754,5.000,1.076,5.000,5.000,1.118,Move-Forward


In [5]:
num_samples = df.shape[0]
directions = {'Move-Forward': 0, 'Sharp-Right-Turn': 1,
              'Slight-Right-Turn': 2, 'Slight-Left-Turn': 3}
df.iloc[:, -1].replace(directions, inplace=True)
df = df.sample(frac=1).reset_index(drop=True)

split = lambda df, perc : (df.iloc[:int(perc * df.shape[0])], df.iloc[int(perc * df.shape[0]):])
x , y = df.iloc[:, :-1], df.iloc[:, -1]
(x_train, x_test), (y_train, y_test) = split(x, 0.9), split(y, 0.9)

x_train : np.ndarray = x_train.values.reshape(x_train.shape + (CHANNELS_CNN,))
x_test : np.ndarray = x_test.values.reshape(x_test.shape + (CHANNELS_CNN,))


y_train = to_categorical(y_train, NUM_CLASSES)
y_test = to_categorical(y_test, NUM_CLASSES)
#    
# The first parameter of 'shape' is the number of samples.
num_train_samples = x_train.shape[0]
num_batches = int(num_train_samples / BATCH_SIZE)

In [6]:
print('Rows: %d, columns: %d' % (x_train.shape[0], x_train.shape[1]))
print('Rows: %d, columns: %d' % (x_test.shape[0], x_test.shape[1]))

Rows: 4910, columns: 24
Rows: 546, columns: 24


In [7]:
# Save the model every epoch, only if the model has improved
cp_callback = ModelCheckpoint(checkpoint_str,
                              monitor='accuracy',
                              save_best_only=True,
                              mode='max',
                              save_weights_only=False,
                              save_freq='epoch',
                              #save_freq=5*num_batches,
                              verbose=2)
tb_callback = keras.callbacks.TensorBoard(log_dir=log_dir,
                                          histogram_freq=1)
callbacks = [cp_callback, tb_callback]

In [8]:
model = keras.models.Sequential([
    keras.layers.Conv1D(filters=96, kernel_size=(11,), activation='relu', input_shape=x_train.shape[1:]),
    keras.layers.BatchNormalization(),
    #keras.layers.MaxPool1D(pool_size=(3,), strides=(2,)),
    keras.layers.Conv1D(filters=256, kernel_size=(5,), activation='relu', padding="same"),
    keras.layers.BatchNormalization(),
    #keras.layers.MaxPool1D(pool_size=(3,), strides=(2,)),
    keras.layers.Conv1D(filters=384, kernel_size=(3,), activation='relu', padding="same"),
    keras.layers.BatchNormalization(),
    keras.layers.Conv1D(filters=384, kernel_size=(3,), activation='relu', padding="same"),
    keras.layers.BatchNormalization(),
    keras.layers.Conv1D(filters=256, kernel_size=(3,), activation='relu', padding="same"),
    keras.layers.BatchNormalization(),
    #keras.layers.MaxPool1D(pool_size=(3,), strides=(2,)),
    keras.layers.Flatten(),
    keras.layers.Dense(4096, activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(4096, activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(NUM_CLASSES, activation='softmax')
])

In [9]:
# Compile the currently loaded model
opt = tf.keras.optimizers.Adam(0.001)
model.compile(loss=keras.losses.categorical_crossentropy, optimizer=opt, metrics=['accuracy'])

In [10]:
import os
def delete_folder(folder):
    for root, dirs, files in os.walk(folder, topdown=False):
        for name in files:
            os.remove(os.path.join(root, name))
        for name in dirs:
            os.rmdir(os.path.join(root, name))

In [11]:
delete_folder("./logs")
delete_folder("./graphs")

In [12]:
# Train the currently loaded model
history = model.fit(x_train, y_train,
                    batch_size=BATCH_SIZE,
                    epochs=EPOCHS,
                    verbose=1,
                    validation_data=(x_test, y_test),
                    callbacks=callbacks)

Epoch 1/10

Epoch 00001: accuracy improved from -inf to 0.61385, saving model to test\cp_ep10_bsize4_adam.ckpt
INFO:tensorflow:Assets written to: test\cp_ep10_bsize4_adam.ckpt\assets
Epoch 2/10

Epoch 00002: accuracy improved from 0.61385 to 0.70020, saving model to test\cp_ep10_bsize4_adam.ckpt
INFO:tensorflow:Assets written to: test\cp_ep10_bsize4_adam.ckpt\assets
Epoch 3/10

Epoch 00003: accuracy improved from 0.70020 to 0.73136, saving model to test\cp_ep10_bsize4_adam.ckpt
INFO:tensorflow:Assets written to: test\cp_ep10_bsize4_adam.ckpt\assets
Epoch 4/10

Epoch 00004: accuracy improved from 0.73136 to 0.78289, saving model to test\cp_ep10_bsize4_adam.ckpt
INFO:tensorflow:Assets written to: test\cp_ep10_bsize4_adam.ckpt\assets
Epoch 5/10

Epoch 00005: accuracy improved from 0.78289 to 0.83055, saving model to test\cp_ep10_bsize4_adam.ckpt
INFO:tensorflow:Assets written to: test\cp_ep10_bsize4_adam.ckpt\assets
Epoch 6/10

Epoch 00006: accuracy improved from 0.83055 to 0.84949, savin

In [13]:
# Evaluate the model on the training set
score = model.evaluate(x_train, y_train, verbose=0)
print('loss: ', score[0])
print('score: ', score[1])

loss:  0.2584766745567322
score:  0.9120162725448608


In [14]:
# Evaluate the model on the test set
score = model.evaluate(x_test, y_test, verbose=0)
print('loss: ', score[0])
print('score: ', score[1])

loss:  0.281747967004776
score:  0.9047619104385376


In [15]:
# Log the data obtained by the last evaluation
with open('log.txt', 'a') as file:
    file.write('File name: ' + checkpoint_str + '\n')
    file.write('loss: ' + str(score[0]) + '\n')
    file.write('score: ' + str(score[1]) + '\n')
    file.write('accuracy: ')
    file.write(str(history.history['accuracy'][-1]) + '\n')
    file.write('\n')

    file.close()

In [16]:
# Load the model and print a summary of its structure
model = tf.keras.models.load_model('./' + checkpoint_str)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d (Conv1D)              (None, 14, 96)            1152      
_________________________________________________________________
batch_normalization (BatchNo (None, 14, 96)            384       
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 14, 256)           123136    
_________________________________________________________________
batch_normalization_1 (Batch (None, 14, 256)           1024      
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 14, 384)           295296    
_________________________________________________________________
batch_normalization_2 (Batch (None, 14, 384)           1536      
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 14, 384)           4

In [17]:
# plotting
fig1, ax1 = plt.subplots(figsize=(10, 5))
fig2, ax2 = plt.subplots(figsize=(10, 5))

# summarize history for accuracy
ax1.plot(history.history['accuracy'])
ax1.plot(history.history['val_accuracy'])
ax1.set_title('model accuracy ' + str(score[1]))
ax1.set_ylabel('accuracy')
ax1.set_xlabel('epoch')
ax1.legend(['train', 'test'], loc='upper left')
plt.savefig('./graphs/Accuracy')
plt.show()


# summarize history for loss
ax2.plot(history.history['loss'])
ax2.plot(history.history['val_loss'])
ax2.set_title('model loss ' + str(score[0]))
ax2.set_ylabel('loss')
ax2.set_xlabel('epoch')
ax2.legend(['train', 'test'], loc='upper left')
plt.savefig('./graphs/Loss')
plt.show()   

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [18]:
import sys
np.set_printoptions(threshold=sys.maxsize)

In [19]:
class_labels = list(directions.keys())

In [20]:
rounded_labels=np.argmax(y_test, axis=1)
y_pred=model.predict(x_test)
rounded_predictions=np.argmax(y_pred, axis=1)
cm = metrics.confusion_matrix(rounded_labels, rounded_predictions)
cm

array([[208,  17,   6,   5],
       [  9, 192,   5,   3],
       [  5,   0,  69,   0],
       [  1,   1,   0,  25]], dtype=int64)

In [21]:
import seaborn as sn
plt.figure(figsize = (10,7))
sn.heatmap(cm, annot=True, annot_kws={"size": 16})
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [23]:
import shap
# KernelExplainer is a general approach that can work with any ML framework
# Its inputs are the predictions and training data

# Summarize the training set to accelerate analysis
df_train_normed_summary = shap.kmeans(x_train, 10)

# Instantiate an explainer with the model predictions and training data summary
explainer = shap.KernelExplainer(model.predict, df_train_normed_summary)

# Extract Shapley values from the explainer
shap_values = explainer.shap_values(x_train)

ValueError: Found array with dim 3. Estimator expected <= 2.

In [None]:
np.savetxt('shap0.txt', shap_values[0])
np.savetxt('shap1.txt', shap_values[1])
np.savetxt('shap2.txt', shap_values[2])
np.savetxt('shap3.txt', shap_values[3])

In [None]:
# Summarize the Shapley values in a plot
shap.initjs()
plt.figure(figsize = (12,10))
shap.summary_plot(shap_values[1], x_train)

In [None]:
shap.force_plot(explainer.expected_value[0], shap_values[0][0,:], x_test.iloc[0,:], link="logit")

In [None]:
shap.dependence_plot(1, shap_values[1], x_train, interaction_index=1)