In [None]:
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
#from os import listdir
import os
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
#from keras.preprocessing import sequence
import tensorflow as tf
from tensorflow.keras import layers
from keras.optimizers import Adam
from keras.models import load_model
import keras.metrics
from keras.callbacks import ModelCheckpoint

In [None]:
# Discover the class folders: every sub-directory of the data folder is one class.
path, dirs, files = next(os.walk("./Data_preprocessed/")) #path with the preprocessed signals
# os.walk reports sub-directories in arbitrary (filesystem-dependent) order,
# while a class label is simply the folder's position in `dirs` and `thisdict`
# lists the class names alphabetically. Sorting keeps label -> class stable
# across machines and runs.
dirs = sorted(dirs)
nod = len(dirs) #number of classes/folders

In [None]:
# Load every preprocessed signal file. A signal's class label is the index of
# the class folder (position in `dirs`) it was read from.

dataframes_list = []  # one DataFrame per signal file
labels_list = []      # integer class label, parallel to dataframes_list

for j in range(nod):
    class_dir = os.path.join(path, dirs[j])
    print(j)
    print(class_dir)
    c_path, c_dirs, c_files = next(os.walk(class_dir))
    file_count = len(c_files)
    print(file_count)

    for csv_name in c_files:
        temp_df = pd.read_csv(c_path + '/' + csv_name)
        # Discard columns whose name starts with "Unnamed"
        # (presumably index columns written out with the CSV -- TODO confirm).
        unnamed_mask = temp_df.columns.str.match("Unnamed")
        temp_df = temp_df.loc[:, ~unnamed_mask]

        dataframes_list.append(temp_df)
        labels_list.append(j)

In [None]:
# Summary statistics of the number of rows in each loaded signal.
len_sequences = [len(one_seq) for one_seq in dataframes_list]
pd.Series(len_sequences).describe()

In [None]:
# Display name for each integer class label (looked up by plot_distribution).
# The position in the list is the label.
thisdict = dict(enumerate([
    "P_Crossroad_Left",
    "P_Crossroad_Right",
    "P_Crossroad_Straight",
    "P_Parking_Diagonal_Left",
    "P_Parking_Diagonal_Right",
    "P_Parking_Parallel_Left",
    "P_Parking_Parallel_Right",
    "P_Parking_Perpendicular_Left",
    "P_Parking_Perpendicular_Right",
    "P_Roundabout_Left",
    "P_Roundabout_Right",
    "P_Roundabout_Straight",
    "S_Bending",
    "S_Drinking",
    "S_Eating",
    "S_Turning_Back",
]))

In [None]:
# Split the loaded signals into shuffled train/test arrays; data_dist=True
# additionally draws the class-distribution pie chart for both splits.
# NOTE(review): prepare() is defined in a later cell of this notebook, so on a
# fresh kernel that cell has to be executed before this one.
(train, train_target,
 test, test_target) = prepare(dataframes_list, labels_list, data_dist=True)

In [None]:
# Build the network; the input shape is the shape of one sample, i.e. the
# training array's shape without its leading (sample) axis.
# NOTE(review): make_model() is defined in a later cell of this notebook.
model4 = make_model(
    input_shape=train.shape[1:],
)

In [None]:
# Compile the network and train it. Checkpointing, learning-rate reduction and
# early stopping are all driven by the validation loss.

epochs = 400
batch_size = 32

# Write the model to disk whenever val_loss reaches a new best value.
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    "best_model_vtest.h5",
    monitor="val_loss",
    save_best_only=True,
)
# Multiply the learning rate by 0.5 after 20 epochs without val_loss
# improvement, but never go below 0.0002.
reduce_lr_cb = keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.5,
    patience=20,
    min_lr=0.0002,
)
# Stop training after 50 epochs without val_loss improvement.
early_stop_cb = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=50,
    verbose=1,
)
callbacks = [checkpoint_cb, reduce_lr_cb, early_stop_cb]

# Integer class labels -> sparse categorical loss / accuracy.
model4.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="adam",
    metrics=["sparse_categorical_accuracy"],
)

fit_options = dict(
    epochs=epochs,
    batch_size=batch_size,
    shuffle=True,
    validation_split=0.2,  # 20% of `train` is held out for validation
    callbacks=callbacks,
    verbose=1,
)
history4 = model4.fit(train, train_target, **fit_options)

In [None]:
# Replace the in-memory model with the one stored in "best_model_v64.h5".
# NOTE(review): the training cell checkpoints to "best_model_vtest.h5", not to
# this file -- confirm that evaluating a different saved model is intended.
model4 = keras.models.load_model("best_model_v64.h5")

In [None]:
# model evaluation: training curves (accuracy first, then loss)
# NOTE(review): `history4` only exists after the training cell has been run in
# this kernel session.

for summarize in (accuracy_summ, loss_summ):
    summarize(history4)

In [None]:
# Evaluate the model on the held-out test split and report both numbers.

test_loss, test_acc = model4.evaluate(test, test_target)
for caption, value in (("Test accuracy", test_acc), ("Test loss", test_loss)):
    print(caption, value)

In [None]:
# precision, recall, f1-score for all classes

# Predicted class index of every test sample: argmax over the per-class
# scores returned by the model. Computed here because no notebook-level
# `pred` exists otherwise (confusion_m keeps its own local copy).
pred = model4.predict(test).argmax(axis=-1)

results_log = classification_report(test_target, pred)
print(results_log)

In [None]:
# PCA scatter plots of the model outputs, first for the training split and
# then for the test split.
for split_x, split_y in ((train, train_target), (test, test_target)):
    scatter_plots(split_x, split_y)

In [None]:
# Confusion matrix of the model's predictions on the test split.
confusion_m(
    model4,
    test,
    test_target,
)

In [None]:
# preparing the data for NN
def prepare(dataframes_list, labels_list, data_dist=False, test_size=0.1, random_state=None):
    """Split the signals into train/test NumPy arrays.

    Parameters
    ----------
    dataframes_list : list
        One entry per signal. The entries are stacked with ``np.array``;
        assumes every entry has the same shape so the result is one regular
        array -- TODO confirm against the preprocessing step.
    labels_list : list
        Class label of each entry of ``dataframes_list``.
    data_dist : bool, default False
        When truthy, draw the class-distribution pie chart of both splits.
    test_size : float or int, default 0.1
        Forwarded to ``train_test_split`` (previously hard-coded to 0.1).
    random_state : int or None, default None
        Seed forwarded to ``train_test_split`` and ``shuffle``. ``None`` keeps
        the original non-deterministic behaviour; pass an integer to make the
        split reproducible.

    Returns
    -------
    tuple
        ``(train, train_target, test, test_target)`` as NumPy arrays.
    """
    (dataframes_list_train, dataframes_list_test,
     labels_list_train, labels_list_test) = train_test_split(
        dataframes_list, labels_list, test_size=test_size, random_state=random_state
    )
    train = np.array(dataframes_list_train)
    test = np.array(dataframes_list_test)
    train_target = np.array(labels_list_train)
    test_target = np.array(labels_list_test)
    # Samples and labels are shuffled together so they stay aligned.
    train, train_target = shuffle(train, train_target, random_state=random_state)
    test, test_target = shuffle(test, test_target, random_state=random_state)
    # data distribution (only class counts matter, so the pre-shuffle lists are fine)
    if data_dist:
        plot_distribution(labels_list_train, dataframes_list_train)
        plot_distribution(labels_list_test, dataframes_list_test)
    return train, train_target, test, test_target

In [None]:
# defining neural network architecture

# Number of distinct labels present in the training targets; make_model uses
# it as the width of the softmax output layer.
# NOTE(review): a class that happens to be missing from the training split
# would not be counted here -- confirm every class is always represented.
num_classes = np.unique(train_target).size
def _conv_block(tensor, filters=128, kernel_size=5):
    """Apply Conv1D ("same" padding) -> BatchNormalization -> ReLU to `tensor`."""
    tensor = keras.layers.Conv1D(filters=filters, kernel_size=kernel_size, padding="same")(tensor)
    tensor = keras.layers.BatchNormalization()(tensor)
    return keras.layers.ReLU()(tensor)


def make_model(input_shape, n_classes=None):
    """Build the (uncompiled) 1-D convolutional classifier.

    Architecture: three Conv1D/BatchNorm/ReLU blocks (128 filters, kernel 5),
    with a MaxPooling1D(3) after the first block, followed by global average
    pooling, Dropout(0.4) and a softmax Dense layer.

    Parameters
    ----------
    input_shape : tuple
        Shape of a single sample, without the batch axis.
    n_classes : int or None, default None
        Number of output units. ``None`` falls back to the notebook-level
        ``num_classes`` variable, which is what the function always used
        before this parameter existed.

    Returns
    -------
    keras.models.Model
        Model mapping the input layer to the softmax output.
    """
    if n_classes is None:
        n_classes = num_classes  # notebook-level value derived from train_target

    input_layer = keras.layers.Input(input_shape)

    features = _conv_block(input_layer)
    features = keras.layers.MaxPooling1D(3)(features)
    features = _conv_block(features)
    features = _conv_block(features)

    # Collapse the time axis, regularise, then classify.
    pooled = keras.layers.GlobalAveragePooling1D()(features)
    pooled = keras.layers.Dropout(0.4)(pooled)
    output_layer = keras.layers.Dense(n_classes, activation="softmax")(pooled)

    return keras.models.Model(inputs=input_layer, outputs=output_layer)

In [None]:
#print model architecture

# Import plot_model from its public location: `keras.utils.vis_utils` is an
# internal module path that newer Keras releases no longer provide, whereas
# `keras.utils.plot_model` is the documented entry point.
from keras.utils import plot_model
# Writes the diagram to model_plot.png (with layer names and tensor shapes).
plot_model(model4, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

In [None]:
# summarize history for accuracy

def accuracy_summ(history4):
    """Plot training and validation accuracy per epoch.

    `history4` must expose a ``history`` mapping containing the keys
    'sparse_categorical_accuracy' and 'val_sparse_categorical_accuracy'
    (e.g. the object returned by ``model.fit``).
    """
    curves = history4.history
    for key in ('sparse_categorical_accuracy', 'val_sparse_categorical_accuracy'):
        plt.plot(curves[key])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    # Legend entries follow the order in which the curves were drawn.
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()

# summarize history for loss

def loss_summ(history4):
    """Plot training and validation loss per epoch.

    `history4` must expose a ``history`` mapping containing the keys 'loss'
    and 'val_loss' (e.g. the object returned by ``model.fit``).
    """
    curves = history4.history
    for key in ('loss', 'val_loss'):
        plt.plot(curves[key])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    # Legend entries follow the order in which the curves were drawn.
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()

In [None]:
#printing confusion matrix

from pretty_confusion_matrix import pp_matrix
#model4 = keras.models.load_model("best_model_v64.h5")
def confusion_m(model4, t_set, target, n_classes=16):
    """Draw the confusion matrix of `model4` on a data split.

    Parameters
    ----------
    model4 : object with a ``predict`` method
        ``predict(t_set)`` must return per-class scores with the class axis last.
    t_set : array-like
        Samples passed to ``model4.predict``.
    target : array-like
        True integer class label of each sample.
    n_classes : int, default 16
        Number of classes; rows/columns of the matrix are labelled
        ``0 .. n_classes - 1``.
    """
    pred = model4.predict(t_set).argmax(axis=-1)
    class_ids = list(range(n_classes))
    # Passing `labels` pins the matrix to n_classes x n_classes. Without it the
    # matrix only covers classes that occur in `target` or `pred`, and building
    # the fixed-size DataFrame below fails whenever a class is absent from both.
    cm = confusion_matrix(target, pred, labels=class_ids)
    df_cm = pd.DataFrame(cm, index=range(n_classes), columns=range(n_classes))
    pp_matrix(df_cm, cmap='icefire', pred_val_axis='lin', fz=8, figsize=[12, 12])

In [None]:
from sklearn.decomposition import PCA
import seaborn as sns
def scatter_plots(t_set, target, model=None):
    """Scatter-plot the PCA projection of the model outputs for a data split.

    Two figures are shown: a 2-D plot of the first two principal components
    and a 3-D plot of the first three, both coloured by `target`.

    Parameters
    ----------
    t_set : array-like
        Samples passed to the model's ``predict``.
    target : array-like
        Value used to colour each point (the true class label at the
        notebook's call sites).
    model : object with a ``predict`` method, optional
        Defaults to the notebook-level ``model4``, which is what the function
        always used before this parameter existed.
    """
    if model is None:
        model = model4  # notebook-level model

    pred_set = model.predict(t_set)

    # Three components are needed for the 3-D figure; PCA cannot return more
    # components than min(n_samples, n_features), so cap the request.
    n_components = min(3, *pred_set.shape)
    pts = PCA(n_components=n_components).fit_transform(pred_set)

    plt.figure(figsize=(7,10))
    sns.scatterplot(
        x=pts[:,0], y=pts[:,1],
        hue=target,
        palette=sns.color_palette("viridis", as_cmap=True),
        legend="full"
    )
    plt.xlabel('pca-one')
    plt.ylabel('pca-two')
    plt.show()

    # Figure.gca() no longer accepts keyword arguments in current Matplotlib;
    # add_subplot(projection='3d') is the supported way to get 3-D axes.
    ax = plt.figure(figsize=(7,10)).add_subplot(projection='3d')
    # Third principal component on the z axis (flat plane if unavailable).
    zs = pts[:,2] if n_components >= 3 else 0
    ax.scatter(pts[:,0], pts[:,1], zs, c = target, cmap="viridis")
    ax.set_xlabel('pca-one')
    ax.set_ylabel('pca-two')
    ax.set_zlabel('pca-three')
    plt.show()


In [None]:
import plotly.graph_objects as go
# function that plots the distribution of classes 
def plot_distribution(labels_list_train, dataframes_list_train,
                      title='Distribution of classes in the training set'):
    """Show a donut chart of how many samples each class has.

    Parameters
    ----------
    labels_list_train : sequence
        Class label of every sample in the split.
    dataframes_list_train : sequence
        Unused; kept so existing callers do not break.
    title : str, optional
        Chart title. Defaults to the text that was previously hard-coded, so
        callers plotting another split can pass a fitting title.
    """
    # Take labels and counts from the same value_counts result so that every
    # slice name is guaranteed to be paired with its own count.
    counts = pd.Series(labels_list_train).value_counts(sort=False)
    # Translate integer labels to display names; unknown labels are shown as-is.
    labels = [thisdict.get(item, item) for item in counts.index.tolist()]

    pie_plot = go.Pie(labels=labels, values=counts.tolist(), hole=.3)
    fig = go.Figure(data=[pie_plot])
    fig.update_layout(title_text=title)

    fig.show()