<font size="+0.5">Notebook for plotting the confusion matrix and printing precision and recall metrics</font>

In [None]:
import itertools
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pickle

from scipy.signal import savgol_filter
from sklearn.metrics import confusion_matrix, precision_score, recall_score

<font size="+0.5">Load the scalers and the encoder with which the data was transformed when the models were trained</font>

In [None]:
def _load_pickle(path):
    """Unpickle and return the object stored at ``path``."""
    # NOTE: pickle can execute arbitrary code while loading; only point this
    # at files produced by this project.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Per-axis accelerometer scalers (applied through ``.transform`` further down)
x_scaler = _load_pickle('models/x_accelerometer.pcl')
y_scaler = _load_pickle('models/y_accelerometer.pcl')
z_scaler = _load_pickle('models/z_accelerometer.pcl')

# Label encoder used to map model predictions back to class names
encoder = _load_pickle("models/encoder.pcl")

In [None]:
# Maps every raw event name to the severity class used as the evaluation target.
# Built group by group; insertion order matches the original literal.
label_dict = {
    **dict.fromkeys(
        ("Aggressive acceleration", "Aggressive breaking"),
        "critical",
    ),
    **dict.fromkeys(
        (
            "Aggressive left lane change",
            "Aggressive left turn",
            "Aggressive right lane change",
            "Aggressive right turn",
        ),
        "significant",
    ),
    "Non-aggressive event": "negligible",
}

In [None]:
def change_label(row):
    """Return the severity class that ``label_dict`` assigns to ``row['event']``.

    Raises ``KeyError`` when the event name is not a key of ``label_dict``.
    """
    event_name = row['event']
    return label_dict[event_name]

In [None]:
# Function for plotting confusion matrix
def plot_confusion_matrix(cm, title='Confusion matrix', cmap=plt.cm.Blues, classes=None):
    """Draw ``cm`` as an annotated heat map on the current matplotlib figure.

    Parameters
    ----------
    cm : 2-D numpy array
        Confusion matrix; rows are shown on the "True label" axis and columns
        on the "Predicted label" axis.
    title : str
        Title placed above the plot.
    cmap : matplotlib colormap
        Colormap used for the cells.
    classes : sequence of str, optional
        Tick labels for both axes. Defaults to ``encoder.classes_`` (the
        module-level label encoder), which is what the original code always used.
    """
    if classes is None:
        classes = encoder.classes_

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Cells darker than half of the maximum get white text so that the
    # annotation stays readable on the dark background.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], '.1f'),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # tight_layout() only accounts for artists that already exist, so it has
    # to run after the axis labels are set; otherwise they may be clipped.
    plt.tight_layout()

In [None]:
def print_statistic(data, model_file, model_name, linear=False, gyroscope=False, features_approach=False):
    """Evaluate a pickled model and report per-class precision and recall.

    Prints the metrics and plots the row-normalised confusion matrix on a new
    7x7 inch figure. The caller's ``data`` frame is left unmodified.

    Parameters
    ----------
    data : pandas.DataFrame
        Validation data with an ``event`` column whose values are keys of
        ``label_dict``. In the simple approach it must also contain
        ``x_accelerometer``, ``y_accelerometer`` and ``z_accelerometer``
        (plus the three ``*_gyroscope`` columns when ``gyroscope`` is True).
    model_file : str
        Base name of the pickle inside ``models/`` (``.pcl`` is appended).
    model_name : str
        Human readable name, only used in the printed header.
    linear : bool
        Simple approach only: transform the accelerometer axes with the
        loaded scalers before the acceleration magnitude is computed.
    gyroscope : bool
        Simple approach only: also feed the gyroscope columns to the model.
    features_approach : bool
        When True, every column except the target is passed to the model and
        ``linear`` / ``gyroscope`` are ignored.

    Returns
    -------
    None
    """
    # Load model
    # NOTE: pickle can execute arbitrary code while loading; only use model
    # files produced by this project.
    with open(os.path.join("models", f"{model_file}.pcl"), "rb") as file:
        model = pickle.load(file)

    print(f"Model: {model_name}")

    # Transform target values without adding a column to the caller's frame
    y_true = data.apply(change_label, axis=1)
    # ``drop`` returns a new frame, so the feature engineering below never
    # touches the caller's data. "event_label" is dropped too in case the
    # frame already carries it from an earlier run.
    features = data.drop(columns=["event", "event_label"], errors="ignore")

    if features_approach:
        print("ARTICLE APPROACH")
        columns_to_predict = features.columns
    else:
        print("SIMPLE APPROACH")
        if linear:
            # Scale accelerometer x, y, z values first so that the
            # acceleration feature is derived from the scaled axes
            axis_scalers = (
                ("x_accelerometer", x_scaler),
                ("y_accelerometer", y_scaler),
                ("z_accelerometer", z_scaler),
            )
            for column, scaler in axis_scalers:
                scaled = scaler.transform(features[column].values.reshape(-1, 1))
                # transform() is fed an (n, 1) array; flatten before assigning
                features[column] = np.ravel(scaled)
        # Add acceleration feature (magnitude of the accelerometer vector)
        features['acceleration'] = np.sqrt(
            features['x_accelerometer'] ** 2
            + features['y_accelerometer'] ** 2
            + features['z_accelerometer'] ** 2
        )
        # Create list with columns which will be used for predicting
        columns_to_predict = ["x_accelerometer", "y_accelerometer", "z_accelerometer", "acceleration"]
        if gyroscope:
            columns_to_predict += ["x_gyroscope", "y_gyroscope", "z_gyroscope"]

    # Predict
    y_pred = encoder.inverse_transform(model.predict(features[columns_to_predict]))

    classes = encoder.classes_

    # Calculate confusion matrix. Passing the labels explicitly keeps the
    # matrix aligned with the tick labels used by plot_confusion_matrix even
    # when a class is absent from both y_true and y_pred.
    cm = confusion_matrix(y_true, y_pred, labels=classes)
    row_totals = cm.sum(axis=1, keepdims=True)
    # Normalise each row; rows without any true samples stay at zero instead
    # of turning into NaN through a division by zero.
    cm = np.divide(cm, row_totals,
                   out=np.zeros(cm.shape, dtype=float),
                   where=row_totals != 0)

    # Encode once and reuse for both metrics
    y_true_encoded = encoder.transform(y_true)
    y_pred_encoded = encoder.transform(y_pred)
    # One score per encoder class, in encoder order, even if a class is missing
    label_ids = np.arange(len(classes))

    # Calculate precision by each class
    precision_scores = precision_score(y_true_encoded, y_pred_encoded,
                                       labels=label_ids, average=None)
    for class_name, score in zip(classes, precision_scores):
        print(f"Precision score for class {class_name} is {score}")

    print()

    # Calculate recall by each class
    recall_scores = recall_score(y_true_encoded, y_pred_encoded,
                                 labels=label_ids, average=None)
    for class_name, score in zip(classes, recall_scores):
        print(f"Recall score for class {class_name} is {score}")

    # Plot confusion matrix
    plt.figure(figsize=(7, 7))
    plot_confusion_matrix(cm)

In [None]:
# Simple approach, scaled accelerometer axes: models/linear-accelerometer.pcl
val = pd.read_csv('data/val_filtered_accelerometer.csv')
print_statistic(
    val,
    model_file="linear-accelerometer",
    model_name="SUPPORT VECTOR MACHINE",
    linear=True,
)

In [None]:
# Simple approach, unscaled accelerometer axes: models/non-linear-accelerometer.pcl
val = pd.read_csv('data/val_filtered_accelerometer.csv')
print_statistic(
    val,
    model_file="non-linear-accelerometer",
    model_name="LIGHTGBM",
)

In [None]:
# Simple approach with gyroscope columns, scaled accelerometer axes.
# The gyroscope frame's own "event" column is dropped so the target
# appears only once after the column-wise concatenation.
val_acc = pd.read_csv('data/val_filtered_accelerometer.csv')
val_gyr = pd.read_csv('data/val_filtered_gyroscope.csv')
val = pd.concat(
    [val_acc, val_gyr.drop(columns=["event"])],
    axis=1,
)
print_statistic(
    val,
    model_file="linear-accelerometer-gyroscope",
    model_name="SUPPORT VECTOR MACHINE",
    linear=True,
    gyroscope=True,
)

In [None]:
# Simple approach with gyroscope columns, unscaled accelerometer axes.
# The gyroscope frame's own "event" column is dropped so the target
# appears only once after the column-wise concatenation.
val_acc = pd.read_csv('data/val_filtered_accelerometer.csv')
val_gyr = pd.read_csv('data/val_filtered_gyroscope.csv')
val = pd.concat(
    [val_acc, val_gyr.drop(columns=["event"])],
    axis=1,
)
print_statistic(
    val,
    model_file="non-linear-accelerometer-gyroscope",
    model_name="LIGHTGBM",
    gyroscope=True,
)

In [None]:
# Features approach on accelerometer features: models/linear-accelerometer-features.pcl
val = pd.read_csv('data/val_accelerometer_features.csv')
print_statistic(
    val,
    model_file="linear-accelerometer-features",
    model_name="MLP",
    features_approach=True,
)

In [None]:
# Features approach on accelerometer features: models/non-linear-accelerometer-features.pcl
val = pd.read_csv('data/val_accelerometer_features.csv')
print_statistic(
    val,
    model_file="non-linear-accelerometer-features",
    model_name="RANDOM FOREST",
    features_approach=True,
)

In [None]:
# Features approach on accelerometer + gyroscope features.
# The gyroscope frame's own "event" column is dropped so the target
# appears only once after the column-wise concatenation.
val_acc = pd.read_csv('data/val_accelerometer_features.csv')
val_gyr = pd.read_csv('data/val_gyroscope_features.csv')
val = pd.concat(
    [val_acc, val_gyr.drop(columns=["event"])],
    axis=1,
)
print_statistic(
    val,
    model_file="linear-accelerometer-gyroscope-features",
    model_name="MLP",
    features_approach=True,
)

In [None]:
# Features approach on accelerometer + gyroscope features.
# The gyroscope frame's own "event" column is dropped so the target
# appears only once after the column-wise concatenation.
val_acc = pd.read_csv('data/val_accelerometer_features.csv')
val_gyr = pd.read_csv('data/val_gyroscope_features.csv')
val = pd.concat(
    [val_acc, val_gyr.drop(columns=["event"])],
    axis=1,
)
print_statistic(
    val,
    model_file="non-linear-accelerometer-gyroscope-features",
    model_name="RANDOM FOREST",
    features_approach=True,
)