<font size="+0.5">Notebook for plotting the confusion matrix and printing precision and recall metrics</font>

In [None]:
import itertools
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pickle

from scipy.signal import savgol_filter
from sklearn.metrics import confusion_matrix, precision_score, recall_score

<font size="+0.5">Load the scalers and the encoder that were used to transform the data during model training</font>

In [None]:
def _load_pickle(path):
    """Return the object unpickled from the file at ``path``."""
    with open(path, "rb") as file:
        return pickle.load(file)


# Per-axis scalers and the label encoder, loaded in the same order as before.
x_scaler = _load_pickle("models/x_scaler.pcl")
y_scaler = _load_pickle("models/y_scaler.pcl")
z_scaler = _load_pickle("models/z_scaler.pcl")
encoder = _load_pickle("models/encoder.pcl")

In [None]:
# Maps every raw event name to its severity label, grouped by severity.
label_dict = {
    event_name: severity
    for severity, event_names in (
        ("critical", ("Aggressive acceleration",
                      "Aggressive breaking")),
        ("significant", ("Aggressive left lane change",
                         "Aggressive left turn",
                         "Aggressive right lane change",
                         "Aggressive right turn")),
        ("negligible", ("Non-aggressive event",)),
    )
    for event_name in event_names
}

In [None]:
def change_label(row):
    """Return the ``label_dict`` entry for the row's ``'event'`` value.

    Raises ``KeyError`` when the event name has no entry in ``label_dict``.
    """
    event_name = row['event']
    return label_dict[event_name]

In [None]:
# Function for plotting confusion matrix
def plot_confusion_matrix(cm, title='Confusion matrix', cmap=plt.cm.Blues, classes=None):
    """Draw ``cm`` as an annotated heat map on the current matplotlib figure.

    Parameters
    ----------
    cm : 2-D numpy array
        Confusion matrix; rows are labelled as true classes and columns as
        predicted classes. Each cell value is printed with one decimal place.
    title : str
        Title placed above the plot.
    cmap : matplotlib colormap
        Colormap used for the heat map.
    classes : sequence, optional
        Tick labels for both axes. Defaults to ``encoder.classes_`` (the
        module-level label encoder) when omitted.
    """
    if classes is None:
        classes = encoder.classes_

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Cells darker than half of the maximum get white text so they stay readable.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], '.1f'),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # tight_layout only accounts for artists that already exist, so it has to
    # run after the axis labels are set or they are left out of the layout.
    plt.tight_layout()

In [None]:
# Function for print statistic in simple approach: using x, y, z axis as features
def print_statistic(data, model_file, model_name, linear=False, gyroscope=False):
    """Evaluate a pickled model on ``data`` and report per-class metrics.

    Prints the per-class precision and recall, then plots the row-normalised
    confusion matrix with ``plot_confusion_matrix``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain an ``event`` column and the ``x_accelerometer``,
        ``y_accelerometer``, ``z_accelerometer`` columns; when ``gyroscope``
        is true also ``x_gyroscope``, ``y_gyroscope``, ``z_gyroscope``.
        The frame is modified in place: ``event_label`` and the ``*_fil``
        feature columns are added to it.
    model_file : str
        Base name of the pickle inside ``models/`` (``.pcl`` is appended).
    model_name : str
        Name printed in the report header.
    linear : bool
        When true, the filtered accelerometer axes are passed through
        ``x_scaler`` / ``y_scaler`` / ``z_scaler`` and the acceleration
        magnitude is recomputed from the scaled axes.
    gyroscope : bool
        When true, filtered gyroscope axes are appended to the feature set.
    """
    accel_columns = [f"{axis}_accelerometer_fil" for axis in "xyz"]

    def acceleration_magnitude():
        # Euclidean norm of the current accelerometer feature columns.
        x_col, y_col, z_col = (data[column] for column in accel_columns)
        return np.sqrt(x_col ** 2 + y_col ** 2 + z_col ** 2)

    # Load model
    # NOTE: unpickling executes arbitrary code; only load trusted model files.
    with open(os.path.join("models", f"{model_file}.pcl"), "rb") as file:
        model = pickle.load(file)

    print(f"Model: {model_name}")

    # Transform target values
    data['event_label'] = data.apply(change_label, axis=1)

    # Filter accelerometer x, y, z, values
    for axis in "xyz":
        data[f'{axis}_accelerometer_fil'] = savgol_filter(data[f'{axis}_accelerometer'].values, 51, 5)

    # Add acceleration feature
    data['acceleration_fil'] = acceleration_magnitude()

    if linear:
        # Scale accelerometer x, y, z, values and acceleration feature
        for column, scaler in zip(accel_columns, (x_scaler, y_scaler, z_scaler)):
            scaled = scaler.transform(data[column].values.reshape(-1, 1))
            # The scaler returns an (n, 1) array; flatten it before storing
            # it back as a single column.
            data[column] = np.asarray(scaled).ravel()
        # Magnitude is rebuilt from the scaled axes (presumably mirrors the
        # preprocessing used at training time -- confirm against training code).
        data['acceleration_fil'] = acceleration_magnitude()

    # Create list with columns which will be used for predicting
    valid_columns = accel_columns + ["acceleration_fil"]

    if gyroscope:
        # Filter gyroscope x, y, z, values
        for axis in "xyz":
            data[f'{axis}_gyroscope_fil'] = savgol_filter(data[f'{axis}_gyroscope'].values, 31, 4)
        valid_columns += [f"{axis}_gyroscope_fil" for axis in "xyz"]

    # Predict
    y_true = data["event_label"].values
    y_pred = encoder.inverse_transform(model.predict(data[valid_columns]))

    # Calculate confusion matrix. Passing the encoder's classes explicitly keeps
    # the matrix aligned with the tick labels even when a class is missing from
    # both the ground truth and the predictions.
    cm = confusion_matrix(y_true, y_pred, labels=encoder.classes_)
    row_totals = cm.sum(axis=1, keepdims=True)
    # Normalise each row; rows without any true samples stay at 0 instead of NaN.
    cm = np.divide(cm, row_totals,
                   out=np.zeros(cm.shape, dtype=float),
                   where=row_totals != 0)

    # Encode once and pin the label set so that the score arrays always have
    # one entry per encoder class, in encoder order.
    y_true_codes = encoder.transform(y_true)
    y_pred_codes = encoder.transform(y_pred)
    class_codes = np.arange(len(encoder.classes_))

    # Calculate precision by each class
    precision_scores = precision_score(y_true_codes, y_pred_codes, labels=class_codes, average=None)
    for class_name, score in zip(encoder.classes_, precision_scores):
        print(f"Precision score for class {class_name} is {score}")

    print()

    # Calculate recall by each class
    recall_scores = recall_score(y_true_codes, y_pred_codes, labels=class_codes, average=None)
    for class_name, score in zip(encoder.classes_, recall_scores):
        print(f"Recall score for class {class_name} is {score}")

    # Plot confusion matrix
    plt.figure(figsize=(7, 7))
    plot_confusion_matrix(cm)

In [None]:
# SVC on the accelerometer validation set, with the scaling branch enabled.
val = pd.read_csv('data/val_filtered_accelerometer.csv')
print_statistic(data=val, model_file="svc_time", model_name="SVC", linear=True)

In [None]:
# LightGBM on the accelerometer validation set (no scaling, no gyroscope).
val = pd.read_csv('data/val_filtered_accelerometer.csv')
print_statistic(data=val, model_file="lightgbm_time", model_name="LIGHTGBM")

In [None]:
# SVC on accelerometer + gyroscope validation data; the gyroscope frame's
# "event" column is dropped so it is not duplicated after the column-wise join.
val_acc = pd.read_csv('data/val_filtered_accelerometer.csv')
val_gyr = pd.read_csv('data/val_filtered_gyroscope.csv')
val = pd.concat([val_acc, val_gyr.drop(columns=["event"])], axis=1)
print_statistic(
    data=val,
    model_file="svc_gyroscope_time",
    model_name="SVC",
    linear=True,
    gyroscope=True,
)

In [None]:
# Gradient boosting on accelerometer + gyroscope validation data; the gyroscope
# frame's "event" column is dropped before the column-wise join.
val_acc = pd.read_csv('data/val_filtered_accelerometer.csv')
val_gyr = pd.read_csv('data/val_filtered_gyroscope.csv')
val = pd.concat([val_acc, val_gyr.drop(columns=["event"])], axis=1)
print_statistic(
    data=val,
    model_file="GRADIENTBC_gyroscope_time",
    model_name="GRADIENT BOOSTING",
    gyroscope=True,
)