### 1. Baseline
* original dataset

In [23]:
import numpy as np
import os

MFCC = 13 # num of MFCC features

mfcc_folder = '../dataset/mfcc_features'
np_file = os.listdir(mfcc_folder)

# prepare data
X = np.zeros((len(np_file), MFCC)) # (n_samples, n_features)
y = np.zeros(len(np_file), dtype=int)  # (n_samples, )
# each group is one video
filenames = set()
groups = np.zeros(len(np_file), dtype=int)  # (n_samples, )

# load form directory
for i, file in enumerate(np_file):
    mfcc = np.load(os.path.join(mfcc_folder, file))
    X[i] = mfcc.mean(axis=1)    # (MFCC, )
    # 0: normal driving, 1: car crash
    label = 0 if int(file.split('_')[0]) < 5 else 1
    y[i] = label    
    filename = file[file.find('_') + 1:file.rfind('_')]  # segment is the second part of the filename
    # search with set to find unique value
    if filename not in filenames:
        filenames.add(filename)
        groups[i] = len(filenames)
    else:
        groups[i] = list(filenames).index(filename)

print(X[0])  # print the first sample
print(y[0])  # print the label of the first sample
print(groups[0])  # print the group of the first sample

[-398.98666382  113.03168488  -11.29673195   27.68769264    3.6051693
   11.02839375   -4.29368401    7.7390089     3.64515638    5.37020397
   -3.23544407    9.0578289    -4.63530731]
0
1


In [11]:
# random seed
import random
random.seed(0)
np.random.seed(0)
seed = 0

In [12]:
from sklearn.model_selection import GroupKFold
group_kfold = GroupKFold(n_splits=5) # 5-fold
print(group_kfold.get_n_splits(X, y, groups))  # print number of splits

5


In [13]:
from sklearn import tree
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

clf = tree.DecisionTreeClassifier(random_state=seed)

# fit data (using k-fold)
for i, (train_index, test_index) in enumerate(group_kfold.split(X, y, groups)):
    print(f"Fold {i}:")
    # training
    clf.fit(X[train_index], y[train_index])
    # testing
    y_pred = clf.predict(X[test_index])
    y_pred_proba = clf.predict_proba(X[test_index])[:, 1]  # probabilities for ROC AUC

    print(f"Training Accuracy: {accuracy_score(y[train_index], clf.predict(X[train_index]))}")
    print(f"Validation Accuracy: {accuracy_score(y[test_index], y_pred)}")
    print(f"Precision: {precision_score(y[test_index], y_pred)}")
    print(f"Recall: {recall_score(y[test_index], y_pred)}")
    print(f"F1 Score: {f1_score(y[test_index], y_pred)}")
    print(f"ROC AUC: {roc_auc_score(y[test_index], y_pred_proba)}")
    print(f"Confusion Matrix:\n{confusion_matrix(y[test_index], y_pred)}")
    print()

Fold 0:
Training Accuracy: 1.0
Validation Accuracy: 0.5233644859813084
Precision: 1.0
Recall: 0.32894736842105265
F1 Score: 0.49504950495049505
ROC AUC: 0.6644736842105263
Confusion Matrix:
[[31  0]
 [51 25]]

Fold 1:
Training Accuracy: 1.0
Validation Accuracy: 0.8552631578947368
Precision: 0.5
Recall: 0.9090909090909091
F1 Score: 0.6451612903225806
ROC AUC: 0.8776223776223776
Confusion Matrix:
[[55 10]
 [ 1 10]]

Fold 2:
Training Accuracy: 1.0
Validation Accuracy: 0.6710526315789473
Precision: 0.25
Recall: 0.8888888888888888
F1 Score: 0.3902439024390244
ROC AUC: 0.7653399668325042
Confusion Matrix:
[[43 24]
 [ 1  8]]

Fold 3:
Training Accuracy: 1.0
Validation Accuracy: 0.7631578947368421
Precision: 0.34615384615384615
Recall: 0.9
F1 Score: 0.5
ROC AUC: 0.8212121212121213
Confusion Matrix:
[[49 17]
 [ 1  9]]

Fold 4:
Training Accuracy: 1.0
Validation Accuracy: 0.84
Precision: 0.4444444444444444
Recall: 0.8
F1 Score: 0.5714285714285714
ROC AUC: 0.823076923076923
Confusion Matrix:
[[55 1

In [14]:
import time
import numpy as np

# Get a subset of test data for inference
# For this example, I'll use the last fold from the GroupKFold as test data
for i, (train_index, test_index) in enumerate(group_kfold.split(X, y, groups)):
    if i == 4:  # Use the last fold as test data
        test_data = X[test_index]
        break

# Warm-up runs
print("Warming up...")
for _ in range(10):
    _ = clf.predict(test_data)

# Measure inference time
print("Measuring inference time...")
n_iterations = 100
start_time = time.time()

for _ in range(n_iterations):
    _ = clf.predict(test_data)

end_time = time.time()

# Calculate statistics
total_time = end_time - start_time
avg_time_per_batch = total_time / n_iterations
avg_time_per_sample = total_time / (n_iterations * len(test_data))

print(f"Total time for {n_iterations} iterations: {total_time:.4f} seconds")
print(f"Average time per batch: {avg_time_per_batch*1000:.2f} ms")
print(f"Average time per sample: {avg_time_per_sample*1000:.2f} ms")
print(f"Inference speed: {n_iterations * len(test_data) / total_time:.2f} samples/second")


Warming up...
Measuring inference time...
Total time for 100 iterations: 0.0286 seconds
Average time per batch: 0.29 ms
Average time per sample: 0.00 ms
Inference speed: 262520.28 samples/second


2. Data augmenation

In [25]:
def read_dataAug(mfcc_folder):
    np_file = os.listdir(mfcc_folder)

    # prepare data
    X_aug = np.zeros((len(np_file), MFCC)) # (n_samples, n_features)
    y_aug = np.zeros(len(np_file), dtype=int)  # (n_samples, )
    # each group is one video
    filenames = set()
    groups = np.zeros(len(np_file), dtype=int)  # (n_samples, )

    # load form directory
    for i, file in enumerate(np_file):
        mfcc = np.load(os.path.join(mfcc_folder, file))
        X_aug[i] = mfcc.mean(axis=1)    # (MFCC, )
        # 0: normal driving, 1: car crash
        label = 0 if int(file.split('_')[1]) < 5 else 1
        y_aug[i] = label    
        filename = file[file.find('_') + 1:file.rfind('_')]  # segment is the second part of the filename
        # search with set to find unique value
        if filename not in filenames:
            filenames.add(filename)
            groups[i] = len(filenames)
        else:
            groups[i] = list(filenames).index(filename)

    return X_aug, y_aug

if __name__ == "__main__":
    mfcc_folder = '../dataset/aug_mfcc_features'
    X_aug, y_aug = read_dataAug(mfcc_folder)
    print(X_aug[0])  # print the first sample
    print(y_aug[0])  # print the label of the first sample
    print(groups[0])  # print the group of the first sample

[-270.81295776  -15.02099133   38.02788925   13.21731472   10.0347805
    4.48674297    3.34693432    3.60442758    4.46276379    3.18351889
    2.26930118    1.81936967    0.86694759]
0
1


In [28]:
from sklearn import tree
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

clf_aug = tree.DecisionTreeClassifier(random_state=seed)

# fit data (using k-fold)
for i, (train_index, test_index) in enumerate(group_kfold.split(X, y, groups)):
    print(f"Fold {i}:")
    # training
    X_combined = np.vstack((X[train_index].copy(), X_aug.copy()))
    y_combined = np.hstack((y[train_index].copy(), y_aug.copy()))
    
    clf_aug.fit(X_combined, y_combined)
    
    # testing
    y_pred = clf_aug.predict(X[test_index])
    y_pred_proba = clf_aug.predict_proba(X[test_index])[:, 1]  # probabilities for ROC AUC

    print(f"Training Accuracy: {accuracy_score(y_combined, clf_aug.predict(X_combined))}")
    print(f"Validation Accuracy: {accuracy_score(y[test_index], y_pred)}")
    print(f"Precision: {precision_score(y[test_index], y_pred)}")
    print(f"Recall: {recall_score(y[test_index], y_pred)}")
    print(f"F1 Score: {f1_score(y[test_index], y_pred)}")
    print(f"ROC AUC: {roc_auc_score(y[test_index], y_pred_proba)}")
    print(f"Confusion Matrix:\n{confusion_matrix(y[test_index], y_pred)}")
    print()

Fold 0:
Training Accuracy: 1.0
Validation Accuracy: 0.5327102803738317
Precision: 1.0
Recall: 0.34210526315789475
F1 Score: 0.5098039215686274
ROC AUC: 0.6710526315789473
Confusion Matrix:
[[31  0]
 [50 26]]

Fold 1:
Training Accuracy: 1.0
Validation Accuracy: 0.881578947368421
Precision: 0.5555555555555556
Recall: 0.9090909090909091
F1 Score: 0.6896551724137931
ROC AUC: 0.893006993006993
Confusion Matrix:
[[57  8]
 [ 1 10]]

Fold 2:
Training Accuracy: 1.0
Validation Accuracy: 0.7105263157894737
Precision: 0.27586206896551724
Recall: 0.8888888888888888
F1 Score: 0.42105263157894735
ROC AUC: 0.7877280265339967
Confusion Matrix:
[[46 21]
 [ 1  8]]

Fold 3:
Training Accuracy: 1.0
Validation Accuracy: 0.7894736842105263
Precision: 0.38461538461538464
Recall: 1.0
F1 Score: 0.5555555555555556
ROC AUC: 0.8787878787878788
Confusion Matrix:
[[50 16]
 [ 0 10]]

Fold 4:
Training Accuracy: 1.0
Validation Accuracy: 0.7866666666666666
Precision: 0.35
Recall: 0.7
F1 Score: 0.4666666666666667
ROC AUC:

In [29]:
import time
import numpy as np

# Get a subset of test data for inference
# For this example, I'll use the last fold from the GroupKFold as test data
for i, (train_index, test_index) in enumerate(group_kfold.split(X, y, groups)):
    if i == 4:  # Use the last fold as test data
        test_data = X[test_index]
        break

# Warm-up runs
print("Warming up...")
for _ in range(10):
    _ = clf_aug.predict(test_data)

# Measure inference time
print("Measuring inference time...")
n_iterations = 100
start_time = time.time()

for _ in range(n_iterations):
    _ = clf_aug.predict(test_data)

end_time = time.time()

# Calculate statistics
total_time = end_time - start_time
avg_time_per_batch = total_time / n_iterations
avg_time_per_sample = total_time / (n_iterations * len(test_data))

print(f"Total time for {n_iterations} iterations: {total_time:.4f} seconds")
print(f"Average time per batch: {avg_time_per_batch*1000:.2f} ms")
print(f"Average time per sample: {avg_time_per_sample*1000:.2f} ms")
print(f"Inference speed: {n_iterations * len(test_data) / total_time:.2f} samples/second")


Warming up...
Measuring inference time...
Total time for 100 iterations: 0.0333 seconds
Average time per batch: 0.33 ms
Average time per sample: 0.00 ms
Inference speed: 225314.47 samples/second


3. tsne

In [31]:
from sklearn.manifold import TSNE
# 使用 t-SNE 进行降维
tsne = TSNE(n_components=2, random_state=seed)
X_tsne = tsne.fit_transform(X)

In [32]:
from sklearn import tree
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

clf_tsne = tree.DecisionTreeClassifier(random_state=seed)

# fit data (using k-fold)
for i, (train_index, test_index) in enumerate(group_kfold.split(X_tsne, y, groups)):
    print(f"Fold {i}:")
    # training
    clf_tsne.fit(X_tsne[train_index], y[train_index])
    
    # testing
    y_pred = clf_tsne.predict(X_tsne[test_index])
    y_pred_proba = clf_tsne.predict_proba(X_tsne[test_index])[:, 1]  # probabilities for ROC AUC

    print(f"Training Accuracy: {accuracy_score(y[train_index], clf_tsne.predict(X_tsne[train_index]))}")
    print(f"Validation Accuracy: {accuracy_score(y[test_index], y_pred)}")
    print(f"Precision: {precision_score(y[test_index], y_pred)}")
    print(f"Recall: {recall_score(y[test_index], y_pred)}")
    print(f"F1 Score: {f1_score(y[test_index], y_pred)}")
    print(f"ROC AUC: {roc_auc_score(y[test_index], y_pred_proba)}")
    print(f"Confusion Matrix:\n{confusion_matrix(y[test_index], y_pred)}")
    print()

Fold 0:
Training Accuracy: 1.0
Validation Accuracy: 0.616822429906542
Precision: 0.972972972972973
Recall: 0.47368421052631576
F1 Score: 0.6371681415929203
ROC AUC: 0.7207130730050934
Confusion Matrix:
[[30  1]
 [40 36]]

Fold 1:
Training Accuracy: 1.0
Validation Accuracy: 0.9473684210526315
Precision: 0.8181818181818182
Recall: 0.8181818181818182
F1 Score: 0.8181818181818182
ROC AUC: 0.8937062937062937
Confusion Matrix:
[[63  2]
 [ 2  9]]

Fold 2:
Training Accuracy: 1.0
Validation Accuracy: 0.631578947368421
Precision: 0.1935483870967742
Recall: 0.6666666666666666
F1 Score: 0.3
ROC AUC: 0.6467661691542288
Confusion Matrix:
[[42 25]
 [ 3  6]]

Fold 3:
Training Accuracy: 1.0
Validation Accuracy: 0.8421052631578947
Precision: 0.4375
Recall: 0.7
F1 Score: 0.5384615384615384
ROC AUC: 0.7818181818181819
Confusion Matrix:
[[57  9]
 [ 3  7]]

Fold 4:
Training Accuracy: 1.0
Validation Accuracy: 0.8266666666666667
Precision: 0.4117647058823529
Recall: 0.7
F1 Score: 0.5185185185185185
ROC AUC: 0

In [34]:
import time
import numpy as np

# Get a subset of test data for inference
# For this example, I'll use the last fold from the GroupKFold as test data
for i, (train_index, test_index) in enumerate(group_kfold.split(X, y, groups)):
    if i == 4:  # Use the last fold as test data
        test_data = tsne.fit_transform(X[test_index])
        break

# Warm-up runs
print("Warming up...")
for _ in range(10):
    _ = clf_tsne.predict(test_data)

# Measure inference time
print("Measuring inference time...")
n_iterations = 100
start_time = time.time()

for _ in range(n_iterations):
    _ = clf_tsne.predict(test_data)

end_time = time.time()

# Calculate statistics
total_time = end_time - start_time
avg_time_per_batch = total_time / n_iterations
avg_time_per_sample = total_time / (n_iterations * len(test_data))

print(f"Total time for {n_iterations} iterations: {total_time:.4f} seconds")
print(f"Average time per batch: {avg_time_per_batch*1000:.2f} ms")
print(f"Average time per sample: {avg_time_per_sample*1000:.2f} ms")
print(f"Inference speed: {n_iterations * len(test_data) / total_time:.2f} samples/second")


Warming up...
Measuring inference time...
Total time for 100 iterations: 0.0339 seconds
Average time per batch: 0.34 ms
Average time per sample: 0.00 ms
Inference speed: 221463.08 samples/second
