### 1. Baseline
* original dataset

In [12]:
import numpy as np
import os

MFCC = 13 # num of MFCC features

mfcc_folder = '../dataset/mfcc_features'
np_file = os.listdir(mfcc_folder)

# prepare data
X = np.zeros((len(np_file), MFCC)) # (n_samples, n_features)
y = np.zeros(len(np_file), dtype=int)  # (n_samples, )
# each group is one video
filenames = set()
groups = np.zeros(len(np_file), dtype=int)  # (n_samples, )

# load form directory
for i, file in enumerate(np_file):
    mfcc = np.load(os.path.join(mfcc_folder, file))
    X[i] = mfcc.mean(axis=1)    # (MFCC, )
    # 0: normal driving, 1: car crash
    label = 0 if int(file.split('_')[0]) < 5 else 1
    y[i] = label    
    filename = file[file.find('_') + 1:file.rfind('_')]  # segment is the second part of the filename
    # search with set to find unique value
    if filename not in filenames:
        filenames.add(filename)
        groups[i] = len(filenames)
    else:
        groups[i] = list(filenames).index(filename)

print(X[0])  # print the first sample
print(y[0])  # print the label of the first sample
print(groups[0])  # print the group of the first sample

[-398.98666382  113.03168488  -11.29673195   27.68769264    3.6051693
   11.02839375   -4.29368401    7.7390089     3.64515638    5.37020397
   -3.23544407    9.0578289    -4.63530731]
0
1


In [13]:
# random seed
import random
random.seed(0)
seed = 0

In [14]:
from sklearn.model_selection import GroupKFold
group_kfold = GroupKFold(n_splits=5) # 5-fold
print(group_kfold.get_n_splits(X, y, groups))  # print number of splits

5


In [15]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

clf = RandomForestClassifier(random_state=seed)

# fit data (using k-fold)
for i, (train_index, test_index) in enumerate(group_kfold.split(X, y, groups)):
    print(f"Fold {i}:")
    # training
    # print([train_index])
    clf.fit(X[train_index], y[train_index])
    # testing
    y_pred = clf.predict(X[test_index])
    y_pred_prob = clf.predict_proba(X[test_index])[:, 1]  # probabilities for ROC AUC

    print(f"Training Accuracy: {accuracy_score(y[train_index], clf.predict(X[train_index]))}")
    print(f"Validation Accuracy: {accuracy_score(y[test_index], y_pred)}")
    print(f"Precision: {precision_score(y[test_index], y_pred)}")
    print(f"Recall: {recall_score(y[test_index], y_pred)}")
    print(f"F1 Score: {f1_score(y[test_index], y_pred)}")
    print(f"ROC AUC: {roc_auc_score(y[test_index], y_pred_prob)}")
    print(f"Confusion Matrix:\n{confusion_matrix(y[test_index], y_pred)}")
    print()

Fold 0:
Training Accuracy: 1.0
Validation Accuracy: 0.17073170731707318
Precision: 1.0
Recall: 0.11688311688311688
F1 Score: 0.20930232558139536
ROC AUC: 0.761038961038961
Confusion Matrix:
[[ 5  0]
 [68  9]]

Fold 1:
Training Accuracy: 1.0
Validation Accuracy: 0.9512195121951219
Precision: 0.75
Recall: 0.9
F1 Score: 0.8181818181818182
ROC AUC: 0.9812500000000001
Confusion Matrix:
[[69  3]
 [ 1  9]]

Fold 2:
Training Accuracy: 1.0
Validation Accuracy: 0.9878048780487805
Precision: 1.0
Recall: 0.9
F1 Score: 0.9473684210526315
ROC AUC: 0.9805555555555555
Confusion Matrix:
[[72  0]
 [ 1  9]]

Fold 3:
Training Accuracy: 1.0
Validation Accuracy: 0.9024390243902439
Precision: 0.5714285714285714
Recall: 0.8
F1 Score: 0.6666666666666666
ROC AUC: 0.9402777777777778
Confusion Matrix:
[[66  6]
 [ 2  8]]

Fold 4:
Training Accuracy: 1.0
Validation Accuracy: 0.7926829268292683
Precision: 0.34615384615384615
Recall: 1.0
F1 Score: 0.5142857142857142
ROC AUC: 0.9490106544901066
Confusion Matrix:
[[56 1

In [16]:
import time
import numpy as np

# Get a subset of test data for inference
# For this example, I'll use the last fold from the GroupKFold as test data
for i, (train_index, test_index) in enumerate(group_kfold.split(X, y, groups)):
    if i == 4:  # Use the last fold as test data
        test_data = X[test_index]
        break

# Warm-up runs
print("Warming up...")
for _ in range(10):
    _ = clf.predict(test_data)

# Measure inference time
print("Measuring inference time...")
n_iterations = 100
start_time = time.time()

for _ in range(n_iterations):
    _ = clf.predict(test_data)

end_time = time.time()

# Calculate statistics
total_time = end_time - start_time
avg_time_per_batch = total_time / n_iterations
avg_time_per_sample = total_time / (n_iterations * len(test_data))

print(f"Total time for {n_iterations} iterations: {total_time:.4f} seconds")
print(f"Average time per batch: {avg_time_per_batch*1000:.2f} ms")
print(f"Average time per sample: {avg_time_per_sample*1000:.2f} ms")
print(f"Inference speed: {n_iterations * len(test_data) / total_time:.2f} samples/second")


Warming up...
Measuring inference time...
Total time for 100 iterations: 1.5761 seconds
Average time per batch: 15.76 ms
Average time per sample: 0.19 ms
Inference speed: 5202.75 samples/second


2. DataAug

In [17]:
def read_dataAug(mfcc_folder):
    np_file = os.listdir(mfcc_folder)

    # prepare data
    X_aug = np.zeros((len(np_file), MFCC)) # (n_samples, n_features)
    y_aug = np.zeros(len(np_file), dtype=int)  # (n_samples, )
    # each group is one video
    filenames = set()
    groups = np.zeros(len(np_file), dtype=int)  # (n_samples, )

    # load form directory
    for i, file in enumerate(np_file):
        mfcc = np.load(os.path.join(mfcc_folder, file))
        X_aug[i] = mfcc.mean(axis=1)    # (MFCC, )
        # 0: normal driving, 1: car crash
        label = 0 if int(file.split('_')[1]) < 5 else 1
        y_aug[i] = label    
        filename = file[file.find('_') + 1:file.rfind('_')]  # segment is the second part of the filename
        # search with set to find unique value
        if filename not in filenames:
            filenames.add(filename)
            groups[i] = len(filenames)
        else:
            groups[i] = list(filenames).index(filename)

    return X_aug, y_aug

if __name__ == "__main__":
    mfcc_folder = '../dataset/aug_mfcc_features'
    X_aug, y_aug = read_dataAug(mfcc_folder)
    print(X_aug[0])  # print the first sample
    print(y_aug[0])  # print the label of the first sample
    print(groups[0])  # print the group of the first sample

[-270.81295776  -15.02099133   38.02788925   13.21731472   10.0347805
    4.48674297    3.34693432    3.60442758    4.46276379    3.18351889
    2.26930118    1.81936967    0.86694759]
0
1


In [18]:
from sklearn import tree
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

clf_aug = RandomForestClassifier(random_state=seed)

# fit data (using k-fold)
for i, (train_index, test_index) in enumerate(group_kfold.split(X, y, groups)):
    print(f"Fold {i}:")
    # training
    # print(train_index)
    X_combined = np.vstack((X[train_index].copy(), X_aug.copy()))
    y_combined = np.hstack((y[train_index].copy(), y_aug.copy()))
    
    clf_aug.fit(X_combined, y_combined)
    
    # testing
    y_pred = clf_aug.predict(X[test_index])
    y_pred_proba = clf_aug.predict_proba(X[test_index])[:, 1]  # probabilities for ROC AUC

    print(f"Training Accuracy: {accuracy_score(y_combined, clf_aug.predict(X_combined))}")
    print(f"Validation Accuracy: {accuracy_score(y[test_index], y_pred)}")
    print(f"Precision: {precision_score(y[test_index], y_pred)}")
    print(f"Recall: {recall_score(y[test_index], y_pred)}")
    print(f"F1 Score: {f1_score(y[test_index], y_pred)}")
    print(f"ROC AUC: {roc_auc_score(y[test_index], y_pred_proba)}")
    print(f"Confusion Matrix:\n{confusion_matrix(y[test_index], y_pred)}")
    print()

Fold 0:
Training Accuracy: 1.0
Validation Accuracy: 0.25609756097560976
Precision: 1.0
Recall: 0.2077922077922078
F1 Score: 0.34408602150537637
ROC AUC: 0.7298701298701299
Confusion Matrix:
[[ 5  0]
 [61 16]]

Fold 1:
Training Accuracy: 1.0
Validation Accuracy: 0.9878048780487805
Precision: 1.0
Recall: 0.9
F1 Score: 0.9473684210526315
ROC AUC: 1.0
Confusion Matrix:
[[72  0]
 [ 1  9]]

Fold 2:
Training Accuracy: 1.0
Validation Accuracy: 0.975609756097561
Precision: 1.0
Recall: 0.8
F1 Score: 0.8888888888888888
ROC AUC: 0.9986111111111111
Confusion Matrix:
[[72  0]
 [ 2  8]]

Fold 3:
Training Accuracy: 1.0
Validation Accuracy: 0.9634146341463414
Precision: 0.8888888888888888
Recall: 0.8
F1 Score: 0.8421052631578947
ROC AUC: 0.9611111111111111
Confusion Matrix:
[[71  1]
 [ 2  8]]

Fold 4:
Training Accuracy: 1.0
Validation Accuracy: 0.8292682926829268
Precision: 0.391304347826087
Recall: 1.0
F1 Score: 0.5625
ROC AUC: 0.9596651445966514
Confusion Matrix:
[[59 14]
 [ 0  9]]



In [19]:
import time
import numpy as np

# Get a subset of test data for inference
# For this example, I'll use the last fold from the GroupKFold as test data
for i, (train_index, test_index) in enumerate(group_kfold.split(X, y, groups)):
    if i == 4:  # Use the last fold as test data
        test_data = X[test_index]
        break

# Warm-up runs
print("Warming up...")
for _ in range(10):
    _ = clf_aug.predict(test_data)

# Measure inference time
print("Measuring inference time...")
n_iterations = 100
start_time = time.time()

for _ in range(n_iterations):
    _ = clf_aug.predict(test_data)

end_time = time.time()

# Calculate statistics
total_time = end_time - start_time
avg_time_per_batch = total_time / n_iterations
avg_time_per_sample = total_time / (n_iterations * len(test_data))

print(f"Total time for {n_iterations} iterations: {total_time:.4f} seconds")
print(f"Average time per batch: {avg_time_per_batch*1000:.2f} ms")
print(f"Average time per sample: {avg_time_per_sample*1000:.2f} ms")
print(f"Inference speed: {n_iterations * len(test_data) / total_time:.2f} samples/second")


Warming up...
Measuring inference time...
Total time for 100 iterations: 1.9151 seconds
Average time per batch: 19.15 ms
Average time per sample: 0.23 ms
Inference speed: 4281.84 samples/second


3. tsne

In [20]:
from sklearn.manifold import TSNE
# 使用 t-SNE 进行降维
tsne = TSNE(n_components=2, random_state=seed)
X_tsne = tsne.fit_transform(X)

In [21]:
from sklearn import tree
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

clf_tsne = RandomForestClassifier(random_state=seed)

# fit data (using k-fold)
for i, (train_index, test_index) in enumerate(group_kfold.split(X_tsne, y, groups)):
    print(f"Fold {i}:")
    # training
    clf_tsne.fit(X_tsne[train_index], y[train_index])
    
    # testing
    y_pred = clf_tsne.predict(X_tsne[test_index])
    y_pred_proba = clf_tsne.predict_proba(X_tsne[test_index])[:, 1]  # probabilities for ROC AUC

    print(f"Training Accuracy: {accuracy_score(y[train_index], clf_tsne.predict(X_tsne[train_index]))}")
    print(f"Validation Accuracy: {accuracy_score(y[test_index], y_pred)}")
    print(f"Precision: {precision_score(y[test_index], y_pred)}")
    print(f"Recall: {recall_score(y[test_index], y_pred)}")
    print(f"F1 Score: {f1_score(y[test_index], y_pred)}")
    print(f"ROC AUC: {roc_auc_score(y[test_index], y_pred_proba)}")
    print(f"Confusion Matrix:\n{confusion_matrix(y[test_index], y_pred)}")
    print()

Fold 0:


Training Accuracy: 1.0
Validation Accuracy: 0.43902439024390244
Precision: 0.9696969696969697
Recall: 0.4155844155844156
F1 Score: 0.5818181818181818
ROC AUC: 0.6961038961038961
Confusion Matrix:
[[ 4  1]
 [45 32]]

Fold 1:
Training Accuracy: 1.0
Validation Accuracy: 0.9024390243902439
Precision: 0.5555555555555556
Recall: 1.0
F1 Score: 0.7142857142857143
ROC AUC: 0.9451388888888889
Confusion Matrix:
[[64  8]
 [ 0 10]]

Fold 2:
Training Accuracy: 1.0
Validation Accuracy: 0.9512195121951219
Precision: 0.875
Recall: 0.7
F1 Score: 0.7777777777777778
ROC AUC: 0.9840277777777777
Confusion Matrix:
[[71  1]
 [ 3  7]]

Fold 3:
Training Accuracy: 1.0
Validation Accuracy: 0.8414634146341463
Precision: 0.42105263157894735
Recall: 0.8
F1 Score: 0.5517241379310345
ROC AUC: 0.9375
Confusion Matrix:
[[61 11]
 [ 2  8]]

Fold 4:
Training Accuracy: 1.0
Validation Accuracy: 0.7195121951219512
Precision: 0.23076923076923078
Recall: 0.6666666666666666
F1 Score: 0.34285714285714286
ROC AUC: 0.77397260273972

In [22]:
import time
import numpy as np

# Get a subset of test data for inference
# For this example, I'll use the last fold from the GroupKFold as test data
for i, (train_index, test_index) in enumerate(group_kfold.split(X, y, groups)):
    if i == 4:  # Use the last fold as test data
        test_data = tsne.fit_transform(X[test_index])
        break

# Warm-up runs
print("Warming up...")
for _ in range(10):
    _ = clf_tsne.predict(test_data)

# Measure inference time
print("Measuring inference time...")
n_iterations = 100
start_time = time.time()

for _ in range(n_iterations):
    _ = clf_tsne.predict(test_data)

end_time = time.time()

# Calculate statistics
total_time = end_time - start_time
avg_time_per_batch = total_time / n_iterations
avg_time_per_sample = total_time / (n_iterations * len(test_data))

print(f"Total time for {n_iterations} iterations: {total_time:.4f} seconds")
print(f"Average time per batch: {avg_time_per_batch*1000:.2f} ms")
print(f"Average time per sample: {avg_time_per_sample*1000:.2f} ms")
print(f"Inference speed: {n_iterations * len(test_data) / total_time:.2f} samples/second")


Warming up...
Measuring inference time...
Total time for 100 iterations: 1.6883 seconds
Average time per batch: 16.88 ms
Average time per sample: 0.21 ms
Inference speed: 4857.00 samples/second
