### 1. Baseline
* original dataset

In [1]:
import numpy as np
import os

MFCC = 13 # num of MFCC features

mfcc_folder = '../dataset/mfcc_features'
np_file = os.listdir(mfcc_folder)

# prepare data
X = np.zeros((len(np_file), MFCC)) # (n_samples, n_features)
y = np.zeros(len(np_file), dtype=int)  # (n_samples, )
# each group is one video
filenames = set()
groups = np.zeros(len(np_file), dtype=int)  # (n_samples, )

# load form directory
for i, file in enumerate(np_file):
    mfcc = np.load(os.path.join(mfcc_folder, file))
    X[i] = mfcc.mean(axis=1)    # (MFCC, )
    # 0: normal driving, 1: car crash
    label = 0 if int(file.split('_')[0]) < 5 else 1
    y[i] = label    
    filename = file[file.find('_') + 1:file.rfind('_')]  # segment is the second part of the filename
    # search with set to find unique value
    if filename not in filenames:
        filenames.add(filename)
        groups[i] = len(filenames)
    else:
        groups[i] = list(filenames).index(filename)

print(X[0])  # print the first sample
print(y[0])  # print the label of the first sample
print(groups[0])  # print the group of the first sample

[-398.98666382  113.03168488  -11.29673195   27.68769264    3.6051693
   11.02839375   -4.29368401    7.7390089     3.64515638    5.37020397
   -3.23544407    9.0578289    -4.63530731]
0
1


In [2]:
# random seed
import random
random.seed(0)
seed = 0

In [3]:
from sklearn.model_selection import GroupKFold
group_kfold = GroupKFold(n_splits=5) # 5-fold
print(group_kfold.get_n_splits(X, y, groups))  # print number of splits

5


In [4]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

clf = GradientBoostingClassifier(random_state=seed)

# fit data (using k-fold)
for i, (train_index, test_index) in enumerate(group_kfold.split(X, y, groups)):
    print(f"Fold {i}:")
    # training
    clf.fit(X[train_index], y[train_index])
    # testing
    y_pred = clf.predict(X[test_index])
    y_pred_prob = clf.predict_proba(X[test_index])[:, 1]  # probabilities for ROC AUC

    print(f"Training Accuracy: {accuracy_score(y[train_index], clf.predict(X[train_index]))}")
    print(f"Validation Accuracy: {accuracy_score(y[test_index], y_pred)}")
    print(f"Precision: {precision_score(y[test_index], y_pred)}")
    print(f"Recall: {recall_score(y[test_index], y_pred)}")
    print(f"F1 Score: {f1_score(y[test_index], y_pred)}")
    print(f"ROC AUC: {roc_auc_score(y[test_index], y_pred_prob)}")
    print(f"Confusion Matrix:\n{confusion_matrix(y[test_index], y_pred)}")
    print()

Fold 0:
Training Accuracy: 1.0
Validation Accuracy: 0.24390243902439024
Precision: 1.0
Recall: 0.20512820512820512
F1 Score: 0.3404255319148936
ROC AUC: 0.9358974358974359
Confusion Matrix:
[[ 4  0]
 [62 16]]

Fold 1:
Training Accuracy: 1.0
Validation Accuracy: 0.9390243902439024
Precision: 0.6428571428571429
Recall: 1.0
F1 Score: 0.782608695652174
ROC AUC: 0.9863013698630136
Confusion Matrix:
[[68  5]
 [ 0  9]]

Fold 2:
Training Accuracy: 1.0
Validation Accuracy: 0.7439024390243902
Precision: 0.2857142857142857
Recall: 0.8888888888888888
F1 Score: 0.43243243243243246
ROC AUC: 0.821917808219178
Confusion Matrix:
[[53 20]
 [ 1  8]]

Fold 3:
Training Accuracy: 1.0
Validation Accuracy: 0.926829268292683
Precision: 0.6923076923076923
Recall: 0.8181818181818182
F1 Score: 0.75
ROC AUC: 0.9731113956466069
Confusion Matrix:
[[67  4]
 [ 2  9]]

Fold 4:
Training Accuracy: 1.0
Validation Accuracy: 0.7195121951219512
Precision: 0.23076923076923078
Recall: 0.6666666666666666
F1 Score: 0.34285714285

In [5]:
import time
import numpy as np

# Get a subset of test data for inference
# For this example, I'll use the last fold from the GroupKFold as test data
for i, (train_index, test_index) in enumerate(group_kfold.split(X, y, groups)):
    if i == 4:  # Use the last fold as test data
        test_data = X[test_index]
        break

# Warm-up runs
print("Warming up...")
for _ in range(10):
    _ = clf.predict(test_data)

# Measure inference time
print("Measuring inference time...")
n_iterations = 100
start_time = time.time()

for _ in range(n_iterations):
    _ = clf.predict(test_data)

end_time = time.time()

# Calculate statistics
total_time = end_time - start_time
avg_time_per_batch = total_time / n_iterations
avg_time_per_sample = total_time / (n_iterations * len(test_data))

print(f"Total time for {n_iterations} iterations: {total_time:.4f} seconds")
print(f"Average time per batch: {avg_time_per_batch*1000:.2f} ms")
print(f"Average time per sample: {avg_time_per_sample*1000:.2f} ms")
print(f"Inference speed: {n_iterations * len(test_data) / total_time:.2f} samples/second")


Warming up...
Measuring inference time...
Total time for 100 iterations: 0.1061 seconds
Average time per batch: 1.06 ms
Average time per sample: 0.01 ms
Inference speed: 77277.53 samples/second


2. DataAug

In [6]:
def read_dataAug(mfcc_folder):
    np_file = os.listdir(mfcc_folder)

    # prepare data
    X_aug = np.zeros((len(np_file), MFCC)) # (n_samples, n_features)
    y_aug = np.zeros(len(np_file), dtype=int)  # (n_samples, )
    # each group is one video
    filenames = set()
    groups = np.zeros(len(np_file), dtype=int)  # (n_samples, )

    # load form directory
    for i, file in enumerate(np_file):
        mfcc = np.load(os.path.join(mfcc_folder, file))
        X_aug[i] = mfcc.mean(axis=1)    # (MFCC, )
        # 0: normal driving, 1: car crash
        label = 0 if int(file.split('_')[1]) < 5 else 1
        y_aug[i] = label    
        filename = file[file.find('_') + 1:file.rfind('_')]  # segment is the second part of the filename
        # search with set to find unique value
        if filename not in filenames:
            filenames.add(filename)
            groups[i] = len(filenames)
        else:
            groups[i] = list(filenames).index(filename)

    return X_aug, y_aug

if __name__ == "__main__":
    mfcc_folder = '../dataset/aug_mfcc_features'
    X_aug, y_aug = read_dataAug(mfcc_folder)
    print(X_aug[0])  # print the first sample
    print(y_aug[0])  # print the label of the first sample
    print(groups[0])  # print the group of the first sample

[-270.81295776  -15.02099133   38.02788925   13.21731472   10.0347805
    4.48674297    3.34693432    3.60442758    4.46276379    3.18351889
    2.26930118    1.81936967    0.86694759]
0
1


In [7]:
from sklearn import tree
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

clf_aug = GradientBoostingClassifier(random_state=seed)

# fit data (using k-fold)
for i, (train_index, test_index) in enumerate(group_kfold.split(X, y, groups)):
    print(f"Fold {i}:")
    # training
    X_combined = np.vstack((X[train_index].copy(), X_aug.copy()))
    y_combined = np.hstack((y[train_index].copy(), y_aug.copy()))
    
    clf_aug.fit(X_combined, y_combined)
    
    # testing
    y_pred = clf_aug.predict(X[test_index])
    y_pred_proba = clf_aug.predict_proba(X[test_index])[:, 1]  # probabilities for ROC AUC

    print(f"Training Accuracy: {accuracy_score(y_combined, clf_aug.predict(X_combined))}")
    print(f"Validation Accuracy: {accuracy_score(y[test_index], y_pred)}")
    print(f"Precision: {precision_score(y[test_index], y_pred)}")
    print(f"Recall: {recall_score(y[test_index], y_pred)}")
    print(f"F1 Score: {f1_score(y[test_index], y_pred)}")
    print(f"ROC AUC: {roc_auc_score(y[test_index], y_pred_proba)}")
    print(f"Confusion Matrix:\n{confusion_matrix(y[test_index], y_pred)}")
    print()

Fold 0:
Training Accuracy: 0.9986449864498645
Validation Accuracy: 0.36585365853658536
Precision: 1.0
Recall: 0.3333333333333333
F1 Score: 0.5
ROC AUC: 0.9743589743589743
Confusion Matrix:
[[ 4  0]
 [52 26]]

Fold 1:
Training Accuracy: 0.994579945799458
Validation Accuracy: 0.926829268292683
Precision: 0.6
Recall: 1.0
F1 Score: 0.75
ROC AUC: 0.9893455098934552
Confusion Matrix:
[[67  6]
 [ 0  9]]

Fold 2:
Training Accuracy: 0.997289972899729
Validation Accuracy: 0.9024390243902439
Precision: 0.5333333333333333
Recall: 0.8888888888888888
F1 Score: 0.6666666666666666
ROC AUC: 0.9604261796042618
Confusion Matrix:
[[66  7]
 [ 1  8]]

Fold 3:
Training Accuracy: 0.9932249322493225
Validation Accuracy: 0.926829268292683
Precision: 0.6923076923076923
Recall: 0.8181818181818182
F1 Score: 0.75
ROC AUC: 0.9846350832266326
Confusion Matrix:
[[67  4]
 [ 2  9]]

Fold 4:
Training Accuracy: 0.9986449864498645
Validation Accuracy: 0.8658536585365854
Precision: 0.4444444444444444
Recall: 0.8888888888888

In [8]:
import time
import numpy as np

# Get a subset of test data for inference
# For this example, I'll use the last fold from the GroupKFold as test data
for i, (train_index, test_index) in enumerate(group_kfold.split(X, y, groups)):
    if i == 4:  # Use the last fold as test data
        test_data = X[test_index]
        break

# Warm-up runs
print("Warming up...")
for _ in range(10):
    _ = clf_aug.predict(test_data)

# Measure inference time
print("Measuring inference time...")
n_iterations = 100
start_time = time.time()

for _ in range(n_iterations):
    _ = clf_aug.predict(test_data)

end_time = time.time()

# Calculate statistics
total_time = end_time - start_time
avg_time_per_batch = total_time / n_iterations
avg_time_per_sample = total_time / (n_iterations * len(test_data))

print(f"Total time for {n_iterations} iterations: {total_time:.4f} seconds")
print(f"Average time per batch: {avg_time_per_batch*1000:.2f} ms")
print(f"Average time per sample: {avg_time_per_sample*1000:.2f} ms")
print(f"Inference speed: {n_iterations * len(test_data) / total_time:.2f} samples/second")


Warming up...
Measuring inference time...
Total time for 100 iterations: 0.1191 seconds
Average time per batch: 1.19 ms
Average time per sample: 0.01 ms
Inference speed: 68822.65 samples/second


3. tsne

In [9]:
from sklearn.manifold import TSNE
# 使用 t-SNE 进行降维
tsne = TSNE(n_components=2, random_state=seed)
X_tsne = tsne.fit_transform(X)

In [10]:
from sklearn import tree
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix

clf_tsne = GradientBoostingClassifier(random_state=seed)

# fit data (using k-fold)
for i, (train_index, test_index) in enumerate(group_kfold.split(X_tsne, y, groups)):
    print(f"Fold {i}:")
    # training
    clf_tsne.fit(X_tsne[train_index], y[train_index])
    
    # testing
    y_pred = clf_tsne.predict(X_tsne[test_index])
    y_pred_proba = clf_tsne.predict_proba(X_tsne[test_index])[:, 1]  # probabilities for ROC AUC

    print(f"Training Accuracy: {accuracy_score(y[train_index], clf_tsne.predict(X_tsne[train_index]))}")
    print(f"Validation Accuracy: {accuracy_score(y[test_index], y_pred)}")
    print(f"Precision: {precision_score(y[test_index], y_pred)}")
    print(f"Recall: {recall_score(y[test_index], y_pred)}")
    print(f"F1 Score: {f1_score(y[test_index], y_pred)}")
    print(f"ROC AUC: {roc_auc_score(y[test_index], y_pred_proba)}")
    print(f"Confusion Matrix:\n{confusion_matrix(y[test_index], y_pred)}")
    print()

Fold 0:
Training Accuracy: 1.0
Validation Accuracy: 0.4024390243902439
Precision: 1.0
Recall: 0.3717948717948718
F1 Score: 0.5420560747663551
ROC AUC: 0.9631410256410255
Confusion Matrix:
[[ 4  0]
 [49 29]]

Fold 1:
Training Accuracy: 0.9725609756097561
Validation Accuracy: 0.8902439024390244
Precision: 0.5
Recall: 0.7777777777777778
F1 Score: 0.6086956521739131
ROC AUC: 0.949771689497717
Confusion Matrix:
[[66  7]
 [ 2  7]]

Fold 2:
Training Accuracy: 0.9847560975609756
Validation Accuracy: 0.7926829268292683
Precision: 0.3181818181818182
Recall: 0.7777777777777778
F1 Score: 0.45161290322580644
ROC AUC: 0.8462709284627093
Confusion Matrix:
[[58 15]
 [ 2  7]]

Fold 3:
Training Accuracy: 0.9725609756097561
Validation Accuracy: 0.8536585365853658
Precision: 0.47058823529411764
Recall: 0.7272727272727273
F1 Score: 0.5714285714285714
ROC AUC: 0.9161331626120358
Confusion Matrix:
[[62  9]
 [ 3  8]]

Fold 4:
Training Accuracy: 0.9786585365853658
Validation Accuracy: 0.9024390243902439
Precis

In [11]:
import time
import numpy as np

# Get a subset of test data for inference
# For this example, I'll use the last fold from the GroupKFold as test data
for i, (train_index, test_index) in enumerate(group_kfold.split(X, y, groups)):
    if i == 4:  # Use the last fold as test data
        test_data = tsne.fit_transform(X[test_index])
        break

# Warm-up runs
print("Warming up...")
for _ in range(10):
    _ = clf_tsne.predict(test_data)

# Measure inference time
print("Measuring inference time...")
n_iterations = 100
start_time = time.time()

for _ in range(n_iterations):
    _ = clf_tsne.predict(test_data)

end_time = time.time()

# Calculate statistics
total_time = end_time - start_time
avg_time_per_batch = total_time / n_iterations
avg_time_per_sample = total_time / (n_iterations * len(test_data))

print(f"Total time for {n_iterations} iterations: {total_time:.4f} seconds")
print(f"Average time per batch: {avg_time_per_batch*1000:.2f} ms")
print(f"Average time per sample: {avg_time_per_sample*1000:.2f} ms")
print(f"Inference speed: {n_iterations * len(test_data) / total_time:.2f} samples/second")


Warming up...
Measuring inference time...
Total time for 100 iterations: 0.0919 seconds
Average time per batch: 0.92 ms
Average time per sample: 0.01 ms
Inference speed: 89192.37 samples/second
