In [1]:
import numpy as np
import pandas as pd

from pathlib import Path

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical
from sklearn.utils import resample
from model import LSTMModel
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import make_pipeline
from collections import Counter
import numpy as np

In [2]:
# Where the upstream scripts write their results, and which file to read from there
DATASET_FNAME = 'dataset1_result_feature.csv'
DATA_PATH = Path('..') / 'results'

In [3]:
# Load the feature table from DATA_PATH / DATASET_FNAME; the first CSV column
# is used as the DataFrame index.
try:
    dataset = pd.read_csv(DATA_PATH / DATASET_FNAME, index_col=0)
except FileNotFoundError:
    # Only a missing file gets the hint below. Other I/O errors (permissions,
    # path is a directory, ...) propagate unchanged so they are not misreported
    # as "file not found".
    print('File not found, try to run previous crowdsignals scripts first!')
    raise  # bare raise re-raises the active exception with its original traceback

In [4]:
# Split the frame into the feature matrix X (everything except the label
# columns) and the label matrix y (an independent copy of the label columns).
LABEL_COLUMNS = [
    'labelnormal',
    'labelturnright',
    'labelturnleft',
    'labelbrake',
    'labelstop',
    'labelaccelerate',
]
X = dataset.drop(columns=LABEL_COLUMNS)
y = dataset[LABEL_COLUMNS].copy()

In [5]:
# Hold out 30% of the rows as the test set using a seeded random split
# (random_state=42 makes the partition reproducible).
# An earlier version of this cell split by position instead (first 70% of the
# rows for training, the remainder for testing); that chronological split is
# no longer used.
# NOTE(review): if the rows are time-ordered and features are derived from
# neighbouring rows, a shuffled split may leak information between train and
# test — confirm that the random split is intended.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [6]:
# Rebalance the training split only; X_test / y_test are not touched here.
# NOTE(review): SMOTE with sampling_strategy='all' already raises every class to
# the majority-class count, so the 'not minority' under-sampler that follows
# should have no rows left to remove — confirm whether a partial over-sample
# followed by under-sampling was intended.
pipeline = make_pipeline(
    SMOTE(sampling_strategy='all', random_state=42, k_neighbors=5),  # SMOTE parameters (tunable)
    RandomUnderSampler(sampling_strategy='not minority', random_state=42)  # RandomUnderSampler parameters (tunable)
)

# Run the pipeline to resample the training data.
# np.asarray accepts both the DataFrame produced by the split and the ndarray
# this cell assigns back to y_train, so re-running the cell no longer fails
# with AttributeError on `.values`.
X_train, y_train = pipeline.fit_resample(X_train, np.asarray(y_train))

In [7]:
def _as_lstm_input(features):
    """Return ``features`` as a 3-D array of shape (n_samples, 1, n_features).

    Parameters
    ----------
    features : array-like
        Either a 2-D table (n_samples, n_features) or an array that already has
        the (n_samples, 1, n_features) layout.

    Returns
    -------
    numpy.ndarray
        A copy of the data viewed with a single-step middle axis.

    Raises
    ------
    ValueError
        If the data cannot be laid out as (n_samples, 1, n_features).
    """
    arr = np.array(features)
    # Use the last axis as the feature count so that input which is already
    # (n_samples, 1, n_features) passes through unchanged; indexing shape[1]
    # would read the length-1 axis on a second run and make the reshape fail.
    return arr.reshape(arr.shape[0], 1, arr.shape[-1])


# Convert the train/test feature tables to numpy arrays with a length-1 middle
# axis before they are handed to the model.
X_train = _as_lstm_input(X_train)
X_test = _as_lstm_input(X_test)

In [8]:
# Build the model from the training features and training labels.
# NOTE(review): LSTMModel is imported from the local `model` module; what its
# constructor does with X_train / y_train is not visible in this notebook —
# confirm against that module.
model = LSTMModel(X_train, y_train)

  super().__init__(**kwargs)


In [13]:
# Train the model (epochs=10 and batch_size=32 are passed to LSTMModel.train).
# NOTE(review): the recorded log already shows ~0.95 accuracy in epoch 1 and the
# execution counter jumps from 8 to 13, so this output may come from re-running
# the cell on an already-trained model — restart the kernel and run all cells
# to confirm the numbers.
model.train(epochs=10, batch_size=32)

Epoch 1/10
[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9484 - loss: 0.1450
Epoch 2/10
[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9515 - loss: 0.1459
Epoch 3/10
[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9535 - loss: 0.1321
Epoch 4/10
[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9507 - loss: 0.1358
Epoch 5/10
[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9517 - loss: 0.1533
Epoch 6/10
[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9582 - loss: 0.1260
Epoch 7/10
[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9611 - loss: 0.1162
Epoch 8/10
[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9640 - loss: 0.1123
Epoch 9/10
[1m165/165[0m [32m━━━━━━━━

In [14]:
# Use the trained model to predict on the held-out test features
y_pred = model.predict(X_test)

[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


In [15]:
# Collapse the per-class columns into a single class index per row; argmax
# returns the position of the first maximum along axis 1.
# y_test is still a pandas DataFrame at this point, so both inputs are
# converted to ndarrays explicitly instead of depending on how np.argmax
# coerces a DataFrame.
y_pred_labels = np.argmax(np.asarray(y_pred), axis=1)
y_test_labels = np.argmax(np.asarray(y_test), axis=1)

In [16]:
# Score the predictions on the held-out split: overall accuracy first, then the
# per-class precision / recall / F1 table.
# NOTE(review): the printed label mentions feature selection and grid search,
# neither of which appears in the visible cells of this notebook — confirm the
# wording is still accurate.
accuracy = accuracy_score(y_true=y_test_labels, y_pred=y_pred_labels)
print("Model Accuracy with Selected Features and Grid Search:", accuracy)
report = classification_report(y_true=y_test_labels, y_pred=y_pred_labels)
print("Classification Report:\n", report)

Model Accuracy with Selected Features and Grid Search: 0.9235807860262009
Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.93      0.95       389
           1       0.73      0.85      0.79        13
           2       0.76      0.90      0.83        29
           3       0.54      0.88      0.67         8
           4       0.80      1.00      0.89        12
           5       0.46      0.86      0.60         7

    accuracy                           0.92       458
   macro avg       0.71      0.90      0.79       458
weighted avg       0.94      0.92      0.93       458

