In [25]:
from collections import Counter
from pathlib import Path

import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import make_pipeline
from imblearn.under_sampling import RandomUnderSampler
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from sklearn.model_selection import train_test_split

from model import LSTMModel

In [26]:
# Input location: the directory (a relative path, so it resolves against the
# notebook's working directory) and the CSV file name read by the load cell.
DATA_PATH = Path('../results/')
DATASET_FNAME = 'dataset_result_feature.csv'

In [27]:
# Load the feature dataset; the first CSV column is used as the row index.
try:
    dataset = pd.read_csv(DATA_PATH / DATASET_FNAME, index_col=0)
except FileNotFoundError:
    # Only a genuinely missing file gets this hint. Other I/O failures
    # (permissions, path is a directory, ...) propagate unchanged so the
    # printed message can never contradict the real error.
    print('File not found, try to run previous crowdsignals scripts first!')
    raise
# Discard the first row of the loaded frame.
# NOTE(review): the reason for dropping it is not visible in this notebook — confirm.
dataset = dataset[1:]

In [28]:
# Split the frame column-wise: the six label indicator columns form the target,
# every remaining column is a feature.
LABEL_COLUMNS = [
    'labelnormal',
    'labelturnright',
    'labelturnleft',
    'labelbrake',
    'labelstop',
    'labelaccelerate',
]
y = dataset[LABEL_COLUMNS].copy()
X = dataset.drop(columns=LABEL_COLUMNS)

In [29]:
# Hold out 30% of the rows for testing. train_test_split shuffles by default,
# so this is a randomized (seeded) split, not a chronological one.
# NOTE(review): if the rows are time-ordered, a shuffled split may place
# neighbouring samples on both sides of the split — confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [30]:
# Rebalance the training classes in two chained steps:
#   1. SMOTE with sampling_strategy='all' resamples every class using
#      synthetic samples interpolated from 5 nearest neighbours.
#   2. RandomUnderSampler with 'not minority' randomly trims every class
#      except the smallest one.
pipeline = make_pipeline(
    SMOTE(sampling_strategy='all', random_state=42, k_neighbors=5),
    RandomUnderSampler(sampling_strategy='not minority', random_state=42),
)

# Run the pipeline on the training split only; X_test / y_test are not touched here.
X_train, y_train = pipeline.fit_resample(X_train, y_train.values)

# Collapse each label row to the index of its largest entry so the
# per-class sample counts can be inspected.
y_train_labels = np.argmax(y_train, axis=1)
print(Counter(y_train_labels))

Counter({0: 1790, 1: 1790, 2: 1790, 3: 1790, 4: 1790, 5: 1790})


In [31]:
# Materialise both feature sets as numpy arrays.
X_train = np.array(X_train)
X_test = np.array(X_test)

# Insert a length-1 middle axis so each sample becomes a (1, n_features) slice,
# i.e. arrays go from (n_samples, n_features) to (n_samples, 1, n_features).
train_rows, train_cols = X_train.shape[0], X_train.shape[1]
test_rows, test_cols = X_test.shape[0], X_test.shape[1]
X_train = X_train.reshape(train_rows, 1, train_cols)
X_test = X_test.reshape(test_rows, 1, test_cols)

In [32]:
# Instantiate the project's LSTMModel with the resampled training data.
# NOTE(review): LSTMModel lives in model.py (not shown) — how it consumes
# X_train / y_train is assumed from this call only.
LEARNING_RATE = 1e-3
model = LSTMModel(X_train, y_train, lr=LEARNING_RATE)

In [33]:
# Fit the model using the training data handed to it at construction time.
EPOCHS = 100
BATCH_SIZE = 64
model.train(epochs=EPOCHS, batch_size=BATCH_SIZE)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [34]:
# Use the trained model to predict on the held-out test features
y_pred = model.predict(X_test)



In [35]:
# Reduce every row to a single class index: the column holding the largest value.
# For y_test that is the position of the active label column; for y_pred it is
# the column the model scored highest.
y_test_labels = np.argmax(y_test.to_numpy(), axis=1)
y_pred_labels = np.argmax(y_pred, axis=1)

In [36]:
# Overall accuracy on the test split
accuracy = accuracy_score(y_test_labels, y_pred_labels)
print("Model Accuracy with Selected Features:", accuracy)

# Per-class precision / recall / F1 breakdown
report = classification_report(y_test_labels, y_pred_labels)
print("Classification Report:\n", report)

Model Accuracy with Selected Features: 0.8976293103448276
Classification Report:
               precision    recall  f1-score   support

           0       0.96      0.92      0.94       771
           1       0.69      0.74      0.71        50
           2       0.53      0.93      0.67        43
           3       0.57      0.53      0.55        15
           4       0.87      0.94      0.90        35
           5       0.70      0.50      0.58        14

    accuracy                           0.90       928
   macro avg       0.72      0.76      0.73       928
weighted avg       0.91      0.90      0.90       928

