In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import pandas as pd
import time
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.ensemble import RandomForestClassifier

In [8]:
# 데이터 불러오기 및 가공
data = pd.read_csv('processingData.csv')
data['Timestamp'] = pd.to_datetime(data['Timestamp'])
data['Timestamp'] = data['Timestamp'].astype('int64') // 10**9
data = data[['Timestamp', 'DI_uiSpeed', 'DI_vehicleSpeed']]

# 데이터 정규화
scaler = StandardScaler()
data_normalized = scaler.fit_transform(data)
print(f"data_normalized.shape: {data_normalized.shape}")

# 시퀀스 생성
sequence_length = 150  # 시퀀스 길이
X = []
y = []

for i in range(len(data_normalized) - sequence_length):
    X.append(data_normalized[i:i+sequence_length].flatten())  # 시퀀스 데이터를 평탄화
    y.append(data_normalized[i+sequence_length, 1])  # Physical_value를 타겟으로 사용

X = np.array(X)
y = np.array(y)

# 데이터셋을 학습용과 테스트용으로 분할
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

data_normalized.shape: (10492, 3)


In [15]:
data = pd.read_csv('merged_data_final.csv')
data['Timestamp'] = pd.to_datetime(data['Timestamp'])
data['Timestamp'] = data['Timestamp'].astype('int64') // 10**9
data = data[['Timestamp', 'DI_uiSpeed', 'DI_vehicleSpeed','Physical_value','acc_value']]

# 2. 데이터 정규화
scaler = MinMaxScaler()
data_normalized = scaler.fit_transform(data)
print(f"data_normalized.shape: {data_normalized.shape}")

# 3. 시퀀스 생성
sequence_length = 150  # 시퀀스 길이
X = []
y = []

# acc_value를 타겟으로 설정 (data_normalized의 마지막 열)
for i in range(len(data_normalized) - sequence_length):
    X.append(data_normalized[i:i+sequence_length, :-1].flatten())  # acc_value를 제외하고 시퀀스 데이터를 1차원으로 변환
    y.append(data_normalized[i+sequence_length, -1])  # acc_value를 타겟으로 사용

X = np.array(X)
y = np.array(y)

data_normalized.shape: (10492, 5)


In [16]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 5. Random Forest 모델 학습
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)  # Random Forest 모델 생성
rf_model.fit(X_train, y_train)  # 모델 학습

# 6. 모델 예측
y_pred = rf_model.predict(X_test)  # 테스트 데이터에 대한 예측값 생성

# 7. 평가 지표 계산
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# 8. 결과 출력
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

Accuracy: 0.9990
Precision: 0.9981
Recall: 1.0000
F1-Score: 0.9991
