In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, GlobalAveragePooling1D, Dense, Flatten
from sklearn.model_selection import train_test_split

from keras.models import Model
from keras.layers import Input, Dense

In [2]:
# 1. 데이터 로드 및 전처리
# rms_x, rms_y, ..., vel_peak2peak_z 38개 피처
# 타임스텝마다 데이터가 있다고 가정

df = pd.read_csv('data/PdM/55285839-9b78-48d8-9f4e-573190ace016_data.csv') # 데이터 로드

In [3]:
df = df.groupby('time')[['rms_x', 'rms_y', 'rms_z', 'rms_xyz', 'vel_rms_x', 'vel_rms_y', 'vel_rms_z', 'vel_rms_xyz',
                      'skewness_x', 'skewness_y', 'skewness_z', 'vel_skewness_x', 'vel_skewness_y', 'vel_skewness_z',
                      'kurtosis_x', 'kurtosis_y', 'kurtosis_z', 'vel_kurtosis_x', 'vel_kurtosis_y', 'vel_kurtosis_z',
                      'crest_factor_x', 'crest_factor_y', 'crest_factor_z', 'vel_crest_factor_x', 'vel_crest_factor_y', 'vel_crest_factor_z',
                      'peak_x', 'peak_y', 'peak_z', 'vel_peak_x', 'vel_peak_y', 'vel_peak_z',
                      'peak2peak_x', 'peak2peak_y', 'peak2peak_z', 'vel_peak2peak_x', 'vel_peak2peak_y', 'vel_peak2peak_z', 'imbalance_health']].mean().reset_index()

In [4]:
# 1. 데이터 로드 및 전처리
# rms_x, rms_y, ..., vel_peak2peak_z 38개 피처
# 타임스텝마다 데이터가 있다고 가정


X = df.iloc[:, 1:39].values  # 피처만 추출
y = df.iloc[:, 39:40].values # imbalance
#y = np.where(np.isnan(y), 1, y) # nan을 0(비정상)으로
y = np.where(y != 1, 0, 1) 

# 피처 스케일링
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [5]:
# 2. 훈련 및 테스트 데이터 분할
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [6]:
# 3. 모델 구축
input_layer = Input(shape=(X_train.shape[1],))
encoder = Dense(32, activation='relu')(input_layer)
decoder = Dense(X_train.shape[1], activation='sigmoid')(encoder)

autoencoder = Model(input_layer, decoder)
autoencoder.compile(optimizer='adam', loss='mse')


In [7]:
# 2D 데이터를 3D로 변환 (time_steps=1)
X_train = np.expand_dims(X_train, axis=-1)  # shape: (5724, 1, 38)
X_train.shape

(168, 38, 1)

In [8]:
# 학습
autoencoder.fit(X_train, X_train, epochs=50, batch_size=256, shuffle=True)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x23ba0e47850>

In [9]:
from sklearn.metrics import classification_report

# 테스트 데이터에 대한 예측
y_pred = autoencoder.predict(X_test)

# MSE (Mean Squared Error)로 재구성 오차 계산
mse = np.mean(np.power(X_test - y_pred, 2), axis=1)  # 각 샘플의 MSE 계산

# 임계값 설정 (예: 재구성 오차의 95번째 백분위수)
threshold = np.percentile(mse, 97.5)

# 이상치 판별
y_pred_classes = np.where(mse > threshold, 0, 1)  # 1은 이상치, 0은 정상

# y_test를 1차원으로 변환
y_test_flat = y_test.flatten()  # (1432, 1) -> (1432,)

# 분류 성능 보고서 출력
print("\n분류 성능 보고서:")
print(classification_report(y_test_flat, y_pred_classes, target_names=["비정상", "정상"]))


분류 성능 보고서:
              precision    recall  f1-score   support

         비정상       0.00      0.00      0.00         4
          정상       0.90      0.95      0.92        38

    accuracy                           0.86        42
   macro avg       0.45      0.47      0.46        42
weighted avg       0.81      0.86      0.84        42

