In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import RootMeanSquaredError

In [6]:
# Load the combined sensor table into a DataFrame.
# A stray, indented `data = torch.load('sig_datasets.pt')` line used to follow
# here. It was removed: it raised IndentationError, `torch` is never imported,
# and it would have replaced the DataFrame the following cells index by column.
data_dir = "data/PdM/"
data = pd.read_csv(data_dir + "total.csv")


In [4]:
# Parse the 'created_at_datetime' column into pandas datetime values
data['created_at_datetime'] = data['created_at_datetime'].pipe(pd.to_datetime)

In [5]:
# Print the frame summary: index range, column dtypes and non-null counts
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 87506 entries, 0 to 87505
Data columns (total 48 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   asset_id             87506 non-null  object        
 1   created_at           87506 non-null  int64         
 2   created_at_datetime  87506 non-null  datetime64[ns]
 3   temperature          87506 non-null  int64         
 4   rms_x                87506 non-null  float64       
 5   rms_y                87506 non-null  float64       
 6   rms_z                87506 non-null  float64       
 7   rms_xyz              87506 non-null  float64       
 8   vel_rms_x            87506 non-null  float64       
 9   vel_rms_y            87506 non-null  float64       
 10  vel_rms_z            87506 non-null  float64       
 11  vel_rms_xyz          87506 non-null  float64       
 12  skewness_x           87506 non-null  float64       
 13  skewness_y           87506 non-

In [6]:
# Resampling (2-hour bins by default) and interpolation helper
def resample_and_interpolate(group, freq='2h'):
    """Resample one group's readings onto a regular time grid and fill gaps.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows to resample. Must contain a 'created_at_datetime' column holding
        datetime values; it becomes the index used for binning.
    freq : str or offset, default '2h'
        Bin width handed to ``DataFrame.resample``.

    Returns
    -------
    pandas.DataFrame
        Per-bin mean of every numeric column, indexed by the resampled
        timestamps, with empty bins filled by linear interpolation.
    """
    # Index by timestamp so resample() has a datetime axis to bin on
    indexed = group.set_index('created_at_datetime')
    # Keep numeric columns only; non-numeric ones cannot be averaged
    numeric_data = indexed.select_dtypes(include=[np.number])
    # Lowercase 'h' is the hour alias; uppercase 'H' is deprecated since pandas 2.2
    resampled_data = numeric_data.resample(freq).mean()
    # Bins without any reading come out as NaN; fill them linearly
    return resampled_data.interpolate(method='linear')

# Resample every asset independently, then flatten the (asset_id, timestamp)
# index back into ordinary columns
data_resampled = (
    data
    .groupby('asset_id')
    .apply(resample_and_interpolate)
    .reset_index()
)
print(data_resampled.head())  # quick look at the result


                               asset_id created_at_datetime    created_at  \
0  02dc4105-ca5e-4770-a6fc-d1fdff11fc1c 2024-01-09 00:00:00  1.704733e+09   
1  02dc4105-ca5e-4770-a6fc-d1fdff11fc1c 2024-01-09 02:00:00  1.704740e+09   
2  02dc4105-ca5e-4770-a6fc-d1fdff11fc1c 2024-01-09 04:00:00  1.704747e+09   
3  02dc4105-ca5e-4770-a6fc-d1fdff11fc1c 2024-01-09 06:00:00  1.704754e+09   
4  02dc4105-ca5e-4770-a6fc-d1fdff11fc1c 2024-01-09 08:00:00  1.704761e+09   

   temperature     rms_x     rms_y     rms_z   rms_xyz  vel_rms_x  vel_rms_y  \
0          9.0  0.046038  0.063046  0.062820  0.100203   0.603069   0.733438   
1          9.0  0.040017  0.044373  0.032458  0.067999   0.474454   0.589077   
2          9.0  0.014427  0.013719  0.019351  0.027764   0.235237   0.224910   
3          9.0  0.014610  0.013812  0.019520  0.028022   0.252305   0.207443   
4         10.0  0.013999  0.013907  0.019003  0.027395   0.228062   0.199309   

   ...  peak2peak_y  peak2peak_z  vel_peak2peak_x  vel_p

In [None]:
# Split columns into model inputs (features) and regression targets (labels)
features = [
    'rms_x', 'rms_y', 'rms_z', 'rms_xyz',
    'vel_rms_x', 'vel_rms_y', 'vel_rms_z', 'vel_rms_xyz',
    'skewness_x', 'skewness_y', 'skewness_z',
    'vel_skewness_x', 'vel_skewness_y', 'vel_skewness_z',
    'kurtosis_x', 'kurtosis_y', 'kurtosis_z',
    'vel_kurtosis_x', 'vel_kurtosis_y', 'vel_kurtosis_z',
    'crest_factor_x', 'crest_factor_y', 'crest_factor_z',
    'vel_crest_factor_x', 'vel_crest_factor_y', 'vel_crest_factor_z',
    'peak_x', 'peak_y', 'peak_z',
    'vel_peak_x', 'vel_peak_y', 'vel_peak_z',
    'peak2peak_x', 'peak2peak_y', 'peak2peak_z',
    'vel_peak2peak_x', 'vel_peak2peak_y', 'vel_peak2peak_z',
]
labels = ['imbalance_health', 'misalignment_health', 'looseness_health', 'bearing_health', 'asset_health']

X = data_resampled[features]
y = data_resampled[labels]

# Standardise the feature columns.
# NOTE(review): the scaler is fitted on every row, including rows that end up
# in the validation/test windows after the later split — consider fitting on
# training rows only.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Put the scaled values back next to asset_id/labels so the windows below are
# cut from scaled data. Previously X_scaled was computed but never used and the
# model was fed raw, unscaled features.
data_scaled = data_resampled.copy()
data_scaled[features] = X_scaled

# Window configuration
n_timesteps = 12  # 12 two-hour steps = 24 hours of history per window
n_features = len(features)

# Build sliding windows per asset so no window spans two different assets.
# Each sample is n_timesteps consecutive rows of features; its target is the
# label row immediately after the window.
X_sequences = []
y_sequences = []
for asset_id, group in data_scaled.groupby('asset_id'):
    # Convert once per asset; slicing NumPy arrays is far cheaper than .iloc per window
    feature_values = group[features].to_numpy()
    label_values = group[labels].to_numpy()
    for start in range(len(group) - n_timesteps):
        stop = start + n_timesteps
        X_sequences.append(feature_values[start:stop])
        y_sequences.append(label_values[stop])

X_sequences = np.array(X_sequences)
y_sequences = np.array(y_sequences)


In [None]:
# Split into train / validation / test: 20% is held out for test, then 20% of
# the remainder for validation, both with the same fixed seed.
# NOTE(review): train_test_split shuffles by default and the windows overlap
# (stride 1), so neighbouring windows can land on both sides of a split —
# consider splitting by time or by asset instead.
split_options = dict(test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X_sequences, y_sequences, **split_options)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, **split_options)

In [None]:
# Model definition: two Conv1D + max-pool stages over the time axis, then a
# small dense head
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(n_timesteps, n_features)))
model.add(MaxPooling1D(pool_size=2))
model.add(Conv1D(filters=128, kernel_size=3, activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(50, activation='relu'))
model.add(Dense(5))  # five output neurons for the multi-target regression

# Compile: MSE loss, Adam optimiser, RMSE reported under the name "rmse"
optimizer = Adam(learning_rate=0.001)
rmse_metric = RootMeanSquaredError(name="rmse")
model.compile(optimizer=optimizer, loss='mse', metrics=[rmse_metric])

In [None]:
# Train the model, tracking validation metrics after every epoch
history = model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=32,
    validation_data=(X_val, y_val),
)

# Plot the training and validation RMSE curves
for metric_key, curve_label in (('rmse', 'Training RMSE'), ('val_rmse', 'Validation RMSE')):
    plt.plot(history.history[metric_key], label=curve_label)
plt.xlabel('Epoch')
plt.ylabel('RMSE')
plt.legend()
plt.show()


In [None]:
# Evaluate on the held-out test windows; evaluate() returns the loss followed
# by the compiled metric
evaluation = model.evaluate(X_test, y_test)
test_loss, test_rmse = evaluation
print(f"Test RMSE: {test_rmse}")


In [None]:
# Predict on the held-out test windows
y_pred = model.predict(X_test)
# Clamp predictions into [0, 1]
# NOTE(review): assumes the health labels are scaled to 0..1 — confirm
y_pred = np.clip(y_pred, 0, 1)

# One panel per target column. The panel count comes from the data instead of
# a hard-coded 5, so the plot keeps working if the label list changes.
n_outputs = y_test.shape[1]
fig, axes = plt.subplots(n_outputs, 1, figsize=(10, 4 * n_outputs), squeeze=False)

# Overlay true and predicted values for each target
for i, ax in enumerate(axes[:, 0]):
    ax.plot(y_test[:, i], label=f'True Label {i+1}', alpha=0.7)
    ax.plot(y_pred[:, i], label=f'Predicted Label {i+1}', alpha=0.7)
    ax.set_xlabel('Samples')
    ax.set_ylabel('Health Metric')
    ax.set_title(f'Label {i+1} Comparison')
    ax.legend()

fig.tight_layout()
plt.show()
