In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pywt
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.decomposition import PCA
import cupy as cp
from sklearn.model_selection import train_test_split

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import classification_report

import tensorflow as tf

In [2]:
df = pd.read_csv('data_5528.csv')

In [3]:
df = df.loc[:2200]

In [4]:
df_label = pd.read_csv('55285839-9b78-48d8-9f4e-573190ace016_data.csv')

In [5]:
df_label = df_label.loc[:2200]

In [6]:
df['waveform_y']

0       -0.087890625,-0.05859375,-0.0703125,-0.0644531...
1       -0.064453125,-0.08203125,-0.076171875,-0.06445...
2       -0.029296875,-0.10546875,-0.064453125,-0.07031...
3       -0.052734375,-0.0703125,-0.076171875,-0.070312...
4       -0.1640625,0.0234375,0.029296875,-0.1875,-0.16...
                              ...                        
2196    -0.09375,-0.1171875,-0.052734375,-0.10546875,-...
2197    -0.05859375,-0.099609375,-0.08203125,-0.070312...
2198    -0.10546875,-0.064453125,-0.03515625,-0.099609...
2199    -0.076171875,-0.09375,-0.076171875,-0.09960937...
2200    -0.052734375,-0.076171875,-0.08203125,-0.08203...
Name: waveform_y, Length: 2201, dtype: object

In [7]:
def convert_to_float_list(string):
    return [float(x) for x in string.split(',')]

In [8]:
y = df_label['imbalance_health'].values
y = np.where(np.isnan(y), 1, y)
y = np.where(y != 1, 0, 1)

In [9]:
df_wf = pd.DataFrame({
    'waveform_y': df['waveform_y'].apply(convert_to_float_list),
    'label_imbalance': y
})

In [10]:
df_wf[df_wf['label_imbalance'] == 1]

Unnamed: 0,waveform_y,label_imbalance
0,"[-0.087890625, -0.05859375, -0.0703125, -0.064...",1
1,"[-0.064453125, -0.08203125, -0.076171875, -0.0...",1
2,"[-0.029296875, -0.10546875, -0.064453125, -0.0...",1
3,"[-0.052734375, -0.0703125, -0.076171875, -0.07...",1
4,"[-0.1640625, 0.0234375, 0.029296875, -0.1875, ...",1
...,...,...
2196,"[-0.09375, -0.1171875, -0.052734375, -0.105468...",1
2197,"[-0.05859375, -0.099609375, -0.08203125, -0.07...",1
2198,"[-0.10546875, -0.064453125, -0.03515625, -0.09...",1
2199,"[-0.076171875, -0.09375, -0.076171875, -0.0996...",1


In [11]:
# df_wf에서 waveform_y 컬럼을 리스트 형태로 가져옴
waveforms = df_wf['waveform_y'].tolist()

# 모웨이블릿을 적용한 결과를 저장할 리스트
cwt_results = []

# 스케일 정의 (1부터 128까지로 설정, 필요에 따라 조정 가능)
scales = np.arange(1, 129)

# 각 waveform에 대해 CWT 적용
for waveform in waveforms:
    # Continuous Wavelet Transform (CWT) 적용
    coeffs, _ = pywt.cwt(waveform, scales, 'cmor1.5-1.0')  # 'cmor'는 Morlet wavelet
    # 절대값을 취해줌
    abs_coeffs = np.abs(coeffs)
    cwt_results.append(abs_coeffs)

# CWT 결과를 데이터프레임에 추가
df_wf['cwt_coeffs'] = cwt_results

In [12]:
df_wf

Unnamed: 0,waveform_y,label_imbalance,cwt_coeffs
0,"[-0.087890625, -0.05859375, -0.0703125, -0.064...",1,"[[0.005660165070963101, 0.0015715324074112322,..."
1,"[-0.064453125, -0.08203125, -0.076171875, -0.0...",1,"[[0.005532719353985204, 0.003508863269946021, ..."
2,"[-0.029296875, -0.10546875, -0.064453125, -0.0...",1,"[[0.004905662966923438, 0.005416642706699189, ..."
3,"[-0.052734375, -0.0703125, -0.076171875, -0.07...",1,"[[0.004693400056746271, 0.003505735446197863, ..."
4,"[-0.1640625, 0.0234375, 0.029296875, -0.1875, ...",1,"[[0.00575370414230734, 0.007534620875979414, 0..."
...,...,...,...
2196,"[-0.09375, -0.1171875, -0.052734375, -0.105468...",1,"[[0.007731850096245601, 0.0033256198913022028,..."
2197,"[-0.05859375, -0.099609375, -0.08203125, -0.07...",1,"[[0.005910241371313618, 0.004638971681497294, ..."
2198,"[-0.10546875, -0.064453125, -0.03515625, -0.09...",1,"[[0.006401865942190834, 0.0009896956679941885,..."
2199,"[-0.076171875, -0.09375, -0.076171875, -0.0996...",1,"[[0.006377724838375217, 0.003683375175303733, ..."


In [13]:
df_wf['cwt_coeffs'][0].shape

(128, 3200)

In [14]:
# 데이터 준비
X = np.array(df_wf['cwt_coeffs'].tolist())  # cwt_coeffs를 numpy 배열로 변환
y = np.array(df_wf['label_imbalance'])  # label (1: 정상, 0: 비정상)

# 데이터의 shape (samples, height, width, channels)에 맞게 reshape
X = X.reshape((X.shape[0], X.shape[1], X.shape[2], 1))  # 마지막 1은 채널 수 (1)

# train-test split (80% 학습, 20% 테스트)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# CNN 모델 설계
model = Sequential()

# Conv2D Layer 1
model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(128, 3200, 1)))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Conv2D Layer 2
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Conv2D Layer 3
model.add(Conv2D(filters=128, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Fully connected layer
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))

# Output layer
model.add(Dense(1, activation='sigmoid'))  # 이진 분류

# 모델 컴파일
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# 모델 학습
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x24c0082a250>

In [21]:
# 모델 평가
y_pred = (model.predict(X_test) > 0.9).astype("int32")
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.08      0.16      0.10        25
           1       0.95      0.88      0.91       416

    accuracy                           0.84       441
   macro avg       0.51      0.52      0.51       441
weighted avg       0.90      0.84      0.87       441

