In [8]:
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Path configuration: folder that holds one CSV file per EEG recording
eeg_folder_path = './EEG_test2_csv'

# Collect the recordings in a deterministic (sorted) order. os.listdir()
# returns entries in arbitrary order and can include non-data entries
# (hidden files, checkpoint directories), so keep regular *.csv files only.
file_list = sorted(
    entry
    for entry in os.listdir(eeg_folder_path)
    if entry.lower().endswith('.csv')
    and os.path.isfile(os.path.join(eeg_folder_path, entry))
)
if not file_list:
    raise ValueError(f"No CSV files found in {eeg_folder_path!r}")

# One 1-D array per recording
data = []

# Read and preprocess every recording
for file_name in file_list:
    file_path = os.path.join(eeg_folder_path, file_name)
    df = pd.read_csv(file_path)
    channel_data = df.iloc[:, 0].values  # only the first column is used
    data.append(channel_data)

# Length of the longest recording; shorter ones are zero-padded up to it
max_length = max(len(recording) for recording in data)

# Zero-padding (zeros appended at the end of each recording).
# dtype is passed explicitly because pad_sequences defaults to dtype='int32',
# which would silently truncate any fractional sample values to integers.
padded_data = pad_sequences(data, maxlen=max_length, padding='post', dtype='float64')

# Normalization to the [0, 1] range.
# NOTE(review): MinMaxScaler scales every column independently, and here a
# column is one time index across all recordings (rows are recordings). The
# padded zeros are therefore mapped to different non-zero values per column.
# Confirm this is intended rather than per-recording (row-wise) scaling.
scaler = MinMaxScaler()
normalized_data = scaler.fit_transform(padded_data)

In [9]:
# Report the container type of the normalized data
print("Data Type:", type(normalized_data))

# Report the shape, a header line, and the first five rows as a sample;
# one print call with a newline separator emits the same three outputs
print(normalized_data.shape, "Sample Data:", normalized_data[:5], sep="\n")

Data Type: <class 'numpy.ndarray'>
(108, 78001)
Sample Data:
[[0.76928072 0.77643409 0.77903351 ... 0.4722262  0.47287204 0.        ]
 [0.40940554 0.4020989  0.40253573 ... 0.4722262  0.47287204 0.        ]
 [0.93878004 0.93195201 0.93625845 ... 0.4722262  0.47287204 0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.84144584 0.84324744 0.84380392 ... 0.4722262  0.47287204 0.        ]]


In [10]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Flatten, Reshape, Conv1D, Flatten, Conv1DTranspose
from tensorflow.keras.optimizers.legacy import Adam

# Add Gaussian noise
def add_gaussian_noise(data, noise_factor, seed=None):
    """Return ``data`` with zero-mean Gaussian noise added element-wise.

    Args:
        data: Array or array-like (e.g. nested list) to corrupt. It is not
            modified; a new object is returned.
        noise_factor: Standard deviation of the noise (must be non-negative,
            as required by NumPy's normal sampler).
        seed: Optional source of reproducibility. ``None`` (default) draws
            from NumPy's global random state, exactly as before. An ``int``
            seeds a fresh ``np.random.Generator``; an existing
            ``np.random.Generator`` is used as-is.

    Returns:
        ``data + noise``, where ``noise`` has the same shape as ``data``.
    """
    # np.shape works for ndarrays and plain array-likes without a .shape attribute
    shape = np.shape(data)
    if seed is None:
        noise = np.random.normal(loc=0.0, scale=noise_factor, size=shape)
    else:
        rng = seed if isinstance(seed, np.random.Generator) else np.random.default_rng(seed)
        noise = rng.normal(loc=0.0, scale=noise_factor, size=shape)
    noisy_data = data + noise
    return noisy_data

In [15]:
# Build the denoising autoencoder model
def create_denoising_autoencoder(input_shape, encoding_dim):
    """Build a fully connected autoencoder plus the encoder that shares its layers.

    Args:
        input_shape: Shape of one sample, without the batch axis.
        encoding_dim: Number of units in the bottleneck (last encoder) layer.

    Returns:
        A ``(autoencoder, encoder)`` tuple. ``autoencoder`` maps the input to
        a reconstruction of the same shape and is compiled with Adam
        (learning rate 0.001) and MSE loss. ``encoder`` maps the same input
        to the bottleneck activations and is returned uncompiled.
    """
    inputs = Input(shape=input_shape)

    # Encoder: flatten, then narrow through 128 -> 64 -> 32 -> encoding_dim
    hidden = Flatten()(inputs)
    for units in (128, 64, 32, encoding_dim):
        hidden = Dense(units, activation='relu')(hidden)
    bottleneck = hidden

    # Decoder: widen through 32 -> 64 -> 128, then back to the flattened input size
    for units in (32, 64, 128):
        hidden = Dense(units, activation='relu')(hidden)
    flat_size = np.prod(input_shape)
    hidden = Dense(flat_size, activation='sigmoid')(hidden)
    reconstruction = Reshape(input_shape)(hidden)

    # Full model: input -> reconstruction
    autoencoder = Model(inputs=inputs, outputs=reconstruction)
    autoencoder.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

    # Encoder-only model: input -> bottleneck (reuses the layers built above)
    encoder = Model(inputs=inputs, outputs=bottleneck)

    return autoencoder, encoder

# Seed Python, NumPy and TensorFlow in one call so that the injected noise,
# the weight initialisation and the training run are repeatable.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Shape of a single input sample (everything except the batch axis)
input_shape = normalized_data.shape[1:]
encoding_dim = 64

# Corrupt the inputs with Gaussian noise
noise_factor = 0.5
noisy_data = add_gaussian_noise(normalized_data, noise_factor)

# Build the denoising autoencoder and its encoder
autoencoder, encoder = create_denoising_autoencoder(input_shape, encoding_dim)

# Train: noisy inputs -> clean targets
autoencoder.fit(noisy_data, normalized_data, epochs=30, batch_size=16, shuffle=True, verbose=1)

# Feature extraction (encoder output on the clean data)
eeg_test_features = encoder.predict(normalized_data)

# One feature vector of length encoding_dim is expected per recording
expected_shape = (normalized_data.shape[0], encoding_dim)
assert eeg_test_features.shape == expected_shape, (
    f"unexpected feature shape {eeg_test_features.shape}, expected {expected_shape}"
)
print(eeg_test_features.dtype) # float32
print(eeg_test_features.shape) # (number of recordings, encoding_dim)


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
float32
(108, 64)


In [16]:
# Show the encoded feature matrix as the cell's output value
eeg_test_features

array([[ 0.       ,  0.       ,  0.       , ...,  0.       ,  0.       ,
        39.582573 ],
       [ 0.       ,  0.       ,  0.       , ...,  0.       ,  1.9774508,
        13.7914915],
       [ 2.7080767,  0.       ,  0.       , ...,  0.       ,  0.       ,
        54.24855  ],
       ...,
       [ 0.       ,  0.       ,  0.       , ...,  0.       ,  0.       ,
         7.134301 ],
       [ 0.       ,  0.       ,  0.       , ...,  0.       ,  0.       ,
        47.751797 ],
       [ 0.       ,  0.       ,  0.       , ...,  0.       ,  0.       ,
        41.317375 ]], dtype=float32)

In [17]:
# Destination CSV file for the encoded features
output_file = "eeg_test_features.csv"

# Wrap the feature matrix in a DataFrame and write it out without the index column
df_encoded_eeg = pd.DataFrame(eeg_test_features)
df_encoded_eeg.to_csv(output_file, index=False)