## EEG data grouping

In [1]:
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Directory containing the raw EEG CSV files to be grouped.
eeg_folder_path = '/Users/sh_oh/Library/CloudStorage/Dropbox/Data/2023-1/BDP/ECSMP_Dataset/EEG_test_csv'

# os.listdir returns entries in arbitrary order. Sorting makes the grouping,
# and the insertion order of `grouped_files` that later cells iterate over,
# reproducible across runs and machines.
file_list = sorted(os.listdir(eeg_folder_path))

# Group files by the first 11 characters of their name (e.g. "002_video_1").
grouped_files = {}
for file_name in file_list:
    # Skip hidden entries such as the macOS ".DS_Store" metadata file so they
    # never form a bogus group.
    if file_name.startswith('.'):
        continue
    group_key = file_name[:11]
    grouped_files.setdefault(group_key, []).append(file_name)

In [2]:
# Averaged signal of every group, stored in sorted group-key order.
new_data = []

# Walk the groups in sorted key order so that the numbering of the output
# files (group_1.csv, group_2.csv, ...) is deterministic instead of following
# whatever order the dictionary happened to be filled in.
for key in sorted(grouped_files):
    file_names = grouped_files[key]

    # Read the first column of every file that belongs to this group.
    group_data = []
    for file_name in file_names:
        file_path = os.path.join(eeg_folder_path, file_name)
        df = pd.read_csv(file_path, encoding='latin1')  # files are read as latin1
        channel_data = df.iloc[:, 0].values  # only the first column is used
        group_data.append(channel_data)

    # An element-wise mean is only defined when all signals share one length;
    # fail with a message that names the offending group.
    lengths = {len(values) for values in group_data}
    if len(lengths) > 1:
        raise ValueError(
            f"Files in group {key!r} have different lengths: {sorted(lengths)}"
        )

    # Average the group's signals sample-by-sample into one new signal.
    new_group_data = np.mean(group_data, axis=0)
    new_data.append(new_group_data)

# Folder that receives one CSV per group.
output_folder_path = '/Users/sh_oh/Library/CloudStorage/Dropbox/Data/2023-1/BDP/ECSMP_Dataset/EEG_test2_csv'

# to_csv does not create missing directories, so make sure the folder exists.
os.makedirs(output_folder_path, exist_ok=True)

# Write each averaged signal to its own CSV file.
for i, group_data in enumerate(new_data):
    file_name = f"group_{i+1}.csv"
    file_path = os.path.join(output_folder_path, file_name)
    pd.DataFrame(group_data).to_csv(file_path, index=False)

In [3]:
# Report how many groups were formed.
print(f"그룹 개수: {len(grouped_files)}")

# List every group key, one per line, in sorted order.
ordered_group_names = sorted(grouped_files)
for name in ordered_group_names:
    print(name)

그룹 개수: 108
002_video_1
002_video_2
002_video_3
002_video_4
002_video_5
002_video_6
003_video_1
003_video_2
003_video_3
003_video_4
003_video_5
003_video_6
007_video_1
007_video_2
007_video_3
007_video_4
007_video_5
007_video_6
017_video_1
017_video_2
017_video_3
017_video_4
017_video_5
017_video_6
018_video_1
018_video_2
018_video_3
018_video_4
018_video_5
018_video_6
019_video_1
019_video_2
019_video_3
019_video_4
019_video_5
019_video_6
020_video_1
020_video_2
020_video_3
020_video_4
020_video_5
020_video_6
025_video_1
025_video_2
025_video_3
025_video_4
025_video_5
025_video_6
029_video_1
029_video_2
029_video_3
029_video_4
029_video_5
029_video_6
030_video_1
030_video_2
030_video_3
030_video_4
030_video_5
030_video_6
039_video_1
039_video_2
039_video_3
039_video_4
039_video_5
039_video_6
043_video_1
043_video_2
043_video_3
043_video_4
043_video_5
043_video_6
048_video_1
048_video_2
048_video_3
048_video_4
048_video_5
048_video_6
052_video_1
052_video_2
052_video_3
052_video_4
052_v

# EEG preprocessing

In [4]:
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Directory holding the per-group averaged signals (one CSV per group).
eeg_folder_path = '/Users/sh_oh/Library/CloudStorage/Dropbox/Data/2023-1/BDP/ECSMP_Dataset/EEG_test2_csv'

# Keep only visible CSV files (a stray ".DS_Store" would make read_csv fail)
# and order them deterministically. Sorting by (length, name) places
# "group_2.csv" before "group_10.csv", so the rows of the resulting matrix
# follow the numeric order of the file names rather than os.listdir's
# arbitrary order.
file_list = sorted(
    (
        name
        for name in os.listdir(eeg_folder_path)
        if name.lower().endswith('.csv') and not name.startswith('.')
    ),
    key=lambda name: (len(name), name),
)

# One 1-D signal per file.
data = []

# Length of the longest signal; every signal is zero-padded up to it.
max_length = 0

# Load the first column of every file and track the maximum length.
for file_name in file_list:
    file_path = os.path.join(eeg_folder_path, file_name)
    df = pd.read_csv(file_path)
    channel_data = df.iloc[:, 0].values  # only the first column is used
    data.append(channel_data)
    max_length = max(max_length, len(channel_data))

# Zero-pad at the end of each signal. pad_sequences defaults to dtype='int32',
# which would silently truncate the floating-point EEG values to integers, so
# a float dtype is requested explicitly.
padded_data = pad_sequences(data, maxlen=max_length, padding='post', dtype='float64')

# Scale to [0, 1].
# NOTE(review): MinMaxScaler scales each column independently, i.e. each time
# index across recordings, and the padded zeros take part in the min/max.
# Confirm this is the intended normalization.
scaler = MinMaxScaler()
normalized_data = scaler.fit_transform(padded_data)

# Denoising AutoEncoder(DAE) model

In [5]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Flatten, Reshape
from tensorflow.keras.optimizers.legacy import Adam

# Add Gaussian noise
def add_gaussian_noise(data, noise_factor, rng=None):
    """Return ``data`` with zero-mean Gaussian noise added element-wise.

    Parameters
    ----------
    data : array-like
        Values to corrupt. Anything ``np.shape`` understands is accepted.
    noise_factor : float
        Standard deviation of the noise. ``0`` returns the values unchanged.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. Pass a seeded generator for reproducible noise.
        When omitted, NumPy's global random state is used, exactly as before.

    Returns
    -------
    The result of ``data + noise``, where ``noise`` has the same shape as
    ``data``.
    """
    # Both the np.random module and Generator/RandomState expose .normal().
    sampler = np.random if rng is None else rng
    noise = sampler.normal(loc=0.0, scale=noise_factor, size=np.shape(data))
    return data + noise

In [6]:
# Build the denoising autoencoder model
def create_denoising_autoencoder(input_shape, encoding_dim):
    """Build and compile a single-hidden-layer dense autoencoder.

    The input is flattened, projected to ``encoding_dim`` units (ReLU),
    projected back to ``prod(input_shape)`` units (sigmoid) and reshaped to
    ``input_shape``. The model is compiled with Adam (learning rate 0.001)
    and mean-squared-error loss.

    No explicit layer names are passed, so the layers carry Keras'
    auto-generated names.

    Parameters
    ----------
    input_shape : tuple
        Shape of one sample, without the batch dimension.
    encoding_dim : int
        Number of units in the bottleneck layer.

    Returns
    -------
    The compiled Keras ``Model``.
    """
    # Total number of values in one sample; the decoder must emit this many.
    n_outputs = np.prod(input_shape)

    model_input = Input(shape=input_shape)

    # Encoder: flatten, then compress to the bottleneck.
    flattened = Flatten()(model_input)
    bottleneck = Dense(encoding_dim, activation='relu')(flattened)

    # Decoder: expand back to the flattened size and restore the input shape.
    reconstruction_flat = Dense(n_outputs, activation='sigmoid')(bottleneck)
    reconstruction = Reshape(input_shape)(reconstruction_flat)

    model = Model(inputs=model_input, outputs=reconstruction)
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    return model

In [7]:
# Shape of a single sample (everything after the batch axis) and the size of
# the bottleneck layer.
input_shape = normalized_data.shape[1:]
encoding_dim = 64

# Corrupt the normalized data with Gaussian noise; this is the network input.
noise_factor = 0.5
noisy_data = add_gaussian_noise(normalized_data, noise_factor)

# Build the denoising autoencoder.
autoencoder = create_denoising_autoencoder(input_shape, encoding_dim)

# Train the model to map the noisy input back to the clean normalized data.
autoencoder.fit(
    noisy_data,
    normalized_data,
    epochs=50,
    batch_size=6,
    shuffle=True,
    verbose=1,
)

Metal device set to: Apple M2 Pro
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x2a79ab400>

# Feature extraction

In [9]:
# Feature extraction: use the bottleneck (encoder) activations as features.
#
# The autoencoder's layers carry Keras' auto-generated names, which depend on
# how many layers were created earlier in the session. Looking the layer up by
# a fixed name such as 'dense_1' is therefore fragile: on a fresh kernel that
# name belongs to the decoder's Dense layer (yielding reconstructions, not
# encodings), and after re-running the model cell it no longer exists.
# Selecting the first Dense layer in the model picks the encoder regardless
# of naming.
bottleneck_layer = next(
    layer for layer in autoencoder.layers if isinstance(layer, Dense)
)
encoder = Model(inputs=autoencoder.input, outputs=bottleneck_layer.output)
eeg_test_features = encoder.predict(normalized_data)



In [10]:
# Show the element dtype of the extracted feature matrix.
feature_dtype = eeg_test_features.dtype
print(feature_dtype)

float32


In [12]:
# Persist the extracted features as a plain-text file.
features_txt_path = '/Users/sh_oh/Library/CloudStorage/Dropbox/Data/2023-1/BDP/ECSMP_Dataset/eeg_test_features.txt'
np.savetxt(features_txt_path, eeg_test_features)