In [4]:
import warnings
warnings.filterwarnings("ignore")

In [11]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from keras.models import Model
from keras.layers import Input, Dense

# EDA 데이터 경로 설정
eda_folder = "EDA_preprocessed/Output_EDA/"

# 사용할 사람 ID 및 시청 영상 종류 리스트
train_ids = ['001', '005', '006', '009', '010', '011', '012', '013', '015', '021', '022', '023',
             '024', '026', '027', '028', '031', '032', '033', '036', '038', '041', '042', '044',
             '045', '047', '053', '054', '055', '060', '062', '063', '064', '066', '070', '072',
             '073', '075', '076', '077', '078']
test_ids = ['002', '003', '007', '017', '018', '019', '020', '025', '029', '030', '039', '043',
            '048', '052', '059', '065', '068', '074']

# 데이터프레임 초기화
X_train = pd.DataFrame()
X_test = pd.DataFrame()
y_train = []
y_test = []

# 데이터 전처리
data_train = []
data_test = []

# 전체 데이터셋에 대한 표준화
scaler = StandardScaler()

for id in train_ids:
    for emotion in ['happy', 'fear', 'sad', 'neutral', 'anger', 'disgust']:
        file_path = f"{eda_folder}/{id}/{id}_{emotion}.csv"
        data = pd.read_csv(file_path, header=None)
        scaled_data = scaler.fit_transform(data)
        data_train.append(scaled_data)
        y_train.append(emotion)  

for id in test_ids:
    for emotion in ['happy', 'fear', 'sad', 'neutral', 'anger', 'disgust']:
        file_path = f"{eda_folder}/{id}/{id}_{emotion}.csv"
        data = pd.read_csv(file_path, header=None)
        scaled_data = scaler.transform(data)
        data_test.append(scaled_data)
        y_test.append(emotion)  



In [12]:
# 데이터를 각각의 리스트에 추가
for i in range(len(train_ids)):
    for j in range(6):
        features = np.concatenate((np.array([train_ids[i]]), data_train[i][j].flatten(), np.array([j])), axis=None)
        X_train = X_train.append(pd.Series(features), ignore_index=True)

for i in range(len(test_ids)):
    for j in range(6):
        features = np.concatenate((np.array([test_ids[i]]), data_test[i][j].flatten(), np.array([j])), axis=None)
        X_test = X_test.append(pd.Series(features), ignore_index=True)


In [13]:
print(X_train.dtypes)
print(X_test.dtypes)

0    object
1    object
2    object
dtype: object
0    object
1    object
2    object
dtype: object


In [14]:
# 데이터 유형 변환
X_train = X_train.astype(float)
X_test = X_test.astype(float)

In [15]:
# 라벨 데이터 변환
label_mapping = {'happy': 0, 'fear': 1, 'sad': 2, 'neutral': 3, 'anger': 4, 'disgust': 5}
y_train = [label_mapping[y] for y in y_train]
y_test = [label_mapping[y] for y in y_test]

In [16]:
print(X_train.dtypes)
print(X_test.dtypes)

0    float64
1    float64
2    float64
dtype: object
0    float64
1    float64
2    float64
dtype: object


In [46]:
# Autoencoder 모델 정의
input_dim = X_train.shape[1]
encoding_dim = 64 

input_layer = Input(shape=(input_dim,))
encoder = Dense(encoding_dim, activation='relu')(input_layer)
decoder = Dense(input_dim, activation='sigmoid')(encoder)

autoencoder = Model(input_layer, decoder)
autoencoder.compile(optimizer='adam', loss='mse')

# 모델 학습
autoencoder.fit(X_train, X_train, epochs=15, batch_size=32, shuffle=True, validation_data=(X_test, X_test))

# 특징 추출
encoder_model = Model(input_layer, encoder)
encoded_X_train = encoder_model.predict(X_train)
encoded_X_test = encoder_model.predict(X_test)


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [47]:
# 결과 확인
print("Encoded Train data:")
print(encoded_X_train)
print("Encoded Test data:")
print(encoded_X_test)

Encoded Train data:
[[ 0.          0.18429577  0.         ...  0.          0.
   0.3257595 ]
 [ 0.2583171   0.27240455  0.         ...  0.          0.
   0.11707751]
 [ 0.6630129   1.2244285   0.         ...  0.          0.
   0.        ]
 ...
 [ 0.         11.246413    0.         ...  0.          0.
  21.727383  ]
 [ 0.         11.390205    0.         ...  0.          0.
  21.475027  ]
 [ 0.         11.3948      0.         ...  0.          0.
  21.331844  ]]
Encoded Test data:
[[ 0.         0.         0.        ...  0.         0.         7.8865905]
 [ 0.         0.         0.        ...  0.         0.         7.6779084]
 [ 0.         0.         0.        ...  0.         0.         7.4692264]
 ...
 [ 0.        94.99119    9.540165  ...  0.        45.31725    0.       ]
 [ 0.        95.49677    9.553176  ...  0.        45.25236    0.       ]
 [ 0.        95.584885   9.516296  ...  0.        44.931145   0.       ]]
