In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense, Dropout

In [7]:
# 1. Load the data (preprocessing follows in the next cells)
# NOTE(review): 'D:GC/...' has no slash after the drive letter, so on Windows it is
# resolved relative to the current directory of drive D: rather than the drive
# root — confirm this is intended (vs. 'D:/GC/...').
df = pd.read_csv('D:GC/DSA_features.csv')
# Bare expression: renders the frame as the cell output.
df

Unnamed: 0,T_xacc_mean,T_xacc_max,T_xacc_min,T_xacc_var,T_xacc_std,T_xacc_skew,T_yacc_mean,T_yacc_max,T_yacc_min,T_yacc_var,...,LL_ymag_std,LL_ymag_skew,LL_zmag_mean,LL_zmag_max,LL_zmag_min,LL_zmag_var,LL_zmag_std,LL_zmag_skew,activity,people
0,7.975714,8.1605,7.6823,0.014395,0.119981,-0.023319,1.083150,1.1832,0.99744,0.002208,...,0.000792,0.177075,-0.057119,-0.054963,-0.059241,6.778722e-07,0.000823,0.036729,sitting,p1
1,7.978250,8.1763,7.8472,0.007551,0.086896,0.552416,1.140865,1.2129,1.05810,0.000784,...,0.000860,-0.286918,-0.057268,-0.054945,-0.059589,7.032302e-07,0.000839,0.347471,sitting,p1
2,7.970894,8.0860,7.8470,0.003092,0.055603,0.100538,1.140962,1.2128,1.07960,0.000508,...,0.000762,-0.134430,-0.057068,-0.054711,-0.059065,6.268222e-07,0.000792,0.045579,sitting,p1
3,7.938412,8.1083,7.6901,0.003763,0.061343,-0.231914,1.165260,1.3170,1.07870,0.002173,...,0.000735,0.021485,-0.056422,-0.053670,-0.058310,8.011245e-07,0.000895,0.240690,sitting,p1
4,7.908930,8.1305,7.8322,0.001741,0.041731,2.042285,1.187504,1.2574,1.09450,0.000662,...,0.000824,-0.148229,-0.055801,-0.053313,-0.057815,6.853423e-07,0.000828,0.258429,sitting,p1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9115,8.280854,34.1980,-2.9038,28.080803,5.299132,1.350075,-1.491537,11.2240,-11.65100,14.670334,...,0.200829,-0.040701,0.297666,0.708480,-0.117430,4.135451e-02,0.203358,-0.310022,basketBall,p8
9116,9.591118,51.6970,-3.4129,35.722025,5.976791,2.981144,0.086304,6.9951,-11.76400,5.329897,...,0.148745,-0.266377,0.224716,0.554670,-0.250950,3.355704e-02,0.183186,-0.736410,basketBall,p8
9117,9.599113,27.9300,-1.0765,48.850886,6.989341,0.449237,-0.728367,3.7801,-8.36910,5.683022,...,0.310748,-0.009505,-0.237786,0.088854,-0.477260,2.026107e-02,0.142341,0.668438,basketBall,p8
9118,9.692482,72.7820,-2.6734,59.378336,7.705734,4.491114,-0.582724,6.1216,-8.85710,4.162963,...,0.156493,0.050624,0.533023,0.677800,0.055941,1.356379e-02,0.116464,-1.482489,basketBall,p8


In [None]:
# Drop the 'people' column.
# errors='ignore' keeps this cell re-runnable: without it a second run raises
# KeyError because the column has already been removed from df.
df = df.drop(columns=['people'], errors='ignore')

# Label-encode the 'activity' column only.
# Skip the step when the column is already numeric (i.e. this cell ran before), so a
# re-run does not refit the encoder on integer codes and overwrite the original
# class names kept in label_encoder.classes_.
if not pd.api.types.is_numeric_dtype(df['activity']):
    label_encoder = LabelEncoder()
    df['activity'] = label_encoder.fit_transform(df['activity'])


In [6]:





# Separate the target column (y) from the feature columns (X)
y = df['activity']
X = df.drop(columns=['activity'])

# Keep 90% of the rows for training and hold out 10% for testing
# (stratified on the label, fixed random_state).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
    stratify=y,
)

# Convert the label Series to NumPy arrays (needed to build the sequences)
y_train, y_test = y_train.to_numpy(), y_test.to_numpy()

# 4. Helper that turns a 2-D table into overlapping windows (called with n_steps = 10 below)
def split_sequences(sequences, n_steps):
    """Slice a 2-D array into overlapping windows of ``n_steps`` consecutive rows.

    The last column of ``sequences`` is treated as the target and every other
    column as a feature. Window ``i`` covers rows ``i .. i + n_steps - 1`` and is
    labelled with the target value of its final row.

    Args:
        sequences: 2-D array-like of shape (n_rows, n_features + 1).
        n_steps: Number of rows per window; must be >= 1.

    Returns:
        Tuple ``(X, y)``. ``X`` has shape (n_windows, n_steps, n_features) and
        ``y`` has shape (n_windows,), with ``n_windows = n_rows - n_steps + 1``.
        When fewer than ``n_steps`` rows are available both are empty, but ``X``
        keeps its 3-D shape (0, n_steps, n_features) so that ``X.shape[2]`` is
        still usable by callers.

    Raises:
        ValueError: If ``n_steps`` is smaller than 1 or ``sequences`` is not 2-D.
    """
    if n_steps < 1:
        raise ValueError(f"n_steps must be >= 1, got {n_steps}")

    # Accept lists / DataFrames as well as ndarrays.
    sequences = np.asarray(sequences)
    if sequences.ndim != 2:
        raise ValueError(
            f"sequences must be 2-D (rows x columns), got {sequences.ndim} dimension(s)"
        )

    n_windows = len(sequences) - n_steps + 1
    if n_windows <= 0:
        # Not a single full window fits: return correctly shaped empty arrays
        # instead of the shape-(0,) arrays np.array([]) would produce.
        n_features = max(sequences.shape[1] - 1, 0)
        empty_X = np.empty((0, n_steps, n_features), dtype=sequences.dtype)
        empty_y = np.empty((0,), dtype=sequences.dtype)
        return empty_X, empty_y

    # One (n_steps, n_features) slice per window start.
    X = np.stack([sequences[start:start + n_steps, :-1] for start in range(n_windows)])
    # The label of window i is the target of row i + n_steps - 1, i.e. the target
    # column from row n_steps - 1 onwards. Copy so the result does not alias the input.
    y = sequences[n_steps - 1:, -1].copy()
    return X, y

# Build the windowed train / test sets (n_steps=10)
# NOTE(review): train_test_split above is called without shuffle=False, so the rows
# reach this point in shuffled order; each window therefore stacks n_steps rows that
# were not adjacent in the CSV — confirm this is what the Conv1D/LSTM layers should see.
n_steps = 10

# Append the label as the last column so split_sequences can pick it up as the target.
X_train_seq, y_train_seq = split_sequences(
    np.hstack([X_train, y_train.reshape(-1, 1)]),
    n_steps,
)
X_test_seq, y_test_seq = split_sequences(
    np.hstack([X_test, y_test.reshape(-1, 1)]),
    n_steps,
)

print(f"훈련 시퀀스 데이터 크기: {X_train_seq.shape}")
print(f"테스트 시퀀스 데이터 크기: {X_test_seq.shape}")

# 5. Build the CNN + LSTM model
model = Sequential()

# Declare the input shape (time steps, features per step) with an explicit Input
# layer. Passing input_shape= to the first layer of a Sequential model makes Keras
# emit a UserWarning recommending an Input object instead.
model.add(tf.keras.Input(shape=(n_steps, X_train_seq.shape[2])))

# 1D CNN layer (feature extraction)
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(MaxPooling1D(pool_size=1))  # pool size 1 so the time dimension is not reduced

# LSTM layer (learns the sequence pattern)
model.add(LSTM(50, activation='relu'))

# Fully connected layer (classifier)
model.add(Dense(50, activation='relu'))

# Output layer: softmax over the distinct activity labels found in y_train
model.add(Dense(len(np.unique(y_train)), activation='softmax'))

# 6. Compile the model
# sparse_categorical_crossentropy takes integer class ids, so no one-hot encoding is needed.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Show the model structure
model.summary()

# 7. Train the model, holding out 10% of the training data for validation
history = model.fit(
    X_train_seq,
    y_train_seq,
    epochs=10,
    validation_split=0.1,
)

# 8. Evaluate the model on the test set
loss, accuracy = model.evaluate(X_test_seq, y_test_seq)
print(f"테스트 세트 성능 - 손실: {loss}, 정확도: {accuracy}")


훈련 시퀀스 데이터 크기: (8199, 10, 270)
테스트 시퀀스 데이터 크기: (903, 10, 270)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m231/231[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step - accuracy: 0.2275 - loss: 2.6275 - val_accuracy: 0.6598 - val_loss: 0.9486
Epoch 2/10
[1m231/231[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.7163 - loss: 0.7491 - val_accuracy: 0.8159 - val_loss: 0.5037
Epoch 3/10
[1m231/231[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8488 - loss: 0.3997 - val_accuracy: 0.8439 - val_loss: 0.4276
Epoch 4/10
[1m231/231[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8893 - loss: 0.3062 - val_accuracy: 0.8951 - val_loss: 0.2811
Epoch 5/10
[1m231/231[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.9180 - loss: 0.2334 - val_accuracy: 0.9122 - val_loss: 0.2656
Epoch 6/10
[1m231/231[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.9356 - loss: 0.1742 - val_accuracy: 0.9390 - val_loss: 0.1695
Epoch 7/10
[1m231/231[0m 