In [41]:
import os
import re
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.utils import to_categorical
from keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split

In [51]:
# LSTM_Live 폴더의 모든 CSV 파일 병합
def LSTM_Live_Merge(folder_path):
    """
    폴더 및 하위 폴더에 있는 모든 CSV 파일을 로드하여 하나의 DataFrame으로 병합합니다.

    Args:
        folder_path (str): CSV 파일이 저장된 폴더 경로.

    Returns:
        pd.DataFrame: 병합된 DataFrame.
    """
    csv_files = []

    # 하위 폴더까지 탐색
    for root, _, files in os.walk(folder_path):
        for file in files:
            if file.endswith('.csv'):
                csv_path = os.path.join(root, file)
                
                # CSV 불러오기
                df = pd.read_csv(csv_path)
                csv_files.append(df)

    # 모든 CSV 파일 병합
    if csv_files:
        combined_df = pd.concat(csv_files, ignore_index=True)
        print(f"총 {len(csv_files)}개의 CSV 파일을 병합했습니다. 데이터 크기: {combined_df.shape}")
        return combined_df
    else:
        print("병합할 CSV 파일이 없습니다.")
        return pd.DataFrame()


In [52]:
def natural_sort_key(s):
    """자연스러운 정렬을 위한 키 생성 함수"""
    return [int(text) if text.isdigit() else text.lower() for text in re.split(r'(\d+)', s)]

def LSTM_Capture_Merge(folder_path, sequence):
    """
    폴더 및 하위 폴더에 있는 모든 CSV 파일을 폴더 및 파일명 기준 오름차순으로 로드하여 하나의 DataFrame으로 병합합니다.
    단, CSV 파일의 개수가 sequence의 배수인 폴더만 병합합니다.

    Args:
        folder_path (str): CSV 파일이 저장된 폴더 경로.
        sequence (int): CSV 파일 개수가 이 수의 배수여야 병합합니다.

    Returns:
        pd.DataFrame: 병합된 DataFrame.
    """
    csv_files = []

    for root, _, files in sorted(os.walk(folder_path), key=lambda x: natural_sort_key(x[0])):
        sorted_files = sorted([file for file in files if file.endswith('.csv')], key=natural_sort_key)
        file_count = len(sorted_files)
        
        if file_count == 0:
            continue  # CSV 파일이 없는 폴더는 건너뜀
        
        if file_count % sequence != 0:
            print(f"[제외] {root}: CSV 파일 수 {file_count}개는 {sequence}의 배수가 아닙니다.")
            continue  # sequence의 배수가 아니면 병합 제외

        for file in sorted_files:
            csv_path = os.path.join(root, file)
            csv_files.append(csv_path)

    if not csv_files:
        print("병합할 CSV 파일이 없습니다.")
        return pd.DataFrame()
    
    # CSV 파일 병합 (float_precision 추가)
    dataframes = [pd.read_csv(file, float_precision='round_trip') for file in csv_files]
    combined_df = pd.concat(dataframes, ignore_index=True)
    print(f"총 {len(csv_files)}개의 CSV 파일을 병합했습니다. 데이터 크기: {combined_df.shape}")
    return combined_df

In [53]:
# 데이터 시퀀스별로 변환
def reshape_to_sequences(data, labels, seq_length):
    """
    데이터를 시퀀스 형태로 변환합니다.
    Args:
        data (np.array): 키포인트 데이터.
        labels (np.array): 레이블 데이터.
        seq_length (int): 시퀀스 길이.
    Returns:
        np.array, np.array: 시퀀스화된 입력 데이터와 레이블.
    """
    sequences = []
    sequence_labels = []
    for i in range(len(data) - seq_length + 1):
        sequences.append(data[i:i + seq_length])
        sequence_labels.append(labels[i + seq_length - 1])  # 시퀀스의 마지막 레이블 사용
    return np.array(sequences), np.array(sequence_labels)

In [58]:
# 데이터 불러오기 및 병합
folder_path = './Data'  # 데이터 폴더 경로
df_live = LSTM_Live_Merge(folder_path + '/LSTM_Live') # LSTM_Live 폴더 병합
df_capture = LSTM_Capture_Merge(folder_path + '/LSTM_Capture', sequence=3) # LSTM_Capture 폴더 병합
df= pd.concat([df_live, df_capture], ignore_index=True) # 전체 폴더 병합

총 51개의 CSV 파일을 병합했습니다. 데이터 크기: (1476, 35)
총 6개의 CSV 파일을 병합했습니다. 데이터 크기: (6, 35)


In [59]:
# 데이터프레임 확인
df

Unnamed: 0,kp0_x,kp0_y,kp1_x,kp1_y,kp2_x,kp2_y,kp3_x,kp3_y,kp4_x,kp4_y,...,kp12_y,kp13_x,kp13_y,kp14_x,kp14_y,kp15_x,kp15_y,kp16_x,kp16_y,action_class
0,504.748352,174.018814,515.478699,165.320679,496.802307,164.987915,533.271973,169.938446,486.754608,168.247803,...,323.480652,517.671753,398.759491,472.511292,389.608002,501.476654,457.165405,478.328369,440.453247,0
1,493.893311,167.112579,505.612885,158.779816,486.140564,158.240402,522.142822,168.031372,475.116486,165.450317,...,332.408905,514.570679,405.486908,458.247192,400.161682,509.301849,452.652832,473.916595,442.841217,0
2,489.443970,184.714722,501.096130,175.484283,481.513306,174.429993,520.108032,178.772324,471.632446,175.447479,...,340.180481,506.248840,416.903564,466.605835,407.960449,495.446808,452.529724,478.228882,438.963867,0
3,504.438934,182.662781,514.850159,173.607452,496.023224,172.890991,530.308594,177.233246,484.224304,175.232208,...,326.477722,520.327271,397.660522,466.075043,391.556549,504.693970,446.558716,474.512299,441.921783,0
4,506.670929,164.630676,516.756531,157.412125,498.996033,156.697311,531.835144,160.829529,487.479858,158.457733,...,311.421844,515.506836,383.558777,476.523590,371.399200,506.702728,436.349396,487.770660,414.044739,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1477,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,284.087372,120.812088,0.000000,0.000000,...,170.815277,290.997467,196.064270,291.476349,192.157150,307.372986,226.044540,304.511932,220.732727,2
1478,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,291.872925,124.161087,0.000000,0.000000,...,173.845215,295.106537,199.819305,294.047119,193.109634,307.697021,230.016510,303.347900,220.787003,2
1479,252.043289,109.688896,0.000000,0.000000,248.737198,107.836449,0.000000,0.000000,0.000000,0.000000,...,151.474121,251.780411,175.948883,247.387833,175.644180,255.462875,205.097855,252.283371,204.398911,2
1480,254.473282,112.512146,256.610779,110.128609,251.741089,110.181183,0.000000,0.000000,247.274139,111.595650,...,155.516266,254.698898,180.479706,249.408005,179.833176,261.246094,204.217148,259.570312,204.109390,2


In [46]:
# X, y 분리
X = df.iloc[:, :-1].values # 키포인트
y = df.iloc[:, -1].values # 클래스

In [47]:
# 레이블 원-핫 인코딩
y = to_categorical(y)

  x = np.array(x, dtype="int64")


IndexError: index -9223372036854775808 is out of bounds for axis 1 with size 221

In [30]:
# 데이터 시퀀스 길이 지정
seq_length = 3  # 시퀀스 길이

# 데이터를 시퀀스 형태로 변환
X_seq, y_seq = reshape_to_sequences(X, y, seq_length)

In [38]:
# 데이터 분할
X_train, X_val, y_train, y_val = train_test_split(X_seq, y_seq, test_size=0.2, random_state=42)

In [39]:
# LSTM 모델 정의
model = Sequential([
    LSTM(128, input_shape=(seq_length, X_train.shape[2]), return_sequences=True),
    LSTM(64),
    Dense(32, activation="relu"),
    Dense(y_train.shape[1], activation="softmax")
    ])

  super().__init__(**kwargs)


In [40]:
 # 모델 컴파일 및 학습
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
es = EarlyStopping(monitor = 'val_loss', min_delta = 0, patience = 3, mode = 'auto')
# model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=300, batch_size=32, callbacks=es)
model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=300, batch_size=32)

Epoch 1/300
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 18ms/step - accuracy: 0.3852 - loss: 1.4652 - val_accuracy: 0.4595 - val_loss: 1.2855
Epoch 2/300
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.5145 - loss: 1.2233 - val_accuracy: 0.5777 - val_loss: 1.0715
Epoch 3/300
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5668 - loss: 1.0612 - val_accuracy: 0.5912 - val_loss: 1.0094
Epoch 4/300
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.6002 - loss: 1.0318 - val_accuracy: 0.5912 - val_loss: 1.0118
Epoch 5/300
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.6170 - loss: 0.9676 - val_accuracy: 0.6318 - val_loss: 0.9027
Epoch 6/300
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.6317 - loss: 0.9331 - val_accuracy: 0.6385 - val_loss: 0.9206
Epoch 7/300
[1m37/37[0m [32m━━

KeyboardInterrupt: 

In [38]:
# 모델 저장
output_model = './Model/LSTM.h5'
model.save(output_model)



In [43]:
# 학습된 모델 평가
loss, accuracy = model.evaluate(X_val, y_val, verbose=0)
print(f"검증 데이터 정확도: {accuracy * 100:.2f}%")

검증 데이터 정확도: 76.87%
