In [5]:
import numpy as np
import pandas as pd

# Load the BTC/KRW price history and keep only the feature columns
# (Open, High, Low, Close, Volume), in that order.
btc_data = pd.read_csv('./BTC_KRW.csv')[['Open', 'High', 'Low', 'Close', 'Volume']]

# Hyperparameters: window length passed to create_sequences, and the number of
# samples held out for the validation and test splits.
sequence_size, validation_size, test_size = 21, 150, 30
# True selects regression labels, False selects classification labels.
is_regression = True

# Sequence (sliding-window) builder
def create_sequences(data, sequence_size, is_regression):
    """Slice a price table into fixed-length windows with next-row labels.

    Sample ``i`` consists of rows ``i .. i + sequence_size - 1`` of ``data``;
    its label is derived from row ``i + sequence_size`` (the row right after
    the window).

    Args:
        data: pandas DataFrame containing a ``'Close'`` column. All columns
            are used as features, in their existing order.
        sequence_size: Number of consecutive rows per window (must be >= 1).
        is_regression: If true, the label is the ``'Close'`` value of the row
            following the window. Otherwise the label is 1 when that value is
            strictly greater than the ``'Close'`` of the window's last row,
            else 0.

    Returns:
        A ``(sequences, labels)`` tuple of NumPy arrays. ``sequences`` has
        shape ``(n_samples, sequence_size, n_columns)`` and ``labels`` has
        shape ``(n_samples,)``, where
        ``n_samples = max(len(data) - sequence_size, 0)``. Both arrays are
        empty (but correctly shaped) when ``data`` is too short to yield a
        single sample.

    Raises:
        ValueError: If ``sequence_size`` is smaller than 1.
        KeyError: If ``data`` has no ``'Close'`` column.
    """
    if sequence_size < 1:
        raise ValueError(f"sequence_size must be >= 1, got {sequence_size}")

    # Convert once instead of going through .iloc for every sample; each
    # per-row .iloc lookup builds a temporary Series and dominates the runtime.
    values = data.values
    # Take Close from the already-upcast matrix so labels carry the same dtype
    # that a row-wise lookup would produce.
    close = values[:, data.columns.get_loc('Close')]

    n_samples = len(values) - sequence_size
    if n_samples > 0:
        sequences = np.array(
            [values[start:start + sequence_size] for start in range(n_samples)]
        )
    else:
        # Not enough rows for even one window: keep the 3-D layout so that
        # downstream shape-dependent code still works.
        sequences = np.empty((0, sequence_size, values.shape[1]), dtype=values.dtype)

    next_close = close[sequence_size:]
    if is_regression:
        # Copy so the labels never alias the DataFrame's underlying buffer.
        labels = next_close.copy()
    else:
        last_close_in_window = close[sequence_size - 1:-1]
        labels = (next_close > last_close_in_window).astype(int)

    return sequences, labels

# Build the (window, label) samples from the loaded price table.
X, y = create_sequences(btc_data, sequence_size, is_regression)

# Split by position (Train, Validation, Test): the first samples form the
# training set, the next `validation_size` samples the validation set, and the
# remaining samples the test set.
train_size = len(X) - validation_size - test_size
if train_size < 0:
    # A negative train_size would be interpreted as a negative slice index
    # below and silently produce wrong or overlapping splits.
    raise ValueError(
        f"Not enough samples ({len(X)}) for validation_size={validation_size} "
        f"and test_size={test_size}"
    )
X_train, y_train = X[:train_size], y[:train_size]
X_val, y_val = X[train_size:train_size + validation_size], y[train_size:train_size + validation_size]
X_test, y_test = X[train_size + validation_size:], y[train_size + validation_size:]

# Report the resulting split shapes.
print(f"Train data shape: {X_train.shape}, {y_train.shape}")
print(f"Validation data shape: {X_val.shape}, {y_val.shape}")
print(f"Test data shape: {X_test.shape}, {y_test.shape}")


Train data shape: (3527, 21, 5), (3527,)
Validation data shape: (150, 21, 5), (150,)
Test data shape: (30, 21, 5), (30,)
