In [297]:
import pandas as pd
import numpy as np

In [298]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

def preprocess(df, window=20, num_std=2.0):
    """Turn raw candle rows into Bollinger-break features and a next-row target.

    Steps, in order:

    1. Sort by ``ts``, make ``ts`` the index and drop ``o``, ``h``, ``l``,
       ``vol`` and ``volCcy``.
    2. Build Bollinger bands on the close ``c``: rolling mean over ``window``
       rows plus/minus ``num_std`` population standard deviations
       (``ddof=0``), each rounded to 2 decimals.
    3. Flag closes strictly above the upper band / strictly below the lower
       band.
    4. Add ``pct_change`` and min-max scale ``volCcyQuote``.
    5. Encode consecutive breaks as ``break_combo``: 0 while no band is
       broken, 1, 2, 3, ... along a run of upper breaks and -1, -2, -3, ...
       along a run of lower breaks.
    6. Set ``target`` to the sign of the *next* row's ``break_combo``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``ts``, ``o``, ``h``, ``l``, ``c``, ``vol``,
        ``volCcy`` and ``volCcyQuote``. The caller's frame is not modified.
    window : int, default 20
        Number of rows in the rolling window used for the bands.
    num_std : float, default 2.0
        Band half-width as a multiple of the rolling standard deviation.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``ts``. ``c`` and the intermediate break flags are removed;
        ``pct_change``, ``break_combo`` and ``target`` are added. Rows without
        a complete rolling window, rows containing NaN, and the final row
        (whose target is unknown) are dropped.

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    """
    # 1. Basic preparation: chronological order, timestamp index, unused columns out.
    df = df.sort_values("ts").set_index("ts")
    df = df.drop(columns=["o", "h", "l", "vol", "volCcy"])

    # 2. Bollinger bands on the close price.
    close = df["c"]
    rolling_close = close.rolling(window=window)
    basis = rolling_close.mean().round(2)
    dev = (rolling_close.std(ddof=0) * num_std).round(2)
    upper, lower = basis + dev, basis - dev

    # Comparisons against a NaN band evaluate to False, so without this mask
    # the warm-up rows (window not yet full) would be recorded as "no break"
    # and survive every later dropna().
    has_band = upper.notna().to_numpy()

    df["bb_up_break"] = (close > upper).astype(int)
    df["bb_down_break"] = (close < lower).astype(int)

    # 3. Relative change versus the previous row, in percent.
    # NOTE(review): the denominator is the current close, not the previous one,
    # so this is not the conventional percent change (Series.pct_change). It is
    # kept unchanged so existing feature values stay the same.
    df["pct_change"] = ((1 - close.shift(1) / close) * 100).round(2)

    # 4. Scale the quote-currency volume column into [0, 1].
    # NOTE(review): the scaler is fitted on every row passed in, so its min/max
    # also cover rows that may later end up in a test split.
    df["volCcyQuote"] = MinMaxScaler().fit_transform(df[["volCcyQuote"]])

    # 5. Drop the raw close, the warm-up rows and any remaining NaN rows.
    df = df.drop(columns=["c"])
    df = df.loc[has_band].dropna()

    # 6. Break state per row: +1 upper break, -1 lower break, 0 otherwise.
    df["bb_state"] = df["bb_up_break"] - df["bb_down_break"]
    # A new run starts whenever the state differs from the previous row.
    run_id = df["bb_state"].ne(df["bb_state"].shift()).cumsum()

    # Every row inside a run carries the same state, so a cumulative sum per
    # run already yields the signed counter (0 / 1, 2, ... / -1, -2, ...).
    df["break_combo"] = df.groupby(run_id)["bb_state"].cumsum()
    # The last row has no successor, so its target is NaN and is dropped below.
    df["target"] = np.sign(df["break_combo"].shift(-1))

    # Remove the intermediate columns.
    return df.drop(columns=["bb_state", "bb_up_break", "bb_down_break"]).dropna()


# Usage example: read the CSV named below and run it through preprocess().
df = preprocess(pd.read_csv("okx-btc-usdt-swap-15m.csv"))


In [299]:
# Display the ten rows at positions n-125 up to (but not including) n-115.
df.iloc[len(df) - 125:len(df) - 115]

Unnamed: 0_level_0,volCcyQuote,pct_change,break_combo,target
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2025-08-22 13:45:00,0.050143,-0.0,0,1.0
2025-08-22 14:00:00,0.561683,2.38,1,1.0
2025-08-22 14:15:00,0.293258,-0.02,2,1.0
2025-08-22 14:30:00,0.164112,0.5,3,1.0
2025-08-22 14:45:00,0.105063,0.02,4,1.0
2025-08-22 15:00:00,0.142609,0.73,5,0.0
2025-08-22 15:15:00,0.08972,0.07,0,0.0
2025-08-22 15:30:00,0.050132,0.19,0,0.0
2025-08-22 15:45:00,0.142894,-0.5,0,0.0
2025-08-22 16:00:00,0.085739,0.2,0,0.0


In [None]:
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Fixed seed so the split and the model give the same report on every re-run.
RANDOM_STATE = 42

# Keep only rows that are currently inside a break run (break_combo != 0).
df = df[df['break_combo'] != 0]

# Split features / label.
X = df[["volCcyQuote", "pct_change", "break_combo"]]

# Re-encode the target as class ids 0..2 for the classifier:
# 0 -> 0, 1 -> 1, -1 -> 2.
y = df["target"].map({0:0, 1:1, -1:2})

# Stratified 80/20 train/test split.
# NOTE(review): the rows are time-ordered bars, and a shuffled split puts
# neighbouring rows of the same break run on both sides of the split. A
# chronological split would give a more honest estimate; the shuffled,
# stratified split is kept here and only made reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=RANDOM_STATE
)

# XGBoost model: 3-class softmax, histogram tree builder.
model = xgb.XGBClassifier(
    n_estimators=1500,
    objective="multi:softmax",
    num_class=3,
    eval_metric="mlogloss",
    tree_method="hist",
    random_state=RANDOM_STATE,
)

# Train.
model.fit(X_train, y_train)

# Predict on the held-out rows.
y_pred = model.predict(X_test)

# Per-class precision / recall / F1 on the held-out rows.
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.56      0.66      0.60      1224
           1       0.57      0.44      0.50       602
           2       0.54      0.46      0.50       558

    accuracy                           0.56      2384
   macro avg       0.56      0.52      0.53      2384
weighted avg       0.56      0.56      0.55      2384

