In [None]:
pip install ta

Collecting ta
  Downloading ta-0.11.0.tar.gz (25 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: ta
  Building wheel for ta (setup.py) ... [?25l[?25hdone
  Created wheel for ta: filename=ta-0.11.0-py3-none-any.whl size=29411 sha256=0c9882c444296915410d1d8f2734bed49147bf576bf77a00f9e2d313642f5dd0
  Stored in directory: /root/.cache/pip/wheels/5f/67/4f/8a9f252836e053e532c6587a3230bc72a4deb16b03a829610b
Successfully built ta
Installing collected packages: ta
Successfully installed ta-0.11.0


In [None]:
pip install shap

Collecting shap
  Downloading shap-0.46.0-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (540 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m540.1/540.1 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
Collecting slicer==0.0.8 (from shap)
  Downloading slicer-0.0.8-py3-none-any.whl (15 kB)
Installing collected packages: slicer, shap
Successfully installed shap-0.46.0 slicer-0.0.8


In [None]:
pip install SMOTE

Collecting SMOTE
  Downloading smote-0.1-py2.py3-none-any.whl (3.3 kB)
Installing collected packages: SMOTE
Successfully installed SMOTE-0.1


In [None]:
pip install imbalanced-learn

Collecting imbalanced-learn
  Downloading imbalanced_learn-0.12.3-py3-none-any.whl (258 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m258.3/258.3 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: imbalanced-learn
Successfully installed imbalanced-learn-0.12.3


In [None]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tqdm import tqdm
import ta  # Technical Analysis library
import shap
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.impute import SimpleImputer

In [None]:
# Load the dataset from Kaggle
file_path = '/content/drive/MyDrive/Data/SOL_data.csv'
data = pd.read_csv(file_path)

# Display the first few rows of the dataset to ensure it is loaded correctly
data.head()

Unnamed: 0,open_time,open,high,low,close,volume
0,2020-12-31 15:00:00,1.4458,1.4458,1.4444,1.4456,161.34
1,2020-12-31 15:01:00,1.4477,1.4478,1.4463,1.4463,148.86
2,2020-12-31 15:02:00,1.4479,1.453,1.4466,1.4484,3208.13
3,2020-12-31 15:03:00,1.4503,1.4559,1.4503,1.4558,1639.76
4,2020-12-31 15:04:00,1.4558,1.4569,1.4545,1.4569,900.91


In [None]:
# 데이터의 첫 번째 1/3만 사용
n_samples = len(data)
data = data.iloc[:n_samples // 3]

In [None]:
# open_time 열을 datetime 형식으로 변환
data['open_time'] = pd.to_datetime(data['open_time'])

# 시각과 분만 추출하여 time 열 생성
data['time'] = data['open_time'].dt.strftime('%H:%M')

# ATR 계산
atr_periods = [5, 10, 14, 20, 50]
for period in atr_periods:
    data[f'atr_{period}'] = ta.volatility.average_true_range(data['high'], data['low'], data['close'], window=period)

# VWAP 계산
data['vwap'] = ta.volume.volume_weighted_average_price(data['high'], data['low'], data['close'], data['volume'])

# Stochastic Oscillator 계산
stoch_periods = [(14, 3), (21, 5), (9, 3), (5, 2), (20, 7)]
for period, smooth in stoch_periods:
    data[f'stoch_%k_{period}_{smooth}'] = ta.momentum.stoch(data['high'], data['low'], data['close'], window=period, smooth_window=smooth)
    data[f'stoch_%d_{period}_{smooth}'] = ta.momentum.stoch_signal(data['high'], data['low'], data['close'], window=period, smooth_window=smooth)

# OBV 계산
data['obv'] = ta.volume.on_balance_volume(data['close'], data['volume'])

# Bollinger Bands 계산
bollinger_periods = [10, 20, 50, 100, 200]
for period in bollinger_periods:
    data[f'bollinger_hband_{period}'] = ta.volatility.BollingerBands(data['close'], window=period).bollinger_hband()
    data[f'bollinger_lband_{period}'] = ta.volatility.BollingerBands(data['close'], window=period).bollinger_lband()

# Ichimoku 계산
ichimoku_periods = [9, 26, 52, 100, 200]
for period in ichimoku_periods:
    data[f'ichimoku_base_{period}'] = ta.trend.ichimoku_base_line(data['high'], data['low'], window1=period)
    data[f'ichimoku_conversion_{period}'] = ta.trend.ichimoku_conversion_line(data['high'], data['low'], window1=period)

# Supertrend 계산 함수
def calculate_supertrend(df, period=7, multiplier=3, atr_period=14):
    df = df.copy()  # 데이터프레임을 복사하여 사용
    hl2 = (df['high'] + df['low']) / 2
    df['atr'] = ta.volatility.average_true_range(df['high'], df['low'], df['close'], window=atr_period)
    df['upperband'] = hl2 + (multiplier * df['atr'])
    df['lowerband'] = hl2 - (multiplier * df['atr'])
    df['in_uptrend'] = True

    for current in tqdm(range(1, len(df.index)), desc=f'Calculating Supertrend {period}-{multiplier}-{atr_period}'):
        previous = current - 1

        if df['close'].iloc[current] > df['upperband'].iloc[previous]:
            df.loc[df.index[current], 'in_uptrend'] = True
        elif df['close'].iloc[current] < df['lowerband'].iloc[previous]:
            df.loc[df.index[current], 'in_uptrend'] = False
        else:
            df.loc[df.index[current], 'in_uptrend'] = df['in_uptrend'].iloc[previous]

            if df['in_uptrend'].iloc[current] and df['lowerband'].iloc[current] < df['lowerband'].iloc[previous]:
                df.loc[df.index[current], 'lowerband'] = df['lowerband'].iloc[previous]

            if not df['in_uptrend'].iloc[current] and df['upperband'].iloc[current] > df['upperband'].iloc[previous]:
                df.loc[df.index[current], 'upperband'] = df['upperband'].iloc[previous]

    return df

# Supertrend 계산
supertrend_settings = [(7, 3, 14), (10, 3, 20), (14, 2, 10), (20, 4, 50), (50, 5, 5)]
for period, multiplier, atr_period in supertrend_settings:
    data = calculate_supertrend(data, period, multiplier, atr_period)
    data[f'supertrend_upper_{period}_{multiplier}_{atr_period}'] = data['upperband']
    data[f'supertrend_lower_{period}_{multiplier}_{atr_period}'] = data['lowerband']
    data[f'supertrend_in_uptrend_{period}_{multiplier}_{atr_period}'] = data['in_uptrend']

Calculating Supertrend 7-3-14: 100%|██████████| 618844/618844 [05:53<00:00, 1749.26it/s]
Calculating Supertrend 10-3-20: 100%|██████████| 618844/618844 [05:13<00:00, 1975.49it/s]
Calculating Supertrend 14-2-10: 100%|██████████| 618844/618844 [05:09<00:00, 2000.04it/s]
Calculating Supertrend 20-4-50: 100%|██████████| 618844/618844 [05:22<00:00, 1916.66it/s]
Calculating Supertrend 50-5-5: 100%|██████████| 618844/618844 [05:22<00:00, 1918.01it/s]
Calculating Max/Min Returns:   7%|▋         | 45761/618845 [06:05<1:16:18, 125.17it/s]


KeyboardInterrupt: 

In [None]:
def calculate_max_min_returns(df):
    window_size = 60

    # 'open_time'이 이미 인덱스로 설정되어 있는지 확인
    if df.index.name != 'open_time':
        raise KeyError("'open_time' 열이 인덱스로 설정되어 있어야 합니다.")

    # 인덱스 중복 확인 및 제거
    df = df[~df.index.duplicated(keep='first')].copy()

    # 60분 윈도우를 적용하여 최대 및 최소 가격 계산
    df['max_price'] = df['high'].rolling(f'{window_size}T').max().shift(-window_size)
    df['min_price'] = df['low'].rolling(f'{window_size}T').min().shift(-window_size)

    # 결측값을 적절히 처리 (예: 마지막 몇 행)
    df['max_price'].fillna(df['high'], inplace=True)
    df['min_price'].fillna(df['low'], inplace=True)

    # 현재 가격
    current_price = df['close']

    # 최대 및 최소 수익률 계산
    df['max_return_60min'] = ((df['max_price'] - current_price) / current_price) * 100
    df['min_return_60min'] = ((df['min_price'] - current_price) / current_price) * 100

    # 필요없는 열 삭제
    df.drop(columns=['max_price', 'min_price'], inplace=True)

    return df

# 최대 상승률과 최대 하락률 계산
data = calculate_max_min_returns(data)

In [None]:
# 인덱스를 리셋하여 'open_time'을 열로 되돌림
data.reset_index(inplace=True)

data.head()

NameError: name 'data' is not defined

In [None]:
data.columns

NameError: name 'data' is not defined

In [None]:
path2 = "/content/drive/MyDrive/Data/SOL60_INDICATOR2_SMALL";
data.to_csv(path2, index=True);

In [None]:
data = pd.read_csv("/content/drive/MyDrive/Data/SOL60_INDICATOR2_SMALL")

# Display the first few rows of the dataset to ensure it is loaded correctly
data.head()

Unnamed: 0,open_time,open,high,low,close,volume,time,atr_5,atr_10,atr_14,...,supertrend_lower_14_2_10,supertrend_in_uptrend_14_2_10,supertrend_upper_20_4_50,supertrend_lower_20_4_50,supertrend_in_uptrend_20_4_50,supertrend_upper_50_5_5,supertrend_lower_50_5_5,supertrend_in_uptrend_50_5_5,max_return_60min,min_return_60min
0,2020-12-31 15:00:00,1.4458,1.4458,1.4444,1.4456,161.34,15:00,0.0,0.0,0.0,...,1.4451,True,1.4451,1.4451,True,1.4451,1.4451,True,1.203652,-0.387382
1,2020-12-31 15:01:00,1.4477,1.4478,1.4463,1.4463,148.86,15:01,0.0,0.0,0.0,...,1.44705,True,1.44705,1.44705,True,1.44705,1.44705,True,1.154671,-0.435594
2,2020-12-31 15:02:00,1.4479,1.453,1.4466,1.4484,3208.13,15:02,0.0,0.0,0.0,...,1.4498,True,1.4498,1.4498,True,1.4498,1.4498,True,1.008009,-0.57995
3,2020-12-31 15:03:00,1.4503,1.4559,1.4503,1.4558,1639.76,15:03,0.0,0.0,0.0,...,1.4531,True,1.4531,1.4531,True,1.4531,1.4531,True,0.494573,-1.085314
4,2020-12-31 15:04:00,1.4558,1.4569,1.4545,1.4569,900.91,15:04,0.00404,0.0,0.0,...,1.4557,True,1.4557,1.4557,True,1.4759,1.4355,True,0.418697,-1.159997


In [None]:
# open_time 열이 datetime 형식이 아닌 경우 변환
if not np.issubdtype(data['open_time'].dtype, np.datetime64):
    data['open_time'] = pd.to_datetime(data['open_time'])

# time 열을 분 단위로 변환
data['time'] = data['open_time'].dt.hour * 60 + data['open_time'].dt.minute

# 사용하지 않을 열 제외
data = data.drop(columns=['open_time'])

# 목표 변수 생성 (max_return_60min이 1.1% 이상인 경우 1, 그렇지 않으면 0)
data['target'] = (data['max_return_60min'] >= 1.1).astype(int)

# 특성과 목표 변수 분리
X = data.drop(columns=['max_return_60min', 'min_return_60min', 'target'])
y = data['target']

# NaN 값을 평균으로 대체
imputer = SimpleImputer(strategy='mean')
X_imputed = imputer.fit_transform(X)

# 데이터 정규화
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X_imputed)

# SMOTE를 사용한 오버샘플링
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_scaled, y)

# 시계열 데이터 생성 함수
def create_sequences(X, y, time_steps=60):
    Xs, ys = [], []
    for i in range(len(X) - time_steps):
        Xs.append(X[i:i + time_steps])
        ys.append(y[i + time_steps])
    return np.array(Xs, dtype=np.float32), np.array(ys, dtype=np.float32)

# 데이터 크기를 확인하고 time_steps 값 조정
time_steps = 60
if len(X_resampled) <= time_steps:
    raise ValueError(f"The dataset size ({len(X_resampled)}) is too small for the given time_steps ({time_steps}).")

X_sequences, y_sequences = create_sequences(X_resampled, y_resampled, time_steps)

In [None]:
pip install xgboost

Collecting xgboost
  Downloading xgboost-2.1.0-py3-none-manylinux_2_28_x86_64.whl (153.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m153.9/153.9 MB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-nccl-cu12 (from xgboost)
  Downloading nvidia_nccl_cu12-2.22.3-py3-none-manylinux2014_x86_64.whl (190.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.9/190.9 MB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: nvidia-nccl-cu12, xgboost
Successfully installed nvidia-nccl-cu12-2.22.3 xgboost-2.1.0


In [None]:
import xgboost as xgb

# 학습 데이터와 테스트 데이터 분리
X_train, X_test, y_train, y_test = train_test_split(X_sequences, y_sequences, test_size=0.2, random_state=42)

print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

# XGBoost 모델 정의
model = xgb.XGBClassifier(n_estimators=100, learning_rate=0.05, max_depth=6, random_state=42, use_label_encoder=False, eval_metric='logloss')

# 모델 학습
model.fit(X_train.reshape(X_train.shape[0], -1), y_train)

# 예측
y_pred = model.predict(X_test.reshape(X_test.shape[0], -1))

# 평가
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, zero_division=1)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f"Accuracy: {accuracy:.3f}")
print(f"Precision: {precision:.3f}")
print(f"Recall: {recall:.3f}")
print(f"F1 Score: {f1:.3f}")

X_train shape: (607761, 60, 62)
X_test shape: (151941, 60, 62)
y_train shape: (607761,)
y_test shape: (151941,)


Parameters: { "use_label_encoder" } are not used.



Accuracy: 0.731
Precision: 0.772
Recall: 0.656
F1 Score: 0.709


In [None]:
pip install catboost

Collecting catboost
  Downloading catboost-1.2.5-cp310-cp310-manylinux2014_x86_64.whl (98.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.2/98.2 MB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting graphviz (from catboost)
  Downloading graphviz-0.20.3-py3-none-any.whl (47 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.1/47.1 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: graphviz, catboost
Successfully installed catboost-1.2.5 graphviz-0.20.3


In [None]:
from catboost import CatBoostClassifier

# 학습 데이터와 검증 데이터 분리
X_train, X_test, y_train, y_test = train_test_split(X_sequences, y_sequences, test_size=0.2, random_state=42)

print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

# CatBoost 모델 정의
model = CatBoostClassifier(iterations=100, learning_rate=0.05, depth=6, random_seed=42, verbose=0)

# 모델 학습
model.fit(X_train.reshape(X_train.shape[0], -1), y_train)

# 예측
y_pred = model.predict(X_test.reshape(X_test.shape[0], -1))

# 평가
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, zero_division=1)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f"Accuracy: {accuracy:.3f}")
print(f"Precision: {precision:.3f}")
print(f"Recall: {recall:.3f}")
print(f"F1 Score: {f1:.3f}")

X_train shape: (607761, 60, 62)
X_test shape: (151941, 60, 62)
y_train shape: (607761,)
y_test shape: (151941,)
Accuracy: 0.733
Precision: 0.769
Recall: 0.665
F1 Score: 0.713


In [None]:
from sklearn.ensemble import RandomForestClassifier

# 학습 데이터와 검증 데이터 분리
X_train, X_test, y_train, y_test = train_test_split(X_sequences, y_sequences, test_size=0.2, random_state=42)

print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

# Random Forest 모델 정의
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# 모델 학습
rf_model.fit(X_train.reshape(X_train.shape[0], -1), y_train)

# 예측
y_pred_rf = rf_model.predict(X_test.reshape(X_test.shape[0], -1))

# 평가
accuracy_rf = accuracy_score(y_test, y_pred_rf)
precision_rf = precision_score(y_test, y_pred_rf, zero_division=1)
recall_rf = recall_score(y_test, y_pred_rf)
f1_rf = f1_score(y_test, y_pred_rf)

print(f"Random Forest - Accuracy: {accuracy_rf:.3f}")
print(f"Random Forest - Precision: {precision_rf:.3f}")
print(f"Random Forest - Recall: {recall_rf:.3f}")
print(f"Random Forest - F1 Score: {f1_rf:.3f}")

X_train shape: (607761, 60, 62)
X_test shape: (151941, 60, 62)
y_train shape: (607761,)
y_test shape: (151941,)
Random Forest - Accuracy: 0.947
Random Forest - Precision: 0.966
Random Forest - Recall: 0.927
Random Forest - F1 Score: 0.946


In [None]:
import os

# 데이터 저장 경로 설정
save_dir = '/content/drive/MyDrive/Data'
os.makedirs(save_dir, exist_ok=True)

np.save(os.path.join(save_dir, 'X_sequences.npy'), X_sequences)
np.save(os.path.join(save_dir, 'y_sequences.npy'), y_sequences)

In [None]:
import os

# 데이터 로드
load_dir = '/content/drive/MyDrive/Data'

X_sequences = np.load(os.path.join(load_dir, 'X_sequences.npy'))
y_sequences = np.load(os.path.join(load_dir, 'y_sequences.npy'))

In [None]:
pip install lightgbm

Collecting lightgbm
  Downloading lightgbm-4.4.0-py3-none-manylinux_2_28_x86_64.whl (3.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.6/3.6 MB[0m [31m13.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: lightgbm
Successfully installed lightgbm-4.4.0


In [None]:
import lightgbm as lgb

# 학습 데이터와 테스트 데이터 분리
X_train, X_test, y_train, y_test = train_test_split(X_sequences, y_sequences, test_size=0.2, random_state=42)

print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

# LightGBM 데이터셋 생성
lgb_train = lgb.Dataset(X_train.reshape(X_train.shape[0], -1), y_train, params={'max_bin': 255})
lgb_eval = lgb.Dataset(X_test.reshape(X_test.shape[0], -1), y_test, reference=lgb_train, params={'max_bin': 255})

# LightGBM 하이퍼파라미터 설정
params = {
    'task': 'train',
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'metric': {'binary_logloss', 'auc'},
    'num_leaves': 31,
    'learning_rate': 0.05,
    'feature_fraction': 0.9,
    'verbose': -1
}

# 모델 학습
gbm = lgb.train(
    params,
    lgb_train,
    num_boost_round=100,
    valid_sets=lgb_eval,
    callbacks=[lgb.early_stopping(stopping_rounds=10)]
)

# 예측
y_pred_prob = gbm.predict(X_test.reshape(X_test.shape[0], -1), num_iteration=gbm.best_iteration)
y_pred = (y_pred_prob > 0.5).astype(int)

# 성능 평가
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1 Score: {f1}')

X_train shape: (607761, 60, 62)
X_test shape: (151941, 60, 62)
y_train shape: (607761,)
y_test shape: (151941,)
Training until validation scores don't improve for 10 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's binary_logloss: 0.49074	valid_0's auc: 0.820659
Accuracy: 0.7378719371334926
Precision: 0.7856690319310694
Recall: 0.6535441370223979
F1 Score: 0.7135418165079548


In [None]:
from sklearn.ensemble import GradientBoostingClassifier


# 학습 데이터와 테스트 데이터 분리
X_train, X_test, y_train, y_test = train_test_split(X_sequences, y_sequences, test_size=0.2, random_state=42)

# Gradient Boosting 모델 정의
gb_model = GradientBoostingClassifier(n_estimators=100, learning_rate=0.05, max_depth=6, random_state=42)

# 모델 학습
gb_model.fit(X_train.reshape(X_train.shape[0], -1), y_train)

# 예측
y_pred_gb = gb_model.predict(X_test.reshape(X_test.shape[0], -1))

# 평가
accuracy_gb = accuracy_score(y_test, y_pred_gb)
precision_gb = precision_score(y_test, y_pred_gb, zero_division=1)
recall_gb = recall_score(y_test, y_pred_gb)
f1_gb = f1_score(y_test, y_pred_gb)

print(f"Gradient Boosting - Accuracy: {accuracy_gb:.3f}")
print(f"Gradient Boosting - Precision: {precision_gb:.3f}")
print(f"Gradient Boosting - Recall: {recall_gb:.3f}")
print(f"Gradient Boosting - F1 Score: {f1_gb:.3f}")

In [None]:
from sklearn.ensemble import AdaBoostClassifier

# 학습 데이터와 테스트 데이터 분리
X_train, X_test, y_train, y_test = train_test_split(X_sequences, y_sequences, test_size=0.2, random_state=42)

# AdaBoost 모델 정의
ab_model = AdaBoostClassifier(n_estimators=100, learning_rate=0.05, random_state=42)

# 모델 학습
ab_model.fit(X_train.reshape(X_train.shape[0], -1), y_train)

# 예측
y_pred_ab = ab_model.predict(X_test.reshape(X_test.shape[0], -1))

# 평가
accuracy_ab = accuracy_score(y_test, y_pred_ab)
precision_ab = precision_score(y_test, y_pred_ab, zero_division=1)
recall_ab = recall_score(y_test, y_pred_ab)
f1_ab = f1_score(y_test, y_pred_ab)

print(f"AdaBoost - Accuracy: {accuracy_ab:.3f}")
print(f"AdaBoost - Precision: {precision_ab:.3f}")
print(f"AdaBoost - Recall: {recall_ab:.3f}")
print(f"AdaBoost - F1 Score: {f1_ab:.3f}")

In [None]:
from sklearn.ensemble import ExtraTreesClassifier

# 학습 데이터와 테스트 데이터 분리
X_train, X_test, y_train, y_test = train_test_split(X_sequences, y_sequences, test_size=0.2, random_state=42)

# Extra Trees 모델 정의
et_model = ExtraTreesClassifier(n_estimators=100, random_state=42)

# 모델 학습
et_model.fit(X_train.reshape(X_train.shape[0], -1), y_train)

# 예측
y_pred_et = et_model.predict(X_test.reshape(X_test.shape[0], -1))

# 평가
accuracy_et = accuracy_score(y_test, y_pred_et)
precision_et = precision_score(y_test, y_pred_et, zero_division=1)
recall_et = recall_score(y_test, y_pred_et)
f1_et = f1_score(y_test, y_pred_et)

print(f"Extra Trees - Accuracy: {accuracy_et:.3f}")
print(f"Extra Trees - Precision: {precision_et:.3f}")
print(f"Extra Trees - Recall: {recall_et:.3f}")
print(f"Extra Trees - F1 Score: {f1_et:.3f}")

In [None]:
# SHAP 값 계산 및 시각화
explainer = shap.Explainer(rf_model)
shap_values = explainer(X_test.reshape(X_test.shape[0], -1))
shap.summary_plot(shap_values, X_test.reshape(X_test.shape[0], -1), feature_names=X.columns)