In [2]:
import FinanceDataReader as fdr
etfs = fdr.StockListing("ETF/KR")
etfs

Unnamed: 0,Symbol,Category,Name,Price,RiseFall,Change,ChangeRate,NAV,EarningRate,Volume,Amount,MarCap
0,459580,6,KODEX CD금리액티브(합성),1073470,2,70,0.01,1073460.0,0.6809,187972,201781,84859
1,360750,4,TIGER 미국S&P500,20880,2,55,0.26,20917.0,0.4292,6008102,125345,82737
2,069500,1,KODEX 200,41975,2,275,0.66,42009.0,21.1786,6876541,290843,66467
3,488770,7,KODEX 머니마켓액티브,103195,2,5,0.00,103203.0,0.7714,421231,43467,64947
4,133690,4,TIGER 미국나스닥100,136250,2,260,0.19,136545.0,5.4480,216459,29472,51176
...,...,...,...,...,...,...,...,...,...,...,...,...
985,465620,4,ACE 미국빅테크TOP7 Plus인버스(합성),9670,2,60,0.62,9634.0,-11.5916,459,4,24
986,139310,5,TIGER 금속선물(H),5770,5,-105,-1.79,5880.0,-1.2606,3917,22,23
987,145670,3,ACE 인버스,4505,5,-30,-0.66,4504.0,-17.9186,12119,54,23
988,275750,3,RISE 코스닥150선물인버스,3675,3,0,0.00,3678.0,-10.5840,1992,7,22


In [3]:
def calculate_rsi(data, window = 14):
    delta = data.diff()
    gain = delta.where(delta > 0, 0).rolling(window = 14).mean()
    loss = -delta.where(delta < 0, 0).rolling(window = 14).mean()
    RS = gain / (loss + 1e-10)
    RSI = 100 - (100 / (1 + RS))
    return RSI

In [12]:
def calculate_macd(data, short=12, long=26, signal=9):
    ema_short = data.ewm(span=short, adjust=False).mean()
    ema_long = data.ewm(span=long, adjust=False).mean()
    macd_line = ema_short - ema_long
    signal_line = macd_line.ewm(span=signal, adjust=False).mean()
    macd_histogram = macd_line - signal_line
    return macd_histogram

In [13]:
def calculate_obv(close, volume):
    direction = close.diff().apply(lambda x: 1 if x > 0 else (-1 if x < 0 else 0))
    obv = (volume * direction).cumsum()
    return obv

In [14]:
def calculate_atr(df, window=14):
    high = df['High']
    low = df['Low']
    close = df['Close']
    tr1 = high - low
    tr2 = (high - close.shift()).abs()
    tr3 = (low - close.shift()).abs()
    tr = pd.concat([tr1, tr2, tr3], axis=1).max(axis=1)
    atr = tr.rolling(window=window).mean()
    return atr

In [15]:
from tqdm import tqdm

etfs_dic = {}
for i in tqdm(range(len(etfs))):

    try:
        row = etfs.iloc[i]
        symbol = row['Symbol']
        name = row['Name']
        df = fdr.DataReader(symbol)
        df['RSI'] = calculate_rsi(df['Close'])
        df = df.dropna()
        df = df[['Close', 'Change', 'RSI']]
        df['MACD'] = calculate_macd(df['Close'])
        df['OBV'] = calculate_obv(df['Close'], df['Volume'])
        df['ATR'] = calculate_atr(df)
        etfs_dic[symbol] = [name, df]
    except:
        continue

100%|████████████████████████████████████████████████████████████████████████████████| 990/990 [01:44<00:00,  9.50it/s]


In [17]:
X = []
Y = []

window = 50

for symbol in tqdm(etfs_dic):
    name = etfs_dic[symbol][0]
    df = etfs_dic[symbol][1].copy().values
    for i in range(len(df) - window):
        # A = df[i : i + window, 1:].flatten()      # Change, RSI열만 학습
        A = df[i : i + window, [1, 2, 3, 4, 5]].flatten()  # Change, RSI, MACD, OBV, ATR 사용
        before = df[i + window - 1, 0]
        after = df[i + window, 0]
        B = (after - before) / before * 100 >= 5
        if len(A) == 100:
            X.append(A)
            Y.append(B)

0it [00:00, ?it/s]


In [7]:
import numpy as np

X = np.array(X)
Y = np.array(Y)

# 시간순으로 80% 학습, 20% 테스트로 나눔
split_idx = int(len(X) * 0.8)
train_x, test_x = X[:split_idx], X[split_idx:]
train_y, test_y = Y[:split_idx], Y[split_idx:]

In [8]:
from xgboost import XGBClassifier  
from sklearn.metrics import classification_report

model = XGBClassifier()
model.fit(train_x, train_y)

pred = model.predict(test_x)

report = classification_report(test_y, pred)
print(report)

# 성능 70% 이상으로 올라감

              precision    recall  f1-score   support

       False       1.00      1.00      1.00    213439
        True       0.70      0.37      0.48      1213

    accuracy                           1.00    214652
   macro avg       0.85      0.68      0.74    214652
weighted avg       0.99      1.00      0.99    214652



In [9]:
model.fit(X, Y)

In [10]:
for symbol in etfs_dic:
    try:
        name = etfs_dic[symbol][0]
        df = etfs_dic[symbol][1].copy()[["Change", "RSI"]].values[-window:].flatten()
        pred = model.predict([df])
        if pred[0] == 1:
            print(name)
    except:
        continue

----

In [28]:
import FinanceDataReader as fdr
import pandas as pd
import numpy as np
from tqdm import tqdm
from xgboost import XGBClassifier
from sklearn.metrics import classification_report

# === 기술적 지표 함수 정의 ===

def calculate_rsi(data, window=14):
    delta = data.diff()
    gain = delta.where(delta > 0, 0).rolling(window=window).mean()
    loss = -delta.where(delta < 0, 0).rolling(window=window).mean()
    rs = gain / (loss + 1e-10)
    return 100 - (100 / (1 + rs))

def calculate_macd(data, short=12, long=26, signal=9):
    ema_short = data.ewm(span=short, adjust=False).mean()
    ema_long = data.ewm(span=long, adjust=False).mean()
    macd = ema_short - ema_long
    signal_line = macd.ewm(span=signal, adjust=False).mean()
    return macd - signal_line

def calculate_obv(close, volume):
    direction = close.diff().apply(lambda x: 1 if x > 0 else -1 if x < 0 else 0)
    return (volume * direction).cumsum()

def calculate_atr(df, window=14):
    tr1 = df['High'] - df['Low']
    tr2 = (df['High'] - df['Close'].shift()).abs()
    tr3 = (df['Low'] - df['Close'].shift()).abs()
    tr = pd.concat([tr1, tr2, tr3], axis=1).max(axis=1)
    return tr.rolling(window).mean()

# === 데이터 수집 및 전처리 ===

etfs = fdr.StockListing("ETF/KR")
etfs_dic = {}
for i in tqdm(range(len(etfs))):
    try:
        row = etfs.iloc[i]
        symbol, name = row['Symbol'], row['Name']
        df = fdr.DataReader(symbol)
        if {'Close', 'Volume', 'High', 'Low'}.issubset(df.columns):
            df['Change'] = df['Close'].pct_change() * 100
            df['RSI'] = calculate_rsi(df['Close'])
            # df['MACD'] = calculate_macd(df['Close'])
            # df['OBV'] = calculate_obv(df['Close'], df['Volume'])
            df['ATR'] = calculate_atr(df)
            df = df.dropna()
            # df = df[['Close', 'Change', 'RSI', 'MACD', 'OBV', 'ATR']]
            df = df[['Close', 'Change', 'RSI', 'ATR']]
            etfs_dic[symbol] = [name, df]
    except:
        continue

# === 데이터셋 구성 ===

X, Y = [], []
window = 50
for symbol in tqdm(etfs_dic):
    df = etfs_dic[symbol][1].copy().values
    for i in range(len(df) - window):
        feature = df[i : i + window, 1:].flatten()
        before, after = df[i + window - 1, 0], df[i + window, 0]
        label = (after - before) / before * 100 >= 5
        # if len(feature) == 250:  # 5 features × 50일
        if len(feature) == 150:  # 3 features × 50일
            X.append(feature)
            Y.append(label)

# === 시계열 분할 및 모델 학습 ===

X, Y = np.array(X), np.array(Y)
split = int(len(X) * 0.8)
train_x, test_x = X[:split], X[split:]
train_y, test_y = Y[:split], Y[split:]

model = XGBClassifier()
model.fit(train_x, train_y)

pred = model.predict(test_x)
print(classification_report(test_y, pred))

100%|████████████████████████████████████████████████████████████████████████████████| 990/990 [01:42<00:00,  9.66it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 908/908 [00:02<00:00, 304.22it/s]


              precision    recall  f1-score   support

       False       1.00      1.00      1.00    213437
        True       0.71      0.35      0.47      1215

    accuracy                           1.00    214652
   macro avg       0.85      0.67      0.73    214652
weighted avg       0.99      1.00      0.99    214652



In [29]:
# === 전체 데이터 재학습 후 실전 예측 ===

model.fit(X, Y)

print("\n📈 다음날 5% 이상 상승 예상 ETF:")
for symbol in etfs_dic:
    try:
        name = etfs_dic[symbol][0]
        df = etfs_dic[symbol][1].copy().values[-window:, 1:]
        if df.shape[0] == window:
            latest = df.flatten().reshape(1, -1)
            if model.predict(latest)[0] == 1:
                print(name)
    except:
        continue


📈 다음날 5% 이상 상승 예상 ETF:
PLUS 태양광&ESS
