In [1]:
%%capture
%pip install -r requirements.txt

In [2]:
from collections import Counter

import torch.nn as nn
import torch.optim as optim
from easydict import EasyDict as edict

from modules.stock_data import get_stock_data, load_dfs
from modules.agent import DQNAgent
from modules.trainer import train
from modules.trainer import train_with_env
from modules.graph import training_loss, action_graph
from modules.env import TradingEnv

In [3]:
# Central experiment configuration. EasyDict allows both CONFIGS.KEY and
# CONFIGS["KEY"] access; both spellings are used further down the notebook.
CONFIGS = edict(dict(
    GET_DATASET=True,           # when truthy, get_stock_data(CONFIGS) runs before loading
    TICKERS=["TSLA", "KS11"],   # TICKERS[0] is used for report labels and the checkpoint name
    DATAFRAME_PATH="./data",
    WINDOW_SIZE=10,             # forwarded to TradingEnv(window_size=...)
    OUTPUT_SIZE=3,              # presumably the number of discrete actions -- TODO confirm in DQNAgent
    EPOCHS=50,
    GAMMA=0.9,
    BATCH_SIZE=128,
    LEARNING_RATE=1e-5,         # passed to optim.Adam(lr=...)
    # Epsilon (exploration) settings
    EPSILON_START=1,
    EPSILON_MIN=0.05,
    EPSILON_DECAY=0.96,
))

In [4]:
# Fetch the dataset first when the config asks for it, then load the frames.
if CONFIGS.GET_DATASET:
    get_stock_data(CONFIGS)

df = load_dfs(CONFIGS)

# First loaded frame.
# NOTE(review): despite the `ixic_df` name, the df[0] preview later in this notebook
# starts in 2010-06 at ~1.59, so this looks like the first entry of CONFIGS.TICKERS
# rather than an index series -- confirm the order load_dfs returns.
ixic_df = df[0]

# 1) 1-D float array of closing prices, fed to the environment
prices_for_env = ixic_df["Close"].values.astype(float)

# 2) (index, close) pairs in the shape action_graph takes
prices_for_graph = list(enumerate(prices_for_env))

# Environment settings, named so they are easy to spot and tune.
FEE_RATE = 0.001
INITIAL_CASH = 1_000_000

env = TradingEnv(
    prices=prices_for_env,
    window_size=CONFIGS.WINDOW_SIZE,
    fee_rate=FEE_RATE,
    initial_cash=INITIAL_CASH,
)
agent = DQNAgent(CONFIGS)

# MSE loss and an Adam optimizer over the agent's parameters.
loss_fn = nn.MSELoss()
optimizer = optim.Adam(agent.parameters(), lr=CONFIGS.LEARNING_RATE)

In [5]:
# Show the first loaded DataFrame as the cell's rich output.
df[0]

Unnamed: 0,Open,High,Low,Close,Volume,Adj Close
2010-06-29,1.266667,1.666667,1.169333,1.592667,281494500,1.592667
2010-06-30,1.719333,2.028000,1.553333,1.588667,257806500,1.588667
2010-07-01,1.666667,1.728000,1.351333,1.464000,123282000,1.464000
2010-07-02,1.533333,1.540000,1.247333,1.280000,77097000,1.280000
2010-07-06,1.333333,1.333333,1.055333,1.074000,103003500,1.074000
...,...,...,...,...,...,...
2025-11-19,406.179993,411.779999,398.500000,403.989990,72047700,403.989990
2025-11-20,414.630005,428.940002,394.739990,395.230011,113548800,395.230011
2025-11-21,402.320007,402.799988,383.760010,391.089996,100460600,391.089996
2025-11-24,402.170013,421.720001,401.089996,417.779999,96574400,417.779999


In [6]:
# Train the agent against the environment. The call returns two values, unpacked
# below as the loss history and the per-episode rewards.
training_result = train_with_env(agent, env, CONFIGS, optimizer, loss_fn)
loss_history, episode_rewards = training_result

Episode 1/50 | Reward: -3.18 | Epsilon: 0.9600 | Mean Loss: 0.0036
Episode 2/50 | Reward: 2.01 | Epsilon: 0.9216 | Mean Loss: 0.0075
Episode 3/50 | Reward: 0.29 | Epsilon: 0.8847 | Mean Loss: 0.0057
Episode 4/50 | Reward: 1.34 | Epsilon: 0.8493 | Mean Loss: 0.0049
Episode 5/50 | Reward: 3.24 | Epsilon: 0.8154 | Mean Loss: 0.0039
Episode 6/50 | Reward: 0.49 | Epsilon: 0.7828 | Mean Loss: 0.0032
Episode 7/50 | Reward: -0.80 | Epsilon: 0.7514 | Mean Loss: 0.0027
Episode 8/50 | Reward: 3.03 | Epsilon: 0.7214 | Mean Loss: 0.0022
Episode 9/50 | Reward: 0.71 | Epsilon: 0.6925 | Mean Loss: 0.0018
Episode 10/50 | Reward: 4.60 | Epsilon: 0.6648 | Mean Loss: 0.0017
Episode 11/50 | Reward: 1.48 | Epsilon: 0.6382 | Mean Loss: 0.0015
Episode 12/50 | Reward: 5.32 | Epsilon: 0.6127 | Mean Loss: 0.0014
Episode 13/50 | Reward: 2.75 | Epsilon: 0.5882 | Mean Loss: 0.0013
Episode 14/50 | Reward: 2.01 | Epsilon: 0.5647 | Mean Loss: 0.0013
Episode 15/50 | Reward: 5.98 | Epsilon: 0.5421 | Mean Loss: 0.0012
Ep

In [7]:
# Hand the loss history from training to the training_loss helper from modules.graph.
training_loss(loss_history)

In [8]:
# Roll the trained agent through the environment once.
# epsilon=0.0 is passed explicitly so this rollout matches the later evaluation
# cell instead of relying on run_with_agent's default (presumably this disables
# random exploration -- confirm in TradingEnv.run_with_agent).
states, actions, infos = env.run_with_agent(agent, epsilon=0.0)

# (step, action) pairs for the whole rollout
action_points = [(info["step"], a) for info, a in zip(infos, actions)]

# (step, price) pairs for the whole rollout
price_points = [(info["step"], info["price"]) for info in infos]

# Number of most recent environment steps to show. Each step is one row of the
# price series, so this counts rows, not calendar days.
N = 365

infos_last = infos[-N:]
actions_last = actions[-N:]

# prices: (index, close) pairs, re-indexed from 0 within the window
prices_last1y = [
    (i, info["price"])
    for i, info in enumerate(infos_last)
]

# actions: (index, action) pairs, re-indexed from 0 within the window
actions_last1y = list(enumerate(actions_last))

# Label the chart with the ticker the environment was built from (df[0]), the
# same label the return report and the checkpoint filename use, rather than a
# hard-coded "IXIC".
action_graph(CONFIGS['TICKERS'][0], prices_last1y, actions_last1y)

In [13]:
# Portfolio value recorded at every step of the last-N window.
portfolio_last = [step_info["portfolio_value"] for step_info in infos_last]

# Agent return over the window: relative change from the first to the last value.
initial_last, final_last = portfolio_last[0], portfolio_last[-1]
total_return_last = (final_last - initial_last) / initial_last

print(f"최근 {N}일 초기 자본: {initial_last:,.0f}원")
print(f"최근 {N}일 최종 자본: {final_last:,.0f}원")
print(f"최근 {N}일 내 에이전트 수익률: {total_return_last * 100:.2f}%")

# Prices over the same window, for the buy-and-hold baseline.
close_last = [step_info["price"] for step_info in infos_last]

first_close, last_close = close_last[0], close_last[-1]
buy_and_hold_return = (last_close - first_close) / first_close
print(f"단순 {CONFIGS['TICKERS'][0]} 지수 매수 보유 수익률: {buy_and_hold_return * 100:.2f}%")

최근 365일 초기 자본: 243,858,370원
최근 365일 최종 자본: 935,708,495원
최근 365일 내 에이전트 수익률: 283.71%
단순 TSLA 지수 매수 보유 수익률: 123.23%


In [10]:
# Evaluate the agent over the full price series with epsilon=0.0.
# Note: this rebinds `states`, `actions` and `infos` from the earlier rollout cell.
# `Counter` is imported in the notebook's import cell rather than here.
states, actions, infos = env.run_with_agent(agent, epsilon=0.0)

# How many times each action id was chosen.
print(Counter(actions))

portfolio_values = [info["portfolio_value"] for info in infos]

# `initial` is the first *recorded* portfolio value, not the starting cash: in the
# saved output it is ~1,092,623 while the environment was created with 1_000_000.
initial = portfolio_values[0]
final = portfolio_values[-1]

# Return as a plain fraction (1.0 == 100%), unlike the percentage in the window report.
ret = (final - initial) / initial
print("초기:", initial, "최종:", final, "수익률:", ret)

Counter({2: 2458, 0: 1186, 1: 224})
초기: 1092623.5457931615 최종: 935708494.8438854 수익률: 855.3869032903115


In [11]:
# Save via agent.nn.save to a file in the working directory, named after the
# first configured ticker.
checkpoint_path = f"./{CONFIGS['TICKERS'][0]}_dqn.pth"
agent.nn.save(checkpoint_path)