In [2]:
import chainer
import chainerrl
import numpy as np

In [204]:
from gym import spaces
import random


class State:
    COM = 0 # 0.15 / 100

    def __init__(self, pre_price):
        self.did_buy = -1
        self.pre_trade_price = pre_price

    def _buy(self, price):
        reward = 1 - price / self.pre_trade_price - self.COM
        self.did_buy = 1
        self.pre_trade_price = price
        return reward

    def _sell(self, price):
        reward = 1 - self.pre_trade_price / price - self.COM
        self.did_buy = -1
        self.pre_trade_price = price
        return reward

    def trade(self, price):
        if self.did_buy == 1:
            return self._sell(price)
        else:
            return self._buy(price)


class History:
    TIME_UNIT = 24 * 5 * 4
    ACTIONS = 2  # 取れる行動の種類数
    OBS_SIZE = TIME_UNIT + 1  # エージェントの観察値の種類数 ここでは履歴長(300) + 前回価格(1)

    def __init__(self, price_hist):
        self.turn = 0
        self.price = price_hist  # btc価値推移(pd.Series:分刻み?) 0-indexed
        self.index = self.TIME_UNIT - 1
        self.mean = price_hist.rolling(self.TIME_UNIT).mean()
        self.action_space = spaces.Discrete(self.ACTIONS)

    def reset(self):
        """
        環境の初期化をする
        """
        self.index = random.randint(self.TIME_UNIT - 1, self.price.size - 1000)
        self.state = State(self.price[self.index])
        return self._get_observe()

    def _get_observe(self):
        if self.index == self.price.size:
            return None
        prices = self.price[self.index - (self.TIME_UNIT - 1): self.index + 1] / self.mean[self.index] - 1
        p = self.state.pre_trade_price / self.mean[self.index] - 1
        prices = prices.append(pd.Series(p))
        obs = np.array(prices * self.state.did_buy * -1, dtype=np.float32)
#         print(self.price[self.index - self.TIME_UNIT - 1: self.index + 1])
        return obs

    def render(self):
        """
        ステップごとの描画関数
        """
        pass

    def step(self, action):
        """
        agentが選んだ行動が与えられるので
        環境を変更し，観察値や報酬を返す

        :param int action: どの行動を選んだか
        :return:
            observe: numpy array: 環境の観察値を返す
            reward : float      : 報酬
            done   : boolean    : 終了したか否か
            info   : str (自由?): (デバッグ用などの)情報
        """
        price = self.price[self.index]
        reward = 0 #(1 - price / self.price[self.index - 1]) * self.state.did_buy + State.COM
        self.index += 1
        observe = self._get_observe()
#         print(observe)
        done = observe is None
        if action == 1:
            reward += self.state.trade(price) * 1.2
        info = 'price=%d idx=%d act=%d rwd=%f' % (
            price, self.index, action, reward
        )
        return observe, reward, done, info

    def get_action_space(self):
        """
        :return: Descrete: とれる行動の種類数を返す
        """
        return self.action_space.sample

In [16]:
def make_agent(env, obs_size, n_actions):
    """
    チュートリアル通りのagent作成
    ネットワークやアルゴリズムの決定
    """
    n_hidden_channels = 60 * 5 * 2
    n_hidden_layers = 3
    # 幅n_hidden_channels，隠れ層n_hidden_layersのネットワーク
    q_func = chainerrl.q_functions.FCStateQFunctionWithDiscreteAction(
        obs_size, n_actions, n_hidden_channels, n_hidden_layers
    )

    # 最適化関数の設定
    optimizer = chainer.optimizers.Adam(1e-2)
    optimizer.setup(q_func)

    # 割引率の設定
    gamma = 0.95

    # 探索方針の設定
    explorer = chainerrl.explorers.ConstantEpsilonGreedy(
        epsilon=0.3, random_action_func=env.get_action_space()
    )

    replay_buffer = chainerrl.replay_buffer.ReplayBuffer(10 ** 6)

    agent = chainerrl.agents.DoubleDQN(
        q_func, optimizer, replay_buffer, gamma, explorer,
        replay_start_size=500#, update_frequency=1,
        #target_update_frequency=100
    )
    return agent

In [206]:
def train_module(env, agent):
    """
    chainerrlのモジュールによるtraining
    """
    import logging
    import sys
    import gym
    gym.undo_logger_setup()  # Turn off gym's default logger settings
    logging.basicConfig(level=logging.INFO, stream=sys.stdout, format='')

    chainerrl.experiments.train_agent_with_evaluation(
        agent, env,
        steps=100000,           # 合計10000ステップagentを動かす
        eval_n_runs=5,         # 本番テストのたびに 5回評価を行う
        max_episode_len=200,   # 1ゲームのステップ数
        eval_interval=2000,   # 1000ステップごとに本番テストを行う
        outdir='agent/result') # Save everything to 'agent/result' directory

In [9]:
import sqlite3
import pandas as pd

db_name = 'db.sqlite3'
con = sqlite3.connect(db_name)

In [10]:
df = pd.read_sql_query('select * from history limit 5000000', con)
df.exec_date = pd.to_datetime(df.exec_date)
df = df.set_index('exec_date')
df = df['2000':]

exec_date
2017-08-23 23:16:00    454975.647059
2017-08-23 23:17:00    455325.217391
2017-08-23 23:18:00    455561.096154
2017-08-23 23:19:00    456238.176471
2017-08-23 23:20:00    456804.527778
2017-08-23 23:21:00    457085.770492
2017-08-23 23:22:00    457322.961538
2017-08-23 23:23:00    457693.500000
2017-08-23 23:24:00    457917.415385
2017-08-23 23:25:00    457939.711111
2017-08-23 23:26:00    457618.925000
2017-08-23 23:27:00    457596.076923
2017-08-23 23:28:00    457792.592593
2017-08-23 23:29:00    457079.631579
2017-08-23 23:30:00    457031.111111
2017-08-23 23:31:00    457304.843750
2017-08-23 23:32:00    457481.586207
2017-08-23 23:33:00    457053.942857
2017-08-23 23:34:00    456450.926829
2017-08-23 23:35:00    456037.925926
2017-08-23 23:36:00    456507.105263
2017-08-23 23:37:00    456504.400000
2017-08-23 23:38:00    456489.777778
2017-08-23 23:39:00    456751.928571
2017-08-23 23:40:00    456559.481481
2017-08-23 23:41:00    456808.162791
2017-08-23 23:42:00    45652

In [205]:
bench_price = df.price.resample('15Min').mean().fillna(method='ffill')
bench_price

exec_date
2017-08-23 23:15:00    457108.640777
2017-08-23 23:30:00    456675.891473
2017-08-23 23:45:00    454707.246781
2017-08-24 00:00:00    456225.617571
2017-08-24 00:15:00    456445.335714
2017-08-24 00:30:00    454891.975309
2017-08-24 00:45:00    453957.868902
2017-08-24 01:00:00    454515.079038
2017-08-24 01:15:00    456688.234043
2017-08-24 01:30:00    459398.009235
2017-08-24 01:45:00    462225.885567
2017-08-24 02:00:00    460335.884884
2017-08-24 02:15:00    458460.885838
2017-08-24 02:30:00    459432.827451
2017-08-24 02:45:00    456841.398291
2017-08-24 03:00:00    454196.062589
2017-08-24 03:15:00    453361.611881
2017-08-24 03:30:00    454592.826754
2017-08-24 03:45:00    455554.640127
2017-08-24 04:00:00    453753.551807
2017-08-24 04:15:00    453476.532819
2017-08-24 04:30:00    453769.017143
2017-08-24 04:45:00    454639.853383
2017-08-24 05:00:00    455183.373333
2017-08-24 05:15:00    456814.484321
2017-08-24 05:30:00    457536.040404
2017-08-24 05:45:00    45660

In [32]:
bench_price['2017-09-01':'2017-09-15'].reset_index()

Unnamed: 0,exec_date,price
0,2017-09-01 00:00:00,523176.573770
1,2017-09-01 00:01:00,523370.871795
2,2017-09-01 00:02:00,523291.705882
3,2017-09-01 00:03:00,523421.960000
4,2017-09-01 00:04:00,523524.892857
5,2017-09-01 00:05:00,523649.928571
6,2017-09-01 00:06:00,523903.250000
7,2017-09-01 00:07:00,523996.882353
8,2017-09-01 00:08:00,524097.470588
9,2017-09-01 00:09:00,524080.617647


In [207]:
# 環境の作成
env = History(bench_price.reset_index().price)

obs_size = env.OBS_SIZE
n_actions = env.ACTIONS
agent = make_agent(env, obs_size, n_actions)

save_path = 'agent/trade_15min'
# agent.load(save_path)

# training
train_module(env, agent)
agent.save(save_path)

outdir:agent/result step:200 episode:0 R:-0.033008077352
statistics:[('average_q', 0.0018278017830750991), ('average_loss', 0)]
outdir:agent/result step:400 episode:1 R:0.0926594659391
statistics:[('average_q', 0.004146194475021637), ('average_loss', 0)]
outdir:agent/result step:600 episode:2 R:0.0242053263235
statistics:[('average_q', 0.004950892625888467), ('average_loss', 0.0025571778844217124)]
outdir:agent/result step:800 episode:3 R:-0.0200712129896
statistics:[('average_q', 0.00478113978555808), ('average_loss', 0.0004560643186574082)]
outdir:agent/result step:1000 episode:4 R:-0.104661613971
statistics:[('average_q', 0.00418004391338584), ('average_loss', 0.00015176441543927297)]
outdir:agent/result step:1200 episode:5 R:-0.039144089327
statistics:[('average_q', 0.002712478476517576), ('average_loss', 7.408795946871971e-05)]
outdir:agent/result step:1400 episode:6 R:-0.0416818535662
statistics:[('average_q', 0.0014912295468764811), ('average_loss', 6.467430525776359e-05)]
outdi

test episode: 4 R: -0.0195100804169
The best score is updated 0.0 -> 0.0010833873268520648
Saved the agent to agent/result\10000
outdir:agent/result step:10200 episode:50 R:-0.0342281030134
statistics:[('average_q', 0.012309583815508418), ('average_loss', 4.161753648160996e-05)]
outdir:agent/result step:10400 episode:51 R:-0.0775392110325
statistics:[('average_q', 0.01233139067285669), ('average_loss', 2.6204024285667504e-05)]
outdir:agent/result step:10600 episode:52 R:-0.0778558495203
statistics:[('average_q', 0.012334868032814063), ('average_loss', 2.3284851839198174e-05)]
outdir:agent/result step:10800 episode:53 R:0.0244798264017
statistics:[('average_q', 0.012371637781619236), ('average_loss', 1.9117822467411874e-05)]
outdir:agent/result step:11000 episode:54 R:-0.0747947227713
statistics:[('average_q', 0.01223962056336843), ('average_loss', 1.8497975361284458e-05)]
outdir:agent/result step:11200 episode:55 R:-0.0553216185589
statistics:[('average_q', 0.012218394037057309), ('ave

test episode: 0 R: 0
test episode: 1 R: 0
test episode: 2 R: 0
test episode: 3 R: 0
test episode: 4 R: 0
outdir:agent/result step:20200 episode:100 R:-0.103711570858
statistics:[('average_q', 0.010993026781312481), ('average_loss', 1.7018943341593366e-05)]
outdir:agent/result step:20400 episode:101 R:0.0305046348789
statistics:[('average_q', 0.011064216395011319), ('average_loss', 1.6358153135925037e-05)]
outdir:agent/result step:20600 episode:102 R:0.0353048646636
statistics:[('average_q', 0.011251144463109616), ('average_loss', 1.8420610806581186e-05)]
outdir:agent/result step:20800 episode:103 R:-0.0831077824143
statistics:[('average_q', 0.011448388227125645), ('average_loss', 2.1152568451738806e-05)]
outdir:agent/result step:21000 episode:104 R:-0.109451098032
statistics:[('average_q', 0.011573430352135266), ('average_loss', 1.7190543401865937e-05)]
outdir:agent/result step:21200 episode:105 R:-0.0442388621772
statistics:[('average_q', 0.011586182510931287), ('average_loss', 1.9515

outdir:agent/result step:29800 episode:148 R:0.0119711482621
statistics:[('average_q', 0.011643155997276743), ('average_loss', 1.6376857102798847e-05)]
outdir:agent/result step:30000 episode:149 R:-0.0233544042175
statistics:[('average_q', 0.011668284250874397), ('average_loss', 1.8896318103017868e-05)]
test episode: 0 R: 0
test episode: 1 R: 0
test episode: 2 R: 0
test episode: 3 R: 0
test episode: 4 R: 0
outdir:agent/result step:30200 episode:150 R:-0.0102597349394
statistics:[('average_q', 0.012179802741772272), ('average_loss', 5.5994692197854e-05)]
outdir:agent/result step:30400 episode:151 R:-0.00224325216712
statistics:[('average_q', 0.012608238372988474), ('average_loss', 7.93483974935826e-05)]
outdir:agent/result step:30600 episode:152 R:0.0606217222667
statistics:[('average_q', 0.012606052162441763), ('average_loss', 9.72863064830836e-05)]
outdir:agent/result step:30800 episode:153 R:-0.0134822323914
statistics:[('average_q', 0.012286554190582026), ('average_loss', 9.13120748

outdir:agent/result step:39600 episode:197 R:-0.109393234399
statistics:[('average_q', 0.009061826791494542), ('average_loss', 7.065385911663309e-05)]
outdir:agent/result step:39800 episode:198 R:-0.0588972548473
statistics:[('average_q', 0.00897005910085585), ('average_loss', 5.375229777141535e-05)]
outdir:agent/result step:40000 episode:199 R:0.0635819235472
statistics:[('average_q', 0.008932403243581081), ('average_loss', 6.69442100979237e-05)]
test episode: 0 R: 0
test episode: 1 R: 0
test episode: 2 R: 0
test episode: 3 R: 0
test episode: 4 R: 0
outdir:agent/result step:40200 episode:200 R:-0.0189030200076
statistics:[('average_q', 0.008603849446047378), ('average_loss', 6.371385090869938e-05)]
outdir:agent/result step:40400 episode:201 R:-0.0682071168958
statistics:[('average_q', 0.007639006773054955), ('average_loss', 6.902713371738558e-05)]
outdir:agent/result step:40600 episode:202 R:-0.181804207971
statistics:[('average_q', 0.006880697136892887), ('average_loss', 7.5782293425

outdir:agent/result step:49600 episode:247 R:-0.0548285098177
statistics:[('average_q', 0.002864506617797139), ('average_loss', 7.565278036728486e-05)]
outdir:agent/result step:49800 episode:248 R:-0.00428005107446
statistics:[('average_q', 0.0030258582116531748), ('average_loss', 7.302379539783472e-05)]
outdir:agent/result step:50000 episode:249 R:-0.0519158397994
statistics:[('average_q', 0.003111744703484721), ('average_loss', 6.196250745615479e-05)]
test episode: 0 R: 0
test episode: 1 R: 0
test episode: 2 R: 0
test episode: 3 R: 0
test episode: 4 R: 0
outdir:agent/result step:50200 episode:250 R:-0.0472184971654
statistics:[('average_q', 0.008124302337120913), ('average_loss', 4.334161464683938e-05)]
outdir:agent/result step:50400 episode:251 R:0.024157762886
statistics:[('average_q', 0.007723259542702186), ('average_loss', 4.465021583129624e-05)]
outdir:agent/result step:50600 episode:252 R:-0.0326003004278
statistics:[('average_q', 0.007378547996945444), ('average_loss', 4.21414

statistics:[('average_q', 0.006383405316123881), ('average_loss', 4.8333795011616096e-05)]
outdir:agent/result step:59600 episode:297 R:0.115789250003
statistics:[('average_q', 0.006353329688948478), ('average_loss', 5.3723512252189004e-05)]
outdir:agent/result step:59800 episode:298 R:-0.0033934049648
statistics:[('average_q', 0.006338338611448464), ('average_loss', 4.8163740912134074e-05)]
outdir:agent/result step:60000 episode:299 R:0.0510998361043
statistics:[('average_q', 0.006562805348885784), ('average_loss', 4.051694585522508e-05)]
test episode: 0 R: 0.0399185360953
test episode: 1 R: 0.0153476533168
test episode: 2 R: 0.0275349854417
test episode: 3 R: -0.0876814928406
test episode: 4 R: -0.0106575115983
outdir:agent/result step:60200 episode:300 R:0.0836893824069
statistics:[('average_q', 0.0023677691687463316), ('average_loss', 2.230698850531622e-05)]
outdir:agent/result step:60400 episode:301 R:0.077308751796
statistics:[('average_q', 0.002469588070593613), ('average_loss',

outdir:agent/result step:69400 episode:346 R:-0.0515825067724
statistics:[('average_q', 0.003123371568183434), ('average_loss', 2.1327558593587727e-05)]
outdir:agent/result step:69600 episode:347 R:-0.00541996988935
statistics:[('average_q', 0.0031731726686913433), ('average_loss', 1.6730260224691893e-05)]
outdir:agent/result step:69800 episode:348 R:-0.149843575899
statistics:[('average_q', 0.003181242913641727), ('average_loss', 1.7925084407473414e-05)]
outdir:agent/result step:70000 episode:349 R:0.0564172770343
statistics:[('average_q', 0.003216224989932277), ('average_loss', 1.7499900634542062e-05)]
test episode: 0 R: 0
test episode: 1 R: 0
test episode: 2 R: 0
test episode: 3 R: 0
test episode: 4 R: 0
outdir:agent/result step:70200 episode:350 R:-0.0733210586448
statistics:[('average_q', 0.004261327247899024), ('average_loss', 1.7896415336979933e-05)]
outdir:agent/result step:70400 episode:351 R:-0.0833990709671
statistics:[('average_q', 0.004253698998166386), ('average_loss', 1.

outdir:agent/result step:79400 episode:396 R:0.0157033698467
statistics:[('average_q', 0.0042078038357478125), ('average_loss', 1.8839476036505347e-05)]
outdir:agent/result step:79600 episode:397 R:-0.0558474049479
statistics:[('average_q', 0.0041228407656405605), ('average_loss', 1.7922106785431807e-05)]
outdir:agent/result step:79800 episode:398 R:-0.0599942912046
statistics:[('average_q', 0.004166717507329992), ('average_loss', 2.0558706615923345e-05)]
outdir:agent/result step:80000 episode:399 R:0.0440185129625
statistics:[('average_q', 0.004170302657681195), ('average_loss', 2.6219984787291628e-05)]
test episode: 0 R: -0.013006719211
test episode: 1 R: 0.0366400547022
test episode: 2 R: -0.0369496434628
test episode: 3 R: -0.102982176364
test episode: 4 R: 0.0573200490621
outdir:agent/result step:80200 episode:400 R:0.0756044747392
statistics:[('average_q', 0.005303923935463384), ('average_loss', 1.6986305572044542e-05)]
outdir:agent/result step:80400 episode:401 R:0.0920393810727

outdir:agent/result step:89400 episode:446 R:0.0197424931839
statistics:[('average_q', 0.006293375035030303), ('average_loss', 1.7995957069889384e-05)]
outdir:agent/result step:89600 episode:447 R:0.0534072723599
statistics:[('average_q', 0.00627254217876681), ('average_loss', 1.8721014363547206e-05)]
outdir:agent/result step:89800 episode:448 R:-0.0608512578408
statistics:[('average_q', 0.0062914834026722485), ('average_loss', 1.5739757612083288e-05)]
outdir:agent/result step:90000 episode:449 R:-0.0750724919861
statistics:[('average_q', 0.006300634228106755), ('average_loss', 1.5789253713051108e-05)]
test episode: 0 R: 0
test episode: 1 R: 0
test episode: 2 R: 0
test episode: 3 R: 0
test episode: 4 R: 0
outdir:agent/result step:90200 episode:450 R:0.0062574865991
statistics:[('average_q', 0.006253473373366565), ('average_loss', 1.7083103284104202e-05)]
outdir:agent/result step:90400 episode:451 R:-0.0659544945125
statistics:[('average_q', 0.006323467634230145), ('average_loss', 1.637

outdir:agent/result step:99200 episode:495 R:0.0608715503004
statistics:[('average_q', 0.006862731659996362), ('average_loss', 1.6957537040895865e-05)]
outdir:agent/result step:99400 episode:496 R:0.0622995787548
statistics:[('average_q', 0.006814625847693928), ('average_loss', 1.6821217513280218e-05)]
outdir:agent/result step:99600 episode:497 R:-0.0994529983295
statistics:[('average_q', 0.0068083433103620535), ('average_loss', 1.7843497005820517e-05)]
outdir:agent/result step:99800 episode:498 R:-0.0287367059674
statistics:[('average_q', 0.006787499887273366), ('average_loss', 1.796614473810642e-05)]
outdir:agent/result step:100000 episode:499 R:0.0216582299883
statistics:[('average_q', 0.006756598783551301), ('average_loss', 1.715697206617215e-05)]
test episode: 0 R: 0
test episode: 1 R: 0
test episode: 2 R: 0
test episode: 3 R: 0
test episode: 4 R: 0
Saved the agent to agent/result\100000_finish


In [203]:
obs = env.reset()
done = False
R = 0
t = 0
jpy = 40000
btc = 0
com = 0.15 / 100
side = 'sell'
while not done and t < 10000:
    # env.render()
    action = agent.act(obs)
    price = env.price[env.index]
    if action == 1:
#         print(side, price, jpy + btc * price)
        if side == 'sell':
            jpy += btc * (price * (1 - com))
            btc = 0
            side = 'buy'
        else:
            btc += jpy / (price * (1 + com))
            jpy = 0
            side = 'sell'
    obs, r, done, _ = env.step(action)
    R += r
    t += 1
agent.stop_episode()
print(side, price, jpy + btc * price)
R

sell 764725.112288 8267.12453631


-0.36874832620255366

In [66]:
env.reset()

array([ -3.33017111e-02,  -3.47540006e-02,  -2.64158901e-02,
        -2.76343040e-02,  -3.69144976e-02,  -3.81019041e-02,
        -3.25510092e-02,  -3.28800566e-02,  -2.66034640e-02,
        -2.28973087e-02,  -2.30146144e-02,  -2.70482469e-02,
        -2.53713112e-02,  -2.77111661e-02,  -2.70384457e-02,
        -2.15960313e-02,  -1.96590163e-02,  -2.03899797e-02,
        -2.36755032e-02,  -2.13258862e-02,  -1.77524891e-02,
        -1.68221127e-02,  -1.60347391e-02,  -6.32817205e-03,
         3.13871563e-03,   1.30279483e-02,   1.41302003e-02,
         7.01587927e-03,   3.92294396e-03,   4.61383164e-03,
        -3.05690628e-05,  -1.12179655e-03,   7.40518328e-03,
         6.24970859e-03,   1.22779561e-02,   1.22518810e-02,
         1.08099794e-02,   9.75608267e-03,   9.19359457e-03,
         1.11959167e-02,   1.47709679e-02,   1.70483943e-02,
         1.38181141e-02,   1.49873989e-02,   6.88818237e-03,
         1.76530262e-03,   8.65532923e-03,   1.08598024e-02,
         9.02340841e-03,

In [172]:
obs, r, done, _ = env.step(0)
price = env.price[env.index]
price, r, env.state.did_buy

(527634.37217305799, 0, 1)