## 簡単な例の実装
http://testpy.hatenablog.com/entry/2017/02/02/000000

In [1]:
import pandas as pd
import gym_trading
import gym
import sys
import itertools
import numpy as np
import tensorflow as tf
import tensorflow.contrib.layers as layers

import baselines.common.tf_util as U
import baselines.deepq.utils as UT

from baselines import deepq
from baselines.deepq.replay_buffer import ReplayBuffer
from baselines.common.schedules import LinearSchedule


def model(inpt, num_actions, scope, reuse=False):
    """This model takes as input an observation and returns values of all actions."""
    with tf.variable_scope(scope, reuse=reuse):
        out = inpt
        out = layers.fully_connected(out, num_outputs=128, activation_fn=tf.nn.tanh)
        out = layers.fully_connected(out, num_outputs=64, activation_fn=tf.nn.tanh)
        out = layers.fully_connected(out, num_outputs=32, activation_fn=tf.nn.tanh)
        out = layers.fully_connected(out, num_outputs=num_actions, activation_fn=None)
        return out


def run_test(env, act, episodes=1, final_test=False):
    obs = env.reset(train=False)
    start = env.sim.train_end_index + 1
    end = env.sim.count - 1

    for episode in range(episodes):
        done = False
        while done is False:
            action = act(obs[None])
            obs, reward, done, info = env.step(action)

        if not final_test:
            journal = pd.DataFrame(env.portfolio.journal)
            profit = journal["Profit"].sum()
            return env.portfolio.average_profit_per_trade, profit
        else:
            print("Training period  %s - %s" % (env.sim.date_time[start], env.sim.date_time[end]))
            print("Average Reward is %s" % (env.portfolio.average_profit_per_trade))

    if final_test:
        env.generate_summary_stats()


with U.make_session(8):
    # csv = "/home/adrian/Escritorio/polinex/EURUSD60.csv"
    csv = "data/EURUSD60.csv"

    env = gym.make('trading-v0')
    env.initialise_simulator(csv, trade_period=50, train_split=0.7)

    act, train, update_target, debug = deepq.build_train(
        make_obs_ph=lambda name: UT.BatchInput(env.observation_space.shape, name=name),
        q_func=model,
        num_actions=env.action_space.n,
        optimizer=tf.train.AdamOptimizer(learning_rate=5e-4),
    )

    replay_buffer = ReplayBuffer(50000)
    # Create the schedule for exploration starting from 1 (every action is random) down to
    # 0.02 (98% of actions are selected according to values predicted by the model).
    exploration = LinearSchedule(schedule_timesteps=10000, initial_p=1.0, final_p=0.02)
    # Initialize the parameters and copy them to the target network.
    U.initialize()
    update_target()

    episode_rewards = [0.0]
    obs = env.reset()
    l_mean_episode_reward = []
    for t in itertools.count():
        # Take action and update exploration to the newest value
        action = act(obs[None], update_eps=exploration.value(t))[0]

        new_obs, rew, done, _ = env.step(action)

        # Store transition in the replay buffer.
        replay_buffer.add(obs, action, rew, new_obs, float(done))

        obs = new_obs

        episode_rewards[-1] += rew

        is_solved = np.mean(episode_rewards[-101:-1]) > 500 or t >= 1000
        is_solved = is_solved and len(env.portfolio.journal) != 0

        if done:

            journal = pd.DataFrame(env.portfolio.journal)
            profit = journal["Profit"].sum()

            try:
                print("-------------------------------------")
                print("steps                     | {:}".format(t))
                print("episodes                  | {}".format(len(episode_rewards)))
                print("% time spent exploring    | {}".format(int(100 * exploration.value(t))))

                print("--")
                l_mean_episode_reward.append(round(np.mean(episode_rewards[-101:-1]), 1))

                print("mean episode reward       | {:}".format(l_mean_episode_reward[-1]))
                print("Total operations          | {}".format(len(env.portfolio.journal)))
                print("Avg duration trades       | {}".format(round(journal["Trade Duration"].mean(), 2)))
                print("Total profit episode      | {}".format(round(profit), 1))
                print("Avg profit per trade      | {}".format(round(env.portfolio.average_profit_per_trade, 3)))

                print("--")

                reward_test, profit = run_test(env=env, act=act)
                print("Total profit test:        > {}".format(round(profit, 2)))
                print("Avg profit per trade test > {}".format(round(reward_test, 3)))
                print("-------------------------------------")
            except Exception as e:
                print("Exception: ", e)
                # Update target network periodically.

            obs = env.reset()
            episode_rewards.append(0)



        if is_solved:
            # Show off the result
            env.generate_summary_stats()
            run_test(env, act, final_test=True)
            UT.save_state( './test_model/test_model')
            break

        else:
            # Minimize the error in Bellman's equation on a batch sampled from replay buffer.
            if t > 500:
                obses_t, actions, rewards, obses_tp1, dones = replay_buffer.sample(32)
                train(obses_t, actions, rewards, obses_tp1, dones, np.ones_like(rewards))
            if t % 500 == 0:
                update_target()




  from ._conv import register_converters as _register_converters


                       Return       ATR  Open Trade  Duration Trade
Date_Time                                                          
2013-12-02 02:00:00  0.421251  0.355142         0.0             0.0
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


SUMMARY STATISTICS
Total Trades Taken:  167
Total Reward:  -611.1000000000129
Average Reward per Trade:  -3.6592814371258258
Win Ratio: 37.72455089820359 %


<matplotlib.figure.Figure at 0x20960274358>

[ { 'Entry Price': 1.35881,
    'Entry Time': Timestamp('2013-12-02 03:00:00'),
    'Exit Price': 1.35514,
    'Exit Time': Timestamp('2013-12-02 11:00:00'),
    'Profit': -39.70000000000062,
    'Trade Duration': 8,
    'Type': 'BUY',
    'reward': -2.6000000000018204},
  { 'Entry Price': 1.35511,
    'Entry Time': Timestamp('2013-12-02 12:00:00'),
    'Exit Price': 1.35405,
    'Exit Time': Timestamp('2013-12-02 15:00:00'),
    'Profit': -13.600000000000609,
    'Trade Duration': 3,
    'Type': 'BUY',
    'reward': -14.600000000000499},
  { 'Entry Price': 1.3537700000000001,
    'Entry Time': Timestamp('2013-12-02 22:00:00'),
    'Exit Price': 1.35343,
    'Exit Time': Timestamp('2013-12-03 03:00:00'),
    'Profit': 0.4000000000022901,
    'Trade Duration': 5,
    'Type': 'SELL',
    'reward': -0.29999999999785487},
  { 'Entry Price': 1.35346,
    'Entry Time': Timestamp('2013-12-03 04:00:00'),
    'Exit Price': 1.35362,
    'Exit Time': Timestamp('2013-12-03 06:00:00'),
    'Profit'

    'Entry Time': Timestamp('2014-01-03 19:00:00'),
    'Exit Price': 1.35978,
    'Exit Time': Timestamp('2014-01-03 21:00:00'),
    'Profit': -8.900000000000905,
    'Trade Duration': 2,
    'Type': 'BUY',
    'reward': -4.700000000000035},
  { 'Entry Price': 1.35979,
    'Entry Time': Timestamp('2014-01-03 22:00:00'),
    'Exit Price': 1.35786,
    'Exit Time': Timestamp('2014-01-06 07:00:00'),
    'Profit': 16.299999999999873,
    'Trade Duration': 9,
    'Type': 'SELL',
    'reward': 13.199999999999548},
  { 'Entry Price': 1.3594600000000001,
    'Entry Time': Timestamp('2014-01-06 11:00:00'),
    'Exit Price': 1.36376,
    'Exit Time': Timestamp('2014-01-06 19:00:00'),
    'Profit': 39.9999999999997,
    'Trade Duration': 8,
    'Type': 'BUY',
    'reward': 36.89999999999938},
  { 'Entry Price': 1.3637700000000001,
    'Entry Time': Timestamp('2014-01-06 20:00:00'),
    'Exit Price': 1.3632600000000001,
    'Exit Time': Timestamp('2014-01-07 00:00:00'),
    'Profit': -8.100000000

    'Entry Time': Timestamp('2014-01-23 06:00:00'),
    'Exit Price': 1.3543399999999999,
    'Exit Time': Timestamp('2014-01-23 07:00:00'),
    'Profit': -2.5999999999996,
    'Trade Duration': 1,
    'Type': 'SELL',
    'reward': -3.0},
  { 'Entry Price': 1.3638,
    'Entry Time': Timestamp('2014-01-23 12:00:00'),
    'Exit Price': 1.36656,
    'Exit Time': Timestamp('2014-01-23 15:00:00'),
    'Profit': 24.600000000000957,
    'Trade Duration': 3,
    'Type': 'BUY',
    'reward': -2.7999999999986898},
  { 'Entry Price': 1.36658,
    'Entry Time': Timestamp('2014-01-23 16:00:00'),
    'Exit Price': 1.36741,
    'Exit Time': Timestamp('2014-01-23 19:00:00'),
    'Profit': -11.300000000001084,
    'Trade Duration': 3,
    'Type': 'SELL',
    'reward': -15.800000000001699},
  { 'Entry Price': 1.36924,
    'Entry Time': Timestamp('2014-01-23 21:00:00'),
    'Exit Price': 1.36824,
    'Exit Time': Timestamp('2014-01-24 04:00:00'),
    'Profit': 7.000000000001119,
    'Trade Duration': 7,


<matplotlib.figure.Figure at 0x209648f0cf8>

[ { 'Entry Price': 1.37952,
    'Entry Time': Timestamp('2014-04-08 21:00:00'),
    'Exit Price': 1.37939,
    'Exit Time': Timestamp('2014-04-09 00:00:00'),
    'Profit': -1.6999999999981448,
    'Trade Duration': 3,
    'Type': 'SELL',
    'reward': -3.7999999999985796},
  { 'Entry Price': 1.37942,
    'Entry Time': Timestamp('2014-04-09 01:00:00'),
    'Exit Price': 1.3790200000000001,
    'Exit Time': Timestamp('2014-04-09 04:00:00'),
    'Profit': -6.9999999999995595,
    'Trade Duration': 3,
    'Type': 'BUY',
    'reward': -8.900000000000905},
  { 'Entry Price': 1.37901,
    'Entry Time': Timestamp('2014-04-09 05:00:00'),
    'Exit Price': 1.37899,
    'Exit Time': Timestamp('2014-04-09 08:00:00'),
    'Profit': -3.2000000000013102,
    'Trade Duration': 3,
    'Type': 'BUY',
    'reward': -2.3000000000020755},
  { 'Entry Price': 1.3789799999999999,
    'Entry Time': Timestamp('2014-04-09 09:00:00'),
    'Exit Price': 1.38036,
    'Exit Time': Timestamp('2014-04-09 13:00:00'),
 

    'Trade Duration': 7,
    'Type': 'SELL',
    'reward': -12.500000000000068},
  { 'Entry Price': 1.38629,
    'Entry Time': Timestamp('2014-05-02 19:00:00'),
    'Exit Price': 1.3870799999999999,
    'Exit Time': Timestamp('2014-05-05 02:00:00'),
    'Profit': 4.899999999998464,
    'Trade Duration': 7,
    'Type': 'BUY',
    'reward': 8.200000000000099},
  { 'Entry Price': 1.3873799999999998,
    'Entry Time': Timestamp('2014-05-05 07:00:00'),
    'Exit Price': 1.38757,
    'Exit Time': Timestamp('2014-05-05 09:00:00'),
    'Profit': -1.099999999998655,
    'Trade Duration': 2,
    'Type': 'BUY',
    'reward': -6.899999999998904},
  { 'Entry Price': 1.3871799999999999,
    'Entry Time': Timestamp('2014-05-05 11:00:00'),
    'Exit Price': 1.387,
    'Exit Time': Timestamp('2014-05-05 12:00:00'),
    'Profit': -4.7999999999984695,
    'Trade Duration': 1,
    'Type': 'BUY',
    'reward': 2.1000000000001044},
  { 'Entry Price': 1.38751,
    'Entry Time': Timestamp('2014-05-05 15:00:00

    'Type': 'BUY',
    'reward': -3.4999999999988347},
  { 'Entry Price': 1.37065,
    'Entry Time': Timestamp('2014-05-20 05:00:00'),
    'Exit Price': 1.37093,
    'Exit Time': Timestamp('2014-05-20 07:00:00'),
    'Profit': -0.1999999999994202,
    'Trade Duration': 2,
    'Type': 'BUY',
    'reward': 1.6000000000012697},
  { 'Entry Price': 1.36944,
    'Entry Time': Timestamp('2014-05-20 10:00:00'),
    'Exit Price': 1.3684,
    'Exit Time': Timestamp('2014-05-20 11:00:00'),
    'Profit': -13.399999999999299,
    'Trade Duration': 1,
    'Type': 'BUY',
    'reward': -2.7999999999986898},
  { 'Entry Price': 1.36839,
    'Entry Time': Timestamp('2014-05-20 12:00:00'),
    'Exit Price': 1.3699700000000001,
    'Exit Time': Timestamp('2014-05-20 15:00:00'),
    'Profit': 12.800000000001369,
    'Trade Duration': 3,
    'Type': 'BUY',
    'reward': 13.399999999998638},
  { 'Entry Price': 1.3699700000000001,
    'Entry Time': Timestamp('2014-05-20 16:00:00'),
    'Exit Price': 1.36946000

In [2]:
! explorer .

TypeError: 'sess' must be a Session; None