In [1]:
import time
import tushare as ts
import datetime
import pandas as pd
import os
import tensorflow as tf
from collections import namedtuple
import numpy as np

In [2]:
if os.path.exists('601318.SH_5min.csv'):
    data = pd.read_csv('601318.SH_5min.csv', index_col=0)
    print('Read sussessful')
#     print('head: \n', data.head())
#     print('tail: \n', data.tail())
else:
    print('File not exist')

Read sussessful


In [3]:
data.head(5)

Unnamed: 0,ts_code,trade_time,open,close,high,low,vol,amount
0,601318.SH,2019-10-22 15:00:00,90.15,90.29,90.29,90.15,966545.0,87224082.0
1,601318.SH,2019-10-22 14:55:00,90.12,90.15,90.19,90.11,735530.0,66309263.0
2,601318.SH,2019-10-22 14:50:00,90.1,90.11,90.12,90.07,558033.0,50276229.0
3,601318.SH,2019-10-22 14:45:00,90.1,90.1,90.14,90.06,640738.0,57733317.0
4,601318.SH,2019-10-22 14:40:00,90.25,90.12,90.25,90.06,724000.0,65254508.0


In [4]:
def get_time(t):
    time = datetime.datetime.strptime(t, '%Y-%m-%d %H:%M:%S')
    minus_9_30 = (int(time.strftime('%H'))-9) * 12 + int(time.strftime('%M')) / 5 - 6
    return minus_9_30 if minus_9_30 < 25 else minus_9_30 - 18

class Observations:
    def __init__(self, index, is_hold, wait_time, trade_price):
        # is_hold: 是否持有股票，1表示持有，0表示未持有
        # trade_price: 距离上次操作的时间（多少个5分钟）
        # trade_price: 上次交易价格
        
        self.index = index
        self.is_hold = is_hold 
        self.wait_time = wait_time
        self.trade_price = trade_price

    def values(self, history_data, length):
        # history_data: DataFram 索引越靠前日期越靠后
        # 返回数据为 length * 6 + 3， 前 length * 6 为每日的 time，open， close， high， low， vol-10000
        # 其中 time 为 0 到 48， 表示一天中的第几个5分钟
        # 最后三位分别是 is_hold * 100，即100为持仓, 持仓是否过夜，100为过夜
        
        recent_data = history_data[['trade_time', 'open', 'high', 'low', 'close', 'vol']][
            self.index: self.index+length]
        recent_data['vol'] = recent_data['vol']/10000
        recent_data['trade_time'] = recent_data['trade_time'].apply(lambda x: get_time(x))
        is_pass_night = self.wait_time > 48 or self.wait_time > recent_data['trade_time'].iloc[0]
        return np.hstack([np.array(recent_data.values).reshape(1,-1),
                          np.array([[self.is_hold*100, 100 if is_pass_night else 0, self.trade_price]])])
        
    def decode(self, history_data, length, log=False):
        recent_data = history_data[['trade_time', 'open', 'high', 'low', 'close', 'vol']][
            self.index: self.index+length]
        recent_data['trade_time'] = recent_data['trade_time'].apply(lambda x: get_time(x))

        if log:
            print('recent data is :\n', recent_data)
            print('')

            if self.is_hold:
                print('Hold stock for {} minutes， purchase price is {}.'.format(
                    self.wait_time*5, self.trade_price))
            else:
                print('Dosen\'t hold any thing.')
        return recent_data
        
    def __str__(self):
        return 'index: {}, is_hold: {}, wait_time: {}, trade_price: {}\n'.format(
            self.index, self.is_hold, self.wait_time, self.trade_price)
    
    def __repr__(self):
        return self.__str__()

In [None]:
# Observation test
# index， is_hold， wait_time， trade_price
# obs = Observations(*[1, 1, 0, 89])
# next_obs = Observations(*[0, 1, 1, 89])
# print(obs.values(data, 3))
# print(next_obs.values(data, 3))

In [None]:
class Actions:
    
    def __init__(self, action_prob):
        # 买、卖、持有的几率
        self.p_buy = action_prob[0]
        self.p_sell = action_prob[1] 
        self.p_hold = 1 - action_prob[0] - action_prob[1] 
        
        self.action_choose = np.random.choice(['buy', 'sell', 'hold'],
                                              p=[self.p_buy, self.p_sell, self.p_hold])
# Deubg 
        print('action_prob: {}, action is {}'.format(action_prob, self.action_choose))
    
    def choose(self):

        return self.action_choose

+ 注意： 交易默认只买 100 股，手续费默认

In [None]:
def calc_reward_batch(obs, next_obs, history_data):
    # obs 和 next_obs 为 Observation 类
    
    fee = obs.trade_price * 0.02 if next_obs.wait_time == 1 else 0
    if obs.is_hold == 1:
        delta_price = (next_obs.decode(history_data, 1).close.iloc[0]
                       - obs.decode(history_data, 1).close.iloc[0]) * 100
        return delta_price - fee
    else:
        return -fee
        

In [None]:
# 计算 reward 测试
# obs.decode(data, 3, log=True)
# next_obs.decode(data, 3, log=True)
# calc_reward_batch(obs, next_obs, data)

In [None]:
class Env:
    def __init__(self, hps, history_data):
        self._hps = hps
        self._history_data = history_data
        
        self._observations_dim = hps.days * 6 + 3
        self._actions_dim = 3
        return
    
    def reset(self):
        index = self._history_data.shape[0] - self._hps.days - 1
        return Observations(index=index, is_hold=0, wait_time=0, trade_price=0)
    
    def step(self, obs, action):
        # 输入为 Observations 类和 Actions 类
        # 返回值为 next observations， reward， done
        index, is_hold, wait_time, trade_price = obs.index, obs.is_hold, obs.wait_time, obs.trade_price
        done = True if index == 0 else False
        action_choose = action.choose()
        
        if is_hold == 1 and action_choose == 'sell':
            current_time = get_time(self._history_data['trade_time'].iloc[index])
            is_pass_night = wait_time>48 or wait_time>current_time
            if is_pass_night:
                is_hold = 0  # 卖掉了
                wait_time = 0  # 时间清0
                trade_price = self._history_data['close'].iloc[index]  # 以当前的收盘价为成交加个
            else:
                pass # 不做操作，类似 hold
        elif is_hold == 0 and action_choose == 'buy':
            is_hold = 1
            wait_time = 0
            trade_price = self._history_data['close'].iloc[index]
        else:
            pass # 不做操作
        
        next_obs = Observations(index-1, is_hold, wait_time+1, trade_price)

        return next_obs, calc_reward_batch(obs, next_obs, self._history_data), done
    
    @property
    def observations_dim(self):
        return self._observations_dim
    
    @property
    def actions_dim(self):
        return self._actions_dim

In [None]:
class DataSet:
    def __init__(self, hps, history_data):
        self._buffer = []
        self._length = 0
        self._hps = hps
        self._history_data = history_data
        return 
    
    def get_batch(self, nums):
        assert self._length > 1, 'Length of data is {} which is not enough. \
        Data need at least {}'.format(self._length, 2)
        
        rand_idx = np.random.randint(0, self._length-1, nums)
        obs = np.vstack([self._buffer[x].values(
            self._history_data, self._hps.days) for x in rand_idx])
        
        next_obs = np.vstack([self._buffer[x+1].values(
            self._history_data, self._hps.days) for x in rand_idx])
        
        reward = np.array([calc_reward_batch(self._buffer[x], self._buffer[x+1], self._history_data)
                           for x in rand_idx])
    
        return obs, next_obs, reward
    
    def add_data(self, obs):
        # obs 为 Observation 类
        self._buffer.append(obs)
        self._length += 1
        return
        

In [None]:
# DataSet test

# obs = Observations(*[1, 1, 0, 89])
# next_obs = Observations(*[0, 1, 1, 89])
# data_set = DataSet(hps, data)
# data_set.add_data(obs)
# data_set.add_data(obs)
# data_set.add_data(obs)
# print(data_set._buffer)
# obs.trade_price = 100
# print(data_set._buffer)
# print(obs)
# data_set.add_data(obs)
# obs, next_obs, reward = data_set.get_batch(2)
# print('obs: \n', obs)
# print('\nnext_obs: \n', next_obs)
# print('\nreward: \n', reward)

In [None]:
class Model:
    def __init__(self, env, hps):
        self._env = env
        self._hps = hps
        
        self.global_step = tf.Variable(0, name='global_step', trainable=False)
        self.action, self.Q, self.action_loss, self.Q_loss, self.action_train_opt, self.Q_train_opt = \
            self._build_graph()
        self._sess, self._summary_writer = self._sess_setup()
        return
    
    def train(self, iteration, data_set):
        for i in range(iteration):
            obs, next_obs, reward = data_set.get_batch(self._hps.batch_size)
            action_loss, Q_loss = self._train_one_step(obs, next_obs, reward)
            print('action_loss: {}, Q_loss : {}'.format(action_loss, Q_loss))

        return
    
    def test(self, data):
        return
    
    def predict(self, obs):
        action_prob = self._sess.run(self.action, {self._observations_ph: obs})
        return action_prob
    
    def _train_one_step(self, obs, next_obs, reward):
        feed_dict = {self._observations_ph: obs,
                    self._next_observations_ph: next_obs,
                    self._rewards_ph: reward}
        
        action_loss, Q_loss, _, _ = self._sess.run([self.action_loss, self.Q_loss,
                                                    self.action_train_opt, self.Q_train_opt],
                                                   feed_dict)
        return action_loss, Q_loss
    
    def _sess_setup(self):
        saver = tf.train.Saver(max_to_keep=3)
        sv = tf.train.Supervisor(logdir=self._hps.train_dir,
                   is_chief=True,
                   saver=saver,
                   summary_op=None,
                   save_summaries_secs=600, # save summaries for tensorboard every 60 secs
                   save_model_secs=600, # checkpoint every 600 secs
                   global_step=self.global_step,
                   init_feed_dict= None
                   )
        summary_writer = sv.summary_writer
        sess = sv.prepare_or_wait_for_session()
    
        return sess, summary_writer
    
    def _create_placeholders(self):
        observations_dim = self._env.observations_dim
        actions_dim = self._env.actions_dim

        
        self._observations_ph = tf.placeholder(
            tf.float32,
            shape=(None, observations_dim),
            name='observation',
        )
        self._next_observations_ph = tf.placeholder(
            tf.float32,
            shape=(None, observations_dim),
            name='next_observation',
        )
#         self._actions_ph = tf.placeholder(
#             tf.float32,
#             shape=(None, actions_dim),
#             name='actions',
#         )
        self._rewards_ph = tf.placeholder(
            tf.float32,
            shape=(None, ),
            name='rewards',
        )
        return
    
#     def _linear(self, arg, output_size, activation, scope=None, reuse=False):
#         input_size = arg.get_shape().as_list()[1]
#         print('input_size', input_size)
#         trunc_norm_init = tf.truncated_normal_initializer(stddev=self._hps.trunc_norm_init_std)
        
#         with tf.variable_scope(scope or "Linear", reuse=reuse):
#             matrix = tf.get_variable("Matrix", [input_size, output_size])
#             res = tf.matmul(arg, matrix)
#             bias_term = tf.get_variable("Bias", [output_size],
#                                         initializer=trunc_norm_init)
#         return activation(res + bias_term)
        
    def _action_Q_output(self, state, reuse=False):
        
        with tf.variable_scope('hidden_state', reuse=tf.AUTO_REUSE):
            hidden_states = tf.layers.dense(state, self._hps.hidden_dim,
                                           activation=tf.nn.sigmoid, name='state_hidden_layer')
            
        with tf.variable_scope('action_output', reuse=tf.AUTO_REUSE):
            actions = tf.nn.softmax(tf.layers.dense(hidden_states, self._env.actions_dim,
                                                    activation=tf.nn.sigmoid, name='action_output_layer'))
            
        with tf.variable_scope('Q_output', reuse=tf.AUTO_REUSE):
            Q = tf.layers.dense(tf.concat([hidden_states, actions], axis=1), 1,
                                           activation=None, name='Q_output_layer')
        return hidden_states, actions, Q
        
    def _build_graph(self):
        self._create_placeholders()
        _, action, Q = self._action_Q_output(self._observations_ph)
        _, _, next_Q = self._action_Q_output(self._next_observations_ph)
        
        # Calculate action loss and Q loss
        action_loss = -tf.reduce_sum(tf.squeeze(Q), axis=0)

        Q_loss = tf.reduce_sum(self._rewards_ph + tf.squeeze(self._hps.gamma * next_Q - Q), axis=0)

        
        # Get update option
        t_vars = tf.trainable_variables()
        action_vars = [var for var in t_vars 
                       if var.name.startswith('hidden_state') or var.name.startswith('action_output')]
        
        Q_vars = [var for var in t_vars 
                  if var.name.startswith('hidden_state') or var.name.startswith('Q_output')] 
        
        action_train_opt = tf.train.AdamOptimizer(self._hps.learning_rate).minimize(
            action_loss, var_list = action_vars)
        
        Q_train_opt = tf.train.AdamOptimizer(self._hps.learning_rate).minimize(
            action_loss, var_list = Q_vars)
        
        return action, Q, action_loss, Q_loss, action_train_opt, Q_train_opt

        

In [None]:
class Agent:
    def __init__(self, hps, env, history_data):
        self._hps = hps
        self._env = env
        self._history_data = history_data
        self._data_set = DataSet(hps, history_data)
        self._model = Model(env, hps)
        return
    
    def step(self, obs):
        self._data_set.add_data(obs)
        action_prob = self._model.predict(obs.values(self._history_data, self._hps.days))
        action = Actions(action_prob[0])
        
        if self._data_set._length > 20:
            self._model.train(1, self._data_set)
            
        return action
    

In [None]:
hps = {'trunc_norm_init_std': 1e-4,
      'hidden_dim': 20,
      'train_dir': './model',
      'gamma': 0.99,
      'learning_rate': 0.003,
      'batch_size': 10,
      'days': 20}
hps = namedtuple("HParams", hps.keys())(**hps)

In [None]:
# For model test
# env = Env(hps, data)
# obs = env.reset()
# obs.values(data, hps.days)

In [None]:
env = Env(hps, data)
agent = Agent(hps, env, data)

Instructions for updating:
Please switch to tf.train.MonitoredTrainingSession
INFO:tensorflow:Restoring parameters from ./model/model.ckpt-0
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Starting standard services.
INFO:tensorflow:Saving checkpoint to path ./model/model.ckpt
INFO:tensorflow:Starting queue runners.
INFO:tensorflow:global_step/sec: 0


In [None]:
obs = env.reset()
rewards = []
reward_sum = 0
n = 100000
for i in range(n):
    print('{}/{}'.format(i, n))
    action = agent.step(obs)
    obs, reward, done = env.step(obs, action)
    rewards.append(reward)
    reward_sum += reward
    print('reward is {:.2f}, reward sum is {:.2f}, time is {}, close is {}\n'.format(
        reward, reward_sum, obs.decode(data, 1)['trade_time'].iloc[0], obs.decode(data, 1)['close'].iloc[0]))


0/100000
action_prob: [0.39657342 0.38204104 0.22138563], action is sell
reward is -0.00, reward sum is 0.00, time is 21.0, close is 28.28

1/100000
action_prob: [0.43359554 0.3283313  0.23807319], action is buy
reward is -0.00, reward sum is 0.00, time is 22.0, close is 28.27

2/100000
action_prob: [0.4135311  0.37282205 0.21364687], action is buy
reward is 8.00, reward sum is 8.00, time is 23.0, close is 28.35

3/100000
action_prob: [0.4195073  0.33264577 0.24784693], action is sell
reward is 2.00, reward sum is 10.00, time is 24.0, close is 28.37

4/100000
action_prob: [0.45859054 0.33935025 0.20205916], action is sell
reward is 5.00, reward sum is 15.00, time is 25.0, close is 28.42

5/100000
action_prob: [0.42529476 0.34749025 0.22721496], action is sell
reward is -8.00, reward sum is 7.00, time is 26.0, close is 28.34

6/100000
action_prob: [0.385062   0.39691684 0.21802115], action is buy
reward is 1.00, reward sum is 8.00, time is 27.0, close is 28.35

7/100000
action_prob: [0.

action_loss: -38.2979736328125, Q_loss : -22.382976531982422
reward is 11.00, reward sum is 137.85, time is 19.0, close is 30.21

48/100000
action_prob: [0.3357955  0.45067617 0.21352829], action is buy
action_loss: -38.7000617980957, Q_loss : 24.04739761352539
reward is -9.00, reward sum is 128.85, time is 20.0, close is 30.12

49/100000
action_prob: [0.33346924 0.45320922 0.2133215 ], action is hold
action_loss: -39.10194778442383, Q_loss : -5.391016960144043
reward is 8.00, reward sum is 136.85, time is 21.0, close is 30.2

50/100000
action_prob: [0.33111945 0.45573086 0.21314968], action is sell
action_loss: -39.50363540649414, Q_loss : 17.604965209960938
reward is -12.00, reward sum is 124.85, time is 22.0, close is 30.08

51/100000
action_prob: [0.32874912 0.4582401  0.21301083], action is sell
action_loss: -39.90513229370117, Q_loss : -5.981053829193115
reward is 11.00, reward sum is 135.85, time is 23.0, close is 30.19

52/100000
action_prob: [0.3263612  0.46073583 0.21290295],

action_loss: -54.64453125, Q_loss : 47.857948303222656
reward is -10.00, reward sum is 154.66, time is 11.0, close is 29.9

89/100000
action_prob: [0.25213972 0.531382   0.21647824], action is buy
action_loss: -55.0401611328125, Q_loss : 66.44960021972656
reward is 10.00, reward sum is 164.66, time is 12.0, close is 30.0

90/100000
action_prob: [0.25092894 0.53252316 0.2165479 ], action is sell
action_loss: -55.4356689453125, Q_loss : 90.44564819335938
reward is -6.00, reward sum is 158.66, time is 13.0, close is 29.94

91/100000
action_prob: [0.2497609  0.533627   0.21661212], action is sell
action_loss: -55.8310546875, Q_loss : 24.441688537597656
reward is -2.00, reward sum is 156.66, time is 14.0, close is 29.92

92/100000
action_prob: [0.24863449 0.53469443 0.21667105], action is sell
action_loss: -56.22632598876953, Q_loss : -2.1442646980285645
reward is -9.00, reward sum is 147.66, time is 15.0, close is 29.83

93/100000
action_prob: [0.24754857 0.53572655 0.2167249 ], action is 

action_loss: -70.79210662841797, Q_loss : -26.707923889160156
reward is 0.00, reward sum is 44.07, time is 3.0, close is 28.83

130/100000
action_prob: [0.226019   0.5573627  0.21661833], action is sell
action_loss: -71.18463134765625, Q_loss : -7.307242393493652
reward is 0.00, reward sum is 44.07, time is 4.0, close is 28.96

131/100000
action_prob: [0.22573732 0.5576723  0.21659033], action is sell
action_loss: -71.57711791992188, Q_loss : 20.702228546142578
reward is 0.00, reward sum is 44.07, time is 5.0, close is 28.99

132/100000
action_prob: [0.22546421 0.55797374 0.21656206], action is sell
action_loss: -71.96955871582031, Q_loss : -57.71969985961914
reward is 0.00, reward sum is 44.07, time is 6.0, close is 29.0

133/100000
action_prob: [0.22519927 0.55826724 0.21653353], action is buy
action_loss: -72.36196899414062, Q_loss : 14.27638053894043
reward is -0.58, reward sum is 43.49, time is 7.0, close is 29.0

134/100000
action_prob: [0.22494228 0.5585529  0.2165048 ], action 

action_loss: -86.85971069335938, Q_loss : -0.8685989379882812
reward is 16.00, reward sum is -32.51, time is 44.0, close is 28.24

171/100000
action_prob: [0.21901953 0.56550527 0.21547516], action is sell
action_loss: -87.25111389160156, Q_loss : 35.12749481201172
reward is 16.00, reward sum is -16.51, time is 45.0, close is 28.4

172/100000
action_prob: [0.2189215  0.565628   0.21545058], action is sell
action_loss: -87.64248657226562, Q_loss : -89.87643432617188
reward is -19.00, reward sum is -35.51, time is 46.0, close is 28.21

173/100000
action_prob: [0.2188255  0.56574833 0.21542618], action is buy
action_loss: -88.03385925292969, Q_loss : -12.88033676147461
reward is -12.00, reward sum is -47.51, time is 47.0, close is 28.09

174/100000
action_prob: [0.21873154 0.5658665  0.215402  ], action is buy
action_loss: -88.42520141601562, Q_loss : 32.115753173828125
reward is 25.00, reward sum is -22.51, time is 48.0, close is 28.34

175/100000
action_prob: [0.21863951 0.56598246 0.21

action_loss: -102.89542388916016, Q_loss : -28.169958114624023
reward is -1.00, reward sum is -9.65, time is 36.0, close is 28.54

212/100000
action_prob: [0.21622919 0.56913984 0.214631  ], action is sell
action_loss: -103.28631591796875, Q_loss : -15.032859802246094
reward is 24.00, reward sum is 14.35, time is 37.0, close is 28.78

213/100000
action_prob: [0.21618344 0.56920236 0.21461418], action is hold
action_loss: -103.67718505859375, Q_loss : -9.036767959594727
reward is 0.00, reward sum is 14.35, time is 38.0, close is 28.78

214/100000
action_prob: [0.2161385  0.569264   0.21459754], action is sell
action_loss: -104.06805419921875, Q_loss : -11.04068374633789
reward is -7.00, reward sum is 7.35, time is 39.0, close is 28.71

215/100000
action_prob: [0.21609421 0.56932473 0.21458103], action is hold
action_loss: -104.4588851928711, Q_loss : 21.389812469482422
reward is 3.00, reward sum is 10.35, time is 40.0, close is 28.74

216/100000
action_prob: [0.21605068 0.56938463 0.214

action_prob: [0.21484968 0.57108486 0.21406548], action is sell
action_loss: -118.91534423828125, Q_loss : 7.234844207763672
reward is 0.00, reward sum is -1.79, time is 28.0, close is 28.15

253/100000
action_prob: [0.21482424 0.571122   0.21405375], action is hold
action_loss: -119.30448913574219, Q_loss : 17.808401107788086
reward is 17.00, reward sum is 15.21, time is 29.0, close is 28.32

254/100000
action_prob: [0.21479918 0.57115865 0.21404211], action is buy
action_loss: -119.696533203125, Q_loss : 4.803032875061035
reward is -12.00, reward sum is 3.21, time is 30.0, close is 28.2

255/100000
action_prob: [0.21477446 0.57119495 0.21403062], action is sell
action_loss: -120.08712005615234, Q_loss : -16.773265838623047
reward is 8.00, reward sum is 11.21, time is 31.0, close is 28.28

256/100000
action_prob: [0.21475    0.5712308  0.21401918], action is buy
action_loss: -120.47770690917969, Q_loss : 52.22962188720703
reward is 0.00, reward sum is 11.21, time is 32.0, close is 28.

action_loss: -134.53579711914062, Q_loss : 46.654640197753906
reward is -5.00, reward sum is -28.89, time is 19.0, close is 28.11

293/100000
action_prob: [0.21403041 0.5723131  0.21365649], action is sell
action_loss: -134.92623901367188, Q_loss : 73.07833862304688
reward is 9.00, reward sum is -19.89, time is 20.0, close is 28.2

294/100000
action_prob: [0.21401498 0.5723369  0.21364808], action is sell
action_loss: -135.316650390625, Q_loss : 53.64683151245117
reward is -10.00, reward sum is -29.89, time is 21.0, close is 28.1

295/100000
action_prob: [0.21399973 0.57236046 0.21363978], action is sell
action_loss: -135.70709228515625, Q_loss : -16.91526985168457
reward is -6.00, reward sum is -35.89, time is 22.0, close is 28.04

296/100000
action_prob: [0.21398465 0.5723838  0.2136315 ], action is buy
action_loss: -136.09751892089844, Q_loss : -27.36096954345703
reward is -4.00, reward sum is -39.89, time is 23.0, close is 28.0

297/100000
action_prob: [0.21396975 0.57240695 0.2136

action_loss: -150.54116821289062, Q_loss : 20.494586944580078
reward is 0.00, reward sum is -35.45, time is 11.0, close is 28.65

334/100000
action_prob: [0.21351561 0.5731254  0.21335897], action is sell
action_loss: -150.93145751953125, Q_loss : 23.360084533691406
reward is 0.00, reward sum is -35.45, time is 12.0, close is 28.65

335/100000
action_prob: [0.21350555 0.57314175 0.21335274], action is buy
action_loss: -151.32180786132812, Q_loss : -2.513214111328125
reward is -0.56, reward sum is -36.01, time is 13.0, close is 28.6

336/100000
action_prob: [0.21349557 0.57315785 0.21334659], action is buy
action_loss: -151.7121124267578, Q_loss : 58.48287582397461
reward is 5.00, reward sum is -31.01, time is 14.0, close is 28.65

337/100000
action_prob: [0.21348569 0.5731739  0.21334045], action is buy
action_loss: -152.10243225097656, Q_loss : 62.35297393798828
reward is 2.00, reward sum is -29.01, time is 15.0, close is 28.67

338/100000
action_prob: [0.21347591 0.57318974 0.2133343

action_loss: -166.54275512695312, Q_loss : -3.8190202713012695
reward is 0.00, reward sum is 19.42, time is 3.0, close is 29.0

375/100000
action_prob: [0.21317069 0.5736938  0.21313551], action is sell
action_loss: -166.93301391601562, Q_loss : -13.669330596923828
reward is 0.00, reward sum is 19.42, time is 4.0, close is 29.0

376/100000
action_prob: [0.21316378 0.57370543 0.21313077], action is sell
action_loss: -167.3232421875, Q_loss : -33.673221588134766
reward is 0.00, reward sum is 19.42, time is 5.0, close is 28.94

377/100000
action_prob: [0.2131569  0.57371706 0.21312606], action is sell
INFO:tensorflow:Saving checkpoint to path ./model/model.ckpt
INFO:tensorflow:global_step/sec: 0
action_loss: -167.71348571777344, Q_loss : -14.832931518554688
reward is 0.00, reward sum is 19.42, time is 6.0, close is 28.8

378/100000
action_prob: [0.21315007 0.57372856 0.21312137], action is hold
action_loss: -168.103759765625, Q_loss : -18.24664306640625
reward is 0.00, reward sum is 19.42

action_loss: -182.151611328125, Q_loss : 68.17848205566406
reward is -4.00, reward sum is 91.84, time is 43.0, close is 29.55

415/100000
action_prob: [0.21293324 0.5740999  0.21296684], action is buy
action_loss: -182.54177856445312, Q_loss : -31.825408935546875
reward is 7.00, reward sum is 98.84, time is 44.0, close is 29.62

416/100000
action_prob: [0.2129282  0.57410866 0.21296309], action is hold
action_loss: -182.93197631835938, Q_loss : -70.38970947265625
reward is -22.00, reward sum is 76.84, time is 45.0, close is 29.4

417/100000
action_prob: [0.21292324 0.57411736 0.21295942], action is sell
action_loss: -183.32215881347656, Q_loss : -27.398229598999023
reward is 1.00, reward sum is 77.84, time is 46.0, close is 29.41

418/100000
action_prob: [0.21291828 0.57412595 0.21295571], action is buy
action_loss: -183.71234130859375, Q_loss : 37.162879943847656
reward is 0.00, reward sum is 77.84, time is 47.0, close is 29.41

419/100000
action_prob: [0.21291338 0.5741346  0.2129520

action_loss: -198.1488037109375, Q_loss : 25.42451286315918
reward is 20.00, reward sum is 190.67, time is 35.0, close is 30.95

456/100000
action_prob: [0.21275489 0.57441515 0.21282999], action is buy
action_loss: -198.53890991210938, Q_loss : 16.01462173461914
reward is -12.00, reward sum is 178.67, time is 36.0, close is 30.83

457/100000
action_prob: [0.21275118 0.5744218  0.21282704], action is buy
action_loss: -198.9290771484375, Q_loss : 44.84291076660156
reward is -13.00, reward sum is 165.67, time is 37.0, close is 30.7

458/100000
action_prob: [0.21274744 0.57442844 0.21282406], action is sell
action_loss: -199.31924438476562, Q_loss : 20.00680160522461
reward is -11.00, reward sum is 154.67, time is 38.0, close is 30.59

459/100000
action_prob: [0.21274377 0.5744351  0.21282114], action is hold
action_loss: -199.7093963623047, Q_loss : -15.997089385986328
reward is -6.00, reward sum is 148.67, time is 39.0, close is 30.53

460/100000
action_prob: [0.21274011 0.5744416  0.21

action_loss: -214.14453125, Q_loss : -32.14143753051758
reward is 5.00, reward sum is 139.48, time is 27.0, close is 30.5

497/100000
action_prob: [0.21262074 0.57465935 0.21271995], action is hold
action_loss: -214.53466796875, Q_loss : -9.145347595214844
reward is 1.00, reward sum is 140.48, time is 28.0, close is 30.51

498/100000
action_prob: [0.21261789 0.5746646  0.21271753], action is hold
action_loss: -214.9248046875, Q_loss : 32.85076141357422
reward is -3.00, reward sum is 137.48, time is 29.0, close is 30.48

499/100000
action_prob: [0.21261507 0.5746698  0.21271512], action is buy
action_loss: -215.31495666503906, Q_loss : 0.2864513397216797
reward is 3.00, reward sum is 140.48, time is 30.0, close is 30.51

500/100000
action_prob: [0.21261226 0.574675   0.21271272], action is buy
action_loss: -215.705078125, Q_loss : 28.842958450317383
reward is 5.00, reward sum is 145.48, time is 31.0, close is 30.56

501/100000
action_prob: [0.21260947 0.5746802  0.21271035], action is s

action_loss: -230.13931274414062, Q_loss : 0.6986122131347656
reward is -3.00, reward sum is 126.26, time is 19.0, close is 30.02

538/100000
action_prob: [0.21251726 0.5748528  0.21262991], action is buy
action_loss: -230.52940368652344, Q_loss : 48.09910202026367
reward is 4.00, reward sum is 130.26, time is 20.0, close is 30.06

539/100000
action_prob: [0.21251506 0.574857   0.21262793], action is buy
action_loss: -230.9195098876953, Q_loss : 109.09061431884766
reward is -11.00, reward sum is 119.26, time is 21.0, close is 29.95

540/100000
action_prob: [0.21251284 0.57486117 0.21262595], action is hold
action_loss: -231.30960083007812, Q_loss : 8.686901092529297
reward is 5.00, reward sum is 124.26, time is 22.0, close is 30.0

541/100000
action_prob: [0.21251066 0.5748654  0.212624  ], action is hold
action_loss: -231.69973754882812, Q_loss : -49.466590881347656
reward is -13.00, reward sum is 111.26, time is 23.0, close is 29.87

542/100000
action_prob: [0.21250848 0.5748695  0.2

action_loss: -246.13323974609375, Q_loss : 18.948461532592773
reward is -0.59, reward sum is 105.07, time is 11.0, close is 30.0

579/100000
action_prob: [0.21243578 0.575009   0.21255526], action is sell
action_loss: -246.5233154296875, Q_loss : -13.030227661132812
reward is -21.00, reward sum is 84.07, time is 12.0, close is 29.79

580/100000
action_prob: [0.21243401 0.5750124  0.21255362], action is sell
action_loss: -246.91342163085938, Q_loss : -7.469139099121094
reward is -2.00, reward sum is 82.07, time is 13.0, close is 29.77

581/100000
action_prob: [0.21243225 0.57501584 0.21255197], action is buy
action_loss: -247.30348205566406, Q_loss : -2.4730300903320312
reward is 8.00, reward sum is 90.07, time is 14.0, close is 29.85

582/100000
action_prob: [0.21243052 0.57501924 0.21255033], action is sell
action_loss: -247.69357299804688, Q_loss : -47.4769401550293
reward is 12.00, reward sum is 102.07, time is 15.0, close is 29.97

583/100000
action_prob: [0.21242878 0.5750226  0.2

action_loss: -262.1264953613281, Q_loss : -1.6212692260742188
reward is 0.00, reward sum is 104.47, time is 3.0, close is 29.8

620/100000
action_prob: [0.21237043 0.575137   0.21249257], action is sell
action_loss: -262.5165710449219, Q_loss : 5.374839782714844
reward is 0.00, reward sum is 104.47, time is 4.0, close is 29.83

621/100000
action_prob: [0.21236901 0.5751398  0.21249115], action is hold
action_loss: -262.90667724609375, Q_loss : -50.62907028198242
reward is 0.00, reward sum is 104.47, time is 5.0, close is 29.9

622/100000
action_prob: [0.2123676  0.5751426  0.21248977], action is sell
action_loss: -263.2967529296875, Q_loss : -27.63296127319336
reward is 0.00, reward sum is 104.47, time is 6.0, close is 29.9

623/100000
action_prob: [0.21236615 0.5751455  0.21248835], action is sell
action_loss: -263.6867980957031, Q_loss : -43.636871337890625
reward is 0.00, reward sum is 104.47, time is 7.0, close is 29.88

624/100000
action_prob: [0.21236476 0.5751483  0.21248698], a

action_loss: -278.1192626953125, Q_loss : -3.7811813354492188
reward is 9.00, reward sum is 90.87, time is 44.0, close is 29.25

661/100000
action_prob: [0.21231723 0.5752435  0.21243933], action is sell
action_loss: -278.50933837890625, Q_loss : 1.2149085998535156
reward is 5.00, reward sum is 95.87, time is 45.0, close is 29.3

662/100000
action_prob: [0.21231604 0.57524586 0.21243814], action is sell
action_loss: -278.8994140625, Q_loss : 29.211017608642578
reward is -2.00, reward sum is 93.87, time is 46.0, close is 29.28

663/100000
action_prob: [0.21231487 0.5752482  0.21243693], action is sell
action_loss: -279.2894287109375, Q_loss : 36.20710754394531
reward is 0.00, reward sum is 93.87, time is 47.0, close is 29.28

664/100000
action_prob: [0.21231373 0.5752505  0.21243575], action is hold
action_loss: -279.6795349121094, Q_loss : 63.203216552734375
reward is 5.00, reward sum is 98.87, time is 48.0, close is 29.33

665/100000
action_prob: [0.21231256 0.57525283 0.21243457], ac

action_loss: -294.1116638183594, Q_loss : 6.474887371063232
reward is -8.00, reward sum is 88.70, time is 36.0, close is 28.91

702/100000
action_prob: [0.21227333 0.575333   0.21239372], action is buy
action_loss: -294.501708984375, Q_loss : 16.054977416992188
reward is -1.00, reward sum is 87.70, time is 37.0, close is 28.9

703/100000
action_prob: [0.21227232 0.57533497 0.21239267], action is buy
action_loss: -294.8917236328125, Q_loss : -15.51391315460205
reward is -10.00, reward sum is 77.70, time is 38.0, close is 28.8

704/100000
action_prob: [0.21227136 0.575337   0.21239164], action is sell
action_loss: -295.28179931640625, Q_loss : 16.047176361083984
reward is 6.00, reward sum is 83.70, time is 39.0, close is 28.86

705/100000
action_prob: [0.2122704  0.575339   0.21239062], action is buy
action_loss: -295.6718444824219, Q_loss : -24.956714630126953
reward is 10.00, reward sum is 93.70, time is 40.0, close is 28.96

706/100000
action_prob: [0.21226943 0.5753409  0.21238957], 

action_loss: -309.71368408203125, Q_loss : 13.320866584777832
reward is 4.00, reward sum is 113.52, time is 27.0, close is 29.19

742/100000
action_prob: [0.21223746 0.5754074  0.21235517], action is sell
action_loss: -310.10369873046875, Q_loss : -1.7074246406555176
reward is 1.00, reward sum is 114.52, time is 28.0, close is 29.2

743/100000
action_prob: [0.21223664 0.5754091  0.21235427], action is buy
action_loss: -310.4937744140625, Q_loss : -9.104934692382812
reward is 5.00, reward sum is 119.52, time is 29.0, close is 29.25

744/100000
action_prob: [0.2122358  0.5754108  0.21235335], action is buy
action_loss: -310.8838195800781, Q_loss : 42.69317626953125
reward is 3.00, reward sum is 122.52, time is 30.0, close is 29.28

745/100000
action_prob: [0.21223502 0.5754125  0.21235247], action is sell
action_loss: -311.2738342285156, Q_loss : 41.887264251708984
reward is 0.00, reward sum is 122.52, time is 31.0, close is 29.28

746/100000
action_prob: [0.2122342  0.5754142  0.2123515

action_loss: -325.7054138183594, Q_loss : -23.257064819335938
reward is -11.00, reward sum is 165.35, time is 19.0, close is 29.94

783/100000
action_prob: [0.21220641 0.57547295 0.2123207 ], action is hold
action_loss: -326.095458984375, Q_loss : -8.856355667114258
reward is 12.00, reward sum is 177.35, time is 20.0, close is 30.06

784/100000
action_prob: [0.21220571 0.5754744  0.2123199 ], action is sell
action_loss: -326.48553466796875, Q_loss : 96.18714904785156
reward is 2.00, reward sum is 179.35, time is 21.0, close is 30.08

785/100000
action_prob: [0.21220501 0.5754759  0.21231912], action is sell
action_loss: -326.87554931640625, Q_loss : 9.73126220703125
reward is -5.00, reward sum is 174.35, time is 22.0, close is 30.03

786/100000
action_prob: [0.21220432 0.57547736 0.21231835], action is sell
action_loss: -327.26556396484375, Q_loss : 4.727333068847656
reward is -8.00, reward sum is 166.35, time is 23.0, close is 29.95

787/100000
action_prob: [0.21220364 0.5754788  0.21

action_loss: -341.6968994140625, Q_loss : -23.416976928710938
reward is 0.00, reward sum is 273.16, time is 11.0, close is 30.92

824/100000
action_prob: [0.21217996 0.5755296  0.21229047], action is sell
action_loss: -342.0869445800781, Q_loss : -19.420867919921875
reward is -11.00, reward sum is 262.16, time is 12.0, close is 30.81

825/100000
action_prob: [0.21217936 0.5755309  0.21228977], action is buy
action_loss: -342.4769287109375, Q_loss : 12.575241088867188
reward is 5.00, reward sum is 267.16, time is 13.0, close is 30.86

826/100000
action_prob: [0.21217878 0.57553214 0.21228907], action is sell
action_loss: -342.866943359375, Q_loss : 46.38995361328125
reward is 4.00, reward sum is 271.16, time is 14.0, close is 30.9

827/100000
action_prob: [0.21217817 0.57553345 0.2122884 ], action is sell
action_loss: -343.25701904296875, Q_loss : -4.004979133605957
reward is -6.00, reward sum is 265.16, time is 15.0, close is 30.84

828/100000
action_prob: [0.21217762 0.5755347  0.2122

action_loss: -357.68817138671875, Q_loss : 9.423110961914062
reward is 11.00, reward sum is 360.92, time is 3.0, close is 32.05

865/100000
action_prob: [0.21215726 0.5755789  0.21226381], action is hold
action_loss: -358.0782470703125, Q_loss : -60.580780029296875
reward is 45.00, reward sum is 405.92, time is 4.0, close is 32.5

866/100000
action_prob: [0.21215674 0.57558006 0.21226317], action is sell
action_loss: -358.46826171875, Q_loss : 38.41532897949219
reward is -5.00, reward sum is 400.92, time is 5.0, close is 32.45

867/100000
action_prob: [0.21215624 0.5755812  0.21226259], action is sell
action_loss: -358.85833740234375, Q_loss : -24.58856201171875
reward is -16.00, reward sum is 384.92, time is 6.0, close is 32.29

868/100000
action_prob: [0.21215573 0.5755823  0.21226197], action is sell
action_loss: -359.2483825683594, Q_loss : 26.407508850097656
reward is -17.00, reward sum is 367.92, time is 7.0, close is 32.12

869/100000
action_prob: [0.21215524 0.57558346 0.212261

action_loss: -373.679443359375, Q_loss : -14.736801147460938
reward is -1.00, reward sum is 399.92, time is 44.0, close is 32.44

906/100000
action_prob: [0.21213761 0.57562226 0.21224011], action is sell
action_loss: -374.0694580078125, Q_loss : -16.740692138671875
reward is -26.00, reward sum is 373.92, time is 45.0, close is 32.18

907/100000
action_prob: [0.21213718 0.5756232  0.21223958], action is hold
action_loss: -374.45947265625, Q_loss : 29.255416870117188
reward is 24.00, reward sum is 397.92, time is 46.0, close is 32.42

908/100000
action_prob: [0.21213673 0.5756242  0.21223904], action is sell
action_loss: -374.8494873046875, Q_loss : 44.66552734375
reward is 0.00, reward sum is 397.92, time is 47.0, close is 32.42

909/100000
action_prob: [0.2121363 0.5756252 0.2122385], action is hold
action_loss: -375.23956298828125, Q_loss : -10.347803115844727
reward is 4.00, reward sum is 401.92, time is 48.0, close is 32.46

910/100000
action_prob: [0.21213585 0.57562613 0.21223795

action_loss: -389.6705322265625, Q_loss : 12.103286743164062
reward is -1.00, reward sum is 472.63, time is 36.0, close is 33.68

947/100000
action_prob: [0.21212055 0.57566047 0.21221901], action is sell
action_loss: -390.0605773925781, Q_loss : -39.900604248046875
reward is 7.00, reward sum is 479.63, time is 37.0, close is 33.75

948/100000
action_prob: [0.21212015 0.5756613  0.21221854], action is sell
action_loss: -390.4505920410156, Q_loss : -33.48649597167969
reward is -9.00, reward sum is 470.63, time is 38.0, close is 33.66

949/100000
action_prob: [0.21211976 0.5756622  0.21221805], action is sell
action_loss: -390.8406066894531, Q_loss : 90.51161193847656
reward is 4.00, reward sum is 474.63, time is 39.0, close is 33.7

950/100000
action_prob: [0.21211939 0.5756631  0.21221757], action is buy
action_loss: -391.23065185546875, Q_loss : 57.087684631347656
reward is 5.00, reward sum is 479.63, time is 40.0, close is 33.75

951/100000
action_prob: [0.212119   0.5756639  0.21221

action_loss: -405.66143798828125, Q_loss : 19.943374633789062
reward is 22.00, reward sum is 557.28, time is 28.0, close is 34.93

988/100000
action_prob: [0.21210554 0.5756943  0.21220008], action is buy
action_loss: -406.0514831542969, Q_loss : 19.939483642578125
reward is -19.00, reward sum is 538.28, time is 29.0, close is 34.74

989/100000
action_prob: [0.2121052  0.57569516 0.21219963], action is hold
action_loss: -406.4414978027344, Q_loss : -44.62260818481445
reward is -2.00, reward sum is 536.28, time is 30.0, close is 34.72

990/100000
action_prob: [0.21210487 0.57569593 0.21219923], action is buy
action_loss: -406.8315124511719, Q_loss : -25.644298553466797
reward is -16.00, reward sum is 520.28, time is 31.0, close is 34.56

991/100000
action_prob: [0.21210453 0.57569665 0.21219876], action is sell
action_loss: -407.2215576171875, Q_loss : -8.072227478027344
reward is -6.00, reward sum is 514.28, time is 32.0, close is 34.5

992/100000
action_prob: [0.21210419 0.5756974  0.

action_loss: -421.26220703125, Q_loss : 26.787391662597656
reward is 5.98, reward sum is 498.44, time is 19.0, close is 34.5744

1028/100000
action_prob: [0.21209265 0.5757238  0.21218346], action is hold
action_loss: -421.6522521972656, Q_loss : 38.177459716796875
reward is 3.99, reward sum is 502.43, time is 20.0, close is 34.6143

1029/100000
action_prob: [0.21209237 0.5757246  0.21218309], action is buy
action_loss: -422.042236328125, Q_loss : 1.2145709991455078
reward is -12.95, reward sum is 489.48, time is 21.0, close is 34.4848

1030/100000
action_prob: [0.21209207 0.57572526 0.21218267], action is buy
action_loss: -422.43231201171875, Q_loss : 26.099332809448242
reward is -12.95, reward sum is 476.53, time is 22.0, close is 34.3553

1031/100000
action_prob: [0.21209177 0.575726   0.21218227], action is hold
action_loss: -422.8222961425781, Q_loss : 82.15919494628906
reward is -11.96, reward sum is 464.57, time is 23.0, close is 34.2357

1032/100000
action_prob: [0.21209146 0.5

action_loss: -436.86279296875, Q_loss : -77.67501831054688
reward is 11.96, reward sum is 415.40, time is 10.0, close is 33.7676

1068/100000
action_prob: [0.21208121 0.57575035 0.21216838], action is buy
action_loss: -437.2528381347656, Q_loss : 36.04547882080078
reward is -15.94, reward sum is 399.46, time is 11.0, close is 33.6082

1069/100000
action_prob: [0.21208097 0.57575107 0.21216805], action is sell
action_loss: -437.642822265625, Q_loss : -8.376411437988281
reward is 5.98, reward sum is 405.44, time is 12.0, close is 33.668

1070/100000
action_prob: [0.21208069 0.5757516  0.2121677 ], action is sell
action_loss: -438.0328674316406, Q_loss : 54.619659423828125
reward is 0.99, reward sum is 406.43, time is 13.0, close is 33.6779

1071/100000
action_prob: [0.2120804  0.57575226 0.21216731], action is buy
action_loss: -438.42291259765625, Q_loss : -4.03823184967041
reward is -9.96, reward sum is 396.47, time is 14.0, close is 33.5783

1072/100000
action_prob: [0.21208015 0.57575

action_loss: -452.46343994140625, Q_loss : -30.494613647460938
reward is 1.00, reward sum is 446.28, time is 1.0, close is 34.0764

1108/100000
action_prob: [0.21207099 0.5757743  0.2121547 ], action is sell
action_loss: -452.8533935546875, Q_loss : -75.2025375366211
reward is -30.56, reward sum is 415.72, time is 2.0, close is 33.7775

1109/100000
action_prob: [0.21207075 0.57577485 0.21215437], action is sell
action_loss: -453.243408203125, Q_loss : -60.11723327636719
reward is 0.00, reward sum is 415.72, time is 3.0, close is 34.0465

1110/100000
action_prob: [0.21207052 0.57577544 0.21215405], action is sell
action_loss: -453.6334533691406, Q_loss : -20.456323623657227
reward is 0.00, reward sum is 415.72, time is 4.0, close is 33.9668

1111/100000
action_prob: [0.21207026 0.575776   0.2121537 ], action is sell
action_loss: -454.0234680175781, Q_loss : -8.540214538574219
reward is 0.00, reward sum is 415.72, time is 5.0, close is 33.8273

1112/100000
action_prob: [0.21207005 0.5757

action_loss: -468.0639343261719, Q_loss : 42.656768798828125
reward is 3.99, reward sum is 417.03, time is 41.0, close is 33.4588

1148/100000
action_prob: [0.21206184 0.57579595 0.21214223], action is sell
action_loss: -468.4539794921875, Q_loss : -16.744525909423828
reward is 10.96, reward sum is 427.99, time is 42.0, close is 33.5684

1149/100000
action_prob: [0.21206164 0.5757964  0.21214192], action is sell
action_loss: -468.843994140625, Q_loss : -77.55845642089844
reward is 20.91, reward sum is 448.90, time is 43.0, close is 33.7775

1150/100000
action_prob: [0.21206142 0.57579696 0.21214162], action is hold
action_loss: -469.2339782714844, Q_loss : 110.60365295410156
reward is -18.92, reward sum is 429.98, time is 44.0, close is 33.5883

1151/100000
action_prob: [0.2120612  0.5757975  0.21214132], action is sell
action_loss: -469.6240234375, Q_loss : 31.213764190673828
reward is 7.97, reward sum is 437.95, time is 45.0, close is 33.668

1152/100000
action_prob: [0.21206099 0.57

action_loss: -483.66448974609375, Q_loss : -25.82665252685547
reward is -5.98, reward sum is 523.26, time is 32.0, close is 34.5146

1188/100000
action_prob: [0.2120536  0.57581556 0.21213077], action is sell
action_loss: -484.0545349121094, Q_loss : 44.039451599121094
reward is 24.91, reward sum is 548.17, time is 33.0, close is 34.7637

1189/100000
action_prob: [0.2120534  0.57581604 0.21213052], action is hold
action_loss: -484.4444580078125, Q_loss : 67.14556121826172
reward is 0.99, reward sum is 549.16, time is 34.0, close is 34.7736

1190/100000
action_prob: [0.21205322 0.5758165  0.21213025], action is hold
action_loss: -484.83453369140625, Q_loss : 45.15167236328125
reward is 16.94, reward sum is 566.10, time is 35.0, close is 34.943000000000005

1191/100000
action_prob: [0.21205303 0.57581705 0.21212995], action is sell
action_loss: -485.2245788574219, Q_loss : 8.127742767333984
reward is -15.94, reward sum is 550.16, time is 36.0, close is 34.7836

1192/100000
action_prob: [

action_loss: -499.2650146484375, Q_loss : 50.81175994873047
reward is -8.97, reward sum is 607.55, time is 23.0, close is 35.8195

1228/100000
action_prob: [0.21204618 0.5758335  0.2121203 ], action is hold
action_loss: -499.655029296875, Q_loss : -7.01652717590332
reward is 9.96, reward sum is 617.51, time is 24.0, close is 35.9191

1229/100000
action_prob: [0.21204598 0.575834   0.21212006], action is sell
action_loss: -500.0450134277344, Q_loss : 61.80954360961914
reward is 8.97, reward sum is 626.48, time is 25.0, close is 36.0088

1230/100000
action_prob: [0.21204582 0.5758344  0.21211979], action is buy
action_loss: -500.4350280761719, Q_loss : -10.552349090576172
reward is -26.90, reward sum is 599.58, time is 26.0, close is 35.7398

1231/100000
action_prob: [0.21204565 0.5758348  0.21211955], action is sell
action_loss: -500.82513427734375, Q_loss : 18.308759689331055
reward is -7.96, reward sum is 591.62, time is 27.0, close is 35.6602

1232/100000
action_prob: [0.21204546 0.5

action_loss: -514.8653564453125, Q_loss : -88.05866241455078
reward is 0.00, reward sum is 589.90, time is 14.0, close is 34.8932

1268/100000
action_prob: [0.21203944 0.5758499  0.21211067], action is sell
action_loss: -515.25537109375, Q_loss : -10.21255111694336
reward is 0.00, reward sum is 589.90, time is 15.0, close is 34.9131

1269/100000
action_prob: [0.21203928 0.5758503  0.21211044], action is buy
action_loss: -515.6453247070312, Q_loss : 31.13855743408203
reward is -0.71, reward sum is 589.19, time is 16.0, close is 34.9131

1270/100000
action_prob: [0.21203911 0.57585067 0.21211019], action is buy
action_loss: -516.035400390625, Q_loss : 44.95966339111328
reward is -5.98, reward sum is 583.21, time is 17.0, close is 34.8533

1271/100000
action_prob: [0.21203898 0.5758511  0.21210998], action is buy
action_loss: -516.4254150390625, Q_loss : -27.164260864257812
reward is -10.95, reward sum is 572.26, time is 18.0, close is 34.7438

1272/100000
action_prob: [0.2120388  0.57585

action_loss: -530.465576171875, Q_loss : 25.675357818603516
reward is 26.90, reward sum is 403.54, time is 5.0, close is 33.2297

1308/100000
action_prob: [0.21203333 0.5758649  0.21210177], action is hold
action_loss: -530.8555908203125, Q_loss : 8.701467514038086
reward is -5.98, reward sum is 397.56, time is 6.0, close is 33.1699

1309/100000
action_prob: [0.21203317 0.5758652  0.21210153], action is sell
action_loss: -531.2454833984375, Q_loss : 70.3775405883789
reward is -9.96, reward sum is 387.60, time is 7.0, close is 33.0703

1310/100000
action_prob: [0.21203302 0.57586557 0.21210134], action is buy
action_loss: -531.6355590820312, Q_loss : 41.703651428222656
reward is -0.99, reward sum is 386.61, time is 8.0, close is 33.0604

1311/100000
action_prob: [0.21203288 0.5758659  0.21210113], action is hold
action_loss: -532.0255737304688, Q_loss : 39.108154296875
reward is -13.95, reward sum is 372.66, time is 9.0, close is 32.9209

1312/100000
action_prob: [0.21203274 0.57586634 

action_loss: -546.0655517578125, Q_loss : -39.53666305541992
reward is 27.89, reward sum is 297.95, time is 45.0, close is 32.1738

1348/100000
action_prob: [0.21202774 0.5758787  0.21209356], action is sell
action_loss: -546.4556274414062, Q_loss : -17.374553680419922
reward is -11.95, reward sum is 286.00, time is 46.0, close is 32.0543

1349/100000
action_prob: [0.21202762 0.57587904 0.21209337], action is sell
action_loss: -546.8455810546875, Q_loss : -19.148418426513672
reward is -2.99, reward sum is 283.01, time is 47.0, close is 32.0244

1350/100000
action_prob: [0.21202746 0.5758794  0.21209316], action is sell
action_loss: -547.2356567382812, Q_loss : 11.777463912963867
reward is 11.95, reward sum is 294.96, time is 48.0, close is 32.1439

1351/100000
action_prob: [0.21202734 0.5758797  0.21209297], action is hold
action_loss: -547.6256103515625, Q_loss : 74.7047348022461
reward is 33.87, reward sum is 328.83, time is 0.0, close is 32.4826

1352/100000
action_prob: [0.21202719

action_loss: -561.6655883789062, Q_loss : 10.393355369567871
reward is 5.98, reward sum is 303.61, time is 36.0, close is 32.134

1388/100000
action_prob: [0.21202262 0.5758914  0.21208593], action is sell
action_loss: -562.0556640625, Q_loss : 11.28946304321289
reward is 3.98, reward sum is 307.59, time is 37.0, close is 32.1738

1389/100000
action_prob: [0.21202251 0.57589173 0.21208575], action is buy
action_loss: -562.4456176757812, Q_loss : -23.380691528320312
reward is 29.89, reward sum is 337.48, time is 38.0, close is 32.4727

1390/100000
action_prob: [0.21202241 0.57589203 0.21208559], action is sell
action_loss: -562.835693359375, Q_loss : -25.598358154296875
reward is -7.97, reward sum is 329.51, time is 39.0, close is 32.393

1391/100000
action_prob: [0.21202224 0.5758923  0.2120854 ], action is sell
action_loss: -563.2256469726562, Q_loss : -56.902244567871094
reward is -11.96, reward sum is 317.55, time is 40.0, close is 32.2734

1392/100000
action_prob: [0.21202214 0.575

action_loss: -577.2654418945312, Q_loss : -9.892667770385742
reward is -16.93, reward sum is 311.28, time is 27.0, close is 32.5723

1428/100000
action_prob: [0.21201797 0.5759031  0.21207885], action is sell
action_loss: -577.6554565429688, Q_loss : 18.609439849853516
reward is 0.99, reward sum is 312.27, time is 28.0, close is 32.5822

1429/100000
action_prob: [0.21201785 0.5759034  0.2120787 ], action is sell
action_loss: -578.0454711914062, Q_loss : 10.634550094604492
reward is -4.98, reward sum is 307.29, time is 29.0, close is 32.5324

1430/100000
action_prob: [0.21201776 0.5759037  0.21207857], action is sell
action_loss: -578.4354248046875, Q_loss : 42.18566131591797
reward is 2.99, reward sum is 310.28, time is 30.0, close is 32.5623

1431/100000
action_prob: [0.21201764 0.57590395 0.21207836], action is buy
action_loss: -578.825439453125, Q_loss : -36.25960159301758
reward is 1.00, reward sum is 311.28, time is 31.0, close is 32.5723

1432/100000
action_prob: [0.21201754 0.57

action_loss: -592.865234375, Q_loss : 0.3863525390625
reward is 29.89, reward sum is 345.83, time is 18.0, close is 32.7516

1468/100000
action_prob: [0.21201368 0.575914   0.21207233], action is sell
action_loss: -593.2551879882812, Q_loss : 25.037458419799805
reward is -1.00, reward sum is 344.83, time is 19.0, close is 32.7416

1469/100000
action_prob: [0.21201359 0.57591426 0.21207215], action is sell
action_loss: -593.6451416015625, Q_loss : 45.422569274902344
reward is 37.85, reward sum is 382.68, time is 20.0, close is 33.1201

1470/100000
action_prob: [0.21201345 0.5759145  0.212072  ], action is sell
action_loss: -594.03515625, Q_loss : 52.323638916015625
reward is 0.00, reward sum is 382.68, time is 21.0, close is 33.1201

1471/100000
action_prob: [0.21201336 0.5759148  0.21207185], action is hold
action_loss: -594.4251708984375, Q_loss : 18.06574821472168
reward is -6.97, reward sum is 375.71, time is 22.0, close is 33.0504

1472/100000
action_prob: [0.21201329 0.5759151  0.

action_loss: -608.4647827148438, Q_loss : -36.557228088378906
reward is -12.95, reward sum is 378.39, time is 9.0, close is 32.9707

1508/100000
action_prob: [0.21200971 0.5759241  0.21206619], action is buy
action_loss: -608.8547973632812, Q_loss : -49.04852294921875
reward is 3.98, reward sum is 382.37, time is 10.0, close is 33.0105

1509/100000
action_prob: [0.21200962 0.57592434 0.21206604], action is buy
action_loss: -609.2448120117188, Q_loss : 34.23450469970703
reward is 4.99, reward sum is 387.36, time is 11.0, close is 33.0604

1510/100000
action_prob: [0.21200952 0.57592463 0.21206588], action is sell
action_loss: -609.634765625, Q_loss : 54.34765625
reward is 5.97, reward sum is 393.33, time is 12.0, close is 33.1201

1511/100000
action_prob: [0.21200944 0.5759249  0.21206574], action is hold
action_loss: -610.0247192382812, Q_loss : -54.10023498535156
reward is -8.96, reward sum is 384.37, time is 13.0, close is 33.0305

1512/100000
action_prob: [0.21200933 0.57592505 0.21

action_loss: -624.0643310546875, Q_loss : -9.815653800964355
reward is 50.81, reward sum is 346.52, time is 0.0, close is 32.652

1548/100000
action_prob: [0.21200606 0.57593346 0.21206047], action is buy
action_loss: -624.4544067382812, Q_loss : 7.775453567504883
reward is 1.99, reward sum is 348.51, time is 1.0, close is 32.6719

1549/100000
action_prob: [0.21200597 0.5759337  0.21206033], action is sell
action_loss: -624.8444213867188, Q_loss : -34.1584358215332
reward is -8.64, reward sum is 339.87, time is 2.0, close is 32.5922

1550/100000
action_prob: [0.2120059  0.5759339  0.21206018], action is buy
action_loss: -625.234375, Q_loss : -52.142330169677734
reward is -0.65, reward sum is 339.22, time is 3.0, close is 32.383

1551/100000
action_prob: [0.21200581 0.5759342  0.21206005], action is hold
action_loss: -625.6243286132812, Q_loss : -46.2862548828125
reward is 8.97, reward sum is 348.19, time is 4.0, close is 32.4727

1552/100000
action_prob: [0.2120057  0.57593435 0.212059

action_loss: -639.6640625, Q_loss : 38.49336242675781
reward is 3.99, reward sum is 376.08, time is 40.0, close is 32.7516

1588/100000
action_prob: [0.2120027  0.57594216 0.21205512], action is sell
action_loss: -640.0540161132812, Q_loss : 32.559471130371094
reward is -18.93, reward sum is 357.15, time is 41.0, close is 32.5623

1589/100000
action_prob: [0.21200262 0.57594246 0.21205497], action is buy
action_loss: -640.4440307617188, Q_loss : 63.495582580566406
reward is 1.00, reward sum is 358.15, time is 42.0, close is 32.5723

1590/100000
action_prob: [0.21200252 0.57594264 0.21205485], action is sell
action_loss: -640.8340454101562, Q_loss : -12.398347854614258
reward is 29.88, reward sum is 388.03, time is 43.0, close is 32.8711

1591/100000
action_prob: [0.21200244 0.5759428  0.21205474], action is sell
action_loss: -641.2239990234375, Q_loss : 78.27775573730469
reward is 53.79, reward sum is 441.82, time is 44.0, close is 33.409

1592/100000
action_prob: [0.21200235 0.5759430

action_loss: -655.263671875, Q_loss : -35.542659759521484
reward is 23.90, reward sum is 438.51, time is 31.0, close is 33.2695

1628/100000
action_prob: [0.21199957 0.5759503  0.21205011], action is hold
action_loss: -655.6537475585938, Q_loss : -3.52655029296875
reward is -4.98, reward sum is 433.53, time is 32.0, close is 33.2197

1629/100000
action_prob: [0.21199948 0.57595056 0.21204998], action is sell
action_loss: -656.043701171875, Q_loss : -15.60044002532959
reward is 9.96, reward sum is 443.49, time is 33.0, close is 33.3193

1630/100000
action_prob: [0.21199942 0.57595074 0.21204987], action is buy
action_loss: -656.4337158203125, Q_loss : 5.393668174743652
reward is 4.98, reward sum is 448.47, time is 34.0, close is 33.3691

1631/100000
action_prob: [0.21199934 0.5759509  0.21204974], action is hold
action_loss: -656.82373046875, Q_loss : 12.717144012451172
reward is 19.93, reward sum is 468.40, time is 35.0, close is 33.5684

1632/100000
action_prob: [0.21199927 0.5759511 

action_loss: -670.8633422851562, Q_loss : 69.15139770507812
reward is -20.92, reward sum is 359.50, time is 22.0, close is 32.0742

1668/100000
action_prob: [0.21199666 0.5759579  0.2120454 ], action is sell
action_loss: -671.25341796875, Q_loss : 66.52047729492188
reward is 9.96, reward sum is 369.46, time is 23.0, close is 32.1738

1669/100000
action_prob: [0.21199659 0.5759581  0.21204531], action is sell
action_loss: -671.6433715820312, Q_loss : -8.481697082519531
reward is 8.97, reward sum is 378.43, time is 24.0, close is 32.2635

1670/100000
action_prob: [0.21199653 0.57595825 0.2120452 ], action is hold
action_loss: -672.0333862304688, Q_loss : -73.71835327148438
reward is 18.92, reward sum is 397.35, time is 25.0, close is 32.4527

1671/100000
action_prob: [0.21199644 0.5759585  0.21204507], action is buy
action_loss: -672.4234008789062, Q_loss : 190.08453369140625
reward is -7.97, reward sum is 389.38, time is 26.0, close is 32.373000000000005

1672/100000
action_prob: [0.211

action_loss: -686.463134765625, Q_loss : 0.15537738800048828
reward is 12.95, reward sum is 290.49, time is 13.0, close is 30.7793

1708/100000
action_prob: [0.21199398 0.57596505 0.21204099], action is hold
action_loss: -686.8531494140625, Q_loss : 10.351285934448242
reward is 8.96, reward sum is 299.45, time is 14.0, close is 30.8689

1709/100000
action_prob: [0.21199387 0.5759652  0.21204089], action is sell
action_loss: -687.2431640625, Q_loss : -1.5254459381103516
reward is -9.96, reward sum is 289.49, time is 15.0, close is 30.7693

1710/100000
action_prob: [0.21199381 0.5759654  0.2120408 ], action is sell
action_loss: -687.6331787109375, Q_loss : -72.4776611328125
reward is -18.92, reward sum is 270.57, time is 16.0, close is 30.5801

1711/100000
action_prob: [0.21199375 0.5759656  0.21204068], action is sell
action_loss: -688.0231323242188, Q_loss : 24.139812469482422
reward is 19.92, reward sum is 290.49, time is 17.0, close is 30.7793

1712/100000
action_prob: [0.21199368 0.

action_loss: -702.062744140625, Q_loss : -19.976234436035156
reward is -1.00, reward sum is 217.56, time is 4.0, close is 29.8529

1748/100000
action_prob: [0.21199144 0.5759717  0.21203683], action is sell
action_loss: -702.4527587890625, Q_loss : 21.935462951660156
reward is -7.97, reward sum is 209.59, time is 5.0, close is 29.7732

1749/100000
action_prob: [0.21199138 0.5759719  0.21203674], action is sell
action_loss: -702.8427734375, Q_loss : -41.517826080322266
reward is 1.00, reward sum is 210.59, time is 6.0, close is 29.7832

1750/100000
action_prob: [0.21199131 0.57597196 0.21203665], action is sell
action_loss: -703.23291015625, Q_loss : -15.922317504882812
reward is 24.90, reward sum is 235.49, time is 7.0, close is 30.0322

1751/100000
action_prob: [0.21199125 0.5759722  0.21203655], action is buy
action_loss: -703.6227416992188, Q_loss : -71.94621276855469
reward is -24.90, reward sum is 210.59, time is 8.0, close is 29.7832

1752/100000
action_prob: [0.21199119 0.575972

action_loss: -717.6622924804688, Q_loss : -3.8906641006469727
reward is 5.97, reward sum is 245.45, time is 44.0, close is 30.1318

1788/100000
action_prob: [0.21198909 0.575978   0.21203294], action is sell
action_loss: -718.0524291992188, Q_loss : -33.61867904663086
reward is 9.96, reward sum is 255.41, time is 45.0, close is 30.2314

1789/100000
action_prob: [0.21198905 0.57597816 0.21203285], action is buy
action_loss: -718.4423828125, Q_loss : -7.637447357177734
reward is -2.98, reward sum is 252.43, time is 46.0, close is 30.2016

1790/100000
action_prob: [0.21198899 0.5759783  0.21203274], action is sell
action_loss: -718.8323974609375, Q_loss : -62.078338623046875
reward is -12.95, reward sum is 239.48, time is 47.0, close is 30.0721

1791/100000
action_prob: [0.21198894 0.5759784  0.21203266], action is sell
action_loss: -719.2222900390625, Q_loss : 29.09136962890625
reward is -3.99, reward sum is 235.49, time is 48.0, close is 30.0322

1792/100000
action_prob: [0.21198887 0.5

action_loss: -733.2620239257812, Q_loss : -0.37261199951171875
reward is 0.00, reward sum is 491.27, time is 35.0, close is 33.0404

1828/100000
action_prob: [0.2119869  0.5759838  0.21202926], action is hold
action_loss: -733.6520385742188, Q_loss : 42.87249755859375
reward is 0.00, reward sum is 491.27, time is 36.0, close is 33.0404

1829/100000
action_prob: [0.21198684 0.575984   0.21202917], action is sell
action_loss: -734.0419921875, Q_loss : -15.260393142700195
reward is 0.00, reward sum is 491.27, time is 37.0, close is 33.0404

1830/100000
action_prob: [0.2119868  0.5759842  0.21202908], action is sell
action_loss: -734.4320678710938, Q_loss : -9.39428424835205
reward is 0.00, reward sum is 491.27, time is 38.0, close is 33.0404

1831/100000
action_prob: [0.21198675 0.5759843  0.212029  ], action is buy
action_loss: -734.822021484375, Q_loss : -83.93994140625
reward is 0.00, reward sum is 491.27, time is 39.0, close is 33.0404

1832/100000
action_prob: [0.21198669 0.5759844  

action_loss: -748.8616333007812, Q_loss : -43.388633728027344
reward is 15.94, reward sum is 639.37, time is 26.0, close is 34.5744

1868/100000
action_prob: [0.21198486 0.57598937 0.21202578], action is sell
action_loss: -749.2517700195312, Q_loss : -12.522523880004883
reward is -30.88, reward sum is 608.49, time is 27.0, close is 34.2656

1869/100000
action_prob: [0.21198478 0.57598954 0.21202569], action is sell
action_loss: -749.6417236328125, Q_loss : -26.041414260864258
reward is 10.96, reward sum is 619.45, time is 28.0, close is 34.3752

1870/100000
action_prob: [0.21198474 0.57598966 0.21202561], action is sell
action_loss: -750.03173828125, Q_loss : 14.892909049987793
reward is 10.96, reward sum is 630.41, time is 29.0, close is 34.4848

1871/100000
action_prob: [0.21198468 0.5759898  0.21202552], action is hold
action_loss: -750.421630859375, Q_loss : -48.514198303222656
reward is -33.87, reward sum is 596.54, time is 30.0, close is 34.1461

1872/100000
action_prob: [0.21198

action_loss: -764.4613647460938, Q_loss : 6.405426025390625
reward is 28.89, reward sum is 595.19, time is 17.0, close is 34.3553

1908/100000
action_prob: [0.21198292 0.5759946  0.21202248], action is sell
action_loss: -764.8513793945312, Q_loss : 5.391456604003906
reward is -6.98, reward sum is 588.21, time is 18.0, close is 34.2855

1909/100000
action_prob: [0.21198286 0.5759947  0.2120224 ], action is sell
action_loss: -765.2413330078125, Q_loss : 10.287565231323242
reward is 6.98, reward sum is 595.19, time is 19.0, close is 34.3553

1910/100000
action_prob: [0.21198282 0.57599485 0.21202233], action is sell
action_loss: -765.6314086914062, Q_loss : -42.66632843017578
reward is -18.93, reward sum is 576.26, time is 20.0, close is 34.166

1911/100000
action_prob: [0.2119828  0.57599497 0.21202226], action is sell
action_loss: -766.0213623046875, Q_loss : -82.5902099609375
reward is -8.96, reward sum is 567.30, time is 21.0, close is 34.0764

1912/100000
action_prob: [0.21198274 0.5

action_loss: -780.0609741210938, Q_loss : -55.34499740600586
reward is -0.69, reward sum is 594.82, time is 8.0, close is 34.8135

1948/100000
action_prob: [0.2119811  0.5759995  0.21201938], action is sell
action_loss: -780.451171875, Q_loss : -17.67448616027832
reward is -6.97, reward sum is 587.85, time is 9.0, close is 34.7438

1949/100000
action_prob: [0.21198109 0.5759996  0.21201931], action is hold
action_loss: -780.8411254882812, Q_loss : -7.878377914428711
reward is 0.99, reward sum is 588.84, time is 10.0, close is 34.7537

1950/100000
action_prob: [0.21198104 0.57599974 0.21201923], action is sell
action_loss: -781.2310791015625, Q_loss : 41.62732696533203
reward is -18.92, reward sum is 569.92, time is 11.0, close is 34.5645

1951/100000
action_prob: [0.21198098 0.57599986 0.21201915], action is sell
action_loss: -781.6210327148438, Q_loss : -113.69623565673828
reward is 4.98, reward sum is 574.90, time is 12.0, close is 34.6143

1952/100000
action_prob: [0.21198094 0.576 

action_loss: -795.660888671875, Q_loss : -7.016619682312012
reward is -8.96, reward sum is 393.61, time is 48.0, close is 32.8014

1988/100000
action_prob: [0.21197942 0.57600415 0.21201642], action is sell
action_loss: -796.0509033203125, Q_loss : -32.48075866699219
reward is -42.84, reward sum is 350.77, time is 0.0, close is 32.373000000000005

1989/100000
action_prob: [0.21197939 0.57600427 0.21201637], action is hold
action_loss: -796.4407958984375, Q_loss : -28.91440200805664
reward is -33.86, reward sum is 316.91, time is 1.0, close is 32.0344

1990/100000
action_prob: [0.21197933 0.5760043  0.21201628], action is sell
action_loss: -796.8309326171875, Q_loss : 32.31970977783203
reward is -23.61, reward sum is 293.30, time is 2.0, close is 31.8053

1991/100000
action_prob: [0.21197931 0.5760045  0.21201622], action is sell
action_loss: -797.2208251953125, Q_loss : 3.482415199279785
reward is 0.00, reward sum is 293.30, time is 3.0, close is 32.0045

1992/100000
action_prob: [0.21

action_loss: -811.260498046875, Q_loss : 63.02342987060547
reward is -1.99, reward sum is 385.30, time is 39.0, close is 33.3293

2028/100000
action_prob: [0.21197781 0.5760085  0.21201365], action is sell
action_loss: -811.6506958007812, Q_loss : -0.8125314712524414
reward is -12.95, reward sum is 372.35, time is 40.0, close is 33.1998

2029/100000
action_prob: [0.21197775 0.5760086  0.21201357], action is buy
action_loss: -812.0406494140625, Q_loss : -1.8419504165649414
reward is 4.98, reward sum is 377.33, time is 41.0, close is 33.2496

2030/100000
action_prob: [0.21197774 0.57600874 0.21201351], action is buy
action_loss: -812.4305419921875, Q_loss : -21.084314346313477
reward is 3.99, reward sum is 381.32, time is 42.0, close is 33.2895

2031/100000
action_prob: [0.21197769 0.57600886 0.21201344], action is buy
action_loss: -812.820556640625, Q_loss : -16.08820152282715
reward is 7.96, reward sum is 389.28, time is 43.0, close is 33.3691

2032/100000
action_prob: [0.21197768 0.57

action_loss: -826.8604125976562, Q_loss : 46.62141418457031
reward is -4.98, reward sum is 540.34, time is 30.0, close is 34.7637

2068/100000
action_prob: [0.21197632 0.57601273 0.21201098], action is hold
action_loss: -827.2504272460938, Q_loss : -148.89247131347656
reward is -1.99, reward sum is 538.35, time is 31.0, close is 34.7438

2069/100000
action_prob: [0.21197626 0.5760128  0.21201092], action is sell
action_loss: -827.640380859375, Q_loss : 46.60363006591797
reward is -37.86, reward sum is 500.49, time is 32.0, close is 34.3652

2070/100000
action_prob: [0.21197625 0.5760129  0.21201086], action is sell
action_loss: -828.0303955078125, Q_loss : -51.160255432128906
reward is 19.93, reward sum is 520.42, time is 33.0, close is 34.5645

2071/100000
action_prob: [0.2119762 0.576013  0.2120108], action is sell
action_loss: -828.42041015625, Q_loss : -42.25422668457031
reward is -9.97, reward sum is 510.45, time is 34.0, close is 34.4648

2072/100000
action_prob: [0.21197617 0.57

action_prob: [0.2119749  0.5760165  0.21200854], action is hold
action_loss: -842.460205078125, Q_loss : -8.464606285095215
reward is -19.92, reward sum is 468.24, time is 21.0, close is 33.9369

2108/100000
action_prob: [0.21197489 0.5760166  0.21200846], action is sell
action_loss: -842.8502807617188, Q_loss : -88.25849914550781
reward is 6.97, reward sum is 475.21, time is 22.0, close is 34.0066

2109/100000
action_prob: [0.21197486 0.5760167  0.21200842], action is sell
action_loss: -843.2402954101562, Q_loss : 50.39760971069336
reward is 18.93, reward sum is 494.14, time is 23.0, close is 34.1959

2110/100000
action_prob: [0.21197483 0.57601684 0.21200836], action is buy
action_loss: -843.6302490234375, Q_loss : 1.5537223815917969
reward is -2.99, reward sum is 491.15, time is 24.0, close is 34.166

2111/100000
action_prob: [0.21197477 0.5760169  0.21200827], action is hold
action_loss: -844.0201416015625, Q_loss : 16.559829711914062
reward is -4.98, reward sum is 486.17, time is 

action_loss: -858.06005859375, Q_loss : 6.329372406005859
reward is 1.00, reward sum is 620.26, time is 12.0, close is 36.2479

2148/100000
action_prob: [0.21197355 0.5760204  0.21200608], action is hold
action_loss: -858.4500122070312, Q_loss : 37.114784240722656
reward is 39.84, reward sum is 660.10, time is 13.0, close is 36.6463

2149/100000
action_prob: [0.21197349 0.5760205  0.212006  ], action is buy
action_loss: -858.8400268554688, Q_loss : 15.371591567993164
reward is -1.00, reward sum is 659.10, time is 14.0, close is 36.6363

2150/100000
action_prob: [0.21197349 0.5760206  0.21200596], action is sell
action_loss: -859.2300415039062, Q_loss : 3.3077011108398438
reward is -12.95, reward sum is 646.15, time is 15.0, close is 36.5068

2151/100000
action_prob: [0.21197344 0.57602066 0.2120059 ], action is hold
action_loss: -859.6200561523438, Q_loss : 4.2938079833984375
reward is 8.97, reward sum is 655.12, time is 16.0, close is 36.5965

2152/100000
action_prob: [0.2119734  0.57

action_loss: -873.6597900390625, Q_loss : -1.8365745544433594
reward is 0.00, reward sum is 543.83, time is 3.0, close is 35.4111

2188/100000
action_prob: [0.21197227 0.57602394 0.21200378], action is sell
action_loss: -874.0499877929688, Q_loss : 0.18953609466552734
reward is 0.00, reward sum is 543.83, time is 4.0, close is 35.4609

2189/100000
action_prob: [0.21197224 0.57602406 0.21200374], action is hold
action_loss: -874.43994140625, Q_loss : -2.3705801963806152
reward is 0.00, reward sum is 543.83, time is 5.0, close is 35.6602

2190/100000
action_prob: [0.2119722 0.5760241 0.2120037], action is sell
action_loss: -874.8297729492188, Q_loss : 58.12167739868164
reward is 0.00, reward sum is 543.83, time is 6.0, close is 35.71

2191/100000
action_prob: [0.21197218 0.5760242  0.2120036 ], action is sell
action_loss: -875.2198486328125, Q_loss : -2.28021240234375
reward is 0.00, reward sum is 543.83, time is 7.0, close is 35.5605

2192/100000
action_prob: [0.21197216 0.5760243  0.21

action_prob: [0.21197109 0.5760272  0.21200167], action is sell
action_loss: -889.259765625, Q_loss : -33.812591552734375
reward is 12.95, reward sum is 632.76, time is 43.0, close is 36.6463

2228/100000
action_prob: [0.21197106 0.57602733 0.21200162], action is buy
action_loss: -889.6497192382812, Q_loss : -25.816486358642578
reward is -9.96, reward sum is 622.80, time is 44.0, close is 36.5467

2229/100000
action_prob: [0.21197104 0.5760274  0.21200156], action is sell
action_loss: -890.0397338867188, Q_loss : -25.92037582397461
reward is 0.00, reward sum is 622.80, time is 45.0, close is 36.5467

2230/100000
action_prob: [0.21197103 0.57602745 0.21200152], action is buy
action_loss: -890.4296875, Q_loss : -75.75426483154297
reward is -20.92, reward sum is 601.88, time is 46.0, close is 36.3375

2231/100000
action_prob: [0.21197098 0.5760275  0.21200147], action is hold
action_loss: -890.8197631835938, Q_loss : -15.868156433105469
reward is 16.93, reward sum is 618.81, time is 47.0,

action_loss: -904.859619140625, Q_loss : -15.018613815307617
reward is -10.96, reward sum is 607.40, time is 34.0, close is 36.5566

2268/100000
action_prob: [0.21196994 0.5760305  0.21199954], action is hold
action_loss: -905.2496948242188, Q_loss : -57.86250305175781
reward is 2.00, reward sum is 609.40, time is 35.0, close is 36.5766

2269/100000
action_prob: [0.21196993 0.5760306  0.21199948], action is sell
action_loss: -905.6396484375, Q_loss : 37.02431106567383
reward is -2.99, reward sum is 606.41, time is 36.0, close is 36.5467

2270/100000
action_prob: [0.21196988 0.5760307  0.21199945], action is buy
action_loss: -906.0296630859375, Q_loss : -20.99028778076172
reward is 9.96, reward sum is 616.37, time is 37.0, close is 36.6463

2271/100000
action_prob: [0.21196988 0.57603073 0.2119994 ], action is sell
action_loss: -906.4196166992188, Q_loss : -21.044178009033203
reward is -5.98, reward sum is 610.39, time is 38.0, close is 36.5865

2272/100000
action_prob: [0.21196984 0.57

action_loss: -920.4595336914062, Q_loss : -40.58555603027344
reward is -5.98, reward sum is 584.02, time is 25.0, close is 36.4371

2308/100000
action_prob: [0.21196888 0.5760336  0.21199758], action is hold
action_loss: -920.8495483398438, Q_loss : -11.774955749511719
reward is 10.96, reward sum is 594.98, time is 26.0, close is 36.5467

2309/100000
action_prob: [0.21196882 0.57603365 0.21199751], action is sell
action_loss: -921.239501953125, Q_loss : -59.042415618896484
reward is -7.97, reward sum is 587.01, time is 27.0, close is 36.467

2310/100000
action_prob: [0.21196881 0.5760337  0.21199748], action is sell
action_loss: -921.6295166015625, Q_loss : -24.702308654785156
reward is 1.00, reward sum is 588.01, time is 28.0, close is 36.477

2311/100000
action_prob: [0.21196878 0.5760338  0.21199743], action is sell
action_loss: -922.0195922851562, Q_loss : -51.17020034790039
reward is -3.99, reward sum is 584.02, time is 29.0, close is 36.4371

2312/100000
action_prob: [0.21196876 

action_loss: -936.0594482421875, Q_loss : -9.330589294433594
reward is 6.98, reward sum is 573.59, time is 16.0, close is 36.3973

2348/100000
action_prob: [0.21196784 0.57603645 0.21199568], action is sell
action_loss: -936.4495239257812, Q_loss : -31.42447280883789
reward is -4.98, reward sum is 568.61, time is 17.0, close is 36.3475

2349/100000
action_prob: [0.21196781 0.5760365  0.21199565], action is buy
action_loss: -936.8394775390625, Q_loss : -16.029132843017578
reward is -8.97, reward sum is 559.64, time is 18.0, close is 36.2578

2350/100000
action_prob: [0.2119678 0.5760366 0.2119956], action is sell
action_loss: -937.2293701171875, Q_loss : -2.382251739501953
reward is 1.99, reward sum is 561.63, time is 19.0, close is 36.2777

2351/100000
action_prob: [0.21196778 0.5760367  0.21199556], action is sell
action_loss: -937.6195068359375, Q_loss : -0.41622018814086914
reward is 0.00, reward sum is 561.63, time is 20.0, close is 36.2777

2352/100000
action_prob: [0.21196775 0.5

action_loss: -951.659423828125, Q_loss : -19.526601791381836
reward is 9.96, reward sum is 633.87, time is 7.0, close is 36.9551

2388/100000
action_prob: [0.21196684 0.5760392  0.21199389], action is buy
action_loss: -952.04931640625, Q_loss : -45.33049011230469
reward is -12.95, reward sum is 620.92, time is 8.0, close is 36.8256

2389/100000
action_prob: [0.21196683 0.5760393  0.21199384], action is sell
action_loss: -952.4393310546875, Q_loss : -1.6743850708007812
reward is -23.91, reward sum is 597.01, time is 9.0, close is 36.5865

2390/100000
action_prob: [0.21196683 0.5760393  0.21199381], action is buy
action_loss: -952.8292846679688, Q_loss : 100.17172241210938
reward is 7.97, reward sum is 604.98, time is 10.0, close is 36.6662

2391/100000
action_prob: [0.2119668  0.57603943 0.21199377], action is sell
action_loss: -953.2193603515625, Q_loss : 18.51783561706543
reward is -10.96, reward sum is 594.02, time is 11.0, close is 36.5566

2392/100000
action_prob: [0.21196677 0.576

action_loss: -967.25927734375, Q_loss : 41.14745330810547
reward is -15.93, reward sum is 563.15, time is 47.0, close is 36.2479

2428/100000
action_prob: [0.21196593 0.5760418  0.21199216], action is hold
action_loss: -967.6494140625, Q_loss : 4.113487243652344
reward is -4.99, reward sum is 558.16, time is 48.0, close is 36.198

2429/100000
action_prob: [0.21196592 0.57604194 0.21199213], action is hold
action_loss: -968.039306640625, Q_loss : 5.7075958251953125
reward is 1.00, reward sum is 559.16, time is 0.0, close is 36.208

2430/100000
action_prob: [0.2119659  0.57604206 0.21199207], action is buy
action_loss: -968.4292602539062, Q_loss : -8.654294967651367
reward is 2.99, reward sum is 562.15, time is 1.0, close is 36.2379

2431/100000
action_prob: [0.21196589 0.5760421  0.21199204], action is sell
action_loss: -968.8193359375, Q_loss : -34.618186950683594
reward is 41.08, reward sum is 603.23, time is 2.0, close is 36.6562

2432/100000
action_prob: [0.21196587 0.5760422  0.211

action_loss: -982.8592529296875, Q_loss : -24.908567428588867
reward is 0.99, reward sum is 737.98, time is 38.0, close is 37.9113

2468/100000
action_prob: [0.21196505 0.5760444  0.21199052], action is hold
action_loss: -983.249267578125, Q_loss : -26.507768630981445
reward is -14.94, reward sum is 723.04, time is 39.0, close is 37.7619

2469/100000
action_prob: [0.21196505 0.5760445  0.21199048], action is sell
action_loss: -983.6392211914062, Q_loss : -96.49635314941406
reward is 11.95, reward sum is 734.99, time is 40.0, close is 37.8814

2470/100000
action_prob: [0.21196502 0.5760445  0.21199043], action is sell
action_loss: -984.0292358398438, Q_loss : 12.109683990478516
reward is -1.99, reward sum is 733.00, time is 41.0, close is 37.8615

2471/100000
action_prob: [0.21196501 0.5760446  0.21199039], action is sell
action_loss: -984.4193115234375, Q_loss : 17.085792541503906
reward is -5.97, reward sum is 727.03, time is 42.0, close is 37.8018

2472/100000
action_prob: [0.211965 

action_loss: -998.459228515625, Q_loss : 7.135412216186523
reward is -5.98, reward sum is 794.23, time is 29.0, close is 38.25

2508/100000
action_prob: [0.21196423 0.5760468  0.21198896], action is sell
action_loss: -998.849365234375, Q_loss : 6.07151985168457
reward is 27.89, reward sum is 822.12, time is 30.0, close is 38.5289

2509/100000
action_prob: [0.2119642  0.5760469  0.21198893], action is sell
action_loss: -999.2392578125, Q_loss : -51.81237030029297
reward is -2.99, reward sum is 819.13, time is 31.0, close is 38.499

2510/100000
action_prob: [0.21196419 0.57604694 0.21198887], action is buy
action_loss: -999.629150390625, Q_loss : 113.74374389648438
reward is -2.99, reward sum is 816.14, time is 32.0, close is 38.4691

2511/100000
action_prob: [0.21196418 0.57604694 0.21198884], action is buy
action_loss: -1000.0192260742188, Q_loss : 42.216407775878906
reward is -11.95, reward sum is 804.19, time is 33.0, close is 38.3496

2512/100000
action_prob: [0.21196415 0.57604706 

action_loss: -1014.0592041015625, Q_loss : 52.914306640625
reward is -2.99, reward sum is 879.32, time is 20.0, close is 39.3357

2548/100000
action_prob: [0.21196343 0.5760491  0.21198744], action is sell
action_loss: -1014.4493408203125, Q_loss : 75.6788558959961
reward is -7.96, reward sum is 871.36, time is 21.0, close is 39.2561

2549/100000
action_prob: [0.21196342 0.57604915 0.21198739], action is hold
action_loss: -1014.8392333984375, Q_loss : 35.09976577758789
reward is -7.97, reward sum is 863.39, time is 22.0, close is 39.1764

2550/100000
action_prob: [0.2119634  0.5760492  0.21198738], action is sell
action_loss: -1015.2291870117188, Q_loss : 109.37772369384766
reward is 1.99, reward sum is 865.38, time is 23.0, close is 39.1963

2551/100000
action_prob: [0.21196339 0.5760493  0.21198733], action is sell
action_loss: -1015.619384765625, Q_loss : -85.95616912841797
reward is -3.99, reward sum is 861.39, time is 24.0, close is 39.1564

2552/100000
action_prob: [0.21196336 0.

action_loss: -1029.26953125, Q_loss : -42.102657318115234
reward is -9.96, reward sum is 796.11, time is 10.0, close is 38.758

2587/100000
action_prob: [0.2119627  0.5760513  0.21198602], action is hold
action_loss: -1029.65966796875, Q_loss : -8.236554145812988
reward is -1.00, reward sum is 795.11, time is 11.0, close is 38.748000000000005

2588/100000
action_prob: [0.21196267 0.5760513  0.21198598], action is sell
action_loss: -1030.0496826171875, Q_loss : 45.629478454589844
reward is 25.90, reward sum is 821.01, time is 12.0, close is 39.007

2589/100000
action_prob: [0.21196266 0.5760514  0.21198596], action is hold
action_loss: -1030.439697265625, Q_loss : -52.03131866455078
reward is -15.93, reward sum is 805.08, time is 13.0, close is 38.8477

2590/100000
action_prob: [0.21196264 0.5760514  0.21198592], action is buy
action_loss: -1030.82958984375, Q_loss : -25.238304138183594
reward is 2.98, reward sum is 808.06, time is 14.0, close is 38.8775

2591/100000
action_prob: [0.211

action_loss: -1044.8701171875, Q_loss : -42.801998138427734
reward is 3.98, reward sum is 879.78, time is 1.0, close is 39.5947

2627/100000
action_prob: [0.21196198 0.5760534  0.21198466], action is sell
action_loss: -1045.2601318359375, Q_loss : -44.35257339477539
reward is 22.13, reward sum is 901.92, time is 2.0, close is 39.8238

2628/100000
action_prob: [0.21196195 0.57605344 0.2119846 ], action is sell
action_loss: -1045.650146484375, Q_loss : -27.486465454101562
reward is 0.00, reward sum is 901.92, time is 3.0, close is 39.8139

2629/100000
action_prob: [0.21196194 0.5760535  0.21198456], action is sell
action_loss: -1046.0401611328125, Q_loss : 2.399643898010254
reward is 0.00, reward sum is 901.92, time is 4.0, close is 39.7143

2630/100000
action_prob: [0.21196194 0.57605356 0.21198454], action is hold
action_loss: -1046.43017578125, Q_loss : 62.81829071044922
reward is 0.00, reward sum is 901.92, time is 5.0, close is 39.7939

2631/100000
action_prob: [0.21196188 0.5760535

action_loss: -1060.4708251953125, Q_loss : -2.5347061157226562
reward is -4.98, reward sum is 807.48, time is 41.0, close is 38.6982

2667/100000
action_prob: [0.21196128 0.5760554  0.21198331], action is sell
action_loss: -1060.86083984375, Q_loss : 30.47140121459961
reward is 4.98, reward sum is 812.46, time is 42.0, close is 38.748000000000005

2668/100000
action_prob: [0.21196127 0.57605547 0.21198328], action is buy
action_loss: -1061.2510986328125, Q_loss : 46.54666519165039
reward is -9.96, reward sum is 802.50, time is 43.0, close is 38.6484

2669/100000
action_prob: [0.21196125 0.5760555  0.21198323], action is buy
action_loss: -1061.6409912109375, Q_loss : -57.54637908935547
reward is 0.00, reward sum is 802.50, time is 44.0, close is 38.6484

2670/100000
action_prob: [0.21196122 0.5760556  0.2119832 ], action is hold
action_loss: -1062.031005859375, Q_loss : -61.420265197753906
reward is 0.00, reward sum is 802.50, time is 45.0, close is 38.6484

2671/100000
action_prob: [0.

action_loss: -1076.0718994140625, Q_loss : 116.34922790527344
reward is 13.95, reward sum is 704.32, time is 32.0, close is 37.5926

2707/100000
action_prob: [0.21196061 0.5760573  0.21198203], action is sell
action_loss: -1076.4619140625, Q_loss : 33.19538116455078
reward is -5.98, reward sum is 698.34, time is 33.0, close is 37.5328

2708/100000
action_prob: [0.2119606  0.57605743 0.21198201], action is buy
action_loss: -1076.85205078125, Q_loss : -38.698509216308594
reward is 0.00, reward sum is 698.34, time is 34.0, close is 37.5328

2709/100000
action_prob: [0.21196058 0.57605743 0.21198197], action is hold
action_loss: -1077.241943359375, Q_loss : 90.87037658691406
reward is -0.99, reward sum is 697.35, time is 35.0, close is 37.5229

2710/100000
action_prob: [0.21196055 0.5760575  0.21198194], action is sell
action_loss: -1077.6319580078125, Q_loss : -161.6462860107422
reward is 7.96, reward sum is 705.31, time is 36.0, close is 37.6025

2711/100000
action_prob: [0.21196055 0.57

action_loss: -1091.6728515625, Q_loss : 14.053253173828125
reward is -7.97, reward sum is 730.72, time is 23.0, close is 38.0508

2747/100000
action_prob: [0.21195999 0.5760592  0.21198079], action is sell
action_loss: -1092.0628662109375, Q_loss : 24.929363250732422
reward is 21.91, reward sum is 752.63, time is 24.0, close is 38.2699

2748/100000
action_prob: [0.21195997 0.5760593  0.21198079], action is buy
action_loss: -1092.4530029296875, Q_loss : 0.2054729461669922
reward is -27.89, reward sum is 724.74, time is 25.0, close is 37.991

2749/100000
action_prob: [0.21195996 0.57605934 0.21198075], action is hold
action_loss: -1092.8428955078125, Q_loss : -24.93842315673828
reward is 14.94, reward sum is 739.68, time is 26.0, close is 38.1404

2750/100000
action_prob: [0.21195996 0.57605934 0.21198073], action is sell
action_loss: -1093.23291015625, Q_loss : -0.5932846069335938
reward is 15.94, reward sum is 755.62, time is 27.0, close is 38.2998

2751/100000
action_prob: [0.21195993

action_loss: -1107.273681640625, Q_loss : -56.74269104003906
reward is 0.00, reward sum is 869.43, time is 14.0, close is 39.2461

2787/100000
action_prob: [0.21195938 0.57606095 0.21197963], action is buy
action_loss: -1107.663818359375, Q_loss : 55.62333297729492
reward is -0.78, reward sum is 868.64, time is 15.0, close is 39.2859

2788/100000
action_prob: [0.21195936 0.57606107 0.21197958], action is sell
action_loss: -1108.053955078125, Q_loss : -52.639007568359375
reward is -7.97, reward sum is 860.67, time is 16.0, close is 39.2062

2789/100000
action_prob: [0.21195936 0.57606107 0.21197958], action is sell
action_loss: -1108.44384765625, Q_loss : 12.086223602294922
reward is -17.92, reward sum is 842.75, time is 17.0, close is 39.027

2790/100000
action_prob: [0.21195933 0.5760611  0.21197952], action is hold
action_loss: -1108.833984375, Q_loss : -74.37128448486328
reward is -5.98, reward sum is 836.77, time is 18.0, close is 38.9672

2791/100000
action_prob: [0.2119593  0.576

action_loss: -1122.874755859375, Q_loss : 50.691287994384766
reward is 0.00, reward sum is 885.79, time is 5.0, close is 39.4453

2827/100000
action_prob: [0.21195881 0.57606274 0.2119785 ], action is sell
action_loss: -1123.264892578125, Q_loss : -21.79660415649414
reward is 0.00, reward sum is 885.79, time is 6.0, close is 39.4453

2828/100000
action_prob: [0.2119588  0.5760628  0.21197845], action is sell
action_loss: -1123.6549072265625, Q_loss : 3.703428268432617
reward is 0.00, reward sum is 885.79, time is 7.0, close is 39.4154

2829/100000
action_prob: [0.21195878 0.5760628  0.21197844], action is sell
action_loss: -1124.044921875, Q_loss : 19.220977783203125
reward is 0.00, reward sum is 885.79, time is 8.0, close is 39.4652

2830/100000
action_prob: [0.21195877 0.57606286 0.2119784 ], action is sell
action_loss: -1124.434814453125, Q_loss : 32.575645446777344
reward is 0.00, reward sum is 885.79, time is 9.0, close is 39.6146

2831/100000
action_prob: [0.21195877 0.57606286 0

action_loss: -1138.475830078125, Q_loss : -26.354734420776367
reward is -12.95, reward sum is 877.03, time is 45.0, close is 39.784

2867/100000
action_prob: [0.21195824 0.57606435 0.21197739], action is sell
action_loss: -1138.8658447265625, Q_loss : -25.338624954223633
reward is 3.98, reward sum is 881.01, time is 46.0, close is 39.8238

2868/100000
action_prob: [0.21195824 0.5760644  0.21197738], action is hold
action_loss: -1139.2559814453125, Q_loss : -18.432518005371094
reward is 1.99, reward sum is 883.00, time is 47.0, close is 39.8437

2869/100000
action_prob: [0.21195824 0.57606447 0.21197735], action is sell
action_loss: -1139.6458740234375, Q_loss : -35.306488037109375
reward is 2.00, reward sum is 885.00, time is 48.0, close is 39.8637

2870/100000
action_prob: [0.21195818 0.57606447 0.2119773 ], action is buy
action_loss: -1140.035888671875, Q_loss : -26.845975875854492
reward is 73.71, reward sum is 958.71, time is 0.0, close is 40.6008

2871/100000
action_prob: [0.21195

action_loss: -1154.076904296875, Q_loss : -61.370758056640625
reward is 20.92, reward sum is 937.19, time is 36.0, close is 40.4514

2907/100000
action_prob: [0.21195772 0.5760659  0.21197633], action is sell
action_loss: -1154.4669189453125, Q_loss : -14.504647254943848
reward is -2.99, reward sum is 934.20, time is 37.0, close is 40.4215

2908/100000
action_prob: [0.21195772 0.576066   0.21197632], action is sell
action_loss: -1154.8570556640625, Q_loss : -54.478538513183594
reward is 1.99, reward sum is 936.19, time is 38.0, close is 40.4414

2909/100000
action_prob: [0.21195768 0.576066   0.21197629], action is hold
action_loss: -1155.2469482421875, Q_loss : -6.50242805480957
reward is 9.96, reward sum is 946.15, time is 39.0, close is 40.541

2910/100000
action_prob: [0.21195768 0.5760661  0.21197626], action is sell
action_loss: -1155.636962890625, Q_loss : -54.36631774902344
reward is 2.99, reward sum is 949.14, time is 40.0, close is 40.5709

2911/100000
action_prob: [0.2119576

action_loss: -1169.677978515625, Q_loss : 15.213221549987793
reward is 3.98, reward sum is 913.65, time is 27.0, close is 40.1824

2947/100000
action_prob: [0.2119572  0.57606745 0.21197534], action is sell
action_loss: -1170.0679931640625, Q_loss : -51.63066864013672
reward is 5.98, reward sum is 919.63, time is 28.0, close is 40.2422

2948/100000
action_prob: [0.2119572  0.57606757 0.2119753 ], action is sell
action_loss: -1170.4580078125, Q_loss : -83.674560546875
reward is -6.97, reward sum is 912.66, time is 29.0, close is 40.1725

2949/100000
action_prob: [0.21195719 0.57606757 0.21197528], action is sell
action_loss: -1170.84814453125, Q_loss : 43.3824462890625
reward is -23.91, reward sum is 888.75, time is 30.0, close is 39.9334

2950/100000
action_prob: [0.21195716 0.57606757 0.21197526], action is hold
action_loss: -1171.238037109375, Q_loss : -54.55234146118164
reward is 4.98, reward sum is 893.73, time is 31.0, close is 39.9832

2951/100000
action_prob: [0.21195714 0.57606

action_loss: -1185.2789306640625, Q_loss : 105.6572036743164
reward is -14.94, reward sum is 857.26, time is 18.0, close is 39.5549

2987/100000
action_prob: [0.21195672 0.57606894 0.21197437], action is hold
action_loss: -1185.6690673828125, Q_loss : -7.76109504699707
reward is 6.97, reward sum is 864.23, time is 19.0, close is 39.6246

2988/100000
action_prob: [0.21195672 0.57606894 0.21197434], action is hold
action_loss: -1186.05908203125, Q_loss : -47.57793426513672
reward is -22.91, reward sum is 841.32, time is 20.0, close is 39.3955

2989/100000
action_prob: [0.2119567  0.57606894 0.2119743 ], action is sell
action_loss: -1186.448974609375, Q_loss : -13.80447006225586
reward is 14.94, reward sum is 856.26, time is 21.0, close is 39.5449

2990/100000
action_prob: [0.21195668 0.57606906 0.21197428], action is sell
action_loss: -1186.8389892578125, Q_loss : -20.868364334106445
reward is 9.96, reward sum is 866.22, time is 22.0, close is 39.6445

2991/100000
action_prob: [0.2119566

action_loss: -1200.8800048828125, Q_loss : -32.53966522216797
reward is 1.00, reward sum is 891.51, time is 9.0, close is 39.8537

3027/100000
action_prob: [0.21195625 0.5760703  0.21197343], action is hold
action_loss: -1201.27001953125, Q_loss : -120.58484649658203
reward is -2.99, reward sum is 888.52, time is 10.0, close is 39.8238

3028/100000
action_prob: [0.21195623 0.57607037 0.2119734 ], action is sell
action_loss: -1201.66015625, Q_loss : 8.305136680603027
reward is 1.99, reward sum is 890.51, time is 11.0, close is 39.8437

3029/100000
action_prob: [0.21195622 0.57607037 0.21197337], action is buy
action_loss: -1202.050048828125, Q_loss : -28.255098342895508
reward is -0.99, reward sum is 889.52, time is 12.0, close is 39.8338

3030/100000
action_prob: [0.21195622 0.57607037 0.21197335], action is sell
action_loss: -1202.4400634765625, Q_loss : 14.995613098144531
reward is -1.00, reward sum is 888.52, time is 13.0, close is 39.8238

3031/100000
action_prob: [0.21195622 0.576

action_loss: -1216.48095703125, Q_loss : 17.70523452758789
reward is 44.83, reward sum is 1047.90, time is 0.0, close is 41.4176

3067/100000
action_prob: [0.2119558  0.5760717  0.21197252], action is sell
action_loss: -1216.87109375, Q_loss : 58.56127166748047
reward is 31.08, reward sum is 1078.98, time is 1.0, close is 41.7363

3068/100000
action_prob: [0.21195579 0.5760717  0.21197248], action is buy
action_loss: -1217.26123046875, Q_loss : 15.79737663269043
reward is -0.83, reward sum is 1078.15, time is 2.0, close is 41.7861

3069/100000
action_prob: [0.21195579 0.5760718  0.21197248], action is sell
action_loss: -1217.651123046875, Q_loss : 91.40348815917969
reward is 40.84, reward sum is 1118.99, time is 3.0, close is 42.1945

3070/100000
action_prob: [0.21195577 0.5760718  0.21197245], action is hold
action_loss: -1218.041015625, Q_loss : -23.77690887451172
reward is 2.00, reward sum is 1120.99, time is 4.0, close is 42.2145

3071/100000
action_prob: [0.21195577 0.5760718  0.2

action_loss: -1232.08203125, Q_loss : -22.300785064697266
reward is -6.97, reward sum is 1109.03, time is 40.0, close is 42.0949

3107/100000
action_prob: [0.21195537 0.576073   0.21197164], action is hold
action_loss: -1232.47216796875, Q_loss : -2.404672622680664
reward is 1.99, reward sum is 1111.02, time is 41.0, close is 42.1148

3108/100000
action_prob: [0.21195537 0.57607305 0.21197164], action is sell
action_loss: -1232.8623046875, Q_loss : -14.736285209655762
reward is 2.00, reward sum is 1113.02, time is 42.0, close is 42.1348

3109/100000
action_prob: [0.21195534 0.57607305 0.2119716 ], action is buy
action_loss: -1233.252197265625, Q_loss : 31.2370662689209
reward is 8.96, reward sum is 1121.98, time is 43.0, close is 42.2244

3110/100000
action_prob: [0.21195534 0.5760731  0.21197158], action is sell
action_loss: -1233.64208984375, Q_loss : -63.893341064453125
reward is 0.00, reward sum is 1121.98, time is 44.0, close is 42.2244

3111/100000
action_prob: [0.21195532 0.5760

action_loss: -1247.6832275390625, Q_loss : 69.38318634033203
reward is 20.92, reward sum is 1281.65, time is 31.0, close is 43.3998

3147/100000
action_prob: [0.21195497 0.57607424 0.2119708 ], action is hold
action_loss: -1248.0733642578125, Q_loss : 46.90290069580078
reward is -2.99, reward sum is 1278.66, time is 32.0, close is 43.3699

3148/100000
action_prob: [0.21195494 0.57607424 0.21197076], action is hold
action_loss: -1248.4632568359375, Q_loss : 12.43541145324707
reward is -7.97, reward sum is 1270.69, time is 33.0, close is 43.2902

3149/100000
action_prob: [0.21195494 0.57607424 0.21197075], action is sell
action_loss: -1248.853271484375, Q_loss : -44.46855545043945
reward is -8.96, reward sum is 1261.73, time is 34.0, close is 43.2006

3150/100000
action_prob: [0.2119549  0.57607436 0.21197075], action is sell
action_loss: -1249.2432861328125, Q_loss : -23.542449951171875
reward is 2.99, reward sum is 1264.72, time is 35.0, close is 43.2305

3151/100000
action_prob: [0.21

action_prob: [0.21195456 0.57607543 0.21197   ], action is sell
action_loss: -1263.2843017578125, Q_loss : 23.29717445373535
reward is 0.00, reward sum is 1189.30, time is 22.0, close is 42.4436

3187/100000
action_prob: [0.21195455 0.57607543 0.21197   ], action is buy
action_loss: -1263.6744384765625, Q_loss : -30.586721420288086
reward is -2.99, reward sum is 1186.31, time is 23.0, close is 42.4137

3188/100000
action_prob: [0.21195455 0.5760755  0.21196997], action is hold
action_loss: -1264.064453125, Q_loss : -32.65060806274414
reward is 2.99, reward sum is 1189.30, time is 24.0, close is 42.4436

3189/100000
action_prob: [0.21195453 0.57607555 0.21196994], action is sell
action_loss: -1264.4544677734375, Q_loss : 9.594127655029297
reward is 7.96, reward sum is 1197.26, time is 25.0, close is 42.5232

3190/100000
action_prob: [0.21195453 0.57607555 0.21196993], action is hold
action_loss: -1264.844482421875, Q_loss : 10.351531982421875
reward is 0.00, reward sum is 1197.26, time 

action_loss: -1278.885498046875, Q_loss : -38.72884750366211
reward is 22.91, reward sum is 1172.66, time is 13.0, close is 42.2145

3227/100000
action_prob: [0.21195418 0.5760766  0.2119692 ], action is sell
action_loss: -1279.275634765625, Q_loss : 86.05725860595703
reward is -11.96, reward sum is 1160.70, time is 14.0, close is 42.0949

3228/100000
action_prob: [0.21195416 0.5760766  0.21196918], action is sell
action_loss: -1279.6656494140625, Q_loss : -26.736629486083984
reward is -25.90, reward sum is 1134.80, time is 15.0, close is 41.8359

3229/100000
action_prob: [0.21195415 0.57607675 0.21196915], action is sell
action_loss: -1280.0556640625, Q_loss : -54.35575485229492
reward is 19.93, reward sum is 1154.73, time is 16.0, close is 42.0352

3230/100000
action_prob: [0.21195415 0.57607675 0.21196914], action is sell
action_loss: -1280.4456787109375, Q_loss : -0.804410457611084
reward is 9.96, reward sum is 1164.69, time is 17.0, close is 42.1348

3231/100000
action_prob: [0.21

action_loss: -1294.4866943359375, Q_loss : 71.36520385742188
reward is 0.00, reward sum is 1205.82, time is 4.0, close is 42.7324

3267/100000
action_prob: [0.21195379 0.57607776 0.21196845], action is sell
action_loss: -1294.8768310546875, Q_loss : 16.31564712524414
reward is -4.98, reward sum is 1200.84, time is 5.0, close is 42.6826

3268/100000
action_prob: [0.21195379 0.5760778  0.21196844], action is sell
action_loss: -1295.266845703125, Q_loss : 29.501270294189453
reward is -9.96, reward sum is 1190.88, time is 6.0, close is 42.583

3269/100000
action_prob: [0.21195377 0.5760778  0.21196839], action is hold
action_loss: -1295.65673828125, Q_loss : 14.174348831176758
reward is 4.98, reward sum is 1195.86, time is 7.0, close is 42.6328

3270/100000
action_prob: [0.21195377 0.5760778  0.21196839], action is sell
action_loss: -1296.0467529296875, Q_loss : -36.89051055908203
reward is -9.96, reward sum is 1185.90, time is 8.0, close is 42.5332

3271/100000
action_prob: [0.21195377 0.

action_loss: -1309.69775390625, Q_loss : 19.70307731628418
reward is 0.99, reward sum is 1142.07, time is 43.0, close is 42.0949

3306/100000
action_prob: [0.21195343 0.5760788  0.21196774], action is sell
action_loss: -1310.0877685546875, Q_loss : -78.82089233398438
reward is 13.95, reward sum is 1156.02, time is 44.0, close is 42.2344

3307/100000
action_prob: [0.21195343 0.57607883 0.2119677 ], action is sell
action_loss: -1310.4779052734375, Q_loss : 0.8852949142456055
reward is -1.00, reward sum is 1155.02, time is 45.0, close is 42.2244

3308/100000
action_prob: [0.21195343 0.57607883 0.21196768], action is sell
action_loss: -1310.867919921875, Q_loss : -55.0186767578125
reward is -14.94, reward sum is 1140.08, time is 46.0, close is 42.075

3309/100000
action_prob: [0.21195343 0.5760789  0.21196768], action is buy
action_loss: -1311.2578125, Q_loss : 12.987930297851562
reward is 3.98, reward sum is 1144.06, time is 47.0, close is 42.1148

3310/100000
action_prob: [0.21195343 0.5

action_loss: -1325.298583984375, Q_loss : -33.26301956176758
reward is -11.95, reward sum is 1130.43, time is 34.0, close is 41.9057

3346/100000
action_prob: [0.21195312 0.5760799  0.21196702], action is sell
action_loss: -1325.688720703125, Q_loss : -13.826835632324219
reward is -6.98, reward sum is 1123.45, time is 35.0, close is 41.8359

3347/100000
action_prob: [0.2119531 0.5760799 0.211967 ], action is sell
action_loss: -1326.0789794921875, Q_loss : -5.851572036743164
reward is 1.00, reward sum is 1124.45, time is 36.0, close is 41.8459

3348/100000
action_prob: [0.21195309 0.5760799  0.21196699], action is hold
action_loss: -1326.46875, Q_loss : 3.705385208129883
reward is 13.95, reward sum is 1138.40, time is 37.0, close is 41.9854

3349/100000
action_prob: [0.21195309 0.57607996 0.21196699], action is hold
action_loss: -1326.85888671875, Q_loss : 46.71235275268555
reward is 9.96, reward sum is 1148.36, time is 38.0, close is 42.085

3350/100000
action_prob: [0.21195304 0.57607

action_loss: -1340.8997802734375, Q_loss : -34.12751770019531
reward is 10.96, reward sum is 1093.90, time is 25.0, close is 41.6168

3386/100000
action_prob: [0.21195278 0.57608086 0.21196635], action is buy
action_loss: -1341.289794921875, Q_loss : -11.512931823730469
reward is 0.00, reward sum is 1093.90, time is 26.0, close is 41.6168

3387/100000
action_prob: [0.21195278 0.57608086 0.21196632], action is hold
action_loss: -1341.679931640625, Q_loss : -55.29674530029297
reward is 5.98, reward sum is 1099.88, time is 27.0, close is 41.6766

3388/100000
action_prob: [0.21195276 0.576081   0.2119663 ], action is sell
action_loss: -1342.070068359375, Q_loss : 64.04551696777344
reward is 3.98, reward sum is 1103.86, time is 28.0, close is 41.7164

3389/100000
action_prob: [0.21195275 0.576081   0.21196629], action is hold
action_loss: -1342.4598388671875, Q_loss : -4.394530296325684
reward is 5.98, reward sum is 1109.84, time is 29.0, close is 41.7762

3390/100000
action_prob: [0.211952

action_loss: -1356.5008544921875, Q_loss : -2.625063896179199
reward is -7.97, reward sum is 1023.51, time is 16.0, close is 41.0988

3426/100000
action_prob: [0.21195248 0.5760819  0.2119657 ], action is sell
action_loss: -1356.890869140625, Q_loss : -63.498878479003906
reward is -10.95, reward sum is 1012.56, time is 17.0, close is 40.9893

3427/100000
action_prob: [0.21195243 0.57608193 0.21196567], action is sell
action_loss: -1357.281005859375, Q_loss : 27.257156372070312
reward is 2.98, reward sum is 1015.54, time is 18.0, close is 41.0191

3428/100000
action_prob: [0.21195245 0.57608193 0.21196565], action is sell
action_loss: -1357.6710205078125, Q_loss : -38.11486053466797
reward is -18.92, reward sum is 996.62, time is 19.0, close is 40.8299

3429/100000
action_prob: [0.21195245 0.57608193 0.21196565], action is hold
action_loss: -1358.0609130859375, Q_loss : -46.390625
reward is -7.97, reward sum is 988.65, time is 20.0, close is 40.7502

3430/100000
action_prob: [0.21195243

action_loss: -1372.1019287109375, Q_loss : -138.11314392089844
reward is -0.82, reward sum is 1059.72, time is 7.0, close is 41.3578

3466/100000
action_prob: [0.21195215 0.5760828  0.21196505], action is hold
action_loss: -1372.491943359375, Q_loss : -43.634971618652344
reward is 1.99, reward sum is 1061.71, time is 8.0, close is 41.3777

3467/100000
action_prob: [0.21195212 0.5760828  0.21196501], action is sell
action_loss: -1372.882080078125, Q_loss : -62.51879119873047
reward is 10.96, reward sum is 1072.67, time is 9.0, close is 41.4873

3468/100000
action_prob: [0.21195213 0.5760828  0.21196501], action is buy
action_loss: -1373.2720947265625, Q_loss : 13.147242546081543
reward is 0.00, reward sum is 1072.67, time is 10.0, close is 41.4873

3469/100000
action_prob: [0.21195213 0.5760829  0.21196501], action is sell
action_loss: -1373.6619873046875, Q_loss : -48.38512420654297
reward is 0.00, reward sum is 1072.67, time is 11.0, close is 41.4873

3470/100000
action_prob: [0.21195

action_loss: -1387.703125, Q_loss : -23.746952056884766
reward is -14.94, reward sum is 892.38, time is 47.0, close is 39.6844

3506/100000
action_prob: [0.21195184 0.5760837  0.21196443], action is buy
action_loss: -1388.0931396484375, Q_loss : -55.63092041015625
reward is -1.00, reward sum is 891.38, time is 48.0, close is 39.6744

3507/100000
action_prob: [0.21195184 0.5760837  0.2119644 ], action is sell
action_loss: -1388.4832763671875, Q_loss : -55.784889221191406
reward is -15.94, reward sum is 875.44, time is 0.0, close is 39.515

3508/100000
action_prob: [0.21195185 0.5760838  0.2119644 ], action is sell
action_loss: -1388.8731689453125, Q_loss : 16.82144546508789
reward is 30.05, reward sum is 905.49, time is 1.0, close is 39.8238

3509/100000
action_prob: [0.21195182 0.5760838  0.21196438], action is sell
action_loss: -1389.26318359375, Q_loss : -46.732666015625
reward is 0.00, reward sum is 905.49, time is 2.0, close is 39.6545

3510/100000
action_prob: [0.21195182 0.576083

action_prob: [0.21195157 0.5760846  0.21196383], action is sell
action_loss: -1403.30419921875, Q_loss : -4.811795234680176
reward is 0.99, reward sum is 906.69, time is 38.0, close is 39.8437

3546/100000
action_prob: [0.21195158 0.5760846  0.21196383], action is sell
action_loss: -1403.6943359375, Q_loss : -44.54506301879883
reward is -1.99, reward sum is 904.70, time is 39.0, close is 39.8238

3547/100000
action_prob: [0.21195157 0.5760846  0.21196383], action is sell
action_loss: -1404.0843505859375, Q_loss : 50.71916580200195
reward is 3.99, reward sum is 908.69, time is 40.0, close is 39.8637

3548/100000
action_prob: [0.21195155 0.5760847  0.21196379], action is hold
action_loss: -1404.474365234375, Q_loss : -16.984798431396484
reward is -2.00, reward sum is 906.69, time is 41.0, close is 39.8437

3549/100000
action_prob: [0.21195157 0.57608473 0.21196379], action is sell
action_loss: -1404.8643798828125, Q_loss : -41.02861404418945
reward is 0.00, reward sum is 906.69, time is 

action_loss: -1418.9052734375, Q_loss : 49.72100067138672
reward is -12.95, reward sum is 899.12, time is 29.0, close is 39.6943

3586/100000
action_prob: [0.2119513  0.5760855  0.21196325], action is sell
action_loss: -1419.29541015625, Q_loss : -36.192962646484375
reward is 0.00, reward sum is 899.12, time is 30.0, close is 39.6943

3587/100000
action_prob: [0.21195129 0.5760855  0.21196324], action is buy
action_loss: -1419.685546875, Q_loss : 3.7532215118408203
reward is 4.98, reward sum is 904.10, time is 31.0, close is 39.7441

3588/100000
action_prob: [0.21195127 0.5760855  0.21196322], action is sell
action_loss: -1420.075439453125, Q_loss : 52.509254455566406
reward is 8.97, reward sum is 913.07, time is 32.0, close is 39.8338

3589/100000
action_prob: [0.21195126 0.5760855  0.21196319], action is buy
action_loss: -1420.465576171875, Q_loss : 35.78529357910156
reward is 0.99, reward sum is 914.06, time is 33.0, close is 39.8437

3590/100000
action_prob: [0.21195126 0.57608557 

action_loss: -1434.50634765625, Q_loss : -6.275096893310547
reward is -1.99, reward sum is 816.85, time is 20.0, close is 38.4293

3626/100000
action_prob: [0.21195102 0.5760863  0.21196267], action is hold
action_loss: -1434.896484375, Q_loss : 39.42108917236328
reward is -1.99, reward sum is 814.86, time is 21.0, close is 38.4094

3627/100000
action_prob: [0.211951   0.5760863  0.21196266], action is hold
action_loss: -1435.2867431640625, Q_loss : 65.1871337890625
reward is 3.98, reward sum is 818.84, time is 22.0, close is 38.4492

3628/100000
action_prob: [0.211951   0.57608634 0.21196266], action is buy
action_loss: -1435.676513671875, Q_loss : -78.04669189453125
reward is -10.96, reward sum is 807.88, time is 23.0, close is 38.3396

3629/100000
action_prob: [0.211951   0.57608634 0.21196264], action is sell
action_loss: -1436.0667724609375, Q_loss : -2.4206552505493164
reward is -3.98, reward sum is 803.90, time is 24.0, close is 38.2998

3630/100000
action_prob: [0.21195099 0.57

action_loss: -1450.107666015625, Q_loss : 6.428962230682373
reward is -0.76, reward sum is 766.49, time is 11.0, close is 38.4492

3666/100000
action_prob: [0.21195076 0.57608706 0.21196213], action is hold
action_loss: -1450.497802734375, Q_loss : 15.324995040893555
reward is -13.94, reward sum is 752.55, time is 12.0, close is 38.3098

3667/100000
action_prob: [0.21195076 0.5760871  0.21196212], action is buy
action_loss: -1450.8876953125, Q_loss : -25.14019012451172
reward is -9.96, reward sum is 742.59, time is 13.0, close is 38.2102

3668/100000
action_prob: [0.21195075 0.5760871  0.21196212], action is sell
action_loss: -1451.2779541015625, Q_loss : 21.477214813232422
reward is -13.95, reward sum is 728.64, time is 14.0, close is 38.0707

3669/100000
action_prob: [0.21195073 0.5760871  0.2119621 ], action is sell
action_loss: -1451.66796875, Q_loss : 1.4633984565734863
reward is 12.95, reward sum is 741.59, time is 15.0, close is 38.2002

3670/100000
action_prob: [0.21195073 0.57

action_loss: -1465.708984375, Q_loss : 30.404874801635742
reward is 0.00, reward sum is 676.07, time is 2.0, close is 37.9213

3706/100000
action_prob: [0.21195051 0.5760879  0.21196161], action is sell
action_loss: -1466.09912109375, Q_loss : -29.5909481048584
reward is 0.00, reward sum is 676.07, time is 3.0, close is 37.8516

3707/100000
action_prob: [0.2119505  0.5760879  0.21196158], action is buy
action_loss: -1466.4892578125, Q_loss : 0.2750835418701172
reward is -0.75, reward sum is 675.32, time is 4.0, close is 37.8516

3708/100000
action_prob: [0.21195051 0.5760879  0.21196158], action is sell
action_loss: -1466.879150390625, Q_loss : 23.28127098083496
reward is -3.99, reward sum is 671.33, time is 5.0, close is 37.8117

3709/100000
action_prob: [0.2119505  0.5760879  0.21196158], action is hold
action_loss: -1467.2691650390625, Q_loss : -16.612699508666992
reward is 6.97, reward sum is 678.30, time is 6.0, close is 37.8814

3710/100000
action_prob: [0.21195048 0.57608795 0.2

action_loss: -1481.31005859375, Q_loss : 1.1069202423095703
reward is -11.96, reward sum is 810.78, time is 42.0, close is 39.2062

3746/100000
action_prob: [0.21195026 0.5760886  0.21196109], action is hold
action_loss: -1481.7003173828125, Q_loss : 27.032949447631836
reward is -0.99, reward sum is 809.79, time is 43.0, close is 39.1963

3747/100000
action_prob: [0.21195026 0.57608867 0.21196109], action is sell
action_loss: -1482.09033203125, Q_loss : -32.39720916748047
reward is 3.98, reward sum is 813.77, time is 44.0, close is 39.2361

3748/100000
action_prob: [0.21195026 0.57608867 0.21196108], action is sell
action_loss: -1482.4803466796875, Q_loss : -36.86482620239258
reward is -17.93, reward sum is 795.84, time is 45.0, close is 39.0568

3749/100000
action_prob: [0.21195024 0.57608867 0.21196106], action is hold
action_loss: -1482.870361328125, Q_loss : 21.971357345581055
reward is 6.98, reward sum is 802.82, time is 46.0, close is 39.1266

3750/100000
action_prob: [0.21195024

action_loss: -1496.9111328125, Q_loss : -3.9990224838256836
reward is -21.91, reward sum is 812.23, time is 33.0, close is 39.3457

3786/100000
action_prob: [0.21195003 0.5760894  0.2119606 ], action is sell
action_loss: -1497.30126953125, Q_loss : -50.902992248535156
reward is 0.00, reward sum is 812.23, time is 34.0, close is 39.3457

3787/100000
action_prob: [0.21195003 0.5760894  0.21196058], action is buy
action_loss: -1497.6915283203125, Q_loss : -20.42006492614746
reward is -4.98, reward sum is 807.25, time is 35.0, close is 39.2959

3788/100000
action_prob: [0.21195002 0.5760894  0.21196058], action is sell
action_loss: -1498.081298828125, Q_loss : 79.97379302978516
reward is 9.96, reward sum is 817.21, time is 36.0, close is 39.3955

3789/100000
action_prob: [0.21195002 0.5760894  0.21196057], action is sell
action_loss: -1498.471435546875, Q_loss : 18.321102142333984
reward is -10.96, reward sum is 806.25, time is 37.0, close is 39.2859

3790/100000
action_prob: [0.21195002 0

action_loss: -1512.51220703125, Q_loss : -29.665884017944336
reward is 0.00, reward sum is 837.54, time is 24.0, close is 40.1924

3826/100000
action_prob: [0.21194983 0.5760901  0.21196012], action is sell
action_loss: -1512.90234375, Q_loss : 4.871063232421875
reward is 26.89, reward sum is 864.43, time is 25.0, close is 40.4613

3827/100000
action_prob: [0.2119498  0.5760901  0.21196012], action is sell
action_loss: -1513.29248046875, Q_loss : -31.390499114990234
reward is 8.97, reward sum is 873.40, time is 26.0, close is 40.551

3828/100000
action_prob: [0.2119498 0.5760901 0.2119601], action is hold
action_loss: -1513.682373046875, Q_loss : 25.65312957763672
reward is 14.94, reward sum is 888.34, time is 27.0, close is 40.7004

3829/100000
action_prob: [0.21194977 0.5760901  0.21196006], action is buy
action_loss: -1514.072509765625, Q_loss : 2.9334325790405273
reward is -6.97, reward sum is 881.37, time is 28.0, close is 40.6307

3830/100000
action_prob: [0.21194978 0.57609016 0

action_loss: -1528.1134033203125, Q_loss : 57.65489196777344
reward is 1.99, reward sum is 932.54, time is 15.0, close is 41.049

3866/100000
action_prob: [0.21194959 0.57609075 0.21195963], action is sell
action_loss: -1528.5035400390625, Q_loss : 71.620849609375
reward is -3.98, reward sum is 928.56, time is 16.0, close is 41.0092

3867/100000
action_prob: [0.2119496  0.5760908  0.21195965], action is sell
action_loss: -1528.893798828125, Q_loss : 70.36115264892578
reward is 3.98, reward sum is 932.54, time is 17.0, close is 41.049

3868/100000
action_prob: [0.2119496  0.5760908  0.21195965], action is buy
action_loss: -1529.2835693359375, Q_loss : -90.91777038574219
reward is 11.96, reward sum is 944.50, time is 18.0, close is 41.1686

3869/100000
action_prob: [0.2119496  0.5760908  0.21195963], action is buy
action_loss: -1529.673828125, Q_loss : 55.60321807861328
reward is 0.00, reward sum is 944.50, time is 19.0, close is 41.1686

3870/100000
action_prob: [0.21194956 0.5760909  0

action_loss: -1543.714599609375, Q_loss : -37.33716583251953
reward is 0.00, reward sum is 861.00, time is 6.0, close is 40.1426

3906/100000
action_prob: [0.2119494 0.5760914 0.2119592], action is hold
action_loss: -1544.104736328125, Q_loss : -56.41097640991211
reward is 0.00, reward sum is 861.00, time is 7.0, close is 40.023

3907/100000
action_prob: [0.21194938 0.57609147 0.21195918], action is sell
action_loss: -1544.4947509765625, Q_loss : -28.454944610595703
reward is 0.00, reward sum is 861.00, time is 8.0, close is 39.7441

3908/100000
action_prob: [0.21194938 0.57609147 0.21195917], action is sell
action_loss: -1544.884765625, Q_loss : 32.77643585205078
reward is 0.00, reward sum is 861.00, time is 9.0, close is 39.8338

3909/100000
action_prob: [0.21194936 0.57609147 0.21195917], action is sell
action_loss: -1545.2747802734375, Q_loss : -106.0727310180664
reward is 0.00, reward sum is 861.00, time is 10.0, close is 39.7342

3910/100000
action_prob: [0.21194936 0.57609147 0.

action_prob: [0.21194918 0.57609206 0.21195877], action is hold
action_loss: -1559.3157958984375, Q_loss : -9.084709167480469
reward is 7.97, reward sum is 923.94, time is 46.0, close is 40.5211

3946/100000
action_prob: [0.21194918 0.57609206 0.21195877], action is sell
action_loss: -1559.7059326171875, Q_loss : -25.527074813842773
reward is -5.98, reward sum is 917.96, time is 47.0, close is 40.4613

3947/100000
action_prob: [0.21194917 0.57609206 0.21195875], action is sell
action_loss: -1560.095947265625, Q_loss : 30.429109573364258
reward is 8.97, reward sum is 926.93, time is 48.0, close is 40.551

3948/100000
action_prob: [0.21194917 0.57609206 0.21195875], action is sell
action_loss: -1560.4859619140625, Q_loss : -5.263857841491699
reward is 30.88, reward sum is 957.81, time is 0.0, close is 40.8598

3949/100000
action_prob: [0.21194915 0.57609206 0.21195874], action is sell
action_loss: -1560.8759765625, Q_loss : 44.16132354736328
reward is 18.12, reward sum is 975.93, time is

action_loss: -1574.9169921875, Q_loss : 19.170793533325195
reward is 0.00, reward sum is 933.28, time is 37.0, close is 40.9992

3986/100000
action_prob: [0.21194898 0.5760927  0.21195835], action is sell
action_loss: -1575.3070068359375, Q_loss : 4.1369781494140625
reward is 0.00, reward sum is 933.28, time is 38.0, close is 40.9992

3987/100000
action_prob: [0.21194898 0.5760927  0.21195833], action is sell
action_loss: -1575.6971435546875, Q_loss : -7.896990776062012
reward is -22.91, reward sum is 910.37, time is 39.0, close is 40.7701

3988/100000
action_prob: [0.21194898 0.5760927  0.21195832], action is buy
action_loss: -1576.087158203125, Q_loss : -17.615646362304688
reward is 0.00, reward sum is 910.37, time is 40.0, close is 40.7701

3989/100000
action_prob: [0.21194895 0.5760927  0.21195829], action is buy
action_loss: -1576.4771728515625, Q_loss : -52.7109375
reward is 6.97, reward sum is 917.34, time is 41.0, close is 40.8398

3990/100000
action_prob: [0.21194896 0.5760928

action_loss: -1590.5181884765625, Q_loss : -29.855148315429688
reward is -13.94, reward sum is 853.94, time is 28.0, close is 40.551

4026/100000
action_prob: [0.21194878 0.5760933  0.21195792], action is buy
action_loss: -1590.908203125, Q_loss : -90.58911895751953
reward is -7.97, reward sum is 845.97, time is 29.0, close is 40.4713

4027/100000
action_prob: [0.21194878 0.5760933  0.21195792], action is hold
action_loss: -1591.29833984375, Q_loss : -53.47661209106445
reward is 5.98, reward sum is 851.95, time is 30.0, close is 40.5311

4028/100000
action_prob: [0.21194878 0.5760933  0.2119579 ], action is sell
action_loss: -1591.6881103515625, Q_loss : -33.87689971923828
reward is 0.99, reward sum is 852.94, time is 31.0, close is 40.541

4029/100000
action_prob: [0.21194877 0.5760933  0.21195789], action is sell
action_loss: -1592.078369140625, Q_loss : 9.979286193847656
reward is -2.99, reward sum is 849.95, time is 32.0, close is 40.5111

4030/100000
action_prob: [0.21194877 0.576

action_loss: -1606.1192626953125, Q_loss : -118.1031265258789
reward is -4.98, reward sum is 963.86, time is 19.0, close is 41.8957

4066/100000
action_prob: [0.21194859 0.5760939  0.21195751], action is buy
action_loss: -1606.5093994140625, Q_loss : 76.17933654785156
reward is -1.00, reward sum is 962.86, time is 20.0, close is 41.8857

4067/100000
action_prob: [0.21194859 0.5760939  0.2119575 ], action is buy
action_loss: -1606.8995361328125, Q_loss : 31.760969161987305
reward is 0.00, reward sum is 962.86, time is 21.0, close is 41.8857

4068/100000
action_prob: [0.21194859 0.5760939  0.21195748], action is sell
action_loss: -1607.2894287109375, Q_loss : 19.817155838012695
reward is 5.98, reward sum is 968.84, time is 22.0, close is 41.9455

4069/100000
action_prob: [0.2119486  0.5760939  0.21195748], action is sell
action_loss: -1607.6795654296875, Q_loss : 35.95293045043945
reward is 19.92, reward sum is 988.76, time is 23.0, close is 42.1447

4070/100000
action_prob: [0.21194859 

action_loss: -1621.720458984375, Q_loss : 69.46281433105469
reward is 0.99, reward sum is 817.78, time is 10.0, close is 40.541

4106/100000
action_prob: [0.21194844 0.57609445 0.21195714], action is hold
action_loss: -1622.110595703125, Q_loss : 56.815162658691406
reward is 23.91, reward sum is 841.69, time is 11.0, close is 40.7801

4107/100000
action_prob: [0.21194841 0.5760945  0.21195711], action is sell
action_loss: -1622.5006103515625, Q_loss : -41.20497512817383
reward is -13.95, reward sum is 827.74, time is 12.0, close is 40.6406

4108/100000
action_prob: [0.21194841 0.5760945  0.21195711], action is sell
action_loss: -1622.8905029296875, Q_loss : -32.74412536621094
reward is -0.99, reward sum is 826.75, time is 13.0, close is 40.6307

4109/100000
action_prob: [0.21194841 0.5760945  0.2119571 ], action is sell
action_loss: -1623.28076171875, Q_loss : -18.620243072509766
reward is -1.00, reward sum is 825.75, time is 14.0, close is 40.6207

4110/100000
action_prob: [0.21194841

action_loss: -1637.3216552734375, Q_loss : -44.685752868652344
reward is 1.18, reward sum is 878.73, time is 1.0, close is 41.1586

4146/100000
action_prob: [0.21194823 0.576095   0.21195674], action is sell
action_loss: -1637.7117919921875, Q_loss : -18.992103576660156
reward is 0.00, reward sum is 878.73, time is 2.0, close is 41.1088

4147/100000
action_prob: [0.21194823 0.576095   0.21195674], action is sell
action_loss: -1638.101806640625, Q_loss : 34.378929138183594
reward is 0.00, reward sum is 878.73, time is 3.0, close is 41.1088

4148/100000
action_prob: [0.21194823 0.576095   0.21195672], action is sell
action_loss: -1638.49169921875, Q_loss : -84.91394805908203
reward is 0.00, reward sum is 878.73, time is 4.0, close is 40.9395

4149/100000
action_prob: [0.21194825 0.57609504 0.21195672], action is sell
action_loss: -1638.8819580078125, Q_loss : -24.358856201171875
reward is 0.00, reward sum is 878.73, time is 5.0, close is 40.79

4150/100000
action_prob: [0.21194823 0.5760

action_prob: [0.21194807 0.5760955  0.2119564 ], action is sell
action_loss: -1652.9229736328125, Q_loss : -61.38923263549805
reward is 8.97, reward sum is 947.64, time is 41.0, close is 41.4973

4186/100000
action_prob: [0.21194807 0.5760956  0.21195638], action is sell
action_loss: -1653.31298828125, Q_loss : -37.988121032714844
reward is 23.90, reward sum is 971.54, time is 42.0, close is 41.7363

4187/100000
action_prob: [0.21194807 0.5760956  0.21195637], action is sell
action_loss: -1653.703125, Q_loss : 4.342981338500977
reward is -29.88, reward sum is 941.66, time is 43.0, close is 41.4375

4188/100000
action_prob: [0.21194807 0.5760956  0.21195637], action is hold
action_loss: -1654.0931396484375, Q_loss : -21.305908203125
reward is 22.91, reward sum is 964.57, time is 44.0, close is 41.6666

4189/100000
action_prob: [0.21194807 0.5760956  0.21195637], action is sell
action_loss: -1654.483154296875, Q_loss : -30.464797973632812
reward is -7.97, reward sum is 956.60, time is 45

action_loss: -1668.524169921875, Q_loss : -6.558588981628418
reward is -9.97, reward sum is 858.34, time is 32.0, close is 40.4314

4226/100000
action_prob: [0.21194793 0.5760961  0.21195604], action is sell
action_loss: -1668.914306640625, Q_loss : -98.33914184570312
reward is -8.96, reward sum is 849.38, time is 33.0, close is 40.3418

4227/100000
action_prob: [0.21194793 0.5760961  0.21195604], action is hold
action_loss: -1669.3043212890625, Q_loss : 29.318683624267578
reward is 18.93, reward sum is 868.31, time is 34.0, close is 40.5311

4228/100000
action_prob: [0.21194793 0.5760961  0.21195604], action is sell
action_loss: -1669.694091796875, Q_loss : 43.07307052612305
reward is -3.99, reward sum is 864.32, time is 35.0, close is 40.4912

4229/100000
action_prob: [0.21194793 0.5760961  0.21195601], action is sell
action_loss: -1670.08447265625, Q_loss : 38.87910461425781
reward is 5.98, reward sum is 870.30, time is 36.0, close is 40.551

4230/100000
action_prob: [0.21194789 0.5

action_loss: -1684.12548828125, Q_loss : 11.138721466064453
reward is -13.95, reward sum is 811.90, time is 23.0, close is 39.8537

4266/100000
action_prob: [0.21194774 0.5760966  0.21195568], action is sell
action_loss: -1684.5155029296875, Q_loss : -85.17008972167969
reward is 7.97, reward sum is 819.87, time is 24.0, close is 39.9334

4267/100000
action_prob: [0.21194774 0.5760966  0.21195565], action is sell
action_loss: -1684.905517578125, Q_loss : -28.444461822509766
reward is 5.98, reward sum is 825.85, time is 25.0, close is 39.9932

4268/100000
action_prob: [0.21194774 0.5760966  0.21195565], action is hold
action_loss: -1685.2955322265625, Q_loss : 24.666351318359375
reward is 0.00, reward sum is 825.85, time is 26.0, close is 39.9932

4269/100000
action_prob: [0.21194772 0.5760966  0.21195565], action is hold
action_loss: -1685.685791015625, Q_loss : -34.766841888427734
reward is -8.97, reward sum is 816.88, time is 27.0, close is 39.9035

4270/100000
action_prob: [0.2119477

action_loss: -1699.7266845703125, Q_loss : -51.701377868652344
reward is -8.96, reward sum is 817.27, time is 14.0, close is 39.9932

4306/100000
action_prob: [0.2119476  0.5760971  0.21195535], action is buy
action_loss: -1700.116943359375, Q_loss : -33.04119110107422
reward is -7.97, reward sum is 809.30, time is 15.0, close is 39.9135

4307/100000
action_prob: [0.2119476  0.5760971  0.21195534], action is sell
action_loss: -1700.5068359375, Q_loss : -29.955005645751953
reward is 7.97, reward sum is 817.27, time is 16.0, close is 39.9932

4308/100000
action_prob: [0.21194759 0.5760971  0.21195534], action is sell
action_loss: -1700.896728515625, Q_loss : -55.918975830078125
reward is 4.98, reward sum is 822.25, time is 17.0, close is 40.043

4309/100000
action_prob: [0.21194759 0.5760971  0.21195531], action is sell
action_loss: -1701.286865234375, Q_loss : 2.35756778717041
reward is 5.97, reward sum is 828.22, time is 18.0, close is 40.1027

4310/100000
action_prob: [0.21194759 0.57

action_loss: -1715.328125, Q_loss : -48.01332092285156
reward is 0.00, reward sum is 896.16, time is 5.0, close is 40.7004

4346/100000
action_prob: [0.21194744 0.57609755 0.21195501], action is sell
action_loss: -1715.7178955078125, Q_loss : -64.48664855957031
reward is 0.00, reward sum is 896.16, time is 6.0, close is 40.79

4347/100000
action_prob: [0.21194744 0.57609755 0.211955  ], action is buy
action_loss: -1716.108154296875, Q_loss : -51.9911003112793
reward is -0.81, reward sum is 895.35, time is 7.0, close is 40.6805

4348/100000
action_prob: [0.21194744 0.57609755 0.211955  ], action is buy
action_loss: -1716.4979248046875, Q_loss : -41.97491455078125
reward is 37.85, reward sum is 933.20, time is 8.0, close is 41.059

4349/100000
action_prob: [0.21194743 0.57609755 0.21195498], action is hold
action_loss: -1716.88818359375, Q_loss : -68.86791229248047
reward is 2.99, reward sum is 936.19, time is 9.0, close is 41.0889

4350/100000
action_prob: [0.21194741 0.5760976  0.21195

action_loss: -1730.92919921875, Q_loss : 11.650732040405273
reward is -18.92, reward sum is 910.29, time is 45.0, close is 40.8299

4386/100000
action_prob: [0.2119473  0.576098   0.21195468], action is sell
action_loss: -1731.3193359375, Q_loss : 51.674713134765625
reward is 0.99, reward sum is 911.28, time is 46.0, close is 40.8398

4387/100000
action_prob: [0.2119473  0.576098   0.21195468], action is buy
action_loss: -1731.70947265625, Q_loss : -13.39704704284668
reward is 9.97, reward sum is 921.25, time is 47.0, close is 40.9395

4388/100000
action_prob: [0.21194729 0.576098   0.21195467], action is hold
action_loss: -1732.09912109375, Q_loss : 33.608985900878906
reward is 0.00, reward sum is 921.25, time is 48.0, close is 40.9395

4389/100000
action_prob: [0.21194729 0.576098   0.21195465], action is sell
action_loss: -1732.489501953125, Q_loss : 13.605172157287598
reward is 8.96, reward sum is 930.21, time is 0.0, close is 41.0291

4390/100000
action_prob: [0.21194729 0.576098 

action_loss: -1746.530517578125, Q_loss : 35.30464172363281
reward is 4.98, reward sum is 920.60, time is 36.0, close is 40.4314

4426/100000
action_prob: [0.21194716 0.5760985  0.21195439], action is sell
action_loss: -1746.92041015625, Q_loss : 11.420823097229004
reward is 7.97, reward sum is 928.57, time is 37.0, close is 40.5111

4427/100000
action_prob: [0.21194716 0.5760985  0.21195437], action is sell
action_loss: -1747.3106689453125, Q_loss : 20.42685317993164
reward is 1.00, reward sum is 929.57, time is 38.0, close is 40.5211

4428/100000
action_prob: [0.21194716 0.5760985  0.21195437], action is sell
action_loss: -1747.70068359375, Q_loss : -0.56695556640625
reward is 0.00, reward sum is 929.57, time is 39.0, close is 40.5211

4429/100000
action_prob: [0.21194716 0.5760985  0.21195437], action is sell
action_loss: -1748.0906982421875, Q_loss : -1.5409278869628906
reward is -9.96, reward sum is 919.61, time is 40.0, close is 40.4215

4430/100000
action_prob: [0.21194717 0.576

action_loss: -1762.1318359375, Q_loss : -95.34130859375
reward is -19.92, reward sum is 803.44, time is 27.0, close is 39.0967

4466/100000
action_prob: [0.21194701 0.5760989  0.21195406], action is sell
action_loss: -1762.521728515625, Q_loss : -41.349395751953125
reward is 4.98, reward sum is 808.42, time is 28.0, close is 39.1465

4467/100000
action_prob: [0.21194701 0.5760989  0.21195406], action is buy
action_loss: -1762.911865234375, Q_loss : -83.39909362792969
reward is 0.00, reward sum is 808.42, time is 29.0, close is 39.1465

4468/100000
action_prob: [0.21194701 0.5760989  0.21195406], action is buy
action_loss: -1763.3018798828125, Q_loss : 121.92859649658203
reward is 12.95, reward sum is 821.37, time is 30.0, close is 39.275999999999996

4469/100000
action_prob: [0.21194701 0.576099   0.21195406], action is sell
action_loss: -1763.69189453125, Q_loss : -74.40687561035156
reward is -7.97, reward sum is 813.40, time is 31.0, close is 39.1963

4470/100000
action_prob: [0.2119

action_loss: -1777.7330322265625, Q_loss : -24.81725311279297
reward is -1.00, reward sum is 792.90, time is 18.0, close is 39.0568

4506/100000
action_prob: [0.21194689 0.57609934 0.21195377], action is sell
action_loss: -1778.123046875, Q_loss : -56.486297607421875
reward is 13.95, reward sum is 806.85, time is 19.0, close is 39.1963

4507/100000
action_prob: [0.21194689 0.57609934 0.21195377], action is sell
action_loss: -1778.51318359375, Q_loss : -1.508544921875
reward is -5.98, reward sum is 800.87, time is 20.0, close is 39.1365

4508/100000
action_prob: [0.21194689 0.57609934 0.21195377], action is sell
action_loss: -1778.903076171875, Q_loss : -31.534801483154297
reward is -5.97, reward sum is 794.90, time is 21.0, close is 39.0768

4509/100000
action_prob: [0.21194687 0.57609934 0.21195376], action is sell
action_loss: -1779.293212890625, Q_loss : 36.26003646850586
reward is -4.98, reward sum is 789.92, time is 22.0, close is 39.027

4510/100000
action_prob: [0.21194687 0.576

action_loss: -1793.3343505859375, Q_loss : -81.69335174560547
reward is -7.97, reward sum is 727.60, time is 9.0, close is 38.3695

4546/100000
action_prob: [0.21194676 0.57609975 0.2119535 ], action is buy
action_loss: -1793.724365234375, Q_loss : 24.982837677001953
reward is 15.94, reward sum is 743.54, time is 10.0, close is 38.5289

4547/100000
action_prob: [0.21194676 0.57609975 0.21195349], action is sell
action_loss: -1794.1143798828125, Q_loss : -54.60820388793945
reward is 1.99, reward sum is 745.53, time is 11.0, close is 38.5488

4548/100000
action_prob: [0.21194674 0.57609975 0.21195346], action is hold
action_loss: -1794.5042724609375, Q_loss : -14.694393157958984
reward is 1.00, reward sum is 746.53, time is 12.0, close is 38.5588

4549/100000
action_prob: [0.21194673 0.57609975 0.21195346], action is buy
action_loss: -1794.89453125, Q_loss : 9.891090393066406
reward is 6.97, reward sum is 753.50, time is 13.0, close is 38.6285

4550/100000
action_prob: [0.21194674 0.5760

action_loss: -1808.9356689453125, Q_loss : -126.67929077148438
reward is 10.95, reward sum is 809.28, time is 0.0, close is 39.1863

4586/100000
action_prob: [0.21194664 0.5761002  0.21195321], action is buy
action_loss: -1809.32568359375, Q_loss : 31.26433563232422
reward is -14.94, reward sum is 794.34, time is 1.0, close is 39.0369

4587/100000
action_prob: [0.21194664 0.5761002  0.21195321], action is sell
action_loss: -1809.7156982421875, Q_loss : -87.83707427978516
reward is 10.19, reward sum is 804.53, time is 2.0, close is 39.1465

4588/100000
action_prob: [0.21194662 0.5761002  0.21195321], action is hold
action_loss: -1810.105712890625, Q_loss : -31.041046142578125
reward is 0.00, reward sum is 804.53, time is 3.0, close is 38.9572

4589/100000
action_prob: [0.21194662 0.5761002  0.21195321], action is buy
action_loss: -1810.495849609375, Q_loss : 6.300996780395508
reward is -0.78, reward sum is 803.75, time is 4.0, close is 38.8775

4590/100000
action_prob: [0.21194662 0.576

action_loss: -1824.5367431640625, Q_loss : -60.97222900390625
reward is -10.96, reward sum is 789.81, time is 40.0, close is 38.7381

4626/100000
action_prob: [0.21194652 0.5761005  0.21195294], action is buy
action_loss: -1824.9267578125, Q_loss : 4.680793762207031
reward is 0.00, reward sum is 789.81, time is 41.0, close is 38.7381

4627/100000
action_prob: [0.2119465  0.5761005  0.21195292], action is sell
action_loss: -1825.31689453125, Q_loss : -4.012576103210449
reward is 3.98, reward sum is 793.79, time is 42.0, close is 38.7779

4628/100000
action_prob: [0.2119465  0.5761006  0.21195292], action is sell
action_loss: -1825.7069091796875, Q_loss : -10.206989288330078
reward is -7.97, reward sum is 785.82, time is 43.0, close is 38.6982

4629/100000
action_prob: [0.2119465  0.5761006  0.21195292], action is hold
action_loss: -1826.096923828125, Q_loss : -27.150957107543945
reward is -23.90, reward sum is 761.92, time is 44.0, close is 38.4592

4630/100000
action_prob: [0.2119465  

action_loss: -1840.137939453125, Q_loss : -86.26133728027344
reward is 6.97, reward sum is 886.87, time is 31.0, close is 39.7043

4666/100000
action_prob: [0.2119464  0.57610095 0.21195267], action is sell
action_loss: -1840.5279541015625, Q_loss : -71.29530334472656
reward is -7.97, reward sum is 878.90, time is 32.0, close is 39.6246

4667/100000
action_prob: [0.2119464  0.57610095 0.21195267], action is buy
action_loss: -1840.91796875, Q_loss : -31.34912109375
reward is -19.92, reward sum is 858.98, time is 33.0, close is 39.4254

4668/100000
action_prob: [0.2119464  0.57610095 0.21195267], action is sell
action_loss: -1841.30810546875, Q_loss : -57.21308517456055
reward is -5.98, reward sum is 853.00, time is 34.0, close is 39.3656

4669/100000
action_prob: [0.2119464  0.57610095 0.21195266], action is hold
action_loss: -1841.6981201171875, Q_loss : 57.273101806640625
reward is 21.92, reward sum is 874.92, time is 35.0, close is 39.5848

4670/100000
action_prob: [0.2119464  0.5761

action_loss: -1855.739013671875, Q_loss : -23.547435760498047
reward is 28.89, reward sum is 1060.61, time is 22.0, close is 41.6268

4706/100000
action_prob: [0.21194628 0.57610124 0.21195243], action is sell
action_loss: -1856.129150390625, Q_loss : 39.38875198364258
reward is 10.95, reward sum is 1071.56, time is 23.0, close is 41.7363

4707/100000
action_prob: [0.21194628 0.5761013  0.21195243], action is hold
action_loss: -1856.5191650390625, Q_loss : -1.5852155685424805
reward is 15.94, reward sum is 1087.50, time is 24.0, close is 41.8957

4708/100000
action_prob: [0.21194628 0.5761013  0.21195239], action is sell
action_loss: -1856.9090576171875, Q_loss : -62.24903106689453
reward is 33.87, reward sum is 1121.37, time is 25.0, close is 42.2344

4709/100000
action_prob: [0.21194628 0.5761013  0.21195239], action is sell
action_loss: -1857.2991943359375, Q_loss : -79.96839904785156
reward is 19.92, reward sum is 1141.29, time is 26.0, close is 42.4336

4710/100000
action_prob: [0

action_loss: -1871.3402099609375, Q_loss : 12.037220001220703
reward is -6.97, reward sum is 1307.93, time is 13.0, close is 43.9078

4746/100000
action_prob: [0.21194616 0.57610166 0.21195216], action is hold
action_loss: -1871.730224609375, Q_loss : 13.192652702331543
reward is -3.98, reward sum is 1303.95, time is 14.0, close is 43.868

4747/100000
action_prob: [0.21194616 0.57610166 0.21195216], action is buy
action_loss: -1872.1202392578125, Q_loss : 49.27953338623047
reward is -5.98, reward sum is 1297.97, time is 15.0, close is 43.8082

4748/100000
action_prob: [0.21194616 0.57610166 0.21195215], action is buy
action_loss: -1872.51025390625, Q_loss : -13.705129623413086
reward is 1.99, reward sum is 1299.96, time is 16.0, close is 43.8281

4749/100000
action_prob: [0.21194616 0.57610166 0.21195215], action is sell
action_loss: -1872.900390625, Q_loss : -33.365726470947266
reward is -14.94, reward sum is 1285.02, time is 17.0, close is 43.6787

4750/100000
action_prob: [0.2119461

action_loss: -1886.9410400390625, Q_loss : -7.879323959350586
reward is -0.87, reward sum is 1256.39, time is 4.0, close is 43.4098

4786/100000
action_prob: [0.21194606 0.57610196 0.21195194], action is hold
action_loss: -1887.3314208984375, Q_loss : -22.883289337158203
reward is 0.00, reward sum is 1256.39, time is 5.0, close is 43.4098

4787/100000
action_prob: [0.21194604 0.576102   0.21195191], action is hold
action_loss: -1887.721435546875, Q_loss : 29.216516494750977
reward is 0.00, reward sum is 1256.39, time is 6.0, close is 43.4098

4788/100000
action_prob: [0.21194604 0.576102   0.21195191], action is sell
action_loss: -1888.111083984375, Q_loss : -3.9210734367370605
reward is 0.00, reward sum is 1256.39, time is 7.0, close is 43.4098

4789/100000
action_prob: [0.21194604 0.576102   0.21195191], action is hold
action_loss: -1888.50146484375, Q_loss : 50.924957275390625
reward is 0.00, reward sum is 1256.39, time is 8.0, close is 43.4098

4790/100000
action_prob: [0.21194604 

action_loss: -1902.542236328125, Q_loss : 22.3129825592041
reward is 0.00, reward sum is 1256.39, time is 44.0, close is 43.4098

4826/100000
action_prob: [0.21194595 0.5761024  0.2119517 ], action is buy
action_loss: -1902.932373046875, Q_loss : -55.44083023071289
reward is 0.00, reward sum is 1256.39, time is 45.0, close is 43.4098

4827/100000
action_prob: [0.21194595 0.5761024  0.2119517 ], action is sell
action_loss: -1903.322509765625, Q_loss : 28.826793670654297
reward is 0.00, reward sum is 1256.39, time is 46.0, close is 43.4098

4828/100000
action_prob: [0.21194595 0.5761024  0.21195167], action is buy
action_loss: -1903.7122802734375, Q_loss : -10.09717082977295
reward is 0.00, reward sum is 1256.39, time is 47.0, close is 43.4098

4829/100000
action_prob: [0.21194594 0.5761024  0.21195167], action is hold
action_loss: -1904.1025390625, Q_loss : -17.090984344482422
reward is 0.00, reward sum is 1256.39, time is 48.0, close is 43.4098

4830/100000
action_prob: [0.21194594 0.5

action_loss: -1918.1434326171875, Q_loss : 25.03877830505371
reward is -11.96, reward sum is 1373.15, time is 35.0, close is 44.9238

4866/100000
action_prob: [0.21194583 0.5761027  0.21195146], action is sell
action_loss: -1918.533447265625, Q_loss : 23.65467071533203
reward is -2.99, reward sum is 1370.16, time is 36.0, close is 44.8939

4867/100000
action_prob: [0.21194583 0.5761027  0.21195146], action is buy
action_loss: -1918.923583984375, Q_loss : 4.0708770751953125
reward is -7.96, reward sum is 1362.20, time is 37.0, close is 44.8143

4868/100000
action_prob: [0.21194583 0.5761027  0.21195146], action is sell
action_loss: -1919.3134765625, Q_loss : -68.00311279296875
reward is 1.99, reward sum is 1364.19, time is 38.0, close is 44.8342

4869/100000
action_prob: [0.21194583 0.57610273 0.21195145], action is buy
action_loss: -1919.70361328125, Q_loss : -1.0661506652832031
reward is -1.00, reward sum is 1363.19, time is 39.0, close is 44.8242

4870/100000
action_prob: [0.21194583

action_loss: -1933.7445068359375, Q_loss : -59.247459411621094
reward is 3.98, reward sum is 1360.38, time is 26.0, close is 44.8242

4906/100000
action_prob: [0.21194573 0.576103   0.21195121], action is sell
action_loss: -1934.1346435546875, Q_loss : 12.548721313476562
reward is 0.00, reward sum is 1360.38, time is 27.0, close is 44.8242

4907/100000
action_prob: [0.21194573 0.576103   0.21195121], action is buy
action_loss: -1934.524658203125, Q_loss : -9.147256851196289
reward is 10.96, reward sum is 1371.34, time is 28.0, close is 44.9338

4908/100000
action_prob: [0.21194573 0.576103   0.21195121], action is sell
action_loss: -1934.9146728515625, Q_loss : -16.339061737060547
reward is 1.99, reward sum is 1373.33, time is 29.0, close is 44.9537

4909/100000
action_prob: [0.21194574 0.57610303 0.21195121], action is sell
action_loss: -1935.304931640625, Q_loss : -62.15302658081055
reward is -7.97, reward sum is 1365.36, time is 30.0, close is 44.873999999999995

4910/100000
action_

action_loss: -1949.3455810546875, Q_loss : 3.684580087661743
reward is 0.00, reward sum is 1368.54, time is 17.0, close is 44.9238

4946/100000
action_prob: [0.21194564 0.5761034  0.21195102], action is buy
action_loss: -1949.7357177734375, Q_loss : -21.517377853393555
reward is 0.00, reward sum is 1368.54, time is 18.0, close is 44.9238

4947/100000
action_prob: [0.21194564 0.5761034  0.21195102], action is sell
action_loss: -1950.125732421875, Q_loss : -17.58119010925293
reward is 0.00, reward sum is 1368.54, time is 19.0, close is 44.9238

4948/100000
action_prob: [0.21194564 0.5761034  0.21195102], action is sell
action_loss: -1950.515625, Q_loss : -33.475154876708984
reward is 0.00, reward sum is 1368.54, time is 20.0, close is 44.9238

4949/100000
action_prob: [0.21194564 0.5761034  0.211951  ], action is buy
action_loss: -1950.9058837890625, Q_loss : -64.17764282226562
reward is 0.00, reward sum is 1368.54, time is 21.0, close is 44.9238

4950/100000
action_prob: [0.21194564 0.5

action_loss: -1964.94677734375, Q_loss : -46.579505920410156
reward is 0.00, reward sum is 1367.64, time is 8.0, close is 44.9238

4986/100000
action_prob: [0.21194556 0.5761036  0.21195081], action is buy
action_loss: -1965.3369140625, Q_loss : -41.56332015991211
reward is -0.90, reward sum is 1366.74, time is 9.0, close is 44.9238

4987/100000
action_prob: [0.21194555 0.5761036  0.2119508 ], action is sell
action_loss: -1965.7269287109375, Q_loss : 51.616310119628906
reward is 0.00, reward sum is 1366.74, time is 10.0, close is 44.9238

4988/100000
action_prob: [0.21194555 0.5761036  0.2119508 ], action is buy
action_loss: -1966.1168212890625, Q_loss : -25.76110076904297
reward is 0.00, reward sum is 1366.74, time is 11.0, close is 44.9238

4989/100000
action_prob: [0.21194555 0.5761036  0.21195078], action is sell
action_loss: -1966.5069580078125, Q_loss : 30.12493133544922
reward is 0.00, reward sum is 1366.74, time is 12.0, close is 44.9238

4990/100000
action_prob: [0.21194555 0.

action_prob: [0.21194544 0.576104   0.2119506 ], action is sell
action_loss: -1980.5479736328125, Q_loss : -10.496137619018555
reward is 0.00, reward sum is 1366.74, time is 48.0, close is 44.9238

5026/100000
action_prob: [0.21194544 0.576104   0.21195059], action is hold
action_loss: -1980.93798828125, Q_loss : -45.75941848754883
reward is 0.00, reward sum is 1366.74, time is 0.0, close is 44.9238

5027/100000
action_prob: [0.21194544 0.576104   0.21195059], action is sell
action_loss: -1981.328125, Q_loss : 9.066768646240234
reward is -0.90, reward sum is 1365.85, time is 1.0, close is 44.9238

5028/100000
action_prob: [0.21194544 0.576104   0.21195059], action is buy
action_loss: -1981.718017578125, Q_loss : -22.827199935913086
reward is -0.90, reward sum is 1364.95, time is 2.0, close is 44.9238

5029/100000
action_prob: [0.21194544 0.576104   0.21195059], action is hold
action_loss: -1982.108154296875, Q_loss : 36.97898483276367
reward is 0.00, reward sum is 1364.95, time is 3.0,

action_loss: -1996.14892578125, Q_loss : -29.921398162841797
reward is 0.00, reward sum is 1364.95, time is 39.0, close is 44.9238

5066/100000
action_prob: [0.21194537 0.5761042  0.21195038], action is buy
action_loss: -1996.5391845703125, Q_loss : -87.3193588256836
reward is 0.00, reward sum is 1364.95, time is 40.0, close is 44.9238

5067/100000
action_prob: [0.21194535 0.5761042  0.21195038], action is buy
action_loss: -1996.928955078125, Q_loss : 7.1807708740234375
reward is 0.00, reward sum is 1364.95, time is 41.0, close is 44.9238

5068/100000
action_prob: [0.21194537 0.5761043  0.21195038], action is hold
action_loss: -1997.3192138671875, Q_loss : -80.74313354492188
reward is 0.00, reward sum is 1364.95, time is 42.0, close is 44.9238

5069/100000
action_prob: [0.21194537 0.5761043  0.21195038], action is sell
action_loss: -1997.7093505859375, Q_loss : -33.98711395263672
reward is 0.00, reward sum is 1364.95, time is 43.0, close is 44.9238

5070/100000
action_prob: [0.21194537

action_loss: -2011.75, Q_loss : -23.137493133544922
reward is 0.00, reward sum is 1363.15, time is 30.0, close is 44.9238

5106/100000
action_prob: [0.21194528 0.5761045  0.2119502 ], action is hold
action_loss: -2012.140380859375, Q_loss : 60.668540954589844
reward is 0.00, reward sum is 1363.15, time is 31.0, close is 44.9238

5107/100000
action_prob: [0.21194528 0.5761045  0.2119502 ], action is buy
action_loss: -2012.5302734375, Q_loss : -11.095273971557617
reward is 0.00, reward sum is 1363.15, time is 32.0, close is 44.9238

5108/100000
action_prob: [0.21194527 0.5761045  0.2119502 ], action is hold
action_loss: -2012.9200439453125, Q_loss : -15.109240531921387
reward is 0.00, reward sum is 1363.15, time is 33.0, close is 44.9238

5109/100000
action_prob: [0.21194528 0.5761046  0.21195018], action is sell
action_loss: -2013.3104248046875, Q_loss : -28.003055572509766
reward is 0.00, reward sum is 1363.15, time is 34.0, close is 44.9238

5110/100000
action_prob: [0.21194528 0.5761

action_loss: -2027.3511962890625, Q_loss : -16.187763214111328
reward is 0.00, reward sum is 1361.35, time is 21.0, close is 44.9238

5146/100000
action_prob: [0.21194519 0.5761048  0.21194997], action is hold
action_loss: -2027.741455078125, Q_loss : -14.247405052185059
reward is 0.00, reward sum is 1361.35, time is 22.0, close is 44.9238

5147/100000
action_prob: [0.21194519 0.5761048  0.21194997], action is sell
action_loss: -2028.13134765625, Q_loss : -93.09121704101562
reward is 0.00, reward sum is 1361.35, time is 23.0, close is 44.9238

5148/100000
action_prob: [0.21194519 0.5761048  0.21194997], action is hold
action_loss: -2028.521240234375, Q_loss : -48.225189208984375
reward is 0.00, reward sum is 1361.35, time is 24.0, close is 44.9238

5149/100000
action_prob: [0.21194519 0.5761048  0.21194997], action is sell
action_loss: -2028.911376953125, Q_loss : -66.18914794921875
reward is 0.00, reward sum is 1361.35, time is 25.0, close is 44.9238

5150/100000
action_prob: [0.21194

action_loss: -2042.952392578125, Q_loss : -53.23952865600586
reward is 25.90, reward sum is 1474.06, time is 12.0, close is 45.9697

5186/100000
action_prob: [0.2119451  0.5761051  0.21194981], action is sell
action_loss: -2043.342529296875, Q_loss : -14.523348808288574
reward is 10.96, reward sum is 1485.02, time is 13.0, close is 46.0793

5187/100000
action_prob: [0.21194439 0.5761074  0.21194829], action is sell
action_loss: -2043.732666015625, Q_loss : -63.347312927246094
reward is -9.96, reward sum is 1475.06, time is 14.0, close is 45.9797

5188/100000
action_prob: [0.21194512 0.5761051  0.21194981], action is buy
action_loss: -2044.1224365234375, Q_loss : -3.6732616424560547
reward is -15.94, reward sum is 1459.12, time is 15.0, close is 45.8203

5189/100000
action_prob: [0.21194512 0.5761051  0.21194981], action is buy
action_loss: -2044.5128173828125, Q_loss : 16.384906768798828
reward is -3.98, reward sum is 1455.14, time is 16.0, close is 45.7805

5190/100000
action_prob: [0

action_loss: -2058.5537109375, Q_loss : 11.376045227050781
reward is 13.95, reward sum is 1382.61, time is 3.0, close is 44.9139

5226/100000
action_prob: [0.21194501 0.57610536 0.21194963], action is sell
action_loss: -2058.94384765625, Q_loss : -19.04888153076172
reward is 19.92, reward sum is 1402.53, time is 4.0, close is 45.1131

5227/100000
action_prob: [0.21194501 0.57610536 0.21194963], action is sell
action_loss: -2059.333740234375, Q_loss : -6.653262138366699
reward is 13.94, reward sum is 1416.47, time is 5.0, close is 45.2525

5228/100000
action_prob: [0.21194501 0.57610536 0.21194962], action is buy
action_loss: -2059.7236328125, Q_loss : 23.392770767211914
reward is -5.97, reward sum is 1410.50, time is 6.0, close is 45.1928

5229/100000
action_prob: [0.21194501 0.57610536 0.21194962], action is sell
action_loss: -2060.11376953125, Q_loss : 23.168956756591797
reward is -21.92, reward sum is 1388.58, time is 7.0, close is 44.9736

5230/100000
action_prob: [0.21194503 0.576

action_loss: -2074.15478515625, Q_loss : 53.968421936035156
reward is -2.00, reward sum is 1279.01, time is 43.0, close is 43.8779

5266/100000
action_prob: [0.21194494 0.5761056  0.21194944], action is buy
action_loss: -2074.544921875, Q_loss : -61.62538528442383
reward is -4.98, reward sum is 1274.03, time is 44.0, close is 43.8281

5267/100000
action_prob: [0.21194494 0.5761056  0.21194944], action is buy
action_loss: -2074.93505859375, Q_loss : -52.23334884643555
reward is -12.95, reward sum is 1261.08, time is 45.0, close is 43.6986

5268/100000
action_prob: [0.21194495 0.57610565 0.21194942], action is sell
action_loss: -2075.324951171875, Q_loss : -37.01530456542969
reward is -10.95, reward sum is 1250.13, time is 46.0, close is 43.5891

5269/100000
action_prob: [0.21194495 0.57610565 0.21194942], action is hold
action_loss: -2075.715087890625, Q_loss : -49.521514892578125
reward is 3.98, reward sum is 1254.11, time is 47.0, close is 43.6289

5270/100000
action_prob: [0.21194492

action_loss: -2089.756103515625, Q_loss : -48.72751998901367
reward is 22.91, reward sum is 1247.36, time is 34.0, close is 43.6289

5306/100000
action_prob: [0.21194485 0.57610583 0.21194926], action is sell
action_loss: -2090.14599609375, Q_loss : -55.148521423339844
reward is -4.98, reward sum is 1242.38, time is 35.0, close is 43.5791

5307/100000
action_prob: [0.21194485 0.57610583 0.21194924], action is sell
action_loss: -2090.5361328125, Q_loss : -117.16123962402344
reward is -1.00, reward sum is 1241.38, time is 36.0, close is 43.5691

5308/100000
action_prob: [0.21194485 0.57610583 0.21194924], action is hold
action_loss: -2090.926025390625, Q_loss : -46.60395050048828
reward is 20.92, reward sum is 1262.30, time is 37.0, close is 43.7783

5309/100000
action_prob: [0.21194485 0.57610583 0.21194924], action is hold
action_loss: -2091.316162109375, Q_loss : -6.933085918426514
reward is 0.00, reward sum is 1262.30, time is 38.0, close is 43.7783

5310/100000
action_prob: [0.21194

action_loss: -2105.357177734375, Q_loss : 38.9443244934082
reward is -25.90, reward sum is 1254.56, time is 25.0, close is 44.0074

5346/100000
action_prob: [0.2119448  0.5761061  0.21194911], action is hold
action_loss: -2105.747314453125, Q_loss : -104.73744201660156
reward is 16.94, reward sum is 1271.50, time is 26.0, close is 44.1768

5347/100000
action_prob: [0.2119448 0.5761061 0.2119491], action is hold
action_loss: -2106.137451171875, Q_loss : -1.1314001083374023
reward is -4.98, reward sum is 1266.52, time is 27.0, close is 44.126999999999995

5348/100000
action_prob: [0.21194479 0.57610613 0.2119491 ], action is sell
action_loss: -2106.52734375, Q_loss : -39.67450714111328
reward is 11.95, reward sum is 1278.47, time is 28.0, close is 44.2465

5349/100000
action_prob: [0.21194477 0.57610613 0.2119491 ], action is sell
action_loss: -2106.91748046875, Q_loss : -51.686614990234375
reward is 7.97, reward sum is 1286.44, time is 29.0, close is 44.3262

5350/100000
action_prob: [0

action_loss: -2120.95849609375, Q_loss : 42.68043518066406
reward is 3.98, reward sum is 1225.91, time is 16.0, close is 43.6787

5386/100000
action_prob: [0.21194474 0.57610637 0.21194893], action is sell
action_loss: -2121.348388671875, Q_loss : 43.54646682739258
reward is 6.97, reward sum is 1232.88, time is 17.0, close is 43.7484

5387/100000
action_prob: [0.21194473 0.57610637 0.21194893], action is sell
action_loss: -2121.738525390625, Q_loss : -127.79735565185547
reward is 6.98, reward sum is 1239.86, time is 18.0, close is 43.8182

5388/100000
action_prob: [0.21194473 0.57610637 0.21194892], action is hold
action_loss: -2122.12841796875, Q_loss : -38.17131423950195
reward is 18.92, reward sum is 1258.78, time is 19.0, close is 44.0074

5389/100000
action_prob: [0.21194473 0.57610637 0.21194892], action is hold
action_loss: -2122.5185546875, Q_loss : -23.215129852294922
reward is 1.99, reward sum is 1260.77, time is 20.0, close is 44.0273

5390/100000
action_prob: [0.21194473 0.

action_loss: -2136.5595703125, Q_loss : 4.534489631652832
reward is -15.94, reward sum is 1409.38, time is 7.0, close is 45.7207

5426/100000
action_prob: [0.21194464 0.5761066  0.21194878], action is buy
action_loss: -2136.94970703125, Q_loss : -9.276834487915039
reward is 13.95, reward sum is 1423.33, time is 8.0, close is 45.8602

5427/100000
action_prob: [0.21194465 0.5761066  0.21194878], action is sell
action_loss: -2137.33984375, Q_loss : -23.303442001342773
reward is 7.96, reward sum is 1431.29, time is 9.0, close is 45.9398

5428/100000
action_prob: [0.21194465 0.5761066  0.21194878], action is sell
action_loss: -2137.729736328125, Q_loss : 57.50273895263672
reward is 7.97, reward sum is 1439.26, time is 10.0, close is 46.0195

5429/100000
action_prob: [0.21194465 0.5761066  0.21194878], action is buy
action_loss: -2138.119873046875, Q_loss : -26.35122299194336
reward is 59.77, reward sum is 1499.03, time is 11.0, close is 46.6172

5430/100000
action_prob: [0.21194465 0.576106

action_loss: -2152.160888671875, Q_loss : -61.371604919433594
reward is 3.99, reward sum is 1492.06, time is 47.0, close is 46.5475

5466/100000
action_prob: [0.21194458 0.5761068  0.21194863], action is hold
action_loss: -2152.551025390625, Q_loss : -58.41542053222656
reward is -1.00, reward sum is 1491.06, time is 48.0, close is 46.5375

5467/100000
action_prob: [0.21194457 0.57610685 0.21194859], action is sell
action_loss: -2152.94091796875, Q_loss : 32.65581130981445
reward is -111.56, reward sum is 1379.50, time is 0.0, close is 45.4219

5468/100000
action_prob: [0.21194457 0.57610685 0.21194859], action is sell
action_loss: -2153.330810546875, Q_loss : 21.31664276123047
reward is 30.95, reward sum is 1410.45, time is 1.0, close is 45.7406

5469/100000
action_prob: [0.21194457 0.57610685 0.21194859], action is sell
action_loss: -2153.720947265625, Q_loss : -42.46717071533203
reward is 0.00, reward sum is 1410.45, time is 2.0, close is 45.5713

5470/100000
action_prob: [0.21194457

action_loss: -2167.761962890625, Q_loss : -23.67755126953125
reward is -1.00, reward sum is 1356.75, time is 38.0, close is 46.0793

5506/100000
action_prob: [0.2119445  0.576107   0.21194845], action is hold
action_loss: -2168.152099609375, Q_loss : -33.189720153808594
reward is 53.79, reward sum is 1410.54, time is 39.0, close is 46.6172

5507/100000
action_prob: [0.2119445  0.576107   0.21194845], action is sell
action_loss: -2168.5419921875, Q_loss : -21.59755516052246
reward is 0.00, reward sum is 1410.54, time is 40.0, close is 46.6172

5508/100000
action_prob: [0.2119445  0.576107   0.21194844], action is sell
action_loss: -2168.93212890625, Q_loss : 33.10070037841797
reward is -29.88, reward sum is 1380.66, time is 41.0, close is 46.3184

5509/100000
action_prob: [0.21194449 0.576107   0.21194844], action is hold
action_loss: -2169.322265625, Q_loss : -89.46326446533203
reward is 45.82, reward sum is 1426.48, time is 42.0, close is 46.7766

5510/100000
action_prob: [0.21194449 

action_loss: -2183.36328125, Q_loss : -64.39872741699219
reward is -9.96, reward sum is 1342.95, time is 29.0, close is 45.4219

5546/100000
action_prob: [0.21194445 0.57610726 0.21194829], action is hold
action_loss: -2183.753173828125, Q_loss : -22.867464065551758
reward is 20.92, reward sum is 1363.87, time is 30.0, close is 45.6311

5547/100000
action_prob: [0.21194445 0.57610726 0.21194829], action is hold
action_loss: -2184.143310546875, Q_loss : -1.8414306640625
reward is -3.99, reward sum is 1359.88, time is 31.0, close is 45.5912

5548/100000
action_prob: [0.21194445 0.57610726 0.21194829], action is hold
action_loss: -2184.533203125, Q_loss : 34.914756774902344
reward is -6.97, reward sum is 1352.91, time is 32.0, close is 45.5215

5549/100000
action_prob: [0.21194445 0.57610726 0.21194829], action is sell
action_loss: -2184.92333984375, Q_loss : -20.84921646118164
reward is -20.92, reward sum is 1331.99, time is 33.0, close is 45.3123

5550/100000
action_prob: [0.21194445 0.

action_loss: -2198.96435546875, Q_loss : -44.93959426879883
reward is -0.99, reward sum is 1394.93, time is 20.0, close is 46.7467

5586/100000
action_prob: [0.2119444  0.57610744 0.21194816], action is buy
action_loss: -2199.3544921875, Q_loss : 14.299816131591797
reward is 1.99, reward sum is 1396.92, time is 21.0, close is 46.7666

5587/100000
action_prob: [0.2119444  0.57610744 0.21194816], action is sell
action_loss: -2199.744384765625, Q_loss : -60.907371520996094
reward is -7.97, reward sum is 1388.95, time is 22.0, close is 46.6869

5588/100000
action_prob: [0.2119444  0.57610744 0.21194814], action is buy
action_loss: -2200.134521484375, Q_loss : -5.732751846313477
reward is -6.97, reward sum is 1381.98, time is 23.0, close is 46.6172

5589/100000
action_prob: [0.21194439 0.57610744 0.21194814], action is sell
action_loss: -2200.5244140625, Q_loss : -70.84516143798828
reward is 9.96, reward sum is 1391.94, time is 24.0, close is 46.7168

5590/100000
action_prob: [0.21194439 0.

action_loss: -2214.565673828125, Q_loss : -68.41414642333984
reward is 0.00, reward sum is 1425.86, time is 11.0, close is 46.9658

5626/100000
action_prob: [0.21194433 0.5761077  0.21194802], action is buy
action_loss: -2214.95556640625, Q_loss : -65.01950073242188
reward is -0.94, reward sum is 1424.92, time is 12.0, close is 46.926

5627/100000
action_prob: [0.21194433 0.5761077  0.21194802], action is sell
action_loss: -2215.345703125, Q_loss : -63.013465881347656
reward is -10.96, reward sum is 1413.96, time is 13.0, close is 46.8164

5628/100000
action_prob: [0.21194433 0.5761077  0.21194802], action is hold
action_loss: -2215.735595703125, Q_loss : 65.53271484375
reward is 11.95, reward sum is 1425.91, time is 14.0, close is 46.9359

5629/100000
action_prob: [0.2119443  0.5761077  0.21194799], action is hold
action_loss: -2216.125732421875, Q_loss : -22.131254196166992
reward is -5.97, reward sum is 1419.94, time is 15.0, close is 46.8762

5630/100000
action_prob: [0.2119443  0.

action_loss: -2230.166748046875, Q_loss : -59.18163299560547
reward is 0.00, reward sum is 1463.77, time is 2.0, close is 47.3145

5666/100000
action_prob: [0.21194427 0.5761079  0.21194787], action is hold
action_loss: -2230.556884765625, Q_loss : 22.554397583007812
reward is -27.90, reward sum is 1435.87, time is 3.0, close is 47.0355

5667/100000
action_prob: [0.21194425 0.5761079  0.21194786], action is sell
action_loss: -2230.947021484375, Q_loss : 28.048986434936523
reward is -12.89, reward sum is 1422.98, time is 4.0, close is 46.916000000000004

5668/100000
action_prob: [0.21194425 0.5761079  0.21194786], action is sell
action_loss: -2231.3369140625, Q_loss : -44.193382263183594
reward is 0.00, reward sum is 1422.98, time is 5.0, close is 46.916000000000004

5669/100000
action_prob: [0.21194425 0.5761079  0.21194786], action is buy
action_loss: -2231.72705078125, Q_loss : -0.49720096588134766
reward is -0.94, reward sum is 1422.04, time is 6.0, close is 46.8164

5670/100000
act

action_loss: -2245.76806640625, Q_loss : 15.43226432800293
reward is 4.98, reward sum is 1586.40, time is 42.0, close is 48.46

5706/100000
action_prob: [0.2119442  0.57610804 0.21194772], action is sell
action_loss: -2246.158203125, Q_loss : -14.561548233032227
reward is -3.99, reward sum is 1582.41, time is 43.0, close is 48.4201

5707/100000
action_prob: [0.2119442  0.57610804 0.21194772], action is sell
action_loss: -2246.54833984375, Q_loss : -15.099515914916992
reward is 5.98, reward sum is 1588.39, time is 44.0, close is 48.4799

5708/100000
action_prob: [0.2119442  0.57610804 0.21194772], action is buy
action_loss: -2246.938232421875, Q_loss : -27.479328155517578
reward is -6.97, reward sum is 1581.42, time is 45.0, close is 48.4102

5709/100000
action_prob: [0.2119442  0.57610804 0.21194772], action is sell
action_loss: -2247.328369140625, Q_loss : -53.31329345703125
reward is -1.00, reward sum is 1580.42, time is 46.0, close is 48.4002

5710/100000
action_prob: [0.21194421 0.

action_loss: -2261.369384765625, Q_loss : 74.44331359863281
reward is -2.98, reward sum is 1722.94, time is 33.0, close is 50.1932

5746/100000
action_prob: [0.21194413 0.5761083  0.2119476 ], action is hold
action_loss: -2261.759521484375, Q_loss : -6.707644462585449
reward is -3.99, reward sum is 1718.95, time is 34.0, close is 50.1533

5747/100000
action_prob: [0.21194413 0.5761083  0.2119476 ], action is sell
action_loss: -2262.149658203125, Q_loss : -52.471458435058594
reward is -22.91, reward sum is 1696.04, time is 35.0, close is 49.9242

5748/100000
action_prob: [0.21194413 0.5761083  0.2119476 ], action is buy
action_loss: -2262.53955078125, Q_loss : -82.45542907714844
reward is -2.99, reward sum is 1693.05, time is 36.0, close is 49.8943

5749/100000
action_prob: [0.21194413 0.5761083  0.2119476 ], action is hold
action_loss: -2262.9296875, Q_loss : -56.499244689941406
reward is -10.95, reward sum is 1682.10, time is 37.0, close is 49.7848

5750/100000
action_prob: [0.2119441

action_loss: -2276.970703125, Q_loss : 16.090377807617188
reward is 36.85, reward sum is 1836.50, time is 24.0, close is 51.6275

5786/100000
action_prob: [0.21194409 0.57610846 0.21194747], action is sell
action_loss: -2277.36083984375, Q_loss : 13.223636627197266
reward is 36.86, reward sum is 1873.36, time is 25.0, close is 51.9961

5787/100000
action_prob: [0.21194409 0.57610846 0.21194747], action is sell
action_loss: -2277.7509765625, Q_loss : -33.61662292480469
reward is 69.73, reward sum is 1943.09, time is 26.0, close is 52.6934

5788/100000
action_prob: [0.21194409 0.57610846 0.21194747], action is sell
action_loss: -2278.14111328125, Q_loss : -16.487720489501953
reward is -29.89, reward sum is 1913.20, time is 27.0, close is 52.3945

5789/100000
action_prob: [0.21194409 0.57610846 0.21194747], action is sell
action_loss: -2278.531005859375, Q_loss : 89.18061828613281
reward is -9.96, reward sum is 1903.24, time is 28.0, close is 52.2949

5790/100000
action_prob: [0.21194409 

action_loss: -2292.572265625, Q_loss : -12.935720443725586
reward is 5.98, reward sum is 2076.50, time is 15.0, close is 54.3469

5826/100000
action_prob: [0.21194404 0.57610863 0.21194737], action is sell
action_loss: -2292.96240234375, Q_loss : -21.789531707763672
reward is -4.98, reward sum is 2071.52, time is 16.0, close is 54.2971

5827/100000
action_prob: [0.21194403 0.57610863 0.21194737], action is sell
action_loss: -2293.352294921875, Q_loss : -27.953502655029297
reward is -8.97, reward sum is 2062.55, time is 17.0, close is 54.2074

5828/100000
action_prob: [0.21194403 0.57610863 0.21194737], action is sell
action_loss: -2293.7421875, Q_loss : -49.81747055053711
reward is 22.91, reward sum is 2085.46, time is 18.0, close is 54.4365

5829/100000
action_prob: [0.21194403 0.5761087  0.21194734], action is sell
action_loss: -2294.13232421875, Q_loss : -46.338932037353516
reward is 1.00, reward sum is 2086.46, time is 19.0, close is 54.4465

5830/100000
action_prob: [0.21194403 0.

action_loss: -2308.173583984375, Q_loss : -50.00166320800781
reward is 0.00, reward sum is 2132.19, time is 6.0, close is 54.2373

5866/100000
action_prob: [0.21194398 0.5761088  0.21194723], action is hold
action_loss: -2308.5634765625, Q_loss : -60.91563415527344
reward is 0.00, reward sum is 2132.19, time is 7.0, close is 54.3867

5867/100000
action_prob: [0.21194397 0.5761088  0.21194719], action is sell
action_loss: -2308.95361328125, Q_loss : -48.969444274902344
reward is 0.00, reward sum is 2132.19, time is 8.0, close is 54.5162

5868/100000
action_prob: [0.21194397 0.5761088  0.21194719], action is sell
action_loss: -2309.343505859375, Q_loss : -10.273416519165039
reward is 0.00, reward sum is 2132.19, time is 9.0, close is 54.3867

5869/100000
action_prob: [0.21194397 0.5761088  0.21194719], action is sell
action_loss: -2309.733642578125, Q_loss : 9.882616996765137
reward is 0.00, reward sum is 2132.19, time is 10.0, close is 54.4465

5870/100000
action_prob: [0.21194397 0.576

action_loss: -2323.77490234375, Q_loss : 33.57223892211914
reward is -5.98, reward sum is 2229.70, time is 46.0, close is 55.3828

5906/100000
action_prob: [0.21194392 0.57610893 0.21194708], action is hold
action_loss: -2324.164794921875, Q_loss : 10.206754684448242
reward is -11.95, reward sum is 2217.75, time is 47.0, close is 55.2633

5907/100000
action_prob: [0.21194392 0.576109   0.21194708], action is sell
action_loss: -2324.55517578125, Q_loss : 97.27445983886719
reward is 3.98, reward sum is 2221.73, time is 48.0, close is 55.3031

5908/100000
action_prob: [0.21194392 0.576109   0.21194708], action is sell
action_loss: -2324.94482421875, Q_loss : -77.03935241699219
reward is -11.95, reward sum is 2209.78, time is 0.0, close is 55.1836

5909/100000
action_prob: [0.21194392 0.576109   0.21194708], action is buy
action_loss: -2325.3349609375, Q_loss : 50.47666931152344
reward is 93.63, reward sum is 2303.41, time is 1.0, close is 56.1199

5910/100000
action_prob: [0.21194392 0.57

action_loss: -2339.37646484375, Q_loss : 14.616292953491211
reward is -22.91, reward sum is 2208.57, time is 37.0, close is 55.5422

5946/100000
action_prob: [0.21194386 0.5761091  0.21194698], action is sell
action_loss: -2339.76611328125, Q_loss : 16.45232391357422
reward is 4.98, reward sum is 2213.55, time is 38.0, close is 55.592

5947/100000
action_prob: [0.21194386 0.5761091  0.21194698], action is sell
action_loss: -2340.156494140625, Q_loss : 19.66159439086914
reward is 0.00, reward sum is 2213.55, time is 39.0, close is 55.592

5948/100000
action_prob: [0.21194384 0.5761092  0.21194696], action is sell
action_loss: -2340.54638671875, Q_loss : 45.83991241455078
reward is 1.99, reward sum is 2215.54, time is 40.0, close is 55.6119

5949/100000
action_prob: [0.21194384 0.5761092  0.21194696], action is sell
action_loss: -2340.9365234375, Q_loss : 11.47072982788086
reward is -1.99, reward sum is 2213.55, time is 41.0, close is 55.592

5950/100000
action_prob: [0.21194384 0.576109

action_loss: -2354.9775390625, Q_loss : -36.46979904174805
reward is -3.99, reward sum is 2211.32, time is 28.0, close is 54.8648

5986/100000
action_prob: [0.21194384 0.57610935 0.21194689], action is sell
action_loss: -2355.36767578125, Q_loss : 1.3463821411132812
reward is -11.95, reward sum is 2199.37, time is 29.0, close is 54.7453

5987/100000
action_prob: [0.21194384 0.57610935 0.21194686], action is sell
action_loss: -2355.7578125, Q_loss : -5.426573753356934
reward is 6.97, reward sum is 2206.34, time is 30.0, close is 54.815

5988/100000
action_prob: [0.2119438  0.57610935 0.21194686], action is sell
action_loss: -2356.147705078125, Q_loss : -13.591402053833008
reward is -4.98, reward sum is 2201.36, time is 31.0, close is 54.7652

5989/100000
action_prob: [0.2119438  0.57610935 0.21194686], action is hold
action_loss: -2356.537841796875, Q_loss : -73.36536407470703
reward is -27.89, reward sum is 2173.47, time is 32.0, close is 54.4863

5990/100000
action_prob: [0.21194379 0

action_loss: -2370.578857421875, Q_loss : -113.03474426269531
reward is -14.94, reward sum is 2058.76, time is 19.0, close is 52.9723

6026/100000
action_prob: [0.21194376 0.57610947 0.21194677], action is buy
action_loss: -2370.968994140625, Q_loss : 49.490482330322266
reward is -30.88, reward sum is 2027.88, time is 20.0, close is 52.6635

6027/100000
action_prob: [0.21194376 0.57610947 0.21194676], action is sell
action_loss: -2371.359130859375, Q_loss : 23.116466522216797
reward is 13.94, reward sum is 2041.82, time is 21.0, close is 52.8029

6028/100000
action_prob: [0.21194376 0.57610947 0.21194676], action is buy
action_loss: -2371.7490234375, Q_loss : -21.667497634887695
reward is -20.92, reward sum is 2020.90, time is 22.0, close is 52.5937

6029/100000
action_prob: [0.21194376 0.57610947 0.21194676], action is sell
action_loss: -2372.13916015625, Q_loss : -49.721309661865234
reward is -32.87, reward sum is 1988.03, time is 23.0, close is 52.265

6030/100000
action_prob: [0.21

action_loss: -2386.18017578125, Q_loss : 2.9392247200012207
reward is 46.82, reward sum is 2228.96, time is 10.0, close is 55.1338

6066/100000
action_prob: [0.21194372 0.57610965 0.21194662], action is sell
action_loss: -2386.5703125, Q_loss : 18.08586311340332
reward is -9.96, reward sum is 2219.00, time is 11.0, close is 55.0342

6067/100000
action_prob: [0.21194372 0.57610965 0.21194662], action is sell
action_loss: -2386.960693359375, Q_loss : -47.79962921142578
reward is -51.80, reward sum is 2167.20, time is 12.0, close is 54.5162

6068/100000
action_prob: [0.21194372 0.57610965 0.21194662], action is sell
action_loss: -2387.350341796875, Q_loss : 0.15655851364135742
reward is -21.91, reward sum is 2145.29, time is 13.0, close is 54.2971

6069/100000
action_prob: [0.21194372 0.57610965 0.21194662], action is buy
action_loss: -2387.740478515625, Q_loss : 17.82131004333496
reward is -19.92, reward sum is 2125.37, time is 14.0, close is 54.0979

6070/100000
action_prob: [0.21194372

action_loss: -2401.78173828125, Q_loss : -170.27627563476562
reward is -49.88, reward sum is 2270.72, time is 1.0, close is 55.5621

6106/100000
action_prob: [0.21194366 0.57610977 0.2119465 ], action is buy
action_loss: -2402.171630859375, Q_loss : -10.151756286621094
reward is -1.12, reward sum is 2269.60, time is 2.0, close is 55.7314

6107/100000
action_prob: [0.21194366 0.57610977 0.2119465 ], action is buy
action_loss: -2402.561767578125, Q_loss : -12.075575828552246
reward is 2.00, reward sum is 2271.60, time is 3.0, close is 55.7514

6108/100000
action_prob: [0.21194366 0.57610977 0.2119465 ], action is sell
action_loss: -2402.951904296875, Q_loss : 5.840459823608398
reward is 0.00, reward sum is 2271.60, time is 4.0, close is 55.7514

6109/100000
action_prob: [0.21194366 0.57610977 0.2119465 ], action is sell
action_loss: -2403.341796875, Q_loss : 50.66664505004883
reward is -46.82, reward sum is 2224.78, time is 5.0, close is 55.2832

6110/100000
action_prob: [0.21194366 0.57

action_loss: -2417.383056640625, Q_loss : -7.263736724853516
reward is -4.98, reward sum is 2289.53, time is 41.0, close is 55.9307

6146/100000
action_prob: [0.21194363 0.57610995 0.21194641], action is sell
action_loss: -2417.77294921875, Q_loss : 8.65229606628418
reward is -1.00, reward sum is 2288.53, time is 42.0, close is 55.9207

6147/100000
action_prob: [0.21194363 0.57610995 0.21194641], action is buy
action_loss: -2418.1630859375, Q_loss : -39.000221252441406
reward is -25.90, reward sum is 2262.63, time is 43.0, close is 55.6617

6148/100000
action_prob: [0.21194361 0.57610995 0.21194641], action is hold
action_loss: -2418.552978515625, Q_loss : -34.13548278808594
reward is -31.87, reward sum is 2230.76, time is 44.0, close is 55.343

6149/100000
action_prob: [0.21194361 0.57610995 0.2119464 ], action is hold
action_loss: -2418.943115234375, Q_loss : -49.76042556762695
reward is 8.96, reward sum is 2239.72, time is 45.0, close is 55.4326

6150/100000
action_prob: [0.21194361

action_loss: -2432.984130859375, Q_loss : 35.44016647338867
reward is 35.86, reward sum is 2310.22, time is 32.0, close is 55.7613

6186/100000
action_prob: [0.21194361 0.5761101  0.21194634], action is sell
action_loss: -2433.37451171875, Q_loss : 86.21634674072266
reward is 7.97, reward sum is 2318.19, time is 33.0, close is 55.841

6187/100000
action_prob: [0.21194361 0.5761101  0.21194634], action is buy
action_loss: -2433.764404296875, Q_loss : -71.10761260986328
reward is -20.92, reward sum is 2297.27, time is 34.0, close is 55.6318

6188/100000
action_prob: [0.21194361 0.5761101  0.21194632], action is sell
action_loss: -2434.154541015625, Q_loss : -58.08894348144531
reward is -4.98, reward sum is 2292.29, time is 35.0, close is 55.582

6189/100000
action_prob: [0.21194361 0.5761101  0.21194632], action is sell
action_loss: -2434.54443359375, Q_loss : -10.405394554138184
reward is -11.95, reward sum is 2280.34, time is 36.0, close is 55.4625

6190/100000
action_prob: [0.21194361

action_loss: -2448.585693359375, Q_loss : -40.40577697753906
reward is -1.99, reward sum is 2523.19, time is 23.0, close is 58.4508

6226/100000
action_prob: [0.21194355 0.57611024 0.21194622], action is sell
action_loss: -2448.9755859375, Q_loss : -21.529743194580078
reward is -1.99, reward sum is 2521.20, time is 24.0, close is 58.4309

6227/100000
action_prob: [0.21194355 0.57611024 0.21194622], action is sell
action_loss: -2449.365966796875, Q_loss : -18.238872528076172
reward is 9.96, reward sum is 2531.16, time is 25.0, close is 58.5305

6228/100000
action_prob: [0.21194355 0.57611024 0.2119462 ], action is hold
action_loss: -2449.755615234375, Q_loss : -9.577526092529297
reward is -9.96, reward sum is 2521.20, time is 26.0, close is 58.4309

6229/100000
action_prob: [0.21194355 0.57611024 0.2119462 ], action is hold
action_loss: -2450.14599609375, Q_loss : -70.4014892578125
reward is 13.94, reward sum is 2535.14, time is 27.0, close is 58.5703

6230/100000
action_prob: [0.211943

action_loss: -2464.187255859375, Q_loss : -64.96504974365234
reward is -7.97, reward sum is 2674.27, time is 14.0, close is 61.1303

6266/100000
action_prob: [0.21194349 0.5761104  0.2119461 ], action is sell
action_loss: -2464.5771484375, Q_loss : 17.1743106842041
reward is -24.91, reward sum is 2649.36, time is 15.0, close is 60.8812

6267/100000
action_prob: [0.21194349 0.5761104  0.2119461 ], action is sell
action_loss: -2464.96728515625, Q_loss : -61.519657135009766
reward is 15.94, reward sum is 2665.30, time is 16.0, close is 61.0406

6268/100000
action_prob: [0.21194348 0.5761104  0.2119461 ], action is sell
action_loss: -2465.357177734375, Q_loss : 45.05652618408203
reward is 1.99, reward sum is 2667.29, time is 17.0, close is 61.0605

6269/100000
action_prob: [0.21194348 0.5761104  0.2119461 ], action is sell
action_loss: -2465.747314453125, Q_loss : -46.57743835449219
reward is -4.98, reward sum is 2662.31, time is 18.0, close is 61.0107

6270/100000
action_prob: [0.21194348

action_loss: -2479.788330078125, Q_loss : -29.92782211303711
reward is 44.83, reward sum is 2644.93, time is 5.0, close is 60.4629

6306/100000
action_prob: [0.21194345 0.5761105  0.21194601], action is sell
action_loss: -2480.178466796875, Q_loss : -5.771790504455566
reward is -60.76, reward sum is 2584.17, time is 6.0, close is 59.8553

6307/100000
action_prob: [0.21194345 0.5761105  0.21194601], action is sell
action_loss: -2480.568603515625, Q_loss : -29.785600662231445
reward is -4.98, reward sum is 2579.19, time is 7.0, close is 59.8055

6308/100000
action_prob: [0.21194346 0.57611054 0.21194601], action is sell
action_loss: -2480.95849609375, Q_loss : -83.16607666015625
reward is 25.90, reward sum is 2605.09, time is 8.0, close is 60.0645

6309/100000
action_prob: [0.21194346 0.57611054 0.21194601], action is sell
action_loss: -2481.3486328125, Q_loss : 13.026460647583008
reward is 48.80, reward sum is 2653.89, time is 9.0, close is 60.5525

6310/100000
action_prob: [0.21194346 

action_loss: -2495.389892578125, Q_loss : -70.07958221435547
reward is -14.95, reward sum is 2600.10, time is 45.0, close is 60.0146

6346/100000
action_prob: [0.21194342 0.5761107  0.21194594], action is sell
action_loss: -2495.77978515625, Q_loss : -28.698593139648438
reward is -21.91, reward sum is 2578.19, time is 46.0, close is 59.7955

6347/100000
action_prob: [0.21194342 0.5761107  0.21194594], action is sell
action_loss: -2496.169921875, Q_loss : -19.961702346801758
reward is 8.97, reward sum is 2587.16, time is 47.0, close is 59.8852

6348/100000
action_prob: [0.21194342 0.5761107  0.21194594], action is sell
action_loss: -2496.559814453125, Q_loss : -49.89551544189453
reward is -2.99, reward sum is 2584.17, time is 48.0, close is 59.8553

6349/100000
action_prob: [0.21194342 0.5761107  0.21194594], action is sell
action_loss: -2496.949951171875, Q_loss : 58.750518798828125
reward is -1.00, reward sum is 2583.17, time is 0.0, close is 59.8453

6350/100000
action_prob: [0.21194

action_loss: -2510.990966796875, Q_loss : -42.889862060546875
reward is -15.94, reward sum is 2412.43, time is 36.0, close is 59.7955

6386/100000
action_prob: [0.21194336 0.5761107  0.21194583], action is buy
action_loss: -2511.381103515625, Q_loss : 95.45616912841797
reward is 7.97, reward sum is 2420.40, time is 37.0, close is 59.8752

6387/100000
action_prob: [0.21194336 0.5761107  0.21194583], action is hold
action_loss: -2511.771240234375, Q_loss : 6.792354583740234
reward is -11.95, reward sum is 2408.45, time is 38.0, close is 59.7557

6388/100000
action_prob: [0.21194336 0.5761107  0.21194583], action is hold
action_loss: -2512.1611328125, Q_loss : 15.728385925292969
reward is 0.99, reward sum is 2409.44, time is 39.0, close is 59.7656

6389/100000
action_prob: [0.21194336 0.5761107  0.21194583], action is sell
action_loss: -2512.55126953125, Q_loss : -23.1854248046875
reward is -26.89, reward sum is 2382.55, time is 40.0, close is 59.4967

6390/100000
action_prob: [0.21194337

action_prob: [0.21194333 0.5761109  0.21194574], action is hold
action_loss: -2526.59228515625, Q_loss : 80.32403564453125
reward is -26.89, reward sum is 2528.53, time is 27.0, close is 60.8016

6426/100000
action_prob: [0.21194334 0.57611096 0.21194574], action is sell
action_loss: -2526.982421875, Q_loss : 28.530221939086914
reward is 7.96, reward sum is 2536.49, time is 28.0, close is 60.8812

6427/100000
action_prob: [0.21194333 0.57611096 0.21194574], action is sell
action_loss: -2527.372802734375, Q_loss : 38.68316650390625
reward is -53.78, reward sum is 2482.71, time is 29.0, close is 60.3434

6428/100000
action_prob: [0.21194333 0.57611096 0.21194574], action is hold
action_loss: -2527.7626953125, Q_loss : -19.448558807373047
reward is 26.89, reward sum is 2509.60, time is 30.0, close is 60.6123

6429/100000
action_prob: [0.21194333 0.57611096 0.21194574], action is buy
action_loss: -2528.152587890625, Q_loss : -102.97152709960938
reward is -14.94, reward sum is 2494.66, time

action_loss: -2542.19384765625, Q_loss : 2.488095283508301
reward is -14.94, reward sum is 2827.87, time is 18.0, close is 62.4053

6466/100000
action_prob: [0.2119433  0.5761111  0.21194567], action is sell
action_loss: -2542.583740234375, Q_loss : -34.22928237915039
reward is 14.94, reward sum is 2842.81, time is 19.0, close is 62.5547

6467/100000
action_prob: [0.2119433  0.5761111  0.21194567], action is hold
action_loss: -2542.97412109375, Q_loss : -33.369686126708984
reward is -9.96, reward sum is 2832.85, time is 20.0, close is 62.4551

6468/100000
action_prob: [0.2119433  0.5761111  0.21194567], action is sell
action_loss: -2543.36376953125, Q_loss : -37.31365203857422
reward is -21.92, reward sum is 2810.93, time is 21.0, close is 62.2359

6469/100000
action_prob: [0.2119433  0.5761111  0.21194567], action is hold
action_loss: -2543.75390625, Q_loss : -58.959068298339844
reward is 7.97, reward sum is 2818.90, time is 22.0, close is 62.3156

6470/100000
action_prob: [0.2119433 

action_loss: -2557.794921875, Q_loss : -7.273845672607422
reward is -24.91, reward sum is 2813.43, time is 9.0, close is 62.2857

6506/100000
action_prob: [0.21194324 0.57611114 0.21194556], action is hold
action_loss: -2558.185302734375, Q_loss : 1.2581815719604492
reward is -12.95, reward sum is 2800.48, time is 10.0, close is 62.1562

6507/100000
action_prob: [0.21194324 0.57611114 0.21194556], action is sell
action_loss: -2558.5751953125, Q_loss : 5.224216461181641
reward is 4.99, reward sum is 2805.47, time is 11.0, close is 62.2061

6508/100000
action_prob: [0.21194324 0.57611114 0.21194556], action is sell
action_loss: -2558.96533203125, Q_loss : -99.05252075195312
reward is 4.98, reward sum is 2810.45, time is 12.0, close is 62.2559

6509/100000
action_prob: [0.21194325 0.5761112  0.21194556], action is sell
action_loss: -2559.355224609375, Q_loss : 28.25341033935547
reward is -13.95, reward sum is 2796.50, time is 13.0, close is 62.1164

6510/100000
action_prob: [0.21194325 0.

action_loss: -2573.396484375, Q_loss : 51.09605407714844
reward is 33.86, reward sum is 2800.48, time is 0.0, close is 62.1562

6546/100000
action_prob: [0.21194322 0.5761113  0.21194549], action is sell
action_loss: -2573.78662109375, Q_loss : 40.184730529785156
reward is -51.05, reward sum is 2749.43, time is 1.0, close is 61.6582

6547/100000
action_prob: [0.21194322 0.5761113  0.21194547], action is buy
action_loss: -2574.1767578125, Q_loss : -134.41172790527344
reward is -1.24, reward sum is 2748.19, time is 2.0, close is 61.1402

6548/100000
action_prob: [0.21194322 0.5761113  0.21194547], action is sell
action_loss: -2574.566650390625, Q_loss : 21.144304275512695
reward is 19.93, reward sum is 2768.12, time is 3.0, close is 61.3395

6549/100000
action_prob: [0.21194322 0.5761113  0.21194547], action is hold
action_loss: -2574.95654296875, Q_loss : -146.26950073242188
reward is 8.96, reward sum is 2777.08, time is 4.0, close is 61.4291

6550/100000
action_prob: [0.21194322 0.5761

action_loss: -2588.998046875, Q_loss : 6.225988388061523
reward is 12.95, reward sum is 2751.18, time is 40.0, close is 61.1701

6586/100000
action_prob: [0.2119432 0.5761114 0.2119454], action is sell
action_loss: -2589.3876953125, Q_loss : -27.763856887817383
reward is 18.93, reward sum is 2770.11, time is 41.0, close is 61.3594

6587/100000
action_prob: [0.2119432 0.5761114 0.2119454], action is buy
action_loss: -2589.77783203125, Q_loss : 32.96217727661133
reward is 24.90, reward sum is 2795.01, time is 42.0, close is 61.6084

6588/100000
action_prob: [0.2119432 0.5761114 0.2119454], action is hold
action_loss: -2590.168212890625, Q_loss : -42.84178924560547
reward is 4.98, reward sum is 2799.99, time is 43.0, close is 61.6582

6589/100000
action_prob: [0.2119432 0.5761114 0.2119454], action is hold
action_loss: -2590.5576171875, Q_loss : 14.063591003417969
reward is -1.99, reward sum is 2798.00, time is 44.0, close is 61.6383

6590/100000
action_prob: [0.2119432 0.5761114 0.211945

action_loss: -2604.599365234375, Q_loss : -8.075836181640625
reward is 14.94, reward sum is 2707.86, time is 31.0, close is 61.5586

6626/100000
action_prob: [0.21194315 0.57611156 0.21194531], action is sell
action_loss: -2604.989013671875, Q_loss : 1.9217185974121094
reward is 4.98, reward sum is 2712.84, time is 32.0, close is 61.6084

6627/100000
action_prob: [0.21194315 0.57611156 0.21194531], action is hold
action_loss: -2605.37939453125, Q_loss : -103.08958435058594
reward is -49.80, reward sum is 2663.04, time is 33.0, close is 61.1104

6628/100000
action_prob: [0.21194315 0.57611156 0.21194531], action is sell
action_loss: -2605.769287109375, Q_loss : -36.49856948852539
reward is 4.98, reward sum is 2668.02, time is 34.0, close is 61.1602

6629/100000
action_prob: [0.21194315 0.57611156 0.21194531], action is hold
action_loss: -2606.159423828125, Q_loss : -57.93170928955078
reward is 5.97, reward sum is 2673.99, time is 35.0, close is 61.2199

6630/100000
action_prob: [0.21194

action_loss: -2620.20068359375, Q_loss : 42.97890853881836
reward is 5.98, reward sum is 2941.44, time is 22.0, close is 62.3057

6666/100000
action_prob: [0.21194312 0.5761117  0.21194525], action is sell
action_loss: -2620.590576171875, Q_loss : -6.016956329345703
reward is 14.94, reward sum is 2956.38, time is 23.0, close is 62.4551

6667/100000
action_prob: [0.21194312 0.5761117  0.21194525], action is hold
action_loss: -2620.98095703125, Q_loss : -1.0834417343139648
reward is 3.98, reward sum is 2960.36, time is 24.0, close is 62.4949

6668/100000
action_prob: [0.21194312 0.5761117  0.21194525], action is buy
action_loss: -2621.370849609375, Q_loss : 220.80630493164062
reward is -23.90, reward sum is 2936.46, time is 25.0, close is 62.2559

6669/100000
action_prob: [0.21194312 0.5761117  0.21194525], action is sell
action_loss: -2621.760498046875, Q_loss : -8.2176513671875
reward is -9.97, reward sum is 2926.49, time is 26.0, close is 62.1562

6670/100000
action_prob: [0.21194312 

action_prob: [0.21194309 0.57611173 0.21194518], action is sell
action_loss: -2635.802001953125, Q_loss : 61.492897033691406
reward is 1.99, reward sum is 3035.55, time is 13.0, close is 62.5248

6706/100000
action_prob: [0.2119431  0.5761118  0.21194518], action is sell
action_loss: -2636.19189453125, Q_loss : -30.271997451782227
reward is 20.92, reward sum is 3056.47, time is 14.0, close is 62.733999999999995

6707/100000
action_prob: [0.2119431  0.5761118  0.21194518], action is buy
action_loss: -2636.58203125, Q_loss : -68.15567016601562
reward is -5.98, reward sum is 3050.49, time is 15.0, close is 62.6742

6708/100000
action_prob: [0.2119431  0.5761118  0.21194518], action is hold
action_loss: -2636.97216796875, Q_loss : 46.440372467041016
reward is -13.94, reward sum is 3036.55, time is 16.0, close is 62.5348

6709/100000
action_prob: [0.2119431  0.5761118  0.21194518], action is hold
action_loss: -2637.362060546875, Q_loss : -0.04959678649902344
reward is 6.97, reward sum is 30

action_loss: -2651.4033203125, Q_loss : -16.523977279663086
reward is 0.00, reward sum is 2990.47, time is 4.0, close is 61.6383

6746/100000
action_prob: [0.21194306 0.57611185 0.2119451 ], action is sell
action_loss: -2651.793212890625, Q_loss : -154.0079345703125
reward is 0.00, reward sum is 2990.47, time is 5.0, close is 61.7877

6747/100000
action_prob: [0.21194305 0.57611185 0.2119451 ], action is buy
action_loss: -2652.18359375, Q_loss : 43.198089599609375
reward is -1.24, reward sum is 2989.23, time is 6.0, close is 61.7578

6748/100000
action_prob: [0.21194305 0.57611185 0.2119451 ], action is hold
action_loss: -2652.573486328125, Q_loss : -27.45197868347168
reward is -0.99, reward sum is 2988.24, time is 7.0, close is 61.7479

6749/100000
action_prob: [0.21194303 0.57611185 0.21194507], action is sell
action_loss: -2652.96337890625, Q_loss : 20.623640060424805
reward is 0.00, reward sum is 2988.24, time is 8.0, close is 61.7479

6750/100000
action_prob: [0.21194303 0.5761118

action_loss: -2667.004638671875, Q_loss : 17.282880783081055
reward is -97.62, reward sum is 2701.36, time is 44.0, close is 58.8791

6786/100000
action_prob: [0.211943   0.576112   0.21194501], action is sell
action_loss: -2667.394775390625, Q_loss : -17.693889617919922
reward is 94.63, reward sum is 2795.99, time is 45.0, close is 59.8254

6787/100000
action_prob: [0.211943   0.576112   0.21194501], action is buy
action_loss: -2667.78466796875, Q_loss : 21.45117950439453
reward is -16.93, reward sum is 2779.06, time is 46.0, close is 59.6561

6788/100000
action_prob: [0.211943   0.576112   0.21194501], action is sell
action_loss: -2668.1748046875, Q_loss : -66.53182220458984
reward is -70.73, reward sum is 2708.33, time is 47.0, close is 58.9488

6789/100000
action_prob: [0.211943   0.576112   0.21194501], action is sell
action_loss: -2668.564697265625, Q_loss : -50.61548614501953
reward is -4.98, reward sum is 2703.35, time is 48.0, close is 58.898999999999994

6790/100000
action_pr

action_loss: -2682.606201171875, Q_loss : -16.87617301940918
reward is 19.93, reward sum is 2758.70, time is 35.0, close is 59.0684

6826/100000
action_prob: [0.21194299 0.5761121  0.21194495], action is buy
action_loss: -2682.995849609375, Q_loss : -3.8298349380493164
reward is 5.97, reward sum is 2764.67, time is 36.0, close is 59.1281

6827/100000
action_prob: [0.21194299 0.5761121  0.21194495], action is buy
action_loss: -2683.38623046875, Q_loss : 16.056198120117188
reward is -34.86, reward sum is 2729.81, time is 37.0, close is 58.7795

6828/100000
action_prob: [0.21194299 0.5761121  0.21194495], action is sell
action_loss: -2683.7763671875, Q_loss : -86.63776397705078
reward is -10.96, reward sum is 2718.85, time is 38.0, close is 58.6699

6829/100000
action_prob: [0.21194299 0.5761121  0.21194494], action is sell
action_loss: -2684.166015625, Q_loss : 7.098269462585449
reward is -39.84, reward sum is 2679.01, time is 39.0, close is 58.2715

6830/100000
action_prob: [0.21194299 

action_loss: -2698.20751953125, Q_loss : -78.64213562011719
reward is 34.86, reward sum is 2960.48, time is 26.0, close is 60.5625

6866/100000
action_prob: [0.21194296 0.57611215 0.21194488], action is sell
action_loss: -2698.597412109375, Q_loss : -33.986083984375
reward is 8.96, reward sum is 2969.44, time is 27.0, close is 60.6521

6867/100000
action_prob: [0.21194296 0.57611215 0.21194488], action is sell
action_loss: -2698.9873046875, Q_loss : -69.59709930419922
reward is -2.98, reward sum is 2966.46, time is 28.0, close is 60.6223

6868/100000
action_prob: [0.21194296 0.57611215 0.21194486], action is buy
action_loss: -2699.37744140625, Q_loss : -105.03018951416016
reward is -16.94, reward sum is 2949.52, time is 29.0, close is 60.4529

6869/100000
action_prob: [0.21194296 0.57611215 0.21194486], action is sell
action_loss: -2699.76708984375, Q_loss : -27.04768180847168
reward is 28.89, reward sum is 2978.41, time is 30.0, close is 60.7418

6870/100000
action_prob: [0.21194296 0

action_loss: -2713.808837890625, Q_loss : 15.671939849853516
reward is 13.94, reward sum is 3086.56, time is 17.0, close is 61.8873

6906/100000
action_prob: [0.21194293 0.5761123  0.2119448 ], action is sell
action_loss: -2714.19873046875, Q_loss : -105.04822540283203
reward is -12.95, reward sum is 3073.61, time is 18.0, close is 61.7578

6907/100000
action_prob: [0.21194294 0.5761123  0.2119448 ], action is sell
action_loss: -2714.588623046875, Q_loss : -24.155994415283203
reward is -16.93, reward sum is 3056.68, time is 19.0, close is 61.5885

6908/100000
action_prob: [0.21194294 0.5761123  0.2119448 ], action is sell
action_loss: -2714.97900390625, Q_loss : 42.560340881347656
reward is 25.90, reward sum is 3082.58, time is 20.0, close is 61.8475

6909/100000
action_prob: [0.21194294 0.5761123  0.2119448 ], action is sell
action_loss: -2715.36865234375, Q_loss : -14.27641487121582
reward is -8.97, reward sum is 3073.61, time is 21.0, close is 61.7578

6910/100000
action_prob: [0.21

action_loss: -2729.41015625, Q_loss : -28.304006576538086
reward is -10.96, reward sum is 3114.97, time is 8.0, close is 61.449

6946/100000
action_prob: [0.21194291 0.5761124  0.21194474], action is hold
action_loss: -2729.7998046875, Q_loss : -13.388256072998047
reward is 10.96, reward sum is 3125.93, time is 9.0, close is 61.5586

6947/100000
action_prob: [0.21194291 0.5761124  0.21194474], action is sell
action_loss: -2730.190185546875, Q_loss : 26.508058547973633
reward is -17.93, reward sum is 3108.00, time is 10.0, close is 61.3793

6948/100000
action_prob: [0.21194291 0.5761124  0.21194474], action is hold
action_loss: -2730.580078125, Q_loss : -68.19590759277344
reward is 6.97, reward sum is 3114.97, time is 11.0, close is 61.449

6949/100000
action_prob: [0.21194291 0.5761124  0.21194474], action is sell
action_loss: -2730.97021484375, Q_loss : -150.83804321289062
reward is -16.93, reward sum is 3098.04, time is 12.0, close is 61.2797

6950/100000
action_prob: [0.21194291 0.5

action_loss: -2745.01123046875, Q_loss : 23.27004623413086
reward is -13.94, reward sum is 3027.32, time is 48.0, close is 60.5725

6986/100000
action_prob: [0.21194287 0.5761124  0.21194467], action is sell
action_loss: -2745.4013671875, Q_loss : -64.263916015625
reward is -20.92, reward sum is 3006.40, time is 0.0, close is 60.3633

6987/100000
action_prob: [0.21194287 0.5761124  0.21194467], action is sell
action_loss: -2745.791259765625, Q_loss : -93.1878890991211
reward is -41.08, reward sum is 2965.32, time is 1.0, close is 59.9648

6988/100000
action_prob: [0.21194287 0.57611245 0.21194468], action is sell
action_loss: -2746.181640625, Q_loss : 43.99507141113281
reward is 0.00, reward sum is 2965.32, time is 2.0, close is 60.0645

6989/100000
action_prob: [0.21194288 0.5761125  0.21194468], action is sell
action_loss: -2746.5712890625, Q_loss : -4.535820960998535
reward is 0.00, reward sum is 2965.32, time is 3.0, close is 59.5166

6990/100000
action_prob: [0.21194288 0.5761125 

action_loss: -2760.61279296875, Q_loss : 24.101402282714844
reward is -9.97, reward sum is 2854.53, time is 39.0, close is 58.5105

7026/100000
action_prob: [0.21194285 0.57611257 0.21194462], action is sell
action_loss: -2761.0029296875, Q_loss : 6.260135650634766
reward is 10.96, reward sum is 2865.49, time is 40.0, close is 58.6201

7027/100000
action_prob: [0.21194285 0.57611257 0.21194462], action is buy
action_loss: -2761.392822265625, Q_loss : -82.24089813232422
reward is 4.98, reward sum is 2870.47, time is 41.0, close is 58.6699

7028/100000
action_prob: [0.21194285 0.57611257 0.21194462], action is sell
action_loss: -2761.78271484375, Q_loss : 22.172203063964844
reward is 3.99, reward sum is 2874.46, time is 42.0, close is 58.7098

7029/100000
action_prob: [0.21194285 0.57611257 0.21194462], action is sell
action_loss: -2762.1728515625, Q_loss : -79.67608642578125
reward is -26.90, reward sum is 2847.56, time is 43.0, close is 58.4408

7030/100000
action_prob: [0.21194285 0.5

action_loss: -2776.21435546875, Q_loss : 18.057849884033203
reward is 16.94, reward sum is 2803.38, time is 30.0, close is 56.9268

7066/100000
action_prob: [0.21194282 0.5761126  0.21194455], action is hold
action_loss: -2776.60400390625, Q_loss : 64.80389404296875
reward is -44.83, reward sum is 2758.55, time is 31.0, close is 56.4785

7067/100000
action_prob: [0.21194282 0.5761126  0.21194454], action is sell
action_loss: -2776.994140625, Q_loss : -27.779773712158203
reward is -9.96, reward sum is 2748.59, time is 32.0, close is 56.3789

7068/100000
action_prob: [0.21194282 0.5761126  0.21194454], action is buy
action_loss: -2777.38427734375, Q_loss : -20.823741912841797
reward is -9.96, reward sum is 2738.63, time is 33.0, close is 56.2793

7069/100000
action_prob: [0.21194282 0.5761126  0.21194454], action is sell
action_loss: -2777.77392578125, Q_loss : 17.935630798339844
reward is 24.90, reward sum is 2763.53, time is 34.0, close is 56.5283

7070/100000
action_prob: [0.21194282 

action_loss: -2791.81591796875, Q_loss : 25.861907958984375
reward is 0.00, reward sum is 2760.28, time is 21.0, close is 56.5184

7106/100000
action_prob: [0.2119428  0.57611275 0.21194449], action is sell
action_loss: -2792.20556640625, Q_loss : -28.782054901123047
reward is 0.00, reward sum is 2760.28, time is 22.0, close is 56.5184

7107/100000
action_prob: [0.21194279 0.57611275 0.21194449], action is sell
action_loss: -2792.59521484375, Q_loss : -68.7960205078125
reward is 0.00, reward sum is 2760.28, time is 23.0, close is 56.5184

7108/100000
action_prob: [0.21194279 0.57611275 0.21194449], action is hold
action_loss: -2792.985595703125, Q_loss : 9.850322723388672
reward is 0.00, reward sum is 2760.28, time is 24.0, close is 56.5184

7109/100000
action_prob: [0.21194279 0.57611275 0.21194449], action is buy
action_loss: -2793.37548828125, Q_loss : -62.84365463256836
reward is 0.00, reward sum is 2760.28, time is 25.0, close is 56.5184

7110/100000
action_prob: [0.21194279 0.576

In [None]:
agent._data_set._buffer