source:
https://github.com/llSourcell/Q-Learning-for-Trading

# 環境設定
* Python 2.7. 
* To install all the libraries, run `pip install -r requirements.txt`

# 如何執行
* 訓練 Deep Q agent  
  `python run.py --mode train`
* 測試模型效能  
  `python run.py --mode test --weights <trained_model>`

# run.py

In [5]:
import pickle
import time
import numpy as np
import argparse
import re

import os
import pandas as pd
from sklearn.preprocessing import StandardScaler

from envs import TradingEnv
from agent import DQNAgent
#from utils import get_data, get_scaler, maybe_make_dir

* 訓練模型
* 傳入 "--mode train" 參數, 其他用預設值
* episode = 2000; 訓練回合數
* batch_size = 32; 經驗回放 (experience replay) 每次取樣的批次大小
* initial_invest = 20000; 初始資金

In [3]:
# Command-line options of run.py, declared as (flags, keyword settings) pairs
# so the parser can be assembled in a single loop.
cli_options = [
  (('-e', '--episode'),
   dict(type=int, default=2000, help='number of episode to run')),
  (('-b', '--batch_size'),
   dict(type=int, default=32, help='batch size for experience replay')),
  (('-i', '--initial_invest'),
   dict(type=int, default=20000, help='initial investment amount')),
  (('-m', '--mode'),
   dict(type=str, required=True, help='either "train" or "test"')),
  (('-w', '--weights'),
   dict(type=str, help='a trained model weights')),
]

parser = argparse.ArgumentParser()
for flags, settings in cli_options:
  parser.add_argument(*flags, **settings)

# An explicit argument list is passed instead of reading sys.argv, so this
# cell always parses the equivalent of `--mode train`.
args = parser.parse_args(["--mode", "train"])

print(args)

Namespace(batch_size=32, episode=2000, initial_invest=20000, mode='train', weights=None)


* 載入歷史股價
* 微軟(MSFT), IBM, 高通(QCOM)
* 只採用收盤股價 (close)

In [6]:
# Preview the first rows of the Microsoft (MSFT) daily price file.
msft_df = pd.read_csv('data/daily_MSFT.csv')
msft_df.head(n=5)

Unnamed: 0,timestamp,open,high,low,close,volume
0,2017-12-27,85.65,85.98,85.215,85.52,7325723
1,2017-12-26,85.31,85.5346,85.03,85.4,9883300
2,2017-12-22,85.4,85.63,84.92,85.51,14033977
3,2017-12-21,86.05,86.1,85.4,85.5,16638402
4,2017-12-20,86.2,86.3,84.71,85.52,23425009


In [7]:
def get_data(col='close', tickers=('MSFT', 'IBM', 'QCOM'), data_dir='data'):
  """Load one price column for several tickers as a 2-D array.

  For every ticker the file ``<data_dir>/daily_<TICKER>.csv`` is read, only
  column ``col`` is kept, and the series is reversed: the files list the most
  recent price first, so after reversing index 0 holds the oldest price.

  Args:
    col: name of the CSV column to load (default ``'close'``).
    tickers: ticker symbols to load; one output row per ticker, in this order.
    data_dir: directory that contains the ``daily_<TICKER>.csv`` files.

  Returns:
    A numpy array of shape ``(len(tickers), n_step)``; with the defaults this
    is the 3 x n_step array [MSFT, IBM, QCOM].

  Raises:
    ValueError: if the files do not all contain the same number of rows, since
      the series could then not be stacked into a rectangular array.
  """
  series = []
  for ticker in tickers:
    path = os.path.join(data_dir, 'daily_{}.csv'.format(ticker))
    prices = pd.read_csv(path, usecols=[col])[col].values
    # recent prices are at the top of the file; reverse into oldest-first order
    series.append(prices[::-1])

  lengths = [len(prices) for prices in series]
  if len(set(lengths)) > 1:
    raise ValueError(
      'price histories have different lengths: {}'.format(
        dict(zip(tickers, lengths))))
  return np.array(series)

* 讀取 csv file, 放入 3xn array 中 => [[MSFT], [IBM], [QCOM]]
* 將歷史股價的順序反轉 (csv 檔中最新的股價在最上面), 也就是說把最舊的股價放到 array 最前面、最新的股價放到最後面

In [9]:
# Load the price matrix (one row per ticker) and show its contents and shape.
stock_data = get_data()
price_dump = "{}".format(stock_data)
shape_line = "shape:{}".format(stock_data.shape)
print(price_dump)
print(shape_line)

[[116.56   112.62   113.81   ...  85.51    85.4     85.52  ]
 [116.     112.06   116.     ... 152.5    152.83   153.0385]
 [179.3    162.1    158.     ...  64.73    64.3     64.52  ]]
shape:(3, 4526)


In [10]:
# Round every price to a whole number. NumPy sends values that sit exactly
# halfway between two integers to the nearest even one.
data = np.around(stock_data, decimals=0)
rounded_dump = "{}".format(data)
print(rounded_dump)

[[117. 113. 114. ...  86.  85.  86.]
 [116. 112. 116. ... 152. 153. 153.]
 [179. 162. 158. ...  65.  64.  65.]]


* 將 data 分成訓練和測試兩類
* train_data 3526 筆
* test_data  1000 筆

In [12]:
# Column index that separates the two sets: columns before it are used for
# training, columns from it onward for testing.
split_at = 3526
train_data, test_data = data[:, :split_at], data[:, split_at:]
print("train shape:{}".format(train_data.shape))
print("test shape:{}".format(test_data.shape))

train shape:(3, 3526)
test shape:(3, 1000)


In [None]:
if __name__ == '__main__':
  # Script entry point: train a DQN agent on the price history, or replay a
  # trained agent on the held-out part of it, then pickle the per-episode
  # end-of-episode portfolio values.

  # These two helpers are called below but are not defined in this file; the
  # file-level `from utils import ...` line is commented out, so bring them
  # into scope here (get_data is deliberately not imported: it is defined above).
  from utils import get_scaler, maybe_make_dir

  parser = argparse.ArgumentParser()
  parser.add_argument('-e', '--episode', type=int, default=2000,
                      help='number of episode to run')
  parser.add_argument('-b', '--batch_size', type=int, default=32,
                      help='batch size for experience replay')
  parser.add_argument('-i', '--initial_invest', type=int, default=20000,
                      help='initial investment amount')
  parser.add_argument('-m', '--mode', type=str, required=True,
                      help='either "train" or "test"')
  parser.add_argument('-w', '--weights', type=str, help='a trained model weights')
  args = parser.parse_args()

  # Test mode loads weights from args.weights; fail with a usage message
  # instead of crashing later on a None path.
  if args.mode == 'test' and not args.weights:
    parser.error('--weights is required when --mode is "test"')

  maybe_make_dir('weights')
  maybe_make_dir('portfolio_val')

  # Run identifier used in the names of the weights and portfolio files.
  timestamp = time.strftime('%Y%m%d%H%M')

  # Prices rounded to whole numbers; the first n_train columns are the
  # training window and the remaining columns the test window.
  n_train = 3526
  data = np.around(get_data())
  train_data = data[:, :n_train]
  test_data = data[:, n_train:]

  env = TradingEnv(train_data, args.initial_invest)
  state_size = env.observation_space.shape
  action_size = env.action_space.n
  agent = DQNAgent(state_size, action_size)
  # The scaler is built from the training env and reused unchanged in test mode.
  scaler = get_scaler(env)

  portfolio_value = []

  if args.mode == 'test':
    # remake the env with test data
    env = TradingEnv(test_data, args.initial_invest)
    # load trained weights
    agent.load(args.weights)
    # when testing, reuse the 12-digit timestamp embedded in the weights file
    # name so the results file matches the training run; if the name carries
    # no such stamp, keep the current time instead of failing
    stamps = re.findall(r'\d{12}', args.weights)
    if stamps:
      timestamp = stamps[0]

  for e in range(args.episode):
    state = env.reset()
    state = scaler.transform([state])
    # `step` (not `time`) so the imported time module is not shadowed
    for step in range(env.n_step):
      action = agent.act(state)
      next_state, reward, done, info = env.step(action)
      next_state = scaler.transform([next_state])
      if args.mode == 'train':
        agent.remember(state, action, reward, next_state, done)
      state = next_state
      if done:
        print("episode: {}/{}, episode end value: {}".format(
          e + 1, args.episode, info['cur_val']))
        portfolio_value.append(info['cur_val']) # append episode end portfolio value
        break
      # replay only once more than one batch of transitions has been stored
      if args.mode == 'train' and len(agent.memory) > args.batch_size:
        agent.replay(args.batch_size)
    if args.mode == 'train' and (e + 1) % 10 == 0:  # checkpoint weights
      agent.save('weights/{}-dqn.h5'.format(timestamp))

  # save portfolio value history to disk
  with open('portfolio_val/{}-{}.p'.format(timestamp, args.mode), 'wb') as fp:
    pickle.dump(portfolio_value, fp)