<a href="https://colab.research.google.com/github/AI4Finance-Foundation/FinRL/blob/master/FinRL_PaperTrading_Demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Disclaimer: Nothing herein is financial advice, and NOT a recommendation to trade real money. Many platforms exist for simulated trading (paper trading) which can be used for building and developing the methods discussed. Please use common sense and always first consult a professional before trading or investing.

# Part 1: Install FinRL

In [4]:
pip install ray[tune]

Collecting tabulate; extra == "tune"
  Downloading tabulate-0.8.9-py3-none-any.whl (25 kB)
Installing collected packages: tabulate
Successfully installed tabulate-0.8.9
Note: you may need to restart the kernel to use updated packages.


In [5]:
## install finrl library
!pip install git+https://github.com/AI4Finance-LLC/FinRL-Library.git

Collecting git+https://github.com/AI4Finance-LLC/FinRL-Library.git
  Cloning https://github.com/AI4Finance-LLC/FinRL-Library.git to /tmp/pip-req-build-o1ktavxo
  Running command git clone -q https://github.com/AI4Finance-LLC/FinRL-Library.git /tmp/pip-req-build-o1ktavxo




Building wheels for collected packages: finrl
  Building wheel for finrl (setup.py) ... [?25ldone
[?25h  Created wheel for finrl: filename=finrl-0.3.3-py3-none-any.whl size=3883612 sha256=08b5c15f2415cdb36029701ae621fd20c03a049b1c59daa2b62c4b635fb6a81d
  Stored in directory: /tmp/pip-ephem-wheel-cache-ryxioivf/wheels/e8/19/74/11261997d6bdca44ba73e8eeedb94a3e3d340259516a0887eb
Successfully built finrl


## Import related modules

In [6]:
from finrl.train import train
from finrl.test import test
from finrl.apps.config import DOW_30_TICKER
from finrl.apps.config import TECHNICAL_INDICATORS_LIST
from finrl.neo_finrl.env_stock_trading.env_stocktrading_np import StockTradingEnv
from finrl.neo_finrl.env_stock_trading.env_stock_papertrading import AlpacaPaperTrading
from finrl.neo_finrl.data_processor import DataProcessor
from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline

import numpy as np
import pandas as pd

TypeError: int() argument must be a string, a bytes-like object or a number, not 'NaTType'

## Import Dow Jones 30 Symbols

In [3]:
ticker_list = DOW_30_TICKER
action_dim = len(DOW_30_TICKER)

In [4]:
print(ticker_list)

['AXP', 'AMGN', 'AAPL', 'BA', 'CAT', 'CSCO', 'CVX', 'GS', 'HD', 'HON', 'IBM', 'INTC', 'JNJ', 'KO', 'JPM', 'MCD', 'MMM', 'MRK', 'MSFT', 'NKE', 'PG', 'TRV', 'UNH', 'CRM', 'VZ', 'V', 'WBA', 'WMT', 'DIS', 'DOW']


In [5]:
print(TECHNICAL_INDICATORS_LIST)

['macd', 'boll_ub', 'boll_lb', 'rsi_30', 'cci_30', 'dx_30', 'close_30_sma', 'close_60_sma']


## Calculate the DRL state dimension manually for paper trading

In [6]:
# amount + (turbulence, turbulence_bool) + (price, shares, cd (holding time)) * stock_dim + tech_dim
state_dim = 1 + 2 + 3 * action_dim + len(TECHNICAL_INDICATORS_LIST) * action_dim

In [7]:
state_dim

333

## Get the API Keys Ready

In [8]:
API_KEY = "YOUR_ALPACA_API_KEY"
API_SECRET = "YOUR_ALPACA_API_SECRET"
APCA_API_BASE_URL = 'https://paper-api.alpaca.markets'
data_url = 'wss://data.alpaca.markets'
env = StockTradingEnv

## Show the data

### Step 1. Pick a data source

In [9]:
DP = DataProcessor(data_source = 'alpaca',
                  API_KEY = API_KEY, 
                  API_SECRET = API_SECRET, 
                  APCA_API_BASE_URL = APCA_API_BASE_URL
                  )

Alpaca successfully connected


### Step 2. Get ticker list, Set start date and end date, specify the data frequency

In [10]:
data = DP.download_data(start_date = '2021-10-01', 
                        end_date = '2021-10-05',
                        ticker_list = ticker_list, 
                        time_interval= '1Min')

Data before 2021-10-01T15:59:00-04:00 is successfully fetched
Data before 2021-10-02T15:59:00-04:00 is successfully fetched
Data before 2021-10-03T15:59:00-04:00 is successfully fetched
Data before 2021-10-04T15:59:00-04:00 is successfully fetched
Data before 2021-10-05T15:59:00-04:00 is successfully fetched


### Step 3. Data Cleaning & Feature Engineering

In [11]:
data = DP.clean_data(data)
data = DP.add_technical_indicator(data, TECHNICAL_INDICATORS_LIST)
data = DP.add_vix(data)

Data clean finished!
Succesfully add technical indicators
Data before 2021-10-01T15:59:00-04:00 is successfully fetched
Data before 2021-10-02T15:59:00-04:00 is successfully fetched
Data before 2021-10-03T15:59:00-04:00 is successfully fetched
Data before 2021-10-04T15:59:00-04:00 is successfully fetched
Data before 2021-10-05T15:59:00-04:00 is successfully fetched
Data clean finished!


In [12]:
data.tail(20)

Unnamed: 0,time,open,high,low,close,volume,tic,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,VIXY
35080,2021-10-05 15:59:00-04:00,386.74,386.74,385.86,385.86,2904.0,GS,-0.207385,387.589166,386.345834,33.723238,-357.485069,51.604691,387.049333,387.364167,21.68
35081,2021-10-05 15:59:00-04:00,329.6,329.6,329.31,329.43,1787.0,HD,-0.448482,331.711177,329.320823,26.228665,-169.376805,76.054777,330.815333,331.21325,21.68
35082,2021-10-05 15:59:00-04:00,215.72,215.72,215.59,215.59,1493.0,HON,-0.240973,217.088555,215.553945,18.327939,-191.493943,68.521373,216.415833,216.538,21.68
35083,2021-10-05 15:59:00-04:00,143.14,143.18,143.1,143.16,4180.0,IBM,-0.145148,143.866517,143.059983,30.924818,-160.386451,48.082544,143.534,143.722417,21.68
35084,2021-10-05 15:59:00-04:00,53.955,53.975,53.935,53.945,6274.0,INTC,-0.038827,54.1669,53.9251,29.721071,-169.470945,57.246117,54.06,54.096,21.68
35085,2021-10-05 15:59:00-04:00,159.72,159.73,159.54,159.58,4075.0,JNJ,-0.073306,160.073831,159.657669,34.877315,-306.971514,34.150919,159.888667,160.02625,21.68
35086,2021-10-05 15:59:00-04:00,168.87,168.88,168.65,168.65,7616.0,JPM,-0.140191,169.514417,168.669583,33.354857,-173.244738,42.521277,169.178,169.313083,21.68
35087,2021-10-05 15:59:00-04:00,53.085,53.09,53.055,53.08,15679.0,KO,-0.012753,53.198559,53.068441,42.715325,-190.35744,29.750107,53.138333,53.13775,21.68
35088,2021-10-05 15:59:00-04:00,245.33,245.33,245.18,245.18,2704.0,MCD,-0.269918,246.739285,245.066715,21.785286,-160.170797,79.728448,246.058333,246.202083,21.68
35089,2021-10-05 15:59:00-04:00,177.96,177.96,177.79,177.79,1754.0,MMM,-0.076775,178.495952,177.863548,34.300943,-232.273583,44.417489,178.2135,178.172583,21.68


### Step 4. Transform to numpy array

In [13]:
price_array, tech_array, turbulence_array = DP.df_to_array(data, if_vix='True')

Successfully transformed into array


In [14]:
price_array

array([[141.89 , 213.36 , 174.74 , ...,  54.09 ,  47.075, 139.575],
       [141.42 , 212.62 , 168.69 , ...,  54.17 ,  47.16 , 139.53 ],
       [141.08 , 212.5  , 169.46 , ...,  54.11 ,  47.17 , 139.445],
       ...,
       [141.26 , 211.87 , 175.06 , ...,  54.595,  47.13 , 136.78 ],
       [141.12 , 211.85 , 174.98 , ...,  54.535,  47.11 , 136.68 ],
       [140.98 , 211.77 , 174.74 , ...,  54.5  ,  47.09 , 136.62 ]])

# Part 2: Train the agent

## Train

In [15]:
ERL_PARAMS = {"learning_rate": 3e-6,"batch_size": 2048,"gamma":  0.985,
        "seed":312,"net_dimension":512, "target_step":5000, "eval_gap":30}

In [16]:
train(start_date = '2021-10-11', 
      end_date = '2021-10-15',
      ticker_list = ticker_list, 
      data_source = 'alpaca',
      time_interval= '1Min', 
      technical_indicator_list= TECHNICAL_INDICATORS_LIST,
      drl_lib='elegantrl', 
      env=env,
      model_name='ppo', 
      API_KEY = API_KEY, 
      API_SECRET = API_SECRET, 
      APCA_API_BASE_URL = APCA_API_BASE_URL,
      erl_params=ERL_PARAMS,
      cwd='./papertrading_erl', #current_working_dir
      break_step=1e5)

Alpaca successfully connected
Data before 2021-10-11T15:59:00-04:00 is successfully fetched
Data before 2021-10-12T15:59:00-04:00 is successfully fetched
Data before 2021-10-13T15:59:00-04:00 is successfully fetched
Data before 2021-10-14T15:59:00-04:00 is successfully fetched
Data before 2021-10-15T15:59:00-04:00 is successfully fetched
Data clean finished!
Succesfully add technical indicators
Data before 2021-10-11T15:59:00-04:00 is successfully fetched
Data before 2021-10-12T15:59:00-04:00 is successfully fetched
Data before 2021-10-13T15:59:00-04:00 is successfully fetched
Data before 2021-10-14T15:59:00-04:00 is successfully fetched
Data before 2021-10-15T15:59:00-04:00 is successfully fetched
Data clean finished!
Successfully transformed into array
| Remove cwd: ./papertrading_erl
################################################################################
ID     Step    maxR |    avgR   stdR   avgS  stdS |    expR   objC   etc.
0  3.90e+03    1.01 |
0  3.90e+03    1.01 |    

## Test

In [17]:
account_value_erl=test(start_date = '2021-10-18', 
                      end_date = '2021-10-19',
                      ticker_list = ticker_list, 
                      data_source = 'alpaca',
                      time_interval= '1Min', 
                      technical_indicator_list= TECHNICAL_INDICATORS_LIST,
                      drl_lib='elegantrl', 
                      env=env, 
                      model_name='ppo', 
                      API_KEY = API_KEY, 
                      API_SECRET = API_SECRET, 
                      APCA_API_BASE_URL = APCA_API_BASE_URL,
                      cwd='./papertrading_erl',
                      net_dimension = 512)

Alpaca successfully connected
Data before 2021-10-18T15:59:00-04:00 is successfully fetched
Data before 2021-10-19T15:59:00-04:00 is successfully fetched
Data clean finished!
Succesfully add technical indicators
Data before 2021-10-18T15:59:00-04:00 is successfully fetched
Data before 2021-10-19T15:59:00-04:00 is successfully fetched
Data clean finished!
Successfully transformed into array
price_array:  780
Test Finished!
episode_return 1.0075374272908477


## Use full data to train 

After tuning well, retrain on the training and testing sets

In [None]:
train(start_date = '2021-10-11', 
      end_date = '2021-10-19',
      ticker_list = ticker_list, 
      data_source = 'alpaca',
      time_interval= '1Min', 
      technical_indicator_list= TECHNICAL_INDICATORS_LIST,
      drl_lib='elegantrl', 
      env=env, 
      model_name='ppo', 
      API_KEY = API_KEY, 
      API_SECRET = API_SECRET, 
      APCA_API_BASE_URL = APCA_API_BASE_URL ,
      cwd='./papertrading_erl_retrain',
      break_step=5e4)

Alpaca successfully connected
Data before 2021-10-11T15:59:00-04:00 is successfully fetched
Data before 2021-10-12T15:59:00-04:00 is successfully fetched
Data before 2021-10-13T15:59:00-04:00 is successfully fetched
Data before 2021-10-14T15:59:00-04:00 is successfully fetched
Data before 2021-10-15T15:59:00-04:00 is successfully fetched
Data before 2021-10-16T15:59:00-04:00 is successfully fetched
Data before 2021-10-17T15:59:00-04:00 is successfully fetched


sleep 3 seconds and retrying https://data.alpaca.markets/v1/bars/1Min 3 more time(s)...


Data before 2021-10-18T15:59:00-04:00 is successfully fetched


sleep 3 seconds and retrying https://data.alpaca.markets/v1/bars/1Min 3 more time(s)...
sleep 3 seconds and retrying https://data.alpaca.markets/v1/bars/1Min 3 more time(s)...


Data before 2021-10-19T15:59:00-04:00 is successfully fetched
Data clean finished!
Succesfully add technical indicators
Data before 2021-10-11T15:59:00-04:00 is successfully fetched
Data before 2021-10-12T15:59:00-04:00 is successfully fetched
Data before 2021-10-13T15:59:00-04:00 is successfully fetched
Data before 2021-10-14T15:59:00-04:00 is successfully fetched
Data before 2021-10-15T15:59:00-04:00 is successfully fetched
Data before 2021-10-16T15:59:00-04:00 is successfully fetched
Data before 2021-10-17T15:59:00-04:00 is successfully fetched
Data before 2021-10-18T15:59:00-04:00 is successfully fetched
Data before 2021-10-19T15:59:00-04:00 is successfully fetched
Data clean finished!
Successfully transformed into array
| Remove cwd: ./papertrading_erl_retrain
################################################################################
ID     Step    maxR |    avgR   stdR   avgS  stdS |    expR   objC   etc.
0  2.73e+03    1.01 |
0  2.73e+03    1.01 |    1.01    0.0   2728    

# Part 3: Deploy the agent

## Setup Alpaca Paper trading environment

In [None]:
import datetime
import threading
from finrl.neo_finrl.data_processors.processor_alpaca import AlpacaProcessor
from elegantrl.run import *
import alpaca_trade_api as tradeapi
import time
import pandas as pd
import numpy as np
import torch
import sys
import os
import gym

class AlpacaPaperTrading():

    def __init__(self,ticker_list, time_interval, drl_lib, agent, cwd, net_dim, 
                 state_dim, action_dim, API_KEY, API_SECRET, 
                 APCA_API_BASE_URL, tech_indicator_list, turbulence_thresh=30, 
                 max_stock=1e2, latency = None):
        #load agent
        self.drl_lib = drl_lib
        if agent =='ppo':
            if drl_lib == 'elegantrl':              
              from elegantrl.agent import AgentPPO
              #load agent
              try:
                  agent = AgentPPO()
                  agent.init(net_dim, state_dim, action_dim)
                  agent.save_or_load_agent(cwd=cwd, if_save=False)
                  self.act = agent.act
                  self.device = agent.device
              except:
                  raise ValueError('Fail to load agent!')
            
            elif drl_lib == 'rllib':
                from ray.rllib.agents import ppo
                from ray.rllib.agents.ppo.ppo import PPOTrainer
                
                config = ppo.DEFAULT_CONFIG.copy()
                config['env'] = StockEnvEmpty
                config["log_level"] = "WARN"
                config['env_config'] = {'state_dim':state_dim,
                            'action_dim':action_dim,}
                trainer = PPOTrainer(env=StockEnvEmpty, config=config)
                trainer.restore(cwd)
                try:
                    trainer.restore(cwd)
                    self.agent = trainer
                    print("Restoring from checkpoint path", cwd)
                except:
                    raise ValueError('Fail to load agent!')
                    
            elif drl_lib == 'stable_baselines3':
                from stable_baselines3 import PPO
                
                try:
                    #load agent
                    self.model = PPO.load(cwd)
                    print("Successfully load model", cwd)
                except:
                    raise ValueError('Fail to load agent!')
                    
            else:
                raise ValueError('The DRL library input is NOT supported yet. Please check your input.')
               
        else:
            raise ValueError('Agent input is NOT supported yet.')
            
            
            
        #connect to Alpaca trading API
        try:
            self.alpaca = tradeapi.REST(API_KEY,API_SECRET,APCA_API_BASE_URL, 'v2')
        except:
            raise ValueError('Fail to connect Alpaca. Please check account info and internet connection.')
        
        #read trading time interval
        if time_interval == '1s':
            self.time_interval = 1
        elif time_interval == '5s':
            self.time_interval = 5
        elif time_interval == '1Min':
            self.time_interval = 60
        elif time_interval == '5Min':
            self.time_interval = 60 * 5
        elif time_interval == '15Min':
            self.time_interval = 60 * 15
        else:
            raise ValueError('Time interval input is NOT supported yet.')
        
        #read trading settings
        self.tech_indicator_list = tech_indicator_list
        self.turbulence_thresh = turbulence_thresh
        self.max_stock = max_stock 
        
        #initialize account
        self.stocks = np.asarray([0] * len(ticker_list)) #stocks holding
        self.stocks_cd = np.zeros_like(self.stocks) 
        self.cash = None #cash record 
        self.stocks_df = pd.DataFrame(self.stocks, columns=['stocks'], index = ticker_list)
        self.asset_list = []
        self.price = np.asarray([0] * len(ticker_list))
        self.stockUniverse = ticker_list
        self.turbulence_bool = 0
        self.equities = []
        
    def test_latency(self, test_times = 10): 
        total_time = 0
        for i in range(0, test_times):
            time0 = time.time()
            self.get_state()
            time1 = time.time()
            temp_time = time1 - time0
            total_time += temp_time
        latency = total_time/test_times
        print('latency for data processing: ', latency)
        return latency
        
    def run(self):
        orders = self.alpaca.list_orders(status="open")
        for order in orders:
          self.alpaca.cancel_order(order.id)
    
        # Wait for market to open.
        print("Waiting for market to open...")
        tAMO = threading.Thread(target=self.awaitMarketOpen)
        tAMO.start()
        tAMO.join()
        print("Market opened.")
        while True:

          # Figure out when the market will close so we can prepare to sell beforehand.
          clock = self.alpaca.get_clock()
          closingTime = clock.next_close.replace(tzinfo=datetime.timezone.utc).timestamp()
          currTime = clock.timestamp.replace(tzinfo=datetime.timezone.utc).timestamp()
          self.timeToClose = closingTime - currTime
    
          if(self.timeToClose < (60)):
            # Close all positions when 1 minutes til market close.
            print("Market closing soon. Stop trading.")
            break
            
            '''# Close all positions when 1 minutes til market close.
            print("Market closing soon.  Closing positions.")
    
            positions = self.alpaca.list_positions()
            for position in positions:
              if(position.side == 'long'):
                orderSide = 'sell'
              else:
                orderSide = 'buy'
              qty = abs(int(float(position.qty)))
              respSO = []
              tSubmitOrder = threading.Thread(target=self.submitOrder(qty, position.symbol, orderSide, respSO))
              tSubmitOrder.start()
              tSubmitOrder.join()
    
            # Run script again after market close for next trading day.
            print("Sleeping until market close (15 minutes).")
            time.sleep(60 * 15)'''
            
          else:
            trade = threading.Thread(target=self.trade)
            trade.start()
            trade.join()
            last_equity = float(self.alpaca.get_account().last_equity)
            cur_time = time.time()
            self.equities.append([cur_time,last_equity])
            time.sleep(self.time_interval)
            
    def awaitMarketOpen(self):
        isOpen = self.alpaca.get_clock().is_open
        while(not isOpen):
          clock = self.alpaca.get_clock()
          openingTime = clock.next_open.replace(tzinfo=datetime.timezone.utc).timestamp()
          currTime = clock.timestamp.replace(tzinfo=datetime.timezone.utc).timestamp()
          timeToOpen = int((openingTime - currTime) / 60)
          print(str(timeToOpen) + " minutes til market open.")
          time.sleep(60)
          isOpen = self.alpaca.get_clock().is_open
    
    def trade(self):
        state = self.get_state()
        
        if self.drl_lib == 'elegantrl':
            with torch.no_grad():
                s_tensor = torch.as_tensor((state,), device=self.device)
                a_tensor = self.act(s_tensor)  
                action = a_tensor.detach().cpu().numpy()[0]  
                
            action = (action * self.max_stock).astype(int)
            
        elif self.drl_lib == 'rllib':
            action = self.agent.compute_single_action(state)
        
        elif self.drl_lib == 'stable_baselines3':
            action = self.model.predict(state)[0]
            
        else:
            raise ValueError('The DRL library input is NOT supported yet. Please check your input.')
        
        self.stocks_cd += 1
        if self.turbulence_bool == 0:
            min_action = 10  # stock_cd
            for index in np.where(action < -min_action)[0]:  # sell_index:
                sell_num_shares = min(self.stocks[index], -action[index])
                qty =  abs(int(sell_num_shares))
                respSO = []
                tSubmitOrder = threading.Thread(target=self.submitOrder(qty, self.stockUniverse[index], 'sell', respSO))
                tSubmitOrder.start()
                tSubmitOrder.join()
                self.cash = float(self.alpaca.get_account().cash)
                self.stocks_cd[index] = 0

            for index in np.where(action > min_action)[0]:  # buy_index:
                if self.cash < 0:
                    tmp_cash = 0
                else:
                    tmp_cash = self.cash
                buy_num_shares = min(tmp_cash // self.price[index], abs(int(action[index])))
                qty = abs(int(buy_num_shares))
                respSO = []
                tSubmitOrder = threading.Thread(target=self.submitOrder(qty, self.stockUniverse[index], 'buy', respSO))
                tSubmitOrder.start()
                tSubmitOrder.join()
                self.cash = float(self.alpaca.get_account().cash)
                self.stocks_cd[index] = 0
                
        else:  # sell all when turbulence
            positions = self.alpaca.list_positions()
            for position in positions:
                if(position.side == 'long'):
                    orderSide = 'sell'
                else:
                    orderSide = 'buy'
                qty = abs(int(float(position.qty)))
                respSO = []
                tSubmitOrder = threading.Thread(target=self.submitOrder(qty, position.symbol, orderSide, respSO))
                tSubmitOrder.start()
                tSubmitOrder.join()
            
            self.stocks_cd[:] = 0
            
    
    def get_state(self):
        alpaca = AlpacaProcessor(api=self.alpaca)
        price, tech, turbulence = alpaca.fetch_latest_data(ticker_list = self.stockUniverse, time_interval='1Min',
                                                     tech_indicator_list=self.tech_indicator_list)
        turbulence_bool = 1 if turbulence >= self.turbulence_thresh else 0
        
        turbulence = (self.sigmoid_sign(turbulence, self.turbulence_thresh) * 2 ** -5).astype(np.float32)
        
        tech = tech * 2 ** -7
        positions = self.alpaca.list_positions()
        stocks = [0] * len(self.stockUniverse)
        for position in positions:
            ind = self.stockUniverse.index(position.symbol)
            stocks[ind] = ( abs(int(float(position.qty))))
        
        stocks = np.asarray(stocks, dtype = float)
        cash = float(self.alpaca.get_account().cash)
        self.cash = cash
        self.stocks = stocks
        self.turbulence_bool = turbulence_bool 
        self.price = price
        
        
        
        amount = np.array(max(self.cash, 1e4) * (2 ** -12), dtype=np.float32)
        scale = np.array(2 ** -6, dtype=np.float32)
        state = np.hstack((amount,
                    turbulence,
                    self.turbulence_bool,
                    price * scale,
                    self.stocks * scale,
                    self.stocks_cd,
                    tech,
                    )).astype(np.float32)
        print(len(self.stockUniverse))
        return state
        
    def submitOrder(self, qty, stock, side, resp):
        if(qty > 0):
          try:
            self.alpaca.submit_order(stock, qty, side, "market", "day")
            print("Market order of | " + str(qty) + " " + stock + " " + side + " | completed.")
            resp.append(True)
          except:
            print("Order of | " + str(qty) + " " + stock + " " + side + " | did not go through.")
            resp.append(False)
        else:
          print("Quantity is 0, order of | " + str(qty) + " " + stock + " " + side + " | not completed.")
          resp.append(True)

    @staticmethod
    def sigmoid_sign(ary, thresh):
        def sigmoid(x):
            return 1 / (1 + np.exp(-x * np.e)) - 0.5

        return sigmoid(ary / thresh) * thresh
    
class StockEnvEmpty(gym.Env):
    #Empty Env used for loading rllib agent
    def __init__(self,config):
      state_dim = config['state_dim']
      action_dim = config['action_dim']
      self.observation_space = gym.spaces.Box(low=-3000, high=3000, shape=(state_dim,), dtype=np.float32)
      self.action_space = gym.spaces.Box(low=-1, high=1, shape=(action_dim,), dtype=np.float32)
        
    def reset(self):
        return 

    def step(self, actions):
        return

## Run Paper trading

In [None]:
print(DOW_30_TICKER)

['AAPL', 'MSFT', 'JPM', 'V', 'RTX', 'PG', 'GS', 'NKE', 'DIS', 'AXP', 'HD', 'INTC', 'WMT', 'IBM', 'MRK', 'UNH', 'KO', 'CAT', 'TRV', 'JNJ', 'CVX', 'MCD', 'VZ', 'CSCO', 'XOM', 'BA', 'MMM', 'PFE', 'WBA', 'DD']


In [None]:
state_dim

333

In [None]:
action_dim

30

In [None]:
paper_trading_erl = AlpacaPaperTrading(ticker_list = DOW_30_TICKER, 
                                       time_interval = '1Min', 
                                       drl_lib = 'elegantrl', 
                                       agent = 'ppo', 
                                       cwd = './papertrading_erl_retrain', 
                                       net_dim = 512, 
                                       state_dim = state_dim, 
                                       action_dim= action_dim, 
                                       API_KEY = API_KEY, 
                                       API_SECRET = API_SECRET, 
                                       APCA_API_BASE_URL = APCA_API_BASE_URL, 
                                       tech_indicator_list = TECHNICAL_INDICATORS_LIST, 
                                       turbulence_thresh=30, 
                                       max_stock=1e2)
paper_trading_erl.run()

Waiting for market to open...
1219 minutes til market open.
1218 minutes til market open.


KeyboardInterrupt: ignored

# Part 4: Check Portfolio Performance

In [None]:
import alpaca_trade_api as tradeapi
import trading_calendars as tc
import numpy as np
import pandas as pd
import pytz
import yfinance as yf
import matplotlib.ticker as ticker
import matplotlib.dates as mdates
from datetime import datetime as dt
from finrl.plot import backtest_stats
import matplotlib.pyplot as plt

In [None]:
def get_trading_days(start, end):
    nyse = tc.get_calendar('NYSE')
    df = nyse.sessions_in_range(pd.Timestamp(start,tz=pytz.UTC),
                                pd.Timestamp(end,tz=pytz.UTC))
    trading_days = []
    for day in df:
        trading_days.append(str(day)[:10])

    return trading_days

def alpaca_history(key, secret, url, start, end):
    api = tradeapi.REST(key, secret, url, 'v2')
    trading_days = get_trading_days(start, end)
    df = pd.DataFrame()
    for day in trading_days:
        df = df.append(api.get_portfolio_history(date_start = day,timeframe='5Min').df.iloc[:78])
    equities = df.equity.values
    cumu_returns = equities/equities[0]
    cumu_returns = cumu_returns[~np.isnan(cumu_returns)]
    
    return df, cumu_returns

def DIA_history(start):
    data_df = yf.download(['^DJI'],start=start, interval="5m")
    data_df = data_df.iloc[48:]
    baseline_returns = data_df['Adj Close'].values/data_df['Adj Close'].values[0]
    return data_df, baseline_returns

## Get cumulative return

In [None]:
df_erl, cumu_erl = alpaca_history(key=API_KEY, 
                                  secret=API_SECRET, 
                                  url=APCA_API_BASE_URL, 
                                  start='2021-10-19', 
                                  end='2021-10-22')


In [None]:
df_djia, cumu_djia = DIA_history(start='2021-10-19')

[*********************100%***********************]  1 of 1 completed


In [None]:
print(df_erl)


                           profit_loss  profit_loss_pct      equity
timestamp                                                          
2021-10-19 09:30:00-04:00     -9324.06        -0.009005  1026133.65
2021-10-19 09:35:00-04:00    -10106.67        -0.009761  1025351.04
2021-10-19 09:40:00-04:00     -8463.56        -0.008174  1026994.15
2021-10-19 09:45:00-04:00     -9907.60        -0.009568  1025550.11
2021-10-19 09:50:00-04:00     -8516.10        -0.008224  1026941.61
...                                ...              ...         ...
2021-10-22 15:35:00-04:00      1560.63         0.001507  1037018.34
2021-10-22 15:40:00-04:00       729.00         0.000704  1036186.71
2021-10-22 15:45:00-04:00       546.57         0.000528  1036004.28
2021-10-22 15:50:00-04:00       459.65         0.000444  1035917.36
2021-10-22 15:55:00-04:00       862.31         0.000833  1036320.02

[312 rows x 3 columns]


In [None]:
print(df_djia)

                                   Open          High  ...     Adj Close    Volume
Datetime                                               ...                        
2021-10-19 13:30:00-04:00  35428.171875  35434.890625  ...  35434.699219   2301420
2021-10-19 13:35:00-04:00  35435.179688  35440.160156  ...  35437.269531   1806707
2021-10-19 13:40:00-04:00  35437.480469  35437.710938  ...  35430.851562   1797519
2021-10-19 13:45:00-04:00  35430.601562  35432.808594  ...  35423.058594   1917764
2021-10-19 13:50:00-04:00  35422.929688  35435.828125  ...  35435.691406   2018081
...                                 ...           ...  ...           ...       ...
2021-10-22 15:35:00-04:00  35708.800781  35710.929688  ...  35684.738281   3547180
2021-10-22 15:40:00-04:00  35684.339844  35692.960938  ...  35675.769531   4243899
2021-10-22 15:45:00-04:00  35675.710938  35680.800781  ...  35670.238281   5178920
2021-10-22 15:50:00-04:00  35670.808594  35699.308594  ...  35692.921875   9231104
2021

In [None]:
df_erl.tail()

Unnamed: 0_level_0,profit_loss,profit_loss_pct,equity
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-10-22 15:35:00-04:00,1560.63,0.001507,1037018.34
2021-10-22 15:40:00-04:00,729.0,0.000704,1036186.71
2021-10-22 15:45:00-04:00,546.57,0.000528,1036004.28
2021-10-22 15:50:00-04:00,459.65,0.000444,1035917.36
2021-10-22 15:55:00-04:00,862.31,0.000833,1036320.02


In [None]:
returns_erl = cumu_erl -1 
returns_dia = cumu_djia - 1
returns_dia = returns_dia[:returns_erl.shape[0]]
print('len of erl return: ', returns_erl.shape[0])
print('len of dia return: ', returns_dia.shape[0])

len of erl return:  312
len of dia return:  264


## plot and save

In [None]:
import matplotlib.pyplot as plt
plt.figure(dpi=1000)
plt.grid()
plt.grid(which='minor', axis='y')
plt.title('Stock Trading (Paper trading)', fontsize=20)
plt.plot(returns_erl, label = 'ElegantRL Agent', color = 'red')
#plt.plot(returns_sb3, label = 'Stable-Baselines3 Agent', color = 'blue' )
#plt.plot(returns_rllib, label = 'RLlib Agent', color = 'green')
plt.plot(returns_dia, label = 'DJIA', color = 'grey')
plt.ylabel('Return', fontsize=16)
plt.xlabel('Year 2021', fontsize=16)
plt.xticks(size = 14)
plt.yticks(size = 14)
ax = plt.gca()
ax.xaxis.set_major_locator(ticker.MultipleLocator(78))
ax.xaxis.set_minor_locator(ticker.MultipleLocator(6))
ax.yaxis.set_minor_locator(ticker.MultipleLocator(0.005))
ax.yaxis.set_major_formatter(ticker.PercentFormatter(xmax=1, decimals=2))
ax.xaxis.set_major_formatter(ticker.FixedFormatter(['','10-19','','10-20',
                                                    '','10-21','','10-22']))
plt.legend(fontsize=10.5)
plt.savefig('papertrading_stock.png')

In [7]:
print(elegantrl.__version__)

NameError: name 'elegantrl' is not defined

In [3]:
import pandas as pd