In [1]:
import datetime
import random
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from matplotlib.finance import candlestick_ohlc
import numpy as np
import pandas as pd
import pandas_market_calendars as mcal
from tgym.core import Env
%matplotlib

Using matplotlib backend: TkAgg




In [2]:
class CMERTHCalendar(mcal.exchange_calendar_nyse.NYSEExchangeCalendar):
    """NYSE exchange calendar with the session close pushed out to 16:15.

    Everything except ``close_time`` is inherited from the NYSE calendar.
    """

    @property
    def close_time(self):
        """Return the session close as a ``datetime.time`` (16:15)."""
        session_close = datetime.time(hour=16, minute=15)
        return session_close

In [155]:
class ESTradingEnv(Env):
    """Trading environment that replays tick data behind 5-minute bars.

    The agent observes a rolling window of 5-minute OHLC bars (plus a 20-bar
    EMA) and chooses hold/buy/sell once per bar.  A buy places a limit order at
    the previous bar's low, a sell at the previous bar's high.  Orders, profit
    targets and stops are then simulated tick by tick inside the newest bar.
    """

    # One-hot encodings of the agent's actions.
    _actions = {
        'hold': np.array([1, 0, 0]),
        'buy': np.array([0, 1, 0]),
        'sell': np.array([0, 0, 1])
    }

    # One-hot encodings of the current position.
    _positions = {
        'flat': np.array([1, 0, 0]),
        'long': np.array([0, 1, 0]),
        'short': np.array([0, 0, 1])
    }

    # Distance (in price units) from the fill price to the target / the stop.
    _TARGET_OFFSET = 1
    _STOP_OFFSET = 1
    # PnL booked when a trade closes at its target (+) or at its stop (-).
    # NOTE(review): presumably one ES point in dollars -- confirm.
    _TRADE_PNL = 50

    def __init__(self, tick_data, history_length=50, episode_length=20):
        """Build the 5-minute bars from ``tick_data`` and start an episode.

        Args:
            tick_data (pandas.DataFrame): ticks indexed by timestamp with at
                least a ``last`` price column.
            history_length (int): number of bars in each observation.
            episode_length (int): number of trading days kept available after
                the randomly chosen start day.
        """
        self._tick_data = tick_data
        self._five_min_data = self._make_5min_bars(tick_data)
        self._history_length = history_length
        self._episode_length = episode_length

        self._first_render = True
        self._observation = self.reset()

    @staticmethod
    def _same(state, encoding):
        """Return True when ``state`` equals the one-hot ``encoding``.

        ``np.array_equal`` also copes with ``state`` being ``None`` (no working
        order), which ``all(state == encoding)`` only handles by accident.
        """
        return np.array_equal(state, encoding)

    def _window(self, x_end):
        """Return the ``history_length`` bars ending just before row ``x_end``."""
        # Clamp at 0: a negative start would make iloc slice from the end.
        x_beg = max(0, x_end - self._history_length)
        return self._five_min_data.iloc[x_beg:x_end]

    def reset(self):
        """Reset the trading environment and start a new random episode.

        Returns:
            observation (pandas.DataFrame): the ``history_length`` bars ending
                at the first bar (09:35) of the randomly chosen start day.

        Raises:
            IndexError: if the data holds too few trading days to pick a start
                day with ``episode_length`` days after it.
        """
        self._iteration = 0

        # Row labels of the first bar (09:35) of every trading day.
        bar_time = self._five_min_data['date'].dt
        opens_day = (bar_time.hour == 9) & (bar_time.minute == 35)
        day_starts = self._five_min_data[opens_day].index.tolist()

        # Skip the first four days and keep ``episode_length`` days in reserve
        # after the start (episode_length is the number of days per episode).
        candidates = day_starts[4:-self._episode_length]
        if not candidates:
            raise IndexError(
                "Not enough trading days (%d) to start an episode of %s days"
                % (len(day_starts), self._episode_length))
        self._start_index = random.choice(candidates)

        # First bar of the day that follows the episode; step() uses it to
        # report ``done``.
        end_pos = day_starts.index(self._start_index) + self._episode_length
        if 0 <= end_pos < len(day_starts):
            self._end_index = day_starts[end_pos]
        else:
            self._end_index = len(self._five_min_data)

        observation = self._window(self._start_index + 1)

        self._action = self._actions['hold']
        self._position = self._positions['flat']
        self._working_order = None
        self._order_price = 0
        self._target_price = 0
        self._stop_price = 0

        self._total_reward = 0
        self._total_pnl = 0
        self._winning_trades = 0.0
        self._total_trades = 0.0
        self._win_rate = 0.0

        # Keep render() in sync when reset() is called again after construction.
        self._observation = observation
        return observation

    def step(self, action):
        """Take an action (buy/sell/hold) and compute the immediate reward.

        Args:
            action (numpy.array): Action to be taken, one-hot encoded.

        Returns:
            tuple:
                - observation (pandas.DataFrame): the latest ``history_length`` bars.
                - reward (float): PnL realised by trades closed during this step.
                - done (bool): True once the newest bar is the last bar of the
                  episode (or of the data); further step() calls return
                  undefined results.
                - info (dict): running statistics (total PnL, trade count,
                  win rate, position name).
        """
        self._action = action
        self._iteration += 1

        # Let's move forward one step in time
        price_series = self._window(self._start_index + 1 + self._iteration)

        # Ticks that make up the newest bar.  Bars are resampled left-closed and
        # labelled with their right edge, so the bar stamped T covers
        # [T - 5min, T).
        ts_end = price_series.iloc[-1]['date']
        ts_start = ts_end - pd.Timedelta(minutes=5)
        tick_times = self._tick_data.index
        ticks = self._tick_data[(tick_times >= ts_start) & (tick_times < ts_end)]

        # Create order -- buy at low or sell at high of previous bar
        if self._same(self._action, self._actions['buy']):
            self._working_order = self._actions['buy']
            self._order_price = price_series.iloc[-2]['low']
        elif self._same(self._action, self._actions['sell']):
            self._working_order = self._actions['sell']
            self._order_price = price_series.iloc[-2]['high']

        # Simulate order execution by processing each tick in the last bar
        pnl_before = self._total_pnl
        for price in ticks['last'].values:
            self._process_tick(price)

        reward = self._total_pnl - pnl_before
        self._total_reward += reward

        self._observation = price_series

        next_bar = self._start_index + self._iteration + 1
        done = next_bar >= min(self._end_index, len(self._five_min_data))
        info = {
            'total_pnl': self._total_pnl,
            'total_trades': self._total_trades,
            'win_rate': self._win_rate,
            'position': self._position_name(),
        }
        return self._observation, reward, done, info

    def _process_tick(self, price):
        """Advance the order/position state machine by one traded price.

        A state change takes effect from the next tick on: the tick that fills
        an order is not also checked against the new target/stop.
        """
        if self._same(self._position, self._positions['flat']):
            if self._same(self._working_order, self._actions['buy']):
                if price < self._order_price:
                    self._open_position('long', 1, "Buy Order Filled: %s")
            elif self._same(self._working_order, self._actions['sell']):
                if price > self._order_price:
                    self._open_position('short', -1, "Sell Order Filled: %s")
        elif self._same(self._position, self._positions['long']):
            if price > self._target_price:
                self._close_position("Target Hit: %s" % self._target_price, True)
            elif price <= self._stop_price:
                self._close_position("Stop Hit: %s" % self._stop_price, False)
        elif self._same(self._position, self._positions['short']):
            if price < self._target_price:
                self._close_position("Target Hit: %s" % self._target_price, True)
            elif price >= self._stop_price:
                self._close_position("Stop Hit: %s" % self._stop_price, False)

    def _open_position(self, position, direction, message):
        """Fill the working order: enter ``position`` and set target and stop.

        ``direction`` is +1 for a long and -1 for a short.
        """
        self._position = self._positions[position]
        self._target_price = self._order_price + direction * self._TARGET_OFFSET
        self._stop_price = self._order_price - direction * self._STOP_OFFSET
        self._working_order = None
        print(message % self._order_price)

    def _close_position(self, message, won):
        """Go flat, book the trade's PnL and refresh the win statistics."""
        print(message)
        self._position = self._positions['flat']
        self._target_price, self._stop_price, self._order_price = (0, ) * 3
        if won:
            self._total_pnl += self._TRADE_PNL
            self._winning_trades += 1
        else:
            self._total_pnl -= self._TRADE_PNL
        self._total_trades += 1
        self._win_rate = (self._winning_trades / self._total_trades) * 100

    def _position_name(self):
        """Return 'flat', 'long' or 'short' for the current one-hot position."""
        return ['flat', 'long', 'short'][list(self._position).index(1)]

    def _format_bar_time(self, x, pos=None):
        """Tick formatter: turn a bar's row number into e.g. 'Aug 14 09:35'."""
        last_row = len(self._five_min_data.index) - 1
        row = int(np.clip(int(x + 0.5), 0, last_row))
        ts = self._five_min_data['date'].iloc[row]
        # '%-d' (day without zero padding) is not portable, so format the day
        # number separately.
        return '%s %d %s' % (ts.strftime('%b'), ts.day, ts.strftime('%I:%M'))

    def render(self):
        """Matplotlib rendering of each step.

        Draws the newest bar as a candlestick, scrolls the axes so that roughly
        the last 100 bars stay visible, and shows the EMA, the working order
        level and the running statistics.
        """
        if self._first_render:
            self._f, self._ax = plt.subplots(figsize=(16, 8))
            # A single EMA line updated in place instead of one new artist per
            # rendered step.
            (self._ema_line,) = self._ax.plot([], [], color='blue', lw=0.5)
            self._first_render = False

        # Format xaxis
        self._ax.xaxis.set_major_formatter(
            ticker.FuncFormatter(self._format_bar_time))

        price_series = self._observation
        curr_bar = price_series.iloc[-1]
        prev_bar = price_series.iloc[-2]
        curr_index = curr_bar.name
        candle = (curr_index,) + tuple(
            curr_bar[['open', 'high', 'low', 'close']].values)
        candlestick_ohlc(self._ax, [candle], width=.5,
                         colorup='g', colordown='r', alpha=1)

        # Adjust axes
        i = curr_index - self._start_index
        i_min = max(0, i - 100) + self._start_index
        i_max = i + self._start_index + 1 if i < 100 else i_min + 101
        visible = self._five_min_data.loc[i_min:i_max]
        self._ax.set_xlim(i_min - 0.5, i_max + 0.5)
        self._ax.set_ylim(visible['low'].min() - 1, visible['high'].max() + 1)

        # Plot vertical lines indicating new trading day
        ts = curr_bar['date']
        if (ts.hour == 9) and (ts.minute == 35):
            self._ax.axvline(curr_index - 0.5, color='black', lw=0.5)

        # Plot ema
        self._ema_line.set_data(visible.index.tolist(), visible['ema'].tolist())

        # Plot action: mark the limit price of the order placed on this step
        if self._same(self._action, self._actions['buy']):
            self._ax.plot([curr_index - 1, curr_index],
                          [prev_bar['low'], prev_bar['low']],
                          color='black', ls="-", zorder=100)
        elif self._same(self._action, self._actions['sell']):
            self._ax.plot([curr_index - 1, curr_index],
                          [prev_bar['high'], prev_bar['high']],
                          color='black', ls="-", zorder=100)

        # Plot stats on this environment's own figure
        self._f.suptitle('Total Reward: ' + "%.2f" % self._total_reward + ' ~ ' +
                         'Total PnL: ' + "%.2f" % self._total_pnl + ' ~ ' +
                         'Total Trades: ' + "%.2f" % self._total_trades + ' ~ ' +
                         'Win Rate: ' + "%.2f" % self._win_rate + ' ~ ' +
                         'Position: ' + self._position_name())

        plt.pause(.01)

    def _make_5min_bars(self, tick_data):
        """Resample ticks into regular-trading-hours 5-minute bars with an EMA.

        Args:
            tick_data (pandas.DataFrame): ticks indexed by timestamp with a
                ``last`` price column.

        Returns:
            pandas.DataFrame: one row per bar with columns ``date`` (the bar's
            closing timestamp), ``open``, ``high``, ``low``, ``close`` and
            ``ema``, on a fresh integer index.
        """
        # Resample to get 5min bars; each bar covers [T - 5min, T) and is
        # labelled with its right edge T (same labels as the former loffset).
        bars = tick_data['last'].resample(
            '5Min', closed='left', label='right').ohlc()

        # Create RTH Calendar
        nyse = CMERTHCalendar()
        schedule = nyse.schedule(start_date=bars.index.min(),
                                 end_date=bars.index.max())

        # Keep only the bars that close during RTH: after the open, up to and
        # including the close, on a day that has a session.
        bar_dates = pd.to_datetime(bars.index.date)
        on_session_day = bar_dates.isin(schedule.index)
        # reindex() yields NaT for days without a session; NaT compares False.
        session_open = pd.DatetimeIndex(
            schedule['market_open'].reindex(bar_dates))
        session_close = pd.DatetimeIndex(
            schedule['market_close'].reindex(bar_dates))
        during_rth = (on_session_day &
                      (bars.index > session_open) &
                      (bars.index <= session_close))
        bars = bars[during_rth].copy()

        # Add ema
        bars['ema'] = bars['close'].ewm(span=20, min_periods=20).mean()

        # Reset index so bars are addressed by row number; the timestamps move
        # into a column named after the tick index.
        return bars.reset_index()
        

In [4]:
# Load the ES tick data, keep the ticks after 2017-07-29 and index them by time.
tick_data = pd.read_feather('../data/processed/ES_tick.feather')
# Filter and re-index in one chain: set_index moves `date` into the index and
# returns a new frame, so nothing is mutated in place on a filtered slice.
tick_data = tick_data[tick_data['date'] > '2017-07-29'].set_index('date')
tick_data.head()

Unnamed: 0_level_0,last,bid,ask,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-07-30 18:00:00.041000-04:00,2471.5,2471.5,2471.5,1
2017-07-30 18:00:00.041000-04:00,2471.5,2471.5,2471.5,13
2017-07-30 18:00:00.041000-04:00,2471.5,2471.5,2471.5,15
2017-07-30 18:00:00.041000-04:00,2471.5,2471.5,2471.5,4
2017-07-30 18:00:00.041000-04:00,2471.5,2471.5,2471.5,10


In [163]:
# Build the environment from the loaded ticks; the constructor resamples them
# into 5-minute bars and calls reset() to start a first episode.
e = ESTradingEnv(tick_data)

In [26]:
# Start a new random episode and inspect the tail of the returned bar window;
# the final row is the first bar of the chosen start day.
td = e.reset().tail()
td

Unnamed: 0,date,open,high,low,close,ema
806,2017-08-11 16:00:00-04:00,2443.0,2443.0,2439.5,2440.75,2441.239413
807,2017-08-11 16:05:00-04:00,2440.5,2441.75,2439.75,2441.25,2441.240421
808,2017-08-11 16:10:00-04:00,2441.25,2441.5,2440.5,2441.0,2441.217524
809,2017-08-11 16:15:00-04:00,2441.0,2441.5,2440.25,2441.25,2441.220617
810,2017-08-14 09:35:00-04:00,2455.25,2459.0,2455.0,2458.25,2442.842463


In [37]:
# Select the ticks in the 5-minute window that ends at the last bar's timestamp
# (start exclusive, end inclusive) and total their volume.
# NOTE(review): the bars come from resample('5Min') with its default closure, so
# a bar stamped T presumably covers [T - 5min, T) rather than (T - 5min, T] --
# confirm which window is intended.
ts_end = td.iloc[-1]['date']
ts_start = ts_end - pd.Timedelta(minutes=5)
ticks = tick_data[(tick_data.index > ts_start) & (tick_data.index <= ts_end)]
ticks['volume'].sum()

51283

In [40]:
%%time
# Walk every tick of the bar with iterrows() and count the rows visited, to see
# what a pure-Python pass over one bar costs.
flag = True
i = 0
for _row in ticks.iterrows():
    i += 1 if flag else 0
print(i)

12528
CPU times: user 188 ms, sys: 8 ms, total: 196 ms
Wall time: 187 ms


In [164]:
%%time
# Random-policy smoke test: while flat, submit a random sell or buy order;
# while in a position, hold until the target or stop closes it.
e.render()
flat = e._positions['flat']
entry_actions = [e._actions['sell'], e._actions['buy']]
for _ in range(500):
    if np.array_equal(e._position, flat):
        action = random.choice(entry_actions)  # random entry: sell or buy
    else:
        action = e._actions['hold']
    e.step(action)
    e.render()

Buy Order Filled: 2466.25
Stop Hit: 2465.25
Sell Order Filled: 2467.5
Stop Hit: 2468.5
Sell Order Filled: 2470.25
Target Hit: 2469.25
Buy Order Filled: 2468.75
Stop Hit: 2467.75
Sell Order Filled: 2467.25
Target Hit: 2466.25
Buy Order Filled: 2466.25
Target Hit: 2467.25
Sell Order Filled: 2467.5
Stop Hit: 2468.5
Buy Order Filled: 2467.25
Stop Hit: 2466.25
Sell Order Filled: 2467.0
Stop Hit: 2468.0
Sell Order Filled: 2468.0
Stop Hit: 2469.0
Sell Order Filled: 2469.0
Stop Hit: 2470.0
Sell Order Filled: 2469.75
Target Hit: 2468.75
Buy Order Filled: 2468.25
Target Hit: 2469.25
Buy Order Filled: 2468.5
Target Hit: 2469.5
Buy Order Filled: 2468.5
Target Hit: 2469.5
Sell Order Filled: 2470.25
Stop Hit: 2471.25
Sell Order Filled: 2472.0
Stop Hit: 2473.0
Buy Order Filled: 2471.75
Stop Hit: 2470.75
Sell Order Filled: 2473.0
Stop Hit: 2474.0
Sell Order Filled: 2474.25
Stop Hit: 2475.25
Buy Order Filled: 2472.75
Stop Hit: 2471.75
Buy Order Filled: 2471.75
Stop Hit: 2470.75
Buy Order Filled: 2470.2

In [82]:
# Scratch check for render(): take the last bar of `td` and keep only its OHLC
# fields.
# NOTE(review): the recorded output (bar 5058) does not match the `td` shown
# earlier, so this cell was presumably run against a different episode/data set.
gd = td.iloc[-1]
subset = gd[['open','high','low','close']]
subset

open      2565.5
high     2565.75
low      2562.25
close       2563
Name: 5058, dtype: object

In [87]:
# Build the one-element list of (row label, open, high, low, close) tuples in
# the same way render() assembles its candlestick_ohlc input.
tuples = [(subset.name, ) + tuple(subset.values)]
tuples

[(5058, 2565.5, 2565.75, 2562.25, 2563.0)]

In [59]:
# Scratch check of the start-day sampling: pick a random element, skipping the
# first 3 and the last 20 entries.
# NOTE(review): `x` is not defined in any cell visible here, so this fails on a
# fresh kernel -- presumably it held the day-start indices; confirm or remove.
random.choice(x[3:-20])

2592

In [63]:
# Peek at the first three 5-minute bars; `ema` is NaN here because the EMA is
# computed with min_periods=20.
e._five_min_data.iloc[0:3]

Unnamed: 0,date,open,high,low,close,ema
0,2017-07-31 09:35:00-04:00,2474.75,2475.75,2474.0,2475.5,
1,2017-07-31 09:40:00-04:00,2475.25,2476.0,2473.75,2475.5,
2,2017-07-31 09:45:00-04:00,2475.75,2475.75,2474.5,2474.75,


In [98]:
# Sanity check of the tuple-repetition idiom step() uses to zero the target,
# stop and order prices in a single assignment.
(0,) * 3

(0, 0, 0)