In [36]:
import datetime
import random
import math
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from matplotlib.finance import candlestick_ohlc
import numpy as np
import pandas as pd
import pandas_market_calendars as mcal
#from tgym.core import Env
from gym import Env, spaces
%matplotlib

Using matplotlib backend: Qt5Agg


In [14]:
# Scratch check of gym's Box space: a 5-element float32 box bounded to
# [-10000, 10000]; display the first component of one random sample.
s = spaces.Box(low=-10000, high=10000, shape=(5,), dtype=np.float32)
s.sample()[0]

-400.45654

In [42]:
class ESTradingEnv(Env):
    """Gym environment simulating limit-order trading on 5-minute ES bars.

    Each step advances one 5-minute bar. While flat, a buy (sell) action
    places a working order at the low (high) of the previous bar; the ticks
    of the new bar are then replayed to decide whether the order fills and
    whether the resulting position hits its target or stop, both placed one
    price unit away from the entry price.

    Observations are ``history_length`` rows of
    ``[open, high, low, close, ema, sin_time, cos_time, position]``.
    """

    _actions = {
        'hold': 0,
        'buy': 1,
        'sell': 2
    }

    _positions = {
        'flat': 0,
        'long': 1,
        'short': -1
    }

    # Number of 5-minute bars per regular-trading-hours session: bars are
    # stamped 09:35 through 16:15 inclusive (see _make_5min_bars).
    _bars_per_day = 81

    def __init__(self, history_length=64, episode_length=20*81, commission=2,
                order_penalty=0, time_penalty=0,
                data_path='../data/processed/ES_tick.feather',
                start_date='2017-07-29'):
        """Load tick data, build the 5-minute bars and start a first episode.

        Args:
            history_length (int): number of bars in each observation window.
            episode_length (int): maximum number of steps per episode.
            commission (float): amount charged on every fill and every exit.
            order_penalty (float): reward penalty for each order placed.
            time_penalty (float): reward penalty per step spent in a position.
            data_path (str): feather file with a 'date' column and a 'last'
                column of tick prices.
            start_date (str): only ticks strictly after this date are kept.
        """
        tick_data = pd.read_feather(data_path)
        # Filter and index in one chain instead of mutating a filtered slice
        # in place; the resulting index is still named 'date'.
        tick_data = tick_data[tick_data['date'] > start_date].set_index('date')

        self._tick_data = tick_data
        self._five_min_data = self._make_5min_bars(tick_data)
        self._history_length = history_length
        self._episode_length = episode_length

        self._commission = commission
        self._order_penalty = order_penalty
        self._time_penalty = time_penalty

        # We can take 3 actions 0=hold, 1=buy, 2=sell
        self.action_space = spaces.Discrete(3)
        # Observation consists of history_length bars and 8 features
        # First 5 features are ohlc and the value of the 20-bar ema
        # Next 2 features are sin_time and cos_time
        # Last feature is the position: 0=flat, 1=long, -1=short
        # NOTE(review): the space declares float32 while _get_observation
        # appears to return float64 values -- confirm whether consumers care.
        self.observation_space = spaces.Box(low=-9999, high=9999,
                                            shape=(history_length,8), dtype=np.float32)

        # Figure handles are created lazily by render()
        self._f = None
        self._ax = None
        self._first_render = True
        self._observation = self.reset()

    def reset(self):
        """Reset the trading environment and pick a random starting day.

        Returns:
            observation (numpy.array): observation of the initial state

        Raises:
            IndexError: if the data holds too few trading days to fit a
                warm-up period plus one full episode.
        """

        self._iteration = 0

        # Find indices of the first bars of each day
        bar_times = self._five_min_data['date'].dt
        day_starts = self._five_min_data[(bar_times.hour == 9) &
                                         (bar_times.minute == 35)].index.tolist()

        # Skip the first 4 days (leaves history for the observation window)
        # and keep enough days at the end for a whole episode. The end bound
        # is computed explicitly because a negative-zero slice bound
        # (episode_length == 0) would otherwise yield an empty list.
        days_needed = math.ceil(self._episode_length / self._bars_per_day)
        last_start = max(0, len(day_starts) - days_needed)
        candidates = day_starts[4:last_start]
        if not candidates:
            raise IndexError(
                "Not enough trading days in the data for an episode of "
                "%s steps" % self._episode_length)

        # Randomly pick a day to start
        self._start_index = random.choice(candidates)

        observation = self._get_observation(index=self._start_index,
                                            history_length=self._history_length,
                                            position=0)

        self._action = self._actions['hold']
        self._position = self._positions['flat']
        self._working_order = None
        self._order_price = 0
        self._target_price = 0
        self._stop_price = 0

        self._total_reward = 0
        self._total_pnl = 0
        self._winning_trades = 0.0
        self._total_trades = 0.0
        self._win_rate = 0.0

        self._done = False
        # Forces render() to open a fresh figure for the new episode
        self._first_render = True

        return observation

    def _get_observation(self, index, history_length, position=0):
        """Build the observation window ending at bar ``index`` (inclusive).

        Args:
            index (int): positional index of the most recent bar.
            history_length (int): number of bars to include.
            position (int): current position, broadcast to every row.

        Returns:
            numpy.array: one row per bar with columns
                open, high, low, close, ema, sin_time, cos_time, position.
                Fewer than ``history_length`` rows are returned when the
                window would start before the first bar.
        """
        x_end = index + 1
        # Clamp so a window reaching before bar 0 does not turn into a
        # negative (end-relative) slice bound.
        x_beg = max(x_end - history_length, 0)

        window = self._five_min_data.iloc[x_beg:x_end]
        # Explicit copy so adding the position column never writes into
        # (or warns about) the underlying bar frame.
        features = window.loc[:, ['open','high','low','close','ema','sin_time','cos_time']].copy()
        features['position'] = position

        return features.values

    def _close_position(self, won):
        """Flatten the position, update trade statistics, return net PnL.

        Args:
            won (bool): True when the target was hit, False for the stop.

        Returns:
            float: +50 for a win or -50 for a loss, minus the commission.
        """
        self._position = self._positions['flat']
        self._target_price, self._stop_price, self._order_price = (0, ) * 3
        if won:
            self._winning_trades += 1
        self._total_trades += 1
        self._win_rate = (self._winning_trades / self._total_trades) * 100
        return (50 if won else -50) - self._commission

    def step(self, action):
        """Take an action (buy/sell/hold) and computes the immediate reward.

        Args:
            action (int): 0=hold, 1=buy, 2=sell (see ``_actions``).

        Returns:
            tuple:
                - observation (numpy.array): Agent's observation of the current environment.
                - reward (float) : Amount of reward returned after previous action.
                - done (bool): Whether the episode has ended, in which case further step() calls will return undefined results.
                - info (dict): Contains auxiliary diagnostic information (currently always empty).

        """
        self._action = action
        self._iteration += 1
        pnl = 0
        reward = 0
        info = {}

        # Let's move forward one step in time
        x_end = self._start_index + 1 + self._iteration
        x_beg = x_end - self._history_length
        price_series = self._five_min_data.iloc[x_beg:x_end]

        # Get tick data for the last bar in the price series
        # (bars are stamped with their end time, so look back 5 minutes)
        ts_end = price_series.iloc[-1]['date']
        ts_start = ts_end - pd.Timedelta(minutes=5)
        ticks = self._tick_data[(self._tick_data.index > ts_start) &
                                (self._tick_data.index <= ts_end)]

        # If we don't have a position and there is a buy or sell action, we create the order
        # If we have a position, we apply the time penalty and another large penalty if the
        # system has issued another buy or sell action
        if self._position == self._positions['flat']:
            # Create order -- buy at low or sell at high of previous bar
            if self._action == self._actions['buy']:
                self._working_order = self._actions['buy']
                self._order_price = price_series.iloc[-2]['low']
                reward -= self._order_penalty
            elif self._action == self._actions['sell']:
                self._working_order = self._actions['sell']
                self._order_price = price_series.iloc[-2]['high']
                reward -= self._order_penalty
        else:
            reward -= self._time_penalty
            if not self._action == self._actions['hold']:
                reward -= 500
                print("Position not flat -- action was buy or sell")

        # Simulate order execution by processing each tick in the last bar.
        # Only the traded price is needed, so iterate the column values
        # directly rather than materialising a Series per row.
        for price in ticks['last'].values:
            if self._position == self._positions['flat']:
                # A working order fills once price trades through its level
                if self._working_order == self._actions['buy'] and price < self._order_price:
                    self._position = self._positions['long']
                    self._target_price = self._order_price + 1
                    self._stop_price = self._order_price - 1
                    self._working_order = None
                    pnl -= self._commission
                    print("Buy Order Filled: %s" % self._order_price)
                elif self._working_order == self._actions['sell'] and price > self._order_price:
                    self._position = self._positions['short']
                    self._target_price = self._order_price - 1
                    self._stop_price = self._order_price + 1
                    self._working_order = None
                    pnl -= self._commission
                    print("Sell Order Filled: %s" % self._order_price)
            elif self._position == self._positions['long']:
                if price > self._target_price:
                    print("Target Hit: %s" % self._target_price)
                    pnl += self._close_position(won=True)
                elif price <= self._stop_price:
                    print("Stop Hit: %s" % self._stop_price)
                    pnl += self._close_position(won=False)
            elif self._position == self._positions['short']:
                if price < self._target_price:
                    print("Target Hit: %s" % self._target_price)
                    pnl += self._close_position(won=True)
                elif price >= self._stop_price:
                    print("Stop Hit: %s" % self._stop_price)
                    pnl += self._close_position(won=False)

        reward += pnl
        self._total_reward += reward
        self._total_pnl += pnl

        # End of episode logic
        if self._iteration >= self._episode_length:
            self._done = True
        elif self._total_pnl < -500:
            self._done = True
        elif self._start_index + self._iteration + 1 >= len(self._five_min_data):
            # No further bar to step into (e.g. sessions with fewer bars
            # than _bars_per_day made the episode reach the end of the data)
            self._done = True

        observation = self._get_observation(index=self._start_index + self._iteration,
                                            history_length=self._history_length,
                                            position=self._position)
        self._observation = observation

        return observation, reward, self._done, info

    def render(self, mode='human'):
        """Matplotlib rendering of each step.

        Draws the current bar as a candlestick, the ema line, a marker for
        the order level when the last action was buy or sell, and a title
        with the running statistics.

        Args:
            mode (str): accepted for compatibility with the gym ``Env.render``
                signature; only interactive plotting is implemented.
        """
        if self._first_render:
            # Release the previous episode's figure instead of leaking it
            if getattr(self, '_f', None) is not None:
                plt.close(self._f)
            self._f, self._ax = plt.subplots(figsize=(16,8))
            self._first_render = False

            # Format xaxis: map the integer bar index back to its timestamp.
            # NOTE(review): '%-d' is a platform-specific strftime directive;
            # confirm it is supported where this runs.
            def format_hour(x, pos=None):
                thisind = np.clip(int(x + 0.5), 0, len(self._five_min_data.index) - 1)
                return self._five_min_data['date'][thisind].strftime('%b %-d %I:%M')
            self._ax.xaxis.set_major_formatter(ticker.FuncFormatter(format_hour))

        curr_index = self._start_index + self._iteration
        curr_bar = self._five_min_data.iloc[curr_index]
        prev_bar = self._five_min_data.iloc[curr_index-1]
        curr_bar_ohlc = curr_bar[['open','high','low','close']]
        candle_data = [(curr_index,) + tuple(curr_bar_ohlc.values)]
        candlestick_ohlc(self._ax, candle_data, width=.5, colorup='g', colordown='r', alpha=1)

        # Adjust axes: show at most the latest ~100 bars of the episode
        i = curr_index - self._start_index
        i_min = max(0,i-100) + self._start_index
        i_max = i + self._start_index + 1 if i < 100 else i_min + 101
        self._ax.set_xlim(i_min - 0.5, i_max + 0.5)
        y_max = self._five_min_data.loc[i_min:i_max]['high'].max()
        y_min = self._five_min_data.loc[i_min:i_max]['low'].min()
        self._ax.set_ylim(y_min - 1, y_max + 1)

        # Plot vertical lines indicating new trading day
        ts = curr_bar['date']
        if (ts.hour == 9) and (ts.minute == 35):
            self._ax.axvline(curr_index - 0.5, color='black', lw=0.5)

        # Plot ema
        self._ax.plot(self._five_min_data.loc[i_min:i_max].index.tolist(),
                self._five_min_data.loc[i_min:i_max]['ema'].tolist(),
                color='blue', lw=0.5)

        # Plot action: a short horizontal line at the order level
        # (previous bar's low for a buy, previous bar's high for a sell)
        if self._action == self._actions['buy']:
            self._ax.plot([curr_index - 1, curr_index], [prev_bar['low'], prev_bar['low']],
                         color='black', ls="-", zorder=100)
        elif self._action == self._actions['sell']:
            self._ax.plot([curr_index - 1, curr_index], [prev_bar['high'], prev_bar['high']],
                         color='black', ls="-", zorder=100)

        # Plot stats
        plt.suptitle('Episode Length: ' + "%.0f" % self._iteration + ' ~ ' +
                     'Total Reward: ' + "%.2f" % self._total_reward + ' ~ ' +
                     'Total PnL: ' + "%.2f" % self._total_pnl + ' ~ ' +
                     'Total Trades: ' + "%.0f" % self._total_trades + ' ~ ' +
                     'Win Rate: ' + "%.2f" % self._win_rate + ' ~ ' +
                     'Position: ' + "%.0f" % self._position)

        plt.pause(.01)

    def close(self):
        """Close the rendering figure, if one is open."""
        if getattr(self, '_f', None) is not None:
            plt.close(self._f)
            self._f = None
            self._ax = None
        self._first_render = True

    def _make_5min_bars(self, tick_data):
        """Aggregate ticks into regular-trading-hours 5-minute bars.

        Args:
            tick_data (pandas.DataFrame): ticks indexed by timestamp with a
                'last' price column.

        Returns:
            pandas.DataFrame: integer-indexed bars with columns date, open,
                high, low, close, ema, sin_time and cos_time.

        NOTE(review): ``resample(loffset=...)`` and ``Index.to_datetime()``
        exist only in older pandas releases -- confirm the pinned version.
        """
        #Resample to get 5min bars; loffset stamps each bar with its end time
        five_min_data = pd.DataFrame(
            tick_data['last'].resample('5Min', loffset=datetime.timedelta(minutes=5)).ohlc())
        
        #Create RTH Calendar
        
        #We hack the NYSE Calendar extending the close until 4:15
        class CMERTHCalendar(mcal.exchange_calendar_nyse.NYSEExchangeCalendar):
            @property
            def close_time(self):
                return datetime.time(16, 15)
        
        nyse = CMERTHCalendar()
        schedule = nyse.schedule(start_date=five_min_data.index.min(), 
                                 end_date=five_min_data.index.max())
        
        #Keep only those bars that occur during RTH
        #(bar end time after the open and at or before the close of a session day)
        five_min_data['dates'] = pd.to_datetime(five_min_data.index.to_datetime().date)
        five_min_data['valid_date'] = five_min_data['dates'].isin(schedule.index)
        five_min_data['valid_time'] = False
        during_rth = five_min_data['valid_date'] & \
                (five_min_data.index > schedule.loc[five_min_data['dates'],'market_open']) & \
                (five_min_data.index <= schedule.loc[five_min_data['dates'],'market_close'])
        five_min_data.loc[during_rth, 'valid_time'] = True
        five_min_data = five_min_data[five_min_data['valid_time'] == True]
        five_min_data.drop(['dates','valid_date','valid_time'], axis=1, inplace=True)
        
        #Add ema (NaN until 20 bars are available because of min_periods)
        five_min_data['ema'] = five_min_data['close'].ewm(span=20, min_periods=20).mean()

        #Reset index so bars are addressed by integer position and the
        #timestamp becomes the 'date' column
        five_min_data.reset_index(inplace=True)
        
        #Add column for number of seconds elapsed since midnight
        #NOTE(review): .values on a tz-aware column presumably yields UTC
        #datetimes, so this would be UTC rather than exchange-local midnight -- confirm
        five_min_data['sec'] = (five_min_data['date'].values 
                                - five_min_data['date'].values.astype('datetime64[D]')) / np.timedelta64(1,'s')

        #Calculate sin & cos time
        #24hr time is a cyclical continuous feature
        seconds_in_day = 24*60*60
        five_min_data['sin_time'] = np.sin(2*np.pi*five_min_data['sec']/seconds_in_day)
        five_min_data['cos_time'] = np.cos(2*np.pi*five_min_data['sec']/seconds_in_day)

        five_min_data.drop('sec', axis=1, inplace=True)
        
        return five_min_data
        

In [3]:
# Load the raw ES ticks, keep those after 2017-07-29 and index them by
# timestamp. Done as a single chain so no filtered slice is mutated in place
# (which can trigger SettingWithCopyWarning).
tick_data = pd.read_feather('../data/processed/ES_tick.feather')
tick_data = tick_data[tick_data['date'] > '2017-07-29'].set_index('date')
tick_data.head()

Unnamed: 0_level_0,last,bid,ask,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-07-30 18:00:00.041000-04:00,2471.5,2471.5,2471.5,1
2017-07-30 18:00:00.041000-04:00,2471.5,2471.5,2471.5,13
2017-07-30 18:00:00.041000-04:00,2471.5,2471.5,2471.5,15
2017-07-30 18:00:00.041000-04:00,2471.5,2471.5,2471.5,4
2017-07-30 18:00:00.041000-04:00,2471.5,2471.5,2471.5,10


In [43]:
# Build the environment with default settings (this loads the tick data and
# resets to a random start) and inspect a 5-bar observation window ending at
# bar index 10; the last row is the most recent bar.
e = ESTradingEnv()
o = e._get_observation(10,5)
#df = o.loc[:,['open','high','low','close','sin_time','cos_time']]
#df.loc[:,'position'] = 0
o[-1]

array([ 2.47075000e+03,  2.47100000e+03,  2.47000000e+03,  2.47025000e+03,
       -5.91309648e-01, -8.06444604e-01,  0.00000000e+00])

In [45]:
# Start a fresh episode at a randomly chosen day; the cell displays the
# initial observation window returned by reset().
e.reset()

array([[ 2.48225000e+03,  2.48250000e+03,  2.48175000e+03,
         2.48200000e+03, -7.22363962e-01, -6.91513056e-01,
         0.00000000e+00],
       [ 2.48225000e+03,  2.48275000e+03,  2.48175000e+03,
         2.48250000e+03, -7.37277337e-01, -6.75590208e-01,
         0.00000000e+00],
       [ 2.48250000e+03,  2.48300000e+03,  2.48175000e+03,
         2.48225000e+03, -7.51839807e-01, -6.59345815e-01,
         0.00000000e+00],
       [ 2.48200000e+03,  2.48325000e+03,  2.48175000e+03,
         2.48300000e+03, -7.66044443e-01, -6.42787610e-01,
         0.00000000e+00],
       [ 2.48300000e+03,  2.48325000e+03,  2.48200000e+03,
         2.48200000e+03, -7.79884483e-01, -6.25923472e-01,
         0.00000000e+00],
       [ 2.48225000e+03,  2.48300000e+03,  2.48200000e+03,
         2.48275000e+03, -7.93353340e-01, -6.08761429e-01,
         0.00000000e+00],
       [ 2.48250000e+03,  2.48425000e+03,  2.48250000e+03,
         2.48425000e+03, -8.06444604e-01, -5.91309648e-01,
         0.0000000

In [46]:
%%time
# Random-policy smoke test: draw the starting bar, then step the environment
# until it reports the episode is over, rendering after every step.
e.render()
done = False
while not done:
    # While flat, place a random order (1=buy, 2=sell); otherwise hold (0).
    action = np.random.randint(1, 3) if e._position == 0 else 0
    observation, reward, done, info = e.step(action)
    e.render()

Sell Order Filled: 2494.5
Stop Hit: 2495.5
Buy Order Filled: 2493.75
Target Hit: 2494.75
Buy Order Filled: 2494.0
Stop Hit: 2493.0
Sell Order Filled: 2494.25
Target Hit: 2493.25
Buy Order Filled: 2492.0
Target Hit: 2493.0
Sell Order Filled: 2492.0
Stop Hit: 2493.0
Buy Order Filled: 2492.5
Stop Hit: 2491.5
Sell Order Filled: 2492.25
Stop Hit: 2493.25
Sell Order Filled: 2493.25
Stop Hit: 2494.25
Sell Order Filled: 2494.5
Stop Hit: 2495.5
Buy Order Filled: 2495.25
Target Hit: 2496.25
Buy Order Filled: 2495.75
Stop Hit: 2494.75
Buy Order Filled: 2494.25
Stop Hit: 2493.25
Buy Order Filled: 2493.25
Target Hit: 2494.25
Buy Order Filled: 2493.25
Stop Hit: 2492.25
Sell Order Filled: 2493.75
Stop Hit: 2494.75
Sell Order Filled: 2495.0
Stop Hit: 2496.0
Sell Order Filled: 2497.25
Target Hit: 2496.25
Buy Order Filled: 2496.0
Target Hit: 2497.0
Sell Order Filled: 2494.75
Target Hit: 2493.75
Sell Order Filled: 2495.75
Stop Hit: 2496.75
Buy Order Filled: 2495.5
Target Hit: 2496.5
Buy Order Filled: 249

In [82]:
# Scratch: take the last row of a bar frame and keep only its OHLC fields.
# NOTE(review): `td` is not defined in any visible cell -- presumably a
# 5-minute bar DataFrame left over from an earlier kernel session; confirm.
gd = td.iloc[-1]
subset = gd[['open','high','low','close']]
subset

open      2565.5
high     2565.75
low      2562.25
close       2563
Name: 5058, dtype: object

In [87]:
# Scratch: prepend the row label to the OHLC values, giving a one-element
# list of (index, open, high, low, close) tuples.
tuples = [(subset.name, ) + tuple(subset.values)]
tuples

[(5058, 2565.5, 2565.75, 2562.25, 2563.0)]

In [59]:
# Scratch: pick a random element of `x`, excluding its first 3 and last 20.
# NOTE(review): `x` is not defined in any visible cell -- confirm its origin.
random.choice(x[3:-20])

2592

In [63]:
# Peek at the first three 5-minute bars held by the environment.
e._five_min_data.iloc[0:3]

Unnamed: 0,date,open,high,low,close,ema
0,2017-07-31 09:35:00-04:00,2474.75,2475.75,2474.0,2475.5,
1,2017-07-31 09:40:00-04:00,2475.25,2476.0,2473.75,2475.5,
2,2017-07-31 09:45:00-04:00,2475.75,2475.75,2474.5,2474.75,


In [98]:
# Sanity check of the tuple-repetition idiom ESTradingEnv uses to zero three
# price attributes in a single assignment.
(0,) * 3

(0, 0, 0)

In [41]:
# Scratch: number of whole 81-bar spans that fit in 1500 bars.
int(1500/81)

18