In [1]:
import datetime
import random
import math
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from matplotlib.finance import candlestick_ohlc
import numpy as np
import pandas as pd
import pandas_market_calendars as mcal
#from tgym.core import Env
from gym import Env, spaces
%matplotlib



Using matplotlib backend: Qt5Agg


In [8]:
class ESTradingEnv_v2(Env):
    
    _actions = {
        'hold': 0,
        'buy': 1,
        'sell': 2
    }

    _positions = {
        'flat': 0,
        'long': 1,
        'short': -1
    }
    
    def __init__(self, history_length=64, episode_length=20*81, commission=2,
                order_penalty=0, time_penalty=0):
        

        """Initialisation function"""
        self._five_min_data = pd.read_feather('../data/processed/ES_5mintrading.feather')
        self._five_min_data = self._five_min_data[self._five_min_data['date']>'1-1-2018']
        self._history_length = history_length
        self._episode_length = episode_length
        
        self._commission = commission
        self._order_penalty = order_penalty
        self._time_penalty = time_penalty
        
        # We can take 3 actions 0=hold, 1=buy, 2=sell
        self.action_space = spaces.Discrete(3)
        # Observation consists of history_length bars and 8 features
        # First of the 8 features is the position: 0=flat, 1=long, -1=short
        # Next 5 features are ohlc and the value of the 20 day ema
        # Last 2 features are sin_time and cos_time
        self.observation_space = spaces.Box(low=-9999, high=9999, 
                                            shape=(history_length,8), dtype=np.float32)
        
        self._first_render = True
        self._observation = self.reset()
    
    def reset(self):
        """Reset the trading environment. Reset rewards, data generator...

        Returns:
            observation (numpy.array): observation of the state
        """
        
        self._iteration = 0
        
        #Find indices of the first bars of each day
        i = self._five_min_data[(self._five_min_data['date'].dt.hour == 9) & \
                 (self._five_min_data['date'].dt.minute == 35)].index.tolist()
        
        #Randomly pick a day to start 
        #self._start_index = random.choice(i[4:-math.ceil(self._episode_length/81)])
        self._start_index = i[2]
        
        observation = self._get_observation(index=self._start_index, 
                                            history_length=self._history_length,
                                            position=0)
        
        self._action = self._actions['hold']
        self._position = self._positions['flat']
        self._working_order = None
        self._order_price = 0
        self._target_price = 0
        self._stop_price = 0
        
        self._total_reward = 0
        self._total_pnl = 0
        self._winning_trades = 0.0
        self._total_trades = 0.0
        self._win_rate = 0.0
        
        self._delayed_reward = 0
        self._delayed_reward_index = 0
        
        self._done = False
        self._first_render = True
        
        return observation
    
    def _get_observation(self, index, history_length, position=0):
        x_end = index + 1
        x_beg = x_end - history_length + 1
        
        df = self._five_min_data.loc[x_beg:x_end].copy()
        
        '''
        df['open'] = df['open'] / df['close'].iloc[-1]
        df['high'] = df['high'] / df['close'].iloc[-1]
        df['low'] = df['low'] / df['close'].iloc[-1]
        df['close'] = df['close'] / df['close'].iloc[-1]
        df['ema'] = df['ema'] / df['close'].iloc[-1]'''
        
        df = df.loc[:,['open','high','low','close','ema','sin_time','cos_time']]
        df.loc[:,'position'] = position
        
        return df.as_matrix()
        
        
    
    
    def step(self, action):
        """Take an action (buy/sell/hold) and computes the immediate reward.

        Args:
            action (numpy.array): Action to be taken, one-hot encoded.

        Returns:
            tuple:
                - observation (numpy.array): Agent's observation of the current environment.
                - reward (float) : Amount of reward returned after previous action.
                - done (bool): Whether the episode has ended, in which case further step() calls will return undefined results.
                - info (dict): Contains auxiliary diagnostic information (helpful for debugging, and sometimes learning).

        """
        self._action = action
        pnl = 0
        reward = 0
        info = {}
        
        i = self._start_index + self._iteration
        
        
        if self._position == self._positions['flat']:
            if self._action == self._actions['buy']:
                reward -= self._order_penalty
                buy_r = self._five_min_data.loc[i, 'buy_r']
                buy_b = self._five_min_data.loc[i, 'buy_b']
                #Check to see if there is a reward and if it comes on this bar or in the future
                if buy_r != 0:
                    if buy_b == 0:
                        pnl += buy_r - 2 * self._commission
                        self._total_trades += 1
                    else:
                        self._position = self._positions['long']
                        self._delayed_reward = buy_r
                        self._delayed_reward_index = i + buy_b
                        pnl -= self._commission
            elif self._action == self._actions['sell']:
                reward -= self._order_penalty
                sell_r = self._five_min_data.loc[i, 'sell_r']
                sell_b = self._five_min_data.loc[i, 'sell_b']
                #Check to see if there is a reward and if it comes on this bar or in the future
                if sell_r != 0:
                    if sell_b == 0:
                        pnl += sell_r - 2 * self._commission
                        self._total_trades += 1
                    else:
                        self._position = self._positions['short']
                        self._delayed_reward = sell_r
                        self._delayed_reward_index = i + sell_b
                        pnl -= self._commission
        
        else: 
            reward -= self._time_penalty
            if i == self._delayed_reward_index:
                self._position = self._positions['flat']
                pnl += self._delayed_reward - self._commission
                self._total_trades += 1
                
            if not self._action == self._actions['hold']:
                reward -= 500
        
        #Calculate win rate -- hope this is right
        if pnl > 0:
            self._winning_trades += 1
        if self._total_trades > 0:
            self._win_rate = (self._winning_trades / self._total_trades) * 100
        
        self._iteration += 1
          
        reward += pnl
        self._total_reward += reward
        self._total_pnl += pnl
        
        # End of episode logic
        if self._iteration >= self._episode_length:
            self._done = True
        elif self._total_pnl < -500:
            self._done = True
        
        
        observation = self._get_observation(index=self._start_index + self._iteration, 
                                            history_length=self._history_length,
                                            position=self._position)
        self._observation = observation
        
        return observation, reward, self._done, info
        
    
    def render(self):
        """Matlplotlib rendering of each step.
        """
        if self._first_render:
            self._f, self._ax = plt.subplots(figsize=(16,8))
            self._first_render = False
        
        #Format xaxis
        def format_hour(x, pos=None):
            thisind = np.clip(int(x + 0.5), self._start_index, self._start_index + len(self._five_min_data.index))
            return self._five_min_data['date'][thisind].strftime('%b %-d %I:%M')
        self._ax.xaxis.set_major_formatter(ticker.FuncFormatter(format_hour))
        
        curr_index = self._start_index + self._iteration
        print(curr_index)
        curr_bar = self._five_min_data.loc[curr_index]
        prev_bar = self._five_min_data.loc[curr_index-1]
        curr_bar_ohlc = curr_bar[['open','high','low','close']]
        candle_data = [(curr_index,) + tuple(curr_bar_ohlc.values)]
        candlestick_ohlc(self._ax, candle_data, width=.5, colorup='g', colordown='r', alpha=1)
        
        # Adjust axes
        i = curr_index - self._start_index
        i_min = max(0,i-100) + self._start_index
        i_max = i + self._start_index + 1 if i < 100 else i_min + 101
        self._ax.set_xlim(i_min - 0.5, i_max + 0.5)
        y_max = self._five_min_data.loc[i_min:i_max]['high'].max()
        y_min = self._five_min_data.loc[i_min:i_max]['low'].min()
        self._ax.set_ylim(y_min - 1, y_max + 1)
        
        # Plot vertical lines indicating new trading day
        ts = curr_bar['date']
        if (ts.hour == 9) and (ts.minute == 35):
            self._ax.axvline(curr_index - 0.5, color='black', lw=0.5)
        
        # Plot ema
        self._ax.plot(self._five_min_data.loc[i_min:i_max].index.tolist(), 
                self._five_min_data.loc[i_min:i_max]['ema'].tolist(), 
                color='blue', lw=0.5) 
        
        # Plot action
        if self._action == self._actions['buy']:
            #self._ax.scatter(curr_index + 1, curr_bar['low'], 
                             #color='lawngreen', marker='^', zorder=100)
            self._ax.plot([curr_index - 1, curr_index], [prev_bar['low'], prev_bar['low']],
                         color='black', ls="-", zorder=100)
        elif self._action == self._actions['sell']:
            self._ax.plot([curr_index - 1, curr_index], [prev_bar['high'], prev_bar['high']],
                         color='black', ls="-", zorder=100)
            
        # Plot stats
        plt.suptitle('Episode Length: ' + "%.0f" % self._iteration + ' ~ ' +
                     'Total Reward: ' + "%.2f" % self._total_reward + ' ~ ' +
                     'Total PnL: ' + "%.2f" % self._total_pnl + ' ~ ' +
                     'Total Trades: ' + "%.0f" % self._total_trades + ' ~ ' +
                     'Win Rate: ' + "%.2f" % self._win_rate + ' ~ ' +
                     'Position: ' + "%.0f" % self._position)
        
        plt.pause(.01)      

In [9]:
e = ESTradingEnv_v2()

In [10]:
%%time
done = False
e.render()
while not done:
#for i in range(0,50):
    #print(e._position)
    if e._position == 0: #if flat
        action = np.random.randint(1, 2 + 1) #randomly select buy or sell action
    else:
        action = 0 #hold    
    observation, reward, done, info = e.step(action)
    e.render()

8790
8791
8792
8793
8794
8795
8796
8797
8798
8799
8800
8801
8802
8803
8804
8805
8806
8807
8808
8809
8810
8811
8812
8813
8814
8815
8816
8817
8818
8819
8820
8821
8822
8823
8824
8825
8826
8827
8828
8829
8830
8831
8832
8833
8834
8835
8836
8837
8838
8839
8840
8841
8842
8843
8844
8845
8846
8847
8848
8849
8850
8851
8852
8853
8854
8855
8856
8857
8858
8859
8860
8861
8862
8863
8864
8865
8866
8867
8868
8869
8870
CPU times: user 9.32 s, sys: 11.3 s, total: 20.6 s
Wall time: 7.09 s


In [35]:
e.reset()

array([[ 9.96997561e-01,  9.97185213e-01,  9.96716082e-01,
         9.96903734e-01,  2.65693356e+03, -8.76726756e-01,
        -4.80988769e-01,  0.00000000e+00],
       [ 9.96903734e-01,  9.96903734e-01,  9.96622256e-01,
         9.96716082e-01,  2.65682084e+03, -8.87010833e-01,
        -4.61748613e-01,  0.00000000e+00],
       [ 9.96716082e-01,  9.97091387e-01,  9.96622256e-01,
         9.97091387e-01,  2.65681409e+03, -8.96872742e-01,
        -4.42288690e-01,  0.00000000e+00],
       [ 9.97091387e-01,  9.97654344e-01,  9.96997561e-01,
         9.97654344e-01,  2.65695084e+03, -9.06307787e-01,
        -4.22618262e-01,  0.00000000e+00],
       [ 9.97560518e-01,  9.97748170e-01,  9.97560518e-01,
         9.97654344e-01,  2.65707457e+03, -9.15311479e-01,
        -4.02746690e-01,  0.00000000e+00],
       [ 9.97654344e-01,  9.97935823e-01,  9.97560518e-01,
         9.97841997e-01,  2.65723414e+03, -9.23879533e-01,
        -3.82683432e-01,  0.00000000e+00],
       [ 9.97841997e-01,  9.978419

In [82]:
gd = td.iloc[-1]
subset = gd[['open','high','low','close']]
subset

open      2565.5
high     2565.75
low      2562.25
close       2563
Name: 5058, dtype: object

In [87]:
tuples = [(subset.name, ) + tuple(subset.values)]
tuples

[(5058, 2565.5, 2565.75, 2562.25, 2563.0)]

In [59]:
random.choice(x[3:-20])

2592

In [63]:
e._five_min_data.iloc[0:3]

Unnamed: 0,date,open,high,low,close,ema
0,2017-07-31 09:35:00-04:00,2474.75,2475.75,2474.0,2475.5,
1,2017-07-31 09:40:00-04:00,2475.25,2476.0,2473.75,2475.5,
2,2017-07-31 09:45:00-04:00,2475.75,2475.75,2474.5,2474.75,


In [98]:
(0,) * 3

(0, 0, 0)

In [41]:
int(1500/81)

18

In [17]:
fd = pd.read_feather('../data/processed/ES_5mintrading.feather')
fd = fd[fd['date']>'1-1-2018']
i = fd[(fd['date'].dt.hour == 9) & \
                 (fd['date'].dt.minute == 35)].index.tolist()

In [22]:
df = fd.loc[8790-64:8790].copy()
#df = df.loc[:,['open','high','low','close','ema','sin_time','cos_time']]
#df.loc[:,'position'] = 0
df.tail()

Unnamed: 0,date,open,high,low,close,ema,sin_time,cos_time,sell_r,sell_b,buy_r,buy_b
8786,2018-01-03 16:00:00-05:00,2713.5,2713.75,2710.75,2712.5,2711.333001,-0.707107,0.707107,0,0,0,0
8787,2018-01-03 16:05:00-05:00,2712.25,2712.5,2711.5,2712.0,2711.396525,-0.691513,0.722364,50,1,0,0
8788,2018-01-03 16:10:00-05:00,2711.75,2712.75,2711.75,2712.25,2711.477808,-0.67559,0.737277,0,0,-50,0
8789,2018-01-03 16:15:00-05:00,2712.25,2712.75,2710.25,2710.25,2711.360874,-0.659346,0.75184,-50,0,0,0
8790,2018-01-04 09:35:00-05:00,2719.25,2721.25,2718.25,2721.0,2712.278886,-0.625923,-0.779884,-50,0,0,0


In [31]:
f = fd.iloc[:20].copy()
f['open'] = f['open'] / f['open'].iloc[-1]
f

Unnamed: 0,date,open,high,low,close,ema,sin_time,cos_time,sell_r,sell_b,buy_r,buy_b
0,2017-07-31 09:35:00-04:00,1.002329,2475.75,2474.0,2475.5,,-0.402747,-0.915311,50,0,50,0
1,2017-07-31 09:40:00-04:00,1.002531,2476.0,2473.75,2475.5,,-0.422618,-0.906308,0,0,0,0
2,2017-07-31 09:45:00-04:00,1.002734,2475.75,2474.5,2474.75,,-0.442289,-0.896873,0,0,-50,0
3,2017-07-31 09:50:00-04:00,1.002228,2475.0,2473.5,2473.75,,-0.461749,-0.887011,0,0,-50,1
4,2017-07-31 09:55:00-04:00,1.002025,2474.25,2472.75,2472.75,,-0.480989,-0.876727,0,0,-50,0
5,2017-07-31 10:00:00-04:00,1.001519,2473.25,2471.0,2471.25,,-0.5,-0.866025,0,0,-50,0
6,2017-07-31 10:05:00-04:00,1.001013,2471.75,2468.25,2468.75,,-0.518773,-0.854912,0,0,0,0
7,2017-07-31 10:10:00-04:00,0.999797,2470.75,2468.5,2470.5,,-0.5373,-0.843391,0,0,0,0
8,2017-07-31 10:15:00-04:00,1.000608,2470.75,2469.25,2470.5,,-0.55557,-0.83147,50,2,0,0
9,2017-07-31 10:20:00-04:00,1.000506,2471.25,2470.25,2470.5,,-0.573576,-0.819152,0,0,-50,1
