<img src="https://certificate.tpq.io/taim_logo.png" width="350px" align="right">

# Reinforcement Learning

## Execution & Deployment

Dr Yves J Hilpisch | The AI Machine

http://aimachine.io | http://twitter.com/dyjh

In [None]:
# Fetch the course repository into the current working directory.
!git clone https://github.com/tpq-classes/rl_for_finance.git
import sys
# Make the cloned directory importable.
# NOTE(review): presumably this is where `oandaenv`, `tradingbot` and
# `backtest` (imported further down) live — confirm.
sys.path.append('rl_for_finance')


In [None]:
import os
import numpy as np
import pandas as pd
from pylab import plt
from pprint import pprint
# Plot style used for all figures in this notebook.
plt.style.use('seaborn-v0_8')
# Switch off pandas' chained-assignment warning.
pd.set_option('mode.chained_assignment', None)
# Render floats with five decimals in pandas output.
pd.set_option('display.float_format', '{:.5f}'.format)
# NumPy printing: four decimals, no scientific notation for small numbers.
np.set_printoptions(suppress=True, precision=4)
# NOTE(review): hash randomisation is fixed when the interpreter starts, so
# setting PYTHONHASHSEED here likely only affects child processes, not this
# kernel — confirm the intent.
os.environ['PYTHONHASHSEED'] = '0'

## Oanda Trading Platform

For details about `tpqoa` see: http://github.com/yhilpisch/tpqoa

In [None]:
import tpqoa

In [None]:
# Create the tpqoa API object from a local configuration file.
# NOTE(review): the file presumably holds the Oanda account credentials and
# the relative path assumes a specific directory layout — confirm.
api = tpqoa.tpqoa('../../../data/pyalgo.cfg')

In [None]:
# Retrieve the instruments offered through the API object.
ins = api.get_instruments()

In [None]:
# Show the first five entries only.
ins[:5]

### Historical Data

In [None]:
# EUR_USD history from 2018-01-01 to 2020-02-29 at granularity 'D'.
# NOTE(review): 'D' is presumably daily bars and price='A' presumably ask
# prices — confirm against tpqoa.
raw = api.get_history(instrument='EUR_USD',
                      start='2018-01-01',
                      end='2020-02-29',
                      granularity='D',
                      price='A')

In [None]:
raw.info()

In [None]:
raw.head()

In [None]:
# Column 'c' is presumably the candle close — TODO confirm.
raw['c'].plot(figsize=(10, 6));

In [None]:
# Same instrument, February 2020 only; `raw` from above is overwritten.
# NOTE(review): 'M1' is presumably one-minute bars and price='M' presumably
# mid prices — confirm against tpqoa.
raw = api.get_history(instrument='EUR_USD',
                      start='2020-02-01',
                      end='2020-02-29',
                      granularity='M1',
                      price='M') 

In [None]:
raw.info()

In [None]:
raw.tail()

In [None]:
raw['c'].plot(figsize=(10, 6));

### Streaming Data

In [None]:
# Stream live EUR_USD data.
# NOTE(review): stop=10 presumably ends the stream after ten ticks — confirm.
api.stream_data('EUR_USD', stop=10)

### Order Placement

In [None]:
# Place an order for +25,000 units of EUR_USD (the orders with negative units
# further down are the opposite side).
# NOTE(review): suppress=True / ret=True presumably silence tpqoa's own
# printing and return the order as a dict — confirm.
order = api.create_order('EUR_USD', units=25000,
                         suppress=True, ret=True)
pprint(order)

In [None]:
def print_details(order):
    """Return the headline fields of an order as a tuple.

    Despite its name, this function prints nothing; in the notebook the
    returned tuple is displayed as the cell result.

    Parameters
    ----------
    order : mapping
        Must provide the keys 'time', 'instrument', 'units', 'price'
        and 'pl'.

    Returns
    -------
    tuple
        ``(time, instrument, units, price, pl)`` taken from ``order``.
    """
    wanted = ('time', 'instrument', 'units', 'price', 'pl')
    return tuple(order[key] for key in wanted)

In [None]:
print_details(order)

In [None]:
# Opposite-signed order of the same size as the one placed above.
order = api.create_order('EUR_USD', units=-25000,
                         suppress=True, ret=True)
pprint(order)

In [None]:
print_details(order)

In [None]:
# Buy order with sl_distance set.
# NOTE(review): sl_distance presumably attaches a stop-loss 0.005 away from
# the fill price — confirm against tpqoa.
order = api.create_order('EUR_USD', units=25000,
                         sl_distance=0.005,
                         suppress=True, ret=True)

In [None]:
print_details(order)

In [None]:
# Look up the transaction whose id directly follows the order's id.
# NOTE(review): presumably this is the attached stop-loss order; relies on
# consecutive transaction ids — confirm.
sl_order = api.get_transaction(tid=int(order['id']) + 1)

In [None]:
sl_order

In [None]:
# Order price next to the price and distance recorded on the follow-up
# transaction.
(sl_order['time'], sl_order['type'], order['price'],
 sl_order['price'], sl_order['distance'])

In [None]:
# Opposite-signed order of the same size.
order = api.create_order('EUR_USD', units=-25000, suppress=True, ret=True)

In [None]:
print_details(order)

In [None]:
# Buy order with tsl_distance set.
# NOTE(review): tsl_distance presumably attaches a trailing stop-loss with a
# distance of 0.005 — confirm against tpqoa.
order = api.create_order('EUR_USD', units=25000,
                         tsl_distance=0.005,
                         suppress=True, ret=True)

In [None]:
print_details(order)

In [None]:
# Same "next transaction id" lookup as for the stop-loss example.
tsl_order = api.get_transaction(tid=int(order['id']) + 1)

In [None]:
tsl_order

In [None]:
(tsl_order['time'], tsl_order['type'],
 order['price'], tsl_order['distance'])

In [None]:
# Opposite-signed order of the same size.
order = api.create_order('EUR_USD', units=-25000,
                         suppress=True, ret=True)

In [None]:
print_details(order)

In [None]:
# `order` on the right-hand side still refers to the preceding sell order, so
# tp_price is that order's price plus 0.002.
# NOTE(review): tp_price presumably attaches a take-profit order — confirm.
order = api.create_order('EUR_USD', units=25000,
                         tp_price=order['price'] + 0.002,
                         suppress=True, ret=True)

In [None]:
print_details(order)

In [None]:
int(order['id'])

In [None]:
# Same "next transaction id" lookup as above.
tp_order = api.get_transaction(tid=int(order['id']) + 1)

In [None]:
tp_order

In [None]:
(tp_order['time'], tp_order['type'],
 order['price'], tp_order['price'])

In [None]:
# Opposite-signed order of the same size.
order = api.create_order('EUR_USD', units=-25000,
                         suppress=True, ret=True)

In [None]:
print_details(order)

### Transactions & Account

In [None]:
# Print transactions relative to an id 22 below the latest order's id.
# NOTE(review): the offset 22 is presumably chosen to cover the orders placed
# above; exact `tid` semantics should be confirmed against tpqoa.
api.print_transactions(tid=int(order['id']) - 22)

In [None]:
api.get_account_summary()

## Oanda Environment

In [None]:
import oandaenv as oe

In [None]:
# Instrument shared by the learning, validation and test environments.
symbol = 'EUR_USD'

In [None]:
# All three environments use data from this single day.
day = '2020-03-20'

In [None]:
%%time
# Learning environment: 06:00-12:00 on `day` at granularity 'S5'.
# NOTE(review): 'S5' is presumably five-second bars and price='M' presumably
# mid prices; window/lags/leverage/min_* semantics are defined in oandaenv —
# confirm there.
learn_env = oe.OandaEnv(symbol=symbol,
                  start=f'{day} 06:00:00',
                  end=f'{day} 12:00:00',
                  granularity='S5',
                  price='M',
                  features=[symbol, 'r', 's', 'm', 'v'],
                  window=10,
                  lags=6,
                  leverage=20,
                  min_accuracy=0.4,
                  min_performance=0.8
                 )

In [None]:
learn_env.data.info()

In [None]:
# Validation environment: 12:00-14:00 on the same day. Every setting is
# copied from learn_env, including its mu and std.
# NOTE(review): mu/std are presumably the normalisation statistics of the
# learning data — confirm in oandaenv.
valid_env = oe.OandaEnv(symbol=learn_env.symbol,
                  start=f'{day} 12:00:00',
                  end=f'{day} 14:00:00',
                  granularity=learn_env.granularity,
                  price=learn_env.price,
                  features=learn_env.features,
                  window=learn_env.window,
                  lags=learn_env.lags,
                  leverage=learn_env.leverage,
                  min_accuracy=learn_env.min_accuracy,
                  min_performance=learn_env.min_performance,
                  mu=learn_env.mu,
                  std=learn_env.std
                 )

In [None]:
valid_env.data.info()

In [None]:
# Test environment: 14:00-15:00 on the same day. Settings are copied from
# learn_env except min_accuracy and min_performance, which are set to 0.
# NOTE(review): zero thresholds presumably keep an episode from being cut
# short — confirm in oandaenv.
test_env = oe.OandaEnv(symbol=learn_env.symbol,
                  start=f'{day} 14:00:00',
                  end=f'{day} 15:00:00',
                  granularity=learn_env.granularity,
                  price=learn_env.price,
                  features=learn_env.features,
                  window=learn_env.window,
                  lags=learn_env.lags,
                  leverage=learn_env.leverage,
                  min_accuracy=0,
                  min_performance=0,
                  mu=learn_env.mu,
                  std=learn_env.std
                 )

In [None]:
test_env.data.info()

In [None]:
# Plot the three data sets on one axis; the dashed vertical lines mark the
# last timestamp of the learning and of the validation data.
ax = learn_env.data[learn_env.symbol].plot(figsize=(10, 6))
plt.axvline(learn_env.data.index[-1], ls='--')
valid_env.data[learn_env.symbol].plot(ax=ax, style='-.')
plt.axvline(valid_env.data.index[-1], ls='--')
test_env.data[learn_env.symbol].plot(ax=ax, style='--');

## Trading Bot

### Training the Bot

In [None]:
import tradingbot

In [None]:
# NOTE(review): presumably seeds the random number generators so that
# training is reproducible — confirm in tradingbot.
tradingbot.set_seeds(100)
# NOTE(review): the positional arguments 32 and 0.00001 are presumably the
# hidden-layer size and the learning rate — confirm against
# tradingbot.TradingBot.
agent = tradingbot.TradingBot(32, 0.00001, learn_env, valid_env)

In [None]:
# Number of episodes passed to agent.learn below.
episodes = 31

In [None]:
%time agent.learn(episodes)

In [None]:
agent.epsilon

In [None]:
tradingbot.plot_performance(agent)

### Backtesting the Bot

In [None]:
import backtest as bt

In [None]:
# Backtest on the test environment.
env = test_env

In [None]:
# NOTE(review): bt.backtest presumably adds the columns 'p' (position) and
# 's' (strategy return) to env.data, which the cells below read — confirm in
# backtest.
bt.backtest(agent, env)

In [None]:
# Frequency of each value in 'p', skipping the first `env.lags` rows.
env.data['p'].iloc[env.lags:].value_counts()

In [None]:
# Number of rows where 'p' differs from the previous row. diff() yields NaN
# for the first row and NaN != 0 is True, so that row is counted as well.
sum(env.data['p'].iloc[env.lags:].diff() != 0)

In [None]:
# Leveraged sums of 'r' and 's', exponentiated.
# NOTE(review): assumes both columns hold log returns, which makes the result
# a gross performance figure — confirm.
(env.data[['r', 's']].iloc[env.lags:] * env.leverage).sum(
        ).apply(np.exp)

In [None]:
# Same figures minus 1.
(env.data[['r', 's']].iloc[env.lags:] * env.leverage).sum(
        ).apply(np.exp) - 1

In [None]:
# Cumulative version of the same calculation, plotted over time.
(env.data[['r', 's']].iloc[env.lags:] * env.leverage).cumsum(
        ).apply(np.exp).plot(figsize=(10, 6));

### Deployment

In [None]:
import tpqoa

In [None]:
class OandaTradingBot(tpqoa.tpqoa):
    """Trade one instrument on Oanda from a trained agent's predictions.

    Each incoming tick is stored, the tick history is resampled into bars,
    the bars are turned into normalised features and the agent's model is
    asked for an action. Whenever the resulting signal differs from the
    current position, an order is sent that moves the position to
    ``+units`` (long) or ``-units`` (short).
    """

    def __init__(self, config_file, agent, granularity, units,
                 verbose=True):
        """Initialise the API connection and the bot's bookkeeping.

        Parameters
        ----------
        config_file :
            Passed unchanged to the ``tpqoa.tpqoa`` initialiser.
        agent :
            Trained agent; its ``learn_env`` and ``model`` are used.
        granularity :
            Rule handed to ``DataFrame.resample`` when building bars;
            ``None`` falls back to the learning environment's granularity.
        units :
            Size of the position held when long or short.
        verbose : bool
            If true, ``report_trade`` also pretty-prints the order.
        """
        super().__init__(config_file)
        env = agent.learn_env
        self.agent = agent
        self.env = env
        self.symbol = env.symbol
        self.window = env.window
        self.granularity = (env.granularity if granularity is None
                            else granularity)
        self.units = units
        self.verbose = verbose
        self.trades = 0      # number of reported trades
        self.position = 0    # 0 = flat, 1 = long, -1 = short
        self.pl = []         # profit/loss of every reported trade
        self.tick_data = pd.DataFrame()
        # Bars required before the first prediction is attempted.
        self.min_length = env.window + env.lags

    def _prepare_data(self):
        """Add the feature columns to ``self.data`` and normalise them.

        The normalised copy is stored in ``self.data_`` using the
        environment's ``mu`` and ``std``.
        """
        bars = self.data
        bars['r'] = np.log(bars / bars.shift(1))
        bars.dropna(inplace=True)
        rolling_price = bars[self.symbol].rolling(self.window)
        bars['s'] = rolling_price.mean()
        bars['m'] = bars['r'].rolling(self.window).mean()
        bars['v'] = bars['r'].rolling(self.window).std()
        bars.dropna(inplace=True)
        self.data_ = (bars - self.env.mu) / self.env.std

    def _resample_data(self):
        """Rebuild ``self.data`` as bars of the mid price.

        The last resampled row is dropped (presumably because that bar is
        not complete yet) and the index is made timezone-naive.
        """
        resampler = self.tick_data.resample(self.granularity, label='right')
        self.data = resampler.last().ffill().iloc[:-1]
        self.data = pd.DataFrame(self.data['mid'])
        self.data.columns = [self.symbol]
        self.data.index = self.data.index.tz_localize(None)

    def _get_state(self):
        """Return the latest ``lags`` rows of normalised features.

        The result is reshaped to ``[1, lags, n_features]``.
        """
        n_lags = self.env.lags
        recent = self.data_[self.env.features].iloc[-n_lags:]
        return np.reshape(recent.values, [1, n_lags, self.env.n_features])

    def report_trade(self, time, side, order):
        """Record the order's profit/loss and print a trade summary.

        Parameters
        ----------
        time :
            Timestamp shown in the printed lines.
        side : str
            Label shown after "GOING", e.g. 'LONG' or 'SHORT'.
        order : mapping
            Order details; ``order['pl']`` is converted to float and
            appended to ``self.pl``.
        """
        self.trades += 1
        pl = float(order['pl'])
        self.pl.append(pl)
        cpl = sum(self.pl)
        rule = 80 * '='
        print('\n' + rule)
        print(f'{time} | *** GOING {side} ({self.trades}) ***')
        print(f'{time} | PROFIT/LOSS={pl:.2f} | CUMULATIVE={cpl:.2f}')
        print(rule)
        if not self.verbose:
            return
        pprint(order)
        print(rule)

    def on_success(self, time, bid, ask):
        """Process one tick and trade if the signal calls for it.

        Presumably invoked by tpqoa's streaming loop for every received
        tick. A prediction is made only when the resampled data has grown
        beyond ``self.min_length``, which is then incremented so that the
        next prediction waits for a further bar.
        """
        tick = pd.DataFrame(
            {'ask': ask, 'bid': bid, 'mid': (bid + ask) / 2},
            index=[pd.Timestamp(time)])
        self.tick_data = pd.concat((self.tick_data, tick))
        self._resample_data()
        if len(self.data) <= self.min_length:
            return
        self.min_length += 1
        self._prepare_data()
        state = self._get_state()
        prediction = np.argmax(self.agent.model.predict(state)[0, 0])
        signal = 1 if prediction == 1 else -1
        if signal == 1 and self.position in [0, -1]:
            # From flat buy `units`; from short buy twice as much.
            size = (1 - self.position) * self.units
            order = self.create_order(self.symbol, units=size,
                                      suppress=True, ret=True)
            self.report_trade(time, 'LONG', order)
            self.position = 1
        elif signal == -1 and self.position in [0, 1]:
            # From flat sell `units`; from long sell twice as much.
            size = -(1 + self.position) * self.units
            order = self.create_order(self.symbol, units=size,
                                      suppress=True, ret=True)
            self.report_trade(time, 'SHORT', order)
            self.position = -1

In [None]:
# Deploy the trained agent with a position size of 25,000 units. '5s' is the
# rule the bot passes to DataFrame.resample when building bars; the learning
# environment was created with granularity 'S5'.
otb = OandaTradingBot('../../../data/pyalgo.cfg', agent, '5s', 25000, verbose=True)

In [None]:
# Run the bot on live data; on_success is the bot's per-tick handler.
# NOTE(review): stop=550 presumably ends the stream after 550 ticks — confirm.
otb.stream_data(agent.learn_env.symbol, stop=550)
print('\n' + 80 * '=')
print('*** CLOSING OUT ***')
# Only send a closing order when a position is actually open. If the bot
# never traded, the order size would be 0 units and there would be no filled
# order (and possibly no `otb.time`) to report.
if otb.position != 0:
    order = otb.create_order(otb.symbol,
                    units=-otb.position * otb.units,
                    suppress=True, ret=True)
    otb.report_trade(otb.time, 'NEUTRAL', order)
    # Keep the bot's bookkeeping in line with the now flat account.
    otb.position = 0
else:
    print('No open position - nothing to close out.')
print(80 * '=')

In [None]:
# Per-trade profit/loss values collected by the bot's report_trade calls.
pl = np.array(otb.pl)

In [None]:
pl

In [None]:
# Running total of the per-trade profit/loss.
pl.cumsum()

In [None]:
# Bar chart of the per-trade profit/loss, trades numbered from 1.
plt.bar(np.arange(1, len(pl) + 1), pl);

In [None]:
# Cumulative profit/loss by trade number.
plt.plot(np.arange(1, len(pl) + 1), pl.cumsum());

<img src='http://hilpisch.com/taim_logo.png' width="350px" align="right">

<br><br><br><a href="http://tpq.io" target="_blank">http://tpq.io</a> | <a href="http://twitter.com/dyjh" target="_blank">@dyjh</a> | <a href="mailto:ai@tpq.io">ai@tpq.io</a>