# **Installing Dependencies**

In [None]:
!pip install tensorflow
!pip install gym
!pip install keras
!pip install keras-rl2
!pip install finta 
!pip install oandapyV20

Collecting keras-rl2
[?25l  Downloading https://files.pythonhosted.org/packages/b6/fc/143ee05aed804b3b9052d7b17b13832bc7f3c28e7b1bc50edd09c29d8525/keras_rl2-1.0.5-py3-none-any.whl (52kB)
[K     |████████████████████████████████| 61kB 4.5MB/s 
Installing collected packages: keras-rl2
Successfully installed keras-rl2-1.0.5
Collecting finta
  Downloading https://files.pythonhosted.org/packages/06/8b/94331e5e8f4e6ba2690658d4a65db0a254a89117756337316ce8f6b2026b/finta-1.3-py3-none-any.whl
Installing collected packages: finta
Successfully installed finta-1.3
Collecting oandapyV20
[?25l  Downloading https://files.pythonhosted.org/packages/fd/df/560a9bc4171eab3c3b16603387bc0cedc5a9aa07d4f8835f30f51a1b7158/oandapyV20-0.7.0.tar.gz (46kB)
[K     |████████████████████████████████| 51kB 4.6MB/s 
[?25hBuilding wheels for collected packages: oandapyV20
  Building wheel for oandapyV20 (setup.py) ... [?25l[?25hdone
  Created wheel for oandapyV20: filename=oandapyV20-0.7.0-cp37-none-any.whl size=7

# **Loading Data**

In [None]:
import pandas as pd
from finta import TA

import oandapyV20.endpoints.instruments as instruments
import oandapyV20

In [None]:
import os

# Credentials come from the environment so they are never saved inside the
# notebook; both fall back to the original empty-string placeholders.
accountID = os.environ.get("OANDA_ACCOUNT_ID", "")
access_token = os.environ.get("OANDA_ACCESS_TOKEN", "")

# Shared API client; get_data() sends its candle requests through it.
client = oandapyV20.API(access_token=access_token)

In [None]:
def get_data(symbol, timeframe, count):
    """Request candles for `symbol` and return them as an OHLCV DataFrame.

    Args:
        symbol: Instrument name passed to the candles endpoint (e.g. 'XAU_USD').
        timeframe: Value sent as the request's "granularity" parameter.
        count: Value sent as the request's "count" parameter.

    Returns:
        DataFrame with float64 columns Open, Close, High, Low, Volume (in that
        order), built from the response's 'mid.*' and 'volume' fields. Rows
        with a missing value in any of these columns are dropped.
    """
    params = {"count": count, "granularity": timeframe}
    r = instruments.InstrumentsCandles(instrument=symbol, params=params)
    client.request(r)
    candles = r.response['candles']

    raw = pd.json_normalize(candles)
    source_columns = {
        'Open': 'mid.o',
        'Close': 'mid.c',
        'High': 'mid.h',
        'Low': 'mid.l',
        'Volume': 'volume',
    }
    ohlcv = pd.DataFrame(
        {name: raw[source].astype('float64') for name, source in source_columns.items()},
        index=raw.index,
    )
    # Drop incomplete rows on the frame itself. Calling .dropna() on a single
    # Series and assigning it back to a column is undone by index alignment,
    # which re-inserts NaN for the dropped labels.
    return ohlcv.dropna()

In [None]:
def get_returns(pair , timeframe , count):
  """Return the log returns of the close price, padded with a leading 0.

  Candles are fetched with get_data(pair, timeframe, count). The first
  difference of log(Close) is one element shorter than the price series, so a
  0 is inserted at position 0.
  """
  closes = get_data(pair , timeframe , count)['Close'].to_numpy()
  log_diffs = np.diff(np.log(closes))
  return np.insert(log_diffs, 0, 0)

In [None]:
# XAU_USD candles at 'D' granularity; every feature below is derived from them.
df = get_data('XAU_USD' , 'D' , 5000)

# High-low spread relative to its 20-row rolling mean.
spread = df['High'] - df['Low']
average_spread = spread.rolling(20).mean()
df['spread_ratio'] = spread / average_spread

# The original `spread / df['Close'] - df['Low']` parsed as
# (spread / Close) - Low, so the column was approximately -Low (visible in the
# saved df.tail() output). The denominator is now parenthesised, and zero
# denominators become NaN so the final fillna(0) handles them instead of
# leaving +/-inf in the feature matrix.
# NOTE(review): confirm that spread / (Close - Low) is the intended ratio.
close_above_low = (df['Close'] - df['Low']).replace(0, float('nan'))
df['range'] = spread / close_above_low

# Volume relative to its 20-row rolling mean.
average_volume = df['Volume'].rolling(20).mean()
df['volume_ratio'] = df['Volume'] / average_volume

# One-row percentage change of the close.
df['return'] = df['Close'].pct_change()

# finta indicators computed on the OHLCV columns.
df['RSI'] = TA.RSI(df,20)
df['STOCH'] = TA.STOCH(df,20)
df['ADX'] = TA.ADX(df , 20)
df['MACD'] = TA.MACD(df)['MACD']

# Rolling windows and indicators leave NaN in their first rows; replace with 0.
# Rebinding (rather than inplace=True) keeps the cell's data flow explicit.
df = df.fillna(0)

In [None]:
df.tail()

Unnamed: 0,Open,Close,High,Low,Volume,spread_ratio,range,volume_ratio,return,RSI,STOCH,ADX,MACD
4309,1889.11,1898.445,1899.99,1869.945,55516.0,1.321136,-1869.929174,1.19246,0.005168,62.003997,81.22352,32.941307,21.071038
4310,1898.47,1877.82,1903.12,1874.54,47970.0,1.250859,-1874.52478,1.018423,-0.010864,56.14446,47.992493,31.690343,18.529681
4311,1876.845,1866.16,1878.08,1844.63,54296.0,1.442009,-1844.612075,1.138732,-0.006209,53.155156,29.909009,30.557615,15.397283
4312,1866.16,1858.82,1869.205,1851.665,44211.0,0.746818,-1851.655564,0.925573,-0.003933,51.343715,19.71244,29.481523,12.18213
4313,1859.48,1857.865,1860.855,1853.265,5027.0,0.345471,-1853.260915,0.114658,-0.000514,51.105194,18.385775,28.459236,9.448125


# **FOREX ENV**


In [None]:
from gym import Env
from gym.spaces import Discrete , Box
import numpy as np

In [None]:
class ForexEnv(Env):
  """Gym environment that trades one price series with at most one open order.

  Each tick the agent sends BUY (0) or SELL (1). An action opposite to the
  current order closes it, scores it and opens the reverse order; an action
  sent while an order is still open in that same direction is ignored. An
  order held for more than MAX_HOLD_TICKS ticks is closed automatically.

  An episode ends when the tick reaches `end_tick`, when capital is at or
  below two thirds of the initial capital, or when capital is below 80% of
  its value at the previous step.
  """

  # Action / order-side codes shared by step(), buy(), sell() and close().
  BUY = 0
  SELL = 1
  # An open order older than this many ticks is closed by step().
  MAX_HOLD_TICKS = 10

  def __init__(self , df , capital , leverage , order_volume , frame_bound):
    """Store the trading parameters and precompute prices, returns and features.

    Args:
      df: DataFrame holding 'Close' plus the feature columns read by
        process_data().
      capital: Starting capital; restored on every reset().
      leverage: Multiplier applied to the order amount in close().
      order_volume: Percentage of current capital committed per order
        (close() divides it by 100).
      frame_bound: (start, end) pair used to slice the rows of `df`.
    """
    self.df = df
    self.init_capital = capital
    self.capital = capital
    self.prev_capital = capital
    self.leverage = leverage
    self.order_volume = order_volume
    self.frame_bound = frame_bound
    self.returns , self.prices, self.signal_features = self.process_data()
    self.shape = (self.signal_features.shape[1],)

    # Two discrete actions: 0 = buy, 1 = sell. Any other value passed to
    # step() leaves the position untouched.
    self.action_space = Discrete(2)
    self.observation_space = Box(low=-np.inf, high=np.inf, shape=self.shape, dtype=np.float32)

    self._clear_trade_state()

    self.end_tick = len(self.prices) - 30
    self.current_tick = None
    self.done = None

  def _clear_trade_state(self):
    """Reset order bookkeeping and reward counters to their initial values."""
    self.open_position = False
    self.order = None
    self.ordered_at = None
    self.ordered_at_index = None
    self.closed_at = None
    self.total_reward = 0
    self.reward = 0
    self.total_trades = 0
    self.reward_index = 0

  def step(self , action):
    """Advance one tick and apply `action`.

    Returns:
      (observation, reward, done, info). `reward` is non-zero only on a step
      where calculate_reward() scored a reversal at a favourable price;
      `info` reports capital and trade counters.
    """
    self.done = False
    # The reward belongs to this step only. Without this reset the value set
    # by an earlier calculate_reward() call was returned again on every
    # following step.
    self.reward = 0
    self.current_tick += 1
    obs = self.signal_features[self.current_tick]
    price = self.prices[self.current_tick]

    # Termination is evaluated on the capital as it stood before this step's
    # trades are applied.
    self.done = (
        self.current_tick == self.end_tick
        or self.capital <= 2*(self.init_capital / 3)
        or self.capital <  0.8 * self.prev_capital
    )
    self.prev_capital = self.capital

    # Force-close an order that has been open for too long.
    if self.open_position and self.current_tick - self.ordered_at_index > self.MAX_HOLD_TICKS:
      self.close(price)

    if not self.done:
      if action == self.SELL:
        if self.order == self.BUY:
          # close() is a no-op if the order was already force-closed above,
          # so profit and commission are applied exactly once.
          self.close(price)
          self.calculate_reward()
        self.sell(price)
      elif action == self.BUY:
        if self.order == self.SELL:
          self.close(price)
          self.calculate_reward()
        self.buy(price)

    info = {'initial capital' : self.init_capital , 'final capital': self.capital , 'total profit': (self.capital - self.init_capital) , 'total trades': self.total_trades}
    return obs , self.reward , self.done , info

  def render(self, mode='human'):
    """No-op; `mode` is accepted so callers may pass it and is ignored."""
    pass

  def reset(self):
    """Start a new episode and return the first observation.

    Prints the previous episode's capital, trade count and reward counters
    before clearing them.
    """
    print(self.capital , self.total_trades , self.total_reward , self.reward)
    self.done = False
    self.current_tick = 0
    self.capital = self.init_capital
    self.prev_capital = self.init_capital
    self._clear_trade_state()

    return self.signal_features[self.current_tick]

  def close(self , close_price):
    """Close the open order at `close_price` and book profit/loss and commission.

    Does nothing when no order is open, so an order cannot be settled twice.
    """
    if not self.open_position:
      return
    self.open_position = False
    self.closed_at = close_price

    order_amount = (self.capital * self.order_volume) / 100
    order_amount = order_amount * self.leverage

    # Sum of log returns between the entry tick and the current tick; a sell
    # order profits from the opposite move.
    change = np.sum(self.returns[self.ordered_at_index:self.current_tick])
    if self.order == self.SELL:
      change = -change

    profit = order_amount * change
    commission = order_amount / 10000
    self.capital = self.capital + profit - commission

  def _open_order(self , side , price):
    """Open an order on `side` at `price` unless one is already open."""
    if self.open_position:
      return
    self.total_trades += 1
    self.open_position = True
    self.order = side
    self.ordered_at = price
    self.ordered_at_index = self.current_tick

  def buy(self , price):
    """Open a buy order at `price`; ignored while another order is open."""
    self._open_order(self.BUY , price)

  def sell(self , price):
    """Open a sell order at `price`; ignored while another order is open."""
    self._open_order(self.SELL , price)

  def calculate_reward(self):
    """Score the most recent order against the current price.

    The step reward is `capital - init_capital` when the price moved in the
    order's favour (up for a buy, down for a sell) and 0 otherwise. It is
    stored in `self.reward` and added to `self.total_reward`.
    """
    price = self.prices[self.current_tick]
    favourable = (
        (self.order == self.BUY and price > self.ordered_at)
        or (self.order == self.SELL and price < self.ordered_at)
    )
    step_reward = self.capital - self.init_capital if favourable else 0

    self.total_reward += step_reward
    self.reward = step_reward

  def process_data(self):
    """Slice `df` to `frame_bound` and derive returns, prices and features.

    Returns:
      (returns, prices, signal_features): `prices` is the sliced 'Close'
      column, `returns` the first difference of log(prices) (one element
      shorter than `prices`), and `signal_features` the sliced
      spread_ratio / range / volume_ratio / return / RSI columns.
    """
    start = self.frame_bound[0]
    end = self.frame_bound[1]
    prices = self.df.loc[:, 'Close'].to_numpy()[start:end]
    returns = np.diff(np.log(prices))
    signal_features = self.df.loc[:, ['spread_ratio' , 'range' , 'volume_ratio' , 'return'  , 'RSI']].to_numpy()[start:end]
    return returns , prices, signal_features


# **Create a Deep Learning Model with Keras**

In [None]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten , PReLU , Activation
from tensorflow.keras.optimizers import Adam

In [None]:
# Environment used to train the DQN agent: 100 units of starting capital, 50x
# leverage and 1% of capital per order. Rows before index 20 are skipped -
# presumably because the 20-row rolling features are not populated there.
env = ForexEnv(df=df,frame_bound=(20,df.shape[0]) , capital=100 , leverage=50  , order_volume=1)

In [None]:
# Presumably the intended hidden-layer width.
# NOTE(review): no cell visible in this notebook reads NODES - build_model()
# hard-codes 24 units per layer; confirm which value is wanted.
NODES = 16

In [None]:
def build_model(env):
  """Build the Q-network for `env`.

  The network flattens an input of shape (1, observation size), applies two
  24-unit ReLU layers and ends in a linear layer with one output per action
  in `env.action_space`.
  """
  obs_size = env.observation_space.shape[0]
  n_actions = env.action_space.n
  layers = [
      Flatten(input_shape=(1, obs_size)),
      Dense(24, activation='relu'),
      Dense(24, activation='relu'),
      Dense(n_actions, activation='linear'),
  ]
  return Sequential(layers)

In [None]:
model = build_model(env)

In [None]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 5)                 0         
_________________________________________________________________
dense (Dense)                (None, 24)                144       
_________________________________________________________________
dense_1 (Dense)              (None, 24)                600       
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 50        
Total params: 794
Trainable params: 794
Non-trainable params: 0
_________________________________________________________________


In [None]:
# env = ForexEnv(df=df, frame_bound=(50,5000) , capital=100 , leverage=50  , order_volume=1)
# obs = env.reset()
# while True: 
#     obs = obs[np.newaxis, ...]
    
#     action , _ = model.predict(obs)
#     action = action[0]
#     # print(action)
#     obs, rewards, done, info = env.step(action)
#     if done:
#         print(info)
#         break

# **Build Agent with Keras-RL**

In [None]:
from rl.agents import DQNAgent
from rl.policy import LinearAnnealedPolicy , EpsGreedyQPolicy , BoltzmannQPolicy
from rl.memory import SequentialMemory

In [None]:
def build_agent(env, q_network=None):
  """Create a DQN agent for `env`.

  Args:
    env: Environment whose `action_space.n` fixes the number of actions.
    q_network: Keras model used as the Q-network. When omitted, the
      notebook-level `model` is used, as before. Pass it explicitly when
      `model` may have been rebound to something else by a later cell.

  Returns:
    An uncompiled DQNAgent using a Boltzmann policy, a 50000-entry sequential
    memory with window length 1, 10 warm-up steps and a soft target-model
    update factor of 1e-2.
  """
  network = model if q_network is None else q_network
  nb_actions = env.action_space.n

  policy = BoltzmannQPolicy()
  memory = SequentialMemory(limit=50000, window_length=1)
  dqn = DQNAgent(model=network, memory=memory, policy=policy,
                 nb_actions=nb_actions, nb_steps_warmup=10, target_model_update=1e-2)
  return dqn

In [None]:
# Build the agent, compile it with Adam (learning rate 1e-3) while tracking
# mean absolute error, then train on `env` for 100000 steps without rendering.
dqn = build_agent(env)
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=100000, visualize=False, verbose=1)

Training for 100000 steps ...
97.050226240494 17 0 0
Interval 1 (0 steps performed)
    1/10000 [..............................] - ETA: 13:27 - reward: 0.0000e+00



 1318/10000 [==>...........................] - ETA: 1:28 - reward: 0.0000e+0066.1981558028443 121 0 0
7 episodes - episode_reward: 0.000 [0.000, 0.000] - loss: 0.045 - mae: 15.957 - mean_q: 4.077 - initial capital: 100.000 - final capital: 83.557 - total profit: -16.443 - total trades: 58.638

Interval 2 (10000 steps performed)
  576/10000 [>.............................] - ETA: 1:32 - reward: 0.0000e+0066.1981558028443 121 0 0
 1896/10000 [====>.........................] - ETA: 1:20 - reward: 0.0000e+0066.1981558028443 121 0 0
8 episodes - episode_reward: 0.000 [0.000, 0.000] - loss: 0.021 - mae: 18.263 - mean_q: 1.449 - initial capital: 100.000 - final capital: 82.770 - total profit: -17.230 - total trades: 61.655

Interval 3 (20000 steps performed)
 1152/10000 [==>...........................] - ETA: 1:26 - reward: 0.0000e+0066.1981558028443 121 0 0
7 episodes - episode_reward: 0.000 [0.000, 0.000] - loss: 0.016 - mae: 17.775 - mean_q: 0.512 - initial capital: 100.000 - final capital

<tensorflow.python.keras.callbacks.History at 0x7f1b814a00d0>

In [None]:
# Run 10 evaluation episodes on the same environment and report the mean of
# the per-episode rewards recorded in the returned history.
scores = dqn.test(env, nb_episodes=10, visualize=False)
print(np.mean(scores.history['episode_reward']))

Testing for 10 episodes ...
192.6899098954048 334 2979.1378027809565 39.49658074198763
Episode 1: reward: 0.000, steps: 4264
160.2086874281787 388 0 0
Episode 2: reward: 0.000, steps: 4264
160.2086874281787 388 0 0


KeyboardInterrupt: ignored

In [None]:
# NOTE(review): the saved output of this cell is an AttributeError, so this
# call does not work as written. Presumably the agent has no predict(); the
# dqn.test(...) calls elsewhere in this notebook are what produce actions here
# (keras-rl agents also expose forward(observation) - confirm before using).
dqn.predict()

AttributeError: ignored

In [None]:
_ = dqn.test(env, nb_episodes=15, visualize=False)

Testing for 15 episodes ...
79.15717396707971 79 0 0
Episode 1: reward: 0.000, steps: 1322
66.1981558028443 121 0 0
Episode 2: reward: 0.000, steps: 1322
66.1981558028443 121 0 0
Episode 3: reward: 0.000, steps: 1322
66.1981558028443 121 0 0


KeyboardInterrupt: ignored

# **Save Model**

In [None]:
dqn.save_weights('DQN_FOREX.h5f', overwrite=True)

# **Forex Env 2**


In [None]:
from gym import Env
from gym.spaces import Discrete , Box
from gym.utils import seeding
import numpy as np

In [None]:
# Position codes stored in ForexEnv2.position.
LONG = 0
SHORT = 1
FLAT = 2

# Action codes accepted by ForexEnv2.step() and recorded in ForexEnv2.action.
BUY = 0
SELL = 1
HOLD = 2

class ForexEnv2(Env):
    """Gym environment holding at most one LONG or SHORT position at a time.

    Actions are BUY, SELL and HOLD. BUY opens a long from FLAT or closes a
    short; SELL opens a short from FLAT or closes a long; any other
    combination is treated as HOLD. Closing a position yields its
    fee-adjusted return as the step reward and compounds it into
    `krw_balance`.

    The observation is the feature row for the current tick with the
    unrealised return of the open position appended as one extra value.
    """

    def __init__(self, df , frame_bound, show_trade=True):
        """Slice features and prices from `df` and define the gym spaces.

        Args:
            df: DataFrame with 'Close' and the five feature columns listed
                below.
            frame_bound: (start, end) pair used to slice the rows of `df`.
            show_trade: When true, step() prints the balance and trade counts
                every 100 ticks.
        """
        self.show_trade = show_trade
        self.df = df
        # NOTE(review): the feature cell earlier in this notebook creates a
        # 'return' column, not 'price_rate_of_change' - confirm the frame
        # passed in actually carries this column, otherwise this raises
        # KeyError.
        self.features = df.loc[:, ['spread_ratio' , 'range' , 'volume_ratio' , 'price_rate_of_change' , 'RSI']].to_numpy()[frame_bound[0]:frame_bound[1]]
        self.frame_bound = frame_bound
        self.actions = ["LONG", "SHORT", "FLAT"]
        # Fee rate; the return formula applies (1 - fee) twice per round trip.
        self.fee = 0.0005
        self.seed()

        self.prices = self.process_data()
        # One extra slot for the unrealised return appended by updateState().
        self.shape = (self.features.shape[1] + 1,)

        # One discrete action per entry in self.actions.
        self.action_space = Discrete(len(self.actions))
        self.observation_space = Box(low=-np.inf, high=np.inf, shape=self.shape, dtype=np.float32)

    def render(self, mode='human', verbose=False):
        """No-op renderer; always returns None."""
        return None

    def seed(self, seed=None):
        """Create the environment's random generator and return [seed]."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _open(self, position, action):
        """Enter `position` at the current closing price and record `action`."""
        self.position = position
        self.action = action
        self.entry_price = self.closingPrice

    def _close(self, action):
        """Exit the open position, realise its return and go FLAT."""
        self.action = action
        self.exit_price = self.closingPrice
        # get_profit() depends on the position, so it must be evaluated before
        # the position is switched to FLAT.
        self.reward += self.get_profit()
        self.position = FLAT
        # Compound the realised return into the running balance.
        self.krw_balance = self.krw_balance * (1.0 + self.reward)
        self.entry_price = 0

    def step(self, action):
        """Apply `action`, advance one tick and return (state, reward, done, info).

        Once the episode is done, further calls return the last state and
        reward with an empty info dict. On the final tick the reward is the
        unrealised return of whatever position is still open.
        """
        if self.done:
            return self.state, self.reward, self.done, {}
        self.reward = 0

        # Valid sequences: LONG = buy, hold..., sell; SHORT = sell, hold..., buy.
        # Anything else (e.g. buy while already long) is recorded as HOLD.
        self.action = HOLD
        if action == BUY:
            if self.position == FLAT:
                self._open(LONG, BUY)
            elif self.position == SHORT:
                self._close(BUY)
                self.n_short += 1
        elif action == SELL:
            if self.position == FLAT:
                self._open(SHORT, SELL)
            elif self.position == LONG:
                self._close(SELL)
                self.n_long += 1

        self.current_tick += 1
        if self.show_trade and self.current_tick % 100 == 0:
            # Report the running balance; the original referenced
            # self.portfolio, which is never defined and raised AttributeError.
            print("Tick: {0}/ Portfolio (USD): {1}".format(self.current_tick, self.krw_balance))
            print("Long: {0}/ Short: {1}".format(self.n_long, self.n_short))
        # closingPrice is still the price the action was applied at; it is
        # refreshed by updateState() on the next line.
        self.history.append((self.action, self.current_tick, self.closingPrice, self.krw_balance, self.reward))
        self.updateState()
        if self.current_tick > self.prices.shape[0] - 2:
            self.done = True
            self.reward = self.get_profit()
        return self.state, self.reward, self.done, {'portfolio':np.array([self.krw_balance]),
                                                    "history":self.history,
                                                    "n_trades":{'long':self.n_long, 'short':self.n_short}}

    def get_profit(self):
        """Return the fee-adjusted return of the open position at the current price.

        Returns 0 when the position is FLAT.
        """
        if self.position == LONG:
            gross = (self.closingPrice - self.entry_price) / self.entry_price
        elif self.position == SHORT:
            gross = (self.entry_price - self.closingPrice) / self.closingPrice
        else:
            return 0
        return (gross + 1)*(1-self.fee)**2 - 1

    def reset(self):
        """Start a new episode and return the first state.

        Prints the previous episode's balance and trade counts, or 'oops' on
        the first call when those attributes do not exist yet.
        """
        try:
            print(self.krw_balance , self.n_long, self.n_short)
        except AttributeError:
            # First reset: no previous episode to report.
            print('oops')

        self.current_tick = 0

        # Closed-trade counters.
        self.n_long = 0
        self.n_short = 0

        self.history = []  # (action, tick, price, balance, reward) per step
        self.krw_balance = 1000  # starting balance
        self.profit = 0

        self.action = HOLD
        self.position = FLAT
        self.done = False

        self.updateState()
        return self.state

    def updateState(self):
        """Refresh the closing price and rebuild the state for the current tick."""
        self.closingPrice = float(self.prices[self.current_tick])
        profit = self.get_profit()
        # State = feature row + one value (unrealised return of the position).
        self.state = np.concatenate((self.features[self.current_tick], [profit]))
        return self.state

    def process_data(self):
        """Return the 'Close' column of `df` sliced to `frame_bound` as an array."""
        start = self.frame_bound[0]
        end = self.frame_bound[1]
        prices = self.df.loc[:, 'Close'].to_numpy()[start:end]

        return prices

# **RL Model 2**

In [None]:
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines import A2C , DQN

In [None]:
# Keep the raw environment under its own name. The original lambda closed over
# `env`, which is rebound to the vectorised wrapper on the last line, so any
# later call to env_maker() would have returned the wrapper instead.
forex_env2 = ForexEnv2(df=df,frame_bound=(20,df.shape[0]) , show_trade = False)
env_maker = lambda: forex_env2
env = DummyVecEnv([env_maker])

In [None]:
# Train an A2C agent with the 'MlpLstmPolicy' policy on the vectorised env.
# NOTE: this rebinds `model`, the name build_agent() reads as the Keras
# Q-network; rerun the build_model cell before calling build_agent() again.
model = A2C('MlpLstmPolicy', env, verbose=1)
model.learn(total_timesteps=50000)

oops
---------------------------------
| explained_variance | 0.351    |
| fps                | 16       |
| nupdates           | 1        |
| policy_entropy     | 1.1      |
| total_timesteps    | 5        |
| value_loss         | 0.000745 |
---------------------------------
---------------------------------
| explained_variance | -0.0611  |
| fps                | 206      |
| nupdates           | 100      |
| policy_entropy     | 1.1      |
| total_timesteps    | 500      |
| value_loss         | 1.89e-05 |
---------------------------------
---------------------------------
| explained_variance | -1.19    |
| fps                | 169      |
| nupdates           | 200      |
| policy_entropy     | 1.1      |
| total_timesteps    | 1000     |
| value_loss         | 0.000379 |
---------------------------------
---------------------------------
| explained_variance | -0.0202  |
| fps                | 140      |
| nupdates           | 300      |
| policy_entropy     | 1.1      |
| total_t

<stable_baselines.a2c.a2c.A2C at 0x7ff17fc7dd50>

In [None]:
model.save("A2C_FOREX")