<a href="https://colab.research.google.com/github/keeprich/AI-Trading-Bot/blob/main/RL_Trading.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install third-party packages into the notebook runtime. Later cells import
# stable_baselines3, oandapyV20, matplotlib and seaborn.
!pip install stable-baselines3 tensorflow oandapyV20 matplotlib seaborn


Collecting stable-baselines3
  Downloading stable_baselines3-2.3.2-py3-none-any.whl.metadata (5.1 kB)
Collecting oandapyV20
  Downloading oandapyV20-0.7.2.tar.gz (51 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.6/51.6 kB[0m [31m965.7 kB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gymnasium<0.30,>=0.28.1 (from stable-baselines3)
  Downloading gymnasium-0.29.1-py3-none-any.whl.metadata (10 kB)
Collecting farama-notifications>=0.0.1 (from gymnasium<0.30,>=0.28.1->stable-baselines3)
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl.metadata (558 bytes)
Downloading stable_baselines3-2.3.2-py3-none-any.whl (182 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m182.3/182.3 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading gymnasium-0.29.1-py3-none-any.whl (953 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m953.9/953.9 kB[0m [31m15.7 MB/s[0m eta [36m0:00

In [None]:
import os
from getpass import getpass

import oandapyV20
import pandas as pd
from oandapyV20 import API
from oandapyV20.endpoints.instruments import InstrumentsCandles

# Credentials must never be committed with the notebook. The access token is read
# from the OANDA_API_KEY environment variable, falling back to an interactive
# prompt that does not echo the value or store it in the notebook.
# NOTE(review): a token was previously hardcoded here and published with the
# notebook; it should be revoked and regenerated.
API_KEY = os.environ.get("OANDA_API_KEY") or getpass("OANDA API token: ")
# The account id can be overridden with OANDA_ACCOUNT_ID; the default keeps the
# account this notebook was originally written against.
ACCOUNT_ID = os.environ.get("OANDA_ACCOUNT_ID", "101-004-30239848-002")
# Shared client used by fetch_price_data() and place_order().
api = API(access_token=API_KEY)

def fetch_price_data(instrument, granularity="H1", count=500):
    """Request candles for ``instrument`` and return their mid prices as a DataFrame.

    Parameters
    ----------
    instrument : instrument name passed to the candles endpoint (e.g. "EUR_USD").
    granularity : candle granularity sent as the ``granularity`` request parameter.
    count : number of candles requested, sent as the ``count`` request parameter.

    Returns
    -------
    pandas.DataFrame
        One row per candle in response order, with float columns
        ``open``, ``high``, ``low`` and ``close`` taken from each candle's ``mid`` entry.
    """
    request = InstrumentsCandles(
        instrument=instrument,
        params={"granularity": granularity, "count": count},
    )
    response = api.request(request)

    # Output column name -> key of that price inside candle["mid"].
    mid_keys = {"open": "o", "high": "h", "low": "l", "close": "c"}
    rows = [
        {column: float(candle["mid"][key]) for column, key in mid_keys.items()}
        for candle in response["candles"]
    ]
    return pd.DataFrame(rows)


In [None]:
import gym
from gym import spaces
import numpy as np

class TradingEnv(gym.Env):
    """Long-only, single-asset trading environment using the classic Gym API.

    ``reset()`` returns an observation and ``step()`` returns the 4-tuple
    ``(observation, reward, done, info)``.

    Actions
        1 opens a long position when flat, 2 closes an open long position and pays
        ``close at exit - close at entry`` as the reward, any other value holds.
    Observations
        The numeric columns of the current row of ``data`` as a float32 vector.
        Non-numeric columns (e.g. timestamps) are left out because they cannot be
        represented in the float32 ``Box`` observation space.

    ``balance`` is only ever set to ``initial_balance``; trades do not change it.
    """

    def __init__(self, data, initial_balance=1000):
        """Build the environment.

        Parameters
        ----------
        data : pandas.DataFrame with a numeric ``close`` column; every other numeric
            column is also exposed to the agent as a feature.
        initial_balance : value stored in ``balance`` at construction and on reset.
        """
        super(TradingEnv, self).__init__()
        self.data = data
        self.initial_balance = initial_balance
        self.balance = initial_balance
        self.position = 0  # 0: flat, 1: long (step() never sets -1/short)
        self.current_step = 0
        self.total_profit = 0
        self.entry_price = None  # close price at which the open long was entered

        self.action_space = spaces.Discrete(3)  # 0: Hold, 1: Buy, 2: Sell

        # Snapshot the numeric columns once as float32. This keeps observations
        # consistent with the declared space, avoids a per-step DataFrame lookup,
        # and isolates the env from later mutation of the caller's DataFrame.
        self._observations = data.select_dtypes(include=[np.number]).to_numpy(dtype=np.float32)
        self._close = data["close"].to_numpy(dtype=np.float64)

        self.observation_space = spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=(self._observations.shape[1],),
            dtype=np.float32,
        )

    def reset(self):
        """Restore the starting state and return the observation for the first row."""
        self.balance = self.initial_balance
        self.position = 0
        self.current_step = 0
        self.total_profit = 0
        self.entry_price = None
        return self._observations[self.current_step].copy()

    def step(self, action):
        """Advance one row, apply ``action`` at that row's close price.

        Returns ``(observation, reward, done, info)``. ``reward`` is non-zero only
        when an open long position is closed; ``done`` becomes True on the last row.
        """
        reward = 0.0
        last_index = len(self._close) - 1
        # Clamp so that one-row data, or a call made after the episode has ended,
        # stays on the last row instead of indexing past the end.
        self.current_step = min(self.current_step + 1, last_index)
        done = self.current_step >= last_index

        current_price = self._close[self.current_step]

        if action == 1:  # Buy
            if self.position == 0:
                self.position = 1
                self.entry_price = current_price

        elif action == 2:  # Sell
            if self.position == 1:
                profit = current_price - self.entry_price
                reward += profit
                self.total_profit += profit
                self.position = 0
                self.entry_price = None

        next_state = self._observations[self.current_step].copy()
        return next_state, float(reward), done, {}


In [None]:
!pip install gymnasium>=0.28.1

In [None]:
!pip install shimmy>=0.2.1

In [None]:
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env

# Load the price history. The environment, the trading loop and the plots below
# all read the global `df`, which no earlier cell defines, so a fresh kernel
# would otherwise stop here with a NameError.
df = fetch_price_data("EUR_USD")

# Wrap the environment in a single-env vectorised wrapper, as expected by PPO
env = make_vec_env(lambda: TradingEnv(df), n_envs=1)

# Initialize and train PPO agent
model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=10000)


Using cpu device


TypeError: float() argument must be a string or a real number, not 'Timestamp'

In [None]:
from oandapyV20.endpoints.orders import OrderCreate

def place_order(instrument, action, lot_size=1000):
    """Submit a market order for ``instrument`` according to ``action``.

    ``action == 1`` requests ``lot_size`` units, ``action == 2`` requests
    ``-lot_size`` units, and any other action returns without sending anything.
    The API response is printed on success; a ``V20Error`` is caught and printed.
    Always returns ``None``.
    """
    if action == 1:  # Buy
        signed_units = lot_size
    elif action == 2:  # Sell
        signed_units = -lot_size
    else:  # Hold (No action)
        return None

    market_order = {
        "units": str(signed_units),
        "instrument": instrument,
        "timeInForce": "FOK",
        "type": "MARKET",
        "positionFill": "DEFAULT",
    }
    payload = {"order": market_order}

    try:
        result = api.request(OrderCreate(ACCOUNT_ID, data=payload))
    except oandapyV20.exceptions.V20Error as err:
        print("Error placing order:", err)
    else:
        print("Order placed:", result)


In [None]:
# Run one episode with the trained policy and mirror each decision as an order.
# `env` is a vectorised env with a single sub-env, so actions, rewards and done
# flags are length-1 arrays. It also resets itself as soon as an episode ends,
# so the profit is accumulated from the rewards rather than read back from the env.
obs = env.reset()
episode_profit = 0.0
for _ in range(len(df)):
    action, _states = model.predict(obs)
    obs, reward, done, info = env.step(action)
    episode_profit += float(reward[0])

    # Place order based on action (sends a request for every buy/sell decision)
    place_order("EUR_USD", int(action[0]))

    if done[0]:
        print("Total Profit:", episode_profit)
        break


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Plot price with buy/sell signals
def plot_trading_signals(df, actions):
    """Plot the close price and mark the rows where the action was buy (1) or sell (2).

    Parameters
    ----------
    df : DataFrame with a ``close`` column; it is not modified.
    actions : one action per row of ``df``, in row order.
    """
    # assign() returns a new frame, so the caller's DataFrame (which the trading
    # environment also reads) does not gain an extra "Action" column.
    annotated = df.assign(Action=actions)

    plt.figure(figsize=(14, 8))
    plt.plot(annotated['close'], label="Close Price", color="blue", alpha=0.5)

    # Plot buy and sell signals
    buy_signals = annotated[annotated['Action'] == 1]
    sell_signals = annotated[annotated['Action'] == 2]

    plt.scatter(buy_signals.index, buy_signals['close'], marker='^', color='green', label="Buy Signal", alpha=1)
    plt.scatter(sell_signals.index, sell_signals['close'], marker='v', color='red', label="Sell Signal", alpha=1)

    plt.title("Price with Trading Signals")
    plt.xlabel("Time")
    plt.ylabel("Price")
    plt.legend()
    plt.show()

# Collect the policy's action for each row by actually stepping through one
# episode. Resetting before every prediction would ask the policy about the
# first row over and over.
obs = env.reset()
actions = []
for _ in range(len(df) - 1):
    action, _states = model.predict(obs)
    actions.append(int(action[0]))  # single sub-env: unwrap the length-1 array
    obs, _reward, done, _info = env.step(action)
    if done[0]:
        break
# The episode ends on the last row, so no decision is taken there: pad with
# hold (0) to get exactly one action per row of df.
actions.extend([0] * (len(df) - len(actions)))
plot_trading_signals(df, actions)


In [None]:
# Replay one episode and accumulate the per-step rewards. A reward is the profit
# realised when a long position is closed, so the running sum is the cumulative
# realised profit. The vectorised `env` has no `total_profit` attribute and
# resets itself when an episode ends, so the value cannot be read from it.
obs = env.reset()
running_profit = 0.0
cumulative_profit = []
for _ in range(len(df)):
    action, _states = model.predict(obs)
    obs, reward, done, info = env.step(action)
    running_profit += float(reward[0])  # single sub-env: unwrap the length-1 array
    cumulative_profit.append(running_profit)
    if done[0]:
        break

plt.figure(figsize=(12, 6))
plt.plot(cumulative_profit, label="Cumulative Profit")
plt.title("Cumulative Profit Over Time")
plt.xlabel("Step")
plt.ylabel("Cumulative Profit")
plt.legend()
plt.show()
