In [1]:
import json
import numpy as np
import torch
import pandas as pd
import gym
from gym import spaces
from stable_baselines3 import PPO, DQN, SAC
from stable_baselines3.common.env_util import make_vec_env

In [2]:
def load_trade_data(file_path):
    """
    Reads JSON lines from a file and returns a processed DataFrame.
    """
    data = []
    with open(file_path, "r") as f:
        for line in f:
            try:
                trade = json.loads(line.strip())
                data.append(trade)
            except json.JSONDecodeError:
                continue  # Skip malformed lines

    df = pd.DataFrame(data)

    # Convert timestamps to datetime
    df["TIME"] = pd.to_datetime(df["TIME"], unit="ms")
    df["WINDOW_START"] = pd.to_datetime(df["WINDOW_START"], unit="ms")
    df["WINDOW_END"] = pd.to_datetime(df["WINDOW_END"], unit="ms")

    # Encode SIDE as +1 (BUY) and -1 (SELL)
    df["SIDE"] = df["SIDE"].map({"BUY": 1, "SELL": -1})

    # Compute price momentum (difference from avg price)
    df["PRICE_MOMENTUM"] = df["PRICE"] - df["AVG_PRICE_100S"]

    # Compute volume change (difference from previous trade)
    df["VOLUME_CHANGE"] = df["VOLUME_100S"].diff().fillna(0)

    return df

In [5]:
class TradingEnv(gym.Env):
    """
    A custom trading environment for reinforcement learning.
    """

    def __init__(self, trade_data):
        super(TradingEnv, self).__init__()

        self.seedValue = 42
        self.trade_data = trade_data
        self.current_step = 0
        self.initial_balance = 100000
        self.balance = self.initial_balance
        self.position = 0  # Position in BTC
        self.last_price = 0  # Last trade price

        # Define action space: 0 (Hold), 1 (Buy), 2 (Sell)
        self.action_space = spaces.Discrete(3)

        # Define state space (features)
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(5,), dtype=np.float32
        )

    def seed(self, seed):
        """
        Sets the seed for the environment.
        """
        self.seedValue = seed

    def reset(self):
        """
        Resets the environment to start a new episode.
        """
        self.current_step = 0
        self.balance = self.initial_balance
        self.position = 0
        self.last_price = self.trade_data.iloc[0]["PRICE"]
        return self._next_observation()

    def _next_observation(self):
        """
        Retrieves the next state from the data.
        """
        row = self.trade_data.iloc[self.current_step]
        obs = np.array([
            row["PRICE"],
            row["SIDE"],
            row["VOLUME_100S"],
            row["PRICE_MOMENTUM"],
            row["VOLUME_CHANGE"]
        ], dtype=np.float32)
        return obs

    def step(self, action):
        """
        Executes an action and returns the new state, reward, and done flag.
        """
        row = self.trade_data.iloc[self.current_step]
        current_price = row["PRICE"]

        reward = 0
        if action == 1 and self.balance > current_price:  # Buy
            self.position += 1
            self.balance -= current_price
        elif action == 2 and self.position > 0:  # Sell
            self.position -= 1
            self.balance += current_price
            reward = current_price - self.last_price  # Profit/loss

        # Move to next step
        self.last_price = current_price
        self.current_step += 1

        # Check if the episode is done
        done = self.current_step >= len(self.trade_data) - 1

        return self._next_observation(), reward, done, {}

    def render(self, mode="human"):
        """
        Renders the environment (useful for debugging).
        """
        print(f"Step: {self.current_step}, Balance: {self.balance}, Position: {self.position}")

In [6]:
file_path = "backtest.txt"  
trade_data = load_trade_data(file_path)

In [9]:
# Create Gym environment
env = TradingEnv(trade_data)
vec_env = make_vec_env(lambda: env, n_envs=1)

# Choose an RL model (PPO, DQN, or SAC)
model = DQN("MlpPolicy", vec_env, verbose=1, device="cuda")

# Train the model
model.learn(total_timesteps=100000)

# Save the trained model
model.save("model")



Using cuda device


In [12]:
# Test the trained model
# obs = torch.tensor(env.reset(), device="cuda", dtype=torch.float32)
obs = torch.tensor(env.reset(), dtype=torch.float32)
total_steps = 100000
for stp in range(total_steps):
    # action, _states = model.predict(obs.to("cuda"))
    action, _states = model.predict(obs)
    obs, reward, done, _ = env.step(action)
    env.render()
    if done:
        break

Step: 1, Balance: 100000, Position: 0
Step: 2, Balance: 100000, Position: 0
Step: 3, Balance: 100000, Position: 0
Step: 4, Balance: 100000, Position: 0
Step: 5, Balance: 100000, Position: 0
Step: 6, Balance: 3079.6300000000047, Position: 1
Step: 7, Balance: 3079.6300000000047, Position: 1
Step: 8, Balance: 3079.6300000000047, Position: 1
Step: 9, Balance: 3079.6300000000047, Position: 1
Step: 10, Balance: 3079.6300000000047, Position: 1
Step: 11, Balance: 3079.6300000000047, Position: 1
Step: 12, Balance: 3079.6300000000047, Position: 1
Step: 13, Balance: 3079.6300000000047, Position: 1
Step: 14, Balance: 3079.6300000000047, Position: 1
Step: 15, Balance: 3079.6300000000047, Position: 1
Step: 16, Balance: 3079.6300000000047, Position: 1
Step: 17, Balance: 3079.6300000000047, Position: 1
Step: 18, Balance: 3079.6300000000047, Position: 1
Step: 19, Balance: 3079.6300000000047, Position: 1
Step: 20, Balance: 3079.6300000000047, Position: 1
Step: 21, Balance: 3079.6300000000047, Position: 

KeyboardInterrupt: 