Imports

In [1]:
import pandas as pd
import numpy as np
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.meta.preprocessor.preprocessors import FeatureEngineer
from finrl.agents.stablebaselines3.models import DRLAgent
from finrl.config import INDICATORS
from stable_baselines3.common.vec_env import DummyVecEnv

# Additional imports needed for RL with exploration strategies
from stable_baselines3 import PPO, A2C, DDPG, TD3, SAC
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise
from stable_baselines3.common.evaluation import evaluate_policy
import matplotlib.pyplot as plt

Data Preprocessing

In [2]:
# Load and preprocess EUR/USD Forex data for FinRL

# Load CSV
df = pd.read_csv("data/eurusd_hourly.csv")

# Rename columns to FinRL-compatible format
df = df.rename(columns={
    'Gmt time': 'date',
    'Open': 'open',
    'High': 'high',
    'Low': 'low',
    'Close': 'close',
    'Volume': 'volume'
})

# Convert date to datetime object
df['date'] = pd.to_datetime(df['date'], format='%d.%m.%Y %H:%M:%S.%f')

# Add a ticker symbol (required even for one asset)
df['tic'] = 'EURUSD'

# Reorder columns
df = df[['date', 'tic', 'open', 'high', 'low', 'close', 'volume']]

# Drop any rows with missing values
df = df.dropna()

# Feature Engineering
fe = FeatureEngineer(
    use_technical_indicator=True,
    tech_indicator_list=INDICATORS,  # e.g. macd, rsi_30, etc.
    use_turbulence=False,
    user_defined_feature=False
)

# Apply feature engineering
df_processed = fe.preprocess_data(df)

# Show processed head to verify
df_processed.head()


Successfully added technical indicators


Unnamed: 0,date,tic,open,high,low,close,volume,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma
0,2023-01-01 22:00:00,EURUSD,1.0697,1.07066,1.06788,1.06929,2477.4199,0.0,1.070745,1.068595,100.0,66.666667,100.0,1.06929,1.06929
1,2023-01-01 23:00:00,EURUSD,1.06896,1.07047,1.06829,1.07005,4932.96,1.7e-05,1.070745,1.068595,100.0,66.666667,100.0,1.06967,1.06967
2,2023-01-02 00:00:00,EURUSD,1.07007,1.07058,1.06912,1.0704,3522.1299,3.3e-05,1.071048,1.068778,100.0,100.0,100.0,1.069913,1.069913
3,2023-01-02 01:00:00,EURUSD,1.07041,1.07087,1.06919,1.06977,432.28,1.6e-05,1.070815,1.06894,62.466736,55.724417,100.0,1.069878,1.069878
4,2023-01-02 02:00:00,EURUSD,1.06977,1.06977,1.06862,1.06904,1581.3,-2.2e-05,1.070815,1.068605,43.083297,-97.578348,21.592914,1.06971,1.06971


Data Split

In [None]:
# Chronologically split the dataset 80% train / 20% test
split_index = int(len(df_processed) * 0.8)

train_df = df_processed.iloc[:split_index]
test_df = df_processed.iloc[split_index:]

# Set up environment parameters
stock_dimension = len(train_df['tic'].unique())  # Should be 1 (EURUSD)
state_space = 1 + 2 * stock_dimension + len(INDICATORS) * stock_dimension

env_kwargs = {
    "hmax": 100,
    "initial_amount": 100000,
    "buy_cost_pct": 0.001,
    "sell_cost_pct": 0.001,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": INDICATORS,
    "action_space": stock_dimension,
    "reward_scaling": 1e-4,
    "num_stock_shares": [0] * stock_dimension  # Initial holdings (0 for each asset)
}

# Create the training environment 
env_train = DummyVecEnv([lambda: StockTradingEnv(train_df, **env_kwargs)])

# (Optional) Print environment info
print(f"Training set length: {len(train_df)}, Testing set length: {len(test_df)}")
print("State space:", state_space)

Training set length: 10040, Testing set length: 2511
State space: 11


Training

In [None]:
from stable_baselines3 import DQN

model_dqn = DQN(
    policy="MlpPolicy",
    env=env_train,
    learning_rate=1e-4,
    buffer_size=10_000,
    learning_starts=100,
    batch_size=32,
    tau=1.0,
    gamma=0.99, 
    train_freq=(1, "step"),
    target_update_interval=250,
    exploration_fraction=0.1,
    exploration_initial_eps=1.0,
    exploration_final_eps=0.01,
    policy_kwargs=dict(net_arch=[64, 64]),
    verbose=1,
    tensorboard_log="./dqn_eurusd_tensorboard/"
)

model_dqn.learn(total_timesteps=50_000)


Using cpu device


AssertionError: The algorithm only supports (<class 'gymnasium.spaces.discrete.Discrete'>,) as action spaces but Box(-1.0, 1.0, (1,), float32) was provided