In [1]:
import sys, os
sys.path.insert(0, os.path.abspath('..'))

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

#from src.data.data_collector import DataCollector
from src.models.model_trainer_rl_v2 import ModelTrainerRL, TradingEnvRL
from src.models.backtester import PortfolioBacktester, PortfolioBacktesterRL
from src.utils.config_loader import load_config


# Load the project configuration once; the cells below read its
# 'reinforcement_learning' section (and the nested 'environment' mapping).
# NOTE(review): the path is relative to the kernel's working directory, while
# sys.path is extended with '..' above — confirm which directory the notebook
# is expected to be launched from.
config = load_config("config/config.yaml")

## PPO Agent

### NEE

In [2]:
# NEE (2022 file): train a PPO agent on the first 70% of the rows and
# backtest its actions on the remaining 30%.
# (Ensure your CSV has columns like 'close', 'rsi', 'sma', etc.)

# --- Load data ---
data_path = 'data/processed/NEE_processed_2022.csv'
try:
    data = pd.read_csv(data_path)
except FileNotFoundError:
    print("Error: Data file not found. Check path.")
    # Fail fast: carrying on would either raise a NameError below or, worse,
    # silently reuse a `data` frame left in the kernel by another cell.
    raise

if 'Date' in data.columns:
    data['Date'] = pd.to_datetime(data['Date'])
    data = data.set_index('Date')
print("Data loaded successfully.")

# --- Chronological train/test split (70% / 30%) ---
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# --- Environment settings ---
# Resolved once so the training and test environments cannot drift apart
# (the fallback for 'initial_balance' used to differ between the two).
rl_config = config['reinforcement_learning']
env_params = rl_config['environment']
initial_balance = env_params.get('initial_balance', 100000)
commission = env_params.get('commission', 0.001)
lookback_window = env_params.get('lookback_window', 30)

# --- Training ---
trainer = ModelTrainerRL(rl_config)

print("Training PPO Agent...")

env_train = TradingEnvRL(
    train_df,
    initial_balance=initial_balance,
    commission=commission,
    lookback_window=lookback_window,
    reward_func='profit'  # or 'sharpe'
)

result = trainer.train_ppo(env_train)
model = result['model']

print("Training Complete.")

# --- Inference on the held-out split ---
print("Generating Agent Predictions on Test Data...")

env_test = TradingEnvRL(
    test_df,
    initial_balance=initial_balance,
    commission=commission,
    lookback_window=lookback_window,
    reward_func='profit'  # or 'sharpe'
)

obs, _ = env_test.reset()
done = False
actions = []

# Manual inference loop: step the raw (non-vectorised) environment until the
# episode terminates or is truncated, recording the first action component.
while not done:
    action, _ = model.predict(obs, deterministic=True)
    actions.append(action[0])
    obs, _, terminated, truncated, _ = env_test.step(action)
    done = terminated or truncated

# --- Backtest ---
print("Running Backtest...")

backtester = PortfolioBacktesterRL(env_params)

portfolio = backtester.run_backtest(
    price_data=test_df['close'],
    predicted_weights=np.array(actions),
    lookback_window=lookback_window  # Important for data alignment
)
comparison = backtester.compare_with_buy_and_hold_rl()

# Show metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")

# Plot
portfolio.plot().show()





Data loaded successfully.
Training PPO Agent...



In the future `np.object` will be defined as the corresponding NumPy scalar.

Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.
INFO:src.models.model_trainer_rl_v2:Training PPO for 200000 timesteps
INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...


Training Complete.
Generating Agent Predictions on Test Data...
Running Backtest...


INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 36.93%
INFO:BacktesterRL:Buy & Hold Return: 28.02%
INFO:BacktesterRL:Outperformance: 8.91%



--- Strategy Performance ---
Total Return (%): 36.9300
Annual Return (%): 69.2500
Sharpe Ratio: 1.9843
Sortino Ratio: 2.9889
Max Drawdown (%): -12.4200
Calmar Ratio: 5.5742
Win Rate (%): 57.9500
Total Trades: 88.0000
Final Value ($): 136928.8300


### AGX

In [3]:
# AGX (2022 file): train a PPO agent on the first 70% of the rows and
# backtest its actions on the remaining 30%.
# (Ensure your CSV has columns like 'close', 'rsi', 'sma', etc.)

# --- Load data ---
data_path = 'data/processed/AGX_processed_2022.csv'
try:
    data = pd.read_csv(data_path)
except FileNotFoundError:
    print("Error: Data file not found. Check path.")
    # Fail fast: without this, the rest of the cell would silently run on the
    # `data` frame left in the kernel by a previously executed cell.
    raise

if 'Date' in data.columns:
    data['Date'] = pd.to_datetime(data['Date'])
    data = data.set_index('Date')
print("Data loaded successfully.")

# --- Chronological train/test split (70% / 30%) ---
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# --- Environment settings ---
# Resolved once so the training and test environments cannot drift apart
# (the fallback for 'initial_balance' used to differ between the two).
rl_config = config['reinforcement_learning']
env_params = rl_config['environment']
initial_balance = env_params.get('initial_balance', 100000)
commission = env_params.get('commission', 0.001)
lookback_window = env_params.get('lookback_window', 30)

# --- Training ---
trainer = ModelTrainerRL(rl_config)

print("Training PPO Agent...")

env_train = TradingEnvRL(
    train_df,
    initial_balance=initial_balance,
    commission=commission,
    lookback_window=lookback_window,
    reward_func='profit'  # or 'sharpe'
)

result = trainer.train_ppo(env_train)
model = result['model']

print("Training Complete.")

# --- Inference on the held-out split ---
print("Generating Agent Predictions on Test Data...")

env_test = TradingEnvRL(
    test_df,
    initial_balance=initial_balance,
    commission=commission,
    lookback_window=lookback_window,
    reward_func='profit'  # or 'sharpe'
)

obs, _ = env_test.reset()
done = False
actions = []

# Manual inference loop: step the raw (non-vectorised) environment until the
# episode terminates or is truncated, recording the first action component.
while not done:
    action, _ = model.predict(obs, deterministic=True)
    actions.append(action[0])
    obs, _, terminated, truncated, _ = env_test.step(action)
    done = terminated or truncated

# --- Backtest ---
print("Running Backtest...")

backtester = PortfolioBacktesterRL(env_params)

portfolio = backtester.run_backtest(
    price_data=test_df['close'],
    predicted_weights=np.array(actions),
    lookback_window=lookback_window  # Important for data alignment
)
comparison = backtester.compare_with_buy_and_hold_rl()

# Show metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")

# Plot
portfolio.plot().show()





Data loaded successfully.
Training PPO Agent...


INFO:src.models.model_trainer_rl_v2:Training PPO for 200000 timesteps
INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...


Training Complete.
Generating Agent Predictions on Test Data...
Running Backtest...


INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: -38.78%
INFO:BacktesterRL:Buy & Hold Return: 123.31%
INFO:BacktesterRL:Outperformance: -162.10%



--- Strategy Performance ---
Total Return (%): -38.7800
Annual Return (%): -56.0300
Sharpe Ratio: -0.7313
Sortino Ratio: -0.9882
Max Drawdown (%): -58.7400
Calmar Ratio: -0.9538
Win Rate (%): 34.1500
Total Trades: 123.0000
Final Value ($): 61217.2300


In [4]:
# AGX ('AGX_processed.csv', i.e. the file without the '_2022' suffix): train a
# PPO agent on the first 70% of the rows and backtest on the remaining 30%.
# (Ensure your CSV has columns like 'close', 'rsi', 'sma', etc.)

# --- Load data ---
data_path = 'data/processed/AGX_processed.csv'
try:
    data = pd.read_csv(data_path)
except FileNotFoundError:
    print("Error: Data file not found. Check path.")
    # Fail fast: without this, the rest of the cell would silently run on the
    # `data` frame left in the kernel by a previously executed cell.
    raise

if 'Date' in data.columns:
    data['Date'] = pd.to_datetime(data['Date'])
    data = data.set_index('Date')
print("Data loaded successfully.")

# --- Chronological train/test split (70% / 30%) ---
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# --- Environment settings ---
# Resolved once so the training and test environments cannot drift apart
# (the fallback for 'initial_balance' used to differ between the two).
rl_config = config['reinforcement_learning']
env_params = rl_config['environment']
initial_balance = env_params.get('initial_balance', 100000)
commission = env_params.get('commission', 0.001)
lookback_window = env_params.get('lookback_window', 30)

# --- Training ---
trainer = ModelTrainerRL(rl_config)

print("Training PPO Agent...")

env_train = TradingEnvRL(
    train_df,
    initial_balance=initial_balance,
    commission=commission,
    lookback_window=lookback_window,
    reward_func='profit'  # or 'sharpe'
)

result = trainer.train_ppo(env_train)
model = result['model']

print("Training Complete.")

# --- Inference on the held-out split ---
print("Generating Agent Predictions on Test Data...")

env_test = TradingEnvRL(
    test_df,
    initial_balance=initial_balance,
    commission=commission,
    lookback_window=lookback_window,
    reward_func='profit'  # or 'sharpe'
)

obs, _ = env_test.reset()
done = False
actions = []

# Manual inference loop: step the raw (non-vectorised) environment until the
# episode terminates or is truncated, recording the first action component.
while not done:
    action, _ = model.predict(obs, deterministic=True)
    actions.append(action[0])
    obs, _, terminated, truncated, _ = env_test.step(action)
    done = terminated or truncated

# --- Backtest ---
print("Running Backtest...")

backtester = PortfolioBacktesterRL(env_params)

portfolio = backtester.run_backtest(
    price_data=test_df['close'],
    predicted_weights=np.array(actions),
    lookback_window=lookback_window  # Important for data alignment
)
comparison = backtester.compare_with_buy_and_hold_rl()

# Show metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")

# Plot
portfolio.plot().show()



INFO:src.models.model_trainer_rl_v2:Training PPO for 200000 timesteps


Data loaded successfully.
Training PPO Agent...
Training Complete.
Generating Agent Predictions on Test Data...


INFO:BacktesterRL:Preparing Backtest. Raw Prices: 726, Predictions: 665
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 502.40%
INFO:BacktesterRL:Buy & Hold Return: 635.13%
INFO:BacktesterRL:Outperformance: -132.73%


Running Backtest...

--- Strategy Performance ---
Total Return (%): 502.4000
Annual Return (%): 167.9500
Sharpe Ratio: 2.0018
Sortino Ratio: 3.3019
Max Drawdown (%): -37.9500
Calmar Ratio: 4.4261
Win Rate (%): 73.5100
Total Trades: 151.0000
Final Value ($): 602398.3700


In [5]:
# MDU (2022 file): train a PPO agent on the first 70% of the rows and
# backtest its actions on the remaining 30%.
# (Ensure your CSV has columns like 'close', 'rsi', 'sma', etc.)

# --- Load data ---
data_path = 'data/processed/MDU_processed_2022.csv'
try:
    data = pd.read_csv(data_path)
except FileNotFoundError:
    print("Error: Data file not found. Check path.")
    # Fail fast: without this, the rest of the cell would silently run on the
    # `data` frame left in the kernel by a previously executed cell.
    raise

if 'Date' in data.columns:
    data['Date'] = pd.to_datetime(data['Date'])
    data = data.set_index('Date')
print("Data loaded successfully.")

# --- Chronological train/test split (70% / 30%) ---
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# --- Environment settings ---
# Resolved once so the training and test environments cannot drift apart
# (the fallback for 'initial_balance' used to differ between the two).
rl_config = config['reinforcement_learning']
env_params = rl_config['environment']
initial_balance = env_params.get('initial_balance', 100000)
commission = env_params.get('commission', 0.001)
lookback_window = env_params.get('lookback_window', 30)

# --- Training ---
trainer = ModelTrainerRL(rl_config)

print("Training PPO Agent...")

env_train = TradingEnvRL(
    train_df,
    initial_balance=initial_balance,
    commission=commission,
    lookback_window=lookback_window,
    reward_func='profit'  # or 'sharpe'
)

result = trainer.train_ppo(env_train)
model = result['model']

print("Training Complete.")

# --- Inference on the held-out split ---
print("Generating Agent Predictions on Test Data...")

env_test = TradingEnvRL(
    test_df,
    initial_balance=initial_balance,
    commission=commission,
    lookback_window=lookback_window,
    reward_func='profit'  # or 'sharpe'
)

obs, _ = env_test.reset()
done = False
actions = []

# Manual inference loop: step the raw (non-vectorised) environment until the
# episode terminates or is truncated, recording the first action component.
while not done:
    action, _ = model.predict(obs, deterministic=True)
    actions.append(action[0])
    obs, _, terminated, truncated, _ = env_test.step(action)
    done = terminated or truncated

# --- Backtest ---
print("Running Backtest...")

backtester = PortfolioBacktesterRL(env_params)

portfolio = backtester.run_backtest(
    price_data=test_df['close'],
    predicted_weights=np.array(actions),
    lookback_window=lookback_window  # Important for data alignment
)
comparison = backtester.compare_with_buy_and_hold_rl()

# Show metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")

# Plot
portfolio.plot().show()



INFO:src.models.model_trainer_rl_v2:Training PPO for 200000 timesteps


Data loaded successfully.
Training PPO Agent...


INFO:BacktesterRL:Preparing Backtest. Raw Prices: 279, Predictions: 218
INFO:BacktesterRL:Running vectorbt simulation...
INFO:BacktesterRL:Backtest successfully completed.
INFO:BacktesterRL:Strategy Return: 13.56%
INFO:BacktesterRL:Buy & Hold Return: 18.21%
INFO:BacktesterRL:Outperformance: -4.66%


Training Complete.
Generating Agent Predictions on Test Data...
Running Backtest...

--- Strategy Performance ---
Total Return (%): 13.5600
Annual Return (%): 23.7200
Sharpe Ratio: 1.0240
Sortino Ratio: 1.4863
Max Drawdown (%): -11.8100
Calmar Ratio: 2.0089
Win Rate (%): 63.1100
Total Trades: 103.0000
Final Value ($): 113557.5600


In [6]:
import sys, os
# Add parent directory to path if running from src/models
sys.path.insert(0, os.path.abspath('..'))

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# --- NEW IMPORTS REQUIRED FOR INFERENCE ---
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
# ------------------------------------------

from src.models.model_trainer_rl_v2_2 import ModelTrainerRL, TradingEnvRL
from src.models.backtester import PortfolioBacktesterRL
from src.utils.config_loader import load_config

# MDU (2022 file), v2_2 trainer: train PPO, persist the model, then reload it
# together with the saved VecNormalize statistics (when present) to run
# inference on the held-out 30% and backtest the resulting actions.

# 1. Load Configuration
config = load_config("config/config.yaml")
rl_config = config['reinforcement_learning']
env_params = rl_config['environment']

# Resolved once so the training and test environments cannot drift apart
# (the fallback for 'initial_balance' used to differ between the two).
initial_balance = env_params.get('initial_balance', 100000)
commission = env_params.get('commission', 0.001)
lookback_window = env_params.get('lookback_window', 30)

# 2. Load Data
data_path = 'data/processed/MDU_processed_2022.csv'
try:
    data = pd.read_csv(data_path)
except FileNotFoundError:
    print("Error: Data file not found. Check path.")
    # Re-raise instead of sys.exit(): the cell stops with a normal traceback
    # that names the missing file, rather than raising SystemExit in the kernel.
    raise

if 'Date' in data.columns:
    data['Date'] = pd.to_datetime(data['Date'])
    data = data.set_index('Date')
print("Data loaded successfully.")

# 3. Split Train/Test (chronological, 70% / 30%)
split_idx = int(len(data) * 0.7)
train_df = data.iloc[:split_idx]
test_df = data.iloc[split_idx:]

# --- TRAINING PHASE ---
print("Training PPO Agent...")

trainer = ModelTrainerRL(rl_config)

# Create Training Environment
env_train = TradingEnvRL(
    train_df,
    initial_balance=initial_balance,
    commission=commission,
    lookback_window=lookback_window,
    reward_func='profit'
)

# Train.
# NOTE(review): the trainer is presumed to create the VecNormalize wrapper
# itself — confirm against model_trainer_rl_v2_2.
result = trainer.train_ppo(env_train)

# Persist the model (and, presumably, the normalization stats) so the test
# phase below reloads exactly what was trained.
save_path = "models/"
trainer.save_models(save_path)
print("Training Complete. Models saved.")

# --- INFERENCE PHASE ---
print("Generating Agent Predictions on Test Data...")

# 1. Load the Trained Model
model = PPO.load(os.path.join(save_path, "ppo_model"))

# 2. Create the Test Environment
# Must have the same observation space (columns) as training
env_test = TradingEnvRL(
    test_df,
    initial_balance=initial_balance,
    commission=commission,
    lookback_window=lookback_window,
    reward_func='profit'
)

# 3. Wrap it in DummyVecEnv (VecNormalize.load expects a VecEnv)
vec_env_test = DummyVecEnv([lambda: env_test])

# 4. Load Normalization Statistics, if they were saved, so test observations
#    are scaled with the statistics gathered during training.
norm_path = os.path.join(save_path, "ppo_vecnormalize.pkl")
if os.path.exists(norm_path):
    vec_env_test = VecNormalize.load(norm_path, vec_env_test)
    vec_env_test.training = False     # Do not update stats on test data
    vec_env_test.norm_reward = False  # Do not normalize rewards for testing
else:
    print("WARNING: Normalization stats not found. Model predictions may be garbage.")

# 5. Run Inference Loop
obs = vec_env_test.reset()
done = [False]  # VecEnv returns done as a list/array
actions = []

while not done[0]:
    # Predict on the (possibly normalized) observation batch
    action, _ = model.predict(obs, deterministic=True)
    actions.append(action[0])  # Extract the single env's action

    # Step the environment (VecEnv API: obs, rewards, dones, infos)
    obs, _, done, _ = vec_env_test.step(action)

print(f"Generated {len(actions)} actions.")


# --- BACKTESTING PHASE ---
print("Running Backtest...")

backtester = PortfolioBacktesterRL(env_params)

# NOTE(review): 'actions' presumably corresponds to the period
# [lookback_window : end]; the full price series and the lookback window are
# passed so the backtester can align prices with actions — confirm in
# PortfolioBacktesterRL.run_backtest.
portfolio = backtester.run_backtest(
    price_data=test_df['close'],
    predicted_weights=np.array(actions),
    lookback_window=lookback_window
)

comparison = backtester.compare_with_buy_and_hold_rl()

# Show Metrics
metrics = backtester.get_performance_metrics()
print("\n--- Strategy Performance ---")
for k, v in metrics.items():
    print(f"{k}: {v:.4f}")

# Plot (best effort: a plotting failure must not hide the metrics above)
if hasattr(portfolio, 'plot'):
    try:
        # plt.show() only renders matplotlib figures; show the object that
        # plot() returns instead, exactly as the other cells in this notebook do.
        portfolio.plot().show()
    except Exception as e:
        print(f"Plotting error: {e}")





Data loaded successfully.
Training PPO Agent...


TypeError: PPO.__init__() got an unexpected keyword argument 'total_timesteps'