In [None]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
import os
import sys
import logging
from pathlib import Path
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import pytz

# Add the project root to the Python path
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)

from data_management.dataset_manager import DatasetManager
from trading.model_manager import ModelManager
import pandas as pd
from datetime import datetime, timedelta
import pytz


project_dir = Path.cwd() / "forex_models"  # Create in current working directory
project_dir.mkdir(exist_ok=True)

# 1. Load and prepare data
dataset_manager = DatasetManager()

pair = "EUR_USD"
parquet_path = Path("/Volumes/ssd_fat2/ai6_trading_bot/datasets/1h") / f"{pair}.parquet"
df = pd.read_parquet(parquet_path)


train_df, val_df, test_df = dataset_manager.split_dataset(df, train_ratio=0.7, val_ratio=0.15, test_ratio=0.15)

print(f"Data splits:")
print(f"Training: {len(train_df)} samples")
print(f"Validation: {len(val_df)} samples")
print(f"Test: {len(test_df)} samples")

# 3. Initialize model manager
model_manager = ModelManager(
    base_path=str(project_dir),
    n_envs=1,
    verbose=0
    )



# 5. Train model

print("\nStarting model training...")
model, metrics = model_manager.train_model(
    df=train_df,
    pair=pair,
    total_timesteps=1_000_000,  # Start with smaller number for testing
    eval_freq=50_000
)

# 6. Evaluate on validation set
print("\nEvaluating on validation set...")
val_metrics = model_manager.evaluate_model(
    model=model,
    df=val_df,
    pair=pair,
    n_evaluations=5
)

print("\nValidation Metrics:")
print(f"Total PnL: {val_metrics.total_pnl:.2f}")
print(f"Win Rate: {val_metrics.win_rate:.2%}")
print(f"Sharpe Ratio: {val_metrics.sharpe_ratio:.2f}")
print(f"Max Drawdown: {val_metrics.max_drawdown:.2%}")
    

    




Dataset split sizes:
Training: 101768 samples (70.0%)
Validation: 21808 samples (15.0%)
Test: 21808 samples (15.0%)
Data splits:
Training: 101768 samples
Validation: 21808 samples
Test: 21808 samples

Starting model training...




Output()

In [4]:
import os
import sys

import pandas as pd

from datetime import datetime, timedelta


# Add the project root to the Python path
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import VecNormalize, DummyVecEnv
from trading.environments.forex_env import ForexTradingEnv

Simple Training mainly to check if ENV works

In [None]:
import os
import sys

import pandas as pd

from datetime import datetime, timedelta
from pathlib import Path


# Add the project root to the Python path
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import VecNormalize, DummyVecEnv
from trading.environments.forex_env2 import ForexTradingEnv
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.monitor import Monitor
from data_management.dataset_manager import DatasetManager

pair = "EUR_USD"
# parquet_path = Path("/Volumes/ssd_fat2/ai6_trading_bot/datasets/1h") / f"{pair}.parquet"
# parquet_path = Path("/Volumes/ssd_fat2/ai6_trading_bot/datasets/1h") / f"{pair}.parquet"
df = pd.read_parquet('/Volumes/ssd_fat2/ai6_trading_bot/datasets/1h/normalized/eur_norm_robut.parquet')

dataset_manager = DatasetManager()
train_df, val_df, test_df = dataset_manager.split_dataset(df, train_ratio=0.7, val_ratio=0.15, test_ratio=0.15)



saving_path = f'./logs/20nov/'
os.makedirs(saving_path, exist_ok=True)

def make_train_env():
    env = ForexTradingEnv(
        df=train_df,
        pair='EUR_USD',

    )
    env = Monitor(env)
    env = DummyVecEnv([lambda: env])
    env = VecNormalize(env, norm_obs=True, norm_reward=True)
    return env

def make_eval_env():
    env = ForexTradingEnv(

        df=val_df,
        pair='EUR_USD',
        # resample_interval='1h'
    )
    env = Monitor(env)
    env = DummyVecEnv([lambda: env])
    env = VecNormalize(env, norm_obs=True, norm_reward=False)
    env.training = False
    return env

train_env = make_train_env()
eval_env = make_eval_env()
eval_callback = EvalCallback(
    eval_env,
    best_model_save_path=saving_path,
    log_path=saving_path,
    eval_freq=100_000,  # Adjust as needed
    deterministic=True,
    render=False
)

model = PPO(
    'MultiInputPolicy',
    train_env,
    verbose=0,
    tensorboard_log=f'{saving_path}tensorboard/',
)

model.learn(
    total_timesteps=500_000,  # Adjust as needed
    callback=eval_callback
)

model.save(f'{saving_path}best_model.zip')
train_env.save(f'{saving_path}vec_normalize.pkl')

Dataset split sizes:
Training: 101768 samples (70.0%)
Validation: 21808 samples (15.0%)
Test: 21808 samples (15.0%)

Episode Summary:
Final Return: -5.36%
Total PnL: -53590.60
Total Trades: 3308
Winning Trades: 1218
Win Rate: 36.82%
Initial Balance: 1000000.00
Final Balance: 946409.40
--------------------------------------------------

Episode Summary:
Final Return: -6.97%
Total PnL: -69664.18
Total Trades: 12919
Winning Trades: 5147
Win Rate: 39.84%
Initial Balance: 1000000.00
Final Balance: 930335.82
--------------------------------------------------

Episode Summary:
Final Return: -8.76%
Total PnL: -87585.20
Total Trades: 14464
Winning Trades: 5842
Win Rate: 40.39%
Initial Balance: 1000000.00
Final Balance: 912414.80
--------------------------------------------------

Episode Summary:
Final Return: -0.19%
Total PnL: -1928.39
Total Trades: 373
Winning Trades: 151
Win Rate: 40.48%
Initial Balance: 1000000.00
Final Balance: 998071.61
--------------------------------------------------



In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
import os, sys
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)
from data_management.dataset_manager import DatasetManager
from trading.model_manager import ModelManager
from trading.agents.evaluate_model import ModelEvaluator, TradeLedger
from pathlib import Path
from trading.environments.forex_env import ForexTradingEnv
from stable_baselines3.common.vec_env import SubprocVecEnv, DummyVecEnv, VecNormalize
from stable_baselines3 import PPO
from typing import Dict, List, Tuple
import pandas as pd



eval_path = Path("model_evaluations")
eval_path.mkdir(exist_ok=True)
evaluator = ModelEvaluator(base_path=eval_path)


dataset_manager = DatasetManager()
pair = "EUR_USD"
df = dataset_manager.load_parquet_dataset(pair)
train_df, val_df, test_df = dataset_manager.split_dataset(df)

def load_model_for_evaluation(model_path: Path, env_path: Path) -> Tuple[PPO, VecNormalize]:
    """
    Load a trained model and its normalization parameters.
    
    Args:
        model_path: Path to the saved model
        env_path: Path to the saved VecNormalize stats
    """
    # Create a dummy environment (required for loading)
    def make_env():
        def _init():
            env = ForexTradingEnv(
                df=test_df,  # Empty DataFrame for now
                pair="EUR_USD"
            )
            return env
        return _init
    
    # Create vectorized environment
    vec_env = DummyVecEnv([make_env()])
    
    # Load the saved normalization statistics
    vec_env = VecNormalize.load(
        env_path,
        vec_env
    )
    
    # Don't update normalization statistics during evaluation
    vec_env.training = False
    vec_env.norm_reward = False
    
    # Load the model
    model = PPO.load(model_path)
    
    # Set the correct environment
    model.set_env(vec_env)
    
    return model, vec_env

model, vec_env = load_model_for_evaluation(
    model_path= Path('/Users/floriankockler/Code/GitHub.nosync/ai6-gcp-bot/forex_trading_system/notebooks/logs/20nov/best_model'), 
    env_path= Path('/Users/floriankockler/Code/GitHub.nosync/ai6-gcp-bot/forex_trading_system/notebooks/logs/20nov/vec_normalize.pkl')
    )


Dataset split sizes:
Training: 101768 samples (70.0%)
Validation: 21808 samples (15.0%)
Test: 21808 samples (15.0%)


In [2]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
import os
import sys
import logging
from pathlib import Path
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import pytz

# Add the project root to the Python path
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)

from data_management.dataset_manager import DatasetManager
from trading.model_manager import ModelManager
import pandas as pd
from datetime import datetime, timedelta
import pytz
from trading.agents.evaluate_model import ModelEvaluator, TradeLedger
from pathlib import Path
from datetime import datetime

# Create evaluation directory
result_dir = Path("model_evaluation") / f"EUR_USD_eval_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
result_dir.mkdir(parents=True)

# Initialize evaluator 
evaluator = ModelEvaluator(base_path=Path("model_evaluation"))

# Evaluate model
metrics, ledger = evaluator._evaluate_single_dataset(
    model=model,
    df=test_df,
    pair="EUR_USD",
    save_dir=result_dir
)

# Get trade analysis
trades_df = ledger.to_dataframe()
trades_df.to_csv(result_dir / "trades.csv")

print(f"\nEvaluation Results:")
print(f"Total Trades: {len(trades_df)}")
print(f"Win Rate: {metrics['win_rate']:.2%}")
print(f"Total PnL: {metrics['total_pnl']:.2f}")


Episode Summary:
Final Return: -36.57%
Total PnL: -365657.64
Total Trades: 19809
Winning Trades: 6775
Win Rate: 34.20%
Initial Balance: 1000000.00
Final Balance: 634342.36
--------------------------------------------------
Total trades recorded: 19809
Ledger metrics: {'total_trades': 19809, 'winning_trades': 6775, 'losing_trades': 11938, 'win_rate': 0.3420162552375183, 'total_pnl': -365657.6364306434, 'average_pnl': -18.459166865093817, 'max_drawdown': 366323.447455183, 'avg_trade_duration': Timedelta('0 days 01:33:09.095865515'), 'best_trade': 1641.1270390509203, 'worst_trade': -2871.1286830462177, 'long_trades': 9904, 'short_trades': 9905, 'profit_factor': 0.5634835058529779}

Evaluation Results:
Total Trades: 19809
Win Rate: 34.20%
Total PnL: -365657.64


In [3]:
metrics

{'total_trades': 19809,
 'winning_trades': 6775,
 'losing_trades': 11938,
 'win_rate': 0.3420162552375183,
 'total_pnl': -365657.6364306434,
 'average_pnl': -18.459166865093817,
 'max_drawdown': 366323.447455183,
 'avg_trade_duration': Timedelta('0 days 01:33:09.095865515'),
 'best_trade': 1641.1270390509203,
 'worst_trade': -2871.1286830462177,
 'long_trades': 9904,
 'short_trades': 9905,
 'profit_factor': 0.5634835058529779}

Try Out reward function

In [None]:
# Monitor reward distributions
#! env not unwraped
rewards = []
realized_pnls = []
unrealized_pnls = []

for episode in range(10):
    obs = train_env.reset()
    done = False
    
    while not done:
        action = model.predict(obs, deterministic=True)[0]
        obs, reward, done, info = env.step(action)
        
        rewards.append(reward)
        if info.get('trade_closed'):
            realized_pnls.append(info['trade_pnl'])
        if info.get('unrealized_pnl'):
            unrealized_pnls.append(info['unrealized_pnl'])

print(f"Reward stats:")
print(f"Mean: {np.mean(rewards):.4f}")
print(f"Std: {np.std(rewards):.4f}")
print(f"Min: {np.min(rewards):.4f}")
print(f"Max: {np.max(rewards):.4f}")

In [None]:
# Reward distribution monitoring
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

def monitor_rewards(model, env, n_episodes=10):
    """
    Monitor reward distributions and trading behavior with proper env unwrapping.
    """
    # Unwrap to get the base environment
    if hasattr(env, 'envs'):
        # VecEnv unwrapping
        base_env = env.envs[0]
        if hasattr(base_env, 'env'):
            # Possible Monitor wrapper
            base_env = base_env.env
        if hasattr(base_env, 'env'):
            # Possible other wrappers
            base_env = base_env.env
    else:
        base_env = env
        
    # Storage for metrics
    metrics = {
        'rewards': [],
        'realized_pnls': [],
        'unrealized_pnls': [],
        'trade_durations': [],
        'drawdowns': [],
        'balance_trajectory': []
    }
    
    for episode in range(n_episodes):
        obs = env.reset()
        done = False
        episode_rewards = []
        
        print(f"\nStarting episode {episode + 1}/{n_episodes}")
        
        while not done:
            action, _ = model.predict(obs, deterministic=True)
            obs, reward, done, info = env.step(action)
            
            # Store raw reward
            metrics['rewards'].append(reward[0])  # Unwrap from vectorized form
            
            # Get info from base environment
            metrics['balance_trajectory'].append(base_env.balance)
            
            if base_env.position is not None:
                unrealized_pnl = base_env._calculate_pnl(
                    base_env.position.type,
                    base_env.position.entry_price,
                    base_env.df.iloc[base_env.current_step]['close'],
                    base_env.position.size
                )
                metrics['unrealized_pnls'].append(unrealized_pnl)
            
            # Store trade information when a trade is closed
            info = info[0] if isinstance(info, tuple) else info  # Unwrap info
            # Store available metrics from info dict
            metrics['balances'].append(info['balance'])
            metrics['total_pnl'].append(info['total_pnl'])
            metrics['unrealized_pnl'].append(info['unrealized_pnl'])
            metrics['win_rates'].append(info['win_rate'])
            metrics['drawdowns'].append(info['drawdown'])
        
        # Episode summary
        print(f"Episode {episode + 1} completed:")
        print(f"Final Balance: {info['balance']:.2f}")
        print(f"Total PnL: {info['total_pnl']:.2f}")
        print(f"Win Rate: {info['win_rate']:.2%}")
        print(f"Max Drawdown: {info['drawdown']:.2%}")

    # Create analysis plots
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    
    # Reward distribution
    sns.histplot(metrics['rewards'], bins=50, ax=axes[0, 0])
    axes[0, 0].set_title('Reward Distribution')
    axes[0, 0].set_xlabel('Reward')
    
    # PnL trajectory
    axes[0, 1].plot(metrics['total_pnl'])
    axes[0, 1].set_title('Total PnL Trajectory')
    axes[0, 1].set_xlabel('Step')
    axes[0, 1].set_ylabel('PnL')
    
    # Balance trajectory
    axes[1, 0].plot(metrics['balances'])
    axes[1, 0].set_title('Balance Trajectory')
    axes[1, 0].set_xlabel('Step')
    axes[1, 0].set_ylabel('Balance')
    
    # Win rate trajectory
    axes[1, 1].plot(metrics['win_rates'])
    axes[1, 1].set_title('Win Rate Trajectory')
    axes[1, 1].set_xlabel('Step')
    axes[1, 1].set_ylabel('Win Rate')
    
    plt.tight_layout()
    plt.show()

    # Print statistics
    print("\nReward Statistics:")
    print(f"Mean Reward: {np.mean(metrics['rewards']):.4f}")
    print(f"Std Reward: {np.std(metrics['rewards']):.4f}")
    print(f"Min Reward: {np.min(metrics['rewards']):.4f}")
    print(f"Max Reward: {np.max(metrics['rewards']):.4f}")
    
    print("\nTrading Statistics:")
    print(f"Final Total PnL: {metrics['total_pnl'][-1]:.2f}")
    print(f"Final Win Rate: {metrics['win_rates'][-1]:.2%}")
    print(f"Max Drawdown: {max(metrics['drawdowns']):.2%}")
    
    return metrics


eval_freq = 50000
total_timesteps = 100_0000

model_dir = ''
vec_normalize_path = ''
model = PPO.load(model_dir)

# Create and normalize evaluation environment
def make_env():
    return ForexTradingEnv(
        df=test_df,  # Your test DataFrame
        pair='EUR_USD',
        initial_balance=1_000_000.0,
        trade_size=100_000.0,
        random_start=False
    )

# Create vectorized environment
env = DummyVecEnv([make_env])
env = VecNormalize(
    env,
    norm_obs=True,
    norm_reward=False,  # Disable reward normalization for evaluation

)

# Load saved normalization stats
env = VecNormalize.load(vec_normalize_path, env)
env.training = False  # Disable training mode
env.norm_reward = False  # Make sure reward normalization is disabled

# Now run the monitoring
reward_metrics = monitor_rewards(
    model=model,
    env=env,
    n_episodes=1  # Number of episodes to evaluate
)

Try the model incl using trade ledge

In [None]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
import os, sys
from pathlib import Path
import pandas as pd
from datetime import datetime

# Add the project root to the Python path
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)

from trading.agents.evaluate_model import ModelEvaluator
from trading.agents.trade_ledger import TradeLedger
from trading.environments.forex_env import ForexTradingEnv
from data_management.dataset_manager import DatasetManager
from trading.model_manager import ModelManager
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import VecNormalize, DummyVecEnv
# Setup paths
eval_path = Path("model_evaluation")
eval_path.mkdir(exist_ok=True)

dataset_manager = DatasetManager()
train_df, val_df, test_df = dataset_manager.split_dataset(df, train_ratio=0.7, val_ratio=0.15, test_ratio=0.15)



saving_path = f'./logs/20nov/'

def create_eval_env(df: pd.DataFrame, pair: str) -> VecNormalize:
    """Create and normalize evaluation environment."""
    def make_env():
        return ForexTradingEnv(
            df=df,
            pair='EUR_USD',
            initial_balance=1_000_000.0,
            trade_size=100_000.0,
            random_start=False  # Important for evaluation
        )
    
    env = DummyVecEnv([make_env])
    env = VecNormalize(
        env,
        norm_obs=True,
        norm_reward=False,
     
    )
    return env

# Setup paths
# model_dir = Path(f"{saving_path}/best_model.zip")
eval_path = Path("model_evaluation")
eval_path.mkdir(exist_ok=True)

# Initialize evaluator
evaluator = ModelEvaluator(base_path=eval_path)

# Load model and normalization
pair = "EUR_USD"
model_path = f"{saving_path}/best_model.zip"
vec_normalize_path = f"{saving_path}/best_model.zp"

# Load the model
model = PPO.load(model_path)

# Create evaluation environment and load normalization stats
eval_env = create_eval_env(test_df, pair)
eval_env = VecNormalize.load(vec_normalize_path, eval_env)
# Important: don't update normalization stats during evaluation
eval_env.training = False


# Run evaluation
print("Running model evaluation...")
results, trade_ledger = evaluator.evaluate_model(
    model=model,
    env=eval_env,  # Pass the normalized environment
    df=test_df,
    pair=pair,
    version_id="v1"
)

# Save detailed results
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
results_dir = eval_path / f"{pair}_evaluation_20nov_{timestamp}"
results_dir.mkdir(exist_ok=True)

# Save trade log to Excel
output_file = results_dir / "trade_analysis.xlsx"
trade_ledger['test'].export_to_excel(output_file)

# Convert trade ledger to DataFrame for analysis
trades_df = trade_ledger['test'].to_dataframe()

# Print summary metrics
print("\nEvaluation Results:")
print(f"Total Trades: {len(trades_df)}")
print(f"Win Rate: {(trades_df['pnl'] > 0).mean():.2%}")
print(f"Total PnL: {trades_df['pnl'].sum():.2f}")
print(f"Average PnL per Trade: {trades_df['pnl'].mean():.2f}")
print(f"Sharpe Ratio: {results['test']['sharpe_ratio']:.2f}")
print(f"Max Drawdown: {results['test']['max_drawdown']:.2%}")

# Analyze trades by session
session_analysis = trades_df.groupby('session').agg({
    'pnl': ['count', 'mean', 'sum'],
    'duration': 'mean'
}).round(2)

print("\nSession Analysis:")
display(session_analysis)

# Analyze trades by holding period
duration_bins = [0, 1, 4, 8, 24, float('inf')]
duration_labels = ['0-1h', '1-4h', '4-8h', '8-24h', '>24h']
trades_df['duration_category'] = pd.cut(
    trades_df['duration'], 
    bins=duration_bins, 
    labels=duration_labels
)

duration_analysis = trades_df.groupby('duration_category').agg({
    'pnl': ['count', 'mean', lambda x: (x > 0).mean()],
    'duration': 'mean'
}).round(3)

print("\nDuration Analysis:")
display(duration_analysis)

# Save trades to CSV
trades_df.to_csv(results_dir / "trades.csv", index=True)

# Create visualizations
import matplotlib.pyplot as plt
import seaborn as sns

# Create subplots
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Cumulative PnL
cumulative_pnl = trades_df['pnl'].cumsum()
axes[0, 0].plot(cumulative_pnl.index, cumulative_pnl.values)
axes[0, 0].set_title('Cumulative PnL')
axes[0, 0].set_xlabel('Trade Number')
axes[0, 0].set_ylabel('Cumulative PnL')

# PnL Distribution
sns.histplot(data=trades_df, x='pnl', bins=50, ax=axes[0, 1])
axes[0, 1].set_title('PnL Distribution')

# Win Rate by Session
session_win_rates = trades_df.groupby('session')['pnl'].apply(lambda x: (x > 0).mean())
session_win_rates.plot(kind='bar', ax=axes[1, 0])
axes[1, 0].set_title('Win Rate by Session')
axes[1, 0].set_ylabel('Win Rate')

# Trade Duration Distribution
sns.histplot(data=trades_df, x='duration', bins=50, ax=axes[1, 1])
axes[1, 1].set_title('Trade Duration Distribution')
axes[1, 1].set_xlabel('Duration (hours)')

plt.tight_layout()
plt.savefig(results_dir / "analysis_plots.png")
plt.close()

print(f"\nResults saved to: {results_dir}")
print(f"Trade log: {output_file}")
print(f"Trades CSV: {results_dir / 'trades.csv'}")
print(f"Analysis plots: {results_dir / 'analysis_plots.png'}")

# Close environment
eval_env.close()

In [None]:
metrics

In [None]:
val_metrics

In [12]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
import os, sys
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)
from data_management.dataset_manager import DatasetManager
from trading.model_manager import ModelManager
from trading.agents.evaluate_model import ModelEvaluator, TradeLedger
from pathlib import Path
from trading.environments.forex_env import ForexTradingEnv
from stable_baselines3.common.vec_env import SubprocVecEnv, DummyVecEnv, VecNormalize
from stable_baselines3 import PPO
from typing import Dict, List, Tuple
import pandas as pd



eval_path = Path("model_evaluations")
eval_path.mkdir(exist_ok=True)
evaluator = ModelEvaluator(base_path=eval_path)


dataset_manager = DatasetManager()
pair = "EUR_USD"
df = dataset_manager.load_parquet_dataset(pair)
train_df, val_df, test_df = dataset_manager.split_dataset(df)

def load_model_for_evaluation(model_path: Path, env_path: Path) -> Tuple[PPO, VecNormalize]:
    """
    Load a trained model and its normalization parameters.
    
    Args:
        model_path: Path to the saved model
        env_path: Path to the saved VecNormalize stats
    """
    # Create a dummy environment (required for loading)
    def make_env():
        def _init():
            env = ForexTradingEnv(
                df=test_df,  # Empty DataFrame for now
                pair="EUR_USD"
            )
            return env
        return _init
    
    # Create vectorized environment
    vec_env = DummyVecEnv([make_env()])
    
    # Load the saved normalization statistics
    vec_env = VecNormalize.load(
        env_path,
        vec_env
    )
    
    # Don't update normalization statistics during evaluation
    vec_env.training = False
    vec_env.norm_reward = False
    
    # Load the model
    model = PPO.load(model_path)
    
    # Set the correct environment
    model.set_env(vec_env)
    
    return model, vec_env

load_model_for_evaluation(
    model_path= Path('/Users/floriankockler/Code/GitHub.nosync/ai6-gcp-bot/forex_trading_system/notebooks/forex_models/deployed/EUR_USD/model.zip'), 
    env_path= Path('/Users/floriankockler/Code/GitHub.nosync/ai6-gcp-bot/forex_trading_system/notebooks/forex_models/deployed/EUR_USD/vec_normalize.pkl')
    )


Dataset split sizes:
Training: 101768 samples (70.0%)
Validation: 21808 samples (15.0%)
Test: 21808 samples (15.0%)


AssertionError: The number of environments to be set is different from the number of environments in the model: (1 != 3), whereas `set_env` requires them to be the same. To load a model with a different number of environments, you must use `PPO.load(path, env)` instead

In [8]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
import os, sys
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)
from data_management.dataset_manager import DatasetManager
from trading.model_manager import ModelManager

# Import the evaluation code
from trading.agents.evaluate_model import ModelEvaluator, TradeLedger
from pathlib import Path


model_eval = ModelEvaluator(base_path=Path("model_evaluation"))
# Setup paths
base_path = Path("model_evaluation")
base_path.mkdir(exist_ok=True)
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
eval_path = base_path / f"evaluation_{timestamp}"
eval_path.mkdir(exist_ok=True)

# Load your trained model
model_manager = ModelManager()
dataset_manager = DatasetManager()
pair = "EUR_USD"

model, version = model_manager.get_deployed_model(pair)

# Load and split your data
df = dataset_manager.load_parquet_dataset(pair)
train_df, val_df, test_df = dataset_manager.split_dataset(df)

# Evaluate on test set
print("Evaluating model on test set...")
test_metrics, test_ledger = model_eval.evaluate_model(
    model=model,
    df=test_df,
    pair=pair,
    output_path=eval_path / 'test'
)

# Print key metrics
print("\nTest Set Performance:")
print(f"Total Trades: {test_metrics['total_trades']}")
print(f"Win Rate: {test_metrics['win_rate']:.2%}")
print(f"Total PnL: {test_metrics['total_pnl']:.2f}")
print(f"Sharpe Ratio: {test_metrics['sharpe_ratio']:.2f}")
print(f"Max Drawdown: {test_metrics['max_drawdown']:.2%}")

# Analyze trade patterns
test_df = test_ledger.to_dataframe()
print("\nTrade Analysis:")
print("\nAverage PnL by Hour:")
print(test_df.groupby(test_df['entry_time'].dt.hour)['pnl'].mean().round(2))

print("\nPosition Type Performance:")
print(test_df.groupby('position_type').agg({
    'pnl': ['count', 'mean', 'sum'],
    'holding_period': 'mean'
}).round(2))

# Show the plots
test_ledger.plot_analysis()

FileNotFoundError: [Errno 2] No such file or directory: 'model_evaluation/evaluation_results'