In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
from typing import Dict, Optional, Tuple
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import VecNormalize, DummyVecEnv
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.policies import obs_as_tensor
from trading.environments.forex_env2_flat import ForexTradingEnv
import torch
from datetime import datetime
import logging

# Root logging: INFO and above, each record prefixed with time and level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
# Module-level logger used by the evaluation code below.
logger = logging.getLogger(__name__)

class ModelEvaluator:
    """Replay a saved PPO policy over a test DataFrame and record what it did.

    The evaluator loads the model and its saved ``VecNormalize`` wrapper,
    builds a single-environment ``ForexTradingEnv`` over ``test_df`` and, per
    step, records the policy's action probabilities together with the fields
    the environment reports in ``info``. Closed trades are recorded separately.
    Results are written as CSV files under ``save_path``.
    """

    # Position in the tuple is the discrete action id passed to the env.
    ACTION_NAMES = ('NO_POSITION', 'LONG', 'SHORT')

    def __init__(
        self,
        model_path: str,
        vec_normalize_path: str,
        test_df: pd.DataFrame,
        pair: str,
        save_path: Optional[str] = None,
        sequence_length: int = 5
    ):
        """Load the model and normalization wrapper and build the test env.

        Args:
            model_path: Path handed to ``PPO.load``.
            vec_normalize_path: Path of the pickled ``VecNormalize`` wrapper.
            test_df: Data the evaluation environment is built on.
            pair: Currency pair name; also used in the output file names.
            save_path: Output directory (created if missing). Defaults to
                ``evaluation_results``.
            sequence_length: Forwarded to ``ForexTradingEnv``.
        """
        self.pair = pair
        self.sequence_length = sequence_length
        self.test_df = test_df
        self.save_path = Path(save_path) if save_path else Path("evaluation_results")
        self.save_path.mkdir(exist_ok=True, parents=True)

        self.model = PPO.load(model_path)

        # The saved wrapper must be attached to a real VecEnv when it is
        # loaded (VecNormalize.load calls set_venv, which reads attributes of
        # the venv), so loading happens inside _create_test_env.
        self._vec_normalize_path = vec_normalize_path
        self.env = self._create_test_env()
        # Kept for callers that read the normalization wrapper directly.
        self.vec_normalize = self.env

    def _create_test_env(self) -> "VecNormalize":
        """Build the evaluation env wrapped in the *saved* normalization.

        Loading the pickled wrapper onto the fresh ``DummyVecEnv`` keeps every
        saved setting (running statistics, ``norm_obs``, ``clip_obs``,
        ``epsilon``) instead of re-creating a wrapper and copying a subset.
        """
        def make_env():
            env = ForexTradingEnv(
                df=self.test_df,
                pair=self.pair,
                sequence_length=self.sequence_length,
                random_start=False
            )
            return Monitor(env)

        vec_env = DummyVecEnv([make_env])
        test_env = VecNormalize.load(self._vec_normalize_path, vec_env)

        # Freeze the statistics and report raw rewards during evaluation.
        test_env.training = False
        test_env.norm_reward = False

        return test_env

    @classmethod
    def _summarize_probabilities(cls, probs_np) -> Dict[str, float]:
        """Map a 1-D probability vector to named entries plus its entropy."""
        prob_dict = {cls.ACTION_NAMES[i]: float(p) for i, p in enumerate(probs_np)}
        # The small constant keeps log() finite for zero probabilities.
        entropy = -np.sum(probs_np * np.log(probs_np + 1e-10))
        prob_dict['entropy'] = float(entropy)
        return prob_dict

    def get_action_probabilities(self, observation: np.ndarray) -> Dict[str, float]:
        """Return the policy's action probabilities for ``observation``.

        Args:
            observation: Batched observation as returned by ``self.env``; only
                the first row of the resulting distribution is used.

        Returns:
            Dict with one probability per name in ``ACTION_NAMES`` and an
            ``'entropy'`` entry.
        """
        obs_tensor = obs_as_tensor(observation, self.model.policy.device)

        with torch.no_grad():
            dist = self.model.policy.get_distribution(obs_tensor)
            probs_np = dist.distribution.probs.cpu().numpy()[0]

        return self._summarize_probabilities(probs_np)

    def evaluate(self, n_episodes: int = 1) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """Run ``n_episodes`` episodes, log a summary for each, and save CSVs.

        Args:
            n_episodes: Number of episodes to run; must be at least 1.

        Returns:
            ``(steps_df, trades_df)`` holding the rows of all episodes in
            order. ``trades_df`` is empty when no trade was closed.

        Raises:
            ValueError: If ``n_episodes`` is smaller than 1.
        """
        if n_episodes < 1:
            raise ValueError(f"n_episodes must be >= 1, got {n_episodes}")

        all_steps_data = []
        all_trades_data = []

        for _ in range(n_episodes):
            steps_data, trades_data = self._run_episode()
            # Summarise this episode only, so the return figure is not
            # computed across episode boundaries.
            self._print_episode_summary(pd.DataFrame(steps_data), pd.DataFrame(trades_data))
            all_steps_data.extend(steps_data)
            all_trades_data.extend(trades_data)

        steps_df = pd.DataFrame(all_steps_data)
        trades_df = pd.DataFrame(all_trades_data)

        # Write once, after all episodes, instead of rewriting per episode.
        steps_df.to_csv(self.save_path / f"{self.pair}_steps_analysis.csv", index=False)
        if not trades_df.empty:
            trades_df.to_csv(self.save_path / f"{self.pair}_trades_analysis.csv", index=False)

        return steps_df, trades_df

    def _run_episode(self) -> Tuple[list, list]:
        """Run one episode with greedy actions.

        Returns:
            ``(steps_data, trades_data)``: one dict per step, and one dict per
            step on which the environment reported ``trade_closed``.
        """
        obs = self.env.reset()
        done = False
        steps_data = []
        trades_data = []

        # Probabilities from the step on which the agent last switched to
        # LONG/SHORT, i.e. the decision that opened the current position.
        # NOTE(review): assumes the env acts on the chosen action in the same
        # step -- confirm against ForexTradingEnv.
        entry_probs = None
        previous_action = None

        while not done:
            action_probs = self.get_action_probabilities(obs)

            # Greedy action; ties resolve to the first name in ACTION_NAMES.
            action = max(self.ACTION_NAMES, key=action_probs.__getitem__)
            action_idx = self.ACTION_NAMES.index(action)

            next_obs, reward, done, info = self.env.step([action_idx])
            # Single-env VecEnv: unwrap the batch dimension.
            reward, done, info = reward[0], done[0], info[0]

            steps_data.append({
                'timestamp': info['timestamp'],
                'action': action,
                'highest_prob': action_probs[action],
                'no_position_prob': action_probs['NO_POSITION'],
                'long_prob': action_probs['LONG'],
                'short_prob': action_probs['SHORT'],
                'entropy': action_probs['entropy'],
                'position_type': info['position_type'],
                'balance': info['balance'],
                'unrealized_pnl': info['unrealized_pnl'],
                'net_worth_chg': info['net_worth_chg'],
                'reward': reward
            })

            if info.get('trade_closed', False):
                # Entry conviction comes from the entry step; fall back to the
                # current step when no entry decision was observed.
                at_entry = entry_probs if entry_probs is not None else action_probs
                trades_data.append({
                    'entry_time': info['entry_time'],
                    'exit_time': info['exit_time'],
                    'trade_duration': (info['exit_time'] - info['entry_time']).total_seconds() / 3600,
                    'position_type': info['position_type'],
                    'entry_price': info['entry_price'],
                    'exit_price': info['exit_price'],
                    # presumably info['position_type'] names the closed
                    # trade's side ('LONG'/'SHORT') -- verify against the env
                    'entry_prob': at_entry[info['position_type']],
                    'exit_prob': action_probs['NO_POSITION'],
                    'pnl': info['trade_pnl'],
                    'entry_entropy': at_entry['entropy']
                })
                entry_probs = None

            # Updated after the close handling so that a direct LONG<->SHORT
            # flip attributes the old snapshot to the trade that just closed.
            if action != previous_action and action != 'NO_POSITION':
                entry_probs = action_probs
            previous_action = action

            obs = next_obs

        return steps_data, trades_data

    def _print_episode_summary(self, steps_df: pd.DataFrame, trades_df: pd.DataFrame):
        """Log return, balance, trade, and conviction figures for one episode.

        Args:
            steps_df: Per-step rows; needs a ``balance`` column.
            trades_df: Per-trade rows (may be empty); needs ``pnl``,
                ``trade_duration`` and ``entry_prob`` when not empty.
        """
        logger.info("\nEpisode Summary:")
        logger.info("-" * 50)

        if steps_df.empty:
            logger.warning("No steps recorded; nothing to summarize")
            logger.info("-" * 50)
            return

        # "Initial" is the balance reported after the first recorded step.
        first_balance = steps_df['balance'].iloc[0]
        last_balance = steps_df['balance'].iloc[-1]
        total_return = ((last_balance / first_balance) - 1) * 100

        logger.info(f"Total Return: {total_return:.2f}%")
        logger.info(f"Initial Balance: ${first_balance:,.2f}")
        logger.info(f"Final Balance: ${last_balance:,.2f}")

        if not trades_df.empty:
            total_trades = len(trades_df)
            is_win = trades_df['pnl'] > 0
            win_rate = (is_win.sum() / total_trades) * 100

            logger.info("\nTrade Analysis:")
            logger.info(f"Total Trades: {total_trades}")
            logger.info(f"Win Rate: {win_rate:.1f}%")
            logger.info(f"Average Trade PnL: ${trades_df['pnl'].mean():.2f}")
            logger.info(f"Average Trade Duration: {trades_df['trade_duration'].mean():.1f} hours")

            # Means are NaN when a group (winners or losers) is empty.
            logger.info("\nConviction Analysis:")
            logger.info(f"Average Entry Conviction: {trades_df['entry_prob'].mean():.3f}")
            logger.info(f"Winning Trades Conviction: {trades_df[is_win]['entry_prob'].mean():.3f}")
            logger.info(f"Losing Trades Conviction: {trades_df[~is_win]['entry_prob'].mean():.3f}")

        logger.info("-" * 50)

def evaluate_model(
    model_path: str,
    vec_normalize_path: str,
    test_df: pd.DataFrame,
    pair: str,
    save_path: Optional[str] = None,
    sequence_length: int = 5,
    n_episodes: int = 1
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Build a ``ModelEvaluator`` and run it.

    Args:
        model_path: Path of the saved model.
        vec_normalize_path: Path of the saved normalization wrapper.
        test_df: Data to evaluate on.
        pair: Currency pair name.
        save_path: Output directory; ``None`` lets the evaluator pick its
            default.
        sequence_length: Forwarded to ``ModelEvaluator`` (default matches the
            evaluator's own default).
        n_episodes: Number of episodes passed to ``ModelEvaluator.evaluate``.

    Returns:
        The ``(steps_df, trades_df)`` pair returned by
        ``ModelEvaluator.evaluate``.
    """
    evaluator = ModelEvaluator(
        model_path=model_path,
        vec_normalize_path=vec_normalize_path,
        test_df=test_df,
        pair=pair,
        save_path=save_path,
        sequence_length=sequence_length
    )

    return evaluator.evaluate(n_episodes=n_episodes)


# Evaluate the saved EUR_USD model and keep the per-step and per-trade frames.
# NOTE(review): `test_df` is not defined in this cell; it must already exist in
# the notebook namespace (a later cell creates one via split_dataset).
steps_df, trades_df = evaluate_model(
    model_path="models/EUR_USD_model.zip",
    vec_normalize_path="models/EUR_USD_vec_normalize.pkl",
    test_df=test_df,
    pair="EUR_USD",
    save_path="evaluation_results"
)

In [1]:
import pandas as pd

# Load the CHF_JPY parquet file and display its column names
# (the bare expression is shown as the notebook cell output).
df = pd.read_parquet('/Volumes/ssd_fat2/ai6_trading_bot/datasets/5min/best_dataframes/CHF_JPY_5T_indics_1H_norm.parquet')
df.columns

Index(['open', 'high', 'low', 'close', 'sma_20', 'sma_50', 'rsi', 'macd',
       'macd_signal', 'macd_hist', 'bb_upper', 'bb_middle', 'bb_lower',
       'bb_bandwidth', 'bb_percent', 'atr', 'plus_di', 'minus_di', 'adx',
       'senkou_span_a', 'senkou_span_b', 'tenkan_sen', 'kijun_sen'],
      dtype='object')

In [None]:
from data_management.dataset_manager import DatasetManager

# Pair evaluated in this cell; also selects the parquet and model file names.
TICKER = 'AUD_JPY'
# NOTE: despite the name, this is the path of a single parquet file.
dataframe_dir = f'/Volumes/ssd_fat2/ai6_trading_bot/datasets/5min/best_dataframes/{TICKER}_5T_indics_1H_norm.parquet'



df = pd.read_parquet(dataframe_dir)
dataset_manager = DatasetManager()
# 70 / 15 / 15 split; only test_df is used by the evaluation code below.
train_df, val_df, test_df = dataset_manager.split_dataset(df, train_ratio=0.7, val_ratio=0.15, test_ratio=0.15)

# Load your trained model and normalization wrapper
def make_env():
    """Environment factory: a ForexTradingEnv over the module-level test_df."""
    # trade_size is deliberately not passed (it was commented out upstream).
    env_kwargs = {
        'df': test_df,
        'pair': TICKER,
        'initial_balance': 1_000_000.0,
        'random_start': False,
    }
    return ForexTradingEnv(**env_kwargs)

# Create the raw vectorized environment. It is deliberately NOT wrapped in a
# fresh VecNormalize: the saved wrapper is attached below, and stacking it on
# top of a second, untrained VecNormalize would normalize observations twice.
env = DummyVecEnv([make_env])
# Print current environment's observation space shape
print("New environment observation space shape:", env.observation_space.shape)

model_and_env_path = '/Volumes/ssd_fat2/ai6_trading_bot/datasets/5min/best_dataframes_true_cost/models_and_vecs'


model = PPO.load(f"{model_and_env_path}/{TICKER}_best_model.zip")
# Attach the saved normalization statistics to the raw environment.
vec_normalize = VecNormalize.load(f"{model_and_env_path}/{TICKER}_vec_normalize.pkl", env)
# Evaluation mode: freeze the running statistics and report raw rewards.
vec_normalize.training = False
vec_normalize.norm_reward = False
print("Saved environment observation space shape:", vec_normalize.observation_space.shape)


# Run evaluation
# NOTE(review): evaluate_trained_model is not defined in the visible cells;
# it must be provided elsewhere in the notebook.
results_df = evaluate_trained_model(
    model=model,
    vec_normalize=vec_normalize,  # Pass the saved normalization wrapper
    test_df=test_df,
    pair=TICKER,
    save_path="model_evaluation"
)

In [None]:
from stable_baselines3.common.policies import obs_as_tensor
import torch
import numpy as np
from typing import Dict, Tuple, Optional

class PolicyAnalyzer:
    """Reports per-pair action probabilities and confidence for loaded models."""

    def __init__(self, models: Optional[Dict[str, object]] = None):
        """Store the per-pair models.

        Args:
            models: Mapping of currency pair to a loaded SB3 model. Defaults
                to an empty mapping so lookups for unknown pairs raise the
                documented ``KeyError`` instead of ``AttributeError``.
        """
        self.models = models if models is not None else {}

    def get_action_probabilities(self, pair: str, observation: np.ndarray) -> Dict[str, float]:
        """
        Get the policy's probability distribution over actions.

        Args:
            pair: Currency pair
            observation: Current observation; it is flattened to a single
                row before being passed to the policy

        Returns:
            Dictionary mapping action names to their probabilities

        Raises:
            KeyError: If no model is registered for ``pair``.
        """
        if pair not in self.models:
            raise KeyError(f"No model loaded for {pair}")

        model = self.models[pair]

        # Single-row batch on the policy's device.
        obs_tensor = obs_as_tensor(observation.reshape(1, -1), model.policy.device)

        with torch.no_grad():
            dist = model.policy.get_distribution(obs_tensor)
            probs_np = dist.distribution.probs.cpu().numpy()[0]

        action_map = {0: 'NO_POSITION', 1: 'LONG', 2: 'SHORT'}
        return {action_map[i]: float(prob) for i, prob in enumerate(probs_np)}

    def predict_with_confidence(
        self,
        pair: str,
        observation: np.ndarray,
        conviction_threshold: float = 0.6
    ) -> Tuple[str, float, Dict[str, float]]:
        """
        Pick the most probable action and report confidence metrics.

        Args:
            pair: Currency pair
            observation: Current observation
            conviction_threshold: Minimum probability for the chosen action
                to count as exceeding the threshold

        Returns:
            Tuple of (chosen action, its probability, metrics dict). The
            metrics dict has the keys ``'probabilities'``, ``'entropy'`` and
            ``'exceeds_threshold'``.
        """
        action_probs = self.get_action_probabilities(pair, observation)

        # Ties resolve to the first action in the mapping's order.
        chosen_action, confidence = max(action_probs.items(), key=lambda item: item[1])

        # Entropy as an uncertainty measure; the small constant keeps log()
        # finite for zero probabilities.
        probs = np.array(list(action_probs.values()))
        entropy = float(-np.sum(probs * np.log(probs + 1e-10)))

        return chosen_action, confidence, {
            'probabilities': action_probs,
            'entropy': entropy,
            'exceeds_threshold': confidence >= conviction_threshold
        }

def trading_cycle(self):
    """Run one trading pass over every pair in ``currency_pairs``.

    For each pair that has a loaded model and fresh market data, the function
    requests a prediction, logs its probabilities, and calls
    ``self.execute_trade`` only when the chosen action clears the conviction
    threshold and differs from the current position. Errors are logged per
    pair so one failing pair does not stop the others.

    NOTE(review): defined at module level but written as a method (takes
    ``self``) -- presumably meant to be attached to the trading-bot class that
    provides ``models``, ``data_manager``, ``positions``, ``positions_lock``,
    ``position_to_float``, ``predict_with_confidence`` and ``execute_trade``.
    """
    logger.info("Starting trading cycle")

    for pair in currency_pairs:
        try:
            if pair not in self.models:
                continue

            # Nothing to do when the market data could not be refreshed.
            if not self.data_manager.update_pair_data(pair):
                continue

            with self.positions_lock:
                current_position_type = self.positions.get(pair, 'NO_POSITION')

            # The second element (last timestamp) is not needed here.
            observation, _ = self.data_manager.get_prediction_data(
                pair=pair,
                sequence_length=5,
                current_position=self.position_to_float(current_position_type)
            )

            action, confidence, metrics = self.predict_with_confidence(
                pair,
                observation,
                conviction_threshold=0.6
            )

            logger.info(f"{pair} prediction analysis:")
            logger.info(f"Action probabilities: {metrics['probabilities']}")
            logger.info(f"Chosen action: {action} with confidence: {confidence:.3f}")
            logger.info(f"Decision entropy: {metrics['entropy']:.3f}")

            if not metrics['exceeds_threshold']:
                logger.info(f"Skipping trade for {pair} due to insufficient confidence")
                continue

            # Confident, but the position already matches: not a confidence
            # problem, so it gets its own message.
            if action == current_position_type:
                logger.info(f"No trade needed for {pair}: already in {action}")
                continue

            if self.execute_trade(pair, current_position_type, action):
                # Attach the confidence metrics to the trade record, if any.
                if getattr(self, '_last_trade_info', None):
                    self._last_trade_info.update({
                        'confidence': confidence,
                        'entropy': metrics['entropy'],
                        'all_probabilities': metrics['probabilities']
                    })

        except Exception as e:
            # Per-pair boundary: log with traceback and move on.
            logger.exception(f"Error in trading cycle for {pair}: {str(e)}")
            continue

In [None]:
class ConfidenceAnalyzer:
    """Analyzes the relationship between prediction confidence and trade outcomes."""

    def __init__(self, num_bins: int = 10):
        """
        Args:
            num_bins: Number of quantile bins requested when grouping trades
                by confidence.
        """
        self.num_bins = num_bins
        self.trade_records = []

    def record_trade(self, trade_info: Dict):
        """Record a trade with its confidence metrics and outcome.

        Args:
            trade_info: Must contain ``confidence``, ``entropy``,
                ``all_probabilities``, ``trade_pnl``, ``position_type``,
                ``entry_time`` and ``exit_time``; the two times must support
                subtraction yielding an object with ``total_seconds()``.

        Raises:
            KeyError: If a required key is missing.
        """
        held = trade_info['exit_time'] - trade_info['entry_time']
        self.trade_records.append({
            'confidence': trade_info['confidence'],
            'entropy': trade_info['entropy'],
            'action_probs': trade_info['all_probabilities'],
            'pnl': trade_info['trade_pnl'],
            'position_type': trade_info['position_type'],
            'trade_duration': held.total_seconds() / 3600  # hours
        })

    def analyze_confidence_levels(self) -> pd.DataFrame:
        """Aggregate trade performance per confidence bin.

        Returns:
            DataFrame indexed by confidence bin with the columns
            ``num_trades``, ``avg_pnl``, ``pnl_std``, ``win_rate``,
            ``avg_duration`` and ``avg_entropy``; empty when no trades have
            been recorded. Fewer than ``num_bins`` rows are returned when
            quantile edges coincide.
        """
        if not self.trade_records:
            return pd.DataFrame()

        df = pd.DataFrame(self.trade_records)

        if df['confidence'].nunique() < 2:
            # Quantile binning cannot form an interval from a single distinct
            # value; put everything into one bin instead.
            df['confidence_bin'] = pd.cut(df['confidence'], bins=1)
        else:
            # Clustered confidences produce repeated quantile edges; merge
            # them rather than raising.
            df['confidence_bin'] = pd.qcut(df['confidence'], self.num_bins, duplicates='drop')

        # observed=False keeps the historical default explicitly.
        analysis = df.groupby('confidence_bin', observed=False).agg({
            'pnl': ['count', 'mean', 'std', lambda x: (x > 0).mean()],
            'trade_duration': 'mean',
            'entropy': 'mean'
        })

        # Flatten the (column, aggregation) MultiIndex into readable names.
        analysis.columns = [
            'num_trades', 'avg_pnl', 'pnl_std', 'win_rate', 'avg_duration', 'avg_entropy'
        ]

        return analysis

    def get_optimal_threshold(self, min_trades: int = 20, default_threshold: float = 0.6) -> float:
        """Pick the confidence threshold with the best mean/std PnL ratio.

        Fifty candidate thresholds between 0.4 and 0.9 are scored on the
        trades whose confidence is at or above the candidate.

        Args:
            min_trades: Candidates leaving fewer trades than this are skipped.
            default_threshold: Returned when there are no records or no
                candidate has enough trades.

        Returns:
            The best-scoring threshold as a plain float (the lowest one on
            ties), or ``default_threshold``.
        """
        if not self.trade_records:
            return default_threshold

        df = pd.DataFrame(self.trade_records)

        thresholds = np.linspace(0.4, 0.9, 50)
        metrics = []

        for threshold in thresholds:
            subset = df[df['confidence'] >= threshold]
            if len(subset) < min_trades:
                continue

            metrics.append({
                'threshold': threshold,
                'num_trades': len(subset),
                'win_rate': (subset['pnl'] > 0).mean(),
                'avg_pnl': subset['pnl'].mean(),
                # Small constant avoids division by zero for constant PnL.
                'sharpe': subset['pnl'].mean() / (subset['pnl'].std() + 1e-10)
            })

        if not metrics:
            return default_threshold

        metrics_df = pd.DataFrame(metrics)
        optimal_threshold = metrics_df.loc[metrics_df['sharpe'].idxmax(), 'threshold']

        return float(optimal_threshold)