In [None]:
import os, sys
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)
from trading.environments.forex_env2_flat import ForexTradingEnv, Actions
import pandas as pd
import numpy as np
from pathlib import Path
from typing import Dict, List, Optional
import matplotlib.pyplot as plt
from pprint import pprint

class TransactionLogger:
    """Accumulates one record per environment step for later analysis."""

    def __init__(self):
        # One dict per logged step, kept in insertion order.
        self.transactions = []

    def log_step(
        self,
        step: int,
        pre_step_price: float,
        post_step_price: float,
        action: str,
        reward: float,
        net_worth_chg: float,
        position_type: str,
        balance: float
    ):
        """Append a single step record to the log.

        Each argument is stored under the key of the same name, with one
        exception: ``position_type`` is stored under the key ``'position'``.
        """
        record = dict(
            step=step,
            pre_step_price=pre_step_price,
            post_step_price=post_step_price,
            action=action,
            reward=reward,
            net_worth_chg=net_worth_chg,
            position=position_type,
            balance=balance,
        )
        self.transactions.append(record)

    def to_dataframe(self) -> pd.DataFrame:
        """Return the log as a DataFrame with derived analysis columns.

        An empty log yields an empty DataFrame without any columns. Otherwise
        three columns are appended after the logged ones:

        * ``price_change`` - row-to-row difference of ``pre_step_price``
        * ``price_change_pct`` - row-to-row fractional change of
          ``pre_step_price``
        * ``cumulative_realized_pnl`` - running sum of ``net_worth_chg``
        """
        frame = pd.DataFrame(self.transactions)
        if frame.empty:
            return frame

        decision_prices = frame['pre_step_price']
        frame['price_change'] = decision_prices.diff()
        frame['price_change_pct'] = decision_prices.pct_change()

        # The column keeps its historical name although it is derived from
        # the per-step net worth change.
        frame['cumulative_realized_pnl'] = frame['net_worth_chg'].cumsum()
        return frame

class EnvironmentDebugger:
    """Helper class to debug and visualize the Forex trading environment.

    Steps a ``ForexTradingEnv`` for a bounded number of steps, records every
    step in a ``TransactionLogger`` and offers a text view and a plot of the
    resulting log.
    """

    def __init__(self, env: ForexTradingEnv):
        """Attach the debugger to ``env`` and start with an empty log."""
        self.env = env
        # NOTE(review): no method of this class appends to these two lists;
        # they are kept because external code may still read them.
        self.episode_history = []
        self.current_episode = []
        self.transaction_logger = TransactionLogger()

    def run_debug_episode(
        self,
        max_steps: int = 100,
        action_strategy="random",
        custom_actions: Optional[List[int]] = None
    ):
        """Run one debug episode and return its step log as a DataFrame.

        Args:
            max_steps: Upper bound on the number of environment steps.
            action_strategy: Used for every step that ``custom_actions`` does
                not cover. ``"random"`` samples from the environment's action
                space, ``"cycle"`` uses ``step % len(Actions)``.
            custom_actions: Optional scripted actions; entry ``i`` is used
                for step ``i``.

        Returns:
            The DataFrame produced by the transaction logger for this episode.

        Raises:
            ValueError: If a step is not covered by ``custom_actions`` and
                ``action_strategy`` is neither ``"random"`` nor ``"cycle"``.
        """
        self.env.reset()
        self.current_episode = []
        # Each episode gets its own logger so earlier runs do not leak in.
        self.transaction_logger = TransactionLogger()

        print("\n=== Starting New Debug Episode ===")

        for step in range(max_steps):
            # Price before stepping, i.e. the price at which the decision is made.
            pre_step_price = self.env.current_price
            print(f"Step: {step}, Price pre step: {pre_step_price}")

            # Scripted actions take precedence over the strategy.
            if custom_actions is not None and step < len(custom_actions):
                action = custom_actions[step]
            elif action_strategy == "random":
                action = self.env.action_space.sample()
            elif action_strategy == "cycle":
                action = step % len(Actions)
            else:
                raise ValueError("Unknown action strategy")

            _, reward, terminated, truncated, info = self.env.step(action)

            self.transaction_logger.log_step(
                step=step,
                pre_step_price=pre_step_price,
                # The environment reports the post-step price in its info dict.
                post_step_price=info['current_price'],
                action=Actions(action).name,
                reward=reward,
                net_worth_chg=info['net_worth_chg'],
                position_type=info['position_type'],
                balance=info['balance']
            )

            if terminated or truncated:
                print(f"\nEpisode ended after {step + 1} steps")
                break

        return self.transaction_logger.to_dataframe()

    def _print_transaction_log(self):
        """Print every logged step followed by summary statistics.

        Only columns that ``run_debug_episode`` actually records are used.
        With an empty log, only the headers and the step count are printed.
        """
        df = self.transaction_logger.to_dataframe()

        print("\nDetailed Transaction Log:")
        print("Step | Pre-Price  | Post-Price | Action | Reward | Net Worth Chg | Balance")
        print("-" * 80)
        for row in df.itertuples(index=False):
            print(f"{int(row.step):4d} | {row.pre_step_price:.5f} | {row.post_step_price:.5f} | "
                  f"{row.action:<6} | {row.reward:.5f} | {row.net_worth_chg:.5f} | "
                  f"{row.balance:.2f}")

        print("\nSummary Statistics:")
        print(f"Total Steps: {len(df)}")
        if df.empty:
            # No rows means no columns, so the statistics below cannot be computed.
            return
        print(f"Total Net Worth Change: {df['net_worth_chg'].sum():.5f}")
        print(f"Final Balance: {df['balance'].iloc[-1]:.2f}")

    def plot_transaction_analysis(self):
        """Plot price with action markers, net worth change and rewards.

        Draws three stacked panels from the current transaction log. If the
        log is empty a message is printed and nothing is drawn.
        """
        df = self.transaction_logger.to_dataframe()
        if df.empty:
            print("No transaction data to plot")
            return

        steps = df['step']
        _, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(15, 12))

        # Panel 1: post-step price, which is what the env reported as
        # 'current_price' after each step.
        ax1.plot(steps, df['post_step_price'], label='Price', color='blue')
        ax1.set_title('Price and Positions')
        ax1.legend()

        # Shade each step on which a position-taking action was chosen.
        for step, action in zip(steps, df['action']):
            if action != 'NO_POSITION':
                color = 'green' if action == 'LONG' else 'red'
                ax1.axvline(x=step, color=color, alpha=0.2)

        # Panel 2: running and per-step net worth change.
        ax2.plot(steps, df['net_worth_chg'].cumsum(),
                 label='Cumulative Net Worth Change', color='green')
        ax2.plot(steps, df['net_worth_chg'],
                 label='Net Worth Change per Step', color='blue', alpha=0.5)
        ax2.set_title('PnL Analysis')
        ax2.legend()

        # Panel 3: reward returned by the environment at each step.
        ax3.plot(steps, df['reward'], label='Reward', color='purple')
        ax3.set_title('Rewards')
        ax3.legend()

        plt.tight_layout()
        plt.show()


# Data locations: the 1h parquet path is derived from the pair name, while
# this cell actually loads the 5-minute file.
pair = "EUR_USD"
parquet_path = Path("/Volumes/ssd_fat2/ai6_trading_bot/datasets/1h").joinpath(f"{pair}.parquet")
file_5min = '/Users/floriankockler/Downloads/EUR_USD_5T_indics_norm2.parquet'
df = pd.read_parquet(file_5min)

# Environment under test, with random_start switched off.
env = ForexTradingEnv(df=df, pair=pair, initial_balance=1_000_000,
                      sequence_length=10, random_start=False)
debugger = EnvironmentDebugger(env)

# Scripted action sequence used instead of a sampling strategy.
custom_actions = [
    member.value
    for member in (
        Actions.LONG,         # Open long
        Actions.LONG,         # Hold long
        Actions.SHORT,        # Switch to short
        Actions.SHORT,        # Hold short
        Actions.NO_POSITION,  # Close position
    )
]

# Run exactly as many steps as there are scripted actions.
transaction_df = debugger.run_debug_episode(
    custom_actions=custom_actions,
    max_steps=len(custom_actions),
)
transaction_df
# Optional plot of the logged episode:
# debugger.plot_transaction_analysis()

In [None]:
# Load the 1h dataset for the pair and preview its first 15 rows.
pair = "EUR_USD"
parquet_path = Path("/Volumes/ssd_fat2/ai6_trading_bot/datasets/1h").joinpath(f"{pair}.parquet")
df = pd.read_parquet(parquet_path)
df.head(n=15)

In [None]:
pair = "EUR_USD"
parquet_path = Path("/Volumes/ssd_fat2/ai6_trading_bot/datasets/1h") / f"{pair}.parquet"
df = pd.read_parquet(parquet_path)

# Create environment
env = ForexTradingEnv(
    df=df,
    pair=pair,
    initial_balance=1_000_000,
    sequence_length=10,
    random_start=False  # Disable random start for debugging
)

# Create debugger
debugger = EnvironmentDebugger(env)

# Run debug episode with random actions. run_debug_episode returns the
# per-step log as a DataFrame, and the summary and plots below are built
# from that frame: EnvironmentDebugger defines no print_episode_summary()
# or plot_episode_results(), so calling them would raise AttributeError.
print("\nRunning episode with random actions...")
transaction_df = debugger.run_debug_episode(max_steps=50, action_strategy="random")

print("\n=== Episode Summary ===")
print(f"Number of steps: {len(transaction_df)}")
if not transaction_df.empty:
    print(f"Total reward: {transaction_df['reward'].sum():.6f}")
    print(f"Total net worth change: {transaction_df['net_worth_chg'].sum():,.2f}")
    print(f"Final balance: ${transaction_df['balance'].iloc[-1]:,.2f}")

    # One panel each for price, reward and balance over the episode.
    transaction_df.plot(
        x='step',
        y=['post_step_price', 'reward', 'balance'],
        subplots=True,
        figsize=(12, 12),
        grid=True,
    )
    plt.tight_layout()
    plt.show()

In [None]:
# import os
# import sys

# import pandas as pd

# from datetime import datetime, timedelta
# from pathlib import Path


# # Add the project root to the Python path
# project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
# if project_root not in sys.path:
#     sys.path.append(project_root)
# from stable_baselines3 import PPO
# from stable_baselines3.common.vec_env import VecNormalize, DummyVecEnv
# from trading.environments.forex_env2_flat import ForexTradingEnv
# from stable_baselines3.common.callbacks import EvalCallback
# from stable_baselines3.common.monitor import Monitor
# from data_management.dataset_manager import DatasetManager

# pair = "EUR_USD"
# # parquet_path = Path("/Volumes/ssd_fat2/ai6_trading_bot/datasets/1h") / f"{pair}.parquet"
# # parquet_path = Path("/Volumes/ssd_fat2/ai6_trading_bot/datasets/1h") / f"{pair}.parquet"
# norm_robust_path = Path('/Volumes/ssd_fat2/ai6_trading_bot/datasets/1h/normalized/eur_norm_robut.parquet')
# eur_standard = Path('/Volumes/ssd_fat2/ai6_trading_bot/datasets/1h/EUR_USD.parquet')
# df = pd.read_parquet(eur_standard)

# dataset_manager = DatasetManager()
# train_df, val_df, test_df = dataset_manager.split_dataset(df, train_ratio=0.7, val_ratio=0.15, test_ratio=0.15)



# saving_path = f'./logs/26nov/not_norm_flat/'
# os.makedirs(saving_path, exist_ok=True)

# def make_train_env():
#     env = ForexTradingEnv(
#         df=train_df,
#         pair='EUR_USD',

#     )
#     env = Monitor(env)
#     env = DummyVecEnv([lambda: env])
#     env = VecNormalize(env, norm_obs=True, norm_reward=True)
#     return env

# def make_eval_env():
#     env = ForexTradingEnv(

#         df=val_df,
#         pair='EUR_USD',
#         # resample_interval='1h'
#     )
#     env = Monitor(env)
#     env = DummyVecEnv([lambda: env])
#     env = VecNormalize(env, norm_obs=True, norm_reward=False)
#     env.training = False
#     return env

# train_env = make_train_env()
# eval_env = make_eval_env()
# eval_callback = EvalCallback(
#     eval_env,
#     best_model_save_path=saving_path,
#     log_path=saving_path,
#     eval_freq=100_000,  # Adjust as needed
#     n_eval_episodes=5,
#     deterministic=True,
#     render=False
# )

# model = PPO(
#     'MlpPolicy',
#     train_env,
#     verbose=0,
#     tensorboard_log=f'{saving_path}tensorboard/',
# )

# model.learn(
#     total_timesteps=5_000_000,  # Adjust as needed
#     callback=eval_callback
# )

# model.save(f'{saving_path}best_model.zip')
# train_env.save(f'{saving_path}vec_normalize.pkl')

In [None]:
# import os, sys
# project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
# if project_root not in sys.path:
#     sys.path.append(project_root)
# from trading.environments.forex_env2_flat import ForexTradingEnv, Actions
# import pandas as pd
# import numpy as np
# from pathlib import Path
# import matplotlib.pyplot as plt
# from pprint import pprint

# class EnvironmentDebugger:
#     """Helper class to debug and visualize the Forex trading environment."""
    
#     def __init__(self, env: ForexTradingEnv):
#         self.env = env
#         self.episode_history = []
#         self.current_episode = []
        
#     def run_debug_episode(self, max_steps: int = 100, action_strategy="random"):
#         """Run a debug episode with specified action strategy."""
#         # Handle the reset tuple return (observation, info)
#         observation, info = self.env.reset()
#         self.current_episode = []
        
#         print("\n=== Starting New Debug Episode ===")
#         print(f"Initial observation shape: {observation.shape}")
#         print("\nInitial Info:")
#         pprint(info)
        
#         # Print initial state
#         self._print_step_info(observation, None, 0, info, 0)
        
#         for step in range(max_steps):
#             # Get action based on strategy
#             if action_strategy == "random":
#                 action = self.env.action_space.sample()
#             elif action_strategy == "cycle":
#                 action = step % len(Actions)
#             else:
#                 raise ValueError("Unknown action strategy")
            
#             # Take step in environment and unpack return values
#             next_obs, reward, terminated, truncated, info = self.env.step(action)
            
#             # Store step information
#             step_data = {
#                 'step': step,
#                 'action': Actions(action).name,
#                 'observation': next_obs.copy(),
#                 'reward': reward,
#                 'info': info.copy()
#             }
#             self.current_episode.append(step_data)
            
#             # Print step information
#             self._print_step_info(next_obs, action, reward, info, step + 1)
            
#             if terminated or truncated:
#                 print(f"\nEpisode ended after {step + 1} steps")
#                 print("Final Info:")
#                 pprint(info)
#                 break
                
#             observation = next_obs
        
#         self.episode_history.append(self.current_episode)
        
#     def _print_step_info(self, obs, action, reward, info, step):
#         """Print detailed information about the current step."""
#         print(f"\nStep {step}")
#         print("-" * 50)
        
#         # Print action if not initial step
#         if action is not None:
#             print(f"Action taken: {Actions(action).name}")
#             print(f"Reward received: {reward:.6f}")
        
#         # Print observation components
#         market_size = self.env.sequence_length * self.env.market_features
#         print("\nObservation breakdown:")
#         print(f"Market features shape: {obs[:market_size].shape}")
#         print(f"Position info: {obs[market_size:]}")  # Balance and position direction
        
#         # Print current market data
#         current_timestamp = info.get('timestamp', None)
#         current_price = info.get('current_price', None)
#         if current_timestamp and current_price:
#             print(f"\nCurrent Time: {current_timestamp}")
#             print(f"Current Price: {current_price}")
        
#         # Print trading state
#         position_type = info.get('position_type', 'none')
#         balance = info.get('balance', 0.0)
#         unrealized_pnl = info.get('unrealized_pnl', 0.0)
        
#         print(f"\nTrading State:")
#         print(f"Position: {position_type}")
#         print(f"Balance: ${balance:,.2f}")
#         print(f"Unrealized PnL: ${unrealized_pnl:,.2f}")
        
#         print("-" * 50)
    
#     def plot_episode_results(self):
#         """Plot key metrics from the most recent episode."""
#         if not self.current_episode:
#             print("No episode data to plot")
#             return
        
#         # Extract data
#         steps = [data['step'] for data in self.current_episode]
#         rewards = [data['reward'] for data in self.current_episode]
#         balances = [data['info']['balance'] for data in self.current_episode]
#         prices = [data['info']['current_price'] for data in self.current_episode]
        
#         # Create plot
#         fig, axes = plt.subplots(3, 1, figsize=(12, 12))
        
#         # Plot rewards
#         axes[0].plot(steps, rewards, label='Reward', color='blue')
#         axes[0].set_title('Rewards per Step')
#         axes[0].set_xlabel('Step')
#         axes[0].set_ylabel('Reward')
#         axes[0].grid(True)
        
#         # Plot balance
#         axes[1].plot(steps, balances, label='Account Balance', color='green')
#         axes[1].set_title('Account Balance')
#         axes[1].set_xlabel('Step')
#         axes[1].set_ylabel('Balance')
#         axes[1].grid(True)
        
#         # Plot price
#         axes[2].plot(steps, prices, label='Price', color='red')
#         axes[2].set_title('Price Movement')
#         axes[2].set_xlabel('Step')
#         axes[2].set_ylabel('Price')
#         axes[2].grid(True)
        
#         plt.tight_layout()
#         plt.show()
        
#     def print_episode_summary(self):
#         """Print summary statistics for the most recent episode."""
#         if not self.current_episode:
#             print("No episode data to summarize")
#             return
            
#         first_step = self.current_episode[0]['info']
#         last_step = self.current_episode[-1]['info']
        
#         print("\n=== Episode Summary ===")
#         print(f"Number of steps: {len(self.current_episode)}")
#         print(f"Initial balance: ${first_step['balance']:,.2f}")
#         print(f"Final balance: ${last_step['balance']:,.2f}")
#         print(f"Total PnL: ${last_step['total_pnl']:,.2f}")
#         print(f"Total trades: {last_step['total_trades']}")
#         if last_step['total_trades'] > 0:
#             print(f"Win rate: {last_step['win_rate']:.2%}")
#         print("=" * 30)