In [106]:
import pandas as pd
import numpy as np
import os 
from typing import Dict, Any


def convert_1m_csv_to_5m_df(file_path: str) -> pd.DataFrame:
    """Load a headerless 1-minute OHLCV CSV and aggregate it into 5-minute candles.

    The file must contain seven comma-separated columns in the order
    ``Date, Time, Open, High, Low, Close, Volume`` where ``Date`` and ``Time``
    together match ``%Y.%m.%d %H:%M``.

    Args:
        file_path: Path of the 1-minute CSV file.

    Returns:
        A DataFrame indexed by the 5-minute bar start (index name
        ``Datetime``) with columns ``Open, High, Low, Close, Volume``.
        Bars without any data or with zero volume are removed. An empty
        DataFrame is returned when the file does not exist or cannot be
        loaded (a message is printed in both cases).
    """
    # Column names, in the order they appear in the data file.
    COLUMN_NAMES = ['Date', 'Time', 'Open', 'High', 'Low', 'Close', 'Volume']
    DATETIME_FORMAT = '%Y.%m.%d %H:%M'

    if not os.path.exists(file_path):
        print(f"üö® Error: File not found at path: {file_path}")
        return pd.DataFrame()

    try:
        df = pd.read_csv(file_path, sep=',', header=None, names=COLUMN_NAMES,
                         dtype={'Open': np.float64, 'High': np.float64, 'Low': np.float64, 'Close': np.float64})

        # Coerce Volume to a number, treat missing/unparsable values as 0,
        # then store it as an integer.
        df['Volume'] = pd.to_numeric(df['Volume'], errors='coerce').fillna(0)
        df['Volume'] = df['Volume'].astype(np.int64)

    except Exception as e:
        print(f"üö® Error loading CSV file: {e}")
        return pd.DataFrame()

    # Build the datetime index from the Date and Time text columns.
    stamp_text = df['Date'].astype(str) + ' ' + df['Time'].astype(str)
    df['Datetime'] = pd.to_datetime(stamp_text, format=DATETIME_FORMAT, errors='coerce')

    # Rows whose timestamp could not be parsed would become NaT index labels;
    # drop them before resampling.
    df = df.dropna(subset=['Datetime'])
    df = df.set_index('Datetime').drop(columns=['Date', 'Time'])

    # 'first'/'last' below pick rows by position, so the rows must be in
    # chronological order. A stable sort keeps file order for equal stamps.
    df = df.sort_index(kind='mergesort')

    ohlcv_aggregation_rules: Dict[str, Any] = {
        'Open': 'first', 'High': 'max', 'Low': 'min', 'Close': 'last', 'Volume': 'sum'
    }
    df_5m = df.resample('5min').agg(ohlcv_aggregation_rules)

    # Empty 5-minute bins have NaN prices and a volume sum of 0: remove both.
    df_5m = df_5m.dropna()
    df_5m = df_5m[df_5m['Volume'] > 0]

    print(f"‚úÖ Conversion successful! 5-Min rows: {len(df_5m)}")
    return df_5m

In [107]:
import json

class EnvConfig():
    """Read-only access to the environment configuration stored in a JSON file.

    The JSON document is loaded once in the constructor and kept in
    ``self.config``. The accessors below read these top-level sections:
    ``env``, ``symbol``, ``data_processing`` (including its ``indicator``
    sub-section) and ``trading_hour``.

    Args:
        config_file: Path of the JSON configuration file.
    """

    def __init__(self, config_file):
        self.config = {}
        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(config_file, encoding="utf-8") as j:
            self.config = json.load(j)

    @staticmethod
    def _select(section, item):
        """Return ``section[item]`` when *item* is truthy, else the whole section."""
        return section[item] if item else section

    def env_parameters(self, item=''):
        """Return one entry of the ``env`` section, or the whole section.

        Args:
            item (str, optional): Key inside ``env``. Defaults to '' (return
                the whole section).

        Raises:
            KeyError: If the section or the requested key is missing.
        """
        return self._select(self.config["env"], item)

    def symbol(self, asset="GBPUSD", item=''):
        """Return the configuration of a trading pair (symbol).

        Args:
            asset (str, optional): Symbol key in the config. Defaults to "GBPUSD".
            item (str, optional): Key inside the symbol entry; '' returns the
                whole entry. Defaults to ''.

        Raises:
            KeyError: If the symbol or the requested key is missing.
        """
        return self._select(self.config["symbol"][asset], item)

    def data_processing_parameters(self, item=''):
        """Return one entry of the ``data_processing`` section, or the whole section.

        Args:
            item (str, optional): Key inside ``data_processing``. Defaults to ''
                (return the whole section).

        Raises:
            KeyError: If the section or the requested key is missing.
        """
        return self._select(self.config["data_processing"], item)

    def trading_hour(self, place="NewYork"):
        """Return the configured trading hours of a market.

        Args:
            place (str, optional): Key inside ``trading_hour``. Defaults to
                "NewYork". A falsy value returns the whole section.

        Returns:
            The entry stored under *place*, or the whole ``trading_hour``
            section.

        Raises:
            KeyError: If the section or the requested market is missing.
        """
        return self._select(self.config["trading_hour"], place)

    def indicator(self, place="sma_fast_period"):
        """Return a technical-indicator setting.

        Args:
            place (str, optional): Key inside ``data_processing.indicator``.
                Defaults to "sma_fast_period". A falsy value returns the whole
                ``indicator`` sub-section.

        Raises:
            KeyError: If the sub-section or the requested key is missing.
        """
        return self._select(self.config["data_processing"]["indicator"], place)



In [108]:

import pandas as pd
import logging
from finta import TA
from sklearn.preprocessing import StandardScaler
import holidays
import os
import json

import logging
# Module-level logger used by the data-preparation helpers.
logger = logging.getLogger(__name__)

def patch_missing_data(df, dt_col_name='time', cf=None):
    """Put a price frame on a regular 5-minute grid, week by week.

    Columns are renamed positionally to the configured ``required_cols``
    (plus ``vol`` when a sixth column is present), the datetime column is
    parsed as UTC and becomes the index, and every ``W-SUN`` week is
    re-indexed from its first to its last bar at 5-minute frequency. Gaps of
    up to 12 bars (one hour) are forward-filled; longer gaps stay NaN. Gaps
    between weeks are not filled.

    Args:
        df: Frame with 5 columns (datetime + OHLC) or 6 columns (+ volume),
            in the order given by ``required_cols``. It is not modified.
        dt_col_name: Name of the datetime column after renaming. If it is not
            present, the first column named ``time``, ``timestamp``, ``date``
            or ``datetime`` is used.
        cf: Configuration object providing
            ``data_processing_parameters("min_bars_per_week")`` and
            ``data_processing_parameters("required_cols")``.

    Returns:
        The patched frame with a timezone-naive DatetimeIndex. When there is
        nothing to patch, a copy of the indexed input is returned.

    Raises:
        ValueError: If the frame does not have 5 or 6 columns.
        KeyError: If no datetime column can be found.
    """
    min_bars = cf.data_processing_parameters("min_bars_per_week")

    # e.g. ["time", "open", "high", "low", "close"]
    required_cols = list(cf.data_processing_parameters("required_cols"))

    # Work on a copy: the renaming and datetime conversion below must not
    # leak into the caller's frame.
    df = df.copy()

    # A sixth column, when present, is the volume.
    if df.shape[1] == 6:
        df.columns = required_cols + ['vol']
    elif df.shape[1] == 5:
        df.columns = required_cols
    else:
        raise ValueError(f"Invalid number of columns: {df.shape[1]} =>{required_cols}")

    logger.warning(f"shape of  column: {df.shape[1]}")

    # Auto-detect the datetime column, scanning in column order so the choice
    # is deterministic.
    dt_candidates = {'time', 'timestamp', 'date', 'datetime'}
    if dt_col_name not in df.columns:
        found = [col for col in df.columns if col in dt_candidates]
        if not found:
            raise KeyError(f"No datetime column found. Tried: {dt_candidates}")
        dt_col_name = found[0]
        logger.info(f"Using datetime column: {dt_col_name}")

    # Convert to a sorted UTC datetime index. The sort is stable so that, for
    # repeated timestamps, "last" means the last row of the input.
    df[dt_col_name] = pd.to_datetime(df[dt_col_name], utc=True)
    df = df.set_index(dt_col_name).sort_index(kind='mergesort')

    # reindex() refuses duplicate labels; keep the most recent row per stamp.
    df = df[~df.index.duplicated(keep='last')]

    fill_limit = 12  # do not fill gaps longer than 1 hour (12 bars)
    fill_cols = ['open', 'high', 'low', 'close']
    if 'vol' in df.columns:
        fill_cols.append('vol')

    patched_weeks = []  # patched weekly frames

    for w, week_df in df.groupby(pd.Grouper(freq='W-SUN')):
        if week_df.empty:
            continue

        if len(week_df) != min_bars:
            logger.warning(f"Week {w} has {len(week_df)}/{min_bars} bars")

        # Regular 5-minute grid spanning this week's first to last bar.
        new_index = pd.date_range(
            start=week_df.index.min(),
            end=week_df.index.max(),
            freq='5min',
            tz='UTC'
        )

        # Reindex onto the grid, drop the timezone, forward-fill short gaps.
        week_df = week_df.reindex(new_index)
        week_df.index = week_df.index.tz_localize(None)
        week_df[fill_cols] = week_df[fill_cols].ffill(limit=fill_limit)
        patched_weeks.append(week_df)

    # Merge all weeks back together.
    if patched_weeks:
        all_df = pd.concat(patched_weeks)
    else:
        all_df = df.copy()

    return all_df

In [109]:

def add_time_feature(df_5m: pd.DataFrame, cf=None, source_tz='UTC') -> pd.DataFrame:
    """Add calendar, cyclical-hour and market-session features to a 5-minute frame.

    The index is brought to UTC first: a timezone-naive index is interpreted
    as wall-clock time in ``source_tz`` and converted; a timezone-aware index
    is converted directly (``source_tz`` is then ignored). Rows whose naive
    timestamp does not exist or is ambiguous in ``source_tz`` (DST
    transitions) are dropped, because they cannot be mapped to a single UTC
    instant.

    Added columns: ``weekday, day, week, month, year, hour`` (all from the UTC
    index), ``hour_sin, hour_cos``, and the 0/1 flags ``london_session``,
    ``ny_session`` and ``overlap_session``. Session flags compare the local
    Europe/London and America/New_York hour against the ``from``/``to`` hours
    returned by ``cf.trading_hour``, so they follow DST automatically.

    Args:
        df_5m: Frame with a DatetimeIndex. It is not modified.
        cf: Configuration object whose ``trading_hour('NewYork')`` and
            ``trading_hour('London')`` return mappings with ``from`` and
            ``to`` local hours.
        source_tz: Timezone of a naive index (e.g. 'UTC' or a broker zone).

    Returns:
        A new frame with a UTC DatetimeIndex and the feature columns added.

    Raises:
        TypeError: If the index is not a DatetimeIndex.
    """
    # The index must be a DatetimeIndex.
    if not isinstance(df_5m.index, pd.DatetimeIndex):
        raise TypeError("DataFrame ·Åè Index ·Äû·Ää·Ä∫ DatetimeIndex ·Äñ·Äº·ÄÖ·Ä∫·Äõ·Äï·Ä´·Äô·Ää·Ä∫·Åã")

    if df_5m.index.tz is None:
        # Naive data: attach the source timezone, then move to UTC.
        df = df_5m.tz_localize(source_tz, ambiguous='NaT', nonexistent='NaT')
        # DST gaps/overlaps were turned into NaT above; such rows would break
        # the integer calendar features below, so remove them.
        df = df[df.index.notna()].copy()
        df = df.tz_convert('UTC')
    else:
        # Already timezone-aware: keep the instants, only change the zone.
        df = df_5m.tz_convert('UTC')

    # ----------------------------------------------------
    # I. Basic calendar features and cyclical hour encoding (UTC index)
    # ----------------------------------------------------
    df['weekday'] = df.index.dayofweek
    df['day'] = df.index.day
    df['week'] = df.index.isocalendar().week.astype(int)
    df['month'] = df.index.month
    df['year'] = df.index.year
    df['hour'] = df.index.hour

    # Cyclical encoding of the hour of day.
    df['hour_sin'] = np.sin(2 * np.pi * df['hour']/24).round(6)
    df['hour_cos'] = np.cos(2 * np.pi * df['hour']/24).round(6)

    # ----------------------------------------------------
    # III. DST-aware market sessions
    # ----------------------------------------------------
    # Local wall-clock time of each bar in the two markets.
    london_time = df.index.tz_convert('Europe/London')
    ny_time = df.index.tz_convert('America/New_York')

    # Session hours are expected in each market's local time.
    ny = cf.trading_hour('NewYork')
    ldn = cf.trading_hour('London')

    df['london_session'] = ((london_time.hour >= ldn['from']) & (london_time.hour < ldn['to'])).astype(int)
    df['ny_session'] = ((ny_time.hour >= ny['from']) & (ny_time.hour < ny['to'])).astype(int)
    df['overlap_session'] = (df['london_session'] & df['ny_session']).astype(int)

    # Drop a temporary column if an upstream step left it behind.
    df = df.drop(columns=['minute_block_15'], errors='ignore')

    # The DatetimeIndex is kept (not reset) for downstream consumers.
    return df

In [110]:
from finta import TA

def tech_indicators(df, cf=None):
    """Add price-action, momentum and trend features to an OHLC frame.

    Indicator periods are read from ``cf.indicator(...)``. The following
    columns are added: ``atr_base``, ``log_returns``, ``price_norm``,
    ``spread_ratio``, ``body_ratio``, ``rsi``, ``macd_hist``, ``adx``,
    ``stoch_k``, ``ma_fast``, ``ma_slow`` and ``ma_cross``.

    Args:
        df: Frame with lower-case ``open``, ``high``, ``low``, ``close``
            columns. It is not modified; a copy is returned.
        cf: Configuration object exposing ``indicator(name)`` for
            ``sma_fast_period``, ``sma_mid_period``, ``sma_slow_period``,
            ``atr_period``, ``rsi_period``, ``macd_fast_period``,
            ``macd_slow_period``, ``macd_signal_period``, ``adx_period`` and
            ``stoch_period``.

    Returns:
        A new frame with the feature columns added, infinities and NaN
        replaced by 0, and every numeric column clipped to [-1e5, 1e5] and
        rounded to 6 decimals.
    """
    sma_fast_period = cf.indicator('sma_fast_period')
    sma_mid_period = cf.indicator('sma_mid_period')
    sma_slow_period = cf.indicator('sma_slow_period')
    atr_period = cf.indicator('atr_period')
    rsi_period = cf.indicator('rsi_period')
    macd_fast_period = cf.indicator('macd_fast_period')
    macd_slow_period = cf.indicator('macd_slow_period')
    macd_signal_period = cf.indicator('macd_signal_period')
    adx_period = cf.indicator('adx_period')
    stoch_period = cf.indicator('stoch_period')

    # Work on a copy so the caller's frame does not silently gain columns.
    df = df.copy()

    # --- 1. Volatility measure (ATR is the normalisation base below) ---
    df['atr_base'] = TA.ATR(df, period=atr_period).ffill()

    # --- 2. Price-action features ---
    df['log_returns'] = np.log(df['close'] / df['close'].shift(1)).ffill().round(6)
    # Close-to-close change over `sma_fast_period` bars, in ATR units.
    df['price_norm'] = (df['close'] - df['close'].shift(sma_fast_period)) / df['atr_base']
    df['spread_ratio'] = (df['high'] - df['low']) / df['atr_base']
    # A zero high-low range is replaced by a tiny value to avoid dividing by 0.
    df['body_ratio'] = (df['close'] - df['open']) / (df['high'] - df['low']).replace(0, 1e-6)

    # --- 3. Momentum and trend features ---
    df['rsi'] = TA.RSI(df, period=rsi_period).ffill().round(6)
    macd_data = TA.MACD(df, period_fast=macd_fast_period, period_slow=macd_slow_period, signal=macd_signal_period)
    # The histogram is the MACD line minus its signal line (the signal line
    # on its own is not a histogram).
    df['macd_hist'] = (macd_data['MACD'] - macd_data['SIGNAL']).ffill().round(6)

    # ADX: trend strength (low = ranging, high = strong trend).
    df['adx'] = TA.ADX(df, period=adx_period)

    # Stochastic %K complements RSI for overbought/oversold detection.
    df['stoch_k'] = TA.STOCH(df, period=stoch_period)

    # Long-term moving averages; note 'ma_fast' uses the *mid* SMA period.
    df['ma_fast'] = TA.SMA(df, period=sma_mid_period)
    df['ma_slow'] = TA.SMA(df, period=sma_slow_period)

    # Distance between the two averages, normalised by ATR.
    df['ma_cross'] = ((df['ma_fast'] - df['ma_slow']) / df['atr_base']).ffill().round(6)

    # --- 4. Data cleaning ---
    # NOTE(review): fillna(0) applies to every column, so any NaN still
    # present in the input price columns also becomes 0 — confirm intended.
    df = df.replace([np.inf, -np.inf], np.nan).fillna(0)

    numeric_cols = df.select_dtypes(include=[np.number]).columns
    df[numeric_cols] = df[numeric_cols].clip(lower=-1e5, upper=1e5).round(6)

    return df

In [115]:
import pandas as pd
import numpy as np

def add_news_feature(df_price: pd.DataFrame, df_news: pd.DataFrame, ahead_bars: int = 6, behind_bars: int = 6) -> pd.DataFrame:
    """Flag price bars that surround high-impact news events.

    For every news time the first bar at or after the event is located with
    a binary search, and ``is_high_impact_news`` is set to 1 on the
    ``ahead_bars`` bars before it and on ``behind_bars`` bars starting with
    it. Events that fall outside the time range covered by ``df_price`` are
    ignored, as are news rows whose time cannot be determined.

    Timezones: when the price index is timezone-aware, naive news times are
    taken as UTC and converted to the index timezone; when it is naive, any
    timezone on the news times is dropped.

    Args:
        df_price: Price frame with a sorted DatetimeIndex. The flag column is
            added to (or updated in) this frame in place.
        df_news: Frame with a ``Start`` column holding the event times. It is
            not modified.
        ahead_bars: Number of bars to flag before the event.
        behind_bars: Number of bars to flag from the event bar onwards.

    Returns:
        ``df_price`` with the ``is_high_impact_news`` column. If the ``Start``
        column cannot be converted, a message is printed and ``df_price`` is
        returned with the column left at its initial values.
    """
    flag_col = 'is_high_impact_news'

    # 1. Initialise the flag column with 0 unless it already exists.
    if flag_col not in df_price.columns:
        df_price[flag_col] = 0

    # 2. Parse the news start times (kept local: df_news is not modified).
    try:
        news_times = pd.to_datetime(df_news['Start'])
    except Exception as e:
        print(f"Error converting news 'Start' column to datetime: {e}")
        return df_price

    price_index = df_price.index
    n_bars = len(price_index)
    if n_bars == 0:
        return df_price

    # 3. Align the news times with the timezone of the price index.
    if price_index.tz is not None:
        if news_times.dt.tz is None:
            news_times = news_times.dt.tz_localize('UTC')
        news_times = news_times.dt.tz_convert(price_index.tz)
    else:
        news_times = news_times.dt.tz_localize(None)
    news_times = news_times.dropna()

    # 4. Locate every event with searchsorted and flag the surrounding bars.
    #    Flags are collected in an array and written back once.
    flags = df_price[flag_col].to_numpy(copy=True)
    first_bar, last_bar = price_index[0], price_index[-1]

    for news_time in news_times:
        # An event outside the covered period has no surrounding bars here;
        # without this check an old event would flag the first bars.
        if news_time < first_bar or news_time > last_bar:
            continue

        loc = price_index.searchsorted(news_time, side='left')
        start_index = max(0, loc - ahead_bars)
        end_index = min(n_bars, loc + behind_bars)
        flags[start_index:end_index] = 1

    df_price[flag_col] = flags
    return df_price

In [111]:
# Input: raw 1-minute CSV; output: the 5-minute CSV produced from it.
file_path_from_data = "./drive/MyDrive/data/raw/EURUSD_2020_all.csv"
file_path_to_data = "./drive/MyDrive/data/raw/EURUSD_2020_all_5.csv"

# Resample to 5-minute candles and persist the result (the index is written
# as the first CSV column).
df_all_5m_data = convert_1m_csv_to_5m_df(file_path_from_data)
df_all_5m_data.to_csv(file_path_to_data)


‚úÖ Conversion successful! 5-Min rows: 149715


In [112]:
# Load the JSON configuration used by all processing steps below.
cf = EnvConfig('./drive/MyDrive/configure.json')

# Re-read the 5-minute CSV written above; no index column is specified, so
# the datetime arrives as an ordinary first column.
raw = pd.read_csv(file_path_to_data)

# Put the data on a regular 5-minute grid and fill short gaps.
df = patch_missing_data(raw,cf=cf)

shape of  column: 6
Week 2020-01-05 00:00:00+00:00 has 576/1440 bars
Week 2020-01-12 00:00:00+00:00 has 1439/1440 bars
Week 2020-03-29 00:00:00+00:00 has 1433/1440 bars
Week 2020-09-20 00:00:00+00:00 has 1439/1440 bars
Week 2020-10-04 00:00:00+00:00 has 1436/1440 bars
Week 2020-12-27 00:00:00+00:00 has 1152/1440 bars
Week 2021-01-03 00:00:00+00:00 has 1142/1440 bars
Week 2021-05-23 00:00:00+00:00 has 1439/1440 bars
Week 2021-05-30 00:00:00+00:00 has 1437/1440 bars
Week 2021-06-06 00:00:00+00:00 has 1439/1440 bars
Week 2021-09-19 00:00:00+00:00 has 1439/1440 bars
Week 2021-10-10 00:00:00+00:00 has 1439/1440 bars
Week 2021-12-12 00:00:00+00:00 has 1439/1440 bars
Week 2021-12-26 00:00:00+00:00 has 1438/1440 bars
Week 2022-01-02 00:00:00+00:00 has 1428/1440 bars


In [113]:
# Broker data (timestamps in broker server time, day starting at 00:00):
# use a DST-aware zone name so the GMT+2/GMT+3 switch is handled automatically.
axiory_tz = 'Europe/Kiev'  

# Interpret the naive index as broker time, convert to UTC, add time features.
dft = add_time_feature(df, cf=cf, source_tz=axiory_tz)

In [114]:
# Add technical-indicator features; `df` is rebound to the enriched frame.
df = tech_indicators(dft, cf=cf)

In [116]:
# Load the news calendar and flag the bars around each event, then save the
# final feature set.
file_path_from_news = "./drive/MyDrive/data/raw/2020_2021_News.csv"
news = pd.read_csv(file_path_from_news)
df_news = add_news_feature(df, news)
df_news.to_csv('final.csv')

In [None]:


# Scratch cell: re-runs the pipeline (config -> patch -> time features ->
# indicators) and writes the intermediate result to 'tech.csv'.
cf = EnvConfig('./drive/MyDrive/configure.json')

raw = pd.read_csv(file_path_to_data)

df = patch_missing_data(raw,cf=cf)

# NOTE(review): `symbol` is not used in the code visible here.
symbol = 'EURUSD'
# Broker data (timestamps in broker server time, day starting at 00:00):
# use a DST-aware zone name so the GMT+2/GMT+3 switch is handled automatically.
axiory_tz = 'Europe/Kiev'  

dft = add_time_feature(df, cf=cf, source_tz=axiory_tz)
dft.head(3)



# The configuration is loaded a second time before computing the indicators.
cf = EnvConfig('./drive/MyDrive/configure.json')

df = tech_indicators(dft, cf=cf)
df.to_csv('tech.csv')

Unnamed: 0,Id,Start,Name,Impact,Currency
0,be36572c-0dbf-4b11-aca1-bf854160ba4a,01/03/2020 13:00:00,Harmonized Index of Consumer Prices (YoY),HIGH,EUR
1,5b125588-01ca-4aca-a365-8902ef7839f1,01/03/2020 15:00:00,ISM Manufacturing PMI,HIGH,USD
2,3fa72395-467b-48b9-b0a3-29529d398237,01/03/2020 19:00:00,FOMC Minutes,HIGH,USD
3,2d6440df-69b8-4322-b175-002ae17088e3,01/07/2020 10:00:00,Core Harmonized Index of Consumer Prices (YoY),HIGH,EUR


In [91]:
# Inspect the first row of the feature frame.
df.head(1)

Unnamed: 0,open,high,low,close,vol,weekday,day,week,month,year,...,atr_base,log_returns,price_norm,spread_ratio,body_ratio,rsi,macd_hist,adx,stoch_k,ma_cross
2020-01-01 22:00:00+00:00,1.12117,1.12128,1.12087,1.12114,31.0,2,1,1,1,2020,...,0.0,0.0,0.0,0.0,-0.073171,0.0,0.0,0.0,0.0,0.0


In [92]:
# Debug: look up the position of the first news timestamp in the price index
# (the string is passed to get_loc as-is and resolved by pandas).
a = df.index.get_loc('01/03/2020 13:00:00')
a # 2020-01-03 12:50:00+00:00,

468

In [None]:
# Re-run the news flagging and save the result under a scratch file name.
df_news = add_news_feature(df, news)
df_news.to_csv('df_new.csv')

In [103]:
# Preview the first rows, then overwrite the scratch CSV.
df_news.head(4)
df_news.to_csv('df_new.csv')

In [97]:
# Print the installed pandas version.
import pandas as pd
print(pd.__version__)

2.2.3


In [90]:
# Debug: show the Python type and value of each raw 'Start' entry
# (they are still plain strings at this point, see the output below).
for news_time in news['Start']:
    print(type(news_time))
    print(news_time)

<class 'str'>
01/03/2020 13:00:00
<class 'str'>
01/03/2020 15:00:00
<class 'str'>
01/03/2020 19:00:00
<class 'str'>
01/07/2020 10:00:00
<class 'str'>
01/07/2020 10:00:00
<class 'str'>
01/07/2020 15:00:00
<class 'str'>
01/10/2020 13:30:00
<class 'str'>
01/10/2020 13:30:00
<class 'str'>
01/14/2020 13:30:00
<class 'str'>
01/14/2020 13:30:00
<class 'str'>
01/16/2020 07:00:00
<class 'str'>
01/16/2020 12:30:00
<class 'str'>
01/16/2020 18:00:00
<class 'str'>
01/20/2020 18:30:00
<class 'str'>
01/21/2020 09:00:00
<class 'str'>
01/21/2020 10:00:00
<class 'str'>
01/23/2020 12:45:00
<class 'str'>
01/23/2020 12:45:00
<class 'str'>
01/23/2020 13:30:00
<class 'str'>
01/24/2020 08:30:00
<class 'str'>
01/24/2020 08:30:00
<class 'str'>
01/24/2020 09:00:00
<class 'str'>
01/24/2020 10:30:00
<class 'str'>
01/28/2020 13:30:00
<class 'str'>
01/29/2020 19:00:00
<class 'str'>
01/29/2020 19:30:00
<class 'str'>
01/30/2020 13:00:00
<class 'str'>
01/30/2020 13:30:00
<class 'str'>
01/31/2020 10:00:00
<class 'str'>


In [104]:
def render_to_file(**kwargs):
    """Append closed-transaction records to a log file and optionally print them.

    Keyword Args:
        log_header (bool): Emit the column header before the records.
            Defaults to False.
        log_filename (str): File to append the comma-separated log to. Missing
            parent directories are created. When empty, nothing is written.
        printout (bool): Also print the fixed-width table. Defaults to False.
        transaction_close_this_step (list[dict]): Transactions to report; only
            entries with ``CloseStep >= 0`` are written. Each entry must
            provide the keys used below (``Ticket``, ``Type``, ``ActionTime``,
            ``ActionStep``, ``ActionPrice``, ``CloseTime``, ``CloseStep``,
            ``ClosePrice``, ``OpenBal``, ``CloseBal``, ``Status``, ``Info``,
            ``PIPS``, ``SL``, ``PT``, ``DeltaStep``).
        done_information (str): Free text appended after the records.
        balance, balance_initial: Accepted for compatibility with existing
            callers; not used for the output.
    """
    log_header                  =   kwargs.get("log_header",False)
    log_filename                =   kwargs.get("log_filename","")
    printout                    =   kwargs.get("printout",False)
    transaction_close_this_step =   kwargs.get("transaction_close_this_step",[])
    done_information            =   kwargs.get("done_information","")

    tr_lines                    =   ""
    tr_lines_comma              =   ""
    _header                     =   ""
    _header_comma               =   ""
    # NOTE: the backslash continuations below are inside the string literals,
    # so the indentation of the continued lines is part of the output. It is
    # kept unchanged to preserve the existing log format.
    if log_header:
        _header = f'{"Ticket":>8} {"Type":>4} {"ActionStep":16} \
                    {"ActionPrice":>12} {"CloseStep":8} {"ClosePrice":>12} \
                    {"OpenBal":>12} {"CloseBal":>12} {"Status":8} {"Info":>8} {"PIPS":>6} {"SL":>6} {"PT":>6} {"DeltaStep":8}\n'


        _header_comma = f'{"Ticket,Type,ActionTime,ActionStep,ActionPrice,CloseTime,ClosePrice, OpenBal, CloseBal, Status, Info, PIPS,SL,PT,CloseStep,DeltaStep"}\n'

    for _tr in transaction_close_this_step or []:
        # Only transactions that have actually been closed are reported.
        if _tr["CloseStep"] >=0:
            tr_lines += f'{_tr["Ticket"]:>8} {_tr["Type"]:>4} {_tr["ActionStep"]:16} \
                    {_tr["ActionPrice"]:.5f} {_tr["CloseStep"]:8} {_tr["ClosePrice"]:.5f} \
                    {_tr["OpenBal"]:.2f} {_tr["CloseBal"]:.2f} {_tr["Status"]:8}  {_tr["Info"]:>8}  {_tr["PIPS"]:4.0f} {_tr["SL"]:4.0f} {_tr["PT"]:4.0f} {_tr["DeltaStep"]:8}\n'

            tr_lines_comma += f'{_tr["Ticket"]},{_tr["Type"]},{_tr["ActionTime"]},{_tr["ActionStep"]}, \
                    {_tr["ActionPrice"]},{_tr["CloseTime"]},{_tr["ClosePrice"]}, \
                    {_tr["OpenBal"]},{_tr["CloseBal"]}, {_tr["Status"]},{_tr["Info"]},{_tr["PIPS"]},{_tr["SL"]},{_tr["PT"]},{_tr["CloseStep"]},{_tr["DeltaStep"]}\n'

    log = _header_comma + tr_lines_comma
    if done_information:
        log += done_information

    # Write only when there is something to write and somewhere to write it.
    if log and log_filename:
        dir_path = os.path.dirname(log_filename)
        if dir_path:
            os.makedirs(dir_path, exist_ok=True)
        # The context manager closes the file; no explicit close() needed.
        with open(log_filename, 'a+') as _f:
            _f.write(log)

    tr_lines = _header + tr_lines
    if printout and tr_lines:
        print(tr_lines)
        if done_information:
            print(done_information)

In [None]:
import os
import json

import logging
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
# from stable_baselines3.common.callbacks import LearningRateSchedule
from stable_baselines3 import PPO
import gymnasium as gym
from gymnasium import spaces
import torch.nn.functional as F
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import numpy as np
import datetime

class ForexTradingEnv(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self, file, cf, asset, logger_show=False, save_plot=False, scaler=None):
        # scaler parameter ·ÄÄ·Ä≠·ÄØ ·Äë·Äï·Ä∫·Äë·Ää·Ä∑·Ä∫·Äõ·Äï·Ä´·Äô·Ää·Ä∫·Åã ·Åé·ÄÑ·Ä∫·Ä∏·Äû·Ää·Ä∫ Global Train Set ·Äê·ÄΩ·ÄÑ·Ä∫ Fit ·Äú·ÄØ·Äï·Ä∫·Äë·Ä¨·Ä∏·Äû·Ä±·Ä¨ Scaler ·Äñ·Äº·ÄÖ·Ä∫·Äõ·Äô·Ää·Ä∫·Åã
        # 'scaler' ·Äû·Ää·Ä∫ Global Train Set ·Äê·ÄΩ·ÄÑ·Ä∫ Fit ·Äú·ÄØ·Äï·Ä∫·Äë·Ä¨·Ä∏·Äû·Ä±·Ä¨ StandardScaler instance ·Äñ·Äº·ÄÖ·Ä∫·Äõ·Äô·Ää·Ä∫·Åã
        self.scaler = scaler
        if self.scaler is None:
             raise ValueError("A fitted StandardScaler instance must be provided to the Environment.")  
         
        super(ForexTradingEnv, self).__init__()
        # ·ÄÄ·Ä≠·Äî·Ä∫·Ä∏·Äõ·Äæ·ÄÑ·Ä∫·Äô·Äª·Ä¨·Ä∏·ÄÄ·Ä≠·ÄØ ·ÄÖ·Äê·ÄÑ·Ä∫·Äû·Äê·Ä∫·Äô·Äæ·Äê·Ä∫·Äû·Ää·Ä∫·Åã
        self._initialize_parameters(file, cf, asset, logger_show, save_plot)
        
        # [NEW ACTION] Raw Data ·ÄÄ·Ä≠·ÄØ Scaler ·Äñ·Äº·ÄÑ·Ä∑·Ä∫ Transform ·Äú·ÄØ·Äï·Ä∫·ÄÅ·Äº·ÄÑ·Ä∫·Ä∏
        # OHLCV features ·Äô·Äª·Ä¨·Ä∏·ÄÄ·Ä≠·ÄØ Scaling ·Äú·ÄØ·Äï·Ä∫·Äõ·Äï·Ä´·Äô·Äö·Ä∫ (·Ä•·Äï·Äô·Ä¨: open, high, low, close, vol, atr_base, log_returns, price_norm, etc.)
        # Scaling ·Äú·ÄØ·Äï·Ä∫·Äõ·Äî·Ä∫ features ·Äô·Äª·Ä¨·Ä∏·ÄÖ·Ä¨·Äõ·ÄÑ·Ä∫·Ä∏·ÄÄ·Ä≠·ÄØ ·Äû·Äê·Ä∫·Äô·Äæ·Äê·Ä∫·Äï·Ä´·Åã
        # ·Ä§·Äî·Ä±·Äõ·Ä¨·Äê·ÄΩ·ÄÑ·Ä∫ ·ÄÄ·Äª·ÄΩ·Äî·Ä∫·Äê·Ä±·Ä¨·Ä∫·Äê·Ä≠·ÄØ·Ä∑·Äû·Ää·Ä∫ OHLCV ·Äî·Äæ·ÄÑ·Ä∑·Ä∫ Indicator Features ·Ä°·Ä¨·Ä∏·Äú·ÄØ·Ä∂·Ä∏·ÄÄ·Ä≠·ÄØ Scaling ·Äú·ÄØ·Äï·Ä∫·Äô·Ää·Ä∫·Äü·ÄØ ·Äö·Ä∞·ÄÜ·Äï·Ä´·Äô·Ää·Ä∫·Åã
        self._scale_data()
        
        
        # Action ·Äî·Äæ·ÄÑ·Ä∑·Ä∫ Observation Spaces ·ÄÄ·Ä≠·ÄØ ·Äû·Äê·Ä∫·Äô·Äæ·Äê·Ä∫·Äû·Ää·Ä∫·Åã
        self._initialize_spaces()
        # Environment ·ÄÄ·Ä≠·ÄØ ·Ä°·ÄÖ·Äï·Äº·ÄØ·Ä°·ÄÅ·Äº·Ä±·Ä°·Äî·Ä±·Äû·Ä≠·ÄØ·Ä∑ ·Äï·Äº·Äî·Ä∫·Äú·Ää·Ä∫·Äû·Äê·Ä∫·Äô·Äæ·Äê·Ä∫·Äû·Ää·Ä∫·Åã
        self.reset()


    def _scale_data(self):
        """Raw Data (self.data) ·Äô·Äæ features ·Äô·Äª·Ä¨·Ä∏·ÄÄ·Ä≠·ÄØ Global Scaler ·Äñ·Äº·ÄÑ·Ä∑·Ä∫ Transform ·Äú·ÄØ·Äï·Ä∫·ÄÅ·Äº·ÄÑ·Ä∫·Ä∏"""
        # [self.features] ·Äê·ÄΩ·ÄÑ·Ä∫ OHLCV ·Äî·Äæ·ÄÑ·Ä∑·Ä∫ Indicator Features ·Ä°·Ä¨·Ä∏·Äú·ÄØ·Ä∂·Ä∏ ·Äï·Ä´·Äù·ÄÑ·Ä∫·Äõ·Äô·Ää·Ä∫·Åã
        # [NOTE]: 'time' ·ÄÄ·Ä≤·Ä∑·Äû·Ä≠·ÄØ·Ä∑·Äû·Ä±·Ä¨ Non-Numeric features ·Äô·Äª·Ä¨·Ä∏·ÄÄ·Ä≠·ÄØ self.features ·Äê·ÄΩ·ÄÑ·Ä∫ ·Äô·Äï·Ä´·Äù·ÄÑ·Ä∫·ÄÄ·Äº·Ä±·Ä¨·ÄÑ·Ä∫·Ä∏ ·Äû·Ä±·ÄÅ·Äª·Ä¨·Äï·Ä´·ÄÖ·Ä±·Åã
        if not self.scaler.scale_.any():
             logger.warning("Scaler is not properly fitted. Continuing with raw data.")
             return
             
        # Scaled Features ·Äô·Äª·Ä¨·Ä∏·ÄÄ·Ä≠·ÄØ Original DataFrame ·Äê·ÄΩ·ÄÑ·Ä∫ ·Ä°·ÄÖ·Ä¨·Ä∏·Äë·Ä≠·ÄØ·Ä∏·Äû·Ää·Ä∫·Åã
        # Scaled Data ·Äê·ÄΩ·ÄÑ·Ä∫ NaN/Inf ·Äô·Äñ·Äº·ÄÖ·Ä∫·ÄÖ·Ä±·Äõ·Äî·Ä∫ Data Frame ·ÄÄ·Ä≠·ÄØ ·ÄÄ·Äº·Ä≠·ÄØ·Äê·ÄÑ·Ä∫·ÄÖ·ÄÖ·Ä∫·ÄÜ·Ä±·Ä∏·Äë·Ä¨·Ä∏·Äû·ÄÑ·Ä∑·Ä∫·Äï·Ä´·Äû·Ää·Ä∫·Åã
        self.data[self.features_scaled] = self.scaler.transform(self.data[self.features_scaled])
        # logger.info(f"Data scaled successfully using fitted StandardScaler.")
        
    # ·ÄÄ·Ä≠·Äî·Ä∫·Ä∏·Äõ·Äæ·ÄÑ·Ä∫·Äô·Äª·Ä¨·Ä∏·ÄÄ·Ä≠·ÄØ ·ÄÖ·Äê·ÄÑ·Ä∫·Äû·Äê·Ä∫·Äô·Äæ·Äê·Ä∫·Äû·Ää·Ä∫·Åã
    def _initialize_parameters(self, file, cf, asset, logger_show, save_plot):
        # Params to variables
        self.csv_file               =   file
        self.cf                     =   cf
        self.symbol_col             =   asset
        self.features_scaled        =   self.cf.env_parameters('features_scaled') # Time-Series Features List
        self.features_unscaled      =   self.cf.env_parameters('features_unscaled')
        self.features_filter        =   self.cf.env_parameters('features_filter')
        # Scaled Data Frame ·Åè Feature List
        self.obs_features           =   self.features_scaled + self.features_unscaled
        self.sequence_length        =   self.cf.data_processing_parameters("sequence_length") # Transformer Lookback Window (100)
        self.logger_show            =   logger_show
        self.save_plot              =   save_plot

        self.data_raw = pd.read_csv(file)
        if 'time' in self.data_raw.columns:
            self.data_raw = self.data_raw.set_index(pd.to_datetime(self.data_raw['time'], utc=True)).drop(columns=['time'])
        
        # self.data ·ÄÄ·Ä≠·ÄØ Scaling ·Äú·ÄØ·Äï·Ä∫·Äõ·Äî·Ä∫ Copy ·Äö·Ä∞·Äï·Ä´·Äô·Ää·Ä∫·Åã
        self.data = self.data_raw.copy()
            
        # We use sequence transformer, so max steps will be this
        self.max_steps              =   len(self.data) - self.sequence_length - 1

        # Configs to variables
        # Agent ·ÄÄ Action ·ÄÄ Continuous Action ·ÄÄ·Ä≠·ÄØ Discrete Action ·Äû·Ä≠·ÄØ·Ä∑·Äï·Äº·Ä±·Ä¨·ÄÑ·Ä∫·Ä∏·Äï·Ä±·Ä∏·Äû·Ä±·Ä¨ threshold
        self.action_threshold       =   self.cf.env_parameters('action_threshold')
        self.balance_initial        =   self.cf.env_parameters('balance')

        # position close ·Äô·Äñ·Äº·ÄÖ·Ä∫·Äû·Ä±·Ä∏·Äõ·ÄÑ·Ä∫
        # buy ·Äë·Ä¨·Ä∏·Äï·Äº·ÄÆ·Ä∏ price up ·Äñ·Äº·ÄÖ·Ä∫·Äî·Ä±·Äõ·ÄÑ·Ä∫ reward ·Äï·Ä±·Ä∏·Åã sell ·Äë·Ä¨·Ä∏·Äï·Äº·ÄÆ·Ä∏ price down ·Äñ·Äº·ÄÖ·Ä∫·Äî·Ä±·Äõ·ÄÑ·Ä∫ reward ·Äï·Ä±·Ä∏
        # position management ·Äô·Äæ·Ä¨·Äú·Ää·Ä∫·Ä∏ ·Äû·ÄØ·Ä∂·Ä∏·Åã
        # buy ·Äô·Äæ·Ä¨ ·Äô·Äº·Äê·Ä∫·Äî·Ä±·Äõ·ÄÑ·Ä∫ tp ·Ä°·Äï·Ä±·Ä´·Ä∫·Äõ·ÄΩ·Ä±·Ä∑ sl ·Ä°·Äï·Ä±·Ä´·Ä∫·Äõ·ÄΩ·Ä±·Ä∑·Åã  ·Äõ·Äæ·ÄØ·Ä∂·Ä∏·Äî·Ä±·Äõ·ÄÑ·Ä∫ tp ·Ä°·Ä±·Ä¨·ÄÄ·Ä∫·Äõ·ÄΩ·Ä±·Ä∑ sl ·Ä°·Äï·Ä±·Ä´·Ä∫·Äê·ÄÑ·Ä∫,
        # sell ·Äô·Äæ·Ä¨ ·Äô·Äº·Äê·Ä∫·Äî·Ä±·Äõ·ÄÑ·Ä∫ tp ·Ä°·Ä±·Ä¨·ÄÄ·Ä∫·Äõ·ÄΩ·Ä±·Ä∑ sl ·Ä°·Ä±·Ä¨·ÄÄ·Ä∫·Äõ·ÄΩ·Ä±·Ä∑·Åã ·Äõ·Äæ·ÄØ·Ä∂·Ä∏·Äî·Ä±·Äõ·ÄÑ·Ä∫ tp ·Ä°·Äï·Ä±·Ä´·Ä∫·Äê·ÄÑ·Ä∫ sl ·Ä°·Ä±·Ä¨·ÄÄ·Ä∫·ÄÅ·Äª
        self.good_position_reward_scale = self.cf.env_parameters("good_position_reward_scale") # ·Ä•·Äï·Äô·Ä¨: 0.01
        # ·Äõ·Ää·Ä∫·Äõ·ÄΩ·Äö·Ä∫·ÄÅ·Äª·ÄÄ·Ä∫ ·ÅÇ: SL/PT Trailing ·Ä°·Äê·ÄΩ·ÄÄ·Ä∫ ·Äê·Äî·Ä∫·Äñ·Ä≠·ÄØ·Ä∏ (Move Step Size)
        self.trailing_distance = self.cf.env_parameters("trailing_stop_distance_points")

        # ·Ä°·Äõ·Äæ·ÄØ·Ä∂·Ä∏·Äî·Ä≤·Ä∑·Ä°·Äô·Äº·Äê·Ä∫ ·Äô·Äª·Äæ·Äê·Äô·Äæ·ÄØ·Äõ·Äæ·Ä≠·Äê·Ä≤·Ä∑ trading performance ·Ä°·Äê·ÄΩ·ÄÄ·Ä∫ ·Äï·Ä±·Ä∏·Äê·Ä≤·Ä∑ bonus reward 0.01
        # self.consistency_reward = self.cf.env_parameters("consistency_reward")
        self.stop_loss = self.cf.symbol(self.symbol_col, "stop_loss_max")
        self.profit_taken = self.cf.symbol(self.symbol_col, "profit_taken_max")
        self.point = self.cf.symbol(self.symbol_col, "point")
        self.transaction_fee = self.cf.symbol(self.symbol_col, "transaction_fee")
        self.over_night_penalty = self.cf.symbol(self.symbol_col, "over_night_penalty")
        self.max_current_holding = self.cf.symbol(self.symbol_col, "max_current_holding")
        # Drawdown Penalty Factor
        self.drawdown_penalty_factor = self.cf.env_parameters("drawdown_penalty_factor")
        self.margin_requirement = self.cf.env_parameters('margin_requirement')


    # Action ·Äî·Äæ·ÄÑ·Ä∑·Ä∫ Observation Spaces ·ÄÄ·Ä≠·ÄØ ·Äû·Äê·Ä∫·Äô·Äæ·Äê·Ä∫·Äû·Ää·Ä∫·Åã
    def _initialize_spaces(self):
        """Declare the action space and the observation space.

        The action is a single continuous value in ``[-1, 1]``; it is mapped to
        BUY / HOLD / SELL in ``_get_action_name`` using ``action_threshold``.

        Each observation is a ``(sequence_length, n_features + 4)`` float32
        window: the ``obs_features`` columns plus four context values (equity,
        drawdown, open-position ratio, time context).
        """
        self.action_space = spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float32)

        context_width = 4  # [Equity, Drawdown, Open_Pos_Ratio, Time_Context]
        window_shape = (self.sequence_length, len(self.obs_features) + context_width)

        # Bounds are left open (+/- inf); scaled features have no fixed range here.
        self.observation_space = spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=window_shape,
            dtype=np.float32,
        )

    # Environment ·ÄÄ·Ä≠·ÄØ ·Ä°·ÄÖ·Äï·Äº·ÄØ·Ä°·ÄÅ·Äº·Ä±·Ä°·Äî·Ä±·Äû·Ä≠·ÄØ·Ä∑ ·Äï·Äº·Äî·Ä∫·Äú·Ää·Ä∫·Äû·Äê·Ä∫·Äô·Äæ·Äê·Ä∫·Äû·Ää·Ä∫·Åã
    def reset(self, *, seed=None, options=None):
        """Return the environment to its starting state.

        Args:
            seed: Forwarded to the parent class ``reset``.
            options: Forwarded to the parent class ``reset``.

        Returns:
            ``(observation, info)`` where ``info`` is an empty dict.
        """
        super().reset(seed=seed, options=options)

        # Trading state.
        self.positions = []
        self.ticket_id = 0
        self.balance = self.balance_initial
        self.ttl_rewards = 0  # cumulative reward of the episode

        # Equity / drawdown tracking starts from the initial balance.
        self.peak_equity = self.balance_initial
        self.equity_curve = [self.balance_initial]
        self.current_drawdown = 0.0
        self.max_drawdown = 0.0

        # The first observation needs a full lookback window behind it.
        self.current_step = self.sequence_length
        logger.info(f"--- Environment reset. Starting at step {self.current_step} --total rewards: {self.ttl_rewards}")

        first_observation = self._next_observation()
        return first_observation, {}


    # Prepare the observation data that represents the current market condition for the AI model.
    def _next_observation(self):
        
        # 1. Time Series Observation (Scaled Data)
        obs_ts = self.data.iloc[
            self.current_step - self.sequence_length: self.current_step
        ][self.obs_features].values # Shape: (100, N_Features_TS)

        # 2. Account State (Non-Time-Series / Context Vector)
        current_equity = self._calculate_current_equity()
        open_positions_count = sum(1 for p in self.positions if p['Status'] == 0)

        obs_context = np.array([
            current_equity / self.balance_initial, # 1. Normalized Equity
            self.current_drawdown,                 # 2. Current Drawdown (Percentage)
            open_positions_count / self.max_current_holding, # 3. Open Positions Ratio
            self.data.iloc[self.current_step]['hour_cos']   # 4. Time Context (Scaled)
        ], dtype=np.float32) # Shape: (4,)

        # 3. Final Observation Construction (Time Series + Context)
        
        # Context features ·Äô·Äª·Ä¨·Ä∏·ÄÄ·Ä≠·ÄØ Sequence Length (100) ·Ä°·Äú·Ä≠·ÄØ·ÄÄ·Ä∫ ·Äñ·Äº·Äî·Ä∑·Ä∫·ÄÄ·Äª·ÄÄ·Ä∫·ÄÅ·Äº·ÄÑ·Ä∫·Ä∏ (Broadcasting)
        obs_context_expanded = np.tile(obs_context, (self.sequence_length, 1)) # Shape: (100, 4)
        
        # Horizontal Stack (Sequence length, N_Features_TS + N_Features_Context)
        obs_final = np.hstack([obs_ts, obs_context_expanded])

        # 4. PyTorch/Device Conversion and Validation
        try:
            # NumPy array ‚Üí PyTorch tensor ·Äï·Äº·Ä±·Ä¨·ÄÑ·Ä∫·Ä∏·Äô·Äö·Ä∫
            obs_tensor = torch.tensor(obs_final, dtype=torch.float32).to(device)
            # Data Validation ·ÄÖ·ÄÖ·Ä∫·ÄÜ·Ä±·Ä∏·ÄÅ·Äº·ÄÑ·Ä∫·Ä∏
            if torch.isnan(obs_tensor).any() or torch.isinf(obs_tensor).any():
                logger.error(f"Invalid observation (NaN/Inf) at step {self.current_step}")
                raise ValueError(f"Invalid observation (NaN/Inf) at step {self.current_step}")
            return obs_tensor.cpu().numpy()
            
        except NameError:
            # Torch ·ÄÄ·Ä≠·ÄØ ·Äô·Äû·ÄØ·Ä∂·Ä∏·Äï·Ä´·ÄÄ NumPy ·ÄÄ·Ä≠·ÄØ·Äû·Ä¨ ·Äï·Äº·Äî·Ä∫·Äï·Ä±·Ä∏·Äï·Ä´·Åã
            if np.isnan(obs_final).any() or np.isinf(obs_final).any():
                logger.error(f"Invalid observation (NaN/Inf) at step {self.current_step}")
                raise ValueError(f"Invalid observation (NaN/Inf) at step {self.current_step}")
            return obs_final # Final NumPy array
        
            
    def _ray_mask(self, a, c, bounds):
        """
        Ray Mask ·Äú·ÄØ·Äï·Ä∫·Äï·Ä´·Åã
        a: ·Äô·Ä∞·Äõ·ÄÑ·Ä∫·Ä∏ action (np.array)
        c: ·Äû·ÄÄ·Ä∫·ÄÜ·Ä≠·ÄØ·ÄÑ·Ä∫·Äõ·Ä¨ ·Ä°·ÄÖ·ÄØ ·Äõ·Ä≤·Ä∑ ·Ä°·Äú·Äö·Ä∫·Äó·Äü·Ä≠·ÄØ
        A_r_boundary_func: lambda_A_r ·Äê·ÄΩ·ÄÄ·Ä∫·Äê·Ä≤·Ä∑ func
        A_boundary_func: lambda_A ·Äê·ÄΩ·ÄÄ·Ä∫·Äê·Ä≤·Ä∑ func
        """
        if np.allclose(a, c):
            return c  # ·Ä°·Äú·Äö·Ä∫·Äô·Äæ·Ä¨ ·ÄÜ·Ä≠·ÄØ ·Äô·Äõ·ÄΩ·Äæ·Ä±·Ä∑

        direction = a - c
        norm_dir = direction / np.linalg.norm(direction)

        lambda_A_r = bounds[1] - c if norm_dir > 0 else c - bounds[0]
        lambda_A = 1 - c if norm_dir > 0 else c - (-1)


        if lambda_A_r <= 0 or lambda_A <= 0:
            return c  # ·Ä°·Äô·Äæ·Ä¨·Ä∏ ·Äõ·Äæ·Ä±·Ä¨·ÄÑ·Ä∫

        scale = lambda_A_r / lambda_A
        a_r = c + scale * direction
        return np.clip(a_r, -1, 1)  # Action space ·ÄÄ·Äî·Ä∫·Ä∑·Äû·Äê·Ä∫


    def _get_action_name(self, _action, ma_first, ma_slow):

        c = 0.0  # ·Ä°·Äú·Äö·Ä∫·Äó·Äü·Ä≠·ÄØ (hold)
        if ma_first > ma_slow:  # Uptrend: buy only [0, 1]
            bounds = [0, 1]
        else:  # Downtrend: sell only [-1, 0]
            bounds = [-1, 0]

        a_masked = self._ray_mask(_action, c, bounds)

        """Convert continuous action to discrete action name"""
        if a_masked >= self.action_threshold:
            return "BUY"
        elif a_masked <= -self.action_threshold:
            return "SELL"
        else:
            return "HOLD"

    def step(self, action):
        # self.data ·Äû·Ää·Ä∫ Index ·Äê·ÄΩ·ÄÑ·Ä∫ 'time' ·ÄÄ·Ä≠·ÄØ ·Äë·Ä¨·Ä∏·Äõ·Äæ·Ä≠·Äï·Äº·ÄÆ·Ä∏ drop ·Äú·ÄØ·Äï·Ä∫·Äë·Ä¨·Ä∏·Äû·Ä±·Ä¨·ÄÄ·Äº·Ä±·Ä¨·ÄÑ·Ä∑·Ä∫·Åä Index ·Äô·Äæ time ·ÄÄ·Ä≠·ÄØ ·Äö·Ä∞·Äõ·Äî·Ä∫·Äú·Ä≠·ÄØ·Äû·Ää·Ä∫·Åã
        current_row_raw = self.data_raw.iloc[self.current_step]
        
        # Unscaled Price Features
        _o, _h, _l, _c, ma_fast, ma_slow = current_row_raw[['open', 'high', 'low', 'close', 'ma_fast', 'ma_slow']]
        
        _t = self.data.index[self.current_step] # Get time from index    
            
        reward                      =   0 # ·Äí·ÄÆ step ·Ä°·Äê·ÄΩ·ÄÄ·Ä∫ ·ÄÖ·ÄØ·ÄÖ·ÄØ·Äï·Ä±·Ä´·ÄÑ·Ä∫·Ä∏ reward
        position_reward             =   0 # Position ·Äï·Ä≠·Äê·Ä∫·Äõ·ÄÑ·Ä∫ ·Äõ·Äê·Ä≤·Ä∑ reward
        action_hold_reward          =   0 # Hold action ·Ä°·Äê·ÄΩ·ÄÄ·Ä∫ reward/penalty

        _msg                        =   []
        _action                     =   action[0] # action value eg. [0.75]
        open_position               =   0
        for position in self.positions:
            if position['Status']   ==  0:
                position_reward, closed, _msg   =   self._calculate_reward(position)
                if not closed: open_position += 1  # Count what we already knew
                reward += position_reward

        # Continuous actions: [1 -> 0.5] LONG | [0.5 -> -0.5] HOLD |[-0.5 -> -1] SHORT
        action_name = self._get_action_name(action, ma_fast, ma_slow)

        if open_position < self.max_current_holding and action_name in ['BUY', 'SELL']:
            self.ticket_id  +=  1

            # Real trading ·Äô·Äæ·Ä¨ margin requirement ·Äõ·Äæ·Ä≠·Äû·Äú·Ä≠·ÄØ·Äô·Äª·Ä≠·ÄØ·Ä∏
            # Position ·Äñ·ÄΩ·ÄÑ·Ä∑·Ä∫·Äõ·ÄÑ·Ä∫ capital ·ÄÅ·Äª·ÄØ·Äï·Ä∫·ÄÑ·Äº·Ä¨·Ä∏·Äî·Ä±·Äõ·Äê·Äö·Ä∫
            # Position ·Äï·Ä≠·Äê·Ä∫·Äê·Ä≤·Ä∑·Ä°·ÄÅ·Ä´ ·Äï·Äº·Äî·Ä∫·Äï·Ä±·Ä´·ÄÑ·Ä∫·Ä∏·Äë·Ää·Ä∑·Ä∫·Äï·Ä±·Ä∏·Äê·Äö·Ä∫
            self.balance -= self.margin_requirement # hold up, this will make sure model can not open a lot of

            position        =   {
                "Ticket"        :   self.ticket_id,
                "Symbol"        :   self.symbol_col,
                "ActionTime"    :   _t,
                "Type"          :   action_name,
                "Lot"           :   1,
                "ActionPrice"   :   _c,
                "SL"            :   self.stop_loss,
                "PT"            :   self.profit_taken,
                "MaxDD"         :   0,
                "Swap"          :   0.0,
                "CloseTime"     :   "",
                "ClosePrice"    :   0.0,
                "Point"         :   self.point,
                "Reward"        :   self.transaction_fee,
                "DateDuration"  :   _t.date().isoformat(),
                "Status"        :   0, # 0 is Position is currently OPEN and active
                #"PIPS"          :   self.transaction_fee, # Price Interest Point (profit/loss ·ÄÄ·Ä≠·ÄØ measure ·Äú·ÄØ·Äï·Ä∫·Äê·Ä≤·Ä∑ unit)
                "PIPS"          :   0,
                "ActionStep"    :   self.current_step,
                "CloseStep"     :   -1, # Step number when position closed, not close yet is -1
                "DeltaStep"     :   0,
                "OpenBal"       :   self.balance,
                "CloseBal"       :   0,
                "HighestPrice"  :   _c,
                "LowestPrice"   :   _c,
            }

            self.positions.append(position)
            # do not use transaction_fee penalty
            # reward = self.transaction_fee #open cost
            # model ·ÄÄ ·Ä°·Äú·ÄΩ·Äî·Ä∫·Ä°·ÄÄ·Äª·ÄΩ·Ä∂ position ·Äê·ÄΩ·Ä± ·Äô·Äñ·ÄΩ·ÄÑ·Ä∑·Ä∫·Äô·Ä≠·Ä°·Ä±·Ä¨·ÄÑ·Ä∫ ·Äë·Ä≠·Äî·Ä∫·Ä∏·ÄÅ·Äª·ÄØ·Äï·Ä∫·Äê·Ä≤·Ä∑ mechanism ·Äñ·Äº·ÄÖ·Ä∫·Äï·Ä´·Äê·Äö·Ä∫·Åã
            _msg.append(f'Step:{self.current_step} Tkt:{position["Ticket"]} {position["Type"]} Rwd:{position["PIPS"]} SL:{position["SL"]} PT:{position["PT"]}')

        # HOLD Penalty ·ÄÄ·Ä≠·ÄØ ·Ä°·Äú·ÄΩ·Äî·Ä∫·Äû·Ä±·Ä∏·ÄÑ·Äö·Ä∫·Äû·Ä±·Ä¨ ·Äê·Äî·Ä∫·Äñ·Ä≠·ÄØ·Ä∏
        # (·Ä•·Äï·Äô·Ä¨: -0.0001) ·Äû·Ä≠·ÄØ·Ä∑ ·Äï·Äº·Ä±·Ä¨·ÄÑ·Ä∫·Ä∏·Äï·Ä´·Åã ·Ä°·ÄÄ·Ä±·Ä¨·ÄÑ·Ä∫·Ä∏·ÄÜ·ÄØ·Ä∂·Ä∏·Äô·Äæ·Ä¨
        # Trading ·Äô·Äú·ÄØ·Äï·Ä∫·ÄÅ·Äº·ÄÑ·Ä∫·Ä∏·Ä°·Äê·ÄΩ·ÄÄ·Ä∫ Penalty ·Äô·Äï·Ä±·Ä∏·Äò·Ä≤ action_hold_reward = 0 ·Äë·Ä¨·Ä∏·Äï·Ä´·Åã
        elif open_position < self.max_current_holding and action_name == "HOLD":
            action_hold_reward  =   0  # no open any position, encourage open position
        else:
            action_hold_reward  =   0




        reward              +=  action_hold_reward

        # Move to the next time step
        self.current_step   +=  1

        # check if episode is done
        terminated          =   (self.balance <= 0)
        truncated           =   (self.current_step > self.max_steps)

        # get next observation
        obs                 =   self._next_observation()
        _msg.append(f'---idle----step:{self.current_step}, RF:{action_name} Action:{_action} Balance: {self.balance} reward:{reward} total_rewards:{self.ttl_rewards} position_reward:{position_reward} action_hold_reward:{action_hold_reward}')


        current_equity = self._calculate_current_equity()
        self.equity_curve.append(current_equity)
        self._calculate_drawdown()  # This updates peak_equity and drawdowns

        # =========================================================================
        # START: Drawdown Penalty Logic
        # =========================================================================
        # self.current_drawdown ·Äû·Ää·Ä∫ Percentage (0.0 ·Äô·Äæ 1.0) ·Äñ·Äº·ÄÖ·Ä∫·Äû·Ää·Ä∫·Åã


        drawdown_penalty = self.current_drawdown * self.drawdown_penalty_factor
        # Reward ·Äê·ÄΩ·ÄÑ·Ä∫ ·Äî·ÄØ·Äê·Ä∫·Äï·Ä±·Ä∏·ÄÅ·Äº·ÄÑ·Ä∫·Ä∏
        reward -= drawdown_penalty

        # Log the penalty for debugging
        _msg.append(f'Drawdown Penalty: -{drawdown_penalty:.4f} (DD:{self.current_drawdown:.4f})')
        # =========================================================================
        # END: Drawdown Penalty Logic
        # =========================================================================
        # Drawdown Penalty ·Äî·ÄØ·Äê·Ä∫·Äï·Äº·ÄÆ·Ä∏·Äô·Äæ·Äû·Ä¨ ·ÄÖ·ÄØ·ÄÖ·ÄØ·Äï·Ä±·Ä´·ÄÑ·Ä∫·Ä∏ Reward ·ÄÄ·Ä≠·ÄØ ·Ä°·Äï·Ä∫·Äí·Ä≠·Äê·Ä∫·Äú·ÄØ·Äï·Ä∫·Äï·Ä´
        self.ttl_rewards += reward  # <--- ·Ä§·Äî·Ä±·Äõ·Ä¨·Äê·ÄΩ·ÄÑ·Ä∫ ·Äï·Äº·Äî·Ä∫·Äë·Ää·Ä∑·Ä∫·Äï·Ä´

        info = {}

        if terminated or truncated:
            buy_positions = [p for p in self.positions if p["Type"] == "BUY"]
            sell_positions = [p for p in self.positions if p["Type"] == "SELL"]

            buy_count = len(buy_positions)
            sell_count = len(sell_positions)
            total_positions = len(self.positions)

            # Calculate win rates
            buy_wins = len([p for p in buy_positions if p["PIPS"] > 0])
            sell_wins = len([p for p in sell_positions if p["PIPS"] > 0])

            buy_win_rate = buy_wins / buy_count if buy_count > 0 else 0
            sell_win_rate = sell_wins / sell_count if sell_count > 0 else 0

            _m = f'--- Positions: {total_positions} (Buy:{buy_count}, Sell:{sell_count}) | '
            _m += f'WinRates: Buy:{buy_win_rate:.1%}, Sell:{sell_win_rate:.1%} | '
            _m += f'TotalRewards: {self.ttl_rewards} Balance: {self.balance}'

            logger.info(_m)
            _msg.append(_m)

            # Additional info
            if self.logger_show:
                for _m in _msg:
                    logger.info(_m)

            info["info"]                = _msg
            info["sharpe"]              = self._calculate_sharpe()  # ‚úÖ Now works! üí° 'sharpe_ratio' ·Äô·Äæ 'sharpe' ·Äû·Ä≠·ÄØ·Ä∑·Äï·Äº·Ä±·Ä¨·ÄÑ·Ä∫·Ä∏·Äï·Ä´·Åã
            info["drawdown"]            = self.max_drawdown         # ‚úÖ Now accurate!'max_drawdown' ·Äô·Äæ 'drawdown' ·Äû·Ä≠·ÄØ·Ä∑·Äï·Äº·Ä±·Ä¨·ÄÑ·Ä∫·Ä∏·Äï·Ä´·Åã
            info["current_equity"]      = current_equity            # ‚úÖ For debugging
            info["peak_equity"]         = self.peak_equity          # ‚úÖ For debugging
            info["equity_curve_length"] = len(self.equity_curve)    # ‚úÖ Monitor growth

        return obs, reward, terminated, truncated, info




    def _calculate_reward(self, position):
        _o, _h, _l, _c              = self.data_raw.iloc[self.current_step][['open', 'high', 'low', 'close']]
        _t                          = self.data.index[self.current_step]
        _msg                        =   []

        entry_price                 =   position['ActionPrice']
        direction                   =   position['Type']
        profit_target_price         =   entry_price + position['PT']/ self.point if direction == 'BUY' else entry_price - position['PT']/self.point
        stop_loss_price             =   entry_price + position['SL']/ self.point if direction == 'BUY' else entry_price - position['SL']/self.point
        closed                      =   False
        close_position_reward       =   0.0
        good_position_reward        =   0.0

        # Check for stoploss hit
        if (direction == 'BUY' and _l <= stop_loss_price) or (direction == 'SELL' and _h >= stop_loss_price):
            close_position_reward   =   position['SL'] # position sl ·ÄÄ minus value ·Äñ·Äº·ÄÖ·Ä∫·Äê·Äö·Ä∫

            position['CloseTime']   =   _t
            position['ClosePrice']  =   stop_loss_price
            position['Status']      =   1   # Status ·ÄÄ open ·ÄÜ·Ä≠·ÄØ 0 close ·ÄÜ·Ä≠·ÄØ 1
            position['CloseStep']   =   self.current_step
            position['PIPS']        =   close_position_reward - self.transaction_fee
            position['DeltaStep']   =   self.current_step - position['ActionStep']
            position['Info']        =   f'{profit_target_price:.5f} | {stop_loss_price:.5f}'

            self.balance            +=  self.margin_requirement + position['PIPS'] # return 100 is margin hold
            position['CloseBal']    =   self.balance
            closed                  =   True
            _msg.append(f'Step:{self.current_step} Tkt:{position["Ticket"]}: Rwd:{position["PIPS"]}, SL:{position["SL"]}, DeltaStep:{position["DeltaStep"]}')

        elif (direction == 'BUY' and _h >= profit_target_price) or (direction == 'SELL' and _l <= profit_target_price):
            close_position_reward   =    position['PT'] # position tp ·ÄÄ plus value ·Äñ·Äº·ÄÖ·Ä∫·Äê·Äö·Ä∫

            position['CloseTime']   =   _t
            position['ClosePrice']  =   profit_target_price
            position['Status']      =   2   # Status ·ÄÄ open ·ÄÜ·Ä≠·ÄØ 0 close ·ÄÜ·Ä≠·ÄØ 1
            position['CloseStep']   =   self.current_step
            position['PIPS']        =   close_position_reward - self.transaction_fee
            position['DeltaStep']   =   self.current_step - position['ActionStep']
            position['Info']        =   f'{profit_target_price:.5f} | {stop_loss_price:.5f}'

            self.balance            +=  self.margin_requirement + position['PIPS'] # return 100 is margin hold
            position['CloseBal']    =   self.balance
            closed                  =   True
            _msg.append(f'Step:{self.current_step} Tkt:{position["Ticket"]}: Rwd:{position["PIPS"]}, SL:{position["SL"]}, DeltaStep:{position["DeltaStep"]}')

        else:
            if self.current_step + 5 + self.sequence_length >= len(self.data):
                close_position_reward   =   (_c - position["ActionPrice"] if direction == 'BUY' else position["ActionPrice"] - _c)* self.point

                position['CloseTime']   =   _t
                position['ClosePrice']  =   _c
                position['Status']      =   3   # Status ·ÄÄ open ·ÄÜ·Ä≠·ÄØ 0 close ·ÄÜ·Ä≠·ÄØ 1, force close 2
                position['CloseStep']   =   self.current_step
                position['PIPS']        =   close_position_reward - self.transaction_fee
                position['DeltaStep']   =   self.current_step - position['ActionStep']
                position['Info']        =   f'{profit_target_price:.5f} | {stop_loss_price:.5f}'
                self.balance            +=  self.margin_requirement + position["PIPS"] # return 100 is margin hold
                position['CloseBal']    =   self.balance

                closed                  =   True
                _msg.append(f'Step:{self.current_step} Tkt:{position["Ticket"]}: Rwd:{position["PIPS"]}, Cls:End, DeltaStep:{position["DeltaStep"]}')

            else:
                # =========================================================================
                # Real Trailing Stop Logic (·Ä°·Äô·Äº·ÄÑ·Ä∑·Ä∫·ÄÜ·ÄØ·Ä∂·Ä∏ ·Äõ·Ä±·Ä¨·ÄÄ·Ä∫·Äñ·Ä∞·Ä∏·Äû·Ä±·Ä¨ ·Äà·Ä±·Ä∏·Äî·Äæ·ÄØ·Äî·Ä∫·Ä∏·ÄÄ·Ä≠·ÄØ ·Äô·Äæ·Äê·Ä∫·Äê·Äô·Ä∫·Ä∏·Äê·ÄÑ·Ä∫·ÄÅ·Äº·ÄÑ·Ä∫·Ä∏)
                # =========================================================================
                # 1. Highest/Lowest Price Update

                if direction == "BUY":
                  # Buy position ·Ä°·Äê·ÄΩ·ÄÄ·Ä∫ ·Ä°·Äô·Äº·ÄÑ·Ä∑·Ä∫·ÄÜ·ÄØ·Ä∂·Ä∏ ·Äõ·Ä±·Ä¨·ÄÄ·Ä∫·Äñ·Ä∞·Ä∏·Äû·Ä±·Ä¨ ·Äà·Ä±·Ä∏·Äî·Äæ·ÄØ·Äî·Ä∫·Ä∏·ÄÄ·Ä≠·ÄØ ·Äô·Äæ·Äê·Ä∫·Äê·Äô·Ä∫·Ä∏·Äê·ÄÑ·Ä∫
                  if _c > position["HighestPrice"]:
                      position["HighestPrice"] = _c

                  # 2. New SL Target Price (Trailing Price) ·ÄÄ·Ä≠·ÄØ ·Äê·ÄΩ·ÄÄ·Ä∫·ÄÅ·Äª·ÄÄ·Ä∫·ÄÅ·Äº·ÄÑ·Ä∫·Ä∏
                  # New_SL_Price = HighestPrice - (Trailing Distance Pips ·ÄÄ·Ä≠·ÄØ Price Change ·Äû·Ä≠·ÄØ·Ä∑ ·Äï·Äº·Ä±·Ä¨·ÄÑ·Ä∫·Ä∏)
                  trailing_price = position["HighestPrice"] - self.trailing_distance / self.point

                  # 3. SL ·ÄÄ·Ä≠·ÄØ ·Ä°·ÄÜ·ÄÑ·Ä∑·Ä∫·Äô·Äº·Äæ·ÄÑ·Ä∑·Ä∫·Äê·ÄÑ·Ä∫·ÄÅ·Äº·ÄÑ·Ä∫·Ä∏
                  # ·Äú·ÄÄ·Ä∫·Äõ·Äæ·Ä≠ SL ·Äë·ÄÄ·Ä∫ ·Äï·Ä≠·ÄØ·ÄÄ·Ä±·Ä¨·ÄÑ·Ä∫·Ä∏·Äô·Äæ·Äû·Ä¨ ·Äõ·ÄΩ·Ä±·Ä∑·Äï·Ä´
                  if trailing_price > stop_loss_price:

                      stop_loss_price = trailing_price
                      # SL_Price ·Ä°·Äû·ÄÖ·Ä∫·ÄÄ·Ä≠·ÄØ Points ·Äû·Ä≠·ÄØ·Ä∑ ·Äï·Äº·Äî·Ä∫·Äï·Äº·Ä±·Ä¨·ÄÑ·Ä∫·Ä∏·Äï·Äº·ÄÆ·Ä∏ position['SL'] ·ÄÄ·Ä≠·ÄØ ·Ä°·Äï·Ä∫·Äí·Ä≠·Äê·Ä∫·Äú·ÄØ·Äï·Ä∫·Äï·Ä´
                      position["SL"] = (stop_loss_price - entry_price) * self.point
                    #   if position["SL"] > 0:
                    #       position["SL"]    =   -abs(position["SL"])
                      trailing_happened = True
                  else:
                      trailing_happened = False


                elif direction == "SELL":
                  # Sell position ·Ä°·Äê·ÄΩ·ÄÄ·Ä∫ ·Ä°·Äî·Ä≠·Äô·Ä∑·Ä∫·ÄÜ·ÄØ·Ä∂·Ä∏ ·Äõ·Ä±·Ä¨·ÄÄ·Ä∫·Äñ·Ä∞·Ä∏·Äû·Ä±·Ä¨ ·Äà·Ä±·Ä∏·Äî·Äæ·ÄØ·Äî·Ä∫·Ä∏·ÄÄ·Ä≠·ÄØ ·Äô·Äæ·Äê·Ä∫·Äê·Äô·Ä∫·Ä∏·Äê·ÄÑ·Ä∫
                  if _c < position["LowestPrice"]:
                      position["LowestPrice"] = _c

                  # New SL Target Price (Trailing Price) ·ÄÄ·Ä≠·ÄØ ·Äê·ÄΩ·ÄÄ·Ä∫·ÄÅ·Äª·ÄÄ·Ä∫·ÄÅ·Äº·ÄÑ·Ä∫·Ä∏
                  trailing_price = position["LowestPrice"] + self.trailing_distance / self.point

                  # SL ·ÄÄ·Ä≠·ÄØ ·Ä°·ÄÜ·ÄÑ·Ä∑·Ä∫·Äô·Äº·Äæ·ÄÑ·Ä∑·Ä∫·Äê·ÄÑ·Ä∫·ÄÅ·Äº·ÄÑ·Ä∫·Ä∏
                  if trailing_price < stop_loss_price:
                      stop_loss_price = trailing_price
                      # SL_Price ·Ä°·Äû·ÄÖ·Ä∫·ÄÄ·Ä≠·ÄØ Points ·Äû·Ä≠·ÄØ·Ä∑ ·Äï·Äº·Äî·Ä∫·Äï·Äº·Ä±·Ä¨·ÄÑ·Ä∫·Ä∏·Äï·Äº·ÄÆ·Ä∏ position['SL'] ·ÄÄ·Ä≠·ÄØ ·Ä°·Äï·Ä∫·Äí·Ä≠·Äê·Ä∫·Äú·ÄØ·Äï·Ä∫·Äï·Ä´
                      position["SL"] = (entry_price - stop_loss_price) * self.point
                    #   if position["SL"] > 0:
                    #       position["SL"]    =   -abs(position["SL"])
                      trailing_happened = True
                  else:
                      trailing_happened = False

                # =========================================================================
                # Reward Logic (Trailing ·Äú·ÄØ·Äï·Ä∫·ÄÅ·Äº·ÄÑ·Ä∫·Ä∏·Ä°·Äê·ÄΩ·ÄÄ·Ä∫ Bonus ·Äï·Ä±·Ä∏·ÄÅ·Äº·ÄÑ·Ä∫·Ä∏)
                # =========================================================================
                # Reward Sign ·ÄÄ·Ä≠·ÄØ ·Äö·ÄÅ·ÄÑ·Ä∫·Ä°·Äê·Ä≠·ÄØ·ÄÑ·Ä∫·Ä∏ ·Äê·ÄΩ·ÄÄ·Ä∫·Äï·Ä´·Åã
                delta = _c - entry_price
                if direction == "BUY":
                    reward_sign = 1 if delta >= 0 else -1
                elif direction == "SELL":
                    reward_sign = -1 if delta >= 0 else 1

                good_position_reward = reward_sign * self.good_position_reward_scale

                # Trailing ·Ä°·Äô·Äæ·Äî·Ä∫·Äê·ÄÄ·Äö·Ä∫ ·Äñ·Äº·ÄÖ·Ä∫·Äû·ÄΩ·Ä¨·Ä∏·Äô·Äæ·Äû·Ä¨ Bonus Reward ·ÄÄ·Ä≠·ÄØ ·Äï·Ä±·Ä∏·Äï·Ä´
                if trailing_happened:
                    good_position_reward += 0.001

                position['Info']        =   f'{profit_target_price:.5f} | {stop_loss_price:.5f}'
                position['CloseBal']    =   self.balance
                _msg.append(f'Step:{self.current_step} Tkt:{position["Ticket"]}: NO_Close, PT:{position["PT"]}, SL:{position["SL"]}')

        return close_position_reward + good_position_reward, closed, _msg


    def _calculate_sharpe(self, risk_free_rate=0.0):
        """Calculate Sharpe ratio for the current episode"""
        if len(self.equity_curve) < 2:
            return 0.0

        returns = np.diff(self.equity_curve) / self.equity_curve[:-1]

        if np.std(returns) == 0:
            return 0.0

        sharpe = (np.mean(returns) - risk_free_rate) / np.std(returns)
        return float(sharpe * np.sqrt(288))  # Annualized (5-min bars ‚Üí 288/day)

    def _calculate_drawdown(self):
        """Update max drawdown during episode"""
        current_equity          =   self.equity_curve[-1]
        self.peak_equity        =   max(self.peak_equity, current_equity)
        self.current_drawdown   =   (self.peak_equity - current_equity) / self.peak_equity
        self.max_drawdown       =   max(self.max_drawdown, self.current_drawdown)


    def _calculate_current_equity(self):
        """Calculate total current equity (balance + unrealized P/L)"""
        total_equity = self.balance  # Start with cash balance

        # Add unrealized P/L from open positions
        for position in self.positions:
            if position['Status'] == 0:  # Only open positions
                current_price = self.data.iloc[self.current_step]["close"]
                entry_price = position['ActionPrice']

                if position['Type'] == 'BUY':
                    unrealized_pnl = (current_price - entry_price) * self.point
                else:  # Sell
                    unrealized_pnl = (entry_price - current_price) * self.point

                total_equity += unrealized_pnl

        return total_equity

    def render(self, mode='human', title=None, **kwargs):
        """Write the positions of this environment out through ``render_to_file``.

        Args:
            mode: ``'human'`` also prints the output (``printout=True``);
                ``'file'`` only writes the log. Other values do nothing in the
                code shown here.
            title: Not used in the code shown here.
            **kwargs: Not used in the code shown here.

        NOTE(review): this describes the visible part of the method only;
        confirm whether more logic follows.
        """
        # Only the 'human' and 'file' modes produce output.
        if mode in ('human', 'file'):
            log_header      =   True
            printout        =   False
            if mode == 'human':
                printout    =   True

            # The log path mirrors the CSV path, with "split/" replaced by "log/".
            log_file = self.csv_file.replace("split/", "log/")
            # Keyword arguments for render_to_file; all positions are passed,
            # not only those closed in the current step.
            pm = {
                "log_header": log_header,
                "log_filename": log_file,
                "printout": printout,
                "balance": self.balance,
                "balance_initial": self.balance_initial,
                "transaction_close_this_step": self.positions,
                "done_information": False
            }
            render_to_file(**pm)
            # log_header is a local variable, so this reset has no effect
            # outside this call.
            if log_header:
                    log_header = False