In [1]:
import pandas as pd
import numpy as np
import os 
from typing import Dict, Any


def convert_1m_to_5m_df(file_path: str) -> pd.DataFrame:
    """Load a headerless 1-minute OHLCV CSV and aggregate it into 5-minute candles.

    The CSV must contain, in this order, the columns Date, Time, Open, High,
    Low, Close, Volume, with Date and Time formatted as ``%Y.%m.%d`` and
    ``%H:%M``.

    Args:
        file_path: Path of the 1-minute CSV file.

    Returns:
        A DataFrame with the columns Datetime, Open, High, Low, Close, Volume,
        one row per 5-minute bucket that has data and a positive volume.
        An empty DataFrame is returned (after printing a message) when the
        file does not exist or cannot be parsed.
    """
    # Column names, in the order they appear in the headerless CSV.
    COLUMN_NAMES = ['Date', 'Time', 'Open', 'High', 'Low', 'Close', 'Volume']
    DATETIME_FORMAT = '%Y.%m.%d %H:%M'

    if not os.path.exists(file_path):
        print(f"üö® Error: File not found at path: {file_path}")
        return pd.DataFrame()

    try:
        df = pd.read_csv(file_path, sep=',', header=None, names=COLUMN_NAMES,
                         dtype={'Open': np.float64, 'High': np.float64, 'Low': np.float64, 'Close': np.float64})

        # Volume may hold non-numeric junk: coerce it, treat missing values as 0,
        # then store it as an integer count.
        df['Volume'] = pd.to_numeric(df['Volume'], errors='coerce').fillna(0).astype(np.int64)

    except Exception as e:
        # Best-effort loader: report the problem and hand back an empty frame.
        print(f"üö® Error loading CSV file: {e}")
        return pd.DataFrame()

    # Build the datetime index from the separate Date and Time columns.
    df['Datetime'] = pd.to_datetime(
        df['Date'].astype(str) + ' ' + df['Time'].astype(str),
        format=DATETIME_FORMAT,
        errors='coerce',
    )
    # Unparseable timestamps were coerced to NaT; drop those rows explicitly
    # so they can never take part in the resampling below.
    df = (
        df.dropna(subset=['Datetime'])
          .set_index('Datetime')
          .drop(columns=['Date', 'Time'])
    )

    ohlcv_aggregation_rules: Dict[str, Any] = {
        'Open': 'first', 'High': 'max', 'Low': 'min', 'Close': 'last', 'Volume': 'sum'
    }
    df = df.resample('5min').agg(ohlcv_aggregation_rules)
    # Buckets without any 1-minute bar come back as NaN rows; buckets with
    # zero traded volume are discarded as well.
    df = df.dropna()
    df = df[df['Volume'] > 0]

    print(f"‚úÖ Conversion successful! 5-Min rows: {len(df)}")
    return df.reset_index().rename(columns={'index': 'Datetime'})

In [2]:
# Source 1-minute CSV and the destination for the aggregated 5-minute CSV.
file_path_from_data = "./content/drive/MyDrive/data/raw/EURUSD_2020_all.csv"
file_path_to_data = "./content/drive/MyDrive/data/raw/EURUSD_2020_all_5.csv"

# Aggregate to 5-minute candles and write them out without the positional index.
# NOTE(review): convert_1m_to_5m_df returns an empty frame on failure, in which
# case an empty CSV is still written here.
raw = convert_1m_to_5m_df(file_path_from_data)
raw.to_csv(file_path_to_data, index=False)

‚úÖ Conversion successful! 5-Min rows: 149715


In [3]:
import json

class EnvConfig():
    """Accessor for the environment configuration stored in a JSON file.

    The file is parsed once in the constructor; every accessor is a plain
    lookup into the resulting dictionary, so a missing section or key raises
    ``KeyError``.

    Args:
        config_file: Path of the JSON configuration file.
    """
    def __init__(self,config_file):
        # JSON is UTF-8 by specification; do not depend on the platform default.
        with open(config_file, encoding="utf-8") as j:
            self.config = json.load(j)

    def env_parameters(self,item=''):
        """Return the "env" section, or a single entry of it.

        Args:
            item (str, optional): Key inside "env". '' returns the whole section.
        """
        if item:
            return self.config["env"][item]
        else:
            return self.config["env"]

    def symbol(self, asset="GBPUSD", item='') :
        """Return the configuration of a trading pair (symbol).

        Args:
            asset (str, optional): Symbol key under "symbol". Defaults to "GBPUSD".
            item (str, optional): Key inside that symbol's entry. '' returns
                the whole entry. Defaults to ''.

        Returns:
            The symbol's configuration entry, or the value stored under
            ``item`` inside it.
        """
        if item:
            return self.config["symbol"][asset][item]
        else:
            return self.config["symbol"][asset]

    def data_processing_parameters(self, item=''):
        """Return the "data_processing" section, or a single entry of it.

        Args:
            item (str, optional): Key inside "data_processing". '' returns the
                whole section.
        """
        if item:
            return self.config["data_processing"][item]
        return self.config["data_processing"]

    def trading_hour(self,place="NewYork"):
        """Return the trading-hour entry of one market, or all of them.

        Args:
            place (str, optional): Key under "trading_hour". Defaults to
                "NewYork". A falsy value ('' or None) returns the whole
                "trading_hour" section.

        Returns:
            The value stored for ``place``, or the whole section.
        """
        if place:
            return self.config["trading_hour"][place]
        else:
            return self.config["trading_hour"]

    def indicator(self,place="sma_fast_period"):
        """Return one technical-indicator setting, or all of them.

        Args:
            place (str, optional): Key under "data_processing" -> "indicator".
                Defaults to "sma_fast_period". A falsy value ('' or None)
                returns the whole "indicator" mapping.

        Returns:
            The value stored for ``place``, or the whole mapping.
        """
        if place:
            return self.config["data_processing"]["indicator"][place]
        else:
            return self.config["data_processing"]["indicator"]



In [4]:
import pandas as pd
import logging

import logging
# Module-level logger used by the data-patching helper below.
logger = logging.getLogger(__name__)

def patch_missing_data(df, dt_col_name='time', cf=None):
    """Fill short gaps in a 5-minute OHLC(V) frame, week by week.

    The frame's columns are renamed positionally to the configured
    ``required_cols`` (plus ``'Volume'`` when a sixth column is present).
    Within each week (weeks end on Sunday) the rows are re-indexed onto a
    complete 5-minute grid between that week's first and last bar, and the
    inserted rows are forward-filled for at most 12 consecutive bars.
    Longer gaps stay NaN, and nothing is inserted between weeks.

    Args:
        df: Frame with 5 or 6 columns in the order of ``required_cols``
            (optionally followed by the volume). It is not modified.
        dt_col_name: Name of the datetime column after renaming. When it is
            not present, a column with a common datetime name is used.
        cf: Configuration object providing
            ``data_processing_parameters("min_bars_per_week")`` and
            ``data_processing_parameters("required_cols")``.

    Returns:
        A new frame with a time-zone-naive ``Datetime`` column followed by
        the price (and volume) columns.

    Raises:
        ValueError: If ``cf`` is missing or the column count is not 5 or 6.
        KeyError: If no datetime column can be found.
    """
    if cf is None:
        raise ValueError("cf must provide data_processing_parameters() (e.g. an EnvConfig instance)")

    min_bars = cf.data_processing_parameters("min_bars_per_week")

    # e.g. ["time", "open", "high", "low", "close"]
    required_cols = list(cf.data_processing_parameters("required_cols"))

    # Work on a copy: the renaming and datetime conversion below must not
    # alter the frame owned by the caller.
    df = df.copy()

    # A sixth column is taken to be the volume.
    if df.shape[1] == 6:
        df.columns = required_cols + ['Volume']
    elif df.shape[1] == 5:
        df.columns = required_cols
    else:
        raise ValueError(f"Invalid number of columns: {df.shape[1]} =>{required_cols}")

    logger.warning(f"shape of  column: {df.shape[1]}")
    # 1. Column validation
    if missing := set(required_cols) - set(df.columns):
        raise ValueError(f"Missing columns: {missing}")

    # 2. Auto-detect datetime column
    dt_candidates = {'time', 'timestamp', 'date', 'datetime', 'Datetime'}
    if dt_col_name not in df.columns:
        # sorted() makes the pick deterministic when several candidates exist.
        found = sorted(dt_candidates & set(df.columns))
        if not found:
            raise KeyError(f"No datetime column found. Tried: {dt_candidates}")
        dt_col_name = found[0]
        logger.info(f"Using datetime column: {dt_col_name}")

    # 3. Convert to datetime index
    df[dt_col_name] = pd.to_datetime(df[dt_col_name], utc=True)
    df = df.set_index(dt_col_name).sort_index()
    groups = df.groupby(pd.Grouper(freq='W-SUN'))

    # Do not bridge gaps longer than 12 bars (one hour of 5-minute candles).
    fill_limit = 12
    # The volume column is called 'Volume' after the renaming above; 'vol' is
    # still accepted in case required_cols itself names it that way.
    volume_cols = [c for c in ('vol', 'Volume') if c in df.columns]
    fill_cols = ['open', 'high', 'low', 'close'] + volume_cols

    patched_weeks = []  # one patched frame per non-empty week

    for w, week_df in groups:
        if week_df.empty:
            continue

        if len(week_df) != min_bars:
            logger.warning(f"Week {w} has {len(week_df)}/{min_bars} bars")

        # Complete 5-minute grid between the week's first and last bar.
        new_index = pd.date_range(
            start=week_df.index.min(),
            end=week_df.index.max(),
            freq='5min',
            tz='UTC'
        )

        # Reindex onto the grid, drop the time zone, then forward-fill short gaps.
        week_df = week_df.reindex(new_index)
        week_df.index = week_df.index.tz_localize(None)
        week_df[fill_cols] = week_df[fill_cols].ffill(limit=fill_limit)
        patched_weeks.append(week_df)

    # Merge all weeks back together.
    if patched_weeks:
        all_df = pd.concat(patched_weeks)
    else:
        # Nothing to patch: still return the same shape of result
        # (time-zone-naive 'Datetime' column) as the normal path.
        all_df = df.copy()
        all_df.index = all_df.index.tz_localize(None)

    return all_df.rename_axis('Datetime').reset_index()


In [5]:
# Load the project configuration, re-read the 5-minute CSV written by the
# conversion cell, and patch short gaps week by week.
cf = EnvConfig('./content/drive/MyDrive/configure.json')
raw = pd.read_csv(file_path_to_data)
df = patch_missing_data(raw,cf=cf)

shape of  column: 6
Week 2020-01-05 00:00:00+00:00 has 576/1440 bars
Week 2020-01-12 00:00:00+00:00 has 1439/1440 bars
Week 2020-03-29 00:00:00+00:00 has 1433/1440 bars
Week 2020-09-20 00:00:00+00:00 has 1439/1440 bars
Week 2020-10-04 00:00:00+00:00 has 1436/1440 bars
Week 2020-12-27 00:00:00+00:00 has 1152/1440 bars
Week 2021-01-03 00:00:00+00:00 has 1142/1440 bars
Week 2021-05-23 00:00:00+00:00 has 1439/1440 bars
Week 2021-05-30 00:00:00+00:00 has 1437/1440 bars
Week 2021-06-06 00:00:00+00:00 has 1439/1440 bars
Week 2021-09-19 00:00:00+00:00 has 1439/1440 bars
Week 2021-10-10 00:00:00+00:00 has 1439/1440 bars
Week 2021-12-12 00:00:00+00:00 has 1439/1440 bars
Week 2021-12-26 00:00:00+00:00 has 1438/1440 bars
Week 2022-01-02 00:00:00+00:00 has 1428/1440 bars


In [6]:
# Inspect the last rows of the patched frame.
df.tail(3)

Unnamed: 0,Datetime,open,high,low,close,Volume
149734,2021-12-31 23:45:00,1.1378,1.13801,1.13764,1.13795,172.0
149735,2021-12-31 23:50:00,1.13796,1.13823,1.13778,1.1378,216.0
149736,2021-12-31 23:55:00,1.1378,1.13781,1.1365,1.1366,210.0


In [7]:
def add_time_feature(df_5m: pd.DataFrame, cf=None, source_tz='UTC') -> pd.DataFrame:
    """Add cyclical hour-of-day features and market-session flags.

    Timestamps are brought to UTC first: timestamps without a time zone are
    interpreted as ``source_tz`` wall-clock time, time-zone-aware timestamps
    are converted as they are. The session flags are evaluated in the local
    time of each market (Europe/London, America/New_York), so daylight
    saving changes are followed automatically.

    Args:
        df_5m: Frame indexed by a DatetimeIndex, or carrying a 'Datetime'
            column that can be parsed into one. It is not modified.
        cf: Configuration object whose ``trading_hour(place)`` returns a
            mapping with 'from' and 'to' hours (local market time) for
            'NewYork' and 'London'.
        source_tz: Time zone of time-zone-naive input timestamps. Ignored
            when the input is already time-zone-aware.

    Returns:
        A new frame with the UTC timestamp as a regular column, the input
        columns, and hour_sin, hour_cos, london_session, ny_session,
        overlap_session.

    Raises:
        ValueError: If ``cf`` is missing, or the frame has neither a
            datetime index nor a 'Datetime' column.
        TypeError: If the 'Datetime' column does not yield a DatetimeIndex.
    """
    if cf is None:
        raise ValueError("cf must provide trading_hour() (e.g. an EnvConfig instance)")

    # Copy first: none of the conversions below may leak into the caller's frame.
    frame = df_5m.copy()

    # Ensure a datetime index.
    if not isinstance(frame.index, pd.DatetimeIndex):
        if 'Datetime' in frame.columns:
            frame['Datetime'] = pd.to_datetime(frame['Datetime'])
            frame = frame.set_index('Datetime')
        else:
            raise ValueError("DataFrame must have datetime index or 'Datetime' column")

    # to_datetime can return a non-datetime column (e.g. mixed UTC offsets),
    # in which case set_index does not produce a DatetimeIndex.
    if not isinstance(frame.index, pd.DatetimeIndex):
         raise TypeError("DataFrame ·Åè Index ·Äû·Ää·Ä∫ DatetimeIndex ·Äñ·Äº·ÄÖ·Ä∫·Äõ·Äï·Ä´·Äô·Ää·Ä∫·Åã")

    if frame.index.tz is None:
        # Naive timestamps are wall-clock time in the source zone (e.g. 'UTC'
        # or a broker server zone). Wall-clock times that are ambiguous or do
        # not exist because of a DST switch become NaT.
        df = frame.tz_localize(source_tz, ambiguous='NaT', nonexistent='NaT').tz_convert('UTC')
    else:
        # Aware timestamps already identify an instant; only change the zone.
        df = frame.tz_convert('UTC')

    # ----------------------------------------------------
    # I. Hour of day (UTC) with cyclical encoding
    # ----------------------------------------------------
    df['hour'] = df.index.hour
    df['hour_sin'] = np.sin(2 * np.pi * df['hour']/24).round(6)
    df['hour_cos'] = np.cos(2 * np.pi * df['hour']/24).round(6)

    # ----------------------------------------------------
    # II. DST-aware market sessions
    # ----------------------------------------------------
    london_time = df.index.tz_convert('Europe/London')
    ny_time = df.index.tz_convert('America/New_York')

    # Session boundaries from the configuration, in local market hours.
    ny = cf.trading_hour('NewYork')
    ldn = cf.trading_hour('London')

    # A bar belongs to a session when from <= local hour < to.
    df['london_session'] = ((london_time.hour >= ldn['from']) & (london_time.hour < ldn['to'])).astype(int)
    df['ny_session'] = ((ny_time.hour >= ny['from']) & (ny_time.hour < ny['to'])).astype(int)
    df['overlap_session'] = (df['london_session'] & df['ny_session']).astype(int)

    # 'hour' was only needed to derive the cyclical features.
    df = df.drop(columns=['hour'], errors='ignore')

    # The timestamp is returned as a column; an unnamed index is called 'Datetime'.
    return df.reset_index().rename(columns={'index': 'Datetime'})


In [8]:
# The timestamps in `df` carry no time zone; they are interpreted in the zone below.
# NOTE(review): presumably this is the broker's server zone (GMT+2 / GMT+3
# depending on DST); confirm against the data source.
axiory_tz = 'Europe/Kiev'  

dft = add_time_feature(df, cf=cf, source_tz=axiory_tz)

In [9]:
# Preview the frame with the time and session features added.
dft.head(2)

Unnamed: 0,Datetime,open,high,low,close,Volume,hour_sin,hour_cos,london_session,ny_session,overlap_session
0,2020-01-01 22:00:00+00:00,1.12117,1.12128,1.12087,1.12114,31.0,-0.5,0.866025,0,0,0
1,2020-01-01 22:05:00+00:00,1.12117,1.12124,1.12103,1.12103,44.0,-0.5,0.866025,0,0,0


In [10]:
# Persist the time-feature frame; a later cell re-reads it from 'dft.csv'.
dft.to_csv('dft.csv', index=False)


In [11]:
from finta import TA

def tech_indicators(df, cf=None):
    """Append trend, volatility and momentum features to an OHLC frame.

    The columns are added to ``df`` itself, rows containing any NaN are then
    removed from it in place, and the same object is returned.

    Args:
        df: Frame with at least a 'close' column plus whatever columns the
            ``TA`` indicator functions read.
        cf: Configuration object whose ``indicator(name)`` returns the
            look-back periods 'sma_fast_period', 'sma_mid_period',
            'sma_slow_period', 'atr_period' and 'rsi_period'.

    Returns:
        ``df``, extended with the feature columns and without NaN rows.
    """
    close = df['close']

    # Look-back periods, keyed by the prefix used for the derived columns.
    sma_periods = {
        'fast': cf.indicator('sma_fast_period'),
        'mid': cf.indicator('sma_mid_period'),
        'slow': cf.indicator('sma_slow_period'),
    }
    atr_n = cf.indicator('atr_period')
    rsi_n = cf.indicator('rsi_period')

    # Return over 5 bars.
    df['return_5'] = close.pct_change(5)

    # Moving averages and RSI.
    for speed, n_bars in sma_periods.items():
        df[f'{speed}_ma'] = TA.SMA(df, period=n_bars)
    df['rsi'] = TA.RSI(df, period=rsi_n).ffill().round(6)

    # Trend strength: relative distance of the close from each average.
    for speed in sma_periods:
        average = df[f'{speed}_ma']
        df[f'{speed}_ts'] = (close - average) / average

    # Trend direction: sign of the trend strength.
    for speed in sma_periods:
        df[f'{speed}_td'] = np.sign(df[f'{speed}_ts'])

    # Relative gaps between pairs of averages.
    for quick, slow in (('fast', 'mid'), ('mid', 'slow'), ('fast', 'slow')):
        reference = df[f'{slow}_ma']
        df[f'{quick}_{slow}_gap'] = (df[f'{quick}_ma'] - reference) / reference

    # Volatility: ATR, also expressed relative to the close.
    df['atr_base'] = TA.ATR(df, period=atr_n).ffill()
    df['atr_norm'] = df['atr_base'] / close

    # Rolling 33% / 66% quantiles of the normalised ATR over 100 bars.
    atr_window = df['atr_norm'].rolling(100)
    df['low_thr'] = atr_window.quantile(0.33)
    df['high_thr'] = atr_window.quantile(0.66)

    # One-hot volatility regime relative to those thresholds.
    atr_norm, low_thr, high_thr = df['atr_norm'], df['low_thr'], df['high_thr']
    df['vol_low'] = (atr_norm < low_thr).astype(int)
    df['vol_med'] = ((atr_norm >= low_thr) & (atr_norm < high_thr)).astype(int)
    df['vol_high'] = (atr_norm >= high_thr).astype(int)

    # Momentum: RSI rescaled around 50, plus a one-hot split at 45 and 55.
    rsi = df['rsi']
    df['momentum_score'] = (rsi - 50) / 50
    df['mom_bearish'] = (rsi < 45).astype(int)
    df['mom_neutral'] = ((rsi >= 45) & (rsi <= 55)).astype(int)
    df['mom_bullish'] = (rsi > 55).astype(int)

    # Remove every row that still has a NaN in any column.
    df.dropna(inplace=True)

    return df

In [12]:
# Re-load the time-feature frame, add the technical indicators and save the result.
# tech_indicators adds its columns to the frame it receives and returns that
# same object, so `dft` and `dfi` refer to one DataFrame after this call.
dft = pd.read_csv('dft.csv')
dfi = tech_indicators(dft, cf=cf)
dfi.to_csv('dfi.csv', index=False)

In [13]:
# Preview the first rows; the frame now includes the indicator columns.
dft.head(3)

Unnamed: 0,Datetime,open,high,low,close,Volume,hour_sin,hour_cos,london_session,ny_session,...,atr_norm,low_thr,high_thr,vol_low,vol_med,vol_high,momentum_score,mom_bearish,mom_neutral,mom_bullish
199,2020-01-02 14:35:00+00:00,1.11767,1.11802,1.11767,1.1178,518.0,-0.5,-0.866025,1,1,...,0.00037,0.000224,0.000272,0,0,1,-0.434099,1,0,0
200,2020-01-02 14:40:00+00:00,1.1178,1.11781,1.11712,1.11725,496.0,-0.5,-0.866025,1,1,...,0.000393,0.000229,0.000273,0,0,1,-0.536659,1,0,0
201,2020-01-02 14:45:00+00:00,1.11727,1.11746,1.11712,1.11727,506.0,-0.5,-0.866025,1,1,...,0.000393,0.000232,0.000276,0,0,1,-0.52583,1,0,0


In [14]:
def add_news_features(df: pd.DataFrame, news_df: pd.DataFrame, window_pre=30, window_post=30):
    """Add news-proximity features 'pre_news' and 'post_news' in [0, 1].

    For each news time, 'pre_news' rises linearly from 0 at ``window_pre``
    minutes before the news to 1 at the news, and 'post_news' falls linearly
    from 1 at the news to 0 at ``window_post`` minutes after it. A bar stamped
    exactly at the news time gets 1.0 in both columns. Where the windows of
    several news items overlap, a bar keeps the highest value.

    Args:
        df: Frame indexed by a DatetimeIndex, or carrying a 'Datetime' column
            that can be parsed into one. It is not modified.
            NOTE(review): the timestamps must be comparable with UTC-aware
            news times, i.e. time-zone-aware; confirm for other callers.
        news_df: Frame with a 'Start' column of news times, parsed as UTC.
            It is not modified.
        window_pre: Length of the ramp before the news, in minutes.
        window_post: Length of the decay after the news, in minutes.

    Returns:
        A new frame with the timestamp as a regular column, the input
        columns, and 'pre_news' and 'post_news'.

    Raises:
        ValueError: If the frame has neither a datetime index nor a
            'Datetime' column.
    """
    # Copy first so the caller's frame is left untouched.
    frame = df.copy()

    # Ensure a datetime index.
    if not isinstance(frame.index, pd.DatetimeIndex):
        if 'Datetime' in frame.columns:
            frame['Datetime'] = pd.to_datetime(frame['Datetime'])
            frame = frame.set_index('Datetime')
        else:
            raise ValueError("DataFrame must have datetime index or 'Datetime' column")

    bar_times = frame.index
    # Parsed into a local variable, so news_df keeps its original column.
    news_times = pd.to_datetime(news_df['Start'], utc=True)
    pre_span = pd.Timedelta(minutes=window_pre)
    post_span = pd.Timedelta(minutes=window_post)

    pre_news = np.zeros(len(frame), dtype=float)
    post_news = np.zeros(len(frame), dtype=float)

    for news_time in news_times:
        # Pre-news: 0 -> 1 ramp over [news - window_pre, news).
        pre_mask = np.asarray((bar_times >= news_time - pre_span) & (bar_times < news_time))
        if pre_mask.any():
            minutes_to_news = np.asarray((news_time - bar_times[pre_mask]).total_seconds()) / 60
            # max(): a later news item must not lower a value set by an earlier one.
            pre_news[pre_mask] = np.maximum(pre_news[pre_mask], 1 - (minutes_to_news / window_pre))

        # Post-news: 1 -> 0 decay over (news, news + window_post].
        post_mask = np.asarray((bar_times > news_time) & (bar_times <= news_time + post_span))
        if post_mask.any():
            minutes_after_news = np.asarray((bar_times[post_mask] - news_time).total_seconds()) / 60
            post_news[post_mask] = np.maximum(post_news[post_mask], 1 - (minutes_after_news / window_post))

        # The news candle itself counts fully for both features.
        exact_mask = np.asarray(bar_times == news_time)
        if exact_mask.any():
            pre_news[exact_mask] = 1.0
            post_news[exact_mask] = 1.0

    frame['pre_news'] = np.clip(pre_news, 0, 1)
    frame['post_news'] = np.clip(post_news, 0, 1)

    # The timestamp is returned as a column; an unnamed index is called 'Datetime'.
    return frame.reset_index().rename(columns={'index': 'Datetime'})



In [15]:
# Load the economic-calendar events and the indicator frame saved earlier,
# add the pre/post-news features (30-minute windows) and save the result.
news = pd.read_csv("calendar-event-list.csv")
dfi = pd.read_csv("dfi.csv")

dfn = add_news_features(dfi, news, window_pre=30, window_post=30)
dfn.to_csv('dfn.csv', index=False)

In [147]:
import torch
import torch.nn as nn
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
import gymnasium as gym


In [148]:
class TimeSeriesTransformer(nn.Module):
    """
    A Transformer-encoder model for batches of time series windows.

    The input features are projected to an embedding, a learned positional
    encoding is added, the sequence is processed by a stack of pre-norm
    Transformer encoder layers (followed by a final LayerNorm), and a linear
    layer projects the result. The output is taken from the last time
    step(s).

    Args:
        input_size (int): Number of features per time step.
        embed_dim (int): Size of the embedding and of the output vector.
        num_heads (int): Number of attention heads per encoder layer.
        num_layers (int): Number of encoder layers.
        sequence_length (int): Number of time steps per input window; the
            positional encoding is allocated for exactly this length.
        dropout (float, optional): Dropout probability inside the encoder
            layers. Defaults to 0.1.

    Attributes:
        embedding (nn.Linear): Input feature projection.
        positional_encoding (torch.nn.Parameter): Learned positional
            encoding of shape (1, sequence_length, embed_dim), initialised
            to zeros.
        transformer_encoder (nn.TransformerEncoder): Encoder stack with a
            final LayerNorm.
        decoder (nn.Linear): Output projection (embed_dim -> embed_dim).

    Forward Inputs:
        src (torch.Tensor): Tensor of shape (batch_size, sequence_length,
            input_size).
        aggregate_last_n (int): Number of trailing time steps to average.
            Defaults to 1 (the last time step only).

    Forward Returns:
        torch.Tensor: Tensor of shape (batch_size, embed_dim).

    Raises:
        ValueError: If the output contains NaN or Inf values.
    """
    def __init__(self, input_size, embed_dim, num_heads, num_layers, sequence_length, dropout=0.1):

        super().__init__()
        self.embed_dim = embed_dim
        self.sequence_length = sequence_length

        # Input embedding
        self.embedding = nn.Linear(input_size, embed_dim)

        # Learned positional encoding, broadcast over the batch dimension.
        self.positional_encoding = nn.Parameter(torch.zeros(1, sequence_length, embed_dim))

        # Transformer encoder. batch_first=True is required because inputs are
        # (batch, time, features); with the default layout the encoder would
        # treat the batch axis as the sequence and attend across samples.
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=embed_dim,
            nhead=num_heads,
            dropout=dropout,
            batch_first=True,
            norm_first=True
        )

        self.transformer_encoder = nn.TransformerEncoder(
            encoder_layer,
            num_layers=num_layers,
            norm=nn.LayerNorm(embed_dim)
        )

        # Output projection
        self.decoder = nn.Linear(embed_dim, embed_dim)

    def forward(self, src, aggregate_last_n: int = 1):
        """
        src: (batch_size, sequence_length, input_size)
        aggregate_last_n: int, number of last timesteps to aggregate
        """
        # Embedding + positional encoding (the parameter already lives on the
        # module's device, so no explicit transfer is needed).
        x = self.embedding(src) + self.positional_encoding
        # Transformer encoder
        x = self.transformer_encoder(x)
        # Output projection
        x = self.decoder(x)

        # Fail loudly on numerical instability instead of propagating it.
        if not torch.isfinite(x).all():
            raise ValueError("Transformer output contains NaN or Inf values")

        # Aggregate last N timesteps (default = last timestep)
        if aggregate_last_n == 1:
            return x[:, -1, :]
        else:
            return x[:, -aggregate_last_n:, :].mean(dim=1)



In [149]:
class CustomCombinedExtractor(BaseFeaturesExtractor):
    """
    Features extractor that encodes a window of observations with a Transformer.

    The observation is layer-normalised over its feature axis, passed through
    a TimeSeriesTransformer, and reduced to one ``embed_dim``-sized vector per
    sample. ``features_dim`` is set to ``embed_dim``.

    Args:
        observation_space: Box space; ``shape[1]`` is used as the number of
            features per time step.
        sequence_length: Window length handed to the Transformer.
        embed_dim: Embedding / output size. Defaults to 64.
        num_heads: Attention heads per layer. Defaults to 2.
        num_layers: Encoder layers. Defaults to 2.
    """

    def __init__(self, observation_space: gym.spaces.Box, sequence_length: int, embed_dim: int = 64, num_heads: int = 2, num_layers: int = 2):
        super().__init__(observation_space, features_dim=embed_dim)

        # Number of features per time step.
        feature_count = observation_space.shape[1]

        # Input normalisation applied before the Transformer.
        self.layernorm_before = nn.LayerNorm(feature_count)

        # Sequence encoder.
        self.transformer = TimeSeriesTransformer(
            input_size=feature_count,
            embed_dim=embed_dim,
            num_heads=num_heads,
            num_layers=num_layers,
            sequence_length=sequence_length,
        )

    def forward(self, observations, aggregate_last_n: int = 1):
        """
        observations: (batch_size, sequence_length, num_features)
        aggregate_last_n: number of last timesteps to aggregate (default 1)

        Returns the encoded features; raises ValueError if they contain
        NaN or Inf values.
        """
        # Cast to float and normalise each time step's features.
        normalised = self.layernorm_before(observations.float())

        # Encode the window.
        features = self.transformer(normalised, aggregate_last_n=aggregate_last_n)

        # Guard against non-finite outputs.
        if not bool(torch.isfinite(features).all()):
            raise ValueError("Transformer output contains NaN or Inf values")

        return features


In [150]:
from stable_baselines3.common.callbacks import BaseCallback
import numpy as np

class TrainingMetricsCallback(BaseCallback):
    """Collect per-episode 'sharpe' and 'drawdown' values and log their recent means.

    An episode is counted whenever a step's info dict contains both a
    'sharpe' and a 'drawdown' entry. Every 10 counted episodes, the mean of
    the last 10 values of each metric is recorded on the logger.

    Args:
        check_freq: Stored on the instance; not used by this callback itself.
        verbose: Verbosity level passed to BaseCallback.
    """
    def __init__(self, check_freq=1000, verbose=1):
        super().__init__(verbose)
        self.check_freq = check_freq
        self.sharpe_ratios = []
        self.drawdowns = []
        self.episode_count = 0

    def _on_step(self) -> bool:
        """Inspect the info dict of every sub-environment; never stops training."""
        # Look at all sub-environments, not only the first one, so episodes
        # that end in any of them are tracked when a vectorised env is used.
        for info in self.locals.get('infos', ()):
            if "sharpe" in info and "drawdown" in info:
                self.episode_count += 1
                self.sharpe_ratios.append(info['sharpe'])
                self.drawdowns.append(info['drawdown'])

                # Log every 10 episodes, averaged over the last 10 values.
                if self.episode_count % 10 == 0:
                    self.logger.record('train/mean_sharpe', np.mean(self.sharpe_ratios[-10:]))
                    # NOTE(review): logged under 'max_drawdown' but computed as a
                    # mean of the per-episode 'drawdown' values - confirm intent.
                    self.logger.record('train/max_drawdown', np.mean(self.drawdowns[-10:]))
                    self.logger.record('train/episodes', self.episode_count)

        return True


In [151]:
def render_to_file(**kwargs):
    """Append closed transactions to a log file and optionally print them.

    Keyword Args:
        log_header (bool): Emit the header line(s). Defaults to False.
        log_filename (str): File to append the comma-separated log to. Its
            parent directory is created when missing. When empty, nothing is
            written. Defaults to "".
        printout (bool): Also print the fixed-width table. Defaults to False.
        transaction_close_this_step (list): Transaction dicts; only those
            with ``CloseStep >= 0`` are rendered. Defaults to [].
        done_information (str): Extra text appended after the transactions.
            Defaults to "".
        balance, balance_initial: Accepted for compatibility with existing
            callers; not used in the output.
    """
    log_header                  =   kwargs.get("log_header",False)
    log_filename                =   kwargs.get("log_filename","")
    printout                    =   kwargs.get("printout",False)
    transaction_close_this_step =   kwargs.get("transaction_close_this_step",[])
    done_information            =   kwargs.get("done_information","")

    tr_lines                    =   ""
    tr_lines_comma              =   ""
    _header                     =   ""
    _header_comma               =   ""
    # NOTE: the backslash continuations below sit inside the string literals,
    # so the indentation of the continued lines is part of the output.
    if log_header:
        _header = f'{"Ticket":>8} {"Type":>4} {"ActionStep":16} \
                    {"ActionPrice":>12} {"CloseStep":8} {"ClosePrice":>12} \
                    {"OpenBal":>12} {"CloseBal":>12} {"Status":8} {"Info":>8} {"PIPS":>6} {"SL":>6} {"PT":>6} {"DeltaStep":8}\n'


        _header_comma = f'{"Ticket,Type,ActionTime,ActionStep,ActionPrice,CloseTime,ClosePrice, OpenBal, CloseBal, Status, Info, PIPS,SL,PT,CloseStep,DeltaStep"}\n'
    if transaction_close_this_step:
        for _tr in transaction_close_this_step:
            # Only transactions that have actually been closed are rendered.
            if _tr["CloseStep"] >=0:
                tr_lines += f'{_tr["Ticket"]:>8} {_tr["Type"]:>4} {_tr["ActionStep"]:16} \
                    {_tr["ActionPrice"]:.5f} {_tr["CloseStep"]:8} {_tr["ClosePrice"]:.5f} \
                    {_tr["OpenBal"]:.2f} {_tr["CloseBal"]:.2f} {_tr["Status"]:8}  {_tr["Info"]:>8}  {_tr["PIPS"]:4.0f} {_tr["SL"]:4.0f} {_tr["PT"]:4.0f} {_tr["DeltaStep"]:8}\n'

                tr_lines_comma += f'{_tr["Ticket"]},{_tr["Type"]},{_tr["ActionTime"]},{_tr["ActionStep"]}, \
                    {_tr["ActionPrice"]},{_tr["CloseTime"]},{_tr["ClosePrice"]}, \
                    {_tr["OpenBal"]},{_tr["CloseBal"]}, {_tr["Status"]},{_tr["Info"]},{_tr["PIPS"]},{_tr["SL"]},{_tr["PT"]},{_tr["CloseStep"]},{_tr["DeltaStep"]}\n'

    log = _header_comma + tr_lines_comma
    if done_information:
        log += done_information
    # Without a file name there is nowhere to write to; skip instead of failing.
    if log and log_filename:
        dir_path = os.path.dirname(log_filename)
        if dir_path:
            os.makedirs(dir_path, exist_ok=True)
        # The context manager closes the file.
        with open(log_filename, 'a+') as _f:
            _f.write(log)

    tr_lines = _header + tr_lines
    if printout and tr_lines:
        print(tr_lines)
        if done_information:
            print(done_information)


In [152]:
class ForexTradingEnv(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self, file, cf, asset, logger_show=False, scaler=None):
        """Build the environment from a CSV file, a config object and a scaler.

        Args:
            file: Path of the CSV file handed to ``_initialize_parameters``.
            cf: Configuration object queried by ``_initialize_parameters``.
            asset: Symbol name used for the per-symbol configuration lookups.
            logger_show: Stored as ``self.logger_show``; ``step`` uses it to decide
                whether the collected messages are logged when an episode ends.
            scaler: Scaler applied by ``_scale_data``; must not be ``None``.

        Raises:
            ValueError: If ``scaler`` is ``None``, or if the ``features_scaled``
                columns contain NaN/Inf once scaling has run.
        """
        # The scaler is mandatory: store the reference, then validate it.
        self.scaler = scaler
        if scaler is None:
            raise ValueError("A fitted StandardScaler instance must be provided to the Environment.")

        super(ForexTradingEnv, self).__init__()

        # Order matters: parameters and data first, then scaling, then the spaces.
        self._initialize_parameters(file, cf, asset, logger_show)
        self._scale_data()
        self._initialize_spaces()

        # Refuse to start when the scaled columns hold non-finite values.
        scaled_values = self.data[self.features_scaled].values
        if not np.isfinite(scaled_values).all():
            print("FATAL ERROR: NaN/Inf detected in scaled data!")
            raise ValueError("Invalid values detected in environment data (NaN/Inf)!")

        # Bring the environment into its initial episode state.
        self.reset()


    def _scale_data(self):
        """Raw Data (self.data) ·Äô·Äæ features ·Äô·Äª·Ä¨·Ä∏·ÄÄ·Ä≠·ÄØ Global Scaler ·Äñ·Äº·ÄÑ·Ä∑·Ä∫ Transform ·Äú·ÄØ·Äï·Ä∫·ÄÅ·Äº·ÄÑ·Ä∫·Ä∏"""
        # [self.features] ·Äê·ÄΩ·ÄÑ·Ä∫ OHLCV ·Äî·Äæ·ÄÑ·Ä∑·Ä∫ Indicator Features ·Ä°·Ä¨·Ä∏·Äú·ÄØ·Ä∂·Ä∏ ·Äï·Ä´·Äù·ÄÑ·Ä∫·Äõ·Äô·Ää·Ä∫·Åã
        # [NOTE]: 'time' ·ÄÄ·Ä≤·Ä∑·Äû·Ä≠·ÄØ·Ä∑·Äû·Ä±·Ä¨ Non-Numeric features ·Äô·Äª·Ä¨·Ä∏·ÄÄ·Ä≠·ÄØ self.features ·Äê·ÄΩ·ÄÑ·Ä∫ ·Äô·Äï·Ä´·Äù·ÄÑ·Ä∫·ÄÄ·Äº·Ä±·Ä¨·ÄÑ·Ä∫·Ä∏ ·Äû·Ä±·ÄÅ·Äª·Ä¨·Äï·Ä´·ÄÖ·Ä±·Åã
        if not self.scaler.scale_.any():
             logger.warning("Scaler is not properly fitted. Continuing with raw data.")
             return

        # Scaled Features ·Äô·Äª·Ä¨·Ä∏·ÄÄ·Ä≠·ÄØ Original DataFrame ·Äê·ÄΩ·ÄÑ·Ä∫ ·Ä°·ÄÖ·Ä¨·Ä∏·Äë·Ä≠·ÄØ·Ä∏·Äû·Ää·Ä∫·Åã
        # Scaled Data ·Äê·ÄΩ·ÄÑ·Ä∫ NaN/Inf ·Äô·Äñ·Äº·ÄÖ·Ä∫·ÄÖ·Ä±·Äõ·Äî·Ä∫ Data Frame ·ÄÄ·Ä≠·ÄØ ·ÄÄ·Äº·Ä≠·ÄØ·Äê·ÄÑ·Ä∫·ÄÖ·ÄÖ·Ä∫·ÄÜ·Ä±·Ä∏·Äë·Ä¨·Ä∏·Äû·ÄÑ·Ä∑·Ä∫·Äï·Ä´·Äû·Ää·Ä∫·Åã
        self.data[self.features_scaled] = self.scaler.transform(self.data[self.features_scaled])
        # logger.info(f"Data scaled successfully using fitted StandardScaler.")

    # Initialise the environment parameters.
    def _initialize_parameters(self, file, cf, asset, logger_show):
        # Params to variables
        self.csv_file               =   file
        self.cf                     =   cf
        self.symbol_col             =   asset
        self.features_scaled        =   self.cf.env_parameters('features_scaled') # Time-Series Features List
        self.features_unscaled      =   self.cf.env_parameters('features_unscaled')
        self.features_filter        =   self.cf.env_parameters('features_filter')
        # Scaled Data Frame ·Åè Feature List
        self.obs_features           =   self.features_scaled + self.features_unscaled
        self.sequence_length        =   self.cf.data_processing_parameters("sequence_length") # Transformer Lookback Window (100)
        self.logger_show            =   logger_show

        self.data_raw = pd.read_csv(file)
        # if 'time' in self.data_raw.columns:
        #     self.data_raw = self.data_raw.set_index(pd.to_datetime(self.data_raw['time'], utc=True)).drop(columns=['time'])
        # Index ·ÄÄ·Ä≠·ÄØ datetime type ·Äû·Ä≠·ÄØ·Ä∑ ·Äï·Äº·Ä±·Ä¨·ÄÑ·Ä∫·Ä∏
        self.data_raw.index = pd.to_datetime(self.data_raw.index)


        # self.data ·ÄÄ·Ä≠·ÄØ Scaling ·Äú·ÄØ·Äï·Ä∫·Äõ·Äî·Ä∫ Copy ·Äö·Ä∞·Äï·Ä´·Äô·Ää·Ä∫·Åã
        self.data = self.data_raw.copy()

        # We use sequence transformer, so max steps will be this
        self.max_steps              =   len(self.data) - self.sequence_length - 1

        # Configs to variables
        # Agent ·ÄÄ Action ·ÄÄ Continuous Action ·ÄÄ·Ä≠·ÄØ Discrete Action ·Äû·Ä≠·ÄØ·Ä∑·Äï·Äº·Ä±·Ä¨·ÄÑ·Ä∫·Ä∏·Äï·Ä±·Ä∏·Äû·Ä±·Ä¨ threshold
        self.action_threshold       =   self.cf.env_parameters('action_threshold')
        self.balance_initial        =   self.cf.env_parameters('balance')

        # position close ·Äô·Äñ·Äº·ÄÖ·Ä∫·Äû·Ä±·Ä∏·Äõ·ÄÑ·Ä∫
        # buy ·Äë·Ä¨·Ä∏·Äï·Äº·ÄÆ·Ä∏ price up ·Äñ·Äº·ÄÖ·Ä∫·Äî·Ä±·Äõ·ÄÑ·Ä∫ reward ·Äï·Ä±·Ä∏·Åã sell ·Äë·Ä¨·Ä∏·Äï·Äº·ÄÆ·Ä∏ price down ·Äñ·Äº·ÄÖ·Ä∫·Äî·Ä±·Äõ·ÄÑ·Ä∫ reward ·Äï·Ä±·Ä∏
        # position management ·Äô·Äæ·Ä¨·Äú·Ää·Ä∫·Ä∏ ·Äû·ÄØ·Ä∂·Ä∏·Åã
        # buy ·Äô·Äæ·Ä¨ ·Äô·Äº·Äê·Ä∫·Äî·Ä±·Äõ·ÄÑ·Ä∫ tp ·Ä°·Äï·Ä±·Ä´·Ä∫·Äõ·ÄΩ·Ä±·Ä∑ sl ·Ä°·Äï·Ä±·Ä´·Ä∫·Äõ·ÄΩ·Ä±·Ä∑·Åã  ·Äõ·Äæ·ÄØ·Ä∂·Ä∏·Äî·Ä±·Äõ·ÄÑ·Ä∫ tp ·Ä°·Ä±·Ä¨·ÄÄ·Ä∫·Äõ·ÄΩ·Ä±·Ä∑ sl ·Ä°·Äï·Ä±·Ä´·Ä∫·Äê·ÄÑ·Ä∫,
        # sell ·Äô·Äæ·Ä¨ ·Äô·Äº·Äê·Ä∫·Äî·Ä±·Äõ·ÄÑ·Ä∫ tp ·Ä°·Ä±·Ä¨·ÄÄ·Ä∫·Äõ·ÄΩ·Ä±·Ä∑ sl ·Ä°·Ä±·Ä¨·ÄÄ·Ä∫·Äõ·ÄΩ·Ä±·Ä∑·Åã ·Äõ·Äæ·ÄØ·Ä∂·Ä∏·Äî·Ä±·Äõ·ÄÑ·Ä∫ tp ·Ä°·Äï·Ä±·Ä´·Ä∫·Äê·ÄÑ·Ä∫ sl ·Ä°·Ä±·Ä¨·ÄÄ·Ä∫·ÄÅ·Äª
        self.good_position_reward_scale = self.cf.env_parameters("good_position_reward_scale") # ·Ä•·Äï·Äô·Ä¨: 0.01
        # ·Äõ·Ää·Ä∫·Äõ·ÄΩ·Äö·Ä∫·ÄÅ·Äª·ÄÄ·Ä∫ ·ÅÇ: SL/PT Trailing ·Ä°·Äê·ÄΩ·ÄÄ·Ä∫ ·Äê·Äî·Ä∫·Äñ·Ä≠·ÄØ·Ä∏ (Move Step Size)
        self.trailing_distance = self.cf.env_parameters("trailing_stop_distance_points")

        # ·Ä°·Äõ·Äæ·ÄØ·Ä∂·Ä∏·Äî·Ä≤·Ä∑·Ä°·Äô·Äº·Äê·Ä∫ ·Äô·Äª·Äæ·Äê·Äô·Äæ·ÄØ·Äõ·Äæ·Ä≠·Äê·Ä≤·Ä∑ trading performance ·Ä°·Äê·ÄΩ·ÄÄ·Ä∫ ·Äï·Ä±·Ä∏·Äê·Ä≤·Ä∑ bonus reward 0.01
        # self.consistency_reward = self.cf.env_parameters("consistency_reward")
        self.stop_loss = self.cf.symbol(self.symbol_col, "stop_loss_max")
        self.profit_taken = self.cf.symbol(self.symbol_col, "profit_taken_max")
        self.point = self.cf.symbol(self.symbol_col, "point")
        self.transaction_fee = self.cf.symbol(self.symbol_col, "transaction_fee")
        self.over_night_penalty = self.cf.symbol(self.symbol_col, "over_night_penalty")
        self.max_current_holding = self.cf.symbol(self.symbol_col, "max_current_holding")
        # Drawdown Penalty Factor
        self.drawdown_penalty_factor = self.cf.env_parameters("drawdown_penalty_factor")
        self.margin_requirement = self.cf.env_parameters('margin_requirement')


    # Define the action and observation spaces.
    def _initialize_spaces(self):
        """Create ``self.action_space`` and ``self.observation_space``.

        The action is a single float in [-1, 1]; ``_get_action_name`` maps it to
        BUY / SELL / HOLD using ``self.action_threshold``. The observation is a
        ``(sequence_length, len(obs_features) + 4)`` float32 box, where the four
        extra columns are the context values appended by ``_next_observation``.
        """
        self.action_space = spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float32)

        # Context columns: equity, drawdown, open-position ratio, time context.
        context_width = 4
        row_width = len(self.obs_features) + context_width

        # Unbounded, because the feature values are not clipped to a fixed range.
        self.observation_space = spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=(self.sequence_length, row_width),
            dtype=np.float32,
        )

    # Reset the environment to its initial state.
    def reset(self, *, seed=None, options=None):
        """Start a new episode and return the first observation.

        Args:
            seed: Forwarded to the parent class ``reset``.
            options: Forwarded to the parent class ``reset``.

        Returns:
            ``(observation, info)`` where ``info`` is an empty dict.
        """
        super().reset(seed=seed, options=options)

        # Bookkeeping counters.
        self.ticket_id = 0
        self.ttl_rewards = 0  # reward accumulated over the episode

        # Account state starts from the configured initial balance.
        starting_balance = self.balance_initial
        self.balance = starting_balance
        self.positions = []

        # Equity tracking: the curve and its peak both start at the initial balance.
        self.equity_curve = [starting_balance]
        self.peak_equity = starting_balance
        self.max_drawdown = 0.0
        self.current_drawdown = 0.0

        # The first usable step needs a full look-back window behind it.
        self.current_step = self.sequence_length
        logger.info(f"--- Environment reset. Starting at step {self.current_step} --total rewards: {self.ttl_rewards}")

        return self._next_observation(), {}


    # Prepare the observation data that represents the current market condition for the AI model.
    def _next_observation(self):

        # 1. Time Series Observation (Scaled Data)
        obs_ts = self.data.iloc[
            self.current_step - self.sequence_length: self.current_step
        ][self.obs_features].values # Shape: (100, N_Features_TS)

        # 2. Account State (Non-Time-Series / Context Vector)
        current_equity = self._calculate_current_equity()
        open_positions_count = sum(1 for p in self.positions if p['Status'] == 0)

        obs_context = np.array([
            current_equity / self.balance_initial, # 1. Normalized Equity
            self.current_drawdown,                 # 2. Current Drawdown (Percentage)
            open_positions_count / self.max_current_holding, # 3. Open Positions Ratio
            self.data.iloc[self.current_step]['hour_cos']   # 4. Time Context (Scaled)
        ], dtype=np.float32) # Shape: (4,)

        # 3. Final Observation Construction (Time Series + Context)

        # Context features ·Äô·Äª·Ä¨·Ä∏·ÄÄ·Ä≠·ÄØ Sequence Length (100) ·Ä°·Äú·Ä≠·ÄØ·ÄÄ·Ä∫ ·Äñ·Äº·Äî·Ä∑·Ä∫·ÄÄ·Äª·ÄÄ·Ä∫·ÄÅ·Äº·ÄÑ·Ä∫·Ä∏ (Broadcasting)
        obs_context_expanded = np.tile(obs_context, (self.sequence_length, 1)) # Shape: (100, 4)

        # Horizontal Stack (Sequence length, N_Features_TS + N_Features_Context)
        obs_final = np.hstack([obs_ts, obs_context_expanded])

        # Data Validation ·ÄÖ·ÄÖ·Ä∫·ÄÜ·Ä±·Ä∏·ÄÅ·Äº·ÄÑ·Ä∫·Ä∏
        # NaN (Not a Number) values ·Äõ·Äæ·Ä≠·Äô·Äõ·Äæ·Ä≠·ÄÖ·ÄÖ·Ä∫·Äô·Äö·Ä∫
        # Infinite values ·Äõ·Äæ·Ä≠·Äô·Äõ·Äæ·Ä≠·ÄÖ·ÄÖ·Ä∫·Äô·Äö·Ä∫
        # Invalid data ·Äõ·Äæ·Ä≠·Äõ·ÄÑ·Ä∫ error ·Äï·Äº·Äô·Äö·Ä∫
        if np.isnan(obs_final).any() or np.isinf(obs_final).any():
            logger.error(f"Invalid observation at step {self.current_step}")
            raise ValueError(f"Invalid observation at step {self.current_step}")


        # NumPy Array ·Äï·Äº·Äî·Ä∫·Äï·Äº·Ä±·Ä¨·ÄÑ·Ä∫·Ä∏·ÄÅ·Äº·ÄÑ·Ä∫·Ä∏
        # GPU memory ‚Üí CPU memory ·Äï·Äº·Äî·Ä∫·Äõ·ÄΩ·Äæ·Ä±·Ä∑·Äô·Äö·Ä∫
        # PyTorch tensor ‚Üí NumPy array ·Äï·Äº·Äî·Ä∫·Äï·Äº·Ä±·Ä¨·ÄÑ·Ä∫·Ä∏·Äô·Äö·Ä∫
        # Gym environment ·ÄÄ NumPy arrays ·ÄÄ·Ä≠·ÄØ ·Äï·Ä≠·ÄØ·ÄÄ·Äº·Ä≠·ÄØ·ÄÄ·Ä∫·Äê·Äö·Ä∫·Åã Memory management ·Ä°·Äê·ÄΩ·ÄÄ·Ä∫ ·ÄÄ·Ä±·Ä¨·ÄÑ·Ä∫·Ä∏·Äê·Äö·Ä∫
        return obs_final


        # # 4. PyTorch/Device Conversion and Validation
        # try:
        #     # NumPy array ‚Üí PyTorch tensor ·Äï·Äº·Ä±·Ä¨·ÄÑ·Ä∫·Ä∏·Äô·Äö·Ä∫
        #     obs_tensor = torch.tensor(obs_final, dtype=torch.float32).to(device)
        #     # Data Validation ·ÄÖ·ÄÖ·Ä∫·ÄÜ·Ä±·Ä∏·ÄÅ·Äº·ÄÑ·Ä∫·Ä∏
        #     if torch.isnan(obs_tensor).any() or torch.isinf(obs_tensor).any():
        #         logger.error(f"Invalid observation (NaN/Inf) at step {self.current_step}")
        #         raise ValueError(f"Invalid observation (NaN/Inf) at step {self.current_step}")
        #     return obs_tensor.cpu().numpy()

        # except NameError:
        #     # Torch ·ÄÄ·Ä≠·ÄØ ·Äô·Äû·ÄØ·Ä∂·Ä∏·Äï·Ä´·ÄÄ NumPy ·ÄÄ·Ä≠·ÄØ·Äû·Ä¨ ·Äï·Äº·Äî·Ä∫·Äï·Ä±·Ä∏·Äï·Ä´·Åã
        #     if np.isnan(obs_final).any() or np.isinf(obs_final).any():
        #         logger.error(f"Invalid observation (NaN/Inf) at step {self.current_step}")
        #         raise ValueError(f"Invalid observation (NaN/Inf) at step {self.current_step}")
        #     return obs_final # Final NumPy array


    def _ray_mask(self, a, c, bounds):
        """
        Ray Mask ·Äú·ÄØ·Äï·Ä∫·Äï·Ä´·Åã
        a: ·Äô·Ä∞·Äõ·ÄÑ·Ä∫·Ä∏ action (np.array)
        c: ·Äû·ÄÄ·Ä∫·ÄÜ·Ä≠·ÄØ·ÄÑ·Ä∫·Äõ·Ä¨ ·Ä°·ÄÖ·ÄØ ·Äõ·Ä≤·Ä∑ ·Ä°·Äú·Äö·Ä∫·Äó·Äü·Ä≠·ÄØ
        A_r_boundary_func: lambda_A_r ·Äê·ÄΩ·ÄÄ·Ä∫·Äê·Ä≤·Ä∑ func
        A_boundary_func: lambda_A ·Äê·ÄΩ·ÄÄ·Ä∫·Äê·Ä≤·Ä∑ func
        """
        if np.allclose(a, c):
            return c  # ·Ä°·Äú·Äö·Ä∫·Äô·Äæ·Ä¨ ·ÄÜ·Ä≠·ÄØ ·Äô·Äõ·ÄΩ·Äæ·Ä±·Ä∑

        direction = a - c
        norm_dir = direction / np.linalg.norm(direction)

        lambda_A_r = bounds[1] - c if norm_dir > 0 else c - bounds[0]
        lambda_A = 1 - c if norm_dir > 0 else c - (-1)


        if lambda_A_r <= 0 or lambda_A <= 0:
            return c  # ·Ä°·Äô·Äæ·Ä¨·Ä∏ ·Äõ·Äæ·Ä±·Ä¨·ÄÑ·Ä∫

        scale = lambda_A_r / lambda_A
        a_r = c + scale * direction
        return np.clip(a_r, -1, 1)  # Action space ·ÄÄ·Äî·Ä∫·Ä∑·Äû·Äê·Ä∫


    def _get_action_name(self, _action, ma_first, ma_slow):

        c = 0.0  # ·Ä°·Äú·Äö·Ä∫·Äó·Äü·Ä≠·ÄØ (hold)
        if ma_first > ma_slow:  # Uptrend: buy only [0, 1]
            bounds = [0, 1]
        else:  # Downtrend: sell only [-1, 0]
            bounds = [-1, 0]

        a_masked = self._ray_mask(_action, c, bounds)

        """Convert continuous action to discrete action name"""
        if a_masked >= self.action_threshold:
            return "BUY"
        elif a_masked <= -self.action_threshold:
            return "SELL"
        else:
            return "HOLD"

    def step(self, action):
        # self.data ·Äû·Ää·Ä∫ Index ·Äê·ÄΩ·ÄÑ·Ä∫ 'time' ·ÄÄ·Ä≠·ÄØ ·Äë·Ä¨·Ä∏·Äõ·Äæ·Ä≠·Äï·Äº·ÄÆ·Ä∏ drop ·Äú·ÄØ·Äï·Ä∫·Äë·Ä¨·Ä∏·Äû·Ä±·Ä¨·ÄÄ·Äº·Ä±·Ä¨·ÄÑ·Ä∑·Ä∫·Åä Index ·Äô·Äæ time ·ÄÄ·Ä≠·ÄØ ·Äö·Ä∞·Äõ·Äî·Ä∫·Äú·Ä≠·ÄØ·Äû·Ää·Ä∫·Åã
        current_row_raw = self.data_raw.iloc[self.current_step]

        # Unscaled Price Features
        _o, _h, _l, _c, ma_fast, ma_slow = current_row_raw[['open', 'high', 'low', 'close', 'ma_fast', 'ma_slow']]

        _t = self.data.index[self.current_step] # Get time from index
        reward                      =   0 # ·Äí·ÄÆ step ·Ä°·Äê·ÄΩ·ÄÄ·Ä∫ ·ÄÖ·ÄØ·ÄÖ·ÄØ·Äï·Ä±·Ä´·ÄÑ·Ä∫·Ä∏ reward
        position_reward             =   0 # Position ·Äï·Ä≠·Äê·Ä∫·Äõ·ÄÑ·Ä∫ ·Äõ·Äê·Ä≤·Ä∑ reward
        action_hold_reward          =   0 # Hold action ·Ä°·Äê·ÄΩ·ÄÄ·Ä∫ reward/penalty

        _msg                        =   []
        _action                     =   action[0] # action value eg. [0.75]
        open_position               =   0
        for position in self.positions:
            if position['Status']   ==  0:
                position_reward, closed, _msg   =   self._calculate_reward(position)
                if not closed: open_position += 1  # Count what we already knew
                reward += position_reward

        # Continuous actions: [1 -> 0.5] LONG | [0.5 -> -0.5] HOLD |[-0.5 -> -1] SHORT
        action_name = self._get_action_name(action, ma_fast, ma_slow)

        if open_position < self.max_current_holding and action_name in ['BUY', 'SELL']:
            self.ticket_id  +=  1

            # Real trading ·Äô·Äæ·Ä¨ margin requirement ·Äõ·Äæ·Ä≠·Äû·Äú·Ä≠·ÄØ·Äô·Äª·Ä≠·ÄØ·Ä∏
            # Position ·Äñ·ÄΩ·ÄÑ·Ä∑·Ä∫·Äõ·ÄÑ·Ä∫ capital ·ÄÅ·Äª·ÄØ·Äï·Ä∫·ÄÑ·Äº·Ä¨·Ä∏·Äî·Ä±·Äõ·Äê·Äö·Ä∫
            # Position ·Äï·Ä≠·Äê·Ä∫·Äê·Ä≤·Ä∑·Ä°·ÄÅ·Ä´ ·Äï·Äº·Äî·Ä∫·Äï·Ä±·Ä´·ÄÑ·Ä∫·Ä∏·Äë·Ää·Ä∑·Ä∫·Äï·Ä±·Ä∏·Äê·Äö·Ä∫
            self.balance -= self.margin_requirement # hold up, this will make sure model can not open a lot of

            position        =   {
                "Ticket"        :   self.ticket_id,
                "Symbol"        :   self.symbol_col,
                "ActionTime"    :   _t,
                "Type"          :   action_name,
                "Lot"           :   1,
                "ActionPrice"   :   _c,
                "SL"            :   self.stop_loss,
                "PT"            :   self.profit_taken,
                "MaxDD"         :   0,
                "Swap"          :   0.0,
                "CloseTime"     :   "",
                "ClosePrice"    :   0.0,
                "Point"         :   self.point,
                "Reward"        :   self.transaction_fee,
                "DateDuration"  :   _t.date().isoformat(),
                "Status"        :   0, # 0 is Position is currently OPEN and active
                #"PIPS"          :   self.transaction_fee, # Price Interest Point (profit/loss ·ÄÄ·Ä≠·ÄØ measure ·Äú·ÄØ·Äï·Ä∫·Äê·Ä≤·Ä∑ unit)
                "PIPS"          :   0,
                "ActionStep"    :   self.current_step,
                "CloseStep"     :   -1, # Step number when position closed, not close yet is -1
                "DeltaStep"     :   0,
                "OpenBal"       :   self.balance,
                "CloseBal"       :   0,
                "HighestPrice"  :   _c,
                "LowestPrice"   :   _c,
            }

            self.positions.append(position)
            # do not use transaction_fee penalty
            # reward = self.transaction_fee #open cost
            # model ·ÄÄ ·Ä°·Äú·ÄΩ·Äî·Ä∫·Ä°·ÄÄ·Äª·ÄΩ·Ä∂ position ·Äê·ÄΩ·Ä± ·Äô·Äñ·ÄΩ·ÄÑ·Ä∑·Ä∫·Äô·Ä≠·Ä°·Ä±·Ä¨·ÄÑ·Ä∫ ·Äë·Ä≠·Äî·Ä∫·Ä∏·ÄÅ·Äª·ÄØ·Äï·Ä∫·Äê·Ä≤·Ä∑ mechanism ·Äñ·Äº·ÄÖ·Ä∫·Äï·Ä´·Äê·Äö·Ä∫·Åã
            _msg.append(f'Step:{self.current_step} Tkt:{position["Ticket"]} {position["Type"]} Rwd:{position["PIPS"]} SL:{position["SL"]} PT:{position["PT"]}')

        # HOLD Penalty ·ÄÄ·Ä≠·ÄØ ·Ä°·Äú·ÄΩ·Äî·Ä∫·Äû·Ä±·Ä∏·ÄÑ·Äö·Ä∫·Äû·Ä±·Ä¨ ·Äê·Äî·Ä∫·Äñ·Ä≠·ÄØ·Ä∏
        # (·Ä•·Äï·Äô·Ä¨: -0.0001) ·Äû·Ä≠·ÄØ·Ä∑ ·Äï·Äº·Ä±·Ä¨·ÄÑ·Ä∫·Ä∏·Äï·Ä´·Åã ·Ä°·ÄÄ·Ä±·Ä¨·ÄÑ·Ä∫·Ä∏·ÄÜ·ÄØ·Ä∂·Ä∏·Äô·Äæ·Ä¨
        # Trading ·Äô·Äú·ÄØ·Äï·Ä∫·ÄÅ·Äº·ÄÑ·Ä∫·Ä∏·Ä°·Äê·ÄΩ·ÄÄ·Ä∫ Penalty ·Äô·Äï·Ä±·Ä∏·Äò·Ä≤ action_hold_reward = 0 ·Äë·Ä¨·Ä∏·Äï·Ä´·Åã
        elif open_position < self.max_current_holding and action_name == "HOLD":
            action_hold_reward  =   0  # no open any position, encourage open position
        else:
            action_hold_reward  =   0




        reward              +=  action_hold_reward

        # Move to the next time step
        self.current_step   +=  1

        # check if episode is done
        terminated          =   (self.balance <= 0)
        truncated           =   (self.current_step > self.max_steps)

        # get next observation
        obs                 =   self._next_observation()
        _msg.append(f'---idle----step:{self.current_step}, RF:{action_name} Action:{_action} Balance: {self.balance} reward:{reward} total_rewards:{self.ttl_rewards} position_reward:{position_reward} action_hold_reward:{action_hold_reward}')


        current_equity = self._calculate_current_equity()
        self.equity_curve.append(current_equity)
        self._calculate_drawdown()  # This updates peak_equity and drawdowns

        # =========================================================================
        # START: Drawdown Penalty Logic
        # =========================================================================
        # self.current_drawdown ·Äû·Ää·Ä∫ Percentage (0.0 ·Äô·Äæ 1.0) ·Äñ·Äº·ÄÖ·Ä∫·Äû·Ää·Ä∫·Åã


        drawdown_penalty = self.current_drawdown * self.drawdown_penalty_factor
        # Reward ·Äê·ÄΩ·ÄÑ·Ä∫ ·Äî·ÄØ·Äê·Ä∫·Äï·Ä±·Ä∏·ÄÅ·Äº·ÄÑ·Ä∫·Ä∏
        reward -= drawdown_penalty

        # Log the penalty for debugging
        _msg.append(f'Drawdown Penalty: -{drawdown_penalty:.4f} (DD:{self.current_drawdown:.4f})')
        # =========================================================================
        # END: Drawdown Penalty Logic
        # =========================================================================
        # Drawdown Penalty ·Äî·ÄØ·Äê·Ä∫·Äï·Äº·ÄÆ·Ä∏·Äô·Äæ·Äû·Ä¨ ·ÄÖ·ÄØ·ÄÖ·ÄØ·Äï·Ä±·Ä´·ÄÑ·Ä∫·Ä∏ Reward ·ÄÄ·Ä≠·ÄØ ·Ä°·Äï·Ä∫·Äí·Ä≠·Äê·Ä∫·Äú·ÄØ·Äï·Ä∫·Äï·Ä´
        self.ttl_rewards += reward  # <--- ·Ä§·Äî·Ä±·Äõ·Ä¨·Äê·ÄΩ·ÄÑ·Ä∫ ·Äï·Äº·Äî·Ä∫·Äë·Ää·Ä∑·Ä∫·Äï·Ä´

        info = {}

        if terminated or truncated:
            buy_positions = [p for p in self.positions if p["Type"] == "BUY"]
            sell_positions = [p for p in self.positions if p["Type"] == "SELL"]

            buy_count = len(buy_positions)
            sell_count = len(sell_positions)
            total_positions = len(self.positions)

            # Calculate win rates
            buy_wins = len([p for p in buy_positions if p["PIPS"] > 0])
            sell_wins = len([p for p in sell_positions if p["PIPS"] > 0])

            buy_win_rate = buy_wins / buy_count if buy_count > 0 else 0
            sell_win_rate = sell_wins / sell_count if sell_count > 0 else 0

            _m = f'--- Positions: {total_positions} (Buy:{buy_count}, Sell:{sell_count}) | '
            _m += f'WinRates: Buy:{buy_win_rate:.1%}, Sell:{sell_win_rate:.1%} | '
            _m += f'TotalRewards: {self.ttl_rewards} Balance: {self.balance}'

            logger.info(_m)
            _msg.append(_m)

            # Additional info
            if self.logger_show:
                for _m in _msg:
                    logger.info(_m)

            info["info"]                = _msg
            info["sharpe"]              = self._calculate_sharpe()  # ‚úÖ Now works! üí° 'sharpe_ratio' ·Äô·Äæ 'sharpe' ·Äû·Ä≠·ÄØ·Ä∑·Äï·Äº·Ä±·Ä¨·ÄÑ·Ä∫·Ä∏·Äï·Ä´·Åã
            info["drawdown"]            = self.max_drawdown         # ‚úÖ Now accurate!'max_drawdown' ·Äô·Äæ 'drawdown' ·Äû·Ä≠·ÄØ·Ä∑·Äï·Äº·Ä±·Ä¨·ÄÑ·Ä∫·Ä∏·Äï·Ä´·Åã
            info["current_equity"]      = current_equity            # ‚úÖ For debugging
            info["peak_equity"]         = self.peak_equity          # ‚úÖ For debugging
            info["equity_curve_length"] = len(self.equity_curve)    # ‚úÖ Monitor growth
            info["episode"]             = {
                "r": reward,
                "l": self.current_step
            }

        return obs, reward, terminated, truncated, info




    def _calculate_reward(self, position):
        _o, _h, _l, _c              = self.data_raw.iloc[self.current_step][['open', 'high', 'low', 'close']]
        _t                          = self.data.index[self.current_step]
        _msg                        =   []

        entry_price                 =   position['ActionPrice']
        direction                   =   position['Type']
        profit_target_price         =   entry_price + position['PT']/ self.point if direction == 'BUY' else entry_price - position['PT']/self.point
        stop_loss_price             =   entry_price + position['SL']/ self.point if direction == 'BUY' else entry_price - position['SL']/self.point
        closed                      =   False
        close_position_reward       =   0.0
        good_position_reward        =   0.0

        # Check for stoploss hit
        if (direction == 'BUY' and _l <= stop_loss_price) or (direction == 'SELL' and _h >= stop_loss_price):
            close_position_reward   =   position['SL'] # position sl ·ÄÄ minus value ·Äñ·Äº·ÄÖ·Ä∫·Äê·Äö·Ä∫

            position['CloseTime']   =   _t
            position['ClosePrice']  =   stop_loss_price
            position['Status']      =   1   # Status ·ÄÄ open ·ÄÜ·Ä≠·ÄØ 0 close ·ÄÜ·Ä≠·ÄØ 1
            position['CloseStep']   =   self.current_step
            position['PIPS']        =   close_position_reward - self.transaction_fee
            position['DeltaStep']   =   self.current_step - position['ActionStep']
            position['Info']        =   f'{profit_target_price:.5f} | {stop_loss_price:.5f}'

            self.balance            +=  self.margin_requirement + position['PIPS'] # return 100 is margin hold
            position['CloseBal']    =   self.balance
            closed                  =   True
            _msg.append(f'Step:{self.current_step} Tkt:{position["Ticket"]}: Rwd:{position["PIPS"]}, SL:{position["SL"]}, DeltaStep:{position["DeltaStep"]}')

        elif (direction == 'BUY' and _h >= profit_target_price) or (direction == 'SELL' and _l <= profit_target_price):
            close_position_reward   =    position['PT'] # position tp ·ÄÄ plus value ·Äñ·Äº·ÄÖ·Ä∫·Äê·Äö·Ä∫

            position['CloseTime']   =   _t
            position['ClosePrice']  =   profit_target_price
            position['Status']      =   2   # Status ·ÄÄ open ·ÄÜ·Ä≠·ÄØ 0 close ·ÄÜ·Ä≠·ÄØ 1
            position['CloseStep']   =   self.current_step
            position['PIPS']        =   close_position_reward - self.transaction_fee
            position['DeltaStep']   =   self.current_step - position['ActionStep']
            position['Info']        =   f'{profit_target_price:.5f} | {stop_loss_price:.5f}'

            self.balance            +=  self.margin_requirement + position['PIPS'] # return 100 is margin hold
            position['CloseBal']    =   self.balance
            closed                  =   True
            _msg.append(f'Step:{self.current_step} Tkt:{position["Ticket"]}: Rwd:{position["PIPS"]}, SL:{position["SL"]}, DeltaStep:{position["DeltaStep"]}')

        else:
            if self.current_step + 5 + self.sequence_length >= len(self.data):
                close_position_reward   =   (_c - position["ActionPrice"] if direction == 'BUY' else position["ActionPrice"] - _c)* self.point

                position['CloseTime']   =   _t
                position['ClosePrice']  =   _c
                position['Status']      =   3   # Status ·ÄÄ open ·ÄÜ·Ä≠·ÄØ 0 close ·ÄÜ·Ä≠·ÄØ 1, force close 2
                position['CloseStep']   =   self.current_step
                position['PIPS']        =   close_position_reward - self.transaction_fee
                position['DeltaStep']   =   self.current_step - position['ActionStep']
                position['Info']        =   f'{profit_target_price:.5f} | {stop_loss_price:.5f}'
                self.balance            +=  self.margin_requirement + position["PIPS"] # return 100 is margin hold
                position['CloseBal']    =   self.balance

                closed                  =   True
                _msg.append(f'Step:{self.current_step} Tkt:{position["Ticket"]}: Rwd:{position["PIPS"]}, Cls:End, DeltaStep:{position["DeltaStep"]}')

            else:
                # =========================================================================
                # Real Trailing Stop Logic (·Ä°·Äô·Äº·ÄÑ·Ä∑·Ä∫·ÄÜ·ÄØ·Ä∂·Ä∏ ·Äõ·Ä±·Ä¨·ÄÄ·Ä∫·Äñ·Ä∞·Ä∏·Äû·Ä±·Ä¨ ·Äà·Ä±·Ä∏·Äî·Äæ·ÄØ·Äî·Ä∫·Ä∏·ÄÄ·Ä≠·ÄØ ·Äô·Äæ·Äê·Ä∫·Äê·Äô·Ä∫·Ä∏·Äê·ÄÑ·Ä∫·ÄÅ·Äº·ÄÑ·Ä∫·Ä∏)
                # =========================================================================
                # 1. Highest/Lowest Price Update

                if direction == "BUY":
                  # Buy position ·Ä°·Äê·ÄΩ·ÄÄ·Ä∫ ·Ä°·Äô·Äº·ÄÑ·Ä∑·Ä∫·ÄÜ·ÄØ·Ä∂·Ä∏ ·Äõ·Ä±·Ä¨·ÄÄ·Ä∫·Äñ·Ä∞·Ä∏·Äû·Ä±·Ä¨ ·Äà·Ä±·Ä∏·Äî·Äæ·ÄØ·Äî·Ä∫·Ä∏·ÄÄ·Ä≠·ÄØ ·Äô·Äæ·Äê·Ä∫·Äê·Äô·Ä∫·Ä∏·Äê·ÄÑ·Ä∫
                  if _c > position["HighestPrice"]:
                      position["HighestPrice"] = _c

                  # 2. New SL Target Price (Trailing Price) ·ÄÄ·Ä≠·ÄØ ·Äê·ÄΩ·ÄÄ·Ä∫·ÄÅ·Äª·ÄÄ·Ä∫·ÄÅ·Äº·ÄÑ·Ä∫·Ä∏
                  # New_SL_Price = HighestPrice - (Trailing Distance Pips ·ÄÄ·Ä≠·ÄØ Price Change ·Äû·Ä≠·ÄØ·Ä∑ ·Äï·Äº·Ä±·Ä¨·ÄÑ·Ä∫·Ä∏)
                  trailing_price = position["HighestPrice"] - self.trailing_distance / self.point

                  # 3. SL ·ÄÄ·Ä≠·ÄØ ·Ä°·ÄÜ·ÄÑ·Ä∑·Ä∫·Äô·Äº·Äæ·ÄÑ·Ä∑·Ä∫·Äê·ÄÑ·Ä∫·ÄÅ·Äº·ÄÑ·Ä∫·Ä∏
                  # ·Äú·ÄÄ·Ä∫·Äõ·Äæ·Ä≠ SL ·Äë·ÄÄ·Ä∫ ·Äï·Ä≠·ÄØ·ÄÄ·Ä±·Ä¨·ÄÑ·Ä∫·Ä∏·Äô·Äæ·Äû·Ä¨ ·Äõ·ÄΩ·Ä±·Ä∑·Äï·Ä´
                  if trailing_price > stop_loss_price:

                      stop_loss_price = trailing_price
                      # SL_Price ·Ä°·Äû·ÄÖ·Ä∫·ÄÄ·Ä≠·ÄØ Points ·Äû·Ä≠·ÄØ·Ä∑ ·Äï·Äº·Äî·Ä∫·Äï·Äº·Ä±·Ä¨·ÄÑ·Ä∫·Ä∏·Äï·Äº·ÄÆ·Ä∏ position['SL'] ·ÄÄ·Ä≠·ÄØ ·Ä°·Äï·Ä∫·Äí·Ä≠·Äê·Ä∫·Äú·ÄØ·Äï·Ä∫·Äï·Ä´
                      position["SL"] = (stop_loss_price - entry_price) * self.point
                    #   if position["SL"] > 0:
                    #       position["SL"]    =   -abs(position["SL"])
                      trailing_happened = True
                  else:
                      trailing_happened = False


                elif direction == "SELL":
                  # Sell position ·Ä°·Äê·ÄΩ·ÄÄ·Ä∫ ·Ä°·Äî·Ä≠·Äô·Ä∑·Ä∫·ÄÜ·ÄØ·Ä∂·Ä∏ ·Äõ·Ä±·Ä¨·ÄÄ·Ä∫·Äñ·Ä∞·Ä∏·Äû·Ä±·Ä¨ ·Äà·Ä±·Ä∏·Äî·Äæ·ÄØ·Äî·Ä∫·Ä∏·ÄÄ·Ä≠·ÄØ ·Äô·Äæ·Äê·Ä∫·Äê·Äô·Ä∫·Ä∏·Äê·ÄÑ·Ä∫
                  if _c < position["LowestPrice"]:
                      position["LowestPrice"] = _c

                  # New SL Target Price (Trailing Price) ·ÄÄ·Ä≠·ÄØ ·Äê·ÄΩ·ÄÄ·Ä∫·ÄÅ·Äª·ÄÄ·Ä∫·ÄÅ·Äº·ÄÑ·Ä∫·Ä∏
                  trailing_price = position["LowestPrice"] + self.trailing_distance / self.point

                  # SL ·ÄÄ·Ä≠·ÄØ ·Ä°·ÄÜ·ÄÑ·Ä∑·Ä∫·Äô·Äº·Äæ·ÄÑ·Ä∑·Ä∫·Äê·ÄÑ·Ä∫·ÄÅ·Äº·ÄÑ·Ä∫·Ä∏
                  if trailing_price < stop_loss_price:
                      stop_loss_price = trailing_price
                      # SL_Price ·Ä°·Äû·ÄÖ·Ä∫·ÄÄ·Ä≠·ÄØ Points ·Äû·Ä≠·ÄØ·Ä∑ ·Äï·Äº·Äî·Ä∫·Äï·Äº·Ä±·Ä¨·ÄÑ·Ä∫·Ä∏·Äï·Äº·ÄÆ·Ä∏ position['SL'] ·ÄÄ·Ä≠·ÄØ ·Ä°·Äï·Ä∫·Äí·Ä≠·Äê·Ä∫·Äú·ÄØ·Äï·Ä∫·Äï·Ä´
                      position["SL"] = (entry_price - stop_loss_price) * self.point
                    #   if position["SL"] > 0:
                    #       position["SL"]    =   -abs(position["SL"])
                      trailing_happened = True
                  else:
                      trailing_happened = False

                # =========================================================================
                # Reward Logic (Trailing ·Äú·ÄØ·Äï·Ä∫·ÄÅ·Äº·ÄÑ·Ä∫·Ä∏·Ä°·Äê·ÄΩ·ÄÄ·Ä∫ Bonus ·Äï·Ä±·Ä∏·ÄÅ·Äº·ÄÑ·Ä∫·Ä∏)
                # =========================================================================
                # Reward Sign ·ÄÄ·Ä≠·ÄØ ·Äö·ÄÅ·ÄÑ·Ä∫·Ä°·Äê·Ä≠·ÄØ·ÄÑ·Ä∫·Ä∏ ·Äê·ÄΩ·ÄÄ·Ä∫·Äï·Ä´·Åã
                delta = _c - entry_price
                if direction == "BUY":
                    reward_sign = 1 if delta >= 0 else -1
                elif direction == "SELL":
                    reward_sign = -1 if delta >= 0 else 1

                good_position_reward = reward_sign * self.good_position_reward_scale

                # Trailing ·Ä°·Äô·Äæ·Äî·Ä∫·Äê·ÄÄ·Äö·Ä∫ ·Äñ·Äº·ÄÖ·Ä∫·Äû·ÄΩ·Ä¨·Ä∏·Äô·Äæ·Äû·Ä¨ Bonus Reward ·ÄÄ·Ä≠·ÄØ ·Äï·Ä±·Ä∏·Äï·Ä´
                if trailing_happened:
                    good_position_reward += 0.001

                position['Info']        =   f'{profit_target_price:.5f} | {stop_loss_price:.5f}'
                position['CloseBal']    =   self.balance
                _msg.append(f'Step:{self.current_step} Tkt:{position["Ticket"]}: NO_Close, PT:{position["PT"]}, SL:{position["SL"]}')

        return close_position_reward + good_position_reward, closed, _msg


    def _calculate_sharpe(self, risk_free_rate=0.0):
        """Calculate Sharpe ratio for the current episode"""
        if len(self.equity_curve) < 2:
            return 0.0

        returns = np.diff(self.equity_curve) / self.equity_curve[:-1]

        if np.std(returns) == 0:
            return 0.0

        sharpe = (np.mean(returns) - risk_free_rate) / np.std(returns)
        return float(sharpe * np.sqrt(288))  # Annualized (5-min bars ‚Üí 288/day)

    def _calculate_drawdown(self):
        """Update max drawdown during episode"""
        current_equity          =   self.equity_curve[-1]
        self.peak_equity        =   max(self.peak_equity, current_equity)
        self.current_drawdown   =   (self.peak_equity - current_equity) / self.peak_equity
        self.max_drawdown       =   max(self.max_drawdown, self.current_drawdown)


    def _calculate_current_equity(self):
        """Calculate total current equity (balance + unrealized P/L)"""
        total_equity = self.balance  # Start with cash balance

        # Add unrealized P/L from open positions
        for position in self.positions:
            if position['Status'] == 0:  # Only open positions
                current_price = self.data.iloc[self.current_step]["close"]
                entry_price = position['ActionPrice']

                if position['Type'] == 'BUY':
                    unrealized_pnl = (current_price - entry_price) * self.point
                else:  # Sell
                    unrealized_pnl = (entry_price - current_price) * self.point

                total_equity += unrealized_pnl

        return total_equity

    def render(self, mode='human', title=None, **kwargs):
        # Render the environment to the screen
        if mode in ('human', 'file'):
            log_header      =   True
            printout        =   False
            if mode == 'human':
                printout    =   True

            log_file = self.csv_file.replace("split/", "log/")
            pm = {
                "log_header": log_header,
                "log_filename": log_file,
                "printout": printout,
                "balance": self.balance,
                "balance_initial": self.balance_initial,
                "transaction_close_this_step": self.positions,
                "done_information": False
            }
            render_to_file(**pm)
            if log_header:
                    log_header = False

In [153]:
# Total data: January 2020 through 2025 (about five years).
# Approximate total steps: 72,000 steps/year x 5 years = 360,000 steps.
# Total training steps (for the decay): once the continuous fine-tuning steps are
# included as well, 1,000,000 (1M) steps is taken as the total decay length.
# Global variables used by the learning-rate callback below.

GLOBAL_TOTAL_STEPS = 1_000_000 # e.g. 1 million steps
START_LR = 1e-4
END_LR = 5e-6

class ContinuousLRScheduler(BaseCallback):
    """Linearly decay the learning rate over a fixed global step budget.

    The rate is ``start_lr`` at step 0 and ``end_lr`` at
    ``total_global_steps``; in between it is interpolated linearly from
    ``self.num_timesteps``. Once the budget is exhausted the rate stays at
    ``end_lr`` instead of continuing below it (and eventually below zero).

    Args:
        total_global_steps: Number of steps over which the decay is spread.
            Must be positive.
        verbose: Verbosity level forwarded to ``BaseCallback``.
        start_lr: Learning rate at step 0. Defaults to the module-level
            ``START_LR``.
        end_lr: Learning rate at and after ``total_global_steps``. Defaults to
            the module-level ``END_LR``.

    Raises:
        ValueError: If ``total_global_steps`` is not positive.
    """

    def __init__(self, total_global_steps, verbose=0, start_lr=None, end_lr=None):
        super().__init__(verbose)
        if total_global_steps <= 0:
            raise ValueError(
                f"total_global_steps must be positive, got {total_global_steps}"
            )
        self.total_global_steps = total_global_steps
        self.start_lr = START_LR if start_lr is None else start_lr
        self.end_lr = END_LR if end_lr is None else end_lr

    def _current_lr(self) -> float:
        """Return the learning rate for the current ``num_timesteps``."""
        progress = self.num_timesteps / self.total_global_steps
        # Clamp: training may run longer than the decay budget, and an
        # unclamped value would push the rate below end_lr and then negative.
        progress = min(max(progress, 0.0), 1.0)
        return self.end_lr + (self.start_lr - self.end_lr) * (1.0 - progress)

    def _on_step(self) -> bool:
        """Apply the decayed learning rate; always lets training continue."""
        new_lr = self._current_lr()

        # SB3 re-applies ``model.lr_schedule`` to the optimizer at the start of
        # every train() call, which would overwrite a value written only into
        # the optimizer. Keep the schedule in sync so the decayed rate is the
        # one actually used (and logged). The closure holds just a float, so
        # it stays cheap to serialise with the model.
        self.model.lr_schedule = lambda _progress_remaining, _lr=new_lr: _lr

        # Update every parameter group, not only the first one.
        for param_group in self.model.policy.optimizer.param_groups:
            param_group['lr'] = new_lr

        return True

# This callback can be used for both the initial training and the continuous fine-tuning runs.
# It keeps decaying the LR without interruption, based on the agent's current num_timesteps.

In [None]:
from sklearn.preprocessing import StandardScaler
from joblib import dump, load # Scaler ·ÄÄ·Ä≠·ÄØ ·Äû·Ä≠·Äô·Ä∫·Ä∏·ÄÜ·Ää·Ä∫·Ä∏/·Äï·Äº·Äî·Ä∫·Äö·Ä∞·Äõ·Äî·Ä∫

# The columns to standardise are listed in the environment configuration.
cf = EnvConfig('./drive/MyDrive/configure.json')
features_scaled = cf.env_parameters(item='features_scaled')

# Where the fitted scaler is written and where the training data is read from.
SCALER_PATH = '/content/drive/MyDrive/data/model/EURUSD/scalar_2020_2021.joblib'
DATA_PATH = '/content/drive/MyDrive/data/raw/final.csv'

# Fit on the configured feature columns only, after dropping rows that
# contain missing values in any of them.
train_data_df = pd.read_csv(DATA_PATH)
data_to_fit = train_data_df[features_scaled].dropna()
train_scaler = StandardScaler().fit(data_to_fit)

# 3. Save the scaler object to a file (for production/test use).
dump(train_scaler, SCALER_PATH)


In [None]:
import os
import datetime
from stable_baselines3.common.vec_env import SubprocVecEnv, DummyVecEnv
from stable_baselines3 import PPO
import torch.nn as nn
from stable_baselines3.common.utils import set_random_seed
# Assume logger is defined elsewhere, e.g., import logging; logger = logging.getLogger(__name__)
from stable_baselines3.common.callbacks import CallbackList

BASE_SEED = 42
number_envs = 1
# Set Stable-Baselines3's global random seed.
set_random_seed(BASE_SEED)


def single_csv_training(csv_file, env_config_file, asset, model_name='', cf=None, number_envs=1, week_num=0):
    """Train (or continue training) a PPO agent on one CSV file and save it.

    Args:
        csv_file: Path of the data CSV handed to ``ForexTradingEnv``. The
            saved model path is derived from it by replacing ``"split/"`` with
            ``"model/"`` and ``".csv"`` with ``"_single_test.zip"``.
        env_config_file: Path of the environment configuration file. Only
            read when ``cf`` is not supplied.
        asset: Asset identifier passed to the environment and used as the
            prefix of the TensorBoard run name.
        model_name: Path of an existing model to continue training. When
            empty, a new PPO model is created.
        cf: Already-loaded configuration object. When ``None`` it is built
            with ``EnvConfig(env_config_file)``.
        number_envs: Number of environment instances put in the
            ``DummyVecEnv``.
        week_num: Offset added to ``BASE_SEED`` so that each incremental
            training run uses a different seed.

    Side effects:
        Creates the log directory, writes TensorBoard logs, and saves the
        trained model next to the data (see ``csv_file``).
    """
    # Fall back to the configuration file when no config object is passed;
    # with the default cf=None the first cf.* call would otherwise fail.
    if cf is None:
        cf = EnvConfig(env_config_file)

    # 1. Work out the log root directory and the run name.
    BASE_LOG_DIR = "/content/drive/MyDrive/data/log"
    RUN_NAME = f"{asset}_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}"

    # 2. Make sure the log root directory exists.
    os.makedirs(BASE_LOG_DIR, exist_ok=True)

    sequence_length = cf.data_processing_parameters("sequence_length")
    policy_kwargs = dict(
        # Custom feature extractor defined elsewhere in this notebook.
        features_extractor_class=CustomCombinedExtractor,
        features_extractor_kwargs=dict(
            sequence_length=sequence_length,   # input sequence length
            embed_dim=64,
            num_heads=2,
            num_layers=2
        ),
        # Hidden layers of the actor (pi) and the critic (vf); the critic was
        # widened to [512, 256] for better explained variance.
        net_arch=dict(pi=[256, 256], vf=[512, 256]),
        # ReLU as the non-linear activation.
        activation_fn=nn.ReLU,
        # No orthogonal initialisation (noted as working better for
        # financial data).
        ortho_init=False
    )

    def _make_env():
        """Build one training environment sharing the fitted scaler."""
        return ForexTradingEnv(
            csv_file,
            cf,
            asset,
            logger_show=True,
            scaler=train_scaler
        )

    # Build the vectorised environment (SubprocVecEnv is imported as well but
    # not used here).
    env = DummyVecEnv([_make_env for _ in range(number_envs)])

    # Vary the seed per week to avoid overfitting in incremental training
    # (e.g. pass week_num=1 for week 2).
    # NOTE(review): VecEnv.seed is expected to give sub-environment i the seed
    # varied_seed + i - confirm for the installed SB3 version.
    varied_seed = BASE_SEED + week_num
    env.seed(varied_seed)

    if model_name:
        model = PPO.load(
            model_name,
            env=env,
            learning_rate=START_LR  # overridden by the learning-rate callback
        )
    else:
        model = PPO(
            'MlpPolicy',
            env,
            device='cuda',  # try switching this to 'cpu' if needed
            verbose=1,
            tensorboard_log=BASE_LOG_DIR,
            normalize_advantage=True,
            policy_kwargs=policy_kwargs,
            learning_rate=START_LR,
            seed=varied_seed,

            # Important settings.
            n_steps=2048,
            batch_size=256,
            n_epochs=7,
            max_grad_norm=0.5,  # tighter gradient control

            ent_coef=0.01,  # entropy bonus coefficient
            vf_coef=0.5,
            target_kl=0.02,
            clip_range=0.02,

            # Alternative settings kept for reference:
            # vf_coef=0.5,  # Reduced from 0.7 to balance policy vs value
            # target_kl=0.02,  # Increased from 0.005 for better updates
            # ent_coef=0.005,  # Reduced from 0.01 for controlled exploration
            # clip_range=0.01,  # Increased from 0.002 to reduce high clip_fraction
            # gamma=0.99,
        )

    # Train the agent.
    logger.info("Starting model training...")
    metrics_callback = TrainingMetricsCallback()

    lr_callback = ContinuousLRScheduler(GLOBAL_TOTAL_STEPS)
    # Combine the callbacks into a single list.
    callback_list = CallbackList([lr_callback, metrics_callback])

    model.learn(
        # NOTE(review): this exceeds GLOBAL_TOTAL_STEPS (1_000_000), the decay
        # length given to the LR callback - confirm the two are meant to
        # differ. The original note here read "retain 200,000".
        total_timesteps=3_000_000,
        callback=callback_list,

        # Pass only the run folder name as tb_log_name.
        tb_log_name=RUN_NAME,
        # Keep counting timesteps when continuing from an existing model.
        reset_num_timesteps=not model_name
    )
    logger.info("Model training complete")
    model_filename = csv_file.replace("split/", "model/").replace(".csv", "_single_test.zip")
    model.save(model_filename)