In [59]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [101]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import torch
import torch.nn as nn
from stable_baselines3 import PPO
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from stable_baselines3.common.vec_env import DummyVecEnv


In [60]:
df = pd.read_csv('/Users/data/VsCodeProjects/DataScience/data/raw/eurusd_hour.csv')

In [5]:
df

Unnamed: 0,Date,Time,BO,BH,BL,BC,BCh,AO,AH,AL,AC,ACh
0,2005-05-02,00:00,1.28520,1.28520,1.28400,1.28440,-0.00080,1.28540,1.28540,1.28420,1.28460,-0.00080
1,2005-05-02,01:00,1.28440,1.28480,1.28390,1.28420,-0.00020,1.28460,1.28500,1.28410,1.28440,-0.00020
2,2005-05-02,02:00,1.28430,1.28540,1.28410,1.28510,0.00080,1.28450,1.28560,1.28430,1.28530,0.00080
3,2005-05-02,03:00,1.28510,1.28590,1.28500,1.28510,0.00000,1.28530,1.28610,1.28520,1.28530,0.00000
4,2005-05-02,04:00,1.28520,1.28590,1.28490,1.28550,0.00030,1.28540,1.28610,1.28510,1.28570,0.00030
...,...,...,...,...,...,...,...,...,...,...,...,...
93079,2020-04-29,18:00,1.08620,1.08851,1.08611,1.08713,0.00093,1.08660,1.08865,1.08640,1.08726,0.00066
93080,2020-04-29,19:00,1.08711,1.08823,1.08659,1.08748,0.00037,1.08725,1.08837,1.08675,1.08760,0.00035
93081,2020-04-29,20:00,1.08747,1.08788,1.08691,1.08708,-0.00039,1.08758,1.08810,1.08706,1.08808,0.00050
93082,2020-04-29,21:00,1.08708,1.08740,1.08699,1.08714,0.00006,1.08808,1.08814,1.08763,1.08766,-0.00042


In [61]:
df['datetime'] = pd.to_datetime(df['Date'] + ' ' + df['Time'])
df = df.sort_values('datetime')
df = df.reset_index(drop=True)


In [62]:
df['MidOpen'] = (df['BO'] + df['AO']) / 2
df['MidHigh'] = (df['BH'] + df['AH']) / 2
df['MidLow'] = (df['BL'] + df['AL']) / 2
df['MidClose'] = (df['AC'] + df['BC']) / 2

df['Spread'] = df['AC'] - df['BC']
df['Spread_norm'] = df['Spread'] / df['MidClose']

In [63]:
df['r_1'] = np.log(df['MidClose'] / df['MidClose'].shift(1))
df['r_3'] = np.log(df['MidClose'] / df['MidClose'].shift(3))
df['r_6'] = np.log(df['MidClose'] / df['MidClose'].shift(6))
df['r_12'] = np.log(df['MidClose'] / df['MidClose'].shift(12))


In [64]:
df['mom_6'] = df['MidClose'] - df['MidClose'].shift(6)
df['mom_12'] = df['MidClose'] - df['MidClose'].shift(12)

In [65]:
df['EMA20'] = df['MidClose'].ewm(span=20, adjust=False).mean()
df['EMA50'] = df['MidClose'].ewm(span=50, adjust=False).mean()

In [66]:
high = df['MidHigh']
low = df['MidLow']
close = df['MidClose']

tr1 = high - low
tr2 = (high - close.shift()).abs()
tr3 = (low - close.shift()).abs()

tr = pd.concat([tr1, tr2, tr3], axis=1).max(axis=1)

df['ATR14'] = tr.rolling(14).mean()
df['ATR50'] = tr.rolling(50).mean()

df['vol_ratio'] = df['ATR14'] / df['ATR50']

In [67]:
df['dist_ema20'] = (df['MidClose'] - df['EMA20']) / df['ATR14']
df['dist_ema50'] = (df['MidClose'] - df['EMA50']) / df['ATR14']

In [68]:
df['std_10'] = df['r_1'].rolling(10).std()
df['std_20'] = df['r_1'].rolling(20).std()

In [69]:
df['range'] = df['MidHigh'] - df['MidLow']
df['range_norm'] = df['range'] / df['ATR14']

In [70]:
spread_mean = df['Spread'].rolling(50).mean()
df['spread_spike'] = (df['Spread'] > spread_mean * 1.5).astype(int)

In [71]:
df['hour'] = df['datetime'].dt.hour
df['weekday'] = df['datetime'].dt.weekday

df['hour_sin'] = np.sin(2 * np.pi * df['hour'] / 24)
df['hour_cos'] = np.cos(2 * np.pi * df['hour'] / 24)

In [72]:
df = df.dropna().reset_index(drop=True)

In [73]:
high = df['MidHigh']
low = df['MidLow']

In [74]:
df['fractal_high_raw'] = (
    (high >= high.shift(1)) &
    (high >= high.shift(2)) &
    (high > high.shift(-1)) &
    (high > high.shift(-2))
)

df['fractal_low_raw'] = (
    (low <= low.shift(1)) &
    (low <= low.shift(2)) &
    (low <= low.shift(-1)) &
    (low < low.shift(-2))
)

df['fractal_high'] = df['fractal_high_raw'].shift(2, fill_value=False).astype(int)
df['fractal_low'] = df['fractal_low_raw'].shift(2, fill_value=False).astype(int)

In [75]:
df['last_fractal_high'] = np.where(df['fractal_high']==1, df['MidHigh'], np.nan)
df['last_fractal_low']  = np.where(df['fractal_low']==1, df['MidLow'], np.nan)

df['last_fractal_high'] = df['last_fractal_high'].ffill()
df['last_fractal_low']  = df['last_fractal_low'].ffill()


In [76]:
df['dist_last_high'] = (df['MidClose'] - df['last_fractal_high']) / df['ATR14']
df['dist_last_low'] = (df['MidClose'] - df['last_fractal_low']) / df['ATR14']

In [77]:
df['bos_up'] = (
    (df['MidHigh'] > df['last_fractal_high']) &
    df['last_fractal_high'].notna()
).astype(int)

df['bos_down'] = (
    (df['MidLow'] < df['last_fractal_low']) &
    df['last_fractal_low'].notna()
).astype(int)

In [78]:
df['sweep_high'] = (
    (df['MidHigh'] > df['last_fractal_high']) &
    (df['MidClose'] < df['last_fractal_high'])
).astype(int)

df['sweep_low'] = (
    (df['MidLow'] < df['last_fractal_low']) &
    (df['MidClose'] > df['last_fractal_low'])
).astype(int)

In [79]:
df['hh_count_20'] = df['bos_up'].rolling(20).sum()
df['ll_count_20'] = df['bos_down'].rolling(20).sum()

In [80]:
df

Unnamed: 0,Date,Time,BO,BH,BL,BC,BCh,AO,AH,AL,...,last_fractal_high,last_fractal_low,dist_last_high,dist_last_low,bos_up,bos_down,sweep_high,sweep_low,hh_count_20,ll_count_20
0,2005-05-04,01:00,1.28863,1.29623,1.28853,1.29623,0.00760,1.28878,1.29638,1.28868,...,,,,,0,0,0,0,,
1,2005-05-04,02:00,1.29633,1.29633,1.29433,1.29613,-0.00020,1.29648,1.29648,1.29448,...,,,,,0,0,0,0,,
2,2005-05-04,03:00,1.29603,1.29613,1.29513,1.29573,-0.00030,1.29618,1.29628,1.29528,...,,,,,0,0,0,0,,
3,2005-05-04,04:00,1.29573,1.29733,1.29563,1.29613,0.00040,1.29588,1.29748,1.29578,...,,,,,0,0,0,0,,
4,2005-05-04,05:00,1.29633,1.29663,1.29543,1.29563,-0.00070,1.29648,1.29678,1.29558,...,,,,,0,0,0,0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93030,2020-04-29,18:00,1.08620,1.08851,1.08611,1.08713,0.00093,1.08660,1.08865,1.08640,...,1.085815,1.084285,0.844221,1.780205,1,0,0,0,12.0,5.0
93031,2020-04-29,19:00,1.08711,1.08823,1.08659,1.08748,0.00037,1.08725,1.08837,1.08675,...,1.085815,1.084285,1.020063,1.924815,1,0,0,0,13.0,5.0
93032,2020-04-29,20:00,1.08747,1.08788,1.08691,1.08708,-0.00039,1.08758,1.08810,1.08706,...,1.087990,1.084285,-0.249132,2.002170,0,0,0,0,12.0,5.0
93033,2020-04-29,21:00,1.08708,1.08740,1.08699,1.08714,0.00006,1.08808,1.08814,1.08763,...,1.087990,1.084285,-0.396448,2.093113,0,0,0,0,11.0,5.0


In [81]:
df = df.drop(columns=['fractal_high_raw', 'fractal_low_raw'])


In [82]:
df

Unnamed: 0,Date,Time,BO,BH,BL,BC,BCh,AO,AH,AL,...,last_fractal_high,last_fractal_low,dist_last_high,dist_last_low,bos_up,bos_down,sweep_high,sweep_low,hh_count_20,ll_count_20
0,2005-05-04,01:00,1.28863,1.29623,1.28853,1.29623,0.00760,1.28878,1.29638,1.28868,...,,,,,0,0,0,0,,
1,2005-05-04,02:00,1.29633,1.29633,1.29433,1.29613,-0.00020,1.29648,1.29648,1.29448,...,,,,,0,0,0,0,,
2,2005-05-04,03:00,1.29603,1.29613,1.29513,1.29573,-0.00030,1.29618,1.29628,1.29528,...,,,,,0,0,0,0,,
3,2005-05-04,04:00,1.29573,1.29733,1.29563,1.29613,0.00040,1.29588,1.29748,1.29578,...,,,,,0,0,0,0,,
4,2005-05-04,05:00,1.29633,1.29663,1.29543,1.29563,-0.00070,1.29648,1.29678,1.29558,...,,,,,0,0,0,0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93030,2020-04-29,18:00,1.08620,1.08851,1.08611,1.08713,0.00093,1.08660,1.08865,1.08640,...,1.085815,1.084285,0.844221,1.780205,1,0,0,0,12.0,5.0
93031,2020-04-29,19:00,1.08711,1.08823,1.08659,1.08748,0.00037,1.08725,1.08837,1.08675,...,1.085815,1.084285,1.020063,1.924815,1,0,0,0,13.0,5.0
93032,2020-04-29,20:00,1.08747,1.08788,1.08691,1.08708,-0.00039,1.08758,1.08810,1.08706,...,1.087990,1.084285,-0.249132,2.002170,0,0,0,0,12.0,5.0
93033,2020-04-29,21:00,1.08708,1.08740,1.08699,1.08714,0.00006,1.08808,1.08814,1.08763,...,1.087990,1.084285,-0.396448,2.093113,0,0,0,0,11.0,5.0


In [83]:
df = df.iloc[60:].reset_index(drop=True)


In [84]:
df.columns

Index(['Date', 'Time', 'BO', 'BH', 'BL', 'BC', 'BCh', 'AO', 'AH', 'AL', 'AC',
       'ACh', 'datetime', 'MidOpen', 'MidHigh', 'MidLow', 'MidClose', 'Spread',
       'Spread_norm', 'r_1', 'r_3', 'r_6', 'r_12', 'mom_6', 'mom_12', 'EMA20',
       'EMA50', 'ATR14', 'ATR50', 'vol_ratio', 'dist_ema20', 'dist_ema50',
       'std_10', 'std_20', 'range', 'range_norm', 'spread_spike', 'hour',
       'weekday', 'hour_sin', 'hour_cos', 'fractal_high', 'fractal_low',
       'last_fractal_high', 'last_fractal_low', 'dist_last_high',
       'dist_last_low', 'bos_up', 'bos_down', 'sweep_high', 'sweep_low',
       'hh_count_20', 'll_count_20'],
      dtype='object')

In [85]:
df = df.drop(columns=[
    'Date','Time','datetime',
    'BO','BH','BL','BC','BCh',
    'AO','AH','AL','AC','ACh'
])


In [86]:
H = 6

In [87]:
close = df['MidClose']
high = df['MidHigh']
low = df['MidLow']

In [91]:
df_model = df.copy()

In [90]:
df.columns

Index(['MidOpen', 'MidHigh', 'MidLow', 'MidClose', 'Spread', 'Spread_norm',
       'r_1', 'r_3', 'r_6', 'r_12', 'mom_6', 'mom_12', 'EMA20', 'EMA50',
       'ATR14', 'ATR50', 'vol_ratio', 'dist_ema20', 'dist_ema50', 'std_10',
       'std_20', 'range', 'range_norm', 'spread_spike', 'hour', 'weekday',
       'hour_sin', 'hour_cos', 'fractal_high', 'fractal_low',
       'last_fractal_high', 'last_fractal_low', 'dist_last_high',
       'dist_last_low', 'bos_up', 'bos_down', 'sweep_high', 'sweep_low',
       'hh_count_20', 'll_count_20'],
      dtype='object')

In [96]:
feature_cols = [
    'MidOpen', 'MidHigh', 'MidLow', 'MidClose', 'Spread', 'Spread_norm',
    'r_1', 'r_3', 'r_6', 'r_12', 'mom_6', 'mom_12',
    'EMA20', 'EMA50', 'ATR14', 'ATR50', 'vol_ratio',
    'dist_ema20', 'dist_ema50', 'std_10', 'std_20',
    'range', 'range_norm', 'spread_spike',
    'hour_sin', 'hour_cos',
    'fractal_high', 'fractal_low',
    'last_fractal_high', 'last_fractal_low',
    'dist_last_high', 'dist_last_low',
    'bos_up', 'bos_down', 'sweep_high', 'sweep_low',
    'hh_count_20', 'll_count_20'
]

data = df_model[feature_cols].values
window_size = 24  

train_ratio = 0.7
train_size = int(len(data) * train_ratio)
train_data = data[:train_size]
test_data = data[train_size:]



In [95]:
import gymnasium as gym
from gymnasium import spaces


In [97]:
class TradingEnv(gym.Env):
    metadata = {'render_models': ['human'], 'render_fps': 1}

    def __init__(self, data, window_size, render_mode=None):
        super().__init__()
        self.data = data
        self.window_size = window_size
        self.current_step = window_size
        self.end_step = len(data) - 1

        self.action_space = spaces.Discrete(3)
        self.observation_space = spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=(window_size, data.shape[1]),
            dtype=np.float32,
        )
        

        self.position = 0
        self.entry_price = 0
        self.balance = 10000
        self.initial_balance = 10000
        self.render_mode = render_mode

    
    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.current_step = self.window_size
        self.position = 0
        self.entry_price = 0
        self.balance = self.initial_balance
        obs = self._get_observation()
        return obs, {}

    
    def _get_observation(self):
        return self.data[self.current_step - self.window_size : self.current_step]
    

    def step(self, action):
        reward = 0
        done = False
        price = self.data[self.current_step][3]


        if action == 1:
            if self.position == 0:
                self.position = 1
                self.entry_price = price
            elif self.position == -1:
                reward = self.entry_price - price
                self.balance += reward
                self.position = 1
                self.entry_price = price
            

        elif action == 2:
            if self.position == 0:
                self.position = -1
                self.entry_price = price
            elif self.position == 1:
                reward = price - self.entry_price
                self.balance += reward
                self.position = -1
                self.entry_price = price
        

        if action == 0 and self.position != 0:
            if self.position == 1:
                reward = price - self.entry_price
            else:
                reward = self.entry_price - price
        

        self.current_step += 1
        if self.current_step >= self.end_step:
            done = True
        
        obs = self._get_observation()
        info = {'balance': self.balance, 'position': self.position}

        return obs, reward, done, False, info 

    
    def render(self):
        if self.render_mode == 'human':
            print(f'Step: {self.current_step}, Balance: {self.balance}, Position: {self.position}')

In [102]:
class CustomFeatureExtractor(BaseFeaturesExtractor):
    def __init__(self, observation_space: gym.spaces.Box, lstm_hidden=64, dense_hidden=64):
        super(CustomFeatureExtractor, self).__init__(observation_space, features_dim=lstm_hidden)

        self.window_size, self.num_features = observation_space.shape

        # LSTM слой
        self.lstm = nn.LSTM(input_size=self.num_features, hidden_size=lstm_hidden, batch_first=True)

        # Полносвязный слой после LSTM
        self.fc = nn.Sequential(
            nn.Linear(lstm_hidden, dense_hidden),
            nn.ReLU()
        )

    def forward(self, observations: torch.Tensor) -> torch.Tensor:
        # observations shape: (batch, window_size, num_features)
        lstm_out, (h_n, c_n) = self.lstm(observations)
        last_hidden = h_n[-1]  # последний скрытый слой
        return self.fc(last_hidden)


In [103]:
window_size = 24
env = DummyVecEnv([lambda: TradingEnv(train_data, window_size)])


In [104]:
policy_kwargs = dict(
    features_extractor_class=CustomFeatureExtractor,
    features_extractor_kwargs=dict(lstm_hidden=64, dense_hidden=64)
)

model = PPO(
    policy="MlpPolicy", 
    env=env,
    verbose=1,
    learning_rate=3e-4,
    n_steps=2048,
    batch_size=64,
    n_epochs=10,
    gamma=0.99,
    policy_kwargs=policy_kwargs
)


Using cpu device


In [None]:
model.learn(total_timesteps=20000)

In [None]:
test_env = DummyVecEnv([lambda: TradingEnv(test_data, window_size, render_mode="human")])

obs = test_env.reset()
done = False
while not done:
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = test_env.step(action)
    done = terminated or truncated
