In [1]:
import pandas as pd
import pandas_ta as ta

# Load and preprocess the data
df = pd.read_csv("EURUSD_Candlestick_1_M_BID_19.01.2023-18.01.2025.csv")
df['Gmt time'] = pd.to_datetime(df['Gmt time'], format='%d.%m.%Y %H:%M:%S.%f')
df = df[df['Volume'] != 0]
df.reset_index(drop=True, inplace=True)

# Display the result
df.head(10)

Unnamed: 0,Gmt time,Open,High,Low,Close,Volume
0,2023-01-19 00:00:00,1.07974,1.07977,1.07956,1.07956,143.12
1,2023-01-19 00:01:00,1.07957,1.0796,1.07944,1.07949,80.13
2,2023-01-19 00:02:00,1.07947,1.07971,1.07946,1.0797,46.52
3,2023-01-19 00:03:00,1.07969,1.07973,1.0796,1.07968,58.5
4,2023-01-19 00:04:00,1.07966,1.0798,1.07963,1.0798,47.99
5,2023-01-19 00:05:00,1.07979,1.0798,1.0797,1.07972,53.4
6,2023-01-19 00:06:00,1.0797,1.0797,1.07961,1.07962,56.55
7,2023-01-19 00:07:00,1.07962,1.07982,1.07954,1.07955,72.64
8,2023-01-19 00:08:00,1.07956,1.07964,1.07951,1.07964,43.81
9,2023-01-19 00:09:00,1.07964,1.07964,1.07934,1.07934,77.99


In [2]:
df['box_start'] = (df['Gmt time'].dt.minute % 5 == 0).astype(int)

In [3]:
df.head(15)

Unnamed: 0,Gmt time,Open,High,Low,Close,Volume,box_start
0,2023-01-19 00:00:00,1.07974,1.07977,1.07956,1.07956,143.12,1
1,2023-01-19 00:01:00,1.07957,1.0796,1.07944,1.07949,80.13,0
2,2023-01-19 00:02:00,1.07947,1.07971,1.07946,1.0797,46.52,0
3,2023-01-19 00:03:00,1.07969,1.07973,1.0796,1.07968,58.5,0
4,2023-01-19 00:04:00,1.07966,1.0798,1.07963,1.0798,47.99,0
5,2023-01-19 00:05:00,1.07979,1.0798,1.0797,1.07972,53.4,1
6,2023-01-19 00:06:00,1.0797,1.0797,1.07961,1.07962,56.55,0
7,2023-01-19 00:07:00,1.07962,1.07982,1.07954,1.07955,72.64,0
8,2023-01-19 00:08:00,1.07956,1.07964,1.07951,1.07964,43.81,0
9,2023-01-19 00:09:00,1.07964,1.07964,1.07934,1.07934,77.99,0


In [4]:
# Create columns for rolling maximum and minimum over the last 5 candles, excluding the current candle
df['max_box'] = df['High'].shift(1).rolling(window=5).max()
df['min_box'] = df['Low'].shift(1).rolling(window=5).min()

In [5]:
df[25:45]

Unnamed: 0,Gmt time,Open,High,Low,Close,Volume,box_start,max_box,min_box
25,2023-01-19 00:25:00,1.07918,1.07927,1.07908,1.07908,72.89,1,1.07967,1.07907
26,2023-01-19 00:26:00,1.07907,1.07914,1.07898,1.07912,74.69,0,1.07951,1.07907
27,2023-01-19 00:27:00,1.07912,1.07919,1.07904,1.07919,64.24,0,1.07946,1.07898
28,2023-01-19 00:28:00,1.0792,1.07925,1.0792,1.07923,25.51,0,1.07927,1.07898
29,2023-01-19 00:29:00,1.07922,1.07938,1.07918,1.07938,55.01,0,1.07927,1.07898
30,2023-01-19 00:30:00,1.07939,1.07939,1.07912,1.07933,172.4,1,1.07938,1.07898
31,2023-01-19 00:31:00,1.07932,1.07952,1.07929,1.07951,227.61,0,1.07939,1.07898
32,2023-01-19 00:32:00,1.07952,1.07964,1.07951,1.07964,81.01,0,1.07952,1.07904
33,2023-01-19 00:33:00,1.07965,1.07967,1.07954,1.07961,87.14,0,1.07964,1.07912
34,2023-01-19 00:34:00,1.0796,1.0796,1.07939,1.07946,82.74,0,1.07967,1.07912


In [6]:
# Set max_5 and min_5 to NaN for rows where 5min_start is 0
df.loc[df['box_start'] == 0, ['max_box', 'min_box']] = None

# Forward fill max_5 and min_5
df['max_box'].fillna(method='ffill', inplace=True)
df['min_box'].fillna(method='ffill', inplace=True)

# Drop rolling NA values
df = df.dropna().reset_index(drop=True)

# Display the updated dataframe
df[25:45]

Unnamed: 0,Gmt time,Open,High,Low,Close,Volume,box_start,max_box,min_box
25,2023-01-19 00:30:00,1.07939,1.07939,1.07912,1.07933,172.4,1,1.07938,1.07898
26,2023-01-19 00:31:00,1.07932,1.07952,1.07929,1.07951,227.61,0,1.07938,1.07898
27,2023-01-19 00:32:00,1.07952,1.07964,1.07951,1.07964,81.01,0,1.07938,1.07898
28,2023-01-19 00:33:00,1.07965,1.07967,1.07954,1.07961,87.14,0,1.07938,1.07898
29,2023-01-19 00:34:00,1.0796,1.0796,1.07939,1.07946,82.74,0,1.07938,1.07898
30,2023-01-19 00:35:00,1.07949,1.07965,1.07944,1.07944,107.48,1,1.07967,1.07912
31,2023-01-19 00:36:00,1.07944,1.07945,1.07937,1.07944,84.3,0,1.07967,1.07912
32,2023-01-19 00:37:00,1.07943,1.07948,1.07932,1.07932,100.6,0,1.07967,1.07912
33,2023-01-19 00:38:00,1.0793,1.07932,1.07918,1.07927,115.57,0,1.07967,1.07912
34,2023-01-19 00:39:00,1.0793,1.07933,1.07905,1.07909,92.13,0,1.07967,1.07912


In [20]:
df["Break_signal"] =  (df["Close"] > df["max_box"]).astype(int) * 2 + (df["Close"] < df["min_box"]).astype(int)

In [21]:
df[:50]

Unnamed: 0,Gmt time,Open,High,Low,Close,Volume,box_start,max_box,min_box,Break_signal,pointpos
0,2023-01-19 00:05:00,1.07979,1.0798,1.0797,1.07972,53.4,1,1.0798,1.07944,0,
1,2023-01-19 00:06:00,1.0797,1.0797,1.07961,1.07962,56.55,0,1.0798,1.07944,0,
2,2023-01-19 00:07:00,1.07962,1.07982,1.07954,1.07955,72.64,0,1.0798,1.07944,0,
3,2023-01-19 00:08:00,1.07956,1.07964,1.07951,1.07964,43.81,0,1.0798,1.07944,0,
4,2023-01-19 00:09:00,1.07964,1.07964,1.07934,1.07934,77.99,0,1.0798,1.07944,1,1.07974
5,2023-01-19 00:10:00,1.07934,1.07952,1.07934,1.07948,42.09,1,1.07982,1.07934,0,
6,2023-01-19 00:11:00,1.07949,1.0796,1.07949,1.0796,35.14,0,1.07982,1.07934,0,
7,2023-01-19 00:12:00,1.07959,1.07966,1.07952,1.07952,51.67,0,1.07982,1.07934,0,
8,2023-01-19 00:13:00,1.0795,1.07959,1.07949,1.07957,31.94,0,1.07982,1.07934,0,
9,2023-01-19 00:14:00,1.07957,1.07972,1.07957,1.07965,53.41,0,1.07982,1.07934,0,


In [22]:
len(df)

742358

In [23]:
# Define the block size
block_size = 5

# Create a new column to indicate the block number
df['block'] = df.index // block_size

# For each block, keep only the first occurrence of a signal (1 or 2), set others to 0
def keep_first_signal(x):
    seen = set()  # Keep track of seen signals
    return x.map(lambda val: val if val in {1, 2} and val not in seen and not seen.add(val) else 0)

df['Break_signal'] = df.groupby('block')['Break_signal'].transform(keep_first_signal)

# Drop the 'block' column if no longer needed
df.drop(columns=['block'], inplace=True)


In [24]:
df["Break_signal"].value_counts()

Break_signal
0    617913
2     62658
1     61787
Name: count, dtype: int64

## Vizualisation

In [27]:
import numpy as np
def pointpos(x):
    if x['Break_signal']==2:
        return x['Low']-1e-4
    elif x['Break_signal']==1:
        return x['High']+1e-4
    else:
        return np.nan

df['pointpos'] = df.apply(lambda row: pointpos(row), axis=1)

In [29]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from datetime import datetime
dfpl = df[450:550]
fig = go.Figure(data=[go.Candlestick(x=dfpl.index,
                open=dfpl['Open'],
                high=dfpl['High'],
                low=dfpl['Low'],
                close=dfpl['Close'])])

fig.add_scatter(x=dfpl.index, y=dfpl['pointpos'], mode="markers",
                marker=dict(size=5, color="MediumPurple"),
                name="pivot")

#fig.update_layout(xaxis_rangeslider_visible=False)
fig.show()

In [30]:
#df = df.set_index(["Gmt time"])
df.reset_index(inplace=True)

In [41]:
from backtesting import Strategy, Backtest
import numpy as np

def SIGNAL():
    return df.Break_signal

class MyStrat(Strategy):
    mysize = 0.02  # Trade size 1% of the account
    tp_sl_ratio = 2

    def init(self):
        super().init()
        self.signal1 = self.I(SIGNAL)  # Assuming SIGNAL is a function that returns signals

    def next(self):
        super().next()
        spread_threshold = 0.0002
        if self.signal1[-1] == 2 and not self.position:
            # Open a new long position with calculated SL
            current_low = self.data.Low[-1]
            current_close = self.data.Close[-1]
            sl = current_low  # SL at the low of the current candle
            tp = current_close + self.tp_sl_ratio * (current_close - current_low)

            # Check the TP > Close > SL condition
            if tp > current_close+spread_threshold > sl + 2*spread_threshold:
                self.buy(size=self.mysize, sl=sl, tp=tp)

        elif self.signal1[-1] == 1 and not self.position:
            # Open a new short position with calculated SL
            current_high = self.data.High[-1]
            current_close = self.data.Close[-1]
            sl = current_high  # SL at the high of the current candle
            tp = current_close - self.tp_sl_ratio * (current_high - current_close)

            # Check the TP < Close < SL condition
            if tp + 2*spread_threshold < current_close + spread_threshold < sl:
                self.sell(size=self.mysize, sl=sl, tp=tp)

In [42]:
bt = Backtest(df, MyStrat, cash=50000, margin=1/5, commission=0.0002)
bt.run()


Data index is not datetime. Assuming simple periods, but `pd.DateTimeIndex` is advised.



Start                                     0.0
End                                  742357.0
Duration                             742357.0
Exposure Time [%]                   44.392732
Equity Final [$]                  38729.16182
Equity Peak [$]                       50000.0
Return [%]                         -22.541676
Buy & Hold Return [%]               -4.873486
Return (Ann.) [%]                         0.0
Volatility (Ann.) [%]                     NaN
Sharpe Ratio                              NaN
Sortino Ratio                             NaN
Calmar Ratio                              0.0
Max. Drawdown [%]                  -22.541676
Avg. Drawdown [%]                  -22.541676
Max. Drawdown Duration               742353.0
Avg. Drawdown Duration               742353.0
# Trades                              12852.0
Win Rate [%]                        32.920946
Best Trade [%]                       1.310362
Worst Trade [%]                     -0.581251
Avg. Trade [%]                    

In [43]:
bt.plot()


found multiple competing values for 'toolbar.active_drag' property; using the latest value


found multiple competing values for 'toolbar.active_scroll' property; using the latest value

