In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import pytz
from datetime import datetime
import MetaTrader5 as mt5


## Get Data from MT5

In [None]:
# -------------------------------------------------------------------------------------------
def fetch_MT5_data(symbol: str, timeframe: int, date_from: datetime, date_to: datetime):
    """
    Download bars for `symbol` from the MetaTrader 5 terminal and save them to a CSV file.

    Parameters:
    ----------
    symbol    : instrument name passed to mt5.copy_rates_range (e.g. "EURUSD")
    timeframe : MT5 timeframe constant (e.g. mt5.TIMEFRAME_M5)
    date_from : start of the requested range
    date_to   : end of the requested range

    Output:
    -------
    None. On success a CSV named '{timeframe} ({date_from} -- {date_to}).csv' is written to the
    working directory (note: the name is built from the timeframe value, not from the symbol).
    On failure a message containing mt5.last_error() is printed.
    """
    if not mt5.initialize():
        print(f"initialize() failed, error code = {mt5.last_error()}")
        mt5.shutdown()
        return

    # try/finally guarantees the terminal connection is released even when the
    # download, the DataFrame conversion or the CSV export raises.
    try:
        rates = mt5.copy_rates_range(symbol, timeframe, date_from, date_to)

        if rates is not None and len(rates) > 0:
            df = pd.DataFrame(rates)
            df['time'] = pd.to_datetime(df['time'], unit='s')  # 'time' is converted from epoch seconds
            # Midnight-UTC datetimes render as 'YYYY-MM-DD 00:00:00+00:00'; the replace() strips
            # that time suffix so only the date remains in the file name.
            df.to_csv(f'{timeframe} ({date_from} -- {date_to}).csv'.replace(" 00:00:00+00:00", ""), index=False)
        else:
            print(f"Failed to fetch data or no data found. Error: {mt5.last_error()}")
    finally:
        mt5.shutdown()


# Arguments for the download call at the bottom of this cell.
symbol = "EURUSD"
timeframe = mt5.TIMEFRAME_M5

# The requested range is expressed as timezone-aware UTC datetimes.
timezone  = pytz.timezone("UTC")
date_from = datetime(2025, 10, 1, tzinfo=timezone)
date_to   = datetime(2025, 12, 1, tzinfo=timezone)

# The download is disabled; uncomment the call below to fetch the bars and write the CSV.
# fetch_MT5_data(symbol, timeframe, date_from, date_to)

In [4]:
# Connectivity check: print the latest EURUSD tick reported by the terminal.
if not mt5.initialize():
    # initialize() reports failure through its return value, so check it before querying.
    print(f"initialize() failed, error code = {mt5.last_error()}")
else:
    try:
        tick = mt5.symbol_info_tick("EURUSD")

        if tick is None:
            # No tick object was returned; avoid an AttributeError on the prints below.
            print(f"symbol_info_tick() failed, error code = {mt5.last_error()}")
        else:
            print("Bid:", tick.bid)
            print("Ask:", tick.ask)
            print("Last:", tick.last)
            print("Volume:", tick.volume)
    finally:
        # Always release the terminal connection, even if the query raised.
        mt5.shutdown()


Bid: 1.17269
Ask: 1.17271
Last: 0.0
Volume: 0


True

## Core Features

In [2]:
# -------------------------------------------------------------------------------------------
def core_features(df: pd.DataFrame):
    """
    Add basic per-candle features to a copy of `df`.

    Parameters:
    ----------
    df : DataFrame containing the columns ['time','open','high','low','close','tick_volume']

    Output:
    -------
    A new DataFrame (the input is not modified) with these extra columns:
      - return          : log return of close versus the previous close
      - body            : close - open
      - range           : high - low
      - upper_wick      : high - max(open, close)
      - lower_wick      : min(open, close) - low
      - ema_20          : 20-span EMA of close (adjust=False)
      - price_ema_ratio : close / ema_20
      - volatility_20   : rolling 20-row standard deviation of `return`
      - volume_norm     : tick_volume / its rolling 20-row mean
      - time_sin_cos    : (sin, cos) tuple of the time of day on a 24h cycle, rounded to 3 decimals
    """

    # Work on a copy so the caller's frame is never mutated (same convention as level_features).
    df = df.copy()

    df['return']     = np.log(df['close'] / df['close'].shift(1))       # r_t = ln(Close_t / Close_{t-1})
    df['body'  ]     = df['close'] - df['open']                         # body_t = Close_t - Open_t
    df['range' ]     = df['high' ] - df['low' ]                         # range_t = High_t - Low_t
    df['upper_wick'] = df['high' ] - df[['open', 'close']].max(axis=1)  # upper = High - max(Open, Close)
    df['lower_wick'] = df[['open', 'close']].min(axis=1) - df['low']    # lower = min(Open, Close) - Low
    df['ema_20']     = df['close'].ewm(span=20, adjust=False).mean()    # EMA_t = a*Close_t + (1-a)*EMA_{t-1}, a = 2/(N+1)
    df['price_ema_ratio'] = df['close'] / df['ema_20']                  # trend_strength = Close / EMA
    df['volatility_20'  ] = df['return'].rolling(20).std()              # sigma_t = sqrt(Var(r_{t-n:t}))
    df['volume_norm'] = df['tick_volume'] / df['tick_volume'].rolling(20).mean()  # vol_norm = V / MA(V)

    # Time of day mapped onto a 24h cycle: (sin(time), cos(time)).
    timestamps = pd.to_datetime(df['time'])  # parsed once, reused for hour and minute
    time_in_minutes = timestamps.dt.hour*60 + timestamps.dt.minute
    time_in_radians = 2 * np.pi * time_in_minutes / 1440               # 1440 minutes per day
    df['time_sin_cos'] = list(zip(np.round(np.sin(time_in_radians), 3), np.round(np.cos(time_in_radians), 3)))

    return df


# Load the bars from a local CSV and add the core per-candle features.
data = pd.read_csv("M5 (2024-09-01 -- 2025-12-01).csv")
data = core_features(data)

# Disabled draft of a binary target: 1 when the close k bars ahead is above the current close.
# NOTE: the draft is written against a frame named `df`, not `data`; adapt it before enabling.
# k = 5
# df['target'] = (df['close'].shift(-k) > df['close']).astype(int)    # y_t = 1[Close_{t+k} > Close_t]
# df



## Level Features

In [None]:
# -------------------------------------------------------------------------------------------
def level_features(df: pd.DataFrame, period: list[pd.Series], atr_window: int=14, digits: int=2, ema_span :int=12*12):
    """
    Return a copy of `df` extended with three families of level features.

    Parameters:
    ----------
    df         : candle DataFrame handed to round_distance, add_trend and shadow_distance
    period     : list of group-key Series; each one yields a column named after its `.name`
    atr_window : ATR window length forwarded to shadow_distance (default: 14)
    digits     : number of decimals forwarded to round_distance (default: 2)
    ema_span   : EMA span forwarded to add_trend (default: 12*12)

    Output:
    -------
    A new DataFrame with the added columns:
      - dist_round                  : result of round_distance
      - trend_{ema_span}            : result of add_trend
      - dist_shadow_prev_{key.name} : result of shadow_distance, one column per entry of `period`
    """

    enriched = df.copy()

    # Distance of the candle extremes from round numbers.
    enriched['dist_round'] = round_distance(enriched, digits=digits)

    # Price-to-EMA ratio.
    trend_column = f'trend_{ema_span}'
    enriched[trend_column] = add_trend(enriched, ema_span=ema_span)

    # One shadow-distance column per grouping key.
    for group_key in period:
        shadow_column = f'dist_shadow_prev_{group_key.name}'
        enriched[shadow_column] = shadow_distance(enriched, period=group_key, atr_window=atr_window)

    return enriched

def round_distance(df: pd.DataFrame, digits: int=2):
    """
    Measure how far each candle's high and low sit from their own rounded value.

    Parameters:
    ----------
    df : DataFrame containing the columns ['high','low']
    digits : Number of decimal places used for rounding (default: 2)

    Output:
    -------
    Series (same index as `df`) holding, per row, the smaller of the two absolute
    offsets |high - round(high)| and |low - round(low)|, each multiplied by 10**digits.
    The input DataFrame is not modified.
    """

    scale = 10**digits

    def _scaled_offset(prices: pd.Series) -> pd.Series:
        # Signed offset from the rounded price, expressed in units of 10**-digits.
        return (prices - prices.round(digits)) * scale

    high_offset = _scaled_offset(df['high'])
    low_offset  = _scaled_offset(df['low'])

    return np.minimum(high_offset.abs(), low_offset.abs())

def shadow_distance(df: pd.DataFrame, period: pd.Series, atr_window: int=14):
    """
    Calculate the distance of the candle shadow (high/low) from the maximum and minimum of the previous period,
    normalized by ATR.

    Parameters:
    ----------
    df : DataFrame containing the columns ['high','low','close'], in chronological order
         (the ATR's shift/rolling and the "previous period" lookup both rely on row order)
    period : group key identifying the period each row belongs to (e.g. a Series of dates or
             of year-week labels); it is passed to both `groupby` and `join`
    atr_window : ATR window length (default: 14)

    Output:
    -------
    Series with, per row, the minimum of the four ATR-normalized distances between the row's
    high/low and the previous period's high/low. Rows of the first period (no previous period)
    and rows without a complete ATR window are NaN.
    """

    df = df.copy()

    # Average True Range
    prev_close = df['close'].shift(1)
    high_low   = df['high'] - df['low']
    high_close = np.abs(df['high'] - prev_close)
    low_close  = np.abs(df['low' ] - prev_close)

    df['tr' ] = np.maximum(high_low, np.maximum(high_close, low_close)) # TR
    df['atr'] = df['tr'].rolling(atr_window).mean()                     # ATR

    # Maximum and minimum of each period.
    # sort=False keeps the groups in order of first appearance (i.e. chronological), so shift(1)
    # really yields the preceding period. With the default sort=True the groups are ordered by
    # key value, which is wrong for keys that do not sort chronologically (e.g. the strings
    # '2025-10' < '2025-2').
    agg = df.groupby(period, sort=False).agg(ref_high=('high','max'), ref_low=('low','min'))
    df  = df.join(agg.shift(1), on=period)              # Add previous period

    # Distance of the current shadow from the boundaries of the previous period
    df['high_to_prev_high'] = (df['high'] - df['ref_high']).abs() / df['atr']
    df['high_to_prev_low' ] = (df['high'] - df['ref_low' ]).abs() / df['atr']
    df['low_to_prev_high' ] = (df['low' ] - df['ref_high']).abs() / df['atr']
    df['low_to_prev_low'  ] = (df['low' ] - df['ref_low' ]).abs() / df['atr']

    # Minimum shadow distance to previous range
    dist_shadow_prev = df[['high_to_prev_high', 'high_to_prev_low', 'low_to_prev_high', 'low_to_prev_low']].min(axis=1)
    return dist_shadow_prev

def add_trend(df: pd.DataFrame, ema_span: int=12*12):
    """
    Compute the price-to-EMA ratio used as a trend indicator.

    Parameters:
    ----------
    df : DataFrame containing the 'close' column
    ema_span : EMA span in rows (default: 12*12 = 144 rows)

    Output:
    -------
    Series (same index as `df`) with close divided by its EMA of span `ema_span`.
    The EMA uses pandas' default `ewm` settings; the input DataFrame is not modified.
    """

    closes = df['close']

    # Give the EMA its own label so the ratio below comes out unnamed.
    smoothed = closes.ewm(span=ema_span).mean().rename(f'ema_{ema_span}')

    return closes / smoothed



# Group keys for the "previous period" levels: one per calendar day, one per ISO week.
Datetime = pd.to_datetime(data['time']).dt
iso_calendar = Datetime.isocalendar()

# level_features names its output columns after each key's `.name`, so name the keys explicitly
# (the week key would otherwise be unnamed and produce a 'dist_shadow_prev_None' column).
day_key  = Datetime.date.rename('day')
# Zero-pad the week number so the labels sort chronologically ('2025-02' < '2025-10').
week_key = (iso_calendar.year.astype(str) + '-' + iso_calendar.week.astype(str).str.zfill(2)).rename('week')

period = [day_key, week_key]
data = level_features(data, period=period, atr_window=14)
data.to_csv('test.csv', index=False)