### Select all symbols

Step 1: Load Data for All Available Symbols

In [1]:
import MetaTrader5 as mt5
import pandas as pd
import numpy as np
import ta
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from scipy.spatial.distance import euclidean
from statsmodels.tsa.stattools import coint

# ✅ Initialize MT5 Connection
if not mt5.initialize():
    print("MT5 Initialization Failed")
    mt5.shutdown()
    quit()

# ✅ Retrieve All Symbols in MT5
symbols = [s.name for s in mt5.symbols_get()]
print(f"✅ Found {len(symbols)} symbols in MT5")

# ✅ Limit to Top 50 Symbols for Performance (adjust as needed)
symbols = symbols[:50]

# ✅ Function to Fetch Price Data from MT5
def get_mt5_data(symbol, n_bars=1000, timeframe=mt5.TIMEFRAME_D1):
    """Fetches historical data from MT5"""
    rates = mt5.copy_rates_from_pos(symbol, timeframe, 0, n_bars)
    if rates is None:
        print(f"⚠️ Could not retrieve data for {symbol}")
        return None
    df = pd.DataFrame(rates)
    df["time"] = pd.to_datetime(df["time"], unit="s")
    df.set_index("time", inplace=True)
    return df[["close"]]

# ✅ Load Data for All Symbols
data = {symbol: get_mt5_data(symbol) for symbol in symbols if get_mt5_data(symbol) is not None}

# ✅ Merge Data into One DataFrame
df = pd.concat(data.values(), axis=1, keys=data.keys()).dropna()

print(f"📊 Data Loaded for {len(df.columns)//2} Symbols")


✅ Found 2061 symbols in MT5
📊 Data Loaded for 25 Symbols


Step 2: Add Technical Features

In [2]:
# ✅ Add Features
def add_features(df):
    for sym in symbols:
        df[(sym, "volatility")] = ta.volatility.bollinger_hband(df[(sym, "close")], window=20) - \
                                  ta.volatility.bollinger_lband(df[(sym, "close")], window=20)
        df[(sym, "momentum")] = ta.momentum.ROCIndicator(df[(sym, "close")], window=10).roc()
        df[(sym, "mean_reversion")] = df[(sym, "close")].rolling(50).mean() - df[(sym, "close")]

    return df.dropna()

df = add_features(df)
print(f"✅ Feature Engineering Complete. Shape: {df.shape}")


✅ Feature Engineering Complete. Shape: (947, 200)


  df[(sym, "mean_reversion")] = df[(sym, "close")].rolling(50).mean() - df[(sym, "close")]
  df[(sym, "volatility")] = ta.volatility.bollinger_hband(df[(sym, "close")], window=20) - \
  df[(sym, "momentum")] = ta.momentum.ROCIndicator(df[(sym, "close")], window=10).roc()
  df[(sym, "mean_reversion")] = df[(sym, "close")].rolling(50).mean() - df[(sym, "close")]
  df[(sym, "volatility")] = ta.volatility.bollinger_hband(df[(sym, "close")], window=20) - \
  df[(sym, "momentum")] = ta.momentum.ROCIndicator(df[(sym, "close")], window=10).roc()
  df[(sym, "mean_reversion")] = df[(sym, "close")].rolling(50).mean() - df[(sym, "close")]
  df[(sym, "volatility")] = ta.volatility.bollinger_hband(df[(sym, "close")], window=20) - \
  df[(sym, "momentum")] = ta.momentum.ROCIndicator(df[(sym, "close")], window=10).roc()
  df[(sym, "mean_reversion")] = df[(sym, "close")].rolling(50).mean() - df[(sym, "close")]
  df[(sym, "volatility")] = ta.volatility.bollinger_hband(df[(sym, "close")], window=20) - \


Step 3: PCA for Pair Selection

In [3]:
from sklearn.preprocessing import StandardScaler

# ✅ Standardize the Data
scaler = StandardScaler()
scaled_data = scaler.fit_transform(df.xs("close", axis=1, level=1))

# ✅ PCA: Find Principal Components
pca = PCA(n_components=3)  # Keep 3 Principal Components
pca_components = pca.fit_transform(scaled_data)

# ✅ Assign PCA Factor Weights to Each Symbol
factor_df = pd.DataFrame(pca.components_.T, index=symbols, columns=[f"PC{i+1}" for i in range(pca.n_components_)])
print(factor_df)

# ✅ Find Closest Pairs Based on PCA Factor Similarity
def find_pca_pairs(factor_df):
    pairs = []
    for i, sym1 in enumerate(factor_df.index):
        for j, sym2 in enumerate(factor_df.index):
            if i < j:
                distance = euclidean(factor_df.loc[sym1], factor_df.loc[sym2])
                pairs.append((sym1, sym2, distance))
    
    # ✅ Select Best Pairs with Smallest Distance
    sorted_pairs = sorted(pairs, key=lambda x: x[2])
    return sorted_pairs[:10]  # Return Top 10 Closest Pairs

selected_pairs = find_pca_pairs(factor_df)
print("🎯 Best PCA-Based Pairs:", selected_pairs)


             PC1       PC2       PC3
EURUSD -0.074120  0.270696  0.039489
GBPUSD -0.061762  0.274868 -0.131038
USDCHF -0.108607 -0.215683 -0.051013
USDJPY  0.173301 -0.089506 -0.055852
USDCAD  0.164355 -0.045407  0.066992
AUDUSD -0.162769  0.089750 -0.103257
AUDNZD  0.085779 -0.196785 -0.144085
AUDCAD -0.086638  0.126954 -0.108870
AUDCHF -0.174758 -0.053159 -0.103954
AUDJPY  0.155454 -0.083599 -0.149799
CHFJPY  0.182018 -0.008838 -0.035827
EURGBP -0.009282 -0.105880  0.444556
EURAUD  0.128375  0.172335  0.163352
EURJPY  0.178240  0.011701 -0.047627
EURCHF -0.166736  0.074977 -0.007875
EURNZD  0.165860  0.075182  0.087998
EURCAD  0.087572  0.246827  0.111828
GBPCHF -0.151606  0.108690 -0.173693
GBPJPY  0.175006  0.031103 -0.130606
CADCHF -0.166122 -0.101996 -0.081290
CADJPY  0.159921 -0.106765 -0.103625
GBPAUD  0.122411  0.200551 -0.034171
GBPCAD  0.081074  0.256699 -0.086531
GBPNZD  0.154990  0.109517 -0.096284
NZDCAD -0.109840  0.200551  0.019302
NZDCHF -0.180453  0.004945 -0.058713
N

Step 4: K-Means Clustering for Pair Selection

In [6]:
from sklearn.cluster import KMeans

# Use factor_df from PCA as input for clustering
kmeans = KMeans(n_clusters=3, random_state=42)
factor_df['cluster'] = kmeans.fit_predict(factor_df)

print("📌 Symbol Clusters:")
print(factor_df[['cluster']])

# Group symbols by cluster and form pairs within each cluster
def find_cluster_pairs_from_factor_df(factor_df):
    pairs = []
    for cluster in factor_df['cluster'].unique():
        symbols_in_cluster = factor_df[factor_df['cluster'] == cluster].index.tolist()
        for i, sym1 in enumerate(symbols_in_cluster):
            for j, sym2 in enumerate(symbols_in_cluster):
                if i < j:
                    pairs.append((sym1, sym2))
    return pairs

cluster_pairs = find_cluster_pairs_from_factor_df(factor_df)
print("🎯 Cluster-Based Pairs:", cluster_pairs)


📌 Symbol Clusters:
        cluster
EURUSD        1
GBPUSD        1
USDCHF        2
USDJPY        0
USDCAD        0
AUDUSD        1
AUDNZD        2
AUDCAD        1
AUDCHF        2
AUDJPY        0
CHFJPY        0
EURGBP        2
EURAUD        0
EURJPY        0
EURCHF        1
EURNZD        0
EURCAD        0
GBPCHF        1
GBPJPY        0
CADCHF        2
CADJPY        0
GBPAUD        0
GBPCAD        1
GBPNZD        0
NZDCAD        1
NZDCHF        1
NZDJPY        0
NZDUSD        1
USDSGD        2
AUDSGD        1
CHFSGD        0
EURDKK        0
EURHKD        1
EURNOK        0
EURPLN        2
EURSEK        0
EURSGD        1
EURTRY        0
EURZAR        0
GBPDKK        1
GBPNOK        0
GBPSEK        0
GBPSGD        1
GBPTRY        0
NOKJPY        0
NOKSEK        2
SEKJPY        0
SGDJPY        0
USDCNH        0
USDCZK        2
🎯 Cluster-Based Pairs: [('EURUSD', 'GBPUSD'), ('EURUSD', 'AUDUSD'), ('EURUSD', 'AUDCAD'), ('EURUSD', 'EURCHF'), ('EURUSD', 'GBPCHF'), ('EURUSD', 'GBPCAD'), ('EURUSD'

Step 5: Backtest the Pair Trading Strategy

In [7]:
import vectorbt as vbt

# Option 1: Use the PCA-selected pair
pair1, pair2 = selected_pairs[0][:2]

# Option 2: Alternatively, use a cluster-based pair
# pair1, pair2 = cluster_pairs[0]

print(f"Testing pair: {pair1} vs {pair2}")

df1, df2 = data[pair1], data[pair2]

# Compute Spread & Z-Score
spread = df1["close"] - df2["close"]
z_score = (spread - spread.rolling(60).mean()) / spread.rolling(60).std()

# Define Entry & Exit Signals
entries = z_score < -1.5       # Buy pair1, Sell pair2
short_entries = z_score > 1.5    # Sell pair1, Buy pair2
exits = abs(z_score) < 0.5       # Exit when mean reversion occurs

# Backtest using vectorbt
portfolio = vbt.Portfolio.from_signals(
    close=df1["close"],
    entries=entries,
    exits=exits,
    short_entries=short_entries,
    short_exits=exits,
    size=1,  
    size_type="percent",
    init_cash=10000,
    fees=0.0002,
    freq="1H"
)

print(portfolio.stats())
portfolio.plot().show()


Testing pair: EURUSD vs EURHKD
Start                         2021-04-29 00:00:00
End                           2025-03-05 00:00:00
Period                           41 days 16:00:00
Start Value                               10000.0
End Value                             9726.830626
Total Return [%]                        -2.731694
Benchmark Return [%]                   -12.028977
Max Gross Exposure [%]                      100.0
Total Fees Paid                         95.663717
Max Drawdown [%]                        11.550894
Max Drawdown Duration            30 days 09:00:00
Total Trades                                   25
Total Closed Trades                            24
Total Open Trades                               1
Open Trade PnL                          31.724849
Win Rate [%]                                 37.5
Best Trade [%]                           4.555476
Worst Trade [%]                         -1.846803
Avg Winning Trade [%]                    1.590328
Avg Losing Trade [%


'H' is deprecated and will be removed in a future version. Please use 'h' instead of 'H'.



Step 6: Live Trading with MT5

In [None]:
def live_pair_trading(pair1, pair2):
    """Executes pair trading strategy on MT5."""
    df_signals = generate_pair_signals(pair1, pair2)

    latest = df_signals.iloc[-1]

    if latest["long_signal"]:
        place_order(pair1, LOT_SIZE, is_buy=True, magic=MAGIC_NUMBER)
        place_order(pair2, LOT_SIZE, is_buy=False, magic=MAGIC_NUMBER)
    elif latest["short_signal"]:
        place_order(pair1, LOT_SIZE, is_buy=False, magic=MAGIC_NUMBER)
        place_order(pair2, LOT_SIZE, is_buy=True, magic=MAGIC_NUMBER)
    elif latest["exit_signal"]:
        close_positions(pair1, MAGIC_NUMBER)
        close_positions(pair2, MAGIC_NUMBER)

# ✅ Run Live Trading
while True:
    live_pair_trading(pair1, pair2)
    time.sleep(3600)  # Run every hour


### Select specific symbols

Step 1: Load & Feature Engineer Data

In [1]:
import MetaTrader5 as mt5
import pandas as pd
import numpy as np
import ta
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from scipy.spatial.distance import euclidean
from statsmodels.tsa.stattools import coint

# ✅ Initialize MT5 Connection
if not mt5.initialize():
    print("MT5 Initialization Failed")
    mt5.shutdown()

# ✅ Fetch Data for Multiple Symbols
symbols = ["AUDUSD", "NZDUSD", "EURUSD", "GBPUSD", "USDJPY", "USDCAD"]  # Can be expanded
n_bars = 2000  # Historical lookback

def get_mt5_data(symbol, n_bars, timeframe=mt5.TIMEFRAME_H1):
    """Fetches historical data from MT5"""
    rates = mt5.copy_rates_from_pos(symbol, timeframe, 0, n_bars)
    if rates is None:
        raise ValueError(f"Could not retrieve data for {symbol}")
    df = pd.DataFrame(rates)
    df["time"] = pd.to_datetime(df["time"], unit="s")
    df.set_index("time", inplace=True)
    return df[["close"]]

# ✅ Load Data for All Symbols
data = {symbol: get_mt5_data(symbol, n_bars) for symbol in symbols}

# ✅ Merge Data into One DataFrame
df = pd.concat(data.values(), axis=1, keys=data.keys())

# ✅ Add Technical Features (Volatility, Momentum, etc.)
def add_features(df):
    for sym in symbols:
        df[(sym, "volatility")] = ta.volatility.bollinger_hband(df[(sym, "close")], window=20) - \
                                  ta.volatility.bollinger_lband(df[(sym, "close")], window=20)
        df[(sym, "momentum")] = ta.momentum.ROCIndicator(df[(sym, "close")], window=10).roc()
        df[(sym, "mean_reversion")] = df[(sym, "close")].rolling(50).mean() - df[(sym, "close")]

    return df.dropna()

df = add_features(df)

print(df.head())  # Verify Features


                      AUDUSD   NZDUSD   EURUSD   GBPUSD   USDJPY   USDCAD  \
                       close    close    close    close    close    close   
time                                                                        
2024-11-07 19:00:00  0.66772  0.60247  1.08048  1.29899  153.016  1.38504   
2024-11-07 20:00:00  0.66700  0.60199  1.07981  1.29726  152.963  1.38617   
2024-11-07 21:00:00  0.66675  0.60170  1.07806  1.29645  153.085  1.38616   
2024-11-07 22:00:00  0.66773  0.60238  1.08025  1.29827  152.853  1.38613   
2024-11-07 23:00:00  0.66802  0.60252  1.08031  1.29871  152.899  1.38594   

                        AUDUSD                              NZDUSD  ...  \
                    volatility  momentum mean_reversion volatility  ...   
time                                                                ...   
2024-11-07 19:00:00   0.014067  0.819883      -0.007538   0.011677  ...   
2024-11-07 20:00:00   0.013384  0.521446      -0.006740   0.011059  ...   
2024-11-

Step 2: PCA for Pair Selection

In [None]:
from sklearn.preprocessing import StandardScaler

# ✅ Standardize the Data
scaler = StandardScaler()
scaled_data = scaler.fit_transform(df.xs("close", axis=1, level=1))

# ✅ PCA: Find Principal Components
pca = PCA(n_components=3)  # Keep 3 Principal Components
pca_components = pca.fit_transform(scaled_data)

# ✅ Assign PCA Factor Weights to Each Symbol
factor_df = pd.DataFrame(pca.components_.T, index=symbols, columns=[f"PC{i+1}" for i in range(pca.n_components_)])
print(factor_df)

# ✅ Find Closest Pairs Based on PCA Factor Similarity
def find_pca_pairs(factor_df):
    pairs = []
    for i, sym1 in enumerate(factor_df.index):
        for j, sym2 in enumerate(factor_df.index):
            if i < j:
                distance = euclidean(factor_df.loc[sym1], factor_df.loc[sym2])
                pairs.append((sym1, sym2, distance))
    
    # ✅ Select Best Pairs with Smallest Distance
    sorted_pairs = sorted(pairs, key=lambda x: x[2])
    return sorted_pairs[:10]  # Return Top 10 Closest Pairs

selected_pairs = find_pca_pairs(factor_df)
print("🎯 Best PCA-Based Pairs:", selected_pairs)


 Step 3: K-Means Clustering for Pair Selection

In [None]:
from sklearn.cluster import KMeans

# ✅ K-Means Clustering
kmeans = KMeans(n_clusters=3, random_state=42)
df["cluster"] = kmeans.fit_predict(scaled_data)

# ✅ Group Stocks by Clusters
clusters = df.groupby("cluster").apply(lambda x: x.index.tolist())
print("📌 Identified Clusters:", clusters)

# ✅ Select Best Pairs within Clusters
def find_cluster_pairs(clusters):
    best_pairs = []
    for cluster in clusters:
        for i, sym1 in enumerate(cluster):
            for j, sym2 in enumerate(cluster):
                if i < j:
                    best_pairs.append((sym1, sym2))
    return best_pairs

cluster_pairs = find_cluster_pairs(clusters)
print("🎯 Cluster-Based Pairs:", cluster_pairs)


Step 4: Backtest the Pair Trading Strategy

In [None]:
import vectorbt as vbt

# ✅ Select Pair for Backtest
pair1, pair2 = selected_pairs[0][:2]
df1, df2 = data[pair1], data[pair2]

# ✅ Compute Spread & Z-Score
spread = df1["close"] - df2["close"]
z_score = (spread - spread.rolling(60).mean()) / spread.rolling(60).std()

# ✅ Define Entry & Exit Signals
entries = z_score < -1.5  # Buy Pair1, Sell Pair2
exits = abs(z_score) < 0.5  # Exit when mean reversion happens
short_entries = z_score > 1.5  # Sell Pair1, Buy Pair2

# ✅ Backtest with vectorbt
portfolio = vbt.Portfolio.from_signals(
    close=df1["close"],
    entries=entries,
    exits=exits,
    short_entries=short_entries,
    short_exits=exits,
    size=1,  
    size_type="percent",
    init_cash=10000,
    fees=0.0002,
    freq="1H"
)

# ✅ Results
print(portfolio.stats())
portfolio.plot().show()


Step 5: Live Trading with MT5

In [None]:
def live_pair_trading(pair1, pair2):
    """Executes pair trading strategy on MT5."""
    df_signals = generate_pair_signals(pair1, pair2)

    latest = df_signals.iloc[-1]

    if latest["long_signal"]:
        place_order(pair1, LOT_SIZE, is_buy=True, magic=MAGIC_NUMBER)
        place_order(pair2, LOT_SIZE, is_buy=False, magic=MAGIC_NUMBER)
    elif latest["short_signal"]:
        place_order(pair1, LOT_SIZE, is_buy=False, magic=MAGIC_NUMBER)
        place_order(pair2, LOT_SIZE, is_buy=True, magic=MAGIC_NUMBER)
    elif latest["exit_signal"]:
        close_positions(pair1, MAGIC_NUMBER)
        close_positions(pair2, MAGIC_NUMBER)

# ✅ Run Live Trading
while True:
    live_pair_trading(pair1, pair2)
    time.sleep(3600)  # Run every hour
