In [None]:
### Generate the XAUUSD dataset from MetaTrader 5 (MT5)

In [29]:
import MetaTrader5 as mt5
import pandas as pd
from datetime import datetime

# Pull the most recent 1-minute XAUUSD candles from the MetaTrader 5 terminal
# and cache them as a CSV file for the training cells.

# Set symbol
symbol = "XAUUSD"

# Connect to MetaTrader 5.
# Raise instead of calling quit() so that a failure surfaces as an ordinary
# cell error rather than ending the interpreter session.
if not mt5.initialize():
    raise RuntimeError(f"MT5 Initialization failed: {mt5.last_error()}")

try:
    # Ensure symbol is available
    if not mt5.symbol_select(symbol, True):
        raise RuntimeError(f"Failed to select {symbol}")

    # Get last 1000 1-minute candles
    rates = mt5.copy_rates_from_pos(symbol, mt5.TIMEFRAME_M1, 0, 1000)

    # copy_rates_from_pos returns None on error; fail here with the terminal's
    # error code instead of a confusing KeyError on df['time'] further down.
    if rates is None or len(rates) == 0:
        raise RuntimeError(f"No rates returned for {symbol}: {mt5.last_error()}")
finally:
    # Disconnect, even when one of the steps above failed
    mt5.shutdown()

# Convert to DataFrame; 'time' arrives as seconds since the epoch
df = pd.DataFrame(rates)
df['time'] = pd.to_datetime(df['time'], unit='s')

# Show last few rows
print(df.tail())

# Save for training
df.to_csv("xauusd_data.csv", index=False)


                   time     open     high      low    close  tick_volume  \
995 2025-07-01 12:22:00  3354.86  3354.90  3353.14  3353.27          226   
996 2025-07-01 12:23:00  3353.24  3354.08  3353.24  3354.04          211   
997 2025-07-01 12:24:00  3354.04  3354.76  3353.42  3353.71          214   
998 2025-07-01 12:25:00  3353.64  3354.50  3352.79  3354.25          247   
999 2025-07-01 12:26:00  3354.12  3354.41  3353.50  3353.71          131   

     spread  real_volume  
995      12            0  
996      12            0  
997      12            0  
998      12            0  
999      12            0  


True

In [16]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import xgboost as xgb
import joblib
import os

In [19]:
# === Step 1: Load and explore the dataset ===
# Read the candle CSV written by the MT5 download cell. No date parsing is
# requested here, so the 'time' column is loaded as plain strings (object dtype).
df = pd.read_csv("xauusd_data.csv")

In [21]:
# Preview the first rows to sanity-check the column names and values.
df.head()

Unnamed: 0,time,open,high,low,close,tick_volume,spread,real_volume
0,2025-06-30 18:15:00,3297.48,3298.29,3297.36,3298.19,105,12,0
1,2025-06-30 18:16:00,3298.19,3298.26,3297.5,3297.52,114,12,0
2,2025-06-30 18:17:00,3297.52,3297.95,3297.14,3297.69,123,12,0
3,2025-06-30 18:18:00,3297.7,3299.14,3297.62,3298.53,145,12,0
4,2025-06-30 18:19:00,3298.53,3298.71,3298.12,3298.19,89,12,0


In [22]:
# Summarize column dtypes, non-null counts, and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   time         1000 non-null   object 
 1   open         1000 non-null   float64
 2   high         1000 non-null   float64
 3   low          1000 non-null   float64
 4   close        1000 non-null   float64
 5   tick_volume  1000 non-null   int64  
 6   spread       1000 non-null   int64  
 7   real_volume  1000 non-null   int64  
dtypes: float64(4), int64(3), object(1)
memory usage: 62.6+ KB


In [None]:
# === Step 1: Load and clean the dataset ===
# Re-read the CSV and convert the 'time' strings to pandas datetimes in a
# single chained expression.
df = pd.read_csv("xauusd_data.csv").assign(
    time=lambda frame: pd.to_datetime(frame["time"])
)

In [23]:
# Keep only the closing prices, as a single-column 2-D array of shape
# (n_rows, 1) -- the layout the scaler is given in the next step.
close_data = df["close"].to_numpy()[:, None]

In [24]:
# === Step 2: Normalize the data ===
# Min-max scale the closing prices (MinMaxScaler's default feature_range is
# (0, 1)). The fitted scaler object is kept because it is saved with the model.
# NOTE(review): the scaler is fitted on the full series, before the train/test
# split, so the test rows influence the scaling -- confirm this is acceptable.
scaler = MinMaxScaler().fit(close_data)
scaled_data = scaler.transform(close_data)

In [25]:
# === Step 3: Create 60-step sequences for XGBoost ===
def create_sequences(data, window=60):
    """Turn a series into (look-back window, next value) training pairs.

    Row ``k`` of ``X`` holds values ``k .. k + window - 1`` of the series and
    ``y[k]`` is the value at position ``k + window``.

    Args:
        data: Array-like of shape ``(n_samples, n_columns)`` or ``(n_samples,)``.
            For 2-D input only column 0 is used.
        window: Number of consecutive past values in each feature row.
            Must be a positive integer.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays with shapes
        ``(n_samples - window, window)`` and ``(n_samples - window,)``.
        If the series has ``window`` samples or fewer, both arrays are empty
        and ``X`` still has shape ``(0, window)`` so it stays 2-D.

    Raises:
        ValueError: If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be a positive integer")

    series = np.asarray(data)
    if series.ndim == 1:
        # Accept a flat series as well as the single-column 2-D layout.
        series = series.reshape(-1, 1)
    values = series[:, 0]

    n_pairs = len(values) - window
    if n_pairs <= 0:
        # Not enough history for even one pair: return consistently shaped
        # empty arrays rather than a 1-D empty X.
        return (
            np.empty((0, window), dtype=values.dtype),
            np.empty((0,), dtype=values.dtype),
        )

    # Index matrix: row k selects positions k, k+1, ..., k+window-1.
    positions = np.arange(n_pairs)[:, None] + np.arange(window)[None, :]
    X = values[positions]
    y = values[window:].copy()
    return X, y

# Build the supervised dataset from the scaled closes with a 60-step look-back:
# each row of X is 60 consecutive values and y is the value that follows them.
X, y = create_sequences(scaled_data, window=60)

In [26]:
# === Step 4: Train-test split ===
# Hold out 20% of the samples for testing. Shuffling is disabled so the
# original (chronological) row order is preserved and the test set is the tail.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=False,
)


In [27]:
# === Step 5: Train XGBoost model ===
# Gradient-boosted regressor: 100 trees, depth 5, learning rate 0.1.
# NOTE(review): X_test / y_test are not used in the cells shown here, so the
# model is saved without a hold-out evaluation -- confirm this is intended.
xgb_model = xgb.XGBRegressor(
    max_depth=5,
    learning_rate=0.1,
    n_estimators=100,
)
xgb_model.fit(X_train, y_train)

0,1,2
,objective,'reg:squarederror'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False


In [28]:
# === Step 6: Save model + scaler ===
# Create the output folder if needed, then persist both artifacts: the model
# as XGBoost JSON and the fitted scaler as a joblib pickle.
# The scaler file is a pickle -- only load it back from a trusted location.
os.makedirs(name="models", exist_ok=True)
xgb_model.save_model(fname="models/xgboost_model.json")
joblib.dump(value=scaler, filename="models/scaler.pkl")

print("✅ XGBoost model and scaler saved in /models")

✅ XGBoost model and scaler saved in /models
