In [1]:
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_absolute_error

In [14]:
df = pd.read_csv(r'C:\Users\BaranokVA\Desktop\XPT_USD Historical Data.csv')
df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
df = df.sort_values("Date").reset_index(drop=True)

In [15]:
for col in ["Price", "Open", "High", "Low"]:
    df[col] = pd.to_numeric(df[col].astype(str).str.replace(",", "", regex=False), errors="coerce")

if "Change %" in df.columns:
    df["Change %"] = pd.to_numeric(df["Change %"].astype(str).str.replace("%", "", regex=False), errors="coerce")

# Vol. обычно пустой/текстовый — не нужен
if "Vol." in df.columns:
    df = df.drop(columns=["Vol."])

df = df.dropna(subset=["Date", "Open", "High", "Low", "Price"]).reset_index(drop=True)

# Target for trading: drawdown
df["drawdown"] = df["Open"] - df["Low"]

# ===== 2) Features (no leakage) =====
def make_features(d):
    x = d.copy()
    x["dow"] = x["Date"].dt.dayofweek
    x["month"] = x["Date"].dt.month

    x["range"] = x["High"] - x["Low"]

    # ВАЖНО: min_periods, чтобы не получить колонку "вся NaN" на короткой истории
    x["atr_14"] = x["range"].rolling(14, min_periods=14).mean().shift(1)

    for lag in [1, 2, 3, 5, 7]:
        x[f"dd_lag_{lag}"] = x["drawdown"].shift(lag)
        x[f"low_lag_{lag}"] = x["Low"].shift(lag)
        x[f"open_lag_{lag}"] = x["Open"].shift(lag)
        x[f"range_lag_{lag}"] = x["range"].shift(lag)

    return x

In [16]:
feat = make_features(df)

In [17]:
all_nan_cols = feat.columns[feat.isna().all()].tolist()
if all_nan_cols:
    print("Удаляю колонки, где все NaN:", all_nan_cols)
    feat = feat.drop(columns=all_nan_cols)

In [18]:
feat = feat.dropna().reset_index(drop=True)

In [19]:
print("Строк после фичей и dropna:", len(feat))

Строк после фичей и dropna: 52


In [20]:
y = feat["drawdown"]
X = feat.drop(columns=["Date", "Low", "High", "Price", "drawdown"], errors="ignore")

In [22]:
n_splits = min(5, max(2, len(X) // 10))
tscv = TimeSeriesSplit(n_splits=n_splits)

In [23]:
model = XGBRegressor(
    n_estimators=300,
    max_depth=3,
    learning_rate=0.05,
    subsample=0.9,
    colsample_bytree=0.9,
    random_state=42,
    tree_method="hist"
)

In [24]:
mae = []
for tr, te in tscv.split(X):
    model.fit(X.iloc[tr], y.iloc[tr])
    pred = model.predict(X.iloc[te])
    mae.append(mean_absolute_error(y.iloc[te], pred))

print(f"CV MAE drawdown (n_splits={n_splits}):", round(float(np.mean(mae)), 2))

CV MAE drawdown (n_splits=5): 40.45


In [25]:
model.fit(X, y)

pred_dd = float(model.predict(X.iloc[[-1]])[0])
today_open = float(feat["Open"].iloc[-1])
pred_low = today_open - pred_dd

print("Дата (последняя в файле):", feat["Date"].iloc[-1].date())
print("Прогноз drawdown:", round(pred_dd, 2))
print("Прогноз Low:", round(pred_low, 2))

Дата (последняя в файле): 2026-01-29
Прогноз drawdown: 39.72
Прогноз Low: 2668.41
