# MOEX Agent — Обучение моделей в Google Colab

Этот notebook обучает ML-модели для торговой системы MOEX Agent.

**Инструкция:**
1. Откройте в Google Colab: File → Open notebook → GitHub → вставьте URL репозитория (https://github.com/omnistrateg-ux/moex-agent)
2. (Необязательно) Включите GPU: Runtime → Change runtime type → GPU — модели scikit-learn в этом notebook обучаются на CPU, поэтому GPU не обязателен
3. Запустите все ячейки по порядку

In [None]:
# Clone the project repository and switch the working directory into it
!git clone https://github.com/omnistrateg-ux/moex-agent.git
%cd moex-agent

In [None]:
# Install dependencies (-q keeps pip output quiet)
!pip install -q pandas numpy scikit-learn joblib pyyaml requests

In [None]:
# Create the data/ and models/ directories (-p: no error if they already exist)
!mkdir -p data models

In [None]:
# Load MOEX data for the last 30 days
import sys
# Put the current directory first on the import path so the moex_agent
# package imported below is resolved from the working directory.
sys.path.insert(0, '.')

from moex_agent.config_schema import load_config
from moex_agent.storage import connect
from moex_agent.bootstrap import bootstrap_recent

# `config` and `conn` are notebook-level globals reused by later cells.
config = load_config()
conn = connect(config.sqlite_path)

print("Загружаем данные с MOEX...")
# NOTE(review): presumably fetches candles and writes them through `conn`;
# the `candles` table is queried afterwards — confirm in moex_agent.bootstrap.
bootstrap_recent(conn, config, days=30)
print("Готово!")

In [None]:
# Check how many candles are stored in the `candles` table
cur = conn.execute("SELECT COUNT(*) as cnt FROM candles")
# Access by column name assumes rows are mapping-like (e.g. sqlite3.Row) —
# TODO confirm the row factory set up by moex_agent.storage.connect.
count = cur.fetchone()["cnt"]
print(f"Загружено свечей: {count:,}")

In [None]:
# Импортируем модули для обучения
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import accuracy_score, precision_score, recall_score
import joblib
import json
from datetime import datetime

from moex_agent.features import build_feature_frame, FEATURE_COLS
from moex_agent.labels import create_labels

In [None]:
# Training configuration
# Prediction horizons: one model is trained and saved per entry.
HORIZONS = ["5m", "10m", "30m", "1h"]
N_SPLITS = 5  # number of walk-forward folds passed to walk_forward_train

# Keyword arguments unpacked into GradientBoostingClassifier for every fold.
MODEL_PARAMS = {
    "n_estimators": 100,
    "max_depth": 5,
    "learning_rate": 0.1,
    "min_samples_split": 50,
    "min_samples_leaf": 20,
    "random_state": 42,  # fixed seed for reproducible fits
}

In [None]:
# Walk-forward training routine
def walk_forward_train(X, y, n_splits=5, *, conf_threshold=0.55, model_params=None):
    """Train a gradient-boosting classifier on walk-forward (expanding) splits.

    The rows of ``X``/``y`` are split positionally with ``TimeSeriesSplit``:
    each fold trains on a prefix of the rows and tests on the rows that
    follow it. This avoids look-ahead leakage only if the rows are already
    in chronological order — the function does not sort or check them.

    Args:
        X: pandas DataFrame of features (indexed positionally via ``.iloc``).
        y: pandas Series of labels aligned row-by-row with ``X``.
        n_splits: number of folds for ``TimeSeriesSplit``.
        conf_threshold: a prediction counts as a simulated "trade" when the
            probability in column 1 of ``predict_proba`` exceeds this value.
        model_params: keyword arguments for ``GradientBoostingClassifier``;
            defaults to the notebook-level ``MODEL_PARAMS``.

    Returns:
        ``(best_model, avg_metrics)`` where ``best_model`` is the fitted
        model of the fold with the highest test accuracy and ``avg_metrics``
        holds the fold-averaged ``accuracy``, ``win_rate`` and ``trades``
        as plain Python numbers.
    """
    params = MODEL_PARAMS if model_params is None else model_params
    tscv = TimeSeriesSplit(n_splits=n_splits)

    results = []
    best_model = None
    # Start below any reachable accuracy so a model is always selected, even
    # when every fold scores 0.0 (the previous `0` start returned None then).
    best_score = float("-inf")

    for fold, (train_idx, test_idx) in enumerate(tscv.split(X)):
        X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

        model = GradientBoostingClassifier(**params)
        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)
        # NOTE(review): column 1 is treated as the "positive" class, which
        # assumes binary labels with the positive class sorted second — confirm
        # against moex_agent.labels.create_labels.
        y_prob = model.predict_proba(X_test)[:, 1]

        # Metrics
        acc = accuracy_score(y_test, y_pred)

        # Simulated win rate: share of correct predictions among the
        # high-confidence ones (no profit factor is computed here).
        high_conf = y_prob > conf_threshold
        trades = int(high_conf.sum())
        if trades > 0:
            wins = int(((y_pred == y_test.to_numpy()) & high_conf).sum())
            win_rate = wins / trades
        else:
            # No high-confidence predictions in this fold: a neutral 0.5 is
            # recorded, and it does enter the averaged win rate below.
            win_rate = 0.5

        results.append({
            "fold": fold,
            "accuracy": acc,
            "win_rate": win_rate,
            "trades": trades,
        })

        # NOTE(review): the model is picked by its own test-fold accuracy, so
        # an early fold trained on less data can win — consider the last fold.
        if acc > best_score:
            best_score = acc
            best_model = model

        print(f"  Fold {fold+1}: accuracy={acc:.3f}, win_rate={win_rate:.3f}, trades={trades}")

    # Fold-averaged metrics, converted to built-in types for JSON output
    avg_metrics = {
        "accuracy": float(np.mean([r["accuracy"] for r in results])),
        "win_rate": float(np.mean([r["win_rate"] for r in results])),
        "trades": int(np.mean([r["trades"] for r in results])),
    }

    return best_model, avg_metrics

In [None]:
# Load every row of the `candles` table into a DataFrame, ordered by `ts`
print("Загружаем данные из БД...")
df = pd.read_sql("SELECT * FROM candles ORDER BY ts", conn)
print(f"Загружено: {len(df):,} свечей")

In [None]:
# Train one model per horizon and collect per-horizon metrics in `meta`
meta = {
    "trained_at": datetime.now().isoformat(),
    "horizons": {},
    "features_count": len(FEATURE_COLS),
    "candles_used": len(df),
}

# Features are built from the candles alone (the horizon is not an input),
# so build them once per ticker instead of once per (horizon, ticker) pair.
# groupby(sort=False) visits tickers in order of first appearance, in a
# single pass over `df`.
ticker_data = []
for _, ticker_df in df.groupby('secid', sort=False):
    if len(ticker_df) < 1000:
        # Skip tickers with fewer than 1000 candles
        continue
    features_df = build_feature_frame(ticker_df.to_dict('records'))
    ticker_data.append((ticker_df, features_df))

for horizon in HORIZONS:
    print(f"\n{'='*50}")
    print(f"Обучение модели: {horizon}")
    print(f"{'='*50}")

    all_features = []
    all_labels = []

    for ticker_df, features_df in ticker_data:
        # Labels for this horizon; a fresh copy is passed in case
        # create_labels modifies its argument.
        labels = create_labels(ticker_df.copy(), horizon)

        # Align by truncating both to the shorter length.
        # NOTE(review): assumes features and labels line up from the first
        # row — confirm against build_feature_frame / create_labels.
        min_len = min(len(features_df), len(labels))
        all_features.append(features_df.iloc[:min_len][FEATURE_COLS])
        all_labels.extend(labels[:min_len])

    if not all_features:
        # pd.concat raises ValueError on an empty list; skip the horizon.
        print(f"Недостаточно данных для обучения: {horizon} пропущен")
        continue

    # Combine all tickers into one dataset.
    # NOTE(review): rows end up grouped by ticker, so the positional folds in
    # walk_forward_train are chronological within a ticker but not across
    # tickers; sorting the combined rows by timestamp would need a time
    # column on the feature frame.
    X = pd.concat(all_features, ignore_index=True)
    y = pd.Series(all_labels)

    # Drop rows with a missing feature or a missing label
    mask = ~(X.isna().any(axis=1) | y.isna())
    X = X[mask].reset_index(drop=True)
    y = y[mask].reset_index(drop=True)

    print(f"Данных для обучения: {len(X):,}")

    if len(X) <= N_SPLITS:
        # TimeSeriesSplit needs more samples than folds.
        print(f"Недостаточно данных для обучения: {horizon} пропущен")
        continue

    # Training
    model, metrics = walk_forward_train(X, y, n_splits=N_SPLITS)

    # Save the selected model
    model_path = f"models/model_time_{horizon}.joblib"
    joblib.dump(model, model_path)
    print(f"\nМодель сохранена: {model_path}")

    # Metadata for this horizon
    meta["horizons"][horizon] = {
        "win_rate": round(metrics["win_rate"], 3),
        "accuracy": round(metrics["accuracy"], 3),
        "trades": metrics["trades"],
    }

    print(f"\nРезультат {horizon}: WR={metrics['win_rate']:.1%}, Acc={metrics['accuracy']:.1%}")

In [None]:
# Write the training metadata to models/meta.json, then print a summary
with open("models/meta.json", "w") as f:
    json.dump(meta, f, indent=2)

banner = "=" * 50
summary_lines = [
    "",  # leading blank line before the banner
    banner,
    "ОБУЧЕНИЕ ЗАВЕРШЕНО!",
    banner,
    json.dumps(meta, indent=2),
]
print("\n".join(summary_lines))

In [None]:
# Zip the models/ directory and download the archive via the Colab helper
from google.colab import files

!zip -r models.zip models/
files.download('models.zip')

print("\nСкачайте models.zip и загрузите в Replit!")

## Готово!

1. Скачайте `models.zip`
2. Загрузите в Replit → папка `models/`
3. Перезапустите приложение