# MOEX Agent — Обучение в Yandex DataSphere

## Настройка:
1. Создайте проект в https://console.cloud.yandex.ru/datasphere
2. Выберите конфигурацию (g1.1 или выше; GPU не обязателен — GradientBoostingClassifier из scikit-learn обучается только на CPU)
3. Загрузите этот notebook
4. Запустите все ячейки

In [None]:
# Устанавливаем зависимости
%pip install pandas numpy scikit-learn joblib pyyaml requests

In [None]:
# Clone the project repository
!git clone https://github.com/omnistrateg-ux/moex-agent.git
# %cd changes the notebook's working directory, so the relative paths used by
# later cells (for example models/) resolve inside the checkout.
%cd moex-agent
# Create the output directories; -p makes this a no-op if they already exist.
!mkdir -p data models

In [None]:
# Imports
import sys
# Put the current directory (the repository checkout after the %cd above)
# first on the module search path so the `moex_agent` package is importable.
sys.path.insert(0, '.')

import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import TimeSeriesSplit
import joblib
import json
from datetime import datetime

print("Импорты загружены!")

In [None]:
# Load market data from MOEX
from moex_agent.config_schema import load_config
from moex_agent.storage import connect
from moex_agent.bootstrap import bootstrap_recent

config = load_config()
# `conn` is reused by later cells to read the candles back.
conn = connect(config.sqlite_path)

print("Загружаем данные с MOEX (30 дней)...")
# NOTE(review): presumably fetches the last `days` days of candles and stores
# them through `conn` — confirm against moex_agent.bootstrap.
bootstrap_recent(conn, config, days=30)

# Sanity check: count the rows in the candles table
cur = conn.execute("SELECT COUNT(*) as cnt FROM candles")
# Access by column name assumes connect() configures a name-indexable row type
# (e.g. sqlite3.Row) — TODO confirm against moex_agent.storage.
count = cur.fetchone()["cnt"]
print(f"Загружено: {count:,} свечей")

In [None]:
# Training configuration

# Number of folds handed to the walk-forward split.
N_SPLITS = 5

# Prediction horizons; one model is trained and saved per entry.
HORIZONS = ["5m", "10m", "30m", "1h"]

# Keyword arguments forwarded to GradientBoostingClassifier.
MODEL_PARAMS = dict(
    n_estimators=100,
    max_depth=5,
    learning_rate=0.1,
    min_samples_split=50,
    min_samples_leaf=20,
    random_state=42,
)

In [None]:
# Функция обучения
from moex_agent.features import build_feature_frame, FEATURE_COLS
from moex_agent.labels import create_labels

def walk_forward_train(X, y, n_splits=5):
    """Train one classifier per walk-forward fold and return the best one.

    The rows are split with ``TimeSeriesSplit``: each fold trains on a prefix
    of the data and is evaluated on the rows that immediately follow it. A
    fresh ``GradientBoostingClassifier`` built from ``MODEL_PARAMS`` is fitted
    in every fold.

    Args:
        X: Feature table, indexed positionally with ``.iloc``.
        y: Labels aligned row-for-row with ``X``, indexed with ``.iloc``.
        n_splits: Number of folds passed to ``TimeSeriesSplit``.

    Returns:
        A ``(best_model, metrics)`` tuple. ``best_model`` is the model from
        the fold with the highest test accuracy (the earliest fold wins ties).
        ``metrics`` holds the per-fold means of ``accuracy`` and ``win_rate``
        and the mean number of high-confidence ``trades`` truncated to int.
    """
    tscv = TimeSeriesSplit(n_splits=n_splits)
    results = []
    best_model = None
    # Start below any reachable accuracy so a model is always selected, even
    # when every fold scores 0.0 (a 0 start with a strict ">" returned None).
    best_score = float("-inf")

    for fold, (train_idx, test_idx) in enumerate(tscv.split(X)):
        X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

        model = GradientBoostingClassifier(**MODEL_PARAMS)
        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)
        # Probability of the class in column 1 of predict_proba.
        y_prob = model.predict_proba(X_test)[:, 1]

        acc = (y_pred == y_test).mean()
        # Only rows whose column-1 probability exceeds 0.55 count as trades.
        high_conf = y_prob > 0.55
        # Share of high-confidence rows predicted correctly; max(..., 1)
        # avoids a division by zero when no row passes the threshold.
        win_rate = ((y_pred == y_test) & high_conf).sum() / max(high_conf.sum(), 1)

        results.append({"accuracy": acc, "win_rate": win_rate, "trades": high_conf.sum()})

        if acc > best_score:
            best_score = acc
            best_model = model

        print(f"  Fold {fold+1}: acc={acc:.3f}, wr={win_rate:.3f}")

    return best_model, {
        "accuracy": np.mean([r["accuracy"] for r in results]),
        "win_rate": np.mean([r["win_rate"] for r in results]),
        "trades": int(np.mean([r["trades"] for r in results])),
    }

In [None]:
# Read every stored candle, ordered by the ts column, into a DataFrame
df = pd.read_sql("SELECT * FROM candles ORDER BY ts", conn)
print(f"Всего свечей: {len(df):,}")

# Run metadata written to models/meta.json later on; the per-horizon metrics
# are added under "horizons" by the training loop.
meta = {
    # datetime.now() without a tz argument is naive local time.
    "trained_at": datetime.now().isoformat(),
    "platform": "Yandex DataSphere",
    "horizons": {},
    "features_count": len(FEATURE_COLS),
    "candles_used": len(df),
}

In [None]:
# Train and save one model per horizon
for horizon in HORIZONS:
    print(f"\n{'='*50}")
    print(f"Обучение: {horizon}")
    print(f"{'='*50}")

    all_features = []
    all_labels = []

    # A single groupby pass replaces a full boolean-mask scan per ticker;
    # sort=False keeps tickers in order of first appearance, as unique() did.
    for _, group in df.groupby('secid', sort=False):
        # Skip tickers with fewer than 1000 candles.
        if len(group) < 1000:
            continue
        ticker_df = group.copy()

        features_df = build_feature_frame(ticker_df.to_dict('records'))
        labels = create_labels(ticker_df, horizon)

        # NOTE(review): truncating both to the shorter length assumes features
        # and labels are aligned from their first row — confirm against
        # build_feature_frame / create_labels.
        min_len = min(len(features_df), len(labels))
        all_features.append(features_df[FEATURE_COLS].iloc[:min_len])
        all_labels.extend(labels[:min_len])

    # pd.concat([]) fails with an opaque "No objects to concatenate"; raise
    # the same exception type with a message that names the actual cause.
    if not all_features:
        raise ValueError(
            f"{horizon}: нет тикеров с историей не менее 1000 свечей — обучать не на чем"
        )

    # NOTE(review): rows are stacked ticker by ticker, so the positional
    # TimeSeriesSplit folds follow ticker order rather than one global
    # timeline — verify this is the intended walk-forward scheme.
    X = pd.concat(all_features, ignore_index=True)
    y = pd.Series(all_labels)

    # Drop every row that has a missing feature or a missing label.
    mask = ~(X.isna().any(axis=1) | y.isna())
    X, y = X[mask].reset_index(drop=True), y[mask].reset_index(drop=True)

    print(f"Данных: {len(X):,}")

    # TimeSeriesSplit needs more samples than splits; fail with a clear
    # message instead of an error from inside scikit-learn.
    if len(X) <= N_SPLITS:
        raise ValueError(
            f"{horizon}: после удаления строк с NaN осталось {len(X):,} строк — "
            f"недостаточно для {N_SPLITS} фолдов"
        )

    model, metrics = walk_forward_train(X, y, N_SPLITS)

    joblib.dump(model, f"models/model_time_{horizon}.joblib")
    meta["horizons"][horizon] = metrics

    print(f"✅ {horizon}: WR={metrics['win_rate']:.1%}")

In [None]:
# Save the run metadata next to the models.
# ensure_ascii=False writes non-ASCII characters verbatim, so the file
# encoding is pinned to UTF-8 instead of relying on the platform default.
with open("models/meta.json", "w", encoding="utf-8") as f:
    json.dump(meta, f, indent=2, ensure_ascii=False)

# Print a banner followed by the same metadata for a quick visual check.
print("\n" + "="*50)
print("ГОТОВО!")
print("="*50)
print(json.dumps(meta, indent=2, ensure_ascii=False))

In [None]:
# Upload the trained artifacts to Yandex Object Storage
import boto3
import os

# The access keys come from DataSphere secrets (read here from the
# environment). Or specify them directly:
# AWS_ACCESS_KEY_ID = "your_key"
# AWS_SECRET_ACCESS_KEY = "your_secret"

BUCKET = 'moex-agent-models'  # create the bucket beforehand

s3 = boto3.client(
    's3',
    endpoint_url='https://storage.yandexcloud.net',
    aws_access_key_id=os.getenv('AWS_ACCESS_KEY_ID'),
    aws_secret_access_key=os.getenv('AWS_SECRET_ACCESS_KEY'),
)

# One model file per horizon; the object key equals the local relative path.
for horizon in HORIZONS:
    filename = f"model_time_{horizon}.joblib"
    local_path = f"models/{filename}"
    s3.upload_file(local_path, BUCKET, local_path)
    print(f"Uploaded: {filename}")

# The metadata file is uploaded last, under the same models/ prefix.
meta_path = "models/meta.json"
s3.upload_file(meta_path, BUCKET, meta_path)
print("\n✅ Модели загружены в Object Storage!")

## Скачивание моделей в Replit

В Replit Shell выполните:

```bash
pip install boto3
# download_file does not create missing directories, so make sure the local
# models/ target exists before downloading into it.
mkdir -p models
python -c "
import boto3
import os

s3 = boto3.client(
    's3',
    endpoint_url='https://storage.yandexcloud.net',
    aws_access_key_id=os.environ['YANDEX_S3_KEY'],
    aws_secret_access_key=os.environ['YANDEX_S3_SECRET'],
)

BUCKET = 'moex-agent-models'

for h in ['5m', '10m', '30m', '1h']:
    s3.download_file(BUCKET, f'models/model_time_{h}.joblib', f'models/model_time_{h}.joblib')
    print(f'Downloaded: model_time_{h}.joblib')

s3.download_file(BUCKET, 'models/meta.json', 'models/meta.json')
print('Done!')
"
```