# 🧠 Deep Model Tuning for Bitcoin Birth Date

Проверка гипотезы: "Увеличение сложности модели поможет переварить транзиты к натальной карте".

- Дата: **2009-10-10** (Economic Birth / First Rate)
- Признаки: Транзиты к наталу + Аспекты транзитов + Фазы (БЕЗ домов)

In [2]:
import sys
from pathlib import Path
import pandas as pd
import numpy as np
from itertools import product
from tqdm import tqdm
from datetime import datetime, date, timezone
from sklearn.metrics import classification_report, matthews_corrcoef

# Make the project root importable so the `RESEARCH` package imported below
# resolves. The location can be overridden with the OSTROFUN_ROOT environment
# variable; the previously hard-coded path stays as the default, so existing
# setups keep working unchanged.
import os

PROJECT_ROOT = Path(os.environ.get("OSTROFUN_ROOT", "/home/rut/ostrofun"))
# Put the root first on sys.path exactly once: re-running this cell must not
# stack duplicate entries, and the root keeps import priority.
sys.path[:] = [str(PROJECT_ROOT)] + [p for p in sys.path if p != str(PROJECT_ROOT)]

from RESEARCH.config import cfg
from RESEARCH.data_loader import load_market_data
from RESEARCH.labeling import create_balanced_labels
from RESEARCH.astro_engine import (
    init_ephemeris,
    calculate_bodies_for_dates_multi,
    calculate_aspects_for_dates,
    calculate_transits_for_dates,
    calculate_phases_for_dates,
    get_natal_bodies,
)
from RESEARCH.features import build_full_features, merge_features_with_labels
from RESEARCH.model_training import split_dataset, prepare_xy, train_xgb_model, tune_threshold, predict_with_threshold, check_cuda_available

In [3]:
# Config
# Natal ("birth") date whose chart the transits are computed against.
TARGET_DATE = date(year=2009, month=10, day=10)
print(f"🧠 Tuning for Birth Date: {TARGET_DATE}")

# Astro / labeling settings read by the cells below:
#   orb_mult                -> passed to the transit and aspect calculations
#   gauss_window, gauss_std -> passed to create_balanced_labels
#   coord_mode, exclude_bodies -> in the visible cells these are only written
#                                 into the exported model metadata
ASTRO_CONFIG = dict(
    coord_mode="both",
    orb_mult=0.1,
    gauss_window=200,
    gauss_std=70.0,
    exclude_bodies=None,
)

# Deep Grid Search Space
# Key order matters: the grid search zips these keys with itertools.product.
# NOTE(review): in the recorded results, rows that differ only in n_estimators
# have identical R_MIN/MCC — presumably early stopping ends training before
# 500 rounds; confirm before widening this axis.
PARAM_GRID = dict(
    n_estimators=[500, 900, 1300],
    max_depth=[6, 8, 10],  # trying deep trees
    learning_rate=[0.05, 0.03],
    colsample_bytree=[0.6, 0.8],
    subsample=[0.8],
)

🧠 Tuning for Birth Date: 2009-10-10


In [4]:
# 1. Prepare Data
# Loads market history, restricts it to the modelling window, derives labels,
# computes the astro inputs and merges everything into `df_dataset`.
DATA_START_DATE = "2017-11-01"  # first market day kept for modelling

print("Loading data...")
df_market = (
    load_market_data()
    .loc[lambda frame: frame["date"] >= DATA_START_DATE]
    .reset_index(drop=True)
)
df_labels = create_balanced_labels(df_market, ASTRO_CONFIG["gauss_window"], ASTRO_CONFIG["gauss_std"])
settings = init_ephemeris()
_, device = check_cuda_available()

print("Calculating astro...")
# coord_mode comes from ASTRO_CONFIG (not a literal) so the value used here
# can never drift from the one written into the exported model metadata.
# NOTE(review): the three-way unpacking matches what "both" returns in the
# recorded run; confirm other modes return the same shape before changing it.
df_bodies, geo_by_date, helio_by_date = calculate_bodies_for_dates_multi(
    df_market["date"], settings, coord_mode=ASTRO_CONFIG["coord_mode"]
)
# Only the geocentric positions feed phases, transits and aspects below;
# `helio_by_date` is not used further in this cell.
bodies_by_date = geo_by_date
df_phases = calculate_phases_for_dates(bodies_by_date)

# 2. Build Natal Features
print(f"Building natal features for {TARGET_DATE}...")
# Natal moment is fixed at 12:00:00 on TARGET_DATE.
# NOTE(review): the timezone this string is interpreted in is not visible
# here — confirm against get_natal_bodies.
natal_dt_str = f"{TARGET_DATE.isoformat()}T12:00:00"
natal_bodies = get_natal_bodies(natal_dt_str, settings)

df_transits = calculate_transits_for_dates(
    bodies_by_date, natal_bodies, settings,
    orb_mult=ASTRO_CONFIG["orb_mult"]
)

# Aspects between transiting bodies (baseline features)
df_aspects = calculate_aspects_for_dates(
    bodies_by_date, settings,
    orb_mult=ASTRO_CONFIG["orb_mult"]
)

# 3. Full Dataset
print("Merging dataset...")
df_features = build_full_features(
    df_bodies, df_aspects, df_transits=df_transits, df_phases=df_phases,
    include_pair_aspects=True,    # include the baseline aspects
    include_transit_aspects=True  # include the natal transits
)
df_dataset = merge_features_with_labels(df_features, df_labels)

print(f"Dataset Shape: {df_dataset.shape}")
print(f"Columns: {len(df_dataset.columns)}")

  df = pd.read_sql_query(query, conn, params=params)


Loading data...
Loaded 5677 rows from DB for subject=btc
Date range: 2010-07-18 -> 2026-01-31
Labels created: 2814 samples
  UP: 1368 (48.6%)
  DOWN: 1446 (51.4%)
  Date range: 2017-11-01 -> 2025-07-15
Calculating astro...
📍 Расчёт ГЕОЦЕНТРИЧЕСКИХ координат (Земля в центре)...


Calculating bodies: 100%|██████████| 3014/3014 [00:00<00:00, 15439.69it/s]


☀️ Расчёт ГЕЛИОЦЕНТРИЧЕСКИХ координат (Солнце в центре)...


Calculating bodies: 100%|██████████| 3014/3014 [00:00<00:00, 20050.46it/s]


✅ Объединено: 78364 записей из 2 систем координат


Calculating phases & elongations: 100%|██████████| 3014/3014 [00:00<00:00, 162424.13it/s]


✅ Рассчитано 3014 дней: фаза Луны + элонгации планет
Building natal features for 2009-10-10...


Calculating transits (orb=0.1): 100%|██████████| 3014/3014 [00:00<00:00, 22056.49it/s]
Calculating aspects (orb=0.1): 100%|██████████| 3014/3014 [00:00<00:00, 55126.60it/s]


Merging dataset...
Merged dataset: 3014 samples (ALL days, forward-filled)
Features: 2040
Dataset Shape: (3014, 2042)
Columns: 2042


In [5]:
# 4. Grid Search
# Trains one model per PARAM_GRID combination and records its test-set
# minimum per-class recall (R_MIN) and Matthews correlation (MCC).
print("🚀 Starting Deep Grid Search...")

train_df, val_df, test_df = split_dataset(df_dataset)

# Every column except the date key and the label is treated as a feature.
non_feature_cols = {"date", "target"}
feature_cols = [col for col in df_dataset.columns if col not in non_feature_cols]

X_train, y_train = prepare_xy(train_df, feature_cols)
X_val, y_val = prepare_xy(val_df, feature_cols)
X_test, y_test = prepare_xy(test_df, feature_cols)

results = []
param_names = list(PARAM_GRID)
param_sets = [dict(zip(param_names, combo)) for combo in product(*PARAM_GRID.values())]

for params in tqdm(param_sets, desc="Grid Search"):
    # Train
    model = train_xgb_model(
        X_train, y_train, X_val, y_val, feature_cols,
        n_classes=2, device=device, early_stopping_rounds=50, verbose=False,
        **params,
    )

    # Evaluate: the threshold is tuned on validation, then scored on test.
    # NOTE(review): the ranking cell sorts on these test-set scores, so the
    # winner's R_MIN is likely optimistic — consider selecting on validation.
    best_t, _ = tune_threshold(model, X_val, y_val, metric="recall_min")
    y_test_pred = predict_with_threshold(model, X_test, threshold=best_t)

    per_class = classification_report(y_test, y_test_pred, output_dict=True, zero_division=0)
    r_min = min(per_class[label]["recall"] for label in ("0", "1"))
    mcc = matthews_corrcoef(y_test, y_test_pred)

    # One row per combination: hyper-parameters first, then the two metrics.
    results.append({**params, "R_MIN": r_min, "MCC": mcc})

🚀 Starting Deep Grid Search...
Split: Train=2109, Val=452, Test=453


Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.


  return func(**kwargs)
Grid Search:   3%|▎         | 1/36 [00:01<01:07,  1.93s/it]

🎯 Best threshold=0.50, RECALL_MIN=0.4109, gap=0.1823


Grid Search:   6%|▌         | 2/36 [00:03<01:00,  1.78s/it]

🎯 Best threshold=0.50, RECALL_MIN=0.4746, gap=0.1291


Grid Search:   8%|▊         | 3/36 [00:06<01:21,  2.48s/it]

🎯 Best threshold=0.50, RECALL_MIN=0.4109, gap=0.1823


Grid Search:  11%|█         | 4/36 [00:09<01:15,  2.34s/it]

🎯 Best threshold=0.48, RECALL_MIN=0.5382, gap=0.0437


Grid Search:  14%|█▍        | 5/36 [00:10<01:01,  2.00s/it]

🎯 Best threshold=0.48, RECALL_MIN=0.5455, gap=0.0817


Grid Search:  17%|█▋        | 6/36 [00:12<00:58,  1.94s/it]

🎯 Best threshold=0.47, RECALL_MIN=0.4218, gap=0.2222


Grid Search:  19%|█▉        | 7/36 [00:13<00:53,  1.84s/it]

🎯 Best threshold=0.49, RECALL_MIN=0.5455, gap=0.0817


Grid Search:  22%|██▏       | 8/36 [00:15<00:47,  1.71s/it]

🎯 Best threshold=0.49, RECALL_MIN=0.5706, gap=0.0876


Grid Search:  25%|██▌       | 9/36 [00:16<00:45,  1.67s/it]

🎯 Best threshold=0.48, RECALL_MIN=0.5455, gap=0.0817


Grid Search:  28%|██▊       | 10/36 [00:18<00:45,  1.76s/it]

🎯 Best threshold=0.47, RECALL_MIN=0.4218, gap=0.2222


Grid Search:  31%|███       | 11/36 [00:20<00:42,  1.72s/it]

🎯 Best threshold=0.49, RECALL_MIN=0.5455, gap=0.0817


Grid Search:  33%|███▎      | 12/36 [00:22<00:41,  1.72s/it]

🎯 Best threshold=0.48, RECALL_MIN=0.4400, gap=0.2041


Grid Search:  36%|███▌      | 13/36 [00:23<00:39,  1.73s/it]

🎯 Best threshold=0.50, RECALL_MIN=0.4109, gap=0.1823


Grid Search:  39%|███▉      | 14/36 [00:25<00:37,  1.70s/it]

🎯 Best threshold=0.50, RECALL_MIN=0.4746, gap=0.1291


Grid Search:  42%|████▏     | 15/36 [00:27<00:35,  1.71s/it]

🎯 Best threshold=0.50, RECALL_MIN=0.4109, gap=0.1823


Grid Search:  44%|████▍     | 16/36 [00:29<00:36,  1.81s/it]

🎯 Best threshold=0.48, RECALL_MIN=0.5382, gap=0.0437


Grid Search:  47%|████▋     | 17/36 [00:31<00:33,  1.77s/it]

🎯 Best threshold=0.48, RECALL_MIN=0.5455, gap=0.0817


Grid Search:  50%|█████     | 18/36 [00:32<00:30,  1.68s/it]

🎯 Best threshold=0.47, RECALL_MIN=0.4218, gap=0.2222


Grid Search:  53%|█████▎    | 19/36 [00:34<00:28,  1.66s/it]

🎯 Best threshold=0.49, RECALL_MIN=0.5455, gap=0.0817


Grid Search:  56%|█████▌    | 20/36 [00:36<00:27,  1.74s/it]

🎯 Best threshold=0.49, RECALL_MIN=0.5706, gap=0.0876


Grid Search:  58%|█████▊    | 21/36 [00:39<00:32,  2.14s/it]

🎯 Best threshold=0.48, RECALL_MIN=0.5455, gap=0.0817


Grid Search:  61%|██████    | 22/36 [00:40<00:27,  1.97s/it]

🎯 Best threshold=0.47, RECALL_MIN=0.4218, gap=0.2222


Grid Search:  64%|██████▍   | 23/36 [00:42<00:24,  1.88s/it]

🎯 Best threshold=0.49, RECALL_MIN=0.5455, gap=0.0817


Grid Search:  67%|██████▋   | 24/36 [00:44<00:22,  1.88s/it]

🎯 Best threshold=0.48, RECALL_MIN=0.4400, gap=0.2041


Grid Search:  69%|██████▉   | 25/36 [00:46<00:20,  1.87s/it]

🎯 Best threshold=0.50, RECALL_MIN=0.4109, gap=0.1823


Grid Search:  72%|███████▏  | 26/36 [00:47<00:18,  1.85s/it]

🎯 Best threshold=0.50, RECALL_MIN=0.4746, gap=0.1291


Grid Search:  75%|███████▌  | 27/36 [00:49<00:14,  1.64s/it]

🎯 Best threshold=0.50, RECALL_MIN=0.4109, gap=0.1823


Grid Search:  78%|███████▊  | 28/36 [00:50<00:13,  1.67s/it]

🎯 Best threshold=0.48, RECALL_MIN=0.5382, gap=0.0437


Grid Search:  81%|████████  | 29/36 [00:52<00:11,  1.61s/it]

🎯 Best threshold=0.48, RECALL_MIN=0.5455, gap=0.0817


Grid Search:  83%|████████▎ | 30/36 [00:53<00:09,  1.58s/it]

🎯 Best threshold=0.47, RECALL_MIN=0.4218, gap=0.2222


Grid Search:  86%|████████▌ | 31/36 [00:55<00:07,  1.52s/it]

🎯 Best threshold=0.49, RECALL_MIN=0.5455, gap=0.0817


Grid Search:  89%|████████▉ | 32/36 [00:57<00:06,  1.61s/it]

🎯 Best threshold=0.49, RECALL_MIN=0.5706, gap=0.0876


Grid Search:  92%|█████████▏| 33/36 [00:58<00:05,  1.68s/it]

🎯 Best threshold=0.48, RECALL_MIN=0.5455, gap=0.0817


Grid Search:  94%|█████████▍| 34/36 [01:00<00:03,  1.70s/it]

🎯 Best threshold=0.47, RECALL_MIN=0.4218, gap=0.2222


Grid Search:  97%|█████████▋| 35/36 [01:02<00:01,  1.71s/it]

🎯 Best threshold=0.49, RECALL_MIN=0.5455, gap=0.0817


Grid Search: 100%|██████████| 36/36 [01:04<00:00,  1.79s/it]

🎯 Best threshold=0.48, RECALL_MIN=0.4400, gap=0.2041





In [6]:
# 5. Analysis
# Rank every grid-search run by R_MIN (best first) and compare the winner
# against the reference score.
df_res = pd.DataFrame(results).sort_values(by="R_MIN", ascending=False)
print("\n🏆 TOP 10 MODELS:")
print(df_res.head(10))

# Top-ranked row; note it is a Series, so all values share one dtype.
best = df_res.iloc[0]
print("\n🥇 WINNER PARAMS:")
print(best.to_dict())

# Reference R_MIN the deep model has to exceed.
# NOTE(review): the origin of 0.587 is not shown in this notebook — record
# which run/config produced it.
baseline_rmin = 0.587
beat_baseline = best["R_MIN"] > baseline_rmin
if not beat_baseline:
    print(f"\n💀 FAILURE. Still can't beat baseline. ({best['R_MIN']:.3f} <= {baseline_rmin})")
    print("Hypothesis: Natal features are just noise.")
else:
    print(f"\n🚀 SUCCESS! Deep model beat baseline! ({best['R_MIN']:.3f} > {baseline_rmin})")


🏆 TOP 10 MODELS:
    n_estimators  max_depth  learning_rate  colsample_bytree  subsample  \
2            500          6           0.03               0.6        0.8   
14           900          6           0.03               0.6        0.8   
26          1300          6           0.03               0.6        0.8   
0            500          6           0.05               0.6        0.8   
24          1300          6           0.05               0.6        0.8   
12           900          6           0.05               0.6        0.8   
34          1300         10           0.03               0.6        0.8   
22           900         10           0.03               0.6        0.8   
10           500         10           0.03               0.6        0.8   
30          1300          8           0.03               0.6        0.8   

       R_MIN       MCC  
2   0.602941  0.315097  
14  0.602941  0.315097  
26  0.602941  0.315097  
0   0.597059  0.309950  
24  0.597059  0.309950  
12  0.

In [8]:
# 6. Export Best Model for Production Service
# Retrains the winning configuration and writes the model, its feature list
# and its configuration to a joblib artifact.
import joblib

best_params = best.to_dict()

# `best` is a single DataFrame row (a Series), so pandas upcast the integer
# hyper-parameters to float64 (500 -> 500.0, 6 -> 6.0). Restore the types
# declared in PARAM_GRID before handing them to the trainer; restricting to the
# PARAM_GRID keys also drops the metric columns (R_MIN, MCC).
xgb_params = {
    name: type(PARAM_GRID[name][0])(best_params[name])
    for name in PARAM_GRID
}
print(f"🔄 Retraining best model with params: {xgb_params}")

# Train final model with best params
final_model = train_xgb_model(
    X_train, y_train, X_val, y_val, feature_cols,
    n_classes=2, device=device, early_stopping_rounds=50, verbose=False,
    **xgb_params
)

# The reported metrics were measured at a validation-tuned decision threshold,
# so tune it the same way for the final model and ship it with the artifact.
export_threshold, _ = tune_threshold(final_model, X_val, y_val, metric="recall_min")

# Prepare model data for export
model_data = {
    "model": final_model,
    "feature_names": feature_cols,
    "config": {
        # Astro config
        "birth_date": str(TARGET_DATE),
        "coord_mode": ASTRO_CONFIG["coord_mode"],
        "orb_mult": ASTRO_CONFIG["orb_mult"],
        "gauss_window": ASTRO_CONFIG["gauss_window"],
        "gauss_std": ASTRO_CONFIG["gauss_std"],
        "exclude_bodies": ASTRO_CONFIG.get("exclude_bodies"),
        # XGBoost params
        **xgb_params,
        # Decision threshold to apply to the model's predicted probability
        "threshold": float(export_threshold),
        # Metrics: test-set scores of the winning grid-search row, not a
        # re-evaluation of `final_model`.
        "r_min": float(best["R_MIN"]),
        "mcc": float(best["MCC"]),
    }
}

# Save to models_artifacts (path is relative to the notebook's working directory)
output_path = "../models_artifacts/btc_astro_predictor.joblib"
# Create the target directory if needed so the dump cannot fail on a fresh checkout.
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
joblib.dump(model_data, output_path)
print(f"✅ Model exported to: {output_path}")
print(f"   Features: {len(feature_cols)}")
print(f"   Threshold: {float(export_threshold):.2f}")
print(f"   R_MIN: {best['R_MIN']:.3f}")
print(f"   MCC: {best['MCC']:.3f}")


NameError: name 'X' is not defined