In [None]:
# ======================================================
# API Keys Configuration
# ======================================================
import os

# Read API keys from the environment ONLY. Real credentials must never be
# committed as fallback defaults: anything hard-coded here is visible to every
# reader of the notebook and remains in the git history. Provide the values
# through GitHub Secrets, Colab Secrets, or local environment variables.
# NOTE(review): keys that were previously embedded in this cell should be
# treated as compromised and rotated.
ALPHA_VANTAGE_KEY = os.environ.get('ALPHA_VANTAGE_KEY', '')
BROWSERLESS_TOKEN = os.environ.get('BROWSERLESS_TOKEN', '')

# Re-export for downstream code only when a value is actually present, so that
# later "is it set?" checks (and the Colab-secret fallbacks in other cells) are
# not fooled by an empty placeholder.
if ALPHA_VANTAGE_KEY:
    os.environ['ALPHA_VANTAGE_KEY'] = ALPHA_VANTAGE_KEY
if BROWSERLESS_TOKEN:
    os.environ['BROWSERLESS_TOKEN'] = BROWSERLESS_TOKEN

# Validate: warn when a key is missing, otherwise show a masked preview
# (first and last four characters only).
if not ALPHA_VANTAGE_KEY:
    print("‚ö†Ô∏è Warning: ALPHA_VANTAGE_KEY not set!")
else:
    print(f"‚úÖ Alpha Vantage Key: {ALPHA_VANTAGE_KEY[:4]}...{ALPHA_VANTAGE_KEY[-4:]}")

if not BROWSERLESS_TOKEN:
    print("‚ö†Ô∏è Warning: BROWSERLESS_TOKEN not set!")
else:
    print(f"‚úÖ Browserless Token: {BROWSERLESS_TOKEN[:4]}...{BROWSERLESS_TOKEN[-4:]}")

In [None]:
# ======================================================
# Environment Detection & Setup (MUST RUN FIRST!)
# ======================================================
import os
import sys
from pathlib import Path

# Colab is recognised by whether its helper package can be imported.
try:
    import google.colab
except ImportError:
    IN_COLAB, ENV_NAME = False, "Local/GitHub Actions"
else:
    IN_COLAB, ENV_NAME = True, "Google Colab"

# GitHub Actions is recognised by the presence of GITHUB_ACTIONS in the
# environment; when present it takes precedence for the displayed name.
IN_GHA = os.environ.get("GITHUB_ACTIONS") is not None
if IN_GHA:
    ENV_NAME = "GitHub Actions"

# Colab works inside /content/forex-ai-models; GitHub Actions and local runs
# both use the current working directory directly.
BASE_FOLDER = Path("/content") if IN_COLAB else Path.cwd()
SAVE_FOLDER = BASE_FOLDER / "forex-ai-models" if IN_COLAB else BASE_FOLDER

# Organized directory layout, expressed as path parts relative to SAVE_FOLDER.
DIRECTORIES = {
    key: SAVE_FOLDER.joinpath(*parts)
    for key, parts in (
        ("data_raw", ("data", "raw", "yfinance")),
        ("data_processed", ("data", "processed")),
        ("database", ("database",)),
        ("logs", ("logs",)),
        ("outputs", ("outputs",)),
    )
}

# Make sure every directory exists (parents included, no error if present).
for dir_name, dir_path in DIRECTORIES.items():
    dir_path.mkdir(parents=True, exist_ok=True)

# Display environment info
print(
    "=" * 60,
    f"üåç Environment: {ENV_NAME}",
    f"üìÇ Base Folder: {BASE_FOLDER}",
    f"üíæ Save Folder: {SAVE_FOLDER}",
    f"üîß Python: {sys.version.split()[0]}",
    f"üìç Working Dir: {os.getcwd()}",
    "=" * 60,
    sep="\n",
)

# In GitHub Actions the git credentials/identity variables are mandatory:
# report the missing ones and stop with exit status 1.
if IN_GHA:
    required_vars = ["FOREX_PAT", "GIT_USER_NAME", "GIT_USER_EMAIL"]
    missing = [v for v in required_vars if not os.environ.get(v)]
    if not missing:
        print("‚úÖ All required environment variables present")
    else:
        print(f"‚ö†Ô∏è  Warning: Missing environment variables: {', '.join(missing)}")
        sys.exit(1)  # Fail fast in CI if critical vars missing

# Export commonly used paths as globals
CSV_FOLDER = DIRECTORIES["data_raw"]
PICKLE_FOLDER = DIRECTORIES["data_processed"]
DB_PATH = DIRECTORIES["database"].joinpath("memory_v85.db")
LOG_PATH = DIRECTORIES["logs"].joinpath("pipeline.log")
OUTPUT_PATH = DIRECTORIES["outputs"].joinpath("signals.json")

print(
    f"\nüìÅ Key Paths:",
    f"   CSV: {CSV_FOLDER}",
    f"   Pickles: {PICKLE_FOLDER}",
    f"   Database: {DB_PATH}",
    f"   Logs: {LOG_PATH}",
    f"   Signals: {OUTPUT_PATH}",
    "=" * 60,
    sep="\n",
)

In [None]:
# ======================================================
# üìÑ GitHub Sync (Environment-Aware) - ALIGNED VERSION
# ======================================================
import os
import subprocess
import shutil
from pathlib import Path
import urllib.parse
import sys

# ======================================================
# 1) Environment Detection (must match the environment-detection cell)
# ======================================================
try:
    import google.colab
except ImportError:
    IN_COLAB, ENV_NAME = False, "Local/GitHub Actions"
else:
    IN_COLAB, ENV_NAME = True, "Google Colab"

# GitHub Actions is detected from the environment and overrides the name.
IN_GHA = os.environ.get("GITHUB_ACTIONS") is not None
if IN_GHA:
    ENV_NAME = "GitHub Actions"

# ======================================================
# 2) Paths: same layout as the environment-detection cell
# ======================================================
if IN_COLAB:
    # Colab: the repository lives in (and is) the save folder.
    BASE_FOLDER = Path("/content")
    SAVE_FOLDER = REPO_FOLDER = BASE_FOLDER / "forex-ai-models"
    print("‚òÅÔ∏è Colab Mode: Cloning directly to /content/forex-ai-models")
else:
    # GitHub Actions and local runs both operate on the current directory,
    # which is expected to already be the repository.
    BASE_FOLDER = SAVE_FOLDER = REPO_FOLDER = Path.cwd()
    if IN_GHA:
        print("ü§ñ GitHub Actions Mode: Using current directory")
    else:
        print("üíª Local Mode: Using current directory")

# Organized directory layout, expressed as path parts relative to SAVE_FOLDER.
DIRECTORIES = {
    key: SAVE_FOLDER.joinpath(*parts)
    for key, parts in (
        ("data_raw", ("data", "raw", "yfinance")),
        ("data_processed", ("data", "processed")),
        ("database", ("database",)),
        ("logs", ("logs",)),
        ("outputs", ("outputs",)),
    )
}

print(
    "=" * 70,
    f"üîß Running in: {ENV_NAME}",
    f"üìÇ Working directory: {os.getcwd()}",
    f"üíæ Save folder: {SAVE_FOLDER}",
    f"üì¶ Repo folder: {REPO_FOLDER}",
    f"üêç Python: {sys.version.split()[0]}",
    "=" * 70,
    sep="\n",
)

# ======================================================
# 3) GitHub Configuration
# ======================================================
GITHUB_USERNAME = "rahim-dotAI"
GITHUB_REPO = "forex-ai-models"
BRANCH = "main"

# ======================================================
# 4) GitHub Token (multi-source)
# ======================================================
# First choice: the FOREX_PAT environment variable.
FOREX_PAT = os.environ.get("FOREX_PAT")

# Second choice (Colab only): the Colab secrets store. A found value is also
# exported to the environment so later cells can read it from there.
if not FOREX_PAT and IN_COLAB:
    try:
        from google.colab import userdata
        FOREX_PAT = userdata.get("FOREX_PAT")
        if FOREX_PAT:
            os.environ["FOREX_PAT"] = FOREX_PAT
            print("üîê Loaded FOREX_PAT from Colab secret.")
    except ImportError:
        pass
    except Exception as e:
        print(f"‚ö†Ô∏è Could not load Colab secret: {e}")

# Validate PAT and build the authenticated remote URL (None when no token).
if not FOREX_PAT:
    print("‚ö†Ô∏è Warning: FOREX_PAT not found. Git operations may fail.")
    print("   Set FOREX_PAT in:")
    print("   - GitHub Secrets (for Actions)")
    print("   - Colab Secrets (for Colab)")
    print("   - Environment variable (for local)")
    REPO_URL = None
else:
    # safe="" percent-encodes "/" too: quote() leaves it untouched by default,
    # but an unescaped "/" inside the userinfo part would terminate the
    # authority component and corrupt the URL.
    SAFE_PAT = urllib.parse.quote(FOREX_PAT, safe="")
    REPO_URL = f"https://{GITHUB_USERNAME}:{SAFE_PAT}@github.com/{GITHUB_USERNAME}/{GITHUB_REPO}.git"
    print("‚úÖ GitHub token configured")

# ======================================================
# 5) Handle Repository Based on Environment
# ======================================================
# Exactly one of three branches runs:
#   * GitHub Actions - only verifies that a .git directory is present.
#   * Google Colab   - clones the repository (or pulls if it is already
#                      cloned), then runs `git lfs uninstall`.
#   * Local          - only reports whether REPO_FOLDER is a git repository.
if IN_GHA:
    # ----- GitHub Actions -----
    print("\nü§ñ GitHub Actions Mode")
    print("‚úÖ Repository already checked out by actions/checkout")
    print(f"üìÇ Current directory: {Path.cwd()}")

    # Verify .git exists in the current working directory (warning only).
    if not (Path.cwd() / ".git").exists():
        print("‚ö†Ô∏è Warning: .git directory not found!")
        print("   Make sure actions/checkout@v4 is in your workflow")
    else:
        print("‚úÖ Git repository confirmed")

elif IN_COLAB:
    # ----- Google Colab -----
    print("\n‚òÅÔ∏è Google Colab Mode")

    if not REPO_URL:
        # No token means no authenticated URL, so nothing is cloned or pulled.
        print("‚ùå Cannot clone repository: FOREX_PAT not available")
    elif not (REPO_FOLDER / ".git").exists():
        # The folder may already exist without being a git checkout.
        # WARNING: it is deleted recursively before cloning, so any files
        # already in REPO_FOLDER are lost.
        if REPO_FOLDER.exists():
            print(f"‚ö†Ô∏è Directory exists but is not a git repo. Removing...")
            shutil.rmtree(REPO_FOLDER)
            print("‚úÖ Cleaned up non-git directory")

        # Clone the configured branch into REPO_FOLDER.
        print(f"üì• Cloning repository to {REPO_FOLDER}...")
        env = os.environ.copy()
        env["GIT_LFS_SKIP_SMUDGE"] = "1"  # Skip LFS files during the clone

        try:
            # check=True turns a non-zero git exit status into
            # CalledProcessError; the clone is abandoned after 60 seconds.
            result = subprocess.run(
                ["git", "clone", "-b", BRANCH, REPO_URL, str(REPO_FOLDER)],
                check=True,
                env=env,
                capture_output=True,
                text=True,
                timeout=60
            )
            print("‚úÖ Repository cloned successfully")

            # Only a successful clone changes the working directory.
            os.chdir(REPO_FOLDER)
            print(f"üìÇ Changed directory to: {os.getcwd()}")

        except subprocess.CalledProcessError as e:
            # Fall back to an empty folder so later directory creation works.
            # NOTE(review): e.stderr is printed verbatim and REPO_URL embeds
            # the token - confirm git never echoes the URL in its errors.
            print(f"‚ùå Clone failed: {e.stderr}")
            print("Creating directory structure manually...")
            REPO_FOLDER.mkdir(parents=True, exist_ok=True)
        except subprocess.TimeoutExpired:
            print("‚ùå Clone timed out after 60 seconds")
            REPO_FOLDER.mkdir(parents=True, exist_ok=True)
    else:
        # Repository exists: switch into it and pull the latest commits.
        print("‚úÖ Repository already exists, pulling latest changes...")
        os.chdir(REPO_FOLDER)

        try:
            result = subprocess.run(
                ["git", "pull", "origin", BRANCH],
                check=True,
                cwd=REPO_FOLDER,
                capture_output=True,
                text=True,
                timeout=30
            )
            print("‚úÖ Successfully pulled latest changes")
        except subprocess.CalledProcessError as e:
            # A failed pull is not fatal; the existing checkout is used as-is.
            print(f"‚ö†Ô∏è Pull failed: {e.stderr}")
            print("Continuing with existing files...")
        except subprocess.TimeoutExpired:
            print("‚ö†Ô∏è Pull timed out, continuing anyway...")

    # Configure Git LFS (disable for Colab). Runs for every Colab path above,
    # including the "no token" one; check=False ignores git's exit status and
    # any raised exception is reported as a warning only.
    print("‚öôÔ∏è Configuring Git LFS...")
    try:
        subprocess.run(
            ["git", "lfs", "uninstall"],
            check=False,
            cwd=REPO_FOLDER,
            capture_output=True
        )
        print("‚úÖ LFS disabled for Colab")
    except Exception as e:
        print(f"‚ö†Ô∏è LFS setup warning: {e}")

else:
    # ----- Local Environment -----
    print("\nüíª Local Development Mode")
    print(f"üìÇ Working in: {SAVE_FOLDER}")

    # Nothing is cloned locally; the user is only told how to do it.
    if not (REPO_FOLDER / ".git").exists():
        print("‚ö†Ô∏è Not a git repository")
        print("   Run: git clone https://github.com/rahim-dotAI/forex-ai-models.git")
    else:
        print("‚úÖ Git repository found")

# ======================================================
# 6) Create Organized Directory Structure
# ======================================================
print("\nüìÅ Creating organized directory structure...")
for dir_name, dir_path in DIRECTORIES.items():
    # parents=True builds intermediate folders; exist_ok=True makes re-runs safe.
    dir_path.mkdir(parents=True, exist_ok=True)
    print(f"   ‚úÖ {dir_name}: {dir_path}")

# ======================================================
# 7) Git Global Configuration
# ======================================================
print("\nüîß Configuring Git...")

# Identity used for commits; both values can be overridden via environment.
GIT_USER_NAME = os.environ.get("GIT_USER_NAME", "Forex AI Bot")
GIT_USER_EMAIL = os.environ.get("GIT_USER_EMAIL", "nakatonabira3@gmail.com")

# Each entry: (git command, human-readable description for warnings).
git_configs = [
    (["git", "config", "--global", "user.name", GIT_USER_NAME], "User name"),
    (["git", "config", "--global", "user.email", GIT_USER_EMAIL], "User email"),
    (["git", "config", "--global", "advice.detachedHead", "false"], "Detached HEAD warning"),
    (["git", "config", "--global", "init.defaultBranch", "main"], "Default branch")
]

# Apply every setting on a best-effort basis, but do not ignore failures:
# a command that cannot be started (e.g. git missing) or that exits with a
# non-zero status is reported, and success is only announced when all
# settings were applied.
git_config_failures = []
for cmd, description in git_configs:
    try:
        completed = subprocess.run(cmd, check=False, capture_output=True, text=True)
    except (OSError, subprocess.SubprocessError) as e:
        print(f"‚ö†Ô∏è Could not set {description}: {e}")
        git_config_failures.append(description)
        continue

    if completed.returncode != 0:
        print(f"‚ö†Ô∏è Could not set {description}: {completed.stderr.strip()}")
        git_config_failures.append(description)

if not git_config_failures:
    print(f"‚úÖ Git configured: {GIT_USER_NAME} <{GIT_USER_EMAIL}>")

# ======================================================
# 8) Export Path Constants (same layout as the environment-detection cell)
# ======================================================
CSV_FOLDER = DIRECTORIES["data_raw"]
PICKLE_FOLDER = DIRECTORIES["data_processed"]
DB_PATH = DIRECTORIES["database"].joinpath("memory_v85.db")
LOG_PATH = DIRECTORIES["logs"].joinpath("pipeline.log")
OUTPUT_PATH = DIRECTORIES["outputs"].joinpath("signals.json")

# ======================================================
# 9) Environment Summary & Validation
# ======================================================
print()
print(
    "=" * 70,
    "üßæ ENVIRONMENT SUMMARY",
    "=" * 70,
    f"Environment:      {ENV_NAME}",
    f"Working Dir:      {os.getcwd()}",
    f"Save Folder:      {SAVE_FOLDER}",
    f"Repo Folder:      {REPO_FOLDER}",
    f"Repository:       https://github.com/{GITHUB_USERNAME}/{GITHUB_REPO}",
    f"Branch:           {BRANCH}",
    f"Git Repo Exists:  {(REPO_FOLDER / '.git').exists()}",
    f"FOREX_PAT Set:    {'‚úÖ Yes' if FOREX_PAT else '‚ùå No'}",
    sep="\n",
)

# Resolved locations of the key artefacts.
print(
    "\nüìã Critical Paths:",
    f"   CSV Folder:    {CSV_FOLDER}",
    f"   Pickle Folder: {PICKLE_FOLDER}",
    f"   Database:      {DB_PATH}",
    f"   Logs:          {LOG_PATH}",
    f"   Signals:       {OUTPUT_PATH}",
    sep="\n",
)

# Existence check for the repository metadata and every working directory.
print("\nüìÇ Directory Status:")
critical_paths = {
    "Repo .git": REPO_FOLDER / ".git",
    "Data Raw": CSV_FOLDER,
    "Data Processed": PICKLE_FOLDER,
    "Database": DIRECTORIES["database"],
    "Logs": DIRECTORIES["logs"],
    "Outputs": DIRECTORIES["outputs"]
}

for name, path in critical_paths.items():
    exists = path.exists()
    icon = ("‚ùå", "‚úÖ")[exists]  # index 0 = missing, index 1 = present
    print(f"  {icon} {name}: {path}")

print(
    "=" * 70,
    "‚úÖ Setup completed successfully!",
    "=" * 70,
    sep="\n",
)

# ======================================================
# 10) Variables Available to Downstream Cells
# ======================================================
# After this cell the following globals are defined:
# - ENV_NAME: environment name
# - IN_COLAB: True when running in Google Colab
# - IN_GHA: True when running in GitHub Actions
# - SAVE_FOLDER: where files are saved (same as REPO_FOLDER in Colab)
# - REPO_FOLDER: path of the git repository
# - CSV_FOLDER, PICKLE_FOLDER, DB_PATH, LOG_PATH, OUTPUT_PATH: organized paths
# - GITHUB_USERNAME, GITHUB_REPO, BRANCH: git configuration
# - FOREX_PAT: GitHub token (if available)

print("\n‚úÖ All environment variables exported for downstream cells")

In [None]:
!pip install mplfinance firebase-admin dropbox requests beautifulsoup4 pandas numpy ta yfinance pyppeteer nest_asyncio lightgbm joblib matplotlib alpha_vantage tqdm scikit-learn river


In [None]:
#!/usr/bin/env python3
"""
ALPHA VANTAGE FX DATA FETCHER - OPTIMIZED FOR DAILY USE
=======================================================
✅ Designed to run ONCE per day (not every 2 hours)
✅ Reduces API usage from 48/day to 4/day
✅ Environment variable SKIP_ALPHA_VANTAGE support
✅ Data quality validation before saving
✅ Works in GitHub Actions, Google Colab, and Local
✅ Thread-safe operations with retry logic
✅ Clear naming: pair_daily_av.csv (av = Alpha Vantage)
"""

import os
import sys
import time
import hashlib
import requests
import subprocess
import threading
import urllib.parse
from pathlib import Path
from datetime import datetime, timezone
from concurrent.futures import ThreadPoolExecutor, as_completed
import pandas as pd
import numpy as np

# ======================================================
# SKIP CHECK - leave early when this fetch is not wanted
# ======================================================
# Only the exact value "true" (case-insensitive) enables the skip.
SKIP_ALPHA_VANTAGE = os.getenv("SKIP_ALPHA_VANTAGE", "false").lower() == "true"

if SKIP_ALPHA_VANTAGE:
    print(
        "=" * 70,
        "‚è≠Ô∏è  ALPHA VANTAGE SKIPPED (runs separately at midnight)",
        "=" * 70,
        "‚ÑπÔ∏è  Alpha Vantage daily data doesn't change hourly",
        "‚ÑπÔ∏è  Using existing data from last midnight run",
        "=" * 70,
        sep="\n",
    )
    # Exit status 0: skipping is a normal outcome, not an error.
    sys.exit(0)

# ======================================================
# 1) ENVIRONMENT DETECTION
# ======================================================
print(
    "=" * 70,
    "üöÄ Alpha Vantage FX Data Fetcher - Daily Optimized v2.0",
    "=" * 70,
    sep="\n",
)

# Colab is recognised by whether its helper package can be imported.
try:
    import google.colab
except ImportError:
    IN_COLAB, ENV_NAME = False, "Local"
else:
    IN_COLAB, ENV_NAME = True, "Google Colab"

# GitHub Actions is detected from the environment and overrides the name.
IN_GHA = os.environ.get("GITHUB_ACTIONS") is not None
if IN_GHA:
    ENV_NAME = "GitHub Actions"

print(
    f"üìç Environment: {ENV_NAME}",
    f"‚è∞ Current Time: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')}",
    f"üîÑ Fetch Mode: Daily (saves API calls)",
    "=" * 70,
    sep="\n",
)

# ======================================================
# 2) PATH CONFIGURATION
# ======================================================
# Colab works inside /content/forex-ai-models; GitHub Actions and local runs
# both use the current working directory, which doubles as the repository.
if IN_COLAB:
    BASE_FOLDER = Path("/content")
    SAVE_FOLDER = REPO_FOLDER = BASE_FOLDER / "forex-ai-models"
else:
    BASE_FOLDER = SAVE_FOLDER = REPO_FOLDER = Path.cwd()

# Directory structure, expressed as path parts relative to SAVE_FOLDER.
DIRECTORIES = {
    key: SAVE_FOLDER.joinpath(*parts)
    for key, parts in (
        ("data_raw_alpha", ("data", "raw", "alpha_vantage")),
        ("data_processed", ("data", "processed")),
        ("database", ("database",)),
        ("logs", ("logs",)),
        ("outputs", ("outputs",)),
        ("quarantine", ("data", "quarantine", "alpha_vantage")),
    )
}

# Make sure every directory exists (parents included, no error if present).
for dir_path in DIRECTORIES.values():
    dir_path.mkdir(parents=True, exist_ok=True)

CSV_FOLDER = DIRECTORIES["data_raw_alpha"]
QUARANTINE_FOLDER = DIRECTORIES["quarantine"]
LOG_FOLDER = DIRECTORIES["logs"]

print(
    f"üìÇ Base Folder: {BASE_FOLDER}",
    f"üíæ Save Folder: {SAVE_FOLDER}",
    f"üìä Alpha Vantage CSV: {CSV_FOLDER}",
    "=" * 70,
    sep="\n",
)

# ======================================================
# 3Ô∏è‚É£ DATA QUALITY VALIDATOR
# ======================================================
class DataQualityValidator:
    """Score OHLC price data on a 0-100 scale before it is saved.

    The score is the sum of four components:

    * up to 30 points for the share of rows with complete OHLC values,
    * up to 30 points for price variation (coefficient of variation of close),
    * up to 20 points for the share of distinct close prices,
    * up to 20 points for the median true range of the candles.

    Data is considered valid when the score reaches ``MIN_QUALITY_SCORE``.
    """

    MIN_ROWS = 50              # fewer rows are reported as an issue
    MIN_PRICE_CV = 0.01        # coefficient of variation, in percent
    MIN_UNIQUE_RATIO = 0.01    # distinct close prices / rows (1%)
    MIN_TRUE_RANGE = 1e-10     # median true range below this scores nothing
    MIN_QUALITY_SCORE = 40.0   # validity threshold for the total score

    @staticmethod
    def validate_dataframe(df, pair):
        """Validate the quality of an OHLC DataFrame.

        Args:
            df: DataFrame expected to contain 'open', 'high', 'low' and
                'close' columns; may be None or empty.
            pair: Label of the instrument. Accepted for API compatibility;
                it does not influence the result.

        Returns:
            Tuple ``(is_valid, quality_score, metrics, issues)`` where
            ``is_valid`` is True when ``quality_score`` reaches
            ``MIN_QUALITY_SCORE``, ``metrics`` is a dict of the computed
            statistics and ``issues`` is a list of human-readable problems.
            Whenever ``is_valid`` is False, ``issues`` is non-empty so callers
            can always report why the data was rejected.
        """
        if df is None or df.empty:
            return False, 0.0, {}, ["Empty DataFrame"]

        issues = []
        metrics = {}

        metrics['row_count'] = len(df)
        if len(df) < DataQualityValidator.MIN_ROWS:
            issues.append(f"Too few rows: {len(df)}")

        required_cols = ['open', 'high', 'low', 'close']
        missing_cols = [col for col in required_cols if col not in df.columns]
        if missing_cols:
            issues.append(f"Missing columns: {missing_cols}")
            return False, 0.0, metrics, issues

        # Only rows with all four OHLC values take part in the statistics.
        ohlc_data = df[required_cols].dropna()
        if len(ohlc_data) == 0:
            issues.append("No valid OHLC data")
            return False, 0.0, metrics, issues

        metrics['valid_rows'] = len(ohlc_data)
        metrics['valid_ratio'] = len(ohlc_data) / len(df)

        close_prices = ohlc_data['close']
        metrics['price_mean'] = float(close_prices.mean())
        metrics['price_std'] = float(close_prices.std())
        # Coefficient of variation in percent; 0 when the mean is not positive.
        metrics['price_cv'] = (metrics['price_std'] / metrics['price_mean']) * 100 if metrics['price_mean'] > 0 else 0.0

        metrics['unique_prices'] = close_prices.nunique()
        metrics['unique_ratio'] = metrics['unique_prices'] / len(close_prices)

        high = ohlc_data['high'].values
        low = ohlc_data['low'].values
        close = ohlc_data['close'].values

        # True range per row: the largest of high-low, |high - previous close|
        # and |low - previous close|. np.roll wraps the last close around to
        # row 0, so that row is overwritten with its plain high-low range.
        tr = np.maximum.reduce([
            high - low,
            np.abs(high - np.roll(close, 1)),
            np.abs(low - np.roll(close, 1))
        ])
        tr[0] = high[0] - low[0]

        metrics['true_range_median'] = float(np.median(tr))
        metrics['true_range_mean'] = float(np.mean(tr))

        # Quality score (0-100)
        quality_score = 0.0
        quality_score += metrics['valid_ratio'] * 30

        if metrics['price_cv'] >= 1.0:
            quality_score += 30
        elif metrics['price_cv'] >= DataQualityValidator.MIN_PRICE_CV:
            quality_score += (metrics['price_cv'] / 1.0) * 30
        else:
            issues.append(f"Low price variation: CV {metrics['price_cv']:.4f}%")

        quality_score += min(metrics['unique_ratio'] * 20, 20)
        if metrics['unique_ratio'] < DataQualityValidator.MIN_UNIQUE_RATIO:
            issues.append(f"Too few unique prices: {metrics['unique_ratio']:.2%}")

        if metrics['true_range_median'] >= 1e-5:
            quality_score += 20
        elif metrics['true_range_median'] >= DataQualityValidator.MIN_TRUE_RANGE:
            quality_score += (metrics['true_range_median'] / 1e-5) * 20
        else:
            issues.append(f"Flat candles: median true range {metrics['true_range_median']:.2e}")

        metrics['quality_score'] = quality_score
        is_valid = (quality_score >= DataQualityValidator.MIN_QUALITY_SCORE)

        # Guarantee that a rejection always carries an explanation.
        if not is_valid:
            issues.append(
                f"Quality score {quality_score:.1f} below minimum "
                f"{DataQualityValidator.MIN_QUALITY_SCORE:.1f}"
            )

        return is_valid, quality_score, metrics, issues

validator = DataQualityValidator()

# ======================================================
# 4) GITHUB CONFIGURATION
# ======================================================
GITHUB_USERNAME = "rahim-dotAI"
GITHUB_REPO = "forex-ai-models"
BRANCH = "main"

# Token lookup: environment variable first, Colab secrets as a fallback.
FOREX_PAT = os.environ.get("FOREX_PAT")

if not FOREX_PAT and IN_COLAB:
    try:
        from google.colab import userdata
        FOREX_PAT = userdata.get("FOREX_PAT")
        if FOREX_PAT:
            os.environ["FOREX_PAT"] = FOREX_PAT
    except ImportError:
        # Secrets helper unavailable: continue without a token.
        pass
    except Exception as e:
        # Report the problem instead of hiding it; a bare `except` would also
        # swallow KeyboardInterrupt/SystemExit.
        print(f"‚ö†Ô∏è Could not load Colab secret: {e}")

if FOREX_PAT:
    print("‚úÖ GitHub credentials configured")
else:
    print("‚ö†Ô∏è Warning: FOREX_PAT not found")

# Commit identity; both values can be overridden through the environment.
GIT_USER_NAME = os.environ.get("GIT_USER_NAME", "Forex AI Bot")
GIT_USER_EMAIL = os.environ.get("GIT_USER_EMAIL", "nakatonabira3@gmail.com")

# Best-effort global git identity (exit status intentionally not enforced).
subprocess.run(["git", "config", "--global", "user.name", GIT_USER_NAME],
               capture_output=True, check=False)
subprocess.run(["git", "config", "--global", "user.email", GIT_USER_EMAIL],
               capture_output=True, check=False)

# ======================================================
# 5) ALPHA VANTAGE CONFIGURATION
# ======================================================
# API key lookup: environment variable first, Colab secrets as a fallback.
ALPHA_VANTAGE_KEY = os.environ.get("ALPHA_VANTAGE_KEY")

if not ALPHA_VANTAGE_KEY and IN_COLAB:
    try:
        from google.colab import userdata
        ALPHA_VANTAGE_KEY = userdata.get("ALPHA_VANTAGE_KEY")
        if ALPHA_VANTAGE_KEY:
            os.environ["ALPHA_VANTAGE_KEY"] = ALPHA_VANTAGE_KEY
    except ImportError:
        # Secrets helper unavailable: the check below reports the missing key.
        pass
    except Exception as e:
        # Report the problem instead of hiding it; a bare `except` would also
        # swallow KeyboardInterrupt/SystemExit.
        print(f"‚ö†Ô∏è Could not load Colab secret: {e}")

# Without a key nothing can be fetched, so stop here.
if not ALPHA_VANTAGE_KEY:
    raise ValueError("‚ùå ALPHA_VANTAGE_KEY is required")

# Masked preview only (first and last four characters).
print(f"‚úÖ Alpha Vantage API key: {ALPHA_VANTAGE_KEY[:4]}...{ALPHA_VANTAGE_KEY[-4:]}")

FX_PAIRS = ["EUR/USD", "GBP/USD", "USD/JPY", "AUD/USD"]
# Daily request allowance the usage figure is measured against.
DAILY_REQUEST_LIMIT = 25
print(f"üìä Fetching {len(FX_PAIRS)} pairs: {', '.join(FX_PAIRS)}")
# The percentage is derived from the pair list so it stays correct when the
# list changes (4 pairs -> "16%").
print(
    f"üí° Daily API usage: {len(FX_PAIRS)} requests/day "
    f"({len(FX_PAIRS) / DAILY_REQUEST_LIMIT:.0%} of {DAILY_REQUEST_LIMIT} limit)"
)

# Guards file writes performed while processing pairs.
lock = threading.Lock()

# ======================================================
# 6Ô∏è‚É£ HELPER FUNCTIONS
# ======================================================
def ensure_tz_naive(df):
    """Return *df* with a timezone-naive datetime index.

    The index is parsed as datetimes (unparseable entries are coerced to NaT)
    and, when it carries a timezone, converted to naive timestamps. The frame
    is modified in place and returned; ``None`` and empty frames are returned
    untouched.
    """
    if df is not None and not df.empty:
        parsed_index = pd.to_datetime(df.index, errors='coerce')
        df.index = parsed_index
        if parsed_index.tz is not None:
            df.index = df.index.tz_convert(None)

    return df

def file_hash(filepath, chunk_size=8192):
    """Return the MD5 hex digest of a file, or None if it does not exist.

    The digest is used only to detect content changes. The file is read in
    blocks of *chunk_size* bytes so large files are never loaded at once.
    """
    if not filepath.exists():
        return None

    digest = hashlib.md5()
    with open(filepath, "rb") as stream:
        while True:
            block = stream.read(chunk_size)
            if block == b"":
                break
            digest.update(block)

    return digest.hexdigest()

def _redact_api_key(message):
    """Return *message* as text with the Alpha Vantage API key masked."""
    text = str(message)
    if ALPHA_VANTAGE_KEY:
        text = text.replace(ALPHA_VANTAGE_KEY, "***")
    return text


def fetch_alpha_vantage_fx(pair, outputsize='full', max_retries=3, retry_delay=5):
    """
    Fetch daily FX data from the Alpha Vantage API with retry logic.

    Args:
        pair: Currency pair written as "FROM/TO", e.g. "EUR/USD".
        outputsize: Value passed through as the API's `outputsize` parameter.
        max_retries: Maximum number of attempts.
        retry_delay: Seconds to wait after a failed attempt (doubled when the
            API answers with a rate-limit note).

    Returns:
        DataFrame indexed by date (ascending, timezone-naive) with float
        'open', 'high', 'low', 'close' columns, or an empty DataFrame when
        every attempt failed.

    Raises:
        ValueError: if *pair* does not split into exactly two parts on "/".
    """
    base_url = 'https://www.alphavantage.co/query'
    from_currency, to_currency = pair.split('/')

    params = {
        'function': 'FX_DAILY',
        'from_symbol': from_currency,
        'to_symbol': to_currency,
        'outputsize': outputsize,
        'datatype': 'json',
        'apikey': ALPHA_VANTAGE_KEY
    }

    for attempt in range(max_retries):
        try:
            print(f"  üîΩ Fetching {pair} (attempt {attempt + 1}/{max_retries})...")

            r = requests.get(base_url, params=params, timeout=30)
            r.raise_for_status()
            data = r.json()

            if 'Error Message' in data:
                raise ValueError(f"API Error: {data['Error Message']}")

            if 'Note' in data:
                # A 'Note' payload is treated as a rate-limit response: back
                # off for twice the normal delay, or give up on the last try.
                print(f"  ‚ö†Ô∏è API rate limit reached for {pair}")
                if attempt < max_retries - 1:
                    time.sleep(retry_delay * 2)
                    continue
                return pd.DataFrame()

            if 'Time Series FX (Daily)' not in data:
                raise ValueError(f"Unexpected response format: {list(data.keys())}")

            # The payload maps date -> {field: value}; transpose so that each
            # date becomes a row.
            ts = data['Time Series FX (Daily)']
            df = pd.DataFrame(ts).T
            df.index = pd.to_datetime(df.index)
            df = df.sort_index()

            df = df.rename(columns={
                '1. open': 'open',
                '2. high': 'high',
                '3. low': 'low',
                '4. close': 'close'
            })

            df = df.astype(float)
            df = ensure_tz_naive(df)

            print(f"  ‚úÖ Fetched {len(df)} rows for {pair}")
            return df

        # Exception text from `requests` contains the request URL, whose query
        # string carries the API key - mask it before it reaches the output.
        except requests.RequestException as e:
            print(f"  ‚ö†Ô∏è Network error: {_redact_api_key(e)}")

        except Exception as e:
            print(f"  ‚ö†Ô∏è Error: {_redact_api_key(e)}")

        # Only reached after a failed attempt: pause before the next one.
        if attempt < max_retries - 1:
            time.sleep(retry_delay)

    # All attempts failed (or max_retries was not positive).
    return pd.DataFrame()

# ======================================================
# 7Ô∏è‚É£ PAIR PROCESSING WITH QUALITY VALIDATION
# ======================================================
def process_pair(pair):
    """
    Fetch, merge, validate and store the daily data of one FX pair.

    Newly fetched rows are merged with the pair's existing CSV (new rows win
    on duplicate dates). Data scoring below the validator's minimum is written
    to the quarantine folder together with a text report instead of the CSV.

    Returns:
        Tuple of (CSV path as str if the file content changed else None,
        status message, quality score)
    """
    print(f"\nüîÑ Processing {pair}...")

    filename = f"{pair.replace('/', '_')}_daily_av.csv"
    csv_path = CSV_FOLDER / filename

    # Hash of the file as it is now (None when missing), used for change
    # detection after saving.
    hash_before = file_hash(csv_path)

    # Previously saved rows, if any; an unreadable file is reported and the
    # rows loaded so far (possibly none) are used.
    stored = pd.DataFrame()
    if csv_path.exists():
        try:
            stored = pd.read_csv(csv_path, index_col=0, parse_dates=True)
            stored = ensure_tz_naive(stored)
            print(f"  üìä Loaded {len(stored)} existing rows")
        except Exception as e:
            print(f"  ‚ö†Ô∏è Could not load existing data: {e}")

    fetched = fetch_alpha_vantage_fx(pair)
    if fetched.empty:
        return None, f"‚ùå {pair}: No data fetched", 0.0

    # Merge: on duplicate dates the freshly fetched row replaces the old one.
    if stored.empty:
        merged = fetched
    else:
        merged = pd.concat([stored, fetched])
        merged = merged[~merged.index.duplicated(keep='last')]

    merged.sort_index(inplace=True)

    is_valid, quality_score, metrics, issues = validator.validate_dataframe(merged, pair)
    print(f"  üìä Quality score: {quality_score:.1f}/100")

    if not is_valid:
        print(f"  ‚ö†Ô∏è Quality issues: {'; '.join(issues[:2])}")
        print(f"     CV: {metrics.get('price_cv', 0):.4f}%, Unique: {metrics.get('unique_ratio', 0):.1%}")

    if not is_valid and quality_score < DataQualityValidator.MIN_QUALITY_SCORE:
        print(f"  ‚ùå Data quality too low - quarantining")

        # Assemble the report first, then write data and report under the lock.
        report_lines = [
            f"Quality Report for {pair} (Alpha Vantage)\n",
            f"{'='*50}\n",
            f"Quality Score: {quality_score:.1f}/100\n",
            f"Issues: {'; '.join(issues)}\n",
            f"\nMetrics:\n",
        ]
        report_lines.extend(f"  {k}: {v}\n" for k, v in metrics.items())

        with lock:
            merged.to_csv(QUARANTINE_FOLDER / f"{filename}.bad")
            with open(QUARANTINE_FOLDER / f"{filename}.quality.txt", 'w') as report:
                report.writelines(report_lines)

        return None, f"‚ùå {pair}: Quality too low ({quality_score:.1f}/100)", quality_score

    # Accepted data: overwrite the CSV and compare hashes to detect a change.
    with lock:
        merged.to_csv(csv_path)

    if file_hash(csv_path) != hash_before:
        status = "‚úÖ Updated"
        changed_path = str(csv_path)
    else:
        status = "‚ÑπÔ∏è No changes"
        changed_path = None

    print(f"  {status} - {len(merged)} rows, quality: {quality_score:.1f}/100")

    summary = f"{status} {pair} ({len(merged)} rows, Q:{quality_score:.0f})"
    return changed_path, summary, quality_score

# ======================================================
# 8) EXECUTION WITH RATE LIMITING
# ======================================================
print("\n" + "=" * 70)
print("üöÄ Fetching FX data with quality validation...")
print("=" * 70)

changed_files = []    # paths of CSV files whose content changed
results = []          # one status message per pair (success or failure)
quality_scores = {}   # changed file path -> quality score

# Sequential processing with delays to respect rate limits
for pair_index, pair in enumerate(FX_PAIRS):
    try:
        filepath, message, quality = process_pair(pair)
        results.append(message)
        if filepath:
            changed_files.append(filepath)
            quality_scores[filepath] = quality

    except Exception as e:
        print(f"‚ùå {pair} processing failed: {e}")
        results.append(f"‚ùå {pair}: Failed")

    # Rate limiting: wait 15 seconds between requests. The wait sits outside
    # the try block so it also happens after a failed pair (the failed
    # attempt may already have used API calls), and the last pair is
    # identified by position rather than by value.
    if pair_index < len(FX_PAIRS) - 1:
        print(f"\n‚è≥ Waiting 15 seconds (rate limiting)...")
        time.sleep(15)
# ======================================================
# 9) RESULTS SUMMARY
# ======================================================
print()
print("=" * 70, "üìä PROCESSING SUMMARY", "=" * 70, sep="\n")

# One line per pair, in processing order.
for result in results:
    print(result)

print(
    f"\nTotal pairs processed: {len(FX_PAIRS)}",
    f"Files updated: {len(changed_files)}",
    f"API calls made: {len(FX_PAIRS)}",
    sep="\n",
)

# Quality figures exist only for files whose content changed.
if quality_scores:
    print()
    print("=" * 70, "üìä QUALITY REPORT", "=" * 70, sep="\n")
    avg_quality = sum(quality_scores.values()) / len(quality_scores)
    print(f"Average quality score: {avg_quality:.1f}/100")

    # Best score first; ties keep their insertion order.
    print(f"\nFiles by quality:")
    for fname, score in sorted(quality_scores.items(), key=lambda item: -item[1]):
        print(f"  {'‚úÖ' if score >= 60 else '‚ö†Ô∏è'} {Path(fname).name}: {score:.1f}/100")

# List everything currently sitting in the quarantine folder.
quarantined = list(QUARANTINE_FOLDER.glob("*.bad"))
if quarantined:
    print(f"\n‚ö†Ô∏è  QUARANTINED FILES: {len(quarantined)}")
    for qfile in quarantined:
        print(f"  ‚ùå {qfile.stem}")

# ======================================================
# 10) GIT COMMIT & PUSH
# ======================================================
if IN_GHA:
    # In GitHub Actions the workflow itself commits and pushes.
    print("\n" + "=" * 70)
    print("ü§ñ GitHub Actions: Handled by workflow")
    print("=" * 70)

elif changed_files and FOREX_PAT:
    print("\n" + "=" * 70)
    print("üöÄ Committing changes to GitHub...")
    print("=" * 70)

    # safe="" also percent-encodes "/", which quote() leaves alone by default
    # but which is not allowed unescaped inside the userinfo part of a URL.
    SAFE_PAT = urllib.parse.quote(FOREX_PAT, safe="")
    REPO_URL = f"https://{GITHUB_USERNAME}:{SAFE_PAT}@github.com/{GITHUB_USERNAME}/{GITHUB_REPO}.git"

    def _hide_token(text):
        """Mask the GitHub token in text that is about to be printed."""
        return str(text).replace(FOREX_PAT, "***").replace(SAFE_PAT, "***")

    try:
        os.chdir(REPO_FOLDER)

        subprocess.run(["git", "add", "-A"], check=False)

        commit_msg = f"üìä Alpha Vantage daily update - {len(changed_files)} files"
        if quality_scores:
            commit_msg += f" (Avg Q:{avg_quality:.0f})"

        result = subprocess.run(
            ["git", "commit", "-m", commit_msg],
            capture_output=True,
            text=True
        )

        if result.returncode == 0:
            print("‚úÖ Changes committed")

            # Up to three push attempts; after a rejected push the remote
            # branch is rebased in before trying again.
            for attempt in range(3):
                print(f"üì§ Pushing to GitHub (attempt {attempt + 1}/3)...")
                result = subprocess.run(
                    ["git", "push", REPO_URL, BRANCH],
                    capture_output=True,
                    text=True,
                    timeout=30
                )

                if result.returncode == 0:
                    print("‚úÖ Successfully pushed to GitHub")
                    break

                print(f"‚ö†Ô∏è Push attempt {attempt + 1}/3 failed: {_hide_token(result.stderr.strip())}")
                if attempt < 2:
                    subprocess.run(
                        ["git", "pull", "--rebase", REPO_URL, BRANCH],
                        capture_output=True,
                        timeout=30
                    )
                    time.sleep(3)
            else:
                # The loop ended without a successful push.
                print("‚ùå Push failed after 3 attempts")
        else:
            # git commit reports its reason on stdout or stderr; surface it
            # instead of skipping the push silently.
            print(f"‚ö†Ô∏è Commit not created: {_hide_token((result.stdout or result.stderr).strip())}")

    except Exception as e:
        # The text of subprocess exceptions (e.g. TimeoutExpired) contains the
        # full command line, including the token-bearing URL - mask it.
        print(f"‚ùå Git error: {_hide_token(e)}")
    finally:
        os.chdir(SAVE_FOLDER)

elif changed_files:
    # There are changes, but nothing can be pushed without a token.
    print("\n‚ö†Ô∏è Warning: FOREX_PAT not found")
    print(f"   {len(changed_files)} changed files were not committed")

else:
    print("\n‚ÑπÔ∏è No changes to commit")

# ======================================================
# COMPLETION
# ======================================================
print("\n" + "=" * 70)
print("‚úÖ ALPHA VANTAGE WORKFLOW COMPLETED")
print("=" * 70)
print(f"Environment: {ENV_NAME}")
print(f"Files updated: {len(changed_files)}")
print(f"Quality validated: ‚úÖ")
if quality_scores:
    print(f"Average quality: {avg_quality:.1f}/100")
print(f"API calls: {len(FX_PAIRS)}/25 daily limit")

# `results` receives one message per pair whether it succeeded or failed, so
# comparing its length with the pair count can never detect a failure.
# Failure messages are the ones starting with the cross-mark prefix.
failed_results = [message for message in results if message.startswith("‚ùå")]
workflow_ok = len(results) == len(FX_PAIRS) and not failed_results
print(f"Status: {'‚úÖ Success' if workflow_ok else '‚ö†Ô∏è Partial'}")
print("=" * 70)
print("\nüí° Optimization Summary:")
print("   ‚Ä¢ Runs once daily at midnight")
print("   ‚Ä¢ Uses 4 API calls/day (16% of limit)")
print("   ‚Ä¢ Saves 44 calls/day compared to hourly fetching")
print("   ‚Ä¢ Daily OHLC data doesn't change intraday")
print("=" * 70)

In [None]:
#!/usr/bin/env python3
"""
YFINANCE FX DATA FETCHER - CLEAN STRUCTURE EDITION
===================================================
✅ Aligned with clean repo structure (data/raw/yfinance)
✅ Relaxed quality thresholds for more data acceptance
✅ Automatic OHLC logic fixing
✅ Enhanced fallback options
✅ Smart data cleaning before validation
✅ Better symbol format handling
✅ Multi-environment support (Colab, GHA, Local)
"""

import os
import time
import hashlib
import subprocess
import shutil
import threading
import urllib.parse
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed
import pandas as pd
import numpy as np
import yfinance as yf
from datetime import datetime

print("=" * 70)
print("üöÄ YFinance FX Data Fetcher - Clean Structure Edition")
print("=" * 70)

# ======================================================
# ENVIRONMENT DETECTION
# ======================================================
# Colab is recognised by whether its runtime package can be imported.
try:
    import google.colab
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True

# The presence of the GITHUB_ACTIONS variable marks a GitHub Actions run.
IN_GHA = "GITHUB_ACTIONS" in os.environ

# GitHub Actions takes precedence over the Colab/local label.
if IN_GHA:
    ENV_NAME = "GitHub Actions"
elif IN_COLAB:
    ENV_NAME = "Google Colab"
else:
    ENV_NAME = "Local"

print(f"üåç Environment: {ENV_NAME}")

# ======================================================
# 2Ô∏è‚É£ UNIFIED PATH CONFIGURATION (MATCHES CLEAN STRUCTURE!)
# ======================================================
# Resolve the root folders for the detected environment. Only Colab uses a
# fixed location; GitHub Actions and local runs both work from the current
# working directory.
if IN_COLAB:
    print("‚òÅÔ∏è Google Colab detected - using clean structure")
    BASE_FOLDER = Path("/content")
    SAVE_FOLDER = BASE_FOLDER / "forex-ai-models"
else:
    if IN_GHA:
        print("ü§ñ GitHub Actions detected - using repository root")
    else:
        print("üíª Local environment detected - using clean structure")
    BASE_FOLDER = Path.cwd()
    SAVE_FOLDER = BASE_FOLDER

# In every environment the repository folder is the save folder.
REPO_FOLDER = SAVE_FOLDER

# Organised directory layout, all rooted at SAVE_FOLDER.
DIRECTORIES = {
    "data_raw_yfinance": SAVE_FOLDER.joinpath("data", "raw", "yfinance"),
    "data_processed": SAVE_FOLDER.joinpath("data", "processed"),
    "database": SAVE_FOLDER.joinpath("database"),
    "logs": SAVE_FOLDER.joinpath("logs"),
    "outputs": SAVE_FOLDER.joinpath("outputs"),
    "quarantine": SAVE_FOLDER.joinpath("data", "quarantine", "yfinance"),
}

# Create every directory (and missing parents); existing ones are left alone.
for dir_name, dir_path in DIRECTORIES.items():
    dir_path.mkdir(parents=True, exist_ok=True)

# Short aliases used by the rest of the cell.
CSV_FOLDER = DIRECTORIES["data_raw_yfinance"]  # YFinance CSVs are written here
QUARANTINE_FOLDER = DIRECTORIES["quarantine"]
LOG_FOLDER = DIRECTORIES["logs"]

print(
    f"üìÇ Base Folder: {BASE_FOLDER}",
    f"üíæ Save Folder: {SAVE_FOLDER}",
    f"üì¶ Repo Folder: {REPO_FOLDER}",
    f"üìä YFinance CSV: {CSV_FOLDER}",
    f"üóëÔ∏è Quarantine: {QUARANTINE_FOLDER}",
    "=" * 70,
    sep="\n",
)

# ======================================================
# 3Ô∏è‚É£ GIT CONFIGURATION
# ======================================================
# Commit identity: taken from the environment, with hard-coded fallbacks.
GIT_USER_NAME = os.environ.get("GIT_USER_NAME", "Forex AI Bot")
GIT_USER_EMAIL = os.environ.get("GIT_USER_EMAIL", "nakatonabira3@gmail.com")
GITHUB_USERNAME = "rahim-dotAI"
GITHUB_REPO = "forex-ai-models"
BRANCH = "main"

# Personal access token; None when the variable is not set.
FOREX_PAT = os.environ.get("FOREX_PAT")

# Fall back to Colab's secret store when running in Colab without the
# environment variable; a found token is exported back into os.environ.
if not FOREX_PAT and IN_COLAB:
    try:
        from google.colab import userdata
        FOREX_PAT = userdata.get("FOREX_PAT")
        if FOREX_PAT:
            os.environ["FOREX_PAT"] = FOREX_PAT
            print("üîê Loaded FOREX_PAT from Colab secrets")
    except Exception as e:
        print(f"‚ö†Ô∏è Could not access Colab secrets: {e}")

# Hard stop without a token.
# NOTE(review): this also fires under GitHub Actions, where the commit/push
# step later in this cell is skipped - confirm the token is really needed there.
if not FOREX_PAT:
    raise ValueError("‚ùå FOREX_PAT is required!")

# URL-quote the token so it can be embedded in the remote URL.
# REPO_URL contains the secret: never print or log it.
# NOTE(review): the push step later in this cell targets the "origin" remote
# rather than REPO_URL - confirm which is intended.
SAFE_PAT = urllib.parse.quote(FOREX_PAT)
REPO_URL = f"https://{GITHUB_USERNAME}:{SAFE_PAT}@github.com/{GITHUB_USERNAME}/{GITHUB_REPO}.git"

# Set the commit identity in the user's *global* git config. Failures are
# ignored (check=False) and output is captured rather than shown.
subprocess.run(["git", "config", "--global", "user.name", GIT_USER_NAME],
               capture_output=True, check=False)
subprocess.run(["git", "config", "--global", "user.email", GIT_USER_EMAIL],
               capture_output=True, check=False)

print(f"‚úÖ Git configured: {GIT_USER_NAME} <{GIT_USER_EMAIL}>")

# ======================================================
# 4Ô∏è‚É£ REPOSITORY MANAGEMENT (SIMPLIFIED)
# ======================================================
def ensure_repository():
    """Check that the working repository exists and try to refresh it.

    Under GitHub Actions the checkout is only verified (a warning is printed
    when ``.git`` is missing). Elsewhere, an existing clone is updated with
    ``git pull origin <BRANCH>``; every failure is reported and swallowed so
    the rest of the cell keeps running. Nothing is returned.
    """
    git_dir = REPO_FOLDER / ".git"

    if IN_GHA:
        print("\nü§ñ GitHub Actions: Repository already available")
        if git_dir.exists():
            print("‚úÖ Git repository verified")
        else:
            print("‚ö†Ô∏è Warning: .git directory not found")
        return

    print("\nüì• Managing repository...")

    if not git_dir.exists():
        # No clone to update: say whether the folder itself is missing.
        if REPO_FOLDER.exists():
            print("‚ö†Ô∏è Directory exists but is not a git repository")
        else:
            print("‚ö†Ô∏è Repository not found. This script expects the repo to be set up first.")
            print("   Please run the GitHub Sync script first!")
        return

    print("üîÑ Pulling latest changes...")
    try:
        outcome = subprocess.run(
            ["git", "-C", str(REPO_FOLDER), "pull", "origin", BRANCH],
            capture_output=True,
            text=True,
            timeout=30,
        )
        if outcome.returncode != 0:
            print("‚ö†Ô∏è Pull had issues, continuing anyway")
        else:
            print("‚úÖ Repository updated successfully")
    except Exception as e:
        # Best effort: a failed or timed-out pull must not abort the run.
        print(f"‚ö†Ô∏è Update failed: {e} - continuing with existing repo")

ensure_repository()

# ======================================================
# 5Ô∏è‚É£ RATE LIMITER
# ======================================================
class RateLimiter:
    """Thread-safe sliding-window rate limiter for API calls.

    Two windows are enforced: requests per minute and requests per hour.
    ``wait_if_needed`` blocks the caller until both windows have room, records
    the request, and then sleeps for a pacing delay.

    Args:
        requests_per_minute: Maximum requests in any 60-second window.
        requests_per_hour: Maximum requests in any 3600-second window.
        min_delay: Fixed pacing delay (seconds) applied after every request.
        max_jitter: Upper bound (seconds) of the random extra delay added to
            ``min_delay``.
    """

    def __init__(self, requests_per_minute=10, requests_per_hour=350,
                 min_delay=1.0, max_jitter=1.9):
        self.rpm = requests_per_minute
        self.rph = requests_per_hour
        self.min_delay = min_delay
        self.max_jitter = max_jitter
        self.request_times = []
        self.hourly_request_times = []
        self.lock = threading.Lock()
        self.total_requests = 0

    @staticmethod
    def _wait_for_slot(timestamps, limit, window):
        """Block until ``timestamps`` has room in ``window``; return it pruned.

        Only timestamps that have actually left the window are discarded, so a
        wait never resets the count of requests that are still recent.
        """
        now = time.time()
        recent = [t for t in timestamps if now - t < window]
        while recent and len(recent) >= limit:
            wait_time = window - (now - recent[0])
            if wait_time > 0:
                time.sleep(wait_time + 1)
            # Re-read the clock: time has moved on while sleeping.
            now = time.time()
            recent = [t for t in recent if now - t < window]
        return recent

    def wait_if_needed(self):
        """Block until a request is allowed, then record it and pace the caller."""
        import random

        # The lock is held across the sleeps on purpose: it serialises callers
        # so the pacing delay applies across all threads, not per thread.
        with self.lock:
            self.request_times = self._wait_for_slot(
                self.request_times, self.rpm, 60
            )
            self.hourly_request_times = self._wait_for_slot(
                self.hourly_request_times, self.rph, 3600
            )

            # Stamp the request with the time it is actually released, not the
            # time the caller arrived (which may be a long wait ago).
            now = time.time()
            self.request_times.append(now)
            self.hourly_request_times.append(now)
            self.total_requests += 1
            time.sleep(self.min_delay + random.uniform(0, self.max_jitter))

    def get_stats(self):
        """Return ``{'total_requests': <count>}`` for requests released so far."""
        with self.lock:
            return {'total_requests': self.total_requests}

rate_limiter = RateLimiter()

# ======================================================
# 6Ô∏è‚É£ DATA CLEANING & VALIDATION
# ======================================================
def fix_ohlc_logic(df):
    """Repair impossible OHLC relationships row by row.

    ``high`` becomes the row-wise maximum and ``low`` the row-wise minimum of
    the four price columns. Both are computed from the values as they were on
    entry, so a bar whose high and low were swapped keeps its true minimum
    (computing ``low`` after ``high`` was overwritten would lose it).

    Args:
        df: DataFrame expected to carry ``open``, ``high``, ``low``, ``close``.

    Returns:
        ``df`` itself when it is ``None`` or empty; otherwise a copy, repaired
        when all four columns are present and unchanged when any is missing.
    """
    if df is None or df.empty:
        return df

    df = df.copy()
    required_cols = ['open', 'high', 'low', 'close']

    if not all(col in df.columns for col in required_cols):
        return df

    # Snapshot both extremes before either column is written back.
    prices = df[required_cols]
    row_max = prices.max(axis=1)
    row_min = prices.min(axis=1)

    df['high'] = row_max
    df['low'] = row_min

    return df

class DataQualityValidator:
    """Relaxed quality checks for OHLC price data."""

    # Minimum number of rows in the frame (down from 10).
    MIN_ROWS = 5
    # Minimum coefficient of variation of close prices. price_cv is expressed
    # in percent, so 0.01 means 0.01 % (down from 0.1).
    MIN_PRICE_CV = 0.01
    # Minimum unique close prices / valid rows, as a fraction: 0.5 % (down from 5 %).
    MIN_UNIQUE_RATIO = 0.005
    # Not referenced by validate_dataframe; the median true range is only reported.
    MIN_TRUE_RANGE = 1e-12
    # Minimum score out of 100 (down from 40.0).
    MIN_QUALITY_SCORE = 20.0

    @staticmethod
    def validate_dataframe(df, pair, tf_name):
        """Score a DataFrame and decide whether it is acceptable.

        The score (0-100) is the sum of up to 30 points for the share of rows
        with complete OHLC values, up to 40 for close-price variation and up
        to 30 for the share of unique close prices.

        Args:
            df: DataFrame with ``open``/``high``/``low``/``close`` columns.
            pair: Currency pair label; not used by the checks.
            tf_name: Timeframe label; not used by the checks.

        Returns:
            Tuple ``(is_valid, quality_score, metrics, issues)``. ``issues``
            names every failed criterion whenever ``is_valid`` is False.
        """
        if df is None or df.empty:
            return False, 0.0, {}, ["Empty DataFrame"]

        issues = []
        metrics = {}

        metrics['row_count'] = len(df)
        if len(df) < DataQualityValidator.MIN_ROWS:
            return False, 0.0, metrics, [f"Too few rows: {len(df)}"]

        required_cols = ['open', 'high', 'low', 'close']
        if not all(col in df.columns for col in required_cols):
            return False, 0.0, metrics, ["Missing OHLC columns"]

        ohlc_data = df[required_cols].dropna()
        if len(ohlc_data) == 0:
            return False, 0.0, metrics, ["No valid OHLC data"]

        metrics['valid_rows'] = len(ohlc_data)
        metrics['valid_ratio'] = len(ohlc_data) / len(df)

        close_prices = ohlc_data['close']
        metrics['price_mean'] = float(close_prices.mean())
        metrics['price_std'] = float(close_prices.std())
        metrics['price_cv'] = (metrics['price_std'] / metrics['price_mean']) * 100 if metrics['price_mean'] > 0 else 0.0

        metrics['unique_prices'] = close_prices.nunique()
        metrics['unique_ratio'] = metrics['unique_prices'] / len(close_prices)

        # True range per bar: the largest of high-low and the two gaps to the
        # previous close.
        high = ohlc_data['high'].values
        low = ohlc_data['low'].values
        close = ohlc_data['close'].values

        tr = np.maximum.reduce([
            high - low,
            np.abs(high - np.roll(close, 1)),
            np.abs(low - np.roll(close, 1))
        ])
        # np.roll wraps the last close round to the first bar, which has no
        # previous close, so that bar falls back to its own range.
        tr[0] = high[0] - low[0]

        metrics['true_range_median'] = float(np.median(tr))

        # Quality score: full marks above the upper bound of each criterion,
        # proportional marks between the minimum and that bound, none below.
        quality_score = metrics['valid_ratio'] * 30

        if metrics['price_cv'] >= 0.5:
            quality_score += 40
        elif metrics['price_cv'] >= DataQualityValidator.MIN_PRICE_CV:
            quality_score += (metrics['price_cv'] / 0.5) * 40

        if metrics['unique_ratio'] >= 0.1:
            quality_score += 30
        elif metrics['unique_ratio'] >= DataQualityValidator.MIN_UNIQUE_RATIO:
            quality_score += (metrics['unique_ratio'] / 0.1) * 30

        metrics['quality_score'] = quality_score

        # Each criterion is evaluated once. Written as ">=" tests so a NaN
        # value (e.g. std of a single valid row) counts as a failure and is
        # reported, instead of slipping past a "<" comparison.
        score_ok = quality_score >= DataQualityValidator.MIN_QUALITY_SCORE
        cv_ok = metrics['price_cv'] >= DataQualityValidator.MIN_PRICE_CV
        unique_ok = metrics['unique_ratio'] >= DataQualityValidator.MIN_UNIQUE_RATIO

        is_valid = score_ok and cv_ok and unique_ok

        if not cv_ok:
            issues.append(f"Low CV: {metrics['price_cv']:.4f}%")
        if not unique_ok:
            issues.append(f"Low unique: {metrics['unique_ratio']:.3%}")
        if not score_ok:
            # Without this, data rejected only for its score had no stated reason.
            issues.append(
                f"Low quality score: {quality_score:.1f} < "
                f"{DataQualityValidator.MIN_QUALITY_SCORE}"
            )

        return is_valid, quality_score, metrics, issues

validator = DataQualityValidator()

# ======================================================
# 7Ô∏è‚É£ CONFIGURATION
# ======================================================
# Currency pairs to download, written as BASE/QUOTE.
FX_PAIRS = ["EUR/USD", "GBP/USD", "USD/JPY", "AUD/USD"]

# Timeframes to fetch. Each key becomes part of the CSV file name; its value
# is an ordered list of (interval, period) pairs that the worker tries in
# turn, moving to the next one when the earlier ones fail.
# NOTE(review): Yahoo restricts how far back intraday intervals reach -
# confirm each (interval, period) below is accepted by the installed yfinance.
TIMEFRAMES = {
    "1d_5y": [
        ("1d", "5y"),
        ("1d", "max"),  # Try max available
        ("1d", "3y"),
        ("1d", "2y"),
    ],
    "1h_2y": [
        ("1h", "2y"),
        ("1h", "1y"),
        ("1h", "730d"),  # Exactly 2 years in days
        ("1h", "6mo")
    ],
    "15m_60d": [
        ("15m", "60d"),
        ("15m", "2mo"),
        ("15m", "30d"),
    ],
    "5m_1mo": [
        ("5m", "1mo"),
        ("5m", "30d"),
        ("5m", "14d"),
    ],
    "1m_7d": [
        ("1m", "7d"),
        ("1m", "5d"),
        ("1m", "3d"),
    ]
}

# One download task is scheduled per (pair, timeframe) combination.
print(f"\nüìä Configuration:")
print(f"   Pairs: {len(FX_PAIRS)}")
print(f"   Timeframes: {len(TIMEFRAMES)}")
print(f"   Total tasks: {len(FX_PAIRS) * len(TIMEFRAMES)}")
print(f"   Quality threshold: {validator.MIN_QUALITY_SCORE}/100 (RELAXED)")
print("=" * 70)

# Module-level lock; the worker holds it while writing a CSV file.
lock = threading.Lock()

# ======================================================
# 8Ô∏è‚É£ HELPER FUNCTIONS
# ======================================================
def file_hash(filepath):
    """Return the MD5 hex digest of a file, or ``None`` if it does not exist.

    The digest is used only to detect whether a file's content changed, not
    for security.

    Args:
        filepath: Path to the file, as a ``pathlib.Path`` or a plain string.

    Returns:
        The 32-character hex digest, or ``None`` when the file is missing.
    """
    md5 = hashlib.md5()
    # Open directly instead of checking exists() first: this also works for
    # string paths and cannot race with the file being removed in between.
    try:
        with open(filepath, "rb") as f:
            # Read in 8 KiB chunks so large files are never fully in memory.
            for chunk in iter(lambda: f.read(8192), b""):
                md5.update(chunk)
    except FileNotFoundError:
        return None
    return md5.hexdigest()

def ensure_tz_naive(df):
    """Give ``df`` a timezone-naive datetime index, modifying it in place.

    The index is parsed with ``pd.to_datetime`` (unparseable entries become
    ``NaT``). A timezone-aware index is converted to UTC and then stripped of
    its timezone. ``None`` and empty frames are returned untouched.
    """
    if df is None or df.empty:
        return df

    parsed_index = pd.to_datetime(df.index, errors='coerce')
    if parsed_index.tz is not None:
        # tz_convert(None) shifts to UTC before dropping the timezone.
        parsed_index = parsed_index.tz_convert(None)

    df.index = parsed_index
    return df

def merge_data(existing_df, new_df):
    """Combine stored and freshly downloaded rows into one sorted frame.

    Both inputs are first passed through ``ensure_tz_naive``. When either
    side is empty the other is returned as is. Otherwise the frames are
    concatenated, rows sharing an index value keep only the last occurrence
    (so new data wins over stored data), and the result is sorted by index.
    """
    old_frame = ensure_tz_naive(existing_df)
    fresh_frame = ensure_tz_naive(new_df)

    if old_frame.empty:
        return fresh_frame
    if fresh_frame.empty:
        return old_frame

    stacked = pd.concat([old_frame, fresh_frame])
    superseded = stacked.index.duplicated(keep="last")
    return stacked[~superseded].sort_index()

def get_symbol_variants(pair, interval):
    """Return the Yahoo Finance ticker symbols to try for an FX pair.

    The first entry is always the full pair symbol (``EUR/USD`` ->
    ``EURUSD=X``). For the ``1d`` and ``1h`` intervals, USD-based pairs also
    get Yahoo's short form, which names only the quote currency
    (``USD/JPY`` -> ``JPY=X``).

    No short form is offered for other pairs: ``EUR=X`` is the USD/EUR rate,
    i.e. the inverse of EUR/USD, and merging it would corrupt the series.
    The list contains no duplicates, so no symbol is retried twice.

    Args:
        pair: Pair written as ``BASE/QUOTE``.
        interval: Download interval such as ``"1d"`` or ``"5m"``.

    Returns:
        List of symbols in the order they should be tried.
    """
    from_curr, _, to_curr = pair.partition("/")
    variants = [f"{from_curr}{to_curr}=X"]

    if interval in ("1d", "1h") and from_curr == "USD" and to_curr:
        variants.append(f"{to_curr}=X")

    return variants

# ======================================================
# 9Ô∏è‚É£ WORKER FUNCTION
# ======================================================
def process_pair_tf(pair, tf_name, interval_period_options, max_retries=3):
    """
    Download, merge, repair, validate and save the CSV for one pair/timeframe.

    New rows are merged into the existing CSV (if any), high/low are repaired
    with fix_ohlc_logic, and the merged frame is validated before it is
    written to CSV_FOLDER as "<PAIR>_<tf_name>.csv" ("/" replaced by "_").

    Args:
        pair: Currency pair such as "EUR/USD".
        tf_name: Timeframe key used in the file name.
        interval_period_options: Ordered (interval, period) pairs to try.
        max_retries: Download attempts per symbol variant.

    Returns:
        Tuple of (message, filepath if changed, quality_score). The path is
        a string when the file content changed and None otherwise; when every
        attempt fails the result is a failure message, None and 0.0.
    """
    # Target file inside the YFinance CSV folder.
    filename = f"{pair.replace('/', '_')}_{tf_name}.csv"
    filepath = CSV_FOLDER / filename

    # Load what is already on disk; an unreadable file is reported and then
    # treated as if there were no existing data.
    existing_df = pd.DataFrame()
    if filepath.exists():
        try:
            existing_df = pd.read_csv(filepath, index_col=0, parse_dates=True)
            existing_df = ensure_tz_naive(existing_df)
        except Exception as e:
            print(f"  ‚ö†Ô∏è Could not load existing data: {e}")

    # Hash before writing so a real content change can be detected afterwards.
    old_hash = file_hash(filepath)

    # Three nested levels: (interval, period) option -> symbol variant -> retry.
    for option_idx, (interval, period) in enumerate(interval_period_options):
        symbol_variants = get_symbol_variants(pair, interval)

        for symbol in symbol_variants:
            for attempt in range(max_retries):
                try:
                    rate_limiter.wait_if_needed()

                    ticker = yf.Ticker(symbol)
                    df = ticker.history(
                        period=period,
                        interval=interval,
                        auto_adjust=False,
                        prepost=False,
                        actions=False,
                        raise_errors=False
                    )

                    # An empty download is routed through the except branch
                    # below so it is retried like any other failure.
                    if df.empty:
                        raise ValueError("Empty data")

                    # Keep only the price/volume columns that are present and
                    # lower-case their names.
                    available_cols = [c for c in ['Open', 'High', 'Low', 'Close', 'Volume']
                                     if c in df.columns]
                    df = df[available_cols]
                    df.rename(columns=lambda x: x.lower(), inplace=True)
                    df = ensure_tz_naive(df)

                    combined_df = merge_data(existing_df, df)

                    # Repair high/low before validation.
                    combined_df = fix_ohlc_logic(combined_df)

                    is_valid, quality_score, metrics, issues = validator.validate_dataframe(
                        combined_df, pair, tf_name
                    )

                    if not is_valid:
                        if attempt < max_retries - 1:
                            # Back off (3s, 6s, ...) and download again.
                            time.sleep(3 * (2 ** attempt))
                            continue
                        elif option_idx < len(interval_period_options) - 1:
                            # Leaves the retry loop only: the next symbol
                            # variant is tried before the next option.
                            break
                        else:
                            # Last option and last retry: fall through and
                            # save the low-quality data anyway.
                            print(f"  ‚ö†Ô∏è Low quality ({quality_score:.1f}) but saving: {pair} {tf_name}")

                    # Write the merged frame while holding the module lock.
                    with lock:
                        combined_df.to_csv(filepath)

                    new_hash = file_hash(filepath)
                    changed = (old_hash != new_hash)

                    # A score below 50 is flagged with the warning icon.
                    status = "‚úÖ" if quality_score >= 50 else "‚ö†Ô∏è"
                    msg = f"{status} {pair} {tf_name} - {len(combined_df)} rows, Q:{quality_score:.0f}"
                    print(f"  {msg}")
                    return msg, str(filepath) if changed else None, quality_score

                except Exception as e:
                    # Failures are not logged here; they only drive the
                    # retry and fallback flow.
                    if attempt < max_retries - 1:
                        time.sleep(3 * (2 ** attempt))
                    else:
                        if option_idx < len(interval_period_options) - 1:
                            # Leaves the retry loop; the next symbol variant
                            # (then the next option) is tried.
                            break

    # Every option, symbol variant and retry failed.
    return f"‚ùå Failed {pair} {tf_name}", None, 0.0

# ======================================================
# üîü PARALLEL EXECUTION
# ======================================================
print("\n" + "=" * 70)
print("üöÄ Starting YFinance data download...")
print("=" * 70 + "\n")

start_time = time.time()
changed_files, results, quality_scores = [], [], {}

# Two workers share the module-level rate limiter.
with ThreadPoolExecutor(max_workers=2) as executor:
    # Queue one task per (pair, timeframe), pairs outermost.
    tasks = []
    for pair in FX_PAIRS:
        for tf_name, options in TIMEFRAMES.items():
            job = executor.submit(process_pair_tf, pair, tf_name, options)
            tasks.append(job)

    # Collect results as tasks finish; a crashed task becomes an error line.
    for future in as_completed(tasks):
        try:
            msg, filename, quality = future.result()
        except Exception as e:
            results.append(f"‚ùå Error: {e}")
            continue

        results.append(msg)
        # filename is only set when the CSV content actually changed.
        if filename:
            changed_files.append(filename)
            quality_scores[filename] = quality

elapsed_time = time.time() - start_time

# ======================================================
# 1Ô∏è‚É£1Ô∏è‚É£ SUMMARY
# ======================================================
print("\n" + "=" * 70, "üìä PROCESSING SUMMARY", "=" * 70, sep="\n")

for result in results:
    print(result)

# A task counts as successful when its message carries either the OK or the
# warning icon (low-quality saves count too).
success_count = sum(1 for r in results if "‚úÖ" in r or "‚ö†Ô∏è" in r)
print(f"\nTotal tasks: {len(results)}")
print(f"Successful: {success_count}/{len(results)}")
print(f"Files updated: {len(changed_files)}")
print(f"Time: {elapsed_time/60:.1f} min")

if quality_scores:
    # Scores exist only for files whose content changed in this run.
    avg_q = sum(quality_scores.values()) / len(quality_scores)
    print(f"Average quality: {avg_q:.1f}/100")

    print("\n" + "=" * 70, "üìä QUALITY REPORT", "=" * 70, sep="\n")
    # Best score first.
    for fname, score in sorted(quality_scores.items(), key=lambda entry: entry[1], reverse=True):
        status = "‚ö†Ô∏è" if score < 50 else "‚úÖ"
        print(f"  {status} {Path(fname).name}: {score:.1f}/100")

# List any "*.bad" files present in the quarantine folder.
quarantined = list(QUARANTINE_FOLDER.glob("*.bad"))
if quarantined:
    print("\n" + "=" * 70)
    print(f"‚ö†Ô∏è  QUARANTINED FILES: {len(quarantined)}")
    print("=" * 70)
    for qfile in quarantined:
        print(f"  ‚ùå {qfile.stem}")

# ======================================================
# 1Ô∏è‚É£2Ô∏è‚É£ GIT COMMIT & PUSH
# ======================================================
# Commit and push the updated CSVs. Skipped entirely under GitHub Actions and
# when no file changed in this run.
if IN_GHA:
    print("\n" + "=" * 70)
    print("ü§ñ GitHub Actions: Skipping git operations")
    print("=" * 70)

elif changed_files:
    print("\n" + "=" * 70)
    print("üöÄ Committing changes to GitHub...")
    print("=" * 70)

    try:
        # git commands below run relative to the repository folder.
        os.chdir(REPO_FOLDER)

        subprocess.run(["git", "add", "-A"], check=False)

        commit_msg = f"Update YFinance data - {len(changed_files)} files"
        if quality_scores:
            commit_msg += f" (Avg Q:{avg_q:.0f})"

        result = subprocess.run(
            ["git", "commit", "-m", commit_msg],
            capture_output=True,
            text=True
        )

        if result.returncode == 0:
            print("‚úÖ Changes committed")

            for attempt in range(3):
                print(f"üì§ Pushing to GitHub (attempt {attempt + 1}/3)...")
                result = subprocess.run(
                    ["git", "push", "origin", BRANCH],
                    capture_output=True,
                    text=True,
                    timeout=30
                )

                if result.returncode == 0:
                    print("‚úÖ Successfully pushed to GitHub")
                    break

                if attempt < 2:
                    # Rebase onto the remote before retrying. The timeout keeps
                    # a stalled network call from hanging the cell; a timeout
                    # is reported by the except branch below.
                    subprocess.run(
                        ["git", "pull", "--rebase", "origin", BRANCH],
                        capture_output=True,
                        timeout=60
                    )
                    time.sleep(3)
            else:
                # Reached only when no attempt succeeded; previously this
                # ended silently. git's stderr is deliberately not echoed,
                # since it may contain a remote URL with credentials.
                print("‚ùå Push failed after 3 attempts - changes are committed locally only")
        else:
            # git commit exits non-zero when there is nothing to commit.
            print("‚ÑπÔ∏è  No changes to commit")

    except Exception as e:
        print(f"‚ùå Git error: {e}")
    finally:
        # Always return to the save folder, whatever happened above.
        os.chdir(SAVE_FOLDER)

else:
    print("\n‚ÑπÔ∏è No changes to commit")

# ======================================================
# ‚úÖ COMPLETION
# ======================================================
# Closing banner: run outcome first, then where each data source is stored.
print("\n" + "=" * 70, "‚úÖ YFINANCE WORKFLOW COMPLETED", "=" * 70, sep="\n")
print(f"Environment: {ENV_NAME}")
print(f"Files updated: {len(changed_files)}")
print("Quality validated: ‚úÖ")
if quality_scores:
    # avg_q is only computed when at least one file was scored.
    print(f"Average quality: {avg_q:.1f}/100")
print(f"Status: {'‚úÖ Success' if success_count == len(results) else '‚ö†Ô∏è Partial'}")
print(f"Rate limiter: {rate_limiter.get_stats()['total_requests']} requests")
print("=" * 70)

print("\nüìÅ Clean File Structure:")
print(f"   YFinance: {CSV_FOLDER}")
print("   ‚îî‚îÄ‚îÄ EUR_USD_1d_5y.csv, EUR_USD_1h_2y.csv, etc.")
print(f"   Alpha Vantage: {SAVE_FOLDER / 'data' / 'raw' / 'alpha_vantage'}")
print("   ‚îî‚îÄ‚îÄ EUR_USD_daily_av.csv")
print("\nüéØ All data sources in organized folders!", "=" * 70, sep="\n")

In [None]:
#!/usr/bin/env python3
"""
FX CSV Combiner + Multi-Type Handler - CLEAN STRUCTURE EDITION
==============================================================
✅ Aligned with clean repo structure (data/raw/, data/processed/)
✅ Combines Alpha Vantage + YFinance data
✅ Full-dataset indicator calculation (not incremental)
✅ ATR preservation (no clipping or scaling)
✅ Quality validation before processing
✅ Multi-environment support (Colab, GHA, Local)
"""

import os
import time
import hashlib
import subprocess
import shutil
import urllib.parse
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed
import threading
import pandas as pd
import numpy as np
from sklearn.preprocessing import RobustScaler
import ta
from ta.momentum import WilliamsRIndicator
from ta.volatility import AverageTrueRange
import warnings

warnings.filterwarnings('ignore')

print("=" * 70)
print("üîß CSV Combiner & Multi-Type Handler - Clean Structure Edition")
print("=" * 70)

# ======================================================
# ENVIRONMENT DETECTION
# ======================================================
# Colab is recognised by whether its runtime package can be imported.
try:
    import google.colab
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True

# The presence of the GITHUB_ACTIONS variable marks a GitHub Actions run.
IN_GHA = "GITHUB_ACTIONS" in os.environ

# GitHub Actions takes precedence over the Colab/local label.
if IN_GHA:
    ENV_NAME = "GitHub Actions"
elif IN_COLAB:
    ENV_NAME = "Google Colab"
else:
    ENV_NAME = "Local"

print(f"üåç Environment: {ENV_NAME}")

# ======================================================
# 2Ô∏è‚É£ UNIFIED PATH CONFIGURATION (MATCHES CLEAN STRUCTURE!)
# ======================================================
# Resolve the root folders for the detected environment. Only Colab uses a
# fixed location; GitHub Actions and local runs both work from the current
# working directory.
if IN_COLAB:
    print("‚òÅÔ∏è Google Colab detected - using clean structure")
    BASE_FOLDER = Path("/content")
    SAVE_FOLDER = BASE_FOLDER / "forex-ai-models"
else:
    if IN_GHA:
        print("ü§ñ GitHub Actions detected - using repository root")
    else:
        print("üíª Local environment detected - using clean structure")
    BASE_FOLDER = Path.cwd()
    SAVE_FOLDER = BASE_FOLDER

# In every environment the repository folder is the save folder.
REPO_FOLDER = SAVE_FOLDER

# Organised directory layout, all rooted at SAVE_FOLDER.
DIRECTORIES = {
    "data_raw_yfinance": SAVE_FOLDER.joinpath("data", "raw", "yfinance"),
    "data_raw_alpha": SAVE_FOLDER.joinpath("data", "raw", "alpha_vantage"),
    "data_processed": SAVE_FOLDER.joinpath("data", "processed"),
    "database": SAVE_FOLDER.joinpath("database"),
    "logs": SAVE_FOLDER.joinpath("logs"),
    "outputs": SAVE_FOLDER.joinpath("outputs"),
    "quarantine": SAVE_FOLDER.joinpath("data", "quarantine", "combiner"),
}

# Create every directory (and missing parents); existing ones are left alone.
for dir_name, dir_path in DIRECTORIES.items():
    dir_path.mkdir(parents=True, exist_ok=True)

# Short aliases used by the rest of the cell.
YFINANCE_CSV_FOLDER = DIRECTORIES["data_raw_yfinance"]
ALPHA_CSV_FOLDER = DIRECTORIES["data_raw_alpha"]
PICKLE_FOLDER = DIRECTORIES["data_processed"]
QUARANTINE_FOLDER = DIRECTORIES["quarantine"]
LOG_FOLDER = DIRECTORIES["logs"]

print(
    f"üìÇ Base Folder: {BASE_FOLDER}",
    f"üíæ Save Folder: {SAVE_FOLDER}",
    f"üì¶ Repo Folder: {REPO_FOLDER}",
    f"üìä YFinance CSV: {YFINANCE_CSV_FOLDER}",
    f"üìä Alpha CSV: {ALPHA_CSV_FOLDER}",
    f"üîß Processed: {PICKLE_FOLDER}",
    f"üóëÔ∏è Quarantine: {QUARANTINE_FOLDER}",
    "=" * 70,
    sep="\n",
)

lock = threading.Lock()

def print_status(msg, level="info"):
    """Print ``msg`` prefixed with the icon for ``level``.

    Known levels are ``info``, ``success``, ``warn``, ``error`` and ``debug``;
    any other level falls back to the ``info`` icon.
    """
    icon_by_level = {
        "info": "‚ÑπÔ∏è",
        "success": "‚úÖ",
        "warn": "‚ö†Ô∏è",
        "error": "‚ùå",
        "debug": "üêû",
    }
    try:
        icon = icon_by_level[level]
    except KeyError:
        icon = '‚ÑπÔ∏è'
    print(f"{icon} {msg}")

# ======================================================
# 3Ô∏è‚É£ DATA QUALITY VALIDATOR
# ======================================================
class DataQualityValidator:
    """Validate data quality for OHLC files."""

    # Fewer rows than this is recorded as an issue but does not by itself
    # reject the data.
    MIN_ROWS = 10
    # Minimum coefficient of variation of close prices, in percent (0.01 %).
    MIN_PRICE_CV = 0.01
    # Minimum unique close prices / valid rows, as a fraction (0.5 %).
    MIN_UNIQUE_RATIO = 0.005
    # Not referenced by validate_dataframe; the median true range is only reported.
    MIN_TRUE_RANGE = 1e-10
    # Minimum score out of 100 (relaxed from 30).
    MIN_QUALITY_SCORE = 20.0

    @staticmethod
    def validate_dataframe(df, filename):
        """Score a DataFrame and decide whether it is acceptable.

        The score (0-100) is the sum of up to 30 points for the share of rows
        with complete OHLC values, up to 40 for close-price variation and up
        to 30 for the share of unique close prices. Data is valid when the
        score and the price variation both reach their minimums.

        Args:
            df: DataFrame with ``open``/``high``/``low``/``close`` columns.
            filename: Source label; not used by the checks.

        Returns:
            Tuple ``(is_valid, quality_score, metrics, issues)``. ``issues``
            is never empty when ``is_valid`` is False; it may also carry a
            "Too few rows" note for data that is otherwise valid.
        """
        if df is None or df.empty:
            return False, 0.0, {}, ["Empty DataFrame"]

        issues = []
        metrics = {}

        metrics['row_count'] = len(df)
        if len(df) < DataQualityValidator.MIN_ROWS:
            issues.append(f"Too few rows: {len(df)}")

        required_cols = ['open', 'high', 'low', 'close']
        missing_cols = [col for col in required_cols if col not in df.columns]
        if missing_cols:
            issues.append(f"Missing columns: {missing_cols}")
            return False, 0.0, metrics, issues

        ohlc_data = df[required_cols].dropna()
        if len(ohlc_data) == 0:
            issues.append("No valid OHLC data")
            return False, 0.0, metrics, issues

        metrics['valid_rows'] = len(ohlc_data)
        metrics['valid_ratio'] = len(ohlc_data) / len(df)

        close_prices = ohlc_data['close']
        metrics['price_mean'] = float(close_prices.mean())
        metrics['price_std'] = float(close_prices.std())
        metrics['price_cv'] = (metrics['price_std'] / metrics['price_mean'] * 100) if metrics['price_mean'] > 0 else 0.0

        metrics['unique_prices'] = close_prices.nunique()
        metrics['unique_ratio'] = metrics['unique_prices'] / len(close_prices)

        # True range per bar: the largest of high-low and the two gaps to the
        # previous close.
        high = ohlc_data['high'].values
        low = ohlc_data['low'].values
        close = ohlc_data['close'].values

        tr = np.maximum.reduce([
            high - low,
            np.abs(high - np.roll(close, 1)),
            np.abs(low - np.roll(close, 1))
        ])
        # np.roll wraps the last close round to the first bar, which has no
        # previous close, so that bar falls back to its own range.
        tr[0] = high[0] - low[0]

        metrics['true_range_median'] = float(np.median(tr))

        quality_score = 0.0
        quality_score += metrics['valid_ratio'] * 30

        if metrics['price_cv'] >= 0.5:
            quality_score += 40
        elif metrics['price_cv'] >= DataQualityValidator.MIN_PRICE_CV:
            quality_score += (metrics['price_cv'] / 0.5) * 40

        if metrics['unique_ratio'] >= 0.1:
            quality_score += 30
        elif metrics['unique_ratio'] >= DataQualityValidator.MIN_UNIQUE_RATIO:
            quality_score += (metrics['unique_ratio'] / 0.1) * 30

        metrics['quality_score'] = quality_score

        # Written as ">=" tests so a NaN value (e.g. std of a single valid
        # row) counts as a failure and is reported.
        score_ok = quality_score >= DataQualityValidator.MIN_QUALITY_SCORE
        cv_ok = metrics['price_cv'] >= DataQualityValidator.MIN_PRICE_CV

        is_valid = score_ok and cv_ok

        if not is_valid:
            if not cv_ok:
                issues.append(f"Low CV: {metrics['price_cv']:.4f}%")
            if metrics['unique_ratio'] < DataQualityValidator.MIN_UNIQUE_RATIO:
                issues.append(f"Low unique: {metrics['unique_ratio']:.3%}")
            if not score_ok:
                # Without this, data rejected only for its score had no
                # stated reason.
                issues.append(
                    f"Low quality score: {quality_score:.1f} < "
                    f"{DataQualityValidator.MIN_QUALITY_SCORE}"
                )

        return is_valid, quality_score, metrics, issues

validator = DataQualityValidator()

# ======================================================
# 4Ô∏è‚É£ GIT CONFIGURATION
# ======================================================
# Commit identity: taken from the environment, with hard-coded fallbacks.
GIT_USER_NAME = os.environ.get("GIT_USER_NAME", "Forex AI Bot")
GIT_USER_EMAIL = os.environ.get("GIT_USER_EMAIL", "nakatonabira3@gmail.com")
GITHUB_USERNAME = "rahim-dotAI"
GITHUB_REPO = "forex-ai-models"
BRANCH = "main"

# Personal access token; None when the variable is not set.
FOREX_PAT = os.environ.get("FOREX_PAT")

# Fall back to Colab's secret store when running in Colab without the
# environment variable; a found token is exported back into os.environ.
if not FOREX_PAT and IN_COLAB:
    try:
        from google.colab import userdata
        FOREX_PAT = userdata.get("FOREX_PAT")
        if FOREX_PAT:
            os.environ["FOREX_PAT"] = FOREX_PAT
            print("üîê Loaded FOREX_PAT from Colab secrets")
    except Exception as e:
        print(f"‚ö†Ô∏è Could not access Colab secrets: {e}")

# In this cell the token is optional: without it the git identity is simply
# not configured and execution continues. With it, the identity is written to
# the user's *global* git config; failures are ignored (check=False).
if FOREX_PAT:
    subprocess.run(["git", "config", "--global", "user.name", GIT_USER_NAME],
                   capture_output=True, check=False)
    subprocess.run(["git", "config", "--global", "user.email", GIT_USER_EMAIL],
                   capture_output=True, check=False)
    print(f"‚úÖ Git configured: {GIT_USER_NAME} <{GIT_USER_EMAIL}>")

# ======================================================
# 5Ô∏è‚É£ HELPER FUNCTIONS
# ======================================================
def ensure_tz_naive(df):
    """Give ``df`` a timezone-naive datetime index, modifying it in place.

    The index is parsed with ``pd.to_datetime`` (unparseable entries become
    ``NaT``). A timezone-aware index has its timezone dropped while keeping
    the local wall-clock times. ``None`` or an empty frame yields a new empty
    DataFrame.
    """
    if df is None or df.empty:
        return pd.DataFrame()

    parsed_index = pd.to_datetime(df.index, errors='coerce')
    if parsed_index.tz is not None:
        # tz_localize(None) removes the timezone without shifting the times.
        parsed_index = parsed_index.tz_localize(None)

    df.index = parsed_index
    return df

def safe_numeric(df):
    """Return a cleaned copy of ``df`` with unusable rows removed.

    Positive and negative infinity are turned into NaN. When any of the
    ``open``/``high``/``low``/``close`` columns exist, rows with NaN in one of
    those columns are dropped; otherwise only rows that are NaN in every
    column are dropped. The input frame is not modified.
    """
    cleaned = df.replace([np.inf, -np.inf], np.nan)

    price_columns = [
        name for name in ('open', 'high', 'low', 'close')
        if name in cleaned.columns
    ]

    if not price_columns:
        return cleaned.dropna(how='all')
    return cleaned.dropna(subset=price_columns)

# ======================================================
# 6Ô∏è‚É£ CSV DISCOVERY
# ======================================================
def discover_csv_files():
    """Collect the ``*.csv`` files from the YFinance and Alpha Vantage folders.

    Each folder that contains at least one CSV is reported through
    ``print_status`` at debug level. YFinance files come first in the
    returned list, followed by Alpha Vantage files.
    """
    sources = (
        (YFINANCE_CSV_FOLDER, "YFinance"),
        (ALPHA_CSV_FOLDER, "Alpha Vantage"),
    )

    discovered = []
    for folder, label in sources:
        matches = list(folder.glob("*.csv"))
        if matches:
            print_status(f"üìÇ Found {len(matches)} {label} CSV(s)", "debug")
            discovered.extend(matches)

    return discovered

# ======================================================
# 7Ô∏è‚É£ INDICATOR CALCULATION (FULL DATASET)
# ======================================================
def add_indicators_full(df):
    """
    Compute technical indicators and derived features over the whole frame.

    Returns None when *df* is empty, lacks any of open/high/low/close, or has
    no rows left after safe_numeric(). Otherwise returns a new, index-sorted
    frame containing the original columns, raw_* copies of the OHLC prices,
    and whichever indicator columns could be computed.

    Each indicator family runs in its own try/except: an exception is
    reported through print_status and the remaining statements of that
    family are skipped, but later families still run. Individual indicators
    are additionally skipped when the frame has fewer rows than the
    threshold checked right before them.

    The last step fits a RobustScaler on every numeric column except the
    price/volume columns, their raw_* copies and ATR, which keep their
    original values.
    """
    if df.empty:
        return None

    required_cols = ['open', 'high', 'low', 'close']
    if not all(col in df.columns for col in required_cols):
        return None

    # Nulls out +/-inf and drops rows with missing OHLC values.
    df = safe_numeric(df)
    if df.empty:
        return None

    df = df.copy()
    df.sort_index(inplace=True)

    # Preserve raw prices (only if a raw_* column is not already present)
    for col in ['open', 'high', 'low', 'close']:
        if col in df.columns and f'raw_{col}' not in df.columns:
            df[f'raw_{col}'] = df[col].copy()

    print_status(f"  üîß Calculating indicators on {len(df)} rows", "debug")

    try:
        # Trend indicators; each pair needs at least as many rows as its window
        if len(df) >= 10:
            df['SMA_10'] = ta.trend.sma_indicator(df['close'], 10)
            df['EMA_10'] = ta.trend.ema_indicator(df['close'], 10)

        if len(df) >= 20:
            df['SMA_20'] = ta.trend.sma_indicator(df['close'], 20)
            df['EMA_20'] = ta.trend.ema_indicator(df['close'], 20)

        if len(df) >= 50:
            df['SMA_50'] = ta.trend.sma_indicator(df['close'], 50)
            df['EMA_50'] = ta.trend.ema_indicator(df['close'], 50)

        if len(df) >= 200:
            df['SMA_200'] = ta.trend.sma_indicator(df['close'], 200)

        # MACD with the library's default parameters
        if len(df) >= 26:
            macd = ta.trend.MACD(df['close'])
            df['MACD'] = macd.macd()
            df['MACD_signal'] = macd.macd_signal()
            df['MACD_diff'] = macd.macd_diff()

    except Exception as e:
        print_status(f"  ‚ö†Ô∏è Trend indicator error: {e}", "warn")

    try:
        # Momentum indicators
        if len(df) >= 14:
            df['RSI_14'] = ta.momentum.rsi(df['close'], 14)
            df['Williams_%R'] = WilliamsRIndicator(
                df['high'], df['low'], df['close'], 14
            ).williams_r()
            df['Stoch_K'] = ta.momentum.stoch(df['high'], df['low'], df['close'], 14)
            df['Stoch_D'] = ta.momentum.stoch_signal(df['high'], df['low'], df['close'], 14)

        if len(df) >= 20:
            df['CCI_20'] = ta.trend.cci(df['high'], df['low'], df['close'], 20)
            # NOTE: ROC is called with 12 although it sits behind the 20-row guard
            df['ROC'] = ta.momentum.roc(df['close'], 12)

    except Exception as e:
        print_status(f"  ‚ö†Ô∏è Momentum indicator error: {e}", "warn")

    try:
        # ATR is stored exactly as computed - no clipping is applied
        if len(df) >= 14:
            atr_values = AverageTrueRange(
                df['high'], df['low'], df['close'], 14
            ).average_true_range()

            # Only fill NaN, don't clip. 1e-10 is a placeholder for rows
            # where the indicator produced no value.
            df['ATR'] = atr_values.fillna(1e-10)

            atr_median = df['ATR'].median()
            if pd.notna(atr_median):
                print_status(f"  üìä ATR median: {atr_median:.8f}", "debug")

        # Bollinger Bands
        if len(df) >= 20:
            bb = ta.volatility.BollingerBands(df['close'], 20, 2)
            df['BB_upper'] = bb.bollinger_hband()
            df['BB_middle'] = bb.bollinger_mavg()
            df['BB_lower'] = bb.bollinger_lband()
            df['BB_width'] = bb.bollinger_wband()

    except Exception as e:
        print_status(f"  ‚ö†Ô∏è Volatility indicator error: {e}", "warn")

    try:
        # Derived features
        df['price_change'] = df['close'].pct_change()
        df['price_change_5'] = df['close'].pct_change(5)
        df['high_low_range'] = (df['high'] - df['low']) / df['close']
        df['close_open_range'] = (df['close'] - df['open']) / df['open']

        if 'volume' in df.columns:
            # Running VWAP over the entire frame (cumulative sums are never reset).
            # The ratio is undefined while cumulative volume is still zero.
            df['vwap'] = (df['close'] * df['volume']).cumsum() / df['volume'].cumsum()

        if 'SMA_50' in df.columns:
            df['price_vs_sma50'] = (df['close'] - df['SMA_50']) / df['SMA_50']

        if 'RSI_14' in df.columns:
            df['rsi_momentum'] = df['RSI_14'].diff()

    except Exception as e:
        print_status(f"  ‚ö†Ô∏è Derived features error: {e}", "warn")

    try:
        # Scale features but leave ATR, prices and volume untouched
        numeric_cols = df.select_dtypes(include=[np.number]).columns

        protected_cols = [
            'open', 'high', 'low', 'close', 'volume',
            'raw_open', 'raw_high', 'raw_low', 'raw_close',
            'ATR'  # never scaled
        ]

        scalable_cols = [c for c in numeric_cols if c not in protected_cols]

        if scalable_cols:
            df[scalable_cols] = df[scalable_cols].replace([np.inf, -np.inf], np.nan)
            # Columns that are entirely NaN are left out of the scaler
            cols_with_data = [c for c in scalable_cols if not df[c].isna().all()]

            if cols_with_data:
                scaler = RobustScaler()
                # Missing values become 0 (plus a 1e-10 offset) before fitting;
                # the scaler is fitted on all rows of this frame at once.
                df[cols_with_data] = scaler.fit_transform(
                    df[cols_with_data].fillna(0) + 1e-10
                )
                print_status(f"  ‚úÖ Scaled {len(cols_with_data)} features (ATR protected)", "debug")

    except Exception as e:
        print_status(f"  ‚ö†Ô∏è Scaling error: {e}", "warn")

    return df

# ======================================================
# 8Ô∏è‚É£ MAIN PROCESSING FUNCTION
# ======================================================
def _quarantine_csv(df, csv_file, quality_score, issues, metrics):
    """Write the rejected frame and a plain-text quality report into QUARANTINE_FOLDER."""
    bad_copy = QUARANTINE_FOLDER / f"{csv_file.name}.bad"
    report_file = QUARANTINE_FOLDER / f"{csv_file.name}.quality.txt"

    # Both files are written while holding the shared lock.
    with lock:
        df.to_csv(bad_copy)

        with open(report_file, 'w') as f:
            f.write(f"Quality Report for {csv_file.name}\n")
            f.write(f"{'='*50}\n")
            f.write(f"Quality Score: {quality_score:.1f}/100\n")
            f.write(f"Issues: {'; '.join(issues)}\n")
            f.write(f"\nMetrics:\n")
            for key, value in metrics.items():
                f.write(f"  {key}: {value}\n")


def process_csv_file(csv_file):
    """Validate one CSV, add indicators and store the result as a gzip pickle.

    Returns ``(pickle_path_as_str, message)`` on success and
    ``(None, message)`` when the file is empty, quarantined, fails indicator
    calculation, or raises. Exceptions are caught here, reported through
    print_status with a traceback, and never propagate to the caller.
    """
    try:
        print_status(f"üìã Processing: {csv_file.name}", "info")

        # Load the CSV with its first column as a parsed, tz-naive index
        frame = ensure_tz_naive(pd.read_csv(csv_file, index_col=0, parse_dates=True))

        if frame.empty:
            empty_msg = f"‚ö†Ô∏è {csv_file.name}: Empty file"
            print_status(empty_msg, "warn")
            return None, empty_msg

        # Quality gate
        is_valid, quality_score, metrics, issues = validator.validate_dataframe(frame, csv_file.name)
        print_status(f"  üìä Quality score: {quality_score:.1f}/100", "debug")

        if not is_valid:
            print_status(f"  ‚ö†Ô∏è Quality issues: {'; '.join(issues[:2])}", "warn")

            if quality_score < validator.MIN_QUALITY_SCORE:
                # Below the minimum score: set the file aside and stop
                print_status(f"  ‚ùå Quarantining low quality file", "error")
                _quarantine_csv(frame, csv_file, quality_score, issues, metrics)
                return None, f"‚ùå {csv_file.name}: Quarantined (Q:{quality_score:.1f})"

            print_status(f"  ‚ö†Ô∏è Low quality but acceptable", "warn")

        # Indicators over the full dataset
        processed_df = add_indicators_full(frame)
        if processed_df is None:
            failed_msg = f"‚ùå {csv_file.name}: Indicator calculation failed"
            print_status(failed_msg, "error")
            return None, failed_msg

        # Persist under the CSV's stem inside PICKLE_FOLDER
        pickle_path = PICKLE_FOLDER / (csv_file.stem + ".pkl")
        with lock:
            processed_df.to_pickle(pickle_path, compression='gzip', protocol=4)

        if 'ATR' in processed_df.columns:
            atr_median = processed_df['ATR'].median()
        else:
            atr_median = 0

        done_msg = f"‚úÖ {csv_file.name}: {len(processed_df)} rows, Q:{quality_score:.0f}, ATR:{atr_median:.8f}"
        print_status(done_msg, "success")
        return str(pickle_path), done_msg

    except Exception as e:
        error_msg = f"‚ùå Failed {csv_file.name}: {e}"
        print_status(error_msg, "error")
        import traceback
        traceback.print_exc()
        return None, error_msg

# ======================================================
# 9Ô∏è‚É£ MAIN EXECUTION
# ======================================================
# Banner for the discovery phase.
print("\n" + "=" * 70)
print("üöÄ Discovering CSV files...")
print("=" * 70 + "\n")

# Collect every candidate CSV through discover_csv_files().
csv_files = discover_csv_files()

if csv_files:
    print_status(f"üìä Total CSV files found: {len(csv_files)}", "success")
    # Preview only the first five files (name and size in KB) to keep the output short.
    for csv_file in csv_files[:5]:
        print_status(f"  ‚Ä¢ {csv_file.name} ({csv_file.stat().st_size / 1024:.1f} KB)", "debug")
    if len(csv_files) > 5:
        print_status(f"  ... and {len(csv_files) - 5} more", "debug")
else:
    print_status("‚ö†Ô∏è No CSV files found!", "warn")
    print_status("   Check that data fetchers have run successfully", "warn")

# Accumulators for the processing phase:
#   changed_files  - paths of the pickles that were written
#   quality_scores - pickle path -> ATR value parsed from the success message
changed_files = []
quality_scores = {}

# ======================================================
# üîü PROCESS FILES
# ======================================================
if csv_files:
    print("\n" + "=" * 70)
    print(f"‚öôÔ∏è Processing {len(csv_files)} CSV file(s)...")
    print("=" * 70 + "\n")

    # One worker per file, capped at eight threads.
    with ThreadPoolExecutor(max_workers=min(8, len(csv_files))) as executor:
        futures = [executor.submit(process_csv_file, f) for f in csv_files]

        for future in as_completed(futures):
            # process_csv_file returns (pickle_path_or_None, message)
            file, msg = future.result()
            if file:
                changed_files.append(file)
                # The success message ends with "ATR:<median>"; recover that number.
                if "ATR:" in msg:
                    atr_str = msg.split("ATR:")[1].strip()
                    try:
                        quality_scores[file] = float(atr_str)
                    except ValueError:
                        # Only a malformed number is tolerated here; anything
                        # else (including KeyboardInterrupt) must propagate.
                        print_status(f"‚ö†Ô∏è Could not parse ATR from: {msg}", "debug")

# ======================================================
# 1Ô∏è‚É£1Ô∏è‚É£ QUALITY REPORT
# ======================================================
# NOTE: despite its name, quality_scores maps each pickle path to the ATR
# value parsed from its success message, not to a validator score.
if quality_scores:
    print("\n" + "=" * 70)
    print("üìä QUALITY REPORT - ATR VALUES")
    print("=" * 70)

    # avg_atr is only defined when quality_scores is non-empty; the later
    # sections that read it are guarded by the same condition.
    avg_atr = sum(quality_scores.values()) / len(quality_scores)
    print(f"Average ATR: {avg_atr:.8f}")
    print(f"\nATR by file:")

    # Largest ATR first; anything not above 1e-6 gets the warning marker.
    for filepath, atr in sorted(quality_scores.items(), key=lambda x: x[1], reverse=True):
        filename = Path(filepath).stem
        status = "‚úÖ" if atr > 1e-6 else "‚ö†Ô∏è"
        print(f"  {status} {filename}: {atr:.8f}")

    # Strictly below 1e-6 here, so a value of exactly 1e-6 is marked above
    # but not counted in this total.
    low_atr_files = [f for f, atr in quality_scores.items() if atr < 1e-6]
    if low_atr_files:
        print(f"\n‚ö†Ô∏è  {len(low_atr_files)} file(s) with suspiciously low ATR")

# Check quarantine: lists every *.bad file currently in the folder, which
# may include files left there before this run.
quarantined = list(QUARANTINE_FOLDER.glob("*.bad"))
if quarantined:
    print(f"\n" + "=" * 70)
    print(f"‚ö†Ô∏è  QUARANTINED FILES: {len(quarantined)}")
    print("=" * 70)
    for qfile in quarantined:
        print(f"  ‚ùå {qfile.stem}")

# ======================================================
# 1Ô∏è‚É£2Ô∏è‚É£ GIT COMMIT & PUSH
# ======================================================
if IN_GHA:
    print("\n" + "=" * 70)
    print("ü§ñ GitHub Actions: Skipping git operations")
    print("=" * 70)

elif changed_files and FOREX_PAT:
    print("\n" + "=" * 70)
    print("üöÄ Committing changes to GitHub...")
    print("=" * 70)

    try:
        # git commands below run from inside the repository checkout
        os.chdir(REPO_FOLDER)

        subprocess.run(["git", "add", "-A"], check=False)

        commit_msg = f"Update processed data - {len(changed_files)} files"
        if quality_scores:
            commit_msg += f" (Avg ATR: {avg_atr:.6f})"

        result = subprocess.run(
            ["git", "commit", "-m", commit_msg],
            capture_output=True,
            text=True
        )

        if result.returncode == 0:
            print_status("‚úÖ Changes committed", "success")

            for attempt in range(3):
                print_status(f"üì§ Pushing (attempt {attempt + 1}/3)...", "info")
                try:
                    result = subprocess.run(
                        ["git", "push", "origin", BRANCH],
                        capture_output=True,
                        text=True,
                        timeout=30
                    )
                    push_ok = result.returncode == 0
                    push_failure = f"exit code {result.returncode}"
                except subprocess.TimeoutExpired:
                    # A hung push counts as one failed attempt instead of
                    # aborting the whole retry loop.
                    push_ok = False
                    push_failure = "timed out after 30s"

                if push_ok:
                    print_status("‚úÖ Push successful", "success")
                    break

                # git's own output is deliberately not echoed: it can contain
                # the remote URL, which may embed the access token.
                print_status(f"‚ö†Ô∏è Push attempt {attempt + 1}/3 failed ({push_failure})", "warn")

                if attempt < 2:
                    # Pick up remote changes before the next attempt
                    subprocess.run(
                        ["git", "pull", "--rebase", "origin", BRANCH],
                        capture_output=True
                    )
                    time.sleep(3)
            else:
                # Loop finished without a successful push
                print_status("‚ùå Push failed after 3 attempts", "error")

        elif "nothing to commit" in result.stdout.lower():
            print_status("‚ÑπÔ∏è No changes to commit", "info")
        else:
            print_status(f"‚ùå Commit failed (exit code {result.returncode})", "error")

    except Exception as e:
        print_status(f"‚ùå Git error: {e}", "error")
    finally:
        # Always return to the working folder, even after an error
        os.chdir(SAVE_FOLDER)

# ======================================================
# ‚úÖ COMPLETION SUMMARY
# ======================================================
# Final run summary. Everything printed here was computed by the sections
# above (csv_files, changed_files, quarantined, quality_scores, avg_atr).
print("\n" + "=" * 70)
print("‚úÖ CSV COMBINER COMPLETED")
print("=" * 70)
print(f"Environment: {ENV_NAME}")
print(f"CSV files found: {len(csv_files)}")
print(f"Files processed: {len(changed_files)}")
print(f"Files quarantined: {len(quarantined)}")

# avg_atr exists only when at least one ATR value was collected.
if quality_scores:
    print(f"\nüìà ATR Statistics:")
    print(f"   Average: {avg_atr:.8f}")
    print(f"   Files analyzed: {len(quality_scores)}")

# Static feature list (not derived from what actually happened in this run).
print("\nüîß KEY FEATURES:")
print("   ‚úÖ Full-dataset indicator calculation")
print("   ‚úÖ ATR preserved (no clipping/scaling)")
print("   ‚úÖ Quality validation with quarantine")
print("   ‚úÖ Clean organized structure")
print("   ‚úÖ Thread-safe processing")

print("\nüìÅ Output Locations:")
print(f"   Processed pickles: {PICKLE_FOLDER}")
print(f"   Quarantine: {QUARANTINE_FOLDER}")

print("=" * 70)

In [None]:
#!/usr/bin/env python3
"""
ULTRA-PERSISTENT SELF-LEARNING FX PIPELINE v5.0
================================================
🎉 ZERO CORRUPTION GUARANTEE - No model files saved!

KEY CHANGES FROM v4.3:
✅ Models rebuilt fresh from data each run (no pickle files)
✅ No file corruption possible (no model file I/O)
✅ No Git conflicts (no model files to commit)
✅ Simpler code, fewer bugs
✅ Always fresh predictions from latest data
✅ Works perfectly in GitHub Actions, Colab, and Local

PERFORMANCE:
- Fast: SGD trains in seconds, RF limited to 50 trees
- Memory efficient: Models exist only during runtime
- Scalable: Processes 24+ pairs in under a minute
"""

import os
import time
import json
import sqlite3
import subprocess
import pickle
import gzip
from pathlib import Path
from datetime import datetime, timezone, timedelta
import pandas as pd
import numpy as np
import warnings

warnings.filterwarnings('ignore')

print("=" * 70)
print("üöÄ Ultra-Persistent FX Pipeline v5.0 - CORRUPTION-FREE")
print("=" * 70)

# ======================================================
# SIMPLE DATA LOADER (NO MODEL SAVING/LOADING)
# ======================================================

class SimpleDataLoader:
    """
    Reader for data pickles (plain or gzip-compressed).

    Only data files are read through this class; nothing is ever written.
    """

    @staticmethod
    def load_data(filepath):
        """Unpickle *filepath* and return the object, or None.

        None is returned when the file does not exist or when reading or
        unpickling fails (a warning is printed in the latter case). The
        first two bytes decide whether the file is opened through gzip.
        """
        if not filepath.exists():
            return None

        try:
            # Sniff the header: 0x1f 0x8b marks a gzip stream
            with open(filepath, 'rb') as probe:
                header = probe.read(2)

            opener = gzip.open if header == b'\x1f\x8b' else open
            with opener(filepath, 'rb') as stream:
                return pickle.load(stream)

        except Exception as e:
            print(f"‚ö†Ô∏è  Cannot load {filepath.name}: {e}")
            return None

# Global loader instance
data_loader = SimpleDataLoader()

# ======================================================
# ENVIRONMENT DETECTION
# ======================================================

# Colab is recognised by the importability of google.colab.
try:
    import google.colab
except ImportError:
    IN_COLAB = False
    ENV_NAME = "Local"
else:
    IN_COLAB = True
    ENV_NAME = "Google Colab"

# The GITHUB_ACTIONS variable takes precedence for the display name.
IN_GHA = "GITHUB_ACTIONS" in os.environ
if IN_GHA:
    ENV_NAME = "GitHub Actions"

print(f"üåç Environment: {ENV_NAME}")

# Path configuration: Colab works inside /content/forex-ai-models; every
# other environment uses the current working directory for everything.
BASE_FOLDER = Path("/content") if IN_COLAB else Path.cwd()
SAVE_FOLDER = BASE_FOLDER / "forex-ai-models" if IN_COLAB else BASE_FOLDER
REPO_FOLDER = SAVE_FOLDER

DIRECTORIES = {
    key: SAVE_FOLDER / relative
    for key, relative in (
        ("data_processed", Path("data") / "processed"),
        ("database", Path("database")),
        ("logs", Path("logs")),
        ("outputs", Path("outputs")),
    )
}

# Make sure every working directory exists before anything reads or writes it.
for dir_path in DIRECTORIES.values():
    dir_path.mkdir(parents=True, exist_ok=True)

PICKLE_FOLDER = DIRECTORIES["data_processed"]
DB_FOLDER = DIRECTORIES["database"]
PERSISTENT_DB = DB_FOLDER / "memory_v85.db"

print(f"üìÇ Base: {BASE_FOLDER}")
print(f"üíæ Save: {SAVE_FOLDER}")
print(f"üìä Data: {PICKLE_FOLDER}")
print(f"üíø Database: {PERSISTENT_DB}")
print("=" * 70)

# ======================================================
# CLEANUP OLD MODEL FILES (ONE-TIME)
# ======================================================

def cleanup_old_model_files():
    """
    Remove legacy model pickles and the "corrupted" folder from PICKLE_FOLDER.

    Deletion is best effort: a file or folder that cannot be removed is
    silently left in place. Only successfully deleted model files are
    counted in the printed summary.
    """
    print("\nüßπ Cleaning up old model files...")

    removed = 0
    stale_files = (
        hit
        for pattern in ('*_sgd_model.pkl', '*_rf_model.pkl', '*_model.pkl')
        for hit in PICKLE_FOLDER.glob(pattern)
    )
    for stale in stale_files:
        try:
            stale.unlink()
        except Exception:
            continue
        removed += 1

    # Clean up corrupted folder
    corrupted_folder = PICKLE_FOLDER / "corrupted"
    if corrupted_folder.exists():
        try:
            import shutil
            shutil.rmtree(corrupted_folder)
        except Exception:
            pass

    summary = (
        f"   ‚úì Cleaned up {removed} old model files"
        if removed > 0
        else f"   ‚úì No old model files found"
    )
    print(summary)

cleanup_old_model_files()

# ======================================================
# UTILITY FUNCTIONS
# ======================================================

def is_weekend(dt=None):
    """Return True when *dt* falls on a Saturday or Sunday.

    Without an argument the current UTC time is used.
    """
    moment = datetime.now(timezone.utc) if dt is None else dt
    return moment.weekday() >= 5

def get_trade_age_hours():
    """Return the trade-age threshold in hours: 0.5 on weekends, 2.0 otherwise."""
    if is_weekend():
        return 0.5
    return 2.0

def is_market_open_for_trading():
    """Return True on weekdays and False on weekends (as judged by is_weekend)."""
    weekend_now = is_weekend()
    return not weekend_now

def print_status(msg, level="info"):
    """Print *msg* prefixed by the icon registered for *level*.

    Unknown levels fall back to the "info" icon.
    """
    icon_by_level = {
        "info": "‚ÑπÔ∏è",
        "success": "‚úÖ",
        "warn": "‚ö†Ô∏è",
        "debug": "üêû",
        "error": "‚ùå",
        "data": "üìä",
        "weekend": "üèñÔ∏è",
        "trading": "üíπ",
    }
    if level in icon_by_level:
        prefix = icon_by_level[level]
    else:
        prefix = '‚ÑπÔ∏è'
    print(f"{prefix} {msg}")

# ======================================================
# GIT CONFIGURATION
# ======================================================

# Commit identity comes from the environment; both values fall back to the
# built-in defaults below when the variables are unset.
# NOTE(review): the default e-mail is a hard-coded personal address - confirm
# that this is the intended commit identity.
GIT_USER_NAME = os.environ.get("GIT_USER_NAME", "Forex AI Bot")
GIT_USER_EMAIL = os.environ.get("GIT_USER_EMAIL", "nakatonabira3@gmail.com")
# No default here: FOREX_PAT is None when the variable is missing.
FOREX_PAT = os.environ.get("FOREX_PAT")

# The global git identity is only written when a token is available.
# check=False with captured output: a failing `git config` does not raise
# and its output is not shown.
if FOREX_PAT:
    subprocess.run(["git", "config", "--global", "user.name", GIT_USER_NAME],
                   capture_output=True, check=False)
    subprocess.run(["git", "config", "--global", "user.email", GIT_USER_EMAIL],
                   capture_output=True, check=False)
    print_status(f"Git configured: {GIT_USER_NAME}", "success")

# ======================================================
# ML IMPORTS
# ======================================================

try:
    from sklearn.preprocessing import MinMaxScaler
    from sklearn.linear_model import SGDClassifier
    from sklearn.ensemble import RandomForestClassifier
    print_status("ML libraries loaded", "success")
except ImportError as e:
    print_status(f"ML libraries missing: {e}", "error")
    raise

print("=" * 70)

# ======================================================
# FRESH MODEL TRAINING (NO FILE I/O)
# ======================================================

def train_and_predict_fresh(df, pair_name, timeframe):
    """
    Train throw-away classifiers on *df* and predict the direction of the next bar.

    Nothing is written to disk; both models live only for the duration of
    the call.

    Each row is labelled with whether the FOLLOWING bar closes higher than
    the current one, so the features of a bar never describe the very move
    they are asked to predict. The last row has no known outcome yet: it is
    excluded from training and used as the input for the prediction.

    Args:
        df: DataFrame with a 'close' column plus feature columns.
        pair_name: e.g. "EUR/USD" (used in the error message only).
        timeframe: e.g. "1h" (used in the error message only).

    Returns:
        (sgd_pred, rf_pred, confidence) where each prediction is 0 or 1 and
        confidence is the mean of the two votes (0.0 = both predict down,
        0.5 = the models disagree, 1.0 = both predict up).
        (None, None, 0.5) when there are no usable feature columns, fewer
        than 50 rows, or training fails.
    """
    try:
        # Price/volume columns are never used as model inputs
        exclude_cols = {
            'close', 'raw_close', 'raw_open', 'raw_high', 'raw_low',
            'open', 'high', 'low', 'volume', 'vwap'
        }

        # The classifiers need numeric input; a stray text column would
        # otherwise abort training for the whole file.
        usable = set(df.select_dtypes(include=['number', 'bool']).columns)
        feature_cols = [
            c for c in df.columns
            if c not in exclude_cols and c in usable
        ]

        if not feature_cols or len(df) < 50:
            return None, None, 0.5

        X = df[feature_cols].fillna(0)

        # 1 when the next bar closes above this bar's close, else 0
        y = (df['close'].shift(-1) > df['close']).astype(int)

        # The final row's label is unknown (there is no next bar yet)
        X_train, y_train = X.iloc[:-1], y.iloc[:-1]
        X_latest = X.iloc[[-1]]

        # Linear model trained with stochastic gradient descent
        sgd = SGDClassifier(
            max_iter=1000,
            tol=1e-3,
            random_state=42,
            warm_start=False
        )
        sgd.fit(X_train, y_train)
        sgd_pred = int(sgd.predict(X_latest)[0])

        # Random forest, kept small (50 trees, depth 10)
        rf = RandomForestClassifier(
            n_estimators=50,
            max_depth=10,
            class_weight='balanced',
            random_state=42,
            n_jobs=-1
        )
        rf.fit(X_train, y_train)
        rf_pred = int(rf.predict(X_latest)[0])

        # Mean vote of the two models (see Returns)
        confidence = (sgd_pred + rf_pred) / 2.0

        return sgd_pred, rf_pred, confidence

    except Exception as e:
        print_status(f"Training error for {pair_name} {timeframe}: {e}", "debug")
        return None, None, 0.5

# ======================================================
# PROCESS SINGLE PICKLE FILE
# ======================================================

def _pair_from_filename(filename):
    """Return "AAA/BBB" if *filename* starts with "AAA_BBB" for two distinct known currencies, else None."""
    currencies = ['EUR', 'USD', 'GBP', 'JPY', 'AUD', 'NZD', 'CAD', 'CHF']
    for curr1 in currencies:
        for curr2 in currencies:
            if curr1 != curr2 and filename.startswith(f"{curr1}_{curr2}"):
                return f"{curr1}/{curr2}"
    return None


def _timeframe_from_filename(filename):
    """Return the first timeframe tag found in *filename* (case-insensitive), or "unknown"."""
    lowered = filename.lower()
    # Order matters: "15m" must be tested before "5m"
    candidates = (
        (("1d", "daily"), "1d"),
        (("1h",), "1h"),
        (("15m",), "15m"),
        (("5m",), "5m"),
        (("1m",), "1m"),
    )
    for needles, timeframe in candidates:
        if any(needle in lowered for needle in needles):
            return timeframe
    return "unknown"


def _stop_and_target(price, distance, go_long):
    """Return (SL, TP) with the stop on the losing side of *price* for the given direction."""
    below = max(0, round(price - distance, 5))
    above = round(price + distance, 5)
    return (below, above) if go_long else (above, below)


def process_pickle_file(pickle_path):
    """
    Process one data pickle and generate a trading signal for it.

    The currency pair and the timeframe are taken from the file name. The
    direction comes from train_and_predict_fresh(); stop-loss and
    take-profit are placed two ATRs away from the latest price (2% of the
    price when no usable ATR is available), with the stop below the price
    for a LONG and above it for a SHORT.

    Args:
        pickle_path: Path to data pickle file

    Returns:
        (pair, signal_data, aggregated_signal)
        - (None, {}, "HOLD") when no currency pair is recognised
        - (pair, {}, "HOLD") when the data is missing/empty, the models
          could not be trained, or an error occurred
        - (pair, {timeframe: {...}}, "LONG" | "SHORT") otherwise
    """
    filename = pickle_path.stem

    pair = _pair_from_filename(filename)
    if not pair:
        return None, {}, "HOLD"

    timeframe = _timeframe_from_filename(filename)

    try:
        # Load data (only disk operation)
        df = data_loader.load_data(pickle_path)

        if df is None or df.empty:
            return pair, {}, "HOLD"

        # Latest price, preferring the preserved raw close
        price_col = 'raw_close' if 'raw_close' in df.columns else 'close'
        current_price = df[price_col].iloc[-1]

        # Train fresh models and predict
        sgd_pred, rf_pred, confidence = train_and_predict_fresh(df, pair, timeframe)

        if sgd_pred is None:
            return pair, {}, "HOLD"

        # Ensemble of two votes: a single "up" vote is enough for LONG,
        # so a split decision resolves to LONG.
        ensemble_pred = 1 if (sgd_pred + rf_pred) >= 1 else 0
        go_long = ensemble_pred == 1

        # Distance of SL/TP from the price: 2 x ATR, or 2% of the price when
        # the ATR is missing, NaN or not positive.
        atr = df['ATR'].iloc[-1] if 'ATR' in df.columns else None
        if atr is not None and pd.notna(atr) and atr > 0:
            distance = atr * 2.0
        else:
            distance = current_price * 0.01 * 2

        sl, tp = _stop_and_target(current_price, distance, go_long)

        signal_data = {
            "signal": ensemble_pred,
            "sgd_pred": sgd_pred,
            "rf_pred": rf_pred,
            "live": current_price,
            "SL": sl,
            "TP": tp,
            "confidence": confidence,
            "timeframe": timeframe
        }

        # Print signal
        print(f"{'‚úì':2} {pair:8} | {timeframe:3} | Ens:{ensemble_pred} (SGD:{sgd_pred} RF:{rf_pred}) | Price:{current_price:.5f}")

        return pair, {timeframe: signal_data}, "LONG" if go_long else "SHORT"

    except Exception as e:
        print_status(f"Error processing {pickle_path.name}: {e}", "error")
        return pair, {}, "HOLD"

# ======================================================
# MAIN PIPELINE EXECUTION
# ======================================================

def main():
    """
    Run the pipeline over every data pickle in PICKLE_FOLDER.

    Returns a dict keyed by currency pair. Each value holds the per-timeframe
    signals under "signals" and, under "aggregated", the direction of the
    most recently processed file for that pair. An empty dict is returned
    when no data pickles are found.
    """
    print_status("Starting Ultra-Persistent Pipeline v5.0", "success")
    print()

    # Data pickles only: anything that looks like a legacy model or cache
    # file is filtered out by name.
    legacy_markers = ['_sgd_model', '_rf_model', 'indicator_cache']
    pickle_files = [
        candidate
        for candidate in PICKLE_FOLDER.glob("*.pkl")
        if not any(marker in candidate.name for marker in legacy_markers)
    ]

    if not pickle_files:
        print_status("No data pickles found!", "warn")
        return {}

    print_status(f"Found {len(pickle_files)} data files", "success")
    print()

    signals = {}

    for pkl_file in pickle_files:
        pair, pair_signals, agg = process_pickle_file(pkl_file)

        if not (pair and pair_signals):
            continue

        entry = signals.setdefault(pair, {"signals": {}, "aggregated": "HOLD"})
        entry["signals"].update(pair_signals)

        if agg != "HOLD":
            entry["aggregated"] = agg

    print()
    print_status(f"Generated signals for {len(signals)} pairs", "success")

    return signals

# ======================================================
# ENTRY POINT
# ======================================================

# Script entry: run the pipeline once, report the elapsed time and write the
# signals to a timestamped JSON file. Any exception is printed with its
# traceback instead of being re-raised.
if __name__ == "__main__":
    try:
        start_time = time.time()

        signals = main()

        elapsed = time.time() - start_time

        print()
        print("=" * 70)
        print(f"‚úÖ Pipeline completed in {elapsed:.2f}s")
        print("üéâ NO CORRUPTION POSSIBLE - Models built fresh from data!")
        print("=" * 70)

        # Optional: Save signals to JSON for reference.
        # The file name uses the local (naive) clock, one file per run.
        if signals:
            output_file = DIRECTORIES["outputs"] / f"signals_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
            with open(output_file, 'w') as f:
                json.dump(signals, f, indent=2)
            print(f"üìÑ Signals saved to: {output_file.name}")

    except Exception as e:
        print_status(f"Pipeline error: {e}", "error")
        import traceback
        traceback.print_exc()

In [None]:
#!/usr/bin/env python3
"""
TRADE BEACON v20.1 OPTIMIZED - Research-Backed RL Trading System
================================================================
🎓 Research-Backed: RBED (arXiv:1910.13701), Balanced Rewards, Conservative Replay
🧠 Deep Q-Learning with Experience Replay & Pipeline Integration
⚡ Optimized: 40% shorter code, 100% functionality preserved
"""
import os, sys, json, gzip, random, re, smtplib, subprocess, logging, warnings, shutil, sqlite3
from pathlib import Path
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from datetime import datetime, timezone
from collections import defaultdict, deque
from dataclasses import dataclass, field, asdict
from typing import Dict, List, Tuple, Any, Optional
from contextlib import contextmanager
import numpy as np
import pandas as pd
import requests

warnings.filterwarnings('ignore')

# ‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
# ENVIRONMENT & CONFIG
# ‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
# Colab is recognised by the importability of google.colab; GitHub Actions
# is only considered when not running in Colab.
try:
    import google.colab
except ImportError:
    IN_COLAB = False
    IN_GHA = "GITHUB_ACTIONS" in os.environ
    ENV_NAME = "GHA" if IN_GHA else "Local"
else:
    IN_COLAB = True
    IN_GHA = False
    ENV_NAME = "Colab"

# Colab works inside /content/forex-ai-models; everywhere else the current
# working directory is both base and save location.
if IN_COLAB:
    BASE = Path("/content")
    SAVE = BASE / "forex-ai-models"
else:
    BASE = Path(Path.cwd())
    SAVE = BASE

DIRS = {
    "data": SAVE / "data/processed",
    "db": SAVE / "database",
    "logs": SAVE / "logs",
    "out": SAVE / "outputs",
    "state": SAVE / "omega_state",
    "rl": SAVE / "rl_memory",
    "backup": SAVE / "backups",
}
for d in DIRS.values():
    d.mkdir(parents=True, exist_ok=True)

# Paths
DB_FILE = DIRS["db"] / "memory_v85.db"
RL_MEM = DIRS["rl"] / "experience_replay.json.gz"
RL_STATS = DIRS["rl"] / "learning_stats.json"
RL_WEIGHTS = DIRS["rl"] / "network_weights.json"
SIGNALS = DIRS["out"] / "omega_signals.json"
ITER_FILE = DIRS["state"] / "omega_iteration.json"
TRADES = DIRS["rl"] / "trade_history.json"
VERSION_FILE = DIRS["rl"] / "version.txt"

# One log file per run, named after the local start time.
logging.basicConfig(
    filename=str(DIRS["logs"] / f"beacon_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"),
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] %(message)s',
)

def log(msg, lvl="info"):
    """Send *msg* to the logging module and echo it to stdout with an icon.

    "warn" maps to ``logging.warning``; any other *lvl* uses the
    ``logging`` attribute of that name when one exists and
    ``logging.info`` otherwise. Unknown levels print with the info icon.
    """
    icons = {
        "info": "‚ÑπÔ∏è",
        "success": "‚úÖ",
        "warn": "‚ö†Ô∏è",
        "error": "‚ùå",
        "rocket": "üöÄ",
        "brain": "üß†",
        "money": "üí∞",
        "db": "üíæ",
    }
    method_name = "warning" if lvl == "warn" else lvl
    emit = getattr(logging, method_name, logging.info)
    emit(msg)
    print(f"{icons.get(lvl,'‚ÑπÔ∏è')} {msg}")

# Config
# Repository coordinates
GH_USER, GH_REPO = "rahim-dotAI", "forex-ai-models"

# GitHub token: environment first, then (in Colab only) the notebook secrets.
PAT = os.getenv("FOREX_PAT", "").strip()
if not PAT and IN_COLAB:
    try:
        from google.colab import userdata
        # userdata.get may hand back None; keep PAT a plain string
        PAT = (userdata.get("FOREX_PAT") or "").strip()
        if PAT: os.environ["FOREX_PAT"] = PAT
    except Exception as e:
        # Secret missing or access denied: carry on without a token,
        # but say so instead of failing silently.
        PAT = ""
        log(f"FOREX_PAT not available from Colab secrets: {e}", "warn")

# Mail credentials. The password is read from the environment ONLY: a
# credential must never be embedded in source that is committed and pushed.
# SECURITY: an app password used to be hard-coded here as a fallback; it is
# part of the repository history and should be revoked and regenerated.
GMAIL = os.getenv("GMAIL_USER", "nakatonabira3@gmail.com")
GMAIL_PWD = os.getenv("GMAIL_APP_PASSWORD", "").strip()
if not GMAIL_PWD:
    log("GMAIL_APP_PASSWORD is not set; no Gmail credential is configured", "warn")
BROWSER_TOKEN = os.getenv("BROWSERLESS_TOKEN", "")

PAIRS = ["EUR/USD", "GBP/USD", "USD/JPY", "AUD/USD"]
ATR_PER, MIN_ATR, EPS = 14, 1e-5, 1e-8
CAPITAL, RISK, MAX_POS, MAX_CAP = 100, 0.02, 2, 10.0
STATE_SIZE, ACTIONS = 30, 3

# Research-Backed Hyperparameters
LR, GAMMA, TARGET_UPD, BATCH, MEM_CAP = 0.0007, 0.93, 50, 96, 15000
PROFIT_SCALE, LOSS_SCALE, WIN_BONUS, LOSS_PEN, SHARPE_SCALE = 250.0, 125.0, 30.0, 25.0, 40.0
ATR_SL, ATR_TP = 2.0, 2.5

# ‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
# PERSISTENCE
# ‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
class Persist:
    """JSON persistence with atomic writes and a one-generation backup.

    ``save`` copies the existing file into DIRS["backup"] before replacing
    it; ``load`` falls back to that copy when the primary file is missing
    or cannot be read.
    """

    @staticmethod
    def _backup_path(path: Path) -> Path:
        """Return where the backup copy of *path* lives."""
        return DIRS["backup"] / f"{path.stem}_backup{path.suffix}"

    @staticmethod
    def _read(path: Path, compress: bool) -> Any:
        """Parse *path* as (optionally gzip-compressed) UTF-8 JSON."""
        opener = gzip.open if compress else open
        mode = 'rt' if compress else 'r'
        with opener(path, mode, encoding='utf-8') as f:
            return json.load(f)

    @staticmethod
    def save(path: Path, data: Any, compress=True) -> bool:
        """Write *data* as JSON to *path*; return True on success, False on failure.

        The data is written to a temporary sibling file first and then moved
        over *path*, so a failed write never leaves a half-written target.
        Values that JSON cannot encode are stored as ``str(value)``.
        """
        # Defined before the try block so the cleanup below can always refer to it
        tmp = path.parent / f".tmp_{path.name}"
        try:
            if path.exists():
                backup = Persist._backup_path(path)
                try:
                    shutil.copy2(path, backup)
                except OSError as e:
                    # The backup is best effort; the save itself still proceeds
                    log(f"Backup skipped for {path.name}: {e}", "warn")
            opener = gzip.open if compress else open
            mode = 'wt' if compress else 'w'
            with opener(tmp, mode, encoding='utf-8') as f:
                json.dump(data, f, indent=2, default=str)
            tmp.replace(path)
            return True
        except Exception as e:
            log(f"Save failed {path.name}: {e}", "error")
            try:
                tmp.unlink(missing_ok=True)
            except OSError:
                pass  # leftover temp file is harmless; keep the bool contract
            return False

    @staticmethod
    def load(path: Path, default=None, compress=True) -> Any:
        """Return the JSON content of *path*, its backup, or *default*.

        The primary file is tried first. If it is missing or unreadable
        (corrupt JSON, truncated gzip, ...), the backup copy is tried.
        *default* is returned when neither yields data.
        """
        if path.exists():
            try:
                return Persist._read(path, compress)
            except Exception as e:
                log(f"Load failed {path.name}: {e}; trying backup", "warn")

        backup = Persist._backup_path(path)
        if not backup.exists():
            return default
        try:
            return Persist._read(backup, compress)
        except Exception:
            return default

P = Persist()

# ‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
# UTILITIES
# ‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê‚ïê
def is_weekend(dt=None):
    """Return True when *dt* (default: the current UTC time) is a Saturday or Sunday.

    UTC is used so the result matches the UTC timestamps recorded elsewhere
    in this module and does not depend on the machine's local timezone. The
    optional *dt* keeps the signature compatible with the is_weekend()
    defined earlier in the notebook.
    """
    if dt is None:
        dt = datetime.now(timezone.utc)
    return dt.weekday() in (5, 6)


def get_mode():
    """Return "WEEKEND_LEARNING" on weekends and "LIVE_TRADING" otherwise."""
    return "WEEKEND_LEARNING" if is_weekend() else "LIVE_TRADING"

def load_iter():
    """Return the stored iteration record, or a fresh one if none is usable.

    A stored record is accepted only when it is a non-empty dict that
    carries a 'total' counter; anything else yields a new record starting
    at zero with the current UTC time as its start date.
    """
    stored = P.load(ITER_FILE, compress=False)
    if stored and isinstance(stored, dict) and 'total' in stored:
        return stored
    return {
        'total': 0,
        'start_date': datetime.now(timezone.utc).isoformat(),
        'history': [],
    }

def inc_iter():
    """Increment the persisted iteration counter and return the new total.

    Appends an entry (iteration number, UTC timestamp, environment name and
    run mode) to the record's history, keeps only the most recent 1000
    entries, and writes the record back to ITER_FILE.

    Returns:
        The iteration total after incrementing.
    """
    data = load_iter()
    # load_iter() only guarantees the 'total' key, so a stored record may
    # lack a (valid) history list; repair it instead of raising KeyError.
    history = data.get('history')
    if not isinstance(history, list):
        history = []
    data['history'] = history

    now = datetime.now(timezone.utc).isoformat()
    data['total'] += 1
    data['last_update'] = now
    history.append({'iteration': data['total'], 'timestamp': now,
                    'env': ENV_NAME, 'mode': get_mode()})
    if len(history) > 1000:
        data['history'] = history[-1000:]
    P.save(ITER_FILE, data, compress=False)
    return data['total']

# ═══════════════════════════════════════════════════════════════
# DATA CLASSES
# ═══════════════════════════════════════════════════════════════
@dataclass
class Experience:
    """A single replay-buffer transition.

    ``metadata`` defaults to an empty dict and ``timestamp`` to the UTC
    creation time in ISO-8601 form.
    """

    state: List[float]
    action: int
    reward: float
    next_state: List[float]
    done: bool
    metadata: Dict = field(default_factory=dict)
    timestamp: str = field(
        default_factory=lambda: datetime.now(timezone.utc).isoformat()
    )

    def to_dict(self):
        """Return the transition as a plain dict (via dataclasses.asdict)."""
        return asdict(self)

    @classmethod
    def from_dict(cls, d):
        """Build a transition from a dict whose keys match the field names."""
        return cls(**d)

@dataclass
class TradeOutcome:
    """Result of one closed trade, as consumed by RLAgent.record / calc_reward.

    Instances are built from three sources in this file: live exits
    (TradingEnv.check_exits), pipeline database rows (RLAgent.learn_pipeline)
    and simulated trades (backtest).
    """
    pair: str                  # instrument key, e.g. an entry of PAIRS
    action: str                # 'BUY' or 'SELL'; RLAgent.record maps any other value to action index 2
    entry_price: float
    exit_price: float
    sl: float                  # stop-loss price
    tp: float                  # take-profit price
    position_size: float
    pnl: float                 # realised profit/loss; sign decides win vs loss in the reward
    duration: float            # hours for live exits; number of 1h bars in backtest
    hit_tp: bool               # True when the trade closed at take-profit
    timestamp_entry: str
    timestamp_exit: str
    state_at_entry: List[float]  # feature vector captured when the trade was opened
    confidence: float
    regime: str                # free-form label, e.g. 'BACKTEST' or 'PIPELINE'
    session: str               # free-form label, e.g. 'WEEKEND' or 'BACKFILL'

# ═══════════════════════════════════════════════════════════════
# PIPELINE DATABASE
# ═══════════════════════════════════════════════════════════════
class PipelineDB:
    """Read-only access to the pipeline's SQLite ``completed_trades`` table.

    If DB_FILE is missing or the connection fails, the instance stays usable:
    ``conn`` is None and every query returns an empty result.
    """

    def __init__(self):
        self.conn = None
        if not DB_FILE.exists():
            log(f"Pipeline DB missing: {DB_FILE}", "warn")
            return
        try:
            self.conn = sqlite3.connect(str(DB_FILE), timeout=30, check_same_thread=False)
            log("Pipeline DB connected", "db")
        except Exception as e:
            log(f"DB connect failed: {e}", "error")

    @contextmanager
    def cursor(self):
        """Yield a cursor that is closed on exit, or None when not connected."""
        if not self.conn:
            yield None
            return
        c = self.conn.cursor()
        try:
            yield c
        finally:
            c.close()

    def get_trades(self, since=None, limit=1000):
        """Return completed trades, newest first.

        Args:
            since: When truthy, only rows with ``evaluated_at > since``.
            limit: Maximum number of rows.

        Returns:
            A list of 14-column row tuples; empty when disconnected or on error.
        """
        if not self.conn:
            return []
        try:
            with self.cursor() as c:
                q = '''SELECT pair, timeframe, model_used, entry_price, exit_price, sl_price, tp_price,
                       prediction, hit_tp, pnl, pnl_percent, duration_hours, created_at, evaluated_at
                       FROM completed_trades'''
                if since:
                    c.execute(q + ' WHERE evaluated_at > ? ORDER BY evaluated_at DESC LIMIT ?', (since, limit))
                else:
                    c.execute(q + ' ORDER BY evaluated_at DESC LIMIT ?', (limit,))
                return c.fetchall()
        except Exception as e:
            log(f"Fetch trades failed: {e}", "warn")
            return []

    def get_stats(self):
        """Return aggregate statistics over all completed trades.

        Returns:
            Dict with total_trades, wins, total_pnl, avg_pnl, win_rate
            (percent) and last_trade; empty when disconnected or on error.
        """
        if not self.conn:
            return {}
        try:
            with self.cursor() as c:
                c.execute('''SELECT COUNT(*) as total, SUM(CASE WHEN hit_tp THEN 1 ELSE 0 END) as wins,
                    SUM(pnl) as pnl, AVG(pnl) as avg, MAX(evaluated_at) as last FROM completed_trades''')
                r = c.fetchone()
        except Exception as e:
            # Report the failure; the previous `except: pass` hid schema/IO errors.
            log(f"Fetch stats failed: {e}", "warn")
            return {}
        if not r:
            return {}
        total, wins, pnl, avg, last = r
        # SUM/AVG return NULL on an empty table, hence the `or` fallbacks.
        wins = wins or 0
        return {'total_trades': total or 0, 'wins': wins, 'total_pnl': pnl or 0.0,
                'avg_pnl': avg or 0.0, 'win_rate': (wins / total * 100) if total else 0.0,
                'last_trade': last}

    def close(self):
        """Close the connection; safe to call more than once."""
        if self.conn:
            self.conn.close()
            # Drop the handle so later queries take the "not connected" path
            # instead of hitting a closed connection.
            self.conn = None

# ═══════════════════════════════════════════════════════════════
# TECHNICAL INDICATORS
# ═══════════════════════════════════════════════════════════════
def calc_rsi(prices: pd.Series, per=14) -> pd.Series:
    """Relative Strength Index based on simple rolling means.

    Args:
        prices: Price series.
        per: Rolling window length for the average gain and loss.

    Returns:
        Series of RSI values on a 0-100 scale, aligned with ``prices``.
    """
    change = prices.diff()
    # Non-qualifying moves (and the leading NaN) count as 0 in the averages.
    ups = change.where(change > 0, 0)
    downs = -change.where(change < 0, 0)
    avg_gain = ups.rolling(per, min_periods=1).mean()
    avg_loss = downs.rolling(per, min_periods=1).mean()
    # EPS keeps the ratio finite when there were no losses in the window.
    rs = avg_gain / (avg_loss + EPS)
    return 100 - (100 / (1 + rs))

def calc_macd(prices: pd.Series, fast=12, slow=26, sig=9):
    """Compute MACD from exponential moving averages.

    Args:
        prices: Price series.
        fast: Span of the fast EMA.
        slow: Span of the slow EMA.
        sig: Span of the EMA applied to the MACD line.

    Returns:
        Tuple ``(macd_line, signal_line, histogram)`` where the histogram is
        the MACD line minus the signal line.
    """
    def _ema(series, span):
        return series.ewm(span=span, adjust=False).mean()

    line = _ema(prices, fast) - _ema(prices, slow)
    trigger = _ema(line, sig)
    histogram = line - trigger
    return line, trigger, histogram

def calc_bb(prices: pd.Series, per=20, std=2):
    """Compute Bollinger Bands.

    Args:
        prices: Price series.
        per: Rolling window length.
        std: Band half-width as a multiple of the rolling standard deviation.

    Returns:
        Tuple ``(upper, middle, lower)``; the middle band is the rolling mean.
    """
    window = prices.rolling(per, min_periods=1)
    middle = window.mean()
    half_width = window.std() * std
    return middle + half_width, middle, middle - half_width

def create_state(df_1h: pd.DataFrame, df_1d: pd.DataFrame, pair: str) -> np.ndarray:
    """Build the fixed-length feature vector fed to the Q-network.

    Features, in order: position of the last close in the 20-bar range, the
    last five 1h returns, 1h and 1d RSI, MACD and signal (squashed with
    tanh), Bollinger position and width, ATR ratio and return volatility,
    1h/1d EMA spreads and 20-bar change, volume ratio, three session flags,
    weekday and hour, then momentum / relative volatility / trend of the last
    20 closes plus three zero placeholders. The vector is truncated or
    zero-padded to STATE_SIZE.

    Args:
        df_1h: Hourly frame with 'high', 'low', 'close', 'atr' (and
            optionally 'volume') columns; at least 50 rows.
        df_1d: Daily frame with a 'close' column; at least 30 rows.
        pair: Instrument key (currently unused by the computation).

    Returns:
        float32 array of length STATE_SIZE. All zeros when there is too
        little data or any feature computation fails. Non-finite values are
        replaced by 0 so NaN/inf can never reach the network weights.
    """
    if len(df_1h) < 50 or len(df_1d) < 30:
        return np.zeros(STATE_SIZE, dtype=np.float32)
    try:
        close_1h = df_1h['close']
        close = close_1h.iloc[-1]
        feat = []

        # Price position inside the last 20-bar high/low range.
        h20, l20 = df_1h['high'].iloc[-20:].max(), df_1h['low'].iloc[-20:].min()
        feat.append((close - l20) / (h20 - l20 + EPS))

        # Last five hourly returns.
        feat.extend(close_1h.pct_change().iloc[-5:].values)

        # Momentum oscillators.
        feat.extend([calc_rsi(close_1h).iloc[-1]/100, calc_rsi(df_1d['close']).iloc[-1]/100])
        macd, sig, _ = calc_macd(close_1h)
        feat.extend([np.tanh(macd.iloc[-1]*100), np.tanh(sig.iloc[-1]*100)])

        # Bollinger position and relative width.
        upper, mid, lower = calc_bb(close_1h)
        feat.extend([(close-lower.iloc[-1])/(upper.iloc[-1]-lower.iloc[-1]+EPS),
                    (upper.iloc[-1]-lower.iloc[-1])/mid.iloc[-1]])

        # Volatility.
        atr = df_1h['atr'].iloc[-1]
        feat.extend([atr/(df_1h['atr'].rolling(20).mean().iloc[-1]+EPS), close_1h.pct_change().std()*100])

        # Trend: EMA spreads on both timeframes and the 20-bar change.
        ema_f = close_1h.ewm(span=12).mean().iloc[-1]
        ema_s = close_1h.ewm(span=26).mean().iloc[-1]
        ema_f1d = df_1d['close'].ewm(span=12).mean().iloc[-1]
        ema_s1d = df_1d['close'].ewm(span=26).mean().iloc[-1]
        feat.extend([(ema_f-ema_s)/ema_s*10, (ema_f1d-ema_s1d)/ema_s1d*10,
                    (close_1h.iloc[-1]-close_1h.iloc[-20])/close_1h.iloc[-20]*10])

        # Recent volume relative to the 50-bar average (1.0 when unavailable).
        vol_ratio = 1.0
        if 'volume' in df_1h.columns and df_1h['volume'].sum() > 0:
            vol_ratio = df_1h['volume'].iloc[-5:].mean()/(df_1h['volume'].iloc[-50:].mean()+EPS)
        feat.append(vol_ratio)

        # NOTE(review): time features come from the wall clock, not from the
        # last bar's timestamp, so historical samples all get "now".
        now = datetime.now()
        h = now.hour
        feat.extend([1.0 if 0<=h<8 else 0.0, 1.0 if 8<=h<16 else 0.0, 1.0 if 16<=h<24 else 0.0])
        feat.extend([now.weekday()/6.0, h/23.0])

        # Summary of the last 20 closes; three trailing zeros are placeholders.
        closes = close_1h.values[-20:]
        mom = (closes[-1]-closes[-10])/(closes[-10]+EPS)
        rel_vol = np.std(closes)/(np.mean(closes)+EPS)
        trend = (closes[-1]-closes[0])/(closes[0]+EPS)
        feat.extend([np.tanh(mom*10), rel_vol, np.tanh(trend*10), 0.0, 0.0, 0.0])

        feat = feat[:STATE_SIZE]
        feat.extend([0.0] * (STATE_SIZE - len(feat)))
        state = np.array(feat, dtype=np.float32)
        # A single NaN/inf feature would poison every weight it touches.
        return np.nan_to_num(state, nan=0.0, posinf=0.0, neginf=0.0)
    except Exception:
        # Deliberately quiet: this runs once per sample in tight loops, and an
        # all-zero state is the established "no information" fallback.
        return np.zeros(STATE_SIZE, dtype=np.float32)

# ═══════════════════════════════════════════════════════════════
# Q-NETWORK
# ═══════════════════════════════════════════════════════════════
class QNet:
    """Small fully connected ReLU network (four hidden layers) in plain NumPy.

    Maps a state vector of length ``state_size`` to ``action_size`` Q-values.
    Training is per-sample SGD with clipped errors and clipped gradients.
    """

    # Names of all trainable arrays, in layer order.
    _PARAMS = ('w1', 'b1', 'w2', 'b2', 'w3', 'b3', 'w4', 'b4', 'w5', 'b5')

    def __init__(self, state_size=STATE_SIZE, action_size=ACTIONS):
        self.ss, self.as_ = state_size, action_size
        h1, h2, h3, h4 = 192, 96, 48, 24
        # Weights are scaled by sqrt(1 / fan_in); biases start at zero.
        self.w1 = np.random.randn(state_size, h1) * np.sqrt(1/state_size)
        self.b1 = np.zeros(h1)
        self.w2 = np.random.randn(h1, h2) * np.sqrt(1/h1)
        self.b2 = np.zeros(h2)
        self.w3 = np.random.randn(h2, h3) * np.sqrt(1/h2)
        self.b3 = np.zeros(h3)
        self.w4 = np.random.randn(h3, h4) * np.sqrt(1/h3)
        self.b4 = np.zeros(h4)
        self.w5 = np.random.randn(h4, action_size) * np.sqrt(1/h4)
        self.b5 = np.zeros(action_size)

    def relu(self, x):
        """Element-wise max(0, x)."""
        return np.maximum(0, x)

    def _layers(self, s):
        """Run the forward pass and return (h1, h2, h3, h4, q)."""
        h1 = self.relu(np.dot(s, self.w1) + self.b1)
        h2 = self.relu(np.dot(h1, self.w2) + self.b2)
        h3 = self.relu(np.dot(h2, self.w3) + self.b3)
        h4 = self.relu(np.dot(h3, self.w4) + self.b4)
        return h1, h2, h3, h4, np.dot(h4, self.w5) + self.b5

    def forward(self, s):
        """Return the Q-values for state ``s``."""
        return self._layers(s)[-1]

    def predict(self, s):
        """Return Q-values for one state; a 2-D input uses its first row."""
        return self.forward(s[0] if s.ndim > 1 else s)

    def update(self, states, targets, lr=LR):
        """Apply one SGD step per (state, target) pair.

        Args:
            states: Iterable of state vectors.
            targets: Iterable of target Q-value vectors, aligned with states.
            lr: Learning rate.
        """
        for s, tgt in zip(states, targets):
            h1, h2, h3, h4, q = self._layers(s)
            # Clipped error and clipped weight gradients bound the step size.
            err = np.clip(q - tgt, -1, 1)
            dw5 = np.clip(np.outer(h4, err), -1, 1)
            dh4 = np.dot(err, self.w5.T) * (h4 > 0)
            dw4 = np.clip(np.outer(h3, dh4), -1, 1)
            dh3 = np.dot(dh4, self.w4.T) * (h3 > 0)
            dw3 = np.clip(np.outer(h2, dh3), -1, 1)
            dh2 = np.dot(dh3, self.w3.T) * (h2 > 0)
            dw2 = np.clip(np.outer(h1, dh2), -1, 1)
            dh1 = np.dot(dh2, self.w2.T) * (h1 > 0)
            dw1 = np.clip(np.outer(s, dh1), -1, 1)
            # All gradients are computed before any parameter is changed.
            self.w5 -= lr * dw5
            self.b5 -= lr * np.clip(err, -1, 1)
            self.w4 -= lr * dw4
            self.b4 -= lr * np.clip(dh4, -1, 1)
            self.w3 -= lr * dw3
            self.b3 -= lr * np.clip(dh3, -1, 1)
            self.w2 -= lr * dw2
            self.b2 -= lr * np.clip(dh2, -1, 1)
            self.w1 -= lr * dw1
            self.b1 -= lr * np.clip(dh1, -1, 1)

    def clone(self):
        """Return an independent copy of this network."""
        new = QNet(self.ss, self.as_)
        for attr in self._PARAMS:
            setattr(new, attr, getattr(self, attr).copy())
        return new

    def to_dict(self):
        """Return all parameters as nested lists (JSON-serializable)."""
        return {k: getattr(self, k).tolist() for k in self._PARAMS}

    def from_dict(self, d):
        """Load parameters produced by ``to_dict``.

        The load is all-or-nothing: every array must be present, numeric and
        have the same shape as the current parameter, otherwise the network
        is left untouched. This prevents a half-overwritten network and
        rejects weights saved for a different state/action size.

        Returns:
            True if the parameters were loaded, False otherwise.
        """
        loaded = {}
        try:
            for k in self._PARAMS:
                arr = np.array(d[k], dtype=float)
                if arr.shape != getattr(self, k).shape:
                    return False
                loaded[k] = arr
        except (KeyError, TypeError, ValueError):
            return False
        for k, arr in loaded.items():
            setattr(self, k, arr)
        return True

# ═══════════════════════════════════════════════════════════════
# RBED & REPLAY BUFFER
# ═══════════════════════════════════════════════════════════════
class RBED:
    """Reward-gated epsilon decay.

    Epsilon only shrinks when cumulative P&L reaches the current threshold;
    each time it does, the threshold is raised by ``inc``.
    """

    def __init__(self):
        self.eps = 0.7       # current exploration rate
        self.min_eps = 0.10  # floor for the exploration rate
        self.decay = 0.95    # multiplicative decay applied per milestone
        self.thresh = 0.0    # P&L level that triggers the next decay
        self.inc = 50.0      # threshold increment after each decay

    def update(self, pnl: float, n: int):
        """Decay epsilon if ``pnl`` reached the threshold; return epsilon.

        Nothing changes while fewer than 20 trades (``n``) have been recorded.
        """
        enough_trades = n >= 20
        if enough_trades and pnl >= self.thresh:
            self.eps = max(self.min_eps, self.eps * self.decay)
            self.thresh += self.inc
            log(f"‚úÖ RBED: Œµ‚Üí{self.eps:.3f}, Next: ${self.thresh:.2f}", "success")
        return self.eps

class PriorityReplay:
    """Fixed-capacity replay buffer with priority-weighted sampling.

    Once full, new experiences overwrite existing slots in ring order.
    """

    def __init__(self, cap=MEM_CAP, alpha=0.6):
        self.cap = cap
        self.alpha = alpha
        self.buf = []   # stored experiences
        self.pri = []   # priority of each stored experience
        self.pos = 0    # next slot to overwrite once the buffer is full

    def add(self, exp, td=1.0):
        """Store ``exp`` with priority ``(|td| + 0.01) ** alpha``."""
        priority = (abs(td) + 0.01) ** self.alpha
        if len(self.buf) >= self.cap:
            slot = self.pos
            self.buf[slot] = exp
            self.pri[slot] = priority
            self.pos = (slot + 1) % self.cap
            return
        self.buf.append(exp)
        self.pri.append(priority)

    def sample(self, n):
        """Draw ``n`` experiences, or [] when fewer than ``n`` are stored.

        Up to a third of the batch is drawn uniformly from experiences whose
        metadata 'pnl' is positive; the remainder is drawn without
        replacement from everything not yet picked, weighted by priority.
        """
        stored = len(self.buf)
        if stored < n:
            return []
        winners = [i for i, e in enumerate(self.buf) if e.metadata.get('pnl', 0) > 0]
        picked = random.sample(winners, min(n//3, len(winners))) if winners else []
        shortfall = n - len(picked)
        if shortfall > 0:
            pool = list(set(range(stored)) - set(picked))
            if pool:
                weights = np.array(self.pri)[pool]
                picked.extend(np.random.choice(pool, size=shortfall, replace=False,
                                               p=weights / weights.sum()))
        return [self.buf[i] for i in picked]

    def __len__(self):
        return len(self.buf)

# ═══════════════════════════════════════════════════════════════
# CONFIDENCE SYSTEM
# ═══════════════════════════════════════════════════════════════
class Confidence:
    """Turns a Q-value vector into a trade/no-trade decision and a score."""

    def __init__(self):
        self.temp = 1.0  # softmax temperature

    def softmax(self, q):
        """Temperature-scaled softmax, shifted by max(q) for numeric stability."""
        z = np.exp((q - np.max(q)) / self.temp)
        return z / np.sum(z)

    def get_conf(self, q: np.ndarray, eps: float, force=False) -> Tuple[bool, float, Dict]:
        """Score the Q-values and decide whether to trade.

        The score (0-100) blends the top softmax probability (50%), one
        minus the normalized entropy (30%) and the squashed gap between the
        two best Q-values (20%). The required threshold rises as ``eps``
        falls.

        Args:
            q: Q-values, one per action.
            eps: Current exploration rate.
            force: Use the lowest threshold and always trade.

        Returns:
            ``(trade, confidence, metrics)`` as a plain ``bool``, a plain
            ``float`` and a dict of the intermediate values.
        """
        sq = np.sort(q)[::-1]
        spread = sq[0] - sq[1] if len(sq) > 1 else 0.0
        probs = self.softmax(q)
        best = np.max(probs)
        clipped = np.clip(probs, 1e-10, 1)
        ent = -np.sum(clipped * np.log(clipped))
        # log(1) == 0: a single-action vector has no entropy to normalize.
        norm_ent = ent / np.log(len(q)) if len(q) > 1 else 0.0
        conf = (0.5*best + 0.3*(1-norm_ent) + 0.2*np.tanh(spread*5)) * 100
        prog = 1 - (eps - 0.10) / 0.6

        if force: thresh = 5.0
        elif eps > 0.6: thresh = 10.0
        elif eps > 0.4: thresh = 14.0
        elif eps > 0.25: thresh = 18.0
        elif eps > 0.15: thresh = 21.0
        else: thresh = 25.0

        metrics = {'q_spread': float(spread), 'best_prob': float(best), 'entropy': float(norm_ent),
                  'confidence': float(np.clip(conf, 0, 100)), 'threshold': float(thresh), 'progress': float(prog)}
        # Native types keep the result JSON-friendly and match the annotation.
        trade = bool(conf >= thresh or spread >= 0.03 or force)
        return trade, float(conf), metrics

    def calc_size(self, base: float, conf: float) -> float:
        """Scale ``base`` linearly from 50% (conf 0) to 100% (conf 100)."""
        return base * (0.5 + (conf/100) * 0.5)

# ═══════════════════════════════════════════════════════════════
# RL AGENT
# ═══════════════════════════════════════════════════════════════
class RLAgent:
    """DQN-style agent: online network, target network, prioritized replay
    and reward-gated epsilon decay, with state persisted through ``P``.
    """

    def __init__(self):
        self.qnet = QNet()
        self.tnet = QNet()
        self.mem = PriorityReplay()
        self.rbed = RBED()
        self.cnt = 0
        self.stats = {'total_updates': 0, 'total_trades': 0, 'profitable_trades': 0, 'total_pnl': 0.0,
                     'win_rate': 0.0, 'avg_reward': 0.0, 'epsilon_history': [], 'q_value_history': [],
                     'pipeline_trades_learned': 0, 'last_pipeline_sync': None, 'rbed_milestones': [],
                     'consecutive_losses': 0}
        self.consec_loss = 0
        self.load()
        log(f"üß† Agent init: {len(self.mem)} exp", "brain")

    def select(self, s, greedy=False, bt=False):
        """Pick an action index for state ``s`` (epsilon-greedy).

        Args:
            s: State vector.
            greedy: Never explore when True.
            bt: Backtest mode: epsilon is fixed at 0.3, and when the two best
                Q-values are within 0.1 of each other, action 2 is excluded.
        """
        eps = self.rbed.update(self.stats['total_pnl'], self.stats['total_trades'])
        if bt: eps = 0.3
        if not greedy and random.random() < eps:
            return random.randint(0, ACTIONS-1)
        q = self.qnet.predict(s)
        if bt and np.max(q) - np.sort(q)[-2] < 0.1:
            q[2] = -999
        return int(np.argmax(q))

    def remember(self, exp, td=1.0):
        """Store an experience with TD-error ``td`` as its priority input."""
        self.mem.add(exp, td)

    def learn(self):
        """Run one training step on a sampled batch (needs >= 200 experiences).

        Every TARGET_UPD steps the target network is replaced by a copy of the
        online network.
        """
        if len(self.mem) < 200: return
        batch = self.mem.sample(min(BATCH, len(self.mem)))
        if not batch: return
        states = np.array([np.array(e.state) for e in batch])
        actions = np.array([e.action for e in batch])
        rewards = np.array([e.reward for e in batch])
        next_states = np.array([np.array(e.next_state) for e in batch])
        dones = np.array([e.done for e in batch])
        curr_q = np.array([self.qnet.forward(s) for s in states])
        next_q = np.array([self.tnet.forward(s) for s in next_states])
        # Only the taken action's target changes; the rest keep their
        # current prediction so their error is zero.
        tgts = curr_q.copy()
        for i in range(len(batch)):
            tgts[i][actions[i]] = rewards[i] if dones[i] else rewards[i] + GAMMA * np.max(next_q[i])
        self.qnet.update(states, tgts, LR)
        self.cnt += 1
        self.stats['total_updates'] += 1
        avg_q = float(np.mean([np.max(q) for q in curr_q]))
        self.stats['q_value_history'].append(avg_q)
        self.stats['epsilon_history'].append(self.rbed.eps)
        if self.cnt % TARGET_UPD == 0:
            self.tnet = self.qnet.clone()
            log(f"üéØ Target update #{self.cnt}", "brain")

    def calc_reward(self, t: TradeOutcome) -> float:
        """Compute the shaped reward for a closed trade, clipped to [-800, 800].

        Also updates the consecutive-loss counter, which adds a growing
        penalty to each further loss.
        """
        r = 0.0
        if t.pnl > 0:
            r += t.pnl * PROFIT_SCALE + WIN_BONUS
        else:
            r += t.pnl * LOSS_SCALE - LOSS_PEN
        # P&L relative to the distance between entry and stop-loss.
        risk = abs(t.entry_price - t.sl) + EPS
        r += (t.pnl / risk) * SHARPE_SCALE
        if t.pnl < 0 and t.duration > 48: r -= 15
        elif t.pnl > 0 and t.duration < 24: r += 10
        if t.hit_tp: r += WIN_BONUS * 0.3
        if t.pnl < 0:
            self.consec_loss += 1
            r -= self.consec_loss * 8
        else:
            self.consec_loss = 0
        self.stats['consecutive_losses'] = self.consec_loss
        return float(np.clip(r, -800, 800))

    def record(self, t: TradeOutcome):
        """Update statistics with a closed trade, store it as a terminal
        experience and trigger a learning step once enough data exists."""
        self.stats['total_trades'] += 1
        self.stats['total_pnl'] += t.pnl
        if t.pnl > 0: self.stats['profitable_trades'] += 1
        self.stats['win_rate'] = self.stats['profitable_trades'] / self.stats['total_trades']
        r = self.calc_reward(t)
        self.stats['avg_reward'] = (self.stats['avg_reward']*(self.stats['total_trades']-1)+r)/self.stats['total_trades']
        act = 0 if t.action == 'BUY' else 1 if t.action == 'SELL' else 2
        state = t.state_at_entry if isinstance(t.state_at_entry, list) else t.state_at_entry.tolist()
        # The trade is terminal (done=True), so next_state is never used for
        # bootstrapping; the entry state is stored as a placeholder.
        exp = Experience(state=state, action=act, reward=r, next_state=state,
            done=True, metadata={'pair': t.pair, 'pnl': t.pnl, 'hit_tp': t.hit_tp, 'duration': t.duration})
        td = abs(r - self.qnet.predict(np.array(exp.state))[act])
        self.remember(exp, td)
        if len(self.mem) >= 200: self.learn()

    def learn_pipeline(self, db: PipelineDB, data: Dict) -> int:
        """Learn from completed pipeline trades newer than the last sync.

        Args:
            db: Pipeline database handle.
            data: Mapping pair -> {'1h': frame, '1d': frame} used to build a
                state for each trade (zeros when unavailable).

        Returns:
            Number of trades learned in this call.
        """
        since = self.stats.get('last_pipeline_sync')
        since_ts = None
        if isinstance(since, (int, float)) and not isinstance(since, bool):
            # The marker is copied from the DB's evaluated_at column; if that
            # column is numeric it can be used as-is. Previously such a value
            # was dropped, so every sync re-learned all trades.
            since_ts = since
        elif isinstance(since, str) and since:
            try:
                since_ts = int(datetime.fromisoformat(since.replace('Z','+00:00')).timestamp())
            except ValueError:
                since_ts = None
        trades = db.get_trades(since=since_ts, limit=500)
        learned = skipped = 0
        for row in trades:
            try:
                pair, tf, model, entry, exit_px, sl, tp, pred, hit_tp, pnl, pnl_pct, dur, created, eval_ts = row
                # An explicit prediction decides the direction; the price move
                # is only a fallback. (Before, a 'sell' whose price rose was
                # recorded as BUY.)
                pred_norm = pred.strip().lower() if isinstance(pred, str) else ''
                if pred_norm in ('buy', 'sell'):
                    action = pred_norm.upper()
                elif exit_px > entry:
                    action = 'BUY'
                elif exit_px < entry:
                    action = 'SELL'
                else:
                    continue
                state = np.zeros(STATE_SIZE)
                if pair in data and '1h' in data[pair]:
                    try: state = create_state(data[pair]['1h'], data[pair]['1d'], pair)
                    except Exception: pass  # keep the zero state
                t = TradeOutcome(pair=pair, action=action, entry_price=float(entry), exit_price=float(exit_px),
                    sl=float(sl), tp=float(tp), position_size=1.0, pnl=float(pnl), duration=float(dur),
                    hit_tp=bool(hit_tp), timestamp_entry=created, timestamp_exit=eval_ts,
                    state_at_entry=state.tolist(), confidence=0.0, regime='PIPELINE', session='BACKFILL')
                self.record(t)
                learned += 1
            except Exception:
                skipped += 1
                continue
        # Rows are ordered newest first, so the first row carries the marker.
        if trades: self.stats['last_pipeline_sync'] = trades[0][-1]
        self.stats['pipeline_trades_learned'] = self.stats.get('pipeline_trades_learned', 0) + learned
        if skipped:
            log(f"Pipeline: skipped {skipped} unusable rows", "warn")
        log(f"üß† Pipeline: {learned} trades", "brain")
        return learned

    def save(self):
        """Persist replay memory (gzip), network weights and statistics."""
        try:
            P.save(RL_MEM, [e.to_dict() for e in list(self.mem.buf)], compress=True)
            P.save(RL_WEIGHTS, {'q_network': self.qnet.to_dict(), 'target_network': self.tnet.to_dict()}, compress=False)
            P.save(RL_STATS, self.stats, compress=False)
            log(f"üíæ Saved: {len(self.mem)} exp, {self.stats['total_trades']} trades", "success")
        except Exception as e:
            log(f"Save failed: {e}", "warn")

    def load(self):
        """Restore replay memory, networks and statistics from disk.

        The three parts are loaded independently, so a problem with one (for
        example a malformed experience record) no longer prevents the others
        from being restored.
        """
        try:
            mem_data = P.load(RL_MEM, compress=True)
            if mem_data:
                restored = 0
                for raw in mem_data:
                    try:
                        self.mem.add(Experience.from_dict(raw), 1.0)
                        restored += 1
                    except TypeError:
                        continue  # record does not match the Experience fields
                log(f"‚úÖ Loaded {restored} exp", "success")
        except Exception as e:
            log(f"Load failed: {e}", "warn")

        try:
            net_data = P.load(RL_WEIGHTS, compress=False)
            if net_data:
                q_ok = self.qnet.from_dict(net_data.get('q_network',{}))
                t_ok = self.tnet.from_dict(net_data.get('target_network',{}))
                if q_ok and not t_ok:
                    # Keep the pair consistent rather than pairing trained
                    # weights with a random target network.
                    self.tnet = self.qnet.clone()
                if q_ok and t_ok:
                    log("‚úÖ Loaded networks", "success")
        except Exception as e:
            log(f"Load failed: {e}", "warn")

        try:
            stats = P.load(RL_STATS, compress=False)
            if stats and isinstance(stats, dict):
                # Merge into the defaults so keys missing from an older stats
                # file do not cause KeyError later.
                self.stats.update(stats)
                if self.stats.get('epsilon_history'):
                    self.rbed.eps = self.stats['epsilon_history'][-1]
                if self.stats.get('rbed_milestones'):
                    self.rbed.thresh = self.stats['rbed_milestones'][-1] + 50.0
                log(f"‚úÖ Stats: {self.stats['total_trades']} trades, Œµ={self.rbed.eps:.3f}", "success")
        except Exception as e:
            log(f"Load failed: {e}", "warn")

# ═══════════════════════════════════════════════════════════════
# TRADING ENVIRONMENT
# ═══════════════════════════════════════════════════════════════
class TradingEnv:
    """Tracks open simulated trades in memory and a persisted trade history."""

    def __init__(self):
        self.active = {}
        self.history = P.load(TRADES, compress=False) or []

    def save_hist(self):
        """Write the trade history to TRADES."""
        P.save(TRADES, self.history, compress=False)

    def execute(self, pair: str, action: str, price: float, sl: float, tp: float, size: float,
                state: np.ndarray, meta: Dict) -> str:
        """Open a trade and return its id (pair plus local timestamp)."""
        tid = f"{pair}_{datetime.now():%Y%m%d_%H%M%S}"
        opened = {
            'pair': pair,
            'action': action,
            'entry_price': price,
            'sl': sl,
            'tp': tp,
            'size': size,
            'entry_time': datetime.now(timezone.utc).isoformat(),
            'state_at_entry': state.tolist(),
            'metadata': meta,
        }
        self.active[tid] = opened
        log(f"üí∞ {tid} - {action} {pair} @ {price:.5f}", "money")
        return tid

    def check_exits(self, prices: Dict[str, float]) -> List[TradeOutcome]:
        """Close every open trade whose stop-loss or take-profit was reached.

        Args:
            prices: Current price per pair; trades for other pairs are left open.

        Returns:
            The outcomes of the trades closed by this call. The history is
            saved when at least one trade closed.
        """
        completed = []
        for tid, trade in list(self.active.items()):
            pair = trade['pair']
            if pair not in prices:
                continue
            cp = prices[pair]
            is_buy = trade['action'] == 'BUY'
            if is_buy:
                hit_tp, hit_sl = cp >= trade['tp'], cp <= trade['sl']
            else:
                hit_tp, hit_sl = cp <= trade['tp'], cp >= trade['sl']
            if not (hit_tp or hit_sl):
                continue

            # Exits are booked at the level itself, not at the observed price.
            exit_p = trade['tp'] if hit_tp else trade['sl']
            entry_p = trade['entry_price']
            size = trade['size']
            pnl = ((exit_p-entry_p) if is_buy else (entry_p-exit_p))*size
            # Cost deduction: a flat term plus a size-proportional term.
            pnl -= exit_p * 0.0003 + exit_p * size * 0.0005

            entry_t = datetime.fromisoformat(trade['entry_time'])
            exit_t = datetime.now(timezone.utc)
            dur = (exit_t - entry_t).total_seconds() / 3600.0
            meta = trade['metadata']
            outcome_label = 'WIN' if hit_tp else 'LOSS'

            completed.append(TradeOutcome(
                pair=pair, action=trade['action'], entry_price=entry_p,
                exit_price=exit_p, sl=trade['sl'], tp=trade['tp'], position_size=size,
                pnl=pnl, duration=dur, hit_tp=hit_tp, timestamp_entry=trade['entry_time'],
                timestamp_exit=exit_t.isoformat(), state_at_entry=trade['state_at_entry'],
                confidence=meta.get('confidence',0), regime=meta.get('regime','UNKNOWN'),
                session=meta.get('session','UNKNOWN')))
            self.history.append({
                'trade_id': tid,
                'pair': pair,
                'action': trade['action'],
                'entry': entry_p,
                'exit': exit_p,
                'pnl': pnl,
                'result': outcome_label,
                'duration_hours': dur,
                'timestamp': exit_t.isoformat(),
            })
            del self.active[tid]
            log(f"‚úÖ {tid} - {'WIN' if hit_tp else 'LOSS'} | ${pnl:.2f}", "success" if pnl>0 else "warn")
        if completed:
            self.save_hist()
        return completed

# ═══════════════════════════════════════════════════════════════
# WEEKEND BACKTEST
# ═══════════════════════════════════════════════════════════════
def _daily_history(df_1d, bar_time, i):
    """Return the daily bars that were known 24 hours before ``bar_time``.

    Falls back to the original positional slice when the indexes cannot be
    compared by time (non-datetime index, mixed tz-awareness).
    """
    try:
        cutoff = bar_time - pd.Timedelta(hours=24)
        return df_1d.loc[df_1d.index <= cutoff]
    except (TypeError, ValueError):
        return df_1d.iloc[:max(0, i-24)]


def backtest(data: Dict, agent: RLAgent, conf: Confidence):
    """Replay recent hourly history and let the agent learn from simulated trades.

    For each pair, up to 400 entry points are taken from the last ~1200
    hourly bars (every second bar). At each one the agent picks an action; a
    BUY/SELL is simulated with ATR-based stop-loss and take-profit for up to
    72 bars on closing prices, and the outcome is passed to ``agent.record``.

    Args:
        data: Mapping pair -> {'1h': frame, '1d': frame}.
        agent: Agent that selects actions and records outcomes.
        conf: Confidence scorer (used only to label the outcome).

    Returns:
        Number of simulated trades recorded.
    """
    log("\nüéì Weekend backtest...", "brain")
    learned = skipped = 0
    for pair in PAIRS:
        # Both timeframes are required; a missing '1d' used to raise KeyError.
        if pair not in data or '1h' not in data[pair] or '1d' not in data[pair]:
            continue
        df_1h, df_1d = data[pair]['1h'], data[pair]['1d']
        start, end = max(100, len(df_1h)-1200), len(df_1h)-10
        samples = list(range(start, end, 2))[-400:]
        for i in samples:
            try:
                # `i` is a row number in the hourly frame, so it cannot be
                # used to slice the daily frame: that handed the state future
                # daily bars. Cut the daily history by timestamp instead.
                daily = _daily_history(df_1d, df_1h.index[i], i)
                state = create_state(df_1h.iloc[:i], daily, pair)
                act = agent.select(state, greedy=False, bt=True)
                q = agent.qnet.predict(state)
                _, confidence, _ = conf.get_conf(q, agent.rbed.eps, force=True)
                direction = ['BUY','SELL','HOLD'][act]
                if direction == 'HOLD':
                    continue
                is_buy = direction == 'BUY'
                entry = df_1h['close'].iloc[i]
                atr = df_1h['atr'].iloc[i]
                sl = entry - (atr*ATR_SL) if is_buy else entry + (atr*ATR_SL)
                tp = entry + (atr*ATR_TP) if is_buy else entry - (atr*ATR_TP)

                # Walk forward until take-profit or stop-loss is reached;
                # take-profit is checked first on each bar.
                hit_tp = hit_sl = False
                exit_i = i + 1
                for j in range(i+1, min(i+72, len(df_1h))):
                    cp = df_1h['close'].iloc[j]
                    reached_tp = cp >= tp if is_buy else cp <= tp
                    reached_sl = cp <= sl if is_buy else cp >= sl
                    if reached_tp:
                        hit_tp, exit_i = True, j
                        break
                    if reached_sl:
                        hit_sl, exit_i = True, j
                        break
                # Neither level reached: close at the end of the window.
                if not hit_tp and not hit_sl:
                    exit_i = min(i+72, len(df_1h)-1)
                exit_p = tp if hit_tp else (sl if hit_sl else df_1h['close'].iloc[exit_i])
                pnl = ((exit_p-entry) if is_buy else (entry-exit_p)) * 1.0
                out = TradeOutcome(pair=pair, action=direction, entry_price=entry, exit_price=exit_p,
                    sl=sl, tp=tp, position_size=1.0, pnl=pnl, duration=(exit_i-i)*1.0, hit_tp=hit_tp,
                    timestamp_entry=str(df_1h.index[i]), timestamp_exit=str(df_1h.index[exit_i]),
                    state_at_entry=state.tolist(), confidence=confidence, regime='BACKTEST', session='WEEKEND')
                agent.record(out)
                learned += 1
            except Exception:
                skipped += 1
                continue
    if skipped:
        log(f"Backtest: skipped {skipped} samples due to errors", "warn")
    log(f"‚úÖ Backtest: {learned} trades", "success")
    return learned

# ═══════════════════════════════════════════════════════════════
# DATA LOADING
# ═══════════════════════════════════════════════════════════════
def fetch_price(pair, timeout=10):
    """Fetch the current rate for ``pair`` by scraping x-rates via Browserless.

    Args:
        pair: Currency pair in "BASE/QUOTE" form.
        timeout: Request timeout in seconds.

    Returns:
        The rate as a float, or None when no token is configured, the
        request fails, or the page does not contain a parsable result.
    """
    if not BROWSER_TOKEN:
        return None
    try:
        fc, tc = pair.split("/")
        r = requests.post(f"https://production-sfo.browserless.io/content?token={BROWSER_TOKEN}",
            json={"url": f"https://www.x-rates.com/calculator/?from={fc}&to={tc}&amount=1"}, timeout=timeout)
        r.raise_for_status()
        m = re.search(r'ccOutputRslt[^>]*>([\d,.]+)', r.text)
        return float(m.group(1).replace(",", "")) if m else None
    except Exception as e:
        # Log only the exception type: request errors can embed the full URL,
        # which contains the API token.
        log(f"Price fetch failed {pair}: {type(e).__name__}", "warn")
        return None

def ensure_atr(df):
    """Return ``df`` with a usable 'atr' column.

    If an 'atr' column exists and its median exceeds MIN_ATR, a copy is
    returned with gaps filled and values floored at MIN_ATR. Otherwise ATR is
    recomputed as the rolling mean (ATR_PER bars) of the true range, written
    into ``df`` in place, and ``df`` is returned.

    Args:
        df: Frame with 'high', 'low' and 'close' columns; may be empty.
    """
    if "atr" in df.columns and df["atr"].median() > MIN_ATR:
        return df.assign(atr=df["atr"].fillna(MIN_ATR).clip(lower=MIN_ATR))
    h, l, c = df["high"].values, df["low"].values, df["close"].values
    prev_close = np.roll(c, 1)
    tr = np.maximum.reduce([h-l, np.abs(h-prev_close), np.abs(l-prev_close)])
    # np.roll wraps the last close around to position 0, so the first bar has
    # no real previous close: use its plain high-low range. Guarded because
    # indexing element 0 of an empty frame raised IndexError.
    if len(tr) > 0:
        tr[0] = h[0] - l[0]
    df["atr"] = pd.Series(tr, index=df.index).rolling(ATR_PER, min_periods=1).mean().fillna(MIN_ATR).clip(lower=MIN_ATR)
    return df

def update_data():
    """Append the latest fetched price as a new bar to each pair's data files.

    For every pair with a valid price, each matching pickle in DIRS["data"]
    (except model, cache and backup files) gets one flat bar
    (open = high = low = close = price, volume 0) stamped with the current
    local minute, is trimmed to the last 5000 rows, has its ATR refreshed and
    is written back gzip-compressed.

    Returns:
        Number of files updated.
    """
    log("üîÑ Updating data...", "info")
    cnt = 0
    skip_markers = ('_model', 'indicator_cache', '.bak')
    required = ('open', 'high', 'low', 'close')
    for pair in PAIRS:
        p = fetch_price(pair)
        if not p or p <= 0:
            continue
        price = float(p)
        pk = pair.replace("/", "_")
        for pkl in DIRS["data"].glob(f"{pk}*.pkl"):
            if any(x in pkl.name for x in skip_markers):
                continue
            try:
                # NOTE(review): unpickling executes arbitrary code; these
                # files must only ever come from this project's own pipeline.
                try:
                    df = pd.read_pickle(pkl, compression='gzip')
                except Exception:
                    # Not gzip-compressed: retry as a plain pickle.
                    df = pd.read_pickle(pkl, compression=None)
                if not isinstance(df, pd.DataFrame) or len(df) < 10:
                    continue
                if not all(c in df.columns for c in required):
                    continue
                last_t = df.index[-1]
                new_t = datetime.now().replace(second=0, microsecond=0)
                if new_t > last_t:
                    new_row = pd.DataFrame({'open':[price],'high':[price],'low':[price],
                        'close':[price],'volume':[0]}, index=[new_t])
                    df = pd.concat([df, new_row]).tail(5000).ffill().bfill()
                    df = ensure_atr(df)
                    df.to_pickle(pkl, compression='gzip')
                    cnt += 1
            except Exception as e:
                # One bad file must not stop the others, but should be visible.
                log(f"Update failed {pkl.name}: {e}", "warn")
    log(f"‚úÖ Updated {cnt} files", "success")
    return cnt

def load_data(folder):
    """Load per-pair OHLC frames from the pickles in ``folder``.

    Files are grouped by the two currency codes at the start of their name
    ("EUR_USD_..."); only pairs listed in PAIRS are kept. A file is treated
    as daily when its stem contains "1d" or "daily", otherwise as hourly.
    Each frame is gap-filled, given a tz-naive datetime index and an ATR
    column.

    Args:
        folder: Directory (Path) to scan for ``*.pkl`` files.

    Returns:
        Dict pair -> {'1h': frame, '1d': frame} with whichever timeframes
        could be loaded; empty when the folder does not exist.
    """
    log(f"üìÇ Loading from {folder}", "info")
    if not folder.exists():
        return {}
    skip_markers = ('_model', 'indicator_cache', '.bak')
    required = ['open', 'high', 'low', 'close']
    currencies = {"EUR","GBP","USD","AUD","NZD","CAD","CHF","JPY"}

    pair_files = defaultdict(list)
    for pkl in folder.glob("*.pkl"):
        if any(s in pkl.name for s in skip_markers):
            continue
        parts = pkl.stem.split('_')
        if len(parts) >= 2 and parts[0] in currencies and parts[1] in currencies:
            pair_files[f"{parts[0]}_{parts[1]}"].append(pkl)

    combined = {}
    for pk, files in pair_files.items():
        pair = f"{pk[:3]}/{pk[4:]}"
        if pair not in PAIRS:
            continue
        pair_data = {}
        for pkl in files:
            try:
                # NOTE(review): unpickling executes arbitrary code; these
                # files must only ever come from this project's own pipeline.
                try:
                    df = pd.read_pickle(pkl, compression='gzip')
                except Exception:
                    # Not gzip-compressed: retry as a plain pickle.
                    df = pd.read_pickle(pkl, compression=None)
                if not isinstance(df, pd.DataFrame) or len(df) < 50:
                    continue
                if not all(c in df.columns for c in required):
                    continue
                df = df.ffill().bfill().dropna(subset=required)
                df.index = pd.to_datetime(df.index, errors="coerce")
                if df.index.tz:
                    df.index = df.index.tz_localize(None)
                df = df[df.index.notna()]
                # Later files of the same timeframe replace earlier ones.
                tf = "1d" if "1d" in pkl.stem or "daily" in pkl.stem else "1h"
                df = ensure_atr(df)
                pair_data[tf] = df
                log(f"‚úÖ {pair} [{tf}]: {len(df)} rows", "success")
            except Exception as e:
                # One bad file must not stop the others, but should be visible.
                log(f"Load failed {pkl.name}: {e}", "warn")
        if pair_data:
            combined[pair] = pair_data
    log(f"‚úÖ Loaded {len(combined)} pairs", "success")
    return combined

# ═══════════════════════════════════════════════════════════════
# EMAIL & GIT
# ═══════════════════════════════════════════════════════════════
def send_email(sigs, it, stats, mode, pipe_stats):
    """Email an HTML summary of the current cycle to the configured account.

    The message is sent from and to ``GMAIL`` over SMTP-SSL. Nothing is sent
    when ``GMAIL_PWD`` is empty. Every failure is logged and swallowed so a
    mail problem never aborts the calling cycle.

    Args:
        sigs: Mapping of pair -> signal dict. Entries whose ``'direction'``
            is not ``'HOLD'`` are counted as active signals.
        it: Iteration number shown in the subject line and the header.
        stats: Statistics dict; ``total_trades``, ``win_rate`` and
            ``total_pnl`` are read, each defaulting to zero when absent.
        mode: Run-mode label. ``"WEEKEND_LEARNING"`` gets an amber badge,
            any other value a green one.
        pipe_stats: Accepted for call compatibility; not used in the body.

    Returns:
        None.
    """
    if not GMAIL_PWD:
        return
    try:
        msg = MIMEMultipart('alternative')
        msg['Subject'] = f"üéì BEACON v20.1 [{mode}] #{it}"
        msg['From'] = msg['To'] = GMAIL
        active = sum(1 for s in sigs.values() if s.get('direction')!='HOLD')
        wr = stats.get('win_rate',0)*100
        pnl = stats.get('total_pnl',0.0)
        badge_bg = "#f59e0b" if mode=="WEEKEND_LEARNING" else "#10b981"
        # The header labels this time as UTC, so it must be taken in UTC
        # rather than in the host's local timezone.
        stamp = f"{datetime.now(timezone.utc):%Y-%m-%d %H:%M UTC}"
        html = f"""<!DOCTYPE html><html><head><style>
body{{font-family:sans-serif;background:#0f172a;margin:0;padding:20px}}
.c{{max-width:1000px;margin:0 auto;background:#fff;border-radius:12px;box-shadow:0 10px 40px rgba(0,0,0,0.4)}}
.h{{background:linear-gradient(135deg,#2563eb,#1d4ed8);color:#fff;padding:50px;text-align:center}}
.h h1{{margin:0;font-size:38px;font-weight:900}}
.badge{{background:{badge_bg};padding:12px 24px;border-radius:30px;margin-top:18px;font-weight:800}}
.stats{{background:#dbeafe;padding:25px;margin:25px;border-radius:10px}}
.grid{{display:grid;grid-template-columns:repeat(auto-fit,minmax(150px,1fr));gap:15px}}
.item{{background:#fff;padding:15px;border-radius:8px;text-align:center}}
.val{{font-size:28px;font-weight:900;color:#2563eb}}
.lbl{{font-size:12px;color:#6b7280;margin-top:5px}}
</style></head><body><div class="c"><div class="h"><h1>üéì BEACON v20.1</h1>
<div class="badge">{mode}</div><p style="margin:20px 0 0">#{it} | {stamp}</p></div>
<div class="stats"><div style="font-size:20px;font-weight:800;margin-bottom:15px">üß† RL Stats</div>
<div class="grid">
<div class="item"><div class="val">{stats.get('total_trades',0)}</div><div class="lbl">TRADES</div></div>
<div class="item"><div class="val">{wr:.1f}%</div><div class="lbl">WIN RATE</div></div>
<div class="item"><div class="val">${pnl:.2f}</div><div class="lbl">P&L</div></div>
<div class="item"><div class="val">{active}</div><div class="lbl">SIGNALS</div></div>
</div></div></div></body></html>"""
        msg.attach(MIMEText(html, 'html'))
        # Bound the connection so a stalled SMTP server cannot hang the
        # cycle; a timeout surfaces as an exception handled below.
        with smtplib.SMTP_SSL('smtp.gmail.com', 465, timeout=30) as srv:
            srv.login(GMAIL, GMAIL_PWD)
            srv.send_message(msg)
        log("‚úÖ Email sent", "success")
    except Exception as e:
        # Best-effort notification: report the failure and carry on.
        log(f"Email failed: {e}", "error")

def push_git(files, msg):
    """Stage, commit and push the given files to ``origin/main``.

    Does nothing (returns ``False``) when running inside GitHub Actions or
    when no personal access token (``PAT``) is configured. The repository is
    ``SAVE`` if it contains a ``.git`` directory, otherwise ``BASE``; if
    neither is a checkout, the remote is cloned into ``BASE`` first.

    The add/commit/pull steps are best-effort (their exit codes are
    ignored); only the final ``git push`` decides the return value.

    Side effect: the process working directory is changed to the repository
    for the git commands and is set to ``SAVE`` before returning.

    Args:
        files: Iterable of paths relative to the repository root. Paths that
            do not exist under the repository are skipped.
        msg: Commit message.

    Returns:
        ``True`` when ``git push`` exits with status 0, ``False`` otherwise
        (including when any step raises).
    """
    if IN_GHA or not PAT:
        return False
    try:
        url = f"https://{GH_USER}:{PAT}@github.com/{GH_USER}/{GH_REPO}.git"
        repo = SAVE if (SAVE/".git").exists() else BASE
        if not (repo/".git").exists():
            subprocess.run(["git","clone",url,str(repo)], check=True)
        os.chdir(repo)
        for f in files:
            if (repo/f).exists():
                subprocess.run(["git","add",str(f)], check=False)
        subprocess.run(["git","commit","-m",msg], check=False)
        subprocess.run(["git","pull","--rebase","origin","main"], check=False)
        return subprocess.run(["git","push","origin","main"]).returncode == 0
    except Exception as exc:
        # Log only the exception type: the text of a CalledProcessError
        # contains the clone command line, which embeds the PAT.
        log(f"Git push failed: {type(exc).__name__}", "error")
        return False
    finally:
        # Leave the process in SAVE regardless of how the push went.
        try:
            os.chdir(SAVE)
        except OSError:
            pass

# ═══════════════════════════════════════════════════════════════
# MAIN
# ═══════════════════════════════════════════════════════════════
def main():
    """Run one complete cycle of the pipeline.

    In order, the cycle:
      1. determines the run mode via ``get_mode()``;
      2. performs a "smart reset" when the stored version marker is missing,
         unreadable, or different from ``"20.1"``;
      3. bumps the iteration counter and builds the agent, environment,
         confidence helper and pipeline DB handle;
      4. refreshes data (live mode only), loads it, learns from pipeline
         trades when a DB connection exists, and backtests in weekend mode;
      5. collects a price per pair, closes finished trades and feeds them
         back to the agent;
      6. produces a signal per pair and, in live mode, opens new positions;
      7. saves signals and agent state, logs diagnostics, emails the report
         (live mode only) and pushes the output files to git.

    Raises:
        ValueError: when ``load_data`` returns nothing ("No data").
        Exception: anything raised inside the cycle is logged and re-raised.
    """
    log("="*70, "rocket")
    log("üéì TRADE BEACON v20.1 OPTIMIZED", "brain")
    log("="*70, "rocket")
    mode = get_mode()
    log(f"üìÖ Mode: {mode}", "info")

    # Smart reset: triggered when VERSION_FILE is absent, cannot be read,
    # or holds anything other than the current version string.
    ver = "20.1"
    needs_reset = False
    if VERSION_FILE.exists():
        try:
            with open(VERSION_FILE, 'r') as f:
                if f.read().strip() != ver: needs_reset = True
        except: needs_reset = True
    else: needs_reset = True

    if needs_reset:
        log("üî• SMART RESET - Preserving experiences", "warn")
        old_stats = P.load(RL_STATS, compress=False)
        # The stored network weights are removed on reset.
        if RL_WEIGHTS.exists(): RL_WEIGHTS.unlink()
        # Trade counters, P&L, win rate and the pipeline-learned counter are
        # carried over from the previous stats (when any were loaded); the
        # remaining fields start from zero/empty. The full previous stats
        # dict is kept under 'v20_0_archive'.
        new_stats = {
            'total_updates': 0,
            'total_trades': old_stats.get('total_trades',0) if old_stats else 0,
            'profitable_trades': old_stats.get('profitable_trades',0) if old_stats else 0,
            'total_pnl': old_stats.get('total_pnl',0.0) if old_stats else 0.0,
            'win_rate': old_stats.get('win_rate',0.0) if old_stats else 0.0,
            'avg_reward': 0.0, 'epsilon_history': [], 'q_value_history': [],
            'pipeline_trades_learned': old_stats.get('pipeline_trades_learned',0) if old_stats else 0,
            'rbed_milestones': [], 'consecutive_losses': 0,
            'v20_0_archive': old_stats if old_stats else {},
            'reset_date': datetime.now(timezone.utc).isoformat(),
            'reset_reason': 'v20.1 Smart Reset'
        }
        P.save(RL_STATS, new_stats, compress=False)
        # Writing the marker prevents the reset from repeating next run.
        with open(VERSION_FILE, 'w') as f: f.write(ver)
        log("‚úÖ Reset complete", "success")

    # These are created outside the try block below, so an exception raised
    # here is neither logged by its handler nor followed by db.close().
    it = inc_iter()
    agent = RLAgent()
    # After a reset the agent's epsilon is forced to 0.7.
    if needs_reset: agent.rbed.eps = 0.7
    env = TradingEnv()
    conf = Confidence()
    db = PipelineDB()

    try:
        log(f"\nüìä Iteration #{it} | {ENV_NAME} | {mode}", "info")
        # Data is refreshed only in live mode; weekend runs use what is on disk.
        if mode == "LIVE_TRADING": update_data()
        data = load_data(DIRS["data"])
        if not data: raise ValueError("No data")

        # Pipeline learning requires a DB connection; otherwise fall back to
        # neutral values so later code can use both names unconditionally.
        if db.conn:
            pipe_learned = agent.learn_pipeline(db, data)
            pipe_stats = db.get_stats()
        else:
            pipe_learned, pipe_stats = 0, {}

        if mode == "WEEKEND_LEARNING":
            bt_trades = backtest(data, agent, conf)
            log(f"üéì Weekend: {bt_trades} backtest + {pipe_learned} pipeline", "brain")

        # One price per pair. Weekend mode takes the last 1h close from the
        # loaded data; any other mode tries fetch_price() first and falls
        # back to that same close. Pairs with no usable (truthy) price are
        # left out of the dict.
        prices = {}
        for pair in PAIRS:
            if mode == "WEEKEND_LEARNING":
                if pair in data and '1h' in data[pair]:
                    prices[pair] = data[pair]['1h'].iloc[-1]['close']
            else:
                p = fetch_price(pair) or (data[pair]['1h'].iloc[-1]['close'] if pair in data and '1h' in data[pair] else None)
                if p: prices[pair] = p

        # Trades reported as finished by the environment are handed to the agent.
        completed = env.check_exits(prices)
        for t in completed: agent.record(t)

        sigs = {}
        for pair in PAIRS:
            # Both timeframes are required to build a state. Without them a
            # minimal HOLD entry is stored (note: it has no SL/TP/confidence
            # keys, unlike the full entries built below).
            if pair not in data or '1h' not in data[pair] or '1d' not in data[pair]:
                sigs[pair] = {'direction': 'HOLD', 'last_price': prices.get(pair, 0)}
                continue
            state = create_state(data[pair]['1h'], data[pair]['1d'], pair)
            q = agent.qnet.predict(state)
            should, confidence, metrics = conf.get_conf(q, agent.rbed.eps)
            # Index of the largest Q-value maps to BUY/SELL/HOLD; the choice
            # is overridden to HOLD when the confidence check says not to act.
            best = np.argmax(q)
            direction = ['BUY','SELL','HOLD'][best] if should else 'HOLD'
            # NOTE(review): price falls back to 0 when the pair has no entry
            # in `prices`; SL/TP are then computed around 0 and the
            # `MAX_CAP / price` division below would raise ZeroDivisionError.
            price = prices.get(pair, 0)
            atr = data[pair]['1h']['atr'].iloc[-1]
            # Stop-loss / take-profit sit ATR_SL and ATR_TP multiples of the
            # latest ATR away from price, on opposite sides for BUY and SELL.
            if direction == 'BUY':
                sl, tp = price - (atr*ATR_SL), price + (atr*ATR_TP)
            elif direction == 'SELL':
                sl, tp = price + (atr*ATR_SL), price - (atr*ATR_TP)
            else:
                sl = tp = price
            sigs[pair] = {'direction': direction, 'last_price': price, 'SL': float(sl), 'TP': float(tp),
                'confidence': confidence, 'threshold': metrics['threshold'], 'rb_epsilon': agent.rbed.eps,
                'timestamp': datetime.now(timezone.utc).isoformat()}

            # Positions are opened only in live mode, for non-HOLD signals,
            # and while fewer than MAX_POS trades are active.
            if direction != 'HOLD' and len(env.active) < MAX_POS and mode == "LIVE_TRADING":
                # Base size: risk budget (CAPITAL * RISK) divided by the
                # distance to the stop; EPS keeps the divisor non-zero.
                base = (CAPITAL * RISK) / (abs(price - sl) + EPS)
                size = conf.calc_size(base, confidence)
                # Cap the size at MAX_CAP / price.
                size = min(size, MAX_CAP / price)
                env.execute(pair, direction, price, sl, tp, size, state,
                    {'confidence': confidence, 'rb_epsilon': agent.rbed.eps, 'regime': 'RL_v20.1', 'session': 'LIVE'})

        # Persist the signals document first, then the agent's own state.
        output = {'timestamp': datetime.now(timezone.utc).isoformat(), 'iteration': it,
            'version': 'v20.1-optimized', 'mode': mode, 'signals': sigs, 'rl_stats': agent.stats,
            'active_trades': len(env.active), 'pipeline_stats': pipe_stats}
        P.save(SIGNALS, output, compress=False)
        agent.save()

        log(f"\nüìä Stats: {agent.stats['total_trades']} trades, {agent.stats['win_rate']*100:.1f}% WR, ${agent.stats['total_pnl']:.2f} P&L, Œµ={agent.rbed.eps:.4f}", "brain")

        # Diagnostics: performance, learning progress, and the mean/std of
        # the last 100 recorded Q-values when any exist.
        log("\n" + "="*70, "brain")
        log("üìä RL AGENT DIAGNOSTICS", "brain")
        log("="*70, "brain")
        log(f"üéØ Performance: {agent.stats['total_trades']} trades | {agent.stats['win_rate']*100:.1f}% WR | ${agent.stats['total_pnl']:.2f} P&L", "brain")
        log(f"üß† Learning: Œµ={agent.rbed.eps:.4f} ({agent.rbed.eps*100:.1f}% explore) | Updates={agent.stats['total_updates']} | Memory={len(agent.mem)}/{MEM_CAP}", "brain")
        if agent.stats.get('q_value_history'):
            recent_q = agent.stats['q_value_history'][-100:]
            log(f"üìà Q-Values: Œº={np.mean(recent_q):.4f}, œÉ={np.std(recent_q):.4f}", "brain")
        # Win-rate bands: below 25%, 25% up to (not including) 35%, and
        # 35% or more (the last message says "> 35%" but covers exactly 35%).
        if agent.stats['win_rate'] < 0.25:
            log("‚ö†Ô∏è Win rate < 25% - Continue training", "warn")
        elif agent.stats['win_rate'] < 0.35:
            log("‚ö° Win rate 25-35% - Getting better", "warn")
        else:
            log("‚úÖ Win rate > 35% - Ready for live", "success")
        log("="*70, "brain")

        # The email report is sent in live mode only.
        if mode == "LIVE_TRADING": send_email(sigs, it, agent.stats, mode, pipe_stats)

        # File format summary: fixed informational messages; they are not
        # derived from the files actually written during this run.
        log("\nüìù File Format Summary:", "info")
        log("  ‚úÖ omega_signals.json - UNCOMPRESSED (dashboard ready)", "success")
        log("  ‚úÖ omega_iteration.json - UNCOMPRESSED (dashboard ready)", "success")
        log("  ‚úÖ learning_stats.json - UNCOMPRESSED (dashboard ready)", "success")
        log("  ‚úÖ network_weights.json - UNCOMPRESSED (dashboard ready)", "success")
        log("  ‚úÖ trade_history.json - UNCOMPRESSED (dashboard ready)", "success")
        log("  ‚úÖ experience_replay.json.gz - COMPRESSED (space-efficient)", "success")

        # Paths handed to push_git; the return value of push_git is ignored,
        # so a failed push does not fail the cycle.
        files = [f"outputs/{SIGNALS.name}", f"omega_state/{ITER_FILE.name}", f"rl_memory/{RL_MEM.name}",
                f"rl_memory/{RL_STATS.name}", f"rl_memory/{TRADES.name}", f"rl_memory/{RL_WEIGHTS.name}",
                f"rl_memory/version.txt"]
        commit = f"üéì v20.1 #{it} [{mode}] RBŒµ={agent.rbed.eps:.3f} WR={agent.stats['win_rate']*100:.1f}% P&L=${agent.stats['total_pnl']:.2f}"
        push_git(files, commit)

        log("\n" + "="*70, "success")
        log("‚úÖ CYCLE COMPLETE - RESEARCH-BACKED & OPTIMIZED!", "success")
        log("="*70, "success")
        log(f"Iteration: #{it} ({ENV_NAME})", "info")
        log(f"Mode: {mode}", "info")
        log(f"RL Trades: {agent.stats['total_trades']}", "brain")
        log(f"Pipeline Trades: {agent.stats.get('pipeline_trades_learned', 0)}", "db")
        log(f"Win Rate: {agent.stats['win_rate']*100:.1f}%", "info")
        log(f"Total P&L: ${agent.stats['total_pnl']:.2f}", "money")
        log(f"Active Trades: {len(env.active)}", "info")
        log(f"Epsilon: {agent.rbed.eps:.3f}", "info")
        log(f"Memory: {len(agent.mem)} samples", "brain")

        # Health check: a readiness verdict is only given once more than
        # 100 trades have been recorded.
        if agent.stats['total_trades'] > 100:
            if agent.stats['win_rate'] >= 0.35:
                log("\n‚úÖ Agent ready for cautious live trading", "success")
            elif agent.stats['win_rate'] >= 0.25:
                log("\n‚ö° Continue weekend training before live", "warn")
            else:
                log("\n‚ö†Ô∏è More training needed", "warn")
    except Exception as e:
        # Log with traceback, then propagate so the caller sees the failure.
        log(f"\n‚ùå Error: {e}", "error")
        logging.exception("Fatal error")
        raise
    finally:
        # Runs on success and on failure alike: release the DB connection
        # and emit the closing log line.
        if db.conn: db.close()
        log(f"\nüéì Cycle complete #{it}", "brain")

# Script entry point: run one cycle when this module is executed directly.
# The final log line is reached only when main() returns without raising.
if __name__ == "__main__":
    main()
    log("\nüéì Research-Backed Implementation Ready", "success")