In [None]:
# ======================================================
# 🔑 API Keys Configuration
# ======================================================
import os

# Read API credentials from the environment ONLY.
#
# Secrets must never be hard-coded as fallback defaults: anything committed
# to the notebook is readable by everyone with access to the repository.
# Hard-coded defaults also made the "not set" warnings below unreachable.
# Provide the values through GitHub/Colab secrets or a local environment
# variable instead.
def mask_secret(value, visible=4):
    """Return *value* with only its first and last *visible* characters shown.

    Secrets that are too short to mask safely (``len <= 2 * visible``) are
    replaced entirely by asterisks so the whole value is never printed.
    """
    if len(value) <= visible * 2:
        return "*" * len(value)
    return f"{value[:visible]}...{value[-visible:]}"


ALPHA_VANTAGE_KEY = os.environ.get('ALPHA_VANTAGE_KEY', '')
BROWSERLESS_TOKEN = os.environ.get('BROWSERLESS_TOKEN', '')

# NOTE: the values come straight from os.environ, so downstream code that
# reads the same environment variables already sees them; nothing needs to be
# written back.

# Validate (and show a masked preview so the operator can confirm which
# credential is active without exposing it)
if not ALPHA_VANTAGE_KEY:
    print("‚ö†Ô∏è Warning: ALPHA_VANTAGE_KEY not set!")
else:
    print(f"‚úÖ Alpha Vantage Key: {mask_secret(ALPHA_VANTAGE_KEY)}")

if not BROWSERLESS_TOKEN:
    print("‚ö†Ô∏è Warning: BROWSERLESS_TOKEN not set!")
else:
    print(f"‚úÖ Browserless Token: {mask_secret(BROWSERLESS_TOKEN)}")

In [None]:
# ======================================================
# 🌍 Environment Detection & Setup (MUST RUN FIRST!)
# ======================================================
import os
import sys
from pathlib import Path

# --- Which runtime are we in? -------------------------------------------
# The google.colab package can only be imported inside Colab, so a failed
# import is used as the "not Colab" signal.
try:
    import google.colab
except ImportError:
    IN_COLAB = False
    ENV_NAME = "Local/GitHub Actions"
else:
    IN_COLAB = True
    ENV_NAME = "Google Colab"

# The GitHub Actions flag takes precedence for the displayed name.
IN_GHA = "GITHUB_ACTIONS" in os.environ
if IN_GHA:
    ENV_NAME = "GitHub Actions"

# --- Base paths ----------------------------------------------------------
# Colab works inside /content/forex-ai-models; GitHub Actions and local runs
# both use the current working directory as base and save folder.
BASE_FOLDER = Path("/content") if IN_COLAB else Path.cwd()
SAVE_FOLDER = BASE_FOLDER / "forex-ai-models" if IN_COLAB else BASE_FOLDER

# --- Project directory layout (all rooted at SAVE_FOLDER) ------------------
DIRECTORIES = {
    "data_raw": SAVE_FOLDER / "data" / "raw" / "yfinance",
    "data_processed": SAVE_FOLDER / "data" / "processed",
    "database": SAVE_FOLDER / "database",
    "logs": SAVE_FOLDER / "logs",
    "outputs": SAVE_FOLDER / "outputs",
}

# Make sure every directory exists (no error if it already does).
for dir_name, dir_path in DIRECTORIES.items():
    dir_path.mkdir(parents=True, exist_ok=True)

# --- Environment banner ----------------------------------------------------
print("=" * 60)
print(f"üåç Environment: {ENV_NAME}")
print(f"üìÇ Base Folder: {BASE_FOLDER}")
print(f"üíæ Save Folder: {SAVE_FOLDER}")
print(f"üîß Python: {sys.version.split()[0]}")
print(f"üìç Working Dir: {os.getcwd()}")
print("=" * 60)

# --- CI sanity check -------------------------------------------------------
# In GitHub Actions the run is aborted (exit code 1) when any of the listed
# variables is missing or empty.
if IN_GHA:
    required_vars = ["FOREX_PAT", "GIT_USER_NAME", "GIT_USER_EMAIL"]
    missing = [v for v in required_vars if not os.environ.get(v)]
    if not missing:
        print("‚úÖ All required environment variables present")
    else:
        print(f"‚ö†Ô∏è  Warning: Missing environment variables: {', '.join(missing)}")
        sys.exit(1)

# --- Frequently used paths, exposed as globals -----------------------------
CSV_FOLDER = DIRECTORIES["data_raw"]
PICKLE_FOLDER = DIRECTORIES["data_processed"]
DB_PATH = DIRECTORIES["database"] / "memory_v85.db"
LOG_PATH = DIRECTORIES["logs"] / "pipeline.log"
OUTPUT_PATH = DIRECTORIES["outputs"] / "signals.json"

print("\nüìÅ Key Paths:")
print(f"   CSV: {CSV_FOLDER}")
print(f"   Pickles: {PICKLE_FOLDER}")
print(f"   Database: {DB_PATH}")
print(f"   Logs: {LOG_PATH}")
print(f"   Signals: {OUTPUT_PATH}")
print("=" * 60)

In [None]:
# ======================================================
# 📄 GitHub Sync (Environment-Aware) - ALIGNED VERSION
# ======================================================
import os
import subprocess
import shutil
from pathlib import Path
import urllib.parse
import sys

# ======================================================
# 1) Environment detection (same rules as the setup cell)
# ======================================================
# google.colab is importable only inside Colab.
try:
    import google.colab
except ImportError:
    IN_COLAB = False
    ENV_NAME = "Local/GitHub Actions"
else:
    IN_COLAB = True
    ENV_NAME = "Google Colab"

IN_GHA = "GITHUB_ACTIONS" in os.environ
if IN_GHA:
    # GitHub Actions wins for the displayed name.
    ENV_NAME = "GitHub Actions"

# ======================================================
# 2) Paths (same layout as the setup cell)
# ======================================================
# Colab: the repo lives in /content/forex-ai-models.
# GitHub Actions / local: the current directory is base, save and repo folder.
BASE_FOLDER = Path("/content") if IN_COLAB else Path.cwd()
SAVE_FOLDER = BASE_FOLDER / "forex-ai-models" if IN_COLAB else BASE_FOLDER
REPO_FOLDER = SAVE_FOLDER  # the repository IS the save folder in every mode

if IN_COLAB:
    print("‚òÅÔ∏è Colab Mode: Cloning directly to /content/forex-ai-models")
elif IN_GHA:
    print("ü§ñ GitHub Actions Mode: Using current directory")
else:
    print("üíª Local Mode: Using current directory")

# Organized directory layout; the directories are created later in this cell.
DIRECTORIES = {
    "data_raw": SAVE_FOLDER / "data" / "raw" / "yfinance",
    "data_processed": SAVE_FOLDER / "data" / "processed",
    "database": SAVE_FOLDER / "database",
    "logs": SAVE_FOLDER / "logs",
    "outputs": SAVE_FOLDER / "outputs",
}

banner = "=" * 70
print(banner)
print(f"üîß Running in: {ENV_NAME}")
print(f"üìÇ Working directory: {os.getcwd()}")
print(f"üíæ Save folder: {SAVE_FOLDER}")
print(f"üì¶ Repo folder: {REPO_FOLDER}")
print(f"üêç Python: {sys.version.split()[0]}")
print(banner)

# ======================================================
# 3) GitHub configuration
# ======================================================
GITHUB_USERNAME = "rahim-dotAI"
GITHUB_REPO = "forex-ai-models"
BRANCH = "main"

# ======================================================
# 4) GitHub token (environment first, then Colab secrets)
# ======================================================
FOREX_PAT = os.environ.get("FOREX_PAT")

# Inside Colab, fall back to the notebook's secret store when the environment
# variable is absent, and mirror the value into os.environ.
if IN_COLAB and not FOREX_PAT:
    try:
        from google.colab import userdata
        FOREX_PAT = userdata.get("FOREX_PAT")
        if FOREX_PAT:
            os.environ["FOREX_PAT"] = FOREX_PAT
            print("üîê Loaded FOREX_PAT from Colab secret.")
    except ImportError:
        # Secret store module unavailable: continue without a token.
        pass
    except Exception as e:
        print(f"‚ö†Ô∏è Could not load Colab secret: {e}")

# Build the authenticated remote URL, or explain how to supply the token.
if FOREX_PAT:
    # The token is percent-encoded so it can be embedded in the URL.
    SAFE_PAT = urllib.parse.quote(FOREX_PAT)
    REPO_URL = f"https://{GITHUB_USERNAME}:{SAFE_PAT}@github.com/{GITHUB_USERNAME}/{GITHUB_REPO}.git"
    print("‚úÖ GitHub token configured")
else:
    print("‚ö†Ô∏è Warning: FOREX_PAT not found. Git operations may fail.")
    print("   Set FOREX_PAT in:")
    print("   - GitHub Secrets (for Actions)")
    print("   - Colab Secrets (for Colab)")
    print("   - Environment variable (for local)")
    REPO_URL = None

# ======================================================
# 5) Handle repository based on environment
# ======================================================
def _redact_pat(text):
    """Return *text* with the GitHub token (raw and URL-quoted forms) masked.

    The clone URL embeds the token, and git may echo the remote URL in its
    error output, so anything printed from git's stderr is scrubbed first to
    keep the token out of notebook output and CI logs.
    """
    text = text or ""
    if FOREX_PAT:
        for secret in (FOREX_PAT, urllib.parse.quote(FOREX_PAT)):
            text = text.replace(secret, "***")
    return text


if IN_GHA:
    # ===== GitHub Actions =====
    # Nothing to clone: the workflow is expected to have checked out the repo.
    print("\nü§ñ GitHub Actions Mode")
    print("‚úÖ Repository already checked out by actions/checkout")
    print(f"üìÇ Current directory: {Path.cwd()}")

    # Verify .git exists
    if not (Path.cwd() / ".git").exists():
        print("‚ö†Ô∏è Warning: .git directory not found!")
        print("   Make sure actions/checkout@v4 is in your workflow")
    else:
        print("‚úÖ Git repository confirmed")

elif IN_COLAB:
    # ===== Google Colab =====
    print("\n‚òÅÔ∏è Google Colab Mode")

    if not REPO_URL:
        print("‚ùå Cannot clone repository: FOREX_PAT not available")
    elif not (REPO_FOLDER / ".git").exists():
        # A plain directory at the target path would make `git clone` fail,
        # so it is removed first.
        if REPO_FOLDER.exists():
            print(f"‚ö†Ô∏è Directory exists but is not a git repo. Removing...")
            shutil.rmtree(REPO_FOLDER)
            print("‚úÖ Cleaned up non-git directory")

        # Clone repository
        print(f"üì• Cloning repository to {REPO_FOLDER}...")
        env = os.environ.copy()
        env["GIT_LFS_SKIP_SMUDGE"] = "1"  # Skip LFS files

        try:
            result = subprocess.run(
                ["git", "clone", "-b", BRANCH, REPO_URL, str(REPO_FOLDER)],
                check=True,
                env=env,
                capture_output=True,
                text=True,
                timeout=60
            )
            print("‚úÖ Repository cloned successfully")

            # Change to repo directory
            os.chdir(REPO_FOLDER)
            print(f"üìÇ Changed directory to: {os.getcwd()}")

        except subprocess.CalledProcessError as e:
            # stderr is scrubbed: it can contain the token-bearing clone URL.
            print(f"‚ùå Clone failed: {_redact_pat(e.stderr)}")
            print("Creating directory structure manually...")
            REPO_FOLDER.mkdir(parents=True, exist_ok=True)
        except subprocess.TimeoutExpired:
            print("‚ùå Clone timed out after 60 seconds")
            REPO_FOLDER.mkdir(parents=True, exist_ok=True)
    else:
        # Repository exists, pull latest
        print("‚úÖ Repository already exists, pulling latest changes...")
        os.chdir(REPO_FOLDER)

        try:
            result = subprocess.run(
                ["git", "pull", "origin", BRANCH],
                check=True,
                cwd=REPO_FOLDER,
                capture_output=True,
                text=True,
                timeout=30
            )
            print("‚úÖ Successfully pulled latest changes")
        except subprocess.CalledProcessError as e:
            # The stored origin URL may embed the token as well.
            print(f"‚ö†Ô∏è Pull failed: {_redact_pat(e.stderr)}")
            print("Continuing with existing files...")
        except subprocess.TimeoutExpired:
            print("‚ö†Ô∏è Pull timed out, continuing anyway...")

    # Configure Git LFS (disable for Colab)
    print("‚öôÔ∏è Configuring Git LFS...")
    try:
        lfs_result = subprocess.run(
            ["git", "lfs", "uninstall"],
            check=False,
            cwd=REPO_FOLDER,
            capture_output=True
        )
        # Only claim success when git actually reported success.
        if lfs_result.returncode == 0:
            print("‚úÖ LFS disabled for Colab")
        else:
            print(f"‚ö†Ô∏è LFS setup warning: git lfs uninstall exited with code {lfs_result.returncode}")
    except Exception as e:
        # Raised e.g. when git is missing or REPO_FOLDER does not exist.
        print(f"‚ö†Ô∏è LFS setup warning: {e}")

else:
    # ===== Local Environment =====
    print("\nüíª Local Development Mode")
    print(f"üìÇ Working in: {SAVE_FOLDER}")

    if not (REPO_FOLDER / ".git").exists():
        print("‚ö†Ô∏è Not a git repository")
        print("   Run: git clone https://github.com/rahim-dotAI/forex-ai-models.git")
    else:
        print("‚úÖ Git repository found")

# ======================================================
# 6) Create organized directory structure
# ======================================================
print("\nüìÅ Creating organized directory structure...")
for dir_name, dir_path in DIRECTORIES.items():
    dir_path.mkdir(parents=True, exist_ok=True)
    print(f"   ‚úÖ {dir_name}: {dir_path}")

# ======================================================
# 7) Git global configuration
# ======================================================
print("\nüîß Configuring Git...")

# Identity used for commits; overridable through environment variables.
GIT_USER_NAME = os.environ.get("GIT_USER_NAME", "Forex AI Bot")
GIT_USER_EMAIL = os.environ.get("GIT_USER_EMAIL", "nakatonabira3@gmail.com")

# Each entry: (command, human-readable description used in warnings)
git_configs = [
    (["git", "config", "--global", "user.name", GIT_USER_NAME], "User name"),
    (["git", "config", "--global", "user.email", GIT_USER_EMAIL], "User email"),
    (["git", "config", "--global", "advice.detachedHead", "false"], "Detached HEAD warning"),
    (["git", "config", "--global", "init.defaultBranch", "main"], "Default branch")
]

# Best effort: a failing setting never aborts the cell, but it is reported.
# With check=False a non-zero exit raises nothing, so the return code has to
# be inspected explicitly.
git_config_failures = []
for cmd, description in git_configs:
    try:
        completed = subprocess.run(cmd, check=False, capture_output=True, text=True)
    except Exception as e:
        # e.g. the git executable is not installed
        print(f"‚ö†Ô∏è Could not set {description}: {e}")
        git_config_failures.append(description)
        continue

    if completed.returncode != 0:
        print(f"‚ö†Ô∏è Could not set {description}: {completed.stderr.strip()}")
        git_config_failures.append(description)

if git_config_failures:
    print(f"‚ö†Ô∏è Git configuration incomplete: {', '.join(git_config_failures)}")
else:
    print(f"‚úÖ Git configured: {GIT_USER_NAME} <{GIT_USER_EMAIL}>")

# ======================================================
# 8) Export path constants (same values as the setup cell)
# ======================================================
CSV_FOLDER = DIRECTORIES["data_raw"]
PICKLE_FOLDER = DIRECTORIES["data_processed"]
DB_PATH = DIRECTORIES["database"] / "memory_v85.db"
LOG_PATH = DIRECTORIES["logs"] / "pipeline.log"
OUTPUT_PATH = DIRECTORIES["outputs"] / "signals.json"

# ======================================================
# 9) Environment summary
# ======================================================
print("\n" + "=" * 70)
print("üßæ ENVIRONMENT SUMMARY")
print("=" * 70)
print(f"Environment:      {ENV_NAME}")
print(f"Working Dir:      {os.getcwd()}")
print(f"Save Folder:      {SAVE_FOLDER}")
print(f"Repo Folder:      {REPO_FOLDER}")
print(f"Repository:       https://github.com/{GITHUB_USERNAME}/{GITHUB_REPO}")
print(f"Branch:           {BRANCH}")
print(f"Git Repo Exists:  {(REPO_FOLDER / '.git').exists()}")
# Only whether a token is present is shown, never the token itself.
print(f"FOREX_PAT Set:    {'‚úÖ Yes' if FOREX_PAT else '‚ùå No'}")

# Resolved locations of the main artefacts
print("\nüìã Critical Paths:")
print(f"   CSV Folder:    {CSV_FOLDER}")
print(f"   Pickle Folder: {PICKLE_FOLDER}")
print(f"   Database:      {DB_PATH}")
print(f"   Logs:          {LOG_PATH}")
print(f"   Signals:       {OUTPUT_PATH}")

# Existence check for each location, one line per path
print("\nüìÇ Directory Status:")
critical_paths = {
    "Repo .git": REPO_FOLDER / ".git",
    "Data Raw": CSV_FOLDER,
    "Data Processed": PICKLE_FOLDER,
    "Database": DIRECTORIES["database"],
    "Logs": DIRECTORIES["logs"],
    "Outputs": DIRECTORIES["outputs"]
}

for name, path in critical_paths.items():
    exists = path.exists()
    if exists:
        icon = "‚úÖ"
    else:
        icon = "‚ùå"
    print(f"  {icon} {name}: {path}")

print("=" * 70)
print("‚úÖ Setup completed successfully!")
print("=" * 70)

# ======================================================
# 10) Variables left defined for downstream cells
# ======================================================
# - ENV_NAME: environment name
# - IN_COLAB / IN_GHA: environment flags
# - SAVE_FOLDER: where files are saved (equal to REPO_FOLDER)
# - REPO_FOLDER: path of the git repository
# - CSV_FOLDER, PICKLE_FOLDER, DB_PATH, LOG_PATH, OUTPUT_PATH: organized paths
# - GITHUB_USERNAME, GITHUB_REPO, BRANCH: git configuration
# - FOREX_PAT: GitHub token (if available)

print("\n‚úÖ All environment variables exported for downstream cells")

In [None]:
!pip install mplfinance firebase-admin dropbox requests beautifulsoup4 pandas numpy ta yfinance pyppeteer nest_asyncio lightgbm joblib matplotlib alpha_vantage tqdm scikit-learn river


In [None]:
#!/usr/bin/env python3
"""
ALPHA VANTAGE FX DATA FETCHER - OPTIMIZED FOR DAILY USE
=======================================================
✅ Designed to run ONCE per day (not every 2 hours)
✅ Reduces API usage from 48/day to 4/day
✅ Environment variable SKIP_ALPHA_VANTAGE support
✅ Data quality validation before saving
✅ Works in GitHub Actions, Google Colab, and Local
✅ Thread-safe operations with retry logic
✅ Clear naming: pair_daily_av.csv (av = Alpha Vantage)
"""

import os
import sys
import time
import hashlib
import requests
import subprocess
import threading
import urllib.parse
from pathlib import Path
from datetime import datetime, timezone
from concurrent.futures import ThreadPoolExecutor, as_completed
import pandas as pd
import numpy as np

# ======================================================
# Skip check - stop early when this fetch is not wanted
# ======================================================
# Enabled only when SKIP_ALPHA_VANTAGE is "true" (case-insensitive); any
# other value, or no value at all, leaves the fetch enabled.
SKIP_ALPHA_VANTAGE = os.environ.get("SKIP_ALPHA_VANTAGE", "false").lower() == "true"

if SKIP_ALPHA_VANTAGE:
    # Explain why nothing is fetched, then exit with status 0.
    print("\n".join([
        "=" * 70,
        "‚è≠Ô∏è  ALPHA VANTAGE SKIPPED (runs separately at midnight)",
        "=" * 70,
        "‚ÑπÔ∏è  Alpha Vantage daily data doesn't change hourly",
        "‚ÑπÔ∏è  Using existing data from last midnight run",
        "=" * 70,
    ]))
    sys.exit(0)

# ======================================================
# 1) Environment detection
# ======================================================
print("=" * 70)
print("üöÄ Alpha Vantage FX Data Fetcher - Daily Optimized v2.0")
print("=" * 70)

# google.colab is importable only inside Colab.
try:
    import google.colab
except ImportError:
    IN_COLAB = False
    ENV_NAME = "Local"
else:
    IN_COLAB = True
    ENV_NAME = "Google Colab"

IN_GHA = "GITHUB_ACTIONS" in os.environ
if IN_GHA:
    ENV_NAME = "GitHub Actions"

print(f"üìç Environment: {ENV_NAME}")
print(f"‚è∞ Current Time: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')}")
print("üîÑ Fetch Mode: Daily (saves API calls)")
print("=" * 70)

# ======================================================
# 2) Path configuration
# ======================================================
# Colab uses /content/forex-ai-models; GitHub Actions and local runs use the
# current working directory for base, save and repo folder alike.
BASE_FOLDER = Path("/content") if IN_COLAB else Path.cwd()
SAVE_FOLDER = BASE_FOLDER / "forex-ai-models" if IN_COLAB else BASE_FOLDER
REPO_FOLDER = SAVE_FOLDER

# Directory layout used by this fetcher (Alpha Vantage has its own raw and
# quarantine folders).
DIRECTORIES = {
    "data_raw_alpha": SAVE_FOLDER / "data" / "raw" / "alpha_vantage",
    "data_processed": SAVE_FOLDER / "data" / "processed",
    "database": SAVE_FOLDER / "database",
    "logs": SAVE_FOLDER / "logs",
    "outputs": SAVE_FOLDER / "outputs",
    "quarantine": SAVE_FOLDER / "data" / "quarantine" / "alpha_vantage",
}

# Create whatever is missing; existing directories are left untouched.
for dir_path in DIRECTORIES.values():
    dir_path.mkdir(parents=True, exist_ok=True)

CSV_FOLDER = DIRECTORIES["data_raw_alpha"]
QUARANTINE_FOLDER = DIRECTORIES["quarantine"]
LOG_FOLDER = DIRECTORIES["logs"]

print(f"üìÇ Base Folder: {BASE_FOLDER}")
print(f"üíæ Save Folder: {SAVE_FOLDER}")
print(f"üìä Alpha Vantage CSV: {CSV_FOLDER}")
print("=" * 70)

# ======================================================
# 3Ô∏è‚É£ DATA QUALITY VALIDATOR
# ======================================================
class DataQualityValidator:
    """Score OHLC price data (0-100) and decide whether it passes validation.

    The score is the sum of four components:

    * up to 30 points for the share of rows with complete OHLC values,
    * up to 30 points for close-price variation (coefficient of variation),
    * up to 20 points for the share of distinct close prices,
    * up to 20 points for the median true range.
    """

    MIN_ROWS = 50
    MIN_PRICE_CV = 0.01  # coefficient of variation, expressed in percent
    MIN_UNIQUE_RATIO = 0.01  # NOTE: defined but not consulted by validate_dataframe
    MIN_TRUE_RANGE = 1e-10
    MIN_QUALITY_SCORE = 40.0

    @staticmethod
    def validate_dataframe(df, pair):
        """Validate the quality of an OHLC DataFrame.

        Args:
            df: DataFrame expected to contain ``open``, ``high``, ``low`` and
                ``close`` columns (may be ``None`` or empty).
            pair: Currency pair label; accepted for interface compatibility,
                not used in the computation.

        Returns:
            Tuple ``(is_valid, quality_score, metrics, issues)``.
            ``is_valid`` is ``True`` only when the score reaches
            ``MIN_QUALITY_SCORE`` AND no issue (e.g. too few rows) was
            recorded; the score itself is unaffected by recorded issues, so
            callers can still distinguish "low score" from "good score with
            caveats".
        """
        if df is None or df.empty:
            return False, 0.0, {}, ["Empty DataFrame"]

        issues = []
        metrics = {}

        metrics['row_count'] = len(df)
        if len(df) < DataQualityValidator.MIN_ROWS:
            issues.append(f"Too few rows: {len(df)}")

        required_cols = ['open', 'high', 'low', 'close']
        missing_cols = [col for col in required_cols if col not in df.columns]
        if missing_cols:
            issues.append(f"Missing columns: {missing_cols}")
            return False, 0.0, metrics, issues

        # Only rows with all four prices present take part in the statistics.
        ohlc_data = df[required_cols].dropna()
        if len(ohlc_data) == 0:
            issues.append("No valid OHLC data")
            return False, 0.0, metrics, issues

        metrics['valid_rows'] = len(ohlc_data)
        metrics['valid_ratio'] = len(ohlc_data) / len(df)

        close_prices = ohlc_data['close']
        metrics['price_mean'] = float(close_prices.mean())
        # The sample standard deviation of a single value is NaN; treat it as
        # "no variation" so NaN never leaks into the metrics or the score.
        metrics['price_std'] = float(close_prices.std()) if len(close_prices) > 1 else 0.0
        metrics['price_cv'] = (metrics['price_std'] / metrics['price_mean']) * 100 if metrics['price_mean'] > 0 else 0.0

        metrics['unique_prices'] = close_prices.nunique()
        metrics['unique_ratio'] = metrics['unique_prices'] / len(close_prices)

        high = ohlc_data['high'].values
        low = ohlc_data['low'].values
        close = ohlc_data['close'].values

        # True range: the largest of (high - low), |high - previous close| and
        # |low - previous close|. np.roll wraps the last close around to
        # position 0, so the first element is overwritten with the plain
        # high-low range (there is no previous close for it).
        prev_close = np.roll(close, 1)
        tr = np.maximum.reduce([
            high - low,
            np.abs(high - prev_close),
            np.abs(low - prev_close)
        ])
        tr[0] = high[0] - low[0]

        metrics['true_range_median'] = float(np.median(tr))
        metrics['true_range_mean'] = float(np.mean(tr))

        # Quality score (0-100)
        quality_score = 0.0
        quality_score += metrics['valid_ratio'] * 30

        # Full credit from 1% variation upwards, proportional credit below.
        if metrics['price_cv'] >= 1.0:
            quality_score += 30
        elif metrics['price_cv'] >= DataQualityValidator.MIN_PRICE_CV:
            quality_score += (metrics['price_cv'] / 1.0) * 30

        quality_score += min(metrics['unique_ratio'] * 20, 20)

        if metrics['true_range_median'] >= 1e-5:
            quality_score += 20
        elif metrics['true_range_median'] >= DataQualityValidator.MIN_TRUE_RANGE:
            quality_score += (metrics['true_range_median'] / 1e-5) * 20

        metrics['quality_score'] = quality_score

        # A recorded issue (such as fewer than MIN_ROWS rows) must fail
        # validation even when the score alone would pass; otherwise the
        # MIN_ROWS check has no effect and a one-row frame is reported valid.
        is_valid = bool(
            quality_score >= DataQualityValidator.MIN_QUALITY_SCORE and not issues
        )

        return is_valid, quality_score, metrics, issues

validator = DataQualityValidator()

# ======================================================
# 4) GitHub configuration
# ======================================================
GITHUB_USERNAME = "rahim-dotAI"
GITHUB_REPO = "forex-ai-models"
BRANCH = "main"

FOREX_PAT = os.environ.get("FOREX_PAT")

# In Colab, fall back to the notebook secret store. Failures are reported
# instead of being silently swallowed (a bare `except:` would also trap
# KeyboardInterrupt/SystemExit and hide the reason the token is missing).
if not FOREX_PAT and IN_COLAB:
    try:
        from google.colab import userdata
        FOREX_PAT = userdata.get("FOREX_PAT")
        if FOREX_PAT:
            os.environ["FOREX_PAT"] = FOREX_PAT
    except Exception as e:
        print(f"‚ö†Ô∏è Could not load Colab secret: {e}")

if FOREX_PAT:
    print("‚úÖ GitHub credentials configured")
else:
    print("‚ö†Ô∏è Warning: FOREX_PAT not found")

# Commit identity; overridable through environment variables.
GIT_USER_NAME = os.environ.get("GIT_USER_NAME", "Forex AI Bot")
GIT_USER_EMAIL = os.environ.get("GIT_USER_EMAIL", "nakatonabira3@gmail.com")

# Best effort: failures here are deliberately not fatal.
subprocess.run(["git", "config", "--global", "user.name", GIT_USER_NAME],
               capture_output=True, check=False)
subprocess.run(["git", "config", "--global", "user.email", GIT_USER_EMAIL],
               capture_output=True, check=False)

# ======================================================
# 5) Alpha Vantage configuration
# ======================================================
ALPHA_VANTAGE_KEY = os.environ.get("ALPHA_VANTAGE_KEY")

if not ALPHA_VANTAGE_KEY and IN_COLAB:
    try:
        from google.colab import userdata
        ALPHA_VANTAGE_KEY = userdata.get("ALPHA_VANTAGE_KEY")
        if ALPHA_VANTAGE_KEY:
            os.environ["ALPHA_VANTAGE_KEY"] = ALPHA_VANTAGE_KEY
    except Exception as e:
        print(f"‚ö†Ô∏è Could not load Colab secret: {e}")

# Unlike the GitHub token, the API key is mandatory for this cell.
if not ALPHA_VANTAGE_KEY:
    raise ValueError("‚ùå ALPHA_VANTAGE_KEY is required")

# Show a masked preview only; a key of 8 characters or fewer would otherwise
# be printed in full by the first-4/last-4 pattern.
if len(ALPHA_VANTAGE_KEY) > 8:
    print(f"‚úÖ Alpha Vantage API key: {ALPHA_VANTAGE_KEY[:4]}...{ALPHA_VANTAGE_KEY[-4:]}")
else:
    print(f"‚úÖ Alpha Vantage API key: {'*' * len(ALPHA_VANTAGE_KEY)}")

FX_PAIRS = ["EUR/USD", "GBP/USD", "USD/JPY", "AUD/USD"]
print(f"üìä Fetching {len(FX_PAIRS)} pairs: {', '.join(FX_PAIRS)}")
print(f"üí° Daily API usage: {len(FX_PAIRS)} requests/day (16% of 25 limit)")

# Guards file writes in process_pair.
lock = threading.Lock()

# ======================================================
# 6Ô∏è‚É£ HELPER FUNCTIONS
# ======================================================
def ensure_tz_naive(df):
    """Give *df* a timezone-naive datetime index (modifies *df* in place).

    ``None`` and empty frames are returned untouched. Otherwise the index is
    parsed with ``pd.to_datetime`` (unparseable labels are coerced rather
    than raising) and, when it carries a timezone, converted with
    ``tz_convert(None)`` so the timezone information is dropped.

    Returns:
        The same object that was passed in.
    """
    if df is not None and not df.empty:
        parsed = pd.to_datetime(df.index, errors='coerce')
        df.index = parsed if parsed.tz is None else parsed.tz_convert(None)
    return df

def file_hash(filepath, chunk_size=8192):
    """Return the MD5 hex digest of a file, or ``None`` if it does not exist.

    The file is read in blocks of *chunk_size* bytes so large files never
    have to be held in memory at once. The digest is used to detect content
    changes between two points in time.
    """
    if not filepath.exists():
        return None

    digest = hashlib.md5()
    with open(filepath, "rb") as handle:
        while True:
            block = handle.read(chunk_size)
            if not block:  # b"" signals end of file
                break
            digest.update(block)

    return digest.hexdigest()

def fetch_alpha_vantage_fx(pair, outputsize='full', max_retries=3, retry_delay=5):
    """
    Fetch daily FX data from the Alpha Vantage API with retry logic.

    Args:
        pair: Currency pair written as ``"FROM/TO"`` (e.g. ``"EUR/USD"``).
        outputsize: Passed through as the API's ``outputsize`` parameter.
        max_retries: Maximum number of request attempts.
        retry_delay: Seconds to wait between attempts; doubled when the API
            answers with a rate-limit ``Note``.

    Returns:
        DataFrame indexed by date (ascending, timezone-naive) with float
        columns including ``open``, ``high``, ``low`` and ``close``, or an
        empty DataFrame when every attempt failed.
    """
    base_url = 'https://www.alphavantage.co/query'
    from_currency, to_currency = pair.split('/')

    params = {
        'function': 'FX_DAILY',
        'from_symbol': from_currency,
        'to_symbol': to_currency,
        'outputsize': outputsize,
        'datatype': 'json',
        'apikey': ALPHA_VANTAGE_KEY
    }

    def _scrub(error):
        """Return the error text with the API key masked.

        The key travels in the query string, and request/HTTP error messages
        can quote the full URL, so every message is scrubbed before printing.
        """
        text = str(error)
        if ALPHA_VANTAGE_KEY:
            for secret in (ALPHA_VANTAGE_KEY, urllib.parse.quote_plus(ALPHA_VANTAGE_KEY)):
                text = text.replace(secret, "***")
        return text

    for attempt in range(max_retries):
        try:
            print(f"  üîΩ Fetching {pair} (attempt {attempt + 1}/{max_retries})...")

            r = requests.get(base_url, params=params, timeout=30)
            r.raise_for_status()
            data = r.json()

            if 'Error Message' in data:
                raise ValueError(f"API Error: {data['Error Message']}")

            # A 'Note' payload is treated as a rate-limit notice: back off for
            # longer, or give up on the final attempt.
            if 'Note' in data:
                print(f"  ‚ö†Ô∏è API rate limit reached for {pair}")
                if attempt < max_retries - 1:
                    time.sleep(retry_delay * 2)
                    continue
                return pd.DataFrame()

            if 'Time Series FX (Daily)' not in data:
                raise ValueError(f"Unexpected response format: {list(data.keys())}")

            # The payload maps date -> {field: value}; transpose so that each
            # date becomes a row.
            ts = data['Time Series FX (Daily)']
            df = pd.DataFrame(ts).T
            df.index = pd.to_datetime(df.index)
            df = df.sort_index()

            df = df.rename(columns={
                '1. open': 'open',
                '2. high': 'high',
                '3. low': 'low',
                '4. close': 'close'
            })

            df = df.astype(float)
            df = ensure_tz_naive(df)

            print(f"  ‚úÖ Fetched {len(df)} rows for {pair}")
            return df

        except requests.RequestException as e:
            print(f"  ‚ö†Ô∏è Network error: {_scrub(e)}")
            if attempt < max_retries - 1:
                time.sleep(retry_delay)
            else:
                return pd.DataFrame()

        except Exception as e:
            # Includes the ValueErrors raised above and JSON/parsing problems.
            print(f"  ‚ö†Ô∏è Error: {_scrub(e)}")
            if attempt < max_retries - 1:
                time.sleep(retry_delay)
            else:
                return pd.DataFrame()

    # Reached only when max_retries is zero or negative.
    return pd.DataFrame()

# ======================================================
# 7Ô∏è‚É£ PAIR PROCESSING WITH QUALITY VALIDATION
# ======================================================
def process_pair(pair):
    """
    Fetch one FX pair, merge it with the stored history, validate and save.

    Steps: load the existing CSV (if any), fetch fresh data, merge while
    keeping the newest row for duplicated dates, score the result, then
    either quarantine it (score below the minimum) or write it to the CSV.

    Returns:
        Tuple ``(filepath, status_message, quality_score)`` where *filepath*
        is the CSV path as a string when the file content changed, otherwise
        ``None``.
    """
    print(f"\nüîÑ Processing {pair}...")

    filename = f"{pair.replace('/', '_')}_daily_av.csv"
    file_path = CSV_FOLDER / filename

    # Previously saved history; a read failure just means starting fresh.
    existing_df = pd.DataFrame()
    if file_path.exists():
        try:
            existing_df = pd.read_csv(file_path, index_col=0, parse_dates=True)
            existing_df = ensure_tz_naive(existing_df)
            print(f"  üìä Loaded {len(existing_df)} existing rows")
        except Exception as e:
            print(f"  ‚ö†Ô∏è Could not load existing data: {e}")

    # Digest of the file before writing, used for change detection below.
    old_hash = file_hash(file_path)

    new_df = fetch_alpha_vantage_fx(pair)
    if new_df.empty:
        return None, f"‚ùå {pair}: No data fetched", 0.0

    # Merge: on duplicated dates the freshly fetched row wins.
    if existing_df.empty:
        combined_df = new_df
    else:
        stacked = pd.concat([existing_df, new_df])
        combined_df = stacked[~stacked.index.duplicated(keep='last')]

    combined_df.sort_index(inplace=True)

    is_valid, quality_score, metrics, issues = validator.validate_dataframe(
        combined_df, pair
    )

    print(f"  üìä Quality score: {quality_score:.1f}/100")

    if not is_valid:
        print(f"  ‚ö†Ô∏è Quality issues: {'; '.join(issues[:2])}")
        print(f"     CV: {metrics.get('price_cv', 0):.4f}%, Unique: {metrics.get('unique_ratio', 0):.1%}")

        if quality_score < DataQualityValidator.MIN_QUALITY_SCORE:
            print(f"  ‚ùå Data quality too low - quarantining")

            # Rejected data and a plain-text report go to the quarantine
            # folder; the real CSV is left untouched.
            quarantine_file = QUARANTINE_FOLDER / f"{filename}.bad"
            report_file = QUARANTINE_FOLDER / f"{filename}.quality.txt"
            report_lines = [
                f"Quality Report for {pair} (Alpha Vantage)\n",
                f"{'='*50}\n",
                f"Quality Score: {quality_score:.1f}/100\n",
                f"Issues: {'; '.join(issues)}\n",
                f"\nMetrics:\n",
            ]
            report_lines.extend(f"  {k}: {v}\n" for k, v in metrics.items())

            with lock:
                combined_df.to_csv(quarantine_file)
                with open(report_file, 'w') as f:
                    f.writelines(report_lines)

            return None, f"‚ùå {pair}: Quality too low ({quality_score:.1f}/100)", quality_score

    # Persist, then compare digests to learn whether anything changed.
    with lock:
        combined_df.to_csv(file_path)

    changed = file_hash(file_path) != old_hash

    if changed:
        status = "‚úÖ Updated"
        changed_path = str(file_path)
    else:
        status = "‚ÑπÔ∏è No changes"
        changed_path = None
    print(f"  {status} - {len(combined_df)} rows, quality: {quality_score:.1f}/100")

    return changed_path, f"{status} {pair} ({len(combined_df)} rows, Q:{quality_score:.0f})", quality_score

# ======================================================
# 8) Execution with rate limiting
# ======================================================
print("\n" + "=" * 70)
print("üöÄ Fetching FX data with quality validation...")
print("=" * 70)

changed_files = []   # paths of CSV files whose content changed
results = []         # one status message per pair
quality_scores = {}  # changed file path -> quality score

# Sequential processing with delays to respect rate limits
for pair_index, pair in enumerate(FX_PAIRS):
    try:
        filepath, message, quality = process_pair(pair)
    except Exception as e:
        print(f"‚ùå {pair} processing failed: {e}")
        results.append(f"‚ùå {pair}: Failed")
    else:
        results.append(message)
        if filepath:
            changed_files.append(filepath)
            quality_scores[filepath] = quality

    # Rate limiting: wait 15 seconds between requests. The pause sits outside
    # the try block so it also applies after a failed pair, and "last pair"
    # is decided by position so a repeated symbol cannot skip a wait.
    if pair_index < len(FX_PAIRS) - 1:
        print(f"\n‚è≥ Waiting 15 seconds (rate limiting)...")
        time.sleep(15)
# ======================================================
# 9) Results summary
# ======================================================
print("\n" + "=" * 70)
print("üìä PROCESSING SUMMARY")
print("=" * 70)

# One status line per processed pair
for result in results:
    print(result)

print(f"\nTotal pairs processed: {len(FX_PAIRS)}")
print(f"Files updated: {len(changed_files)}")
print(f"API calls made: {len(FX_PAIRS)}")

# Quality report: covers only files that were actually updated.
if quality_scores:
    print("\n" + "=" * 70)
    print("üìä QUALITY REPORT")
    print("=" * 70)
    avg_quality = sum(quality_scores.values()) / len(quality_scores)
    print(f"Average quality score: {avg_quality:.1f}/100")

    print("\nFiles by quality:")
    # Best score first
    ranked = sorted(quality_scores.items(), key=lambda item: item[1], reverse=True)
    for fname, score in ranked:
        marker = '‚úÖ' if score >= 60 else '‚ö†Ô∏è'
        print(f"  {marker} {Path(fname).name}: {score:.1f}/100")

# Everything currently in the quarantine folder (not only from this run)
quarantined = list(QUARANTINE_FOLDER.glob("*.bad"))
if quarantined:
    print(f"\n‚ö†Ô∏è  QUARANTINED FILES: {len(quarantined)}")
    for qfile in quarantined:
        print(f"  ‚ùå {qfile.stem}")

# ======================================================
# 10) Git commit & push
# ======================================================
def _hide_token(text):
    """Return *text* with the GitHub token (raw and URL-quoted forms) masked.

    The push URL embeds the token, so git output is scrubbed before it is
    printed.
    """
    text = text or ""
    if FOREX_PAT:
        for secret in (FOREX_PAT, urllib.parse.quote(FOREX_PAT)):
            text = text.replace(secret, "***")
    return text


if IN_GHA:
    # In CI the commit/push step is left to the workflow itself.
    print("\n" + "=" * 70)
    print("ü§ñ GitHub Actions: Handled by workflow")
    print("=" * 70)

elif changed_files and FOREX_PAT:
    print("\n" + "=" * 70)
    print("üöÄ Committing changes to GitHub...")
    print("=" * 70)

    try:
        os.chdir(REPO_FOLDER)

        subprocess.run(["git", "add", "-A"], check=False)

        commit_msg = f"üìä Alpha Vantage daily update - {len(changed_files)} files"
        if quality_scores:
            # Computed here so this step does not depend on the summary step.
            commit_avg = sum(quality_scores.values()) / len(quality_scores)
            commit_msg += f" (Avg Q:{commit_avg:.0f})"

        result = subprocess.run(
            ["git", "commit", "-m", commit_msg],
            capture_output=True,
            text=True
        )

        if result.returncode != 0:
            # e.g. "nothing to commit" - report it instead of staying silent.
            detail = (result.stdout or result.stderr or "").strip()
            print(f"‚ö†Ô∏è Commit skipped or failed: {_hide_token(detail)}")
        else:
            print("‚úÖ Changes committed")

            SAFE_PAT = urllib.parse.quote(FOREX_PAT)
            REPO_URL = f"https://{GITHUB_USERNAME}:{SAFE_PAT}@github.com/{GITHUB_USERNAME}/{GITHUB_REPO}.git"

            pushed = False
            for attempt in range(3):
                print(f"üì§ Pushing to GitHub (attempt {attempt + 1}/3)...")
                try:
                    result = subprocess.run(
                        ["git", "push", REPO_URL, BRANCH],
                        capture_output=True,
                        text=True,
                        timeout=30
                    )
                except subprocess.TimeoutExpired:
                    # A slow attempt must not cancel the remaining retries.
                    print("‚ö†Ô∏è Push timed out")
                    continue

                if result.returncode == 0:
                    print("‚úÖ Successfully pushed to GitHub")
                    pushed = True
                    break

                print(f"‚ö†Ô∏è Push failed: {_hide_token(result.stderr).strip()}")
                if attempt < 2:
                    # The remote may have moved on: rebase onto it, then retry.
                    try:
                        subprocess.run(
                            ["git", "pull", "--rebase", REPO_URL, BRANCH],
                            capture_output=True,
                            timeout=60
                        )
                    except subprocess.TimeoutExpired:
                        print("‚ö†Ô∏è Rebase pull timed out")
                    time.sleep(3)

            if not pushed:
                print("‚ùå Push failed after 3 attempts - changes are committed locally only")

    except Exception as e:
        print(f"‚ùå Git error: {_hide_token(str(e))}")
    finally:
        os.chdir(SAVE_FOLDER)

elif changed_files:
    # Files changed but there is no token to push with; say so explicitly
    # rather than claiming nothing changed.
    print("\n‚ö†Ô∏è Changes detected but FOREX_PAT is not set - skipping commit")

else:
    print("\n‚ÑπÔ∏è No changes to commit")

# ======================================================
# Completion
# ======================================================
# Every pair appends exactly one message to `results` (success or failure),
# so comparing lengths alone can never detect a failure. Failed pairs are
# recognised by the failure marker their message starts with.
failed_results = [msg for msg in results if msg.startswith("‚ùå")]
run_succeeded = len(results) == len(FX_PAIRS) and not failed_results

print("\n" + "=" * 70)
print("‚úÖ ALPHA VANTAGE WORKFLOW COMPLETED")
print("=" * 70)
print(f"Environment: {ENV_NAME}")
print(f"Files updated: {len(changed_files)}")
print(f"Quality validated: ‚úÖ")
if quality_scores:
    # Recomputed here so this step does not depend on the summary step.
    final_avg_quality = sum(quality_scores.values()) / len(quality_scores)
    print(f"Average quality: {final_avg_quality:.1f}/100")
print(f"API calls: {len(FX_PAIRS)}/25 daily limit")
print(f"Status: {'‚úÖ Success' if run_succeeded else '‚ö†Ô∏è Partial'}")
print("=" * 70)
print("\nüí° Optimization Summary:")
print("   ‚Ä¢ Runs once daily at midnight")
print("   ‚Ä¢ Uses 4 API calls/day (16% of limit)")
print("   ‚Ä¢ Saves 44 calls/day compared to hourly fetching")
print("   ‚Ä¢ Daily OHLC data doesn't change intraday")
print("=" * 70)

In [None]:
#!/usr/bin/env python3
"""
YFINANCE FX DATA FETCHER - CLEAN STRUCTURE EDITION
===================================================
✅ Aligned with clean repo structure (data/raw/yfinance)
✅ Relaxed quality thresholds for more data acceptance
✅ Automatic OHLC logic fixing
✅ Enhanced fallback options
✅ Smart data cleaning before validation
✅ Better symbol format handling
✅ Multi-environment support (Colab, GHA, Local)
"""

import os
import time
import hashlib
import subprocess
import shutil
import threading
import urllib.parse
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed
import pandas as pd
import numpy as np
import yfinance as yf
from datetime import datetime

print("=" * 70)
print("üöÄ YFinance FX Data Fetcher - Clean Structure Edition")
print("=" * 70)

# ======================================================
# 1Ô∏è‚É£ ENVIRONMENT DETECTION (MATCHES YOUR SETUP!)
# ======================================================
try:
    import google.colab
    IN_COLAB = True
    ENV_NAME = "Google Colab"
except ImportError:
    IN_COLAB = False
    ENV_NAME = "Local"

IN_GHA = "GITHUB_ACTIONS" in os.environ
if IN_GHA:
    ENV_NAME = "GitHub Actions"

print(f"üåç Environment: {ENV_NAME}")

# ======================================================
# 2Ô∏è‚É£ UNIFIED PATH CONFIGURATION (MATCHES CLEAN STRUCTURE!)
# ======================================================
if IN_COLAB:
    print("‚òÅÔ∏è Google Colab detected - using clean structure")
    BASE_FOLDER = Path("/content")
    SAVE_FOLDER = BASE_FOLDER / "forex-ai-models"  # ‚úÖ MATCHES!
    REPO_FOLDER = SAVE_FOLDER
elif IN_GHA:
    print("ü§ñ GitHub Actions detected - using repository root")
    BASE_FOLDER = Path.cwd()
    SAVE_FOLDER = BASE_FOLDER
    REPO_FOLDER = BASE_FOLDER
else:
    print("üíª Local environment detected - using clean structure")
    BASE_FOLDER = Path.cwd()
    SAVE_FOLDER = BASE_FOLDER
    REPO_FOLDER = BASE_FOLDER

# ‚úÖ CREATE ORGANIZED DIRECTORY STRUCTURE
DIRECTORIES = {
    "data_raw_yfinance": SAVE_FOLDER / "data" / "raw" / "yfinance",
    "data_processed": SAVE_FOLDER / "data" / "processed",
    "database": SAVE_FOLDER / "database",
    "logs": SAVE_FOLDER / "logs",
    "outputs": SAVE_FOLDER / "outputs",
    "quarantine": SAVE_FOLDER / "data" / "quarantine" / "yfinance",
}

# Create all directories
for dir_name, dir_path in DIRECTORIES.items():
    dir_path.mkdir(parents=True, exist_ok=True)

# Export key paths
CSV_FOLDER = DIRECTORIES["data_raw_yfinance"]  # ‚úÖ YFinance CSVs here
QUARANTINE_FOLDER = DIRECTORIES["quarantine"]
LOG_FOLDER = DIRECTORIES["logs"]

print(f"üìÇ Base Folder: {BASE_FOLDER}")
print(f"üíæ Save Folder: {SAVE_FOLDER}")
print(f"üì¶ Repo Folder: {REPO_FOLDER}")
print(f"üìä YFinance CSV: {CSV_FOLDER}")
print(f"üóëÔ∏è Quarantine: {QUARANTINE_FOLDER}")
print("=" * 70)

# ======================================================
# 3Ô∏è‚É£ GIT CONFIGURATION
# ======================================================
GIT_USER_NAME = os.environ.get("GIT_USER_NAME", "Forex AI Bot")
GIT_USER_EMAIL = os.environ.get("GIT_USER_EMAIL", "nakatonabira3@gmail.com")
GITHUB_USERNAME = "rahim-dotAI"
GITHUB_REPO = "forex-ai-models"
BRANCH = "main"

FOREX_PAT = os.environ.get("FOREX_PAT")

# Try Colab secrets if in Colab and PAT not found
if not FOREX_PAT and IN_COLAB:
    try:
        from google.colab import userdata
        FOREX_PAT = userdata.get("FOREX_PAT")
        if FOREX_PAT:
            os.environ["FOREX_PAT"] = FOREX_PAT
            print("üîê Loaded FOREX_PAT from Colab secrets")
    except Exception as e:
        print(f"‚ö†Ô∏è Could not access Colab secrets: {e}")

if not FOREX_PAT:
    raise ValueError("‚ùå FOREX_PAT is required!")

SAFE_PAT = urllib.parse.quote(FOREX_PAT)
REPO_URL = f"https://{GITHUB_USERNAME}:{SAFE_PAT}@github.com/{GITHUB_USERNAME}/{GITHUB_REPO}.git"

# Configure git
subprocess.run(["git", "config", "--global", "user.name", GIT_USER_NAME],
               capture_output=True, check=False)
subprocess.run(["git", "config", "--global", "user.email", GIT_USER_EMAIL],
               capture_output=True, check=False)

print(f"‚úÖ Git configured: {GIT_USER_NAME} <{GIT_USER_EMAIL}>")

# ======================================================
# 4Ô∏è‚É£ REPOSITORY MANAGEMENT (SIMPLIFIED)
# ======================================================
def ensure_repository():
    """Check that the local checkout exists and, outside CI, refresh it.

    Under GitHub Actions the function only reports whether ``REPO_FOLDER``
    contains a ``.git`` directory.  Elsewhere it runs ``git pull origin
    BRANCH`` inside ``REPO_FOLDER`` when a ``.git`` directory is present and
    prints a hint when it is not.  Nothing is returned and no failure is
    raised: every problem is reported on stdout and execution continues.
    """
    git_dir = REPO_FOLDER / ".git"

    # CI: the workflow has already checked the repository out.
    if IN_GHA:
        print("\nü§ñ GitHub Actions: Repository already available")
        if git_dir.exists():
            print("‚úÖ Git repository verified")
        else:
            print("‚ö†Ô∏è Warning: .git directory not found")
        return

    print("\nüì• Managing repository...")

    # No git metadata: distinguish "plain folder" from "folder missing".
    if not git_dir.exists():
        if REPO_FOLDER.exists():
            print("‚ö†Ô∏è Directory exists but is not a git repository")
        else:
            print("‚ö†Ô∏è Repository not found. This script expects the repo to be set up first.")
            print("   Please run the GitHub Sync script first!")
        return

    print("üîÑ Pulling latest changes...")
    try:
        pull = subprocess.run(
            ["git", "-C", str(REPO_FOLDER), "pull", "origin", BRANCH],
            capture_output=True,
            text=True,
            timeout=30
        )
        if pull.returncode != 0:
            print("‚ö†Ô∏è Pull had issues, continuing anyway")
        else:
            print("‚úÖ Repository updated successfully")
    except Exception as e:
        # Best effort only: a failed or timed-out pull must not stop the run.
        print(f"‚ö†Ô∏è Update failed: {e} - continuing with existing repo")

ensure_repository()

# ======================================================
# 5Ô∏è‚É£ RATE LIMITER
# ======================================================
class RateLimiter:
    """Thread-safe sliding-window throttle for outbound API calls.

    Two windows are tracked: requests in the last 60 seconds and requests in
    the last 3600 seconds.  ``wait_if_needed`` blocks until a new request
    fits into both windows and then sleeps a short pacing delay.

    All sleeping happens while the internal lock is held, so concurrent
    callers are serialized and requests never overlap.

    Args:
        requests_per_minute: Maximum requests allowed in any 60 s window.
        requests_per_hour: Maximum requests allowed in any 3600 s window.
        base_delay: Fixed pause (seconds) applied after every request.
        max_jitter: Upper bound (seconds, exclusive) of the extra pause added
            on top of ``base_delay``.  The defaults reproduce the former
            hard-coded pause of 1.0 to 2.9 seconds.
    """

    def __init__(self, requests_per_minute=10, requests_per_hour=350,
                 base_delay=1.0, max_jitter=2.0):
        self.rpm = requests_per_minute
        self.rph = requests_per_hour
        self.base_delay = base_delay
        self.max_jitter = max_jitter
        self.request_times = []
        self.hourly_request_times = []
        self.lock = threading.Lock()
        self.total_requests = 0

    def _prune(self, now):
        """Drop timestamps that have fallen out of the minute/hour windows."""
        self.request_times = [t for t in self.request_times if now - t < 60]
        self.hourly_request_times = [t for t in self.hourly_request_times if now - t < 3600]

    def wait_if_needed(self):
        """Block until a request is allowed, record it, then pace the caller."""
        with self.lock:
            now = time.time()
            self._prune(now)

            if self.request_times and len(self.request_times) >= self.rpm:
                wait_time = 60 - (now - self.request_times[0])
                if wait_time > 0:
                    time.sleep(wait_time + 1)
                    # Refresh the clock: the request happens *after* the
                    # sleep, so recording the pre-sleep time would make the
                    # window under-count.  Re-prune instead of wiping the
                    # list so still-recent requests keep counting.
                    now = time.time()
                    self._prune(now)

            if self.hourly_request_times and len(self.hourly_request_times) >= self.rph:
                wait_time = 3600 - (now - self.hourly_request_times[0])
                if wait_time > 0:
                    time.sleep(wait_time + 1)
                    now = time.time()
                    self._prune(now)

            self.request_times.append(now)
            self.hourly_request_times.append(now)
            self.total_requests += 1

            # Pacing delay: base + pseudo-random jitter in 20 discrete steps
            # derived from the timestamp's string hash.
            jitter_fraction = (hash(str(now)) % 20) / 20
            time.sleep(self.base_delay + self.max_jitter * jitter_fraction)

    def get_stats(self):
        """Return a dict with the number of requests admitted so far."""
        with self.lock:
            return {'total_requests': self.total_requests}

rate_limiter = RateLimiter()

# ======================================================
# 6Ô∏è‚É£ DATA CLEANING & VALIDATION
# ======================================================
def fix_ohlc_logic(df):
    """Return a copy of ``df`` whose high/low bracket all four prices.

    ``high`` is replaced by the row-wise maximum of open/high/low/close and
    ``low`` by the row-wise minimum.  ``None`` and empty frames are returned
    as-is; a frame lacking any OHLC column is returned as an unmodified copy.
    The input frame is never mutated.
    """
    if df is None or df.empty:
        return df

    price_cols = ['open', 'high', 'low', 'close']
    repaired = df.copy()

    if any(name not in repaired.columns for name in price_cols):
        return repaired

    # Order matters: the minimum is taken after `high` has been corrected,
    # so a bogus too-small `high` cannot leak into `low`.
    repaired['high'] = repaired[price_cols].max(axis=1)
    repaired['low'] = repaired[price_cols].min(axis=1)
    return repaired

class DataQualityValidator:
    """Lenient quality gate for OHLC frames downloaded from YFinance.

    A frame is scored from 0 to 100 using three components: share of rows
    with complete OHLC values (30 points), coefficient of variation of the
    close price (40 points) and share of distinct close prices (30 points).
    """

    # Thresholds (loosened relative to earlier revisions).
    MIN_ROWS = 5  # previously 10
    MIN_PRICE_CV = 0.01  # previously 0.1; compared with price_cv, which is already in percent
    MIN_UNIQUE_RATIO = 0.005  # previously 0.05; fraction (0.5%) of distinct closes
    MIN_TRUE_RANGE = 1e-12  # not consulted by validate_dataframe
    MIN_QUALITY_SCORE = 20.0  # previously 40.0

    @staticmethod
    def _median_true_range(ohlc):
        """Median true range of a NaN-free OHLC frame (first bar: high - low)."""
        highs = ohlc['high'].values
        lows = ohlc['low'].values
        prev_close = np.roll(ohlc['close'].values, 1)

        spans = np.maximum.reduce([
            highs - lows,
            np.abs(highs - prev_close),
            np.abs(lows - prev_close)
        ])
        # np.roll wraps the last close to the front; the first bar has no
        # predecessor, so fall back to its plain high-low range.
        spans[0] = highs[0] - lows[0]
        return float(np.median(spans))

    @staticmethod
    def validate_dataframe(df, pair, tf_name):
        """Score ``df`` and decide whether it is good enough to keep.

        Args:
            df: Frame expected to hold lower-case open/high/low/close columns.
            pair: Currency pair label (not used in the computation).
            tf_name: Timeframe label (not used in the computation).

        Returns:
            Tuple ``(is_valid, quality_score, metrics, issues)``.  Early
            rejections return a score of 0.0 and a single-entry issue list.
        """
        limits = DataQualityValidator

        if df is None or df.empty:
            return False, 0.0, {}, ["Empty DataFrame"]

        row_total = len(df)
        metrics = {'row_count': row_total}

        if row_total < limits.MIN_ROWS:
            return False, 0.0, metrics, [f"Too few rows: {row_total}"]

        price_cols = ['open', 'high', 'low', 'close']
        if any(name not in df.columns for name in price_cols):
            return False, 0.0, metrics, ["Missing OHLC columns"]

        complete = df[price_cols].dropna()
        if len(complete) == 0:
            return False, 0.0, metrics, ["No valid OHLC data"]

        closes = complete['close']
        mean_close = float(closes.mean())
        std_close = float(closes.std())
        if mean_close > 0:
            cv_percent = (std_close / mean_close) * 100
        else:
            cv_percent = 0.0
        distinct_closes = closes.nunique()

        metrics['valid_rows'] = len(complete)
        metrics['valid_ratio'] = len(complete) / row_total
        metrics['price_mean'] = mean_close
        metrics['price_std'] = std_close
        metrics['price_cv'] = cv_percent
        metrics['unique_prices'] = distinct_closes
        metrics['unique_ratio'] = distinct_closes / len(closes)
        metrics['true_range_median'] = DataQualityValidator._median_true_range(complete)

        unique_ratio = metrics['unique_ratio']

        # Completeness: up to 30 points.
        score = metrics['valid_ratio'] * 30

        # Variability: full 40 points from 0.5% CV, pro-rated down to the minimum.
        if cv_percent >= 0.5:
            score += 40
        elif cv_percent >= limits.MIN_PRICE_CV:
            score += (cv_percent / 0.5) * 40

        # Diversity: full 30 points from 10% distinct closes, pro-rated below.
        if unique_ratio >= 0.1:
            score += 30
        elif unique_ratio >= limits.MIN_UNIQUE_RATIO:
            score += (unique_ratio / 0.1) * 30

        metrics['quality_score'] = score

        is_valid = (
            score >= limits.MIN_QUALITY_SCORE and
            cv_percent >= limits.MIN_PRICE_CV and
            unique_ratio >= limits.MIN_UNIQUE_RATIO
        )

        issues = []
        if not is_valid:
            if cv_percent < limits.MIN_PRICE_CV:
                issues.append(f"Low CV: {cv_percent:.4f}%")
            if unique_ratio < limits.MIN_UNIQUE_RATIO:
                issues.append(f"Low unique: {unique_ratio:.3%}")

        return is_valid, score, metrics, issues

validator = DataQualityValidator()

# ======================================================
# 7Ô∏è‚É£ CONFIGURATION
# ======================================================
FX_PAIRS = ["EUR/USD", "GBP/USD", "USD/JPY", "AUD/USD"]

# ‚úÖ ENHANCED with more fallback options
TIMEFRAMES = {
    "1d_5y": [
        ("1d", "5y"),
        ("1d", "max"),  # Try max available
        ("1d", "3y"),
        ("1d", "2y"),
    ],
    "1h_2y": [
        ("1h", "2y"),
        ("1h", "1y"),
        ("1h", "730d"),  # Exactly 2 years in days
        ("1h", "6mo")
    ],
    "15m_60d": [
        ("15m", "60d"),
        ("15m", "2mo"),
        ("15m", "30d"),
    ],
    "5m_1mo": [
        ("5m", "1mo"),
        ("5m", "30d"),
        ("5m", "14d"),
    ],
    "1m_7d": [
        ("1m", "7d"),
        ("1m", "5d"),
        ("1m", "3d"),
    ]
}

print(f"\nüìä Configuration:")
print(f"   Pairs: {len(FX_PAIRS)}")
print(f"   Timeframes: {len(TIMEFRAMES)}")
print(f"   Total tasks: {len(FX_PAIRS) * len(TIMEFRAMES)}")
print(f"   Quality threshold: {validator.MIN_QUALITY_SCORE}/100 (RELAXED)")
print("=" * 70)

lock = threading.Lock()

# ======================================================
# 8Ô∏è‚É£ HELPER FUNCTIONS
# ======================================================
def file_hash(filepath):
    """Return the hex MD5 digest of ``filepath``, or ``None`` if it is absent.

    The file is streamed in 8 KiB blocks so large CSVs are never held in
    memory at once.  The digest is used only for change detection.
    """
    if not filepath.exists():
        return None

    digest = hashlib.md5()
    with open(filepath, "rb") as handle:
        while True:
            block = handle.read(8192)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()

def ensure_tz_naive(df):
    """Give ``df`` a timezone-naive datetime index and return it.

    The index is parsed with ``pd.to_datetime`` (unparseable labels become
    ``NaT``).  A timezone-aware index is converted with ``tz_convert(None)``,
    i.e. expressed in UTC before the zone is dropped.  The frame is modified
    in place; ``None`` and empty frames are returned untouched.
    """
    if df is None or df.empty:
        return df

    parsed = pd.to_datetime(df.index, errors='coerce')
    df.index = parsed if parsed.tz is None else parsed.tz_convert(None)
    return df

def merge_data(existing_df, new_df):
    """Combine previously saved rows with freshly downloaded ones.

    Both frames are first passed through ``ensure_tz_naive``.  If either is
    empty the other is returned unchanged.  Otherwise the frames are
    stacked, rows sharing an index label are collapsed to the last
    occurrence (so new data wins), and the result is sorted by index.
    """
    previous = ensure_tz_naive(existing_df)
    fresh = ensure_tz_naive(new_df)

    if previous.empty:
        return fresh
    if fresh.empty:
        return previous

    stacked = pd.concat([previous, fresh])
    is_latest = ~stacked.index.duplicated(keep="last")
    return stacked[is_latest].sort_index()

def get_symbol_variants(pair, interval):
    """Return the Yahoo Finance tickers to try for ``pair``, best first.

    The primary ticker is the concatenated pair plus ``=X`` (``"EUR/USD"``
    becomes ``"EURUSD=X"``).  For daily and hourly intervals a USD-based pair
    also gets Yahoo's short alias ``"<quote>=X"`` (``"USD/JPY"`` becomes
    ``"JPY=X"``).

    No alias is produced for pairs whose base is not USD: ``"EUR=X"`` is the
    USD/EUR rate, i.e. the inverse of EUR/USD, and merging it into the pair's
    history would corrupt the series.  The list contains no duplicates, so
    callers do not burn retries on the same ticker twice.

    Args:
        pair: Pair in ``"BASE/QUOTE"`` form.
        interval: YFinance interval string such as ``"1d"`` or ``"5m"``.

    Returns:
        List of ticker strings; always at least the primary ticker.
    """
    base_symbol = pair.replace("/", "") + "=X"
    variants = [base_symbol]

    if interval in ("1d", "1h"):
        from_curr, _, to_curr = pair.partition("/")
        if from_curr == "USD" and to_curr:
            alias = f"{to_curr}=X"
            if alias not in variants:
                variants.append(alias)

    return variants

# ======================================================
# 9Ô∏è‚É£ WORKER FUNCTION
# ======================================================
def process_pair_tf(pair, tf_name, interval_period_options, max_retries=3):
    """
    Download one pair/timeframe from YFinance, merge it with the CSV already
    on disk, repair OHLC ordering, validate, and write the result back.

    The CSV lives in CSV_FOLDER and is named "<PAIR with _>_<tf_name>.csv".

    Search order: for each (interval, period) option, for each ticker from
    get_symbol_variants, up to max_retries attempts.  The first attempt that
    yields a valid merged frame is saved and returned.  If the very last
    option still fails validation on its final attempt, the frame is saved
    anyway and flagged as low quality.

    Args:
        pair: Pair in "BASE/QUOTE" form.
        tf_name: Timeframe label used in the file name and messages.
        interval_period_options: Sequence of (interval, period) tuples to
            try in order.
        max_retries: Attempts per ticker before moving on.

    Returns:
        Tuple of (message, filepath if changed, quality_score).  The path is
        None when the written file hashes the same as before; on total
        failure the tuple is (failure message, None, 0.0).
    """
    # Target file inside the YFinance raw-data folder
    filename = f"{pair.replace('/', '_')}_{tf_name}.csv"
    filepath = CSV_FOLDER / filename

    # Load whatever was saved by a previous run; an unreadable file is
    # reported and treated as "no existing data".
    existing_df = pd.DataFrame()
    if filepath.exists():
        try:
            existing_df = pd.read_csv(filepath, index_col=0, parse_dates=True)
            existing_df = ensure_tz_naive(existing_df)
        except Exception as e:
            print(f"  ‚ö†Ô∏è Could not load existing data: {e}")

    # Hash before writing so the caller can tell whether the file changed
    old_hash = file_hash(filepath)

    for option_idx, (interval, period) in enumerate(interval_period_options):
        symbol_variants = get_symbol_variants(pair, interval)

        for symbol in symbol_variants:
            for attempt in range(max_retries):
                try:
                    # Blocks until the shared limiter admits another request
                    rate_limiter.wait_if_needed()

                    ticker = yf.Ticker(symbol)
                    df = ticker.history(
                        period=period,
                        interval=interval,
                        auto_adjust=False,
                        prepost=False,
                        actions=False,
                        raise_errors=False
                    )

                    # Route empty downloads through the retry/fallback
                    # handling in the except clause below
                    if df.empty:
                        raise ValueError("Empty data")

                    # Keep only the price/volume columns and lower-case them
                    # to match the names the validator expects
                    available_cols = [c for c in ['Open', 'High', 'Low', 'Close', 'Volume']
                                     if c in df.columns]
                    df = df[available_cols]
                    df.rename(columns=lambda x: x.lower(), inplace=True)
                    df = ensure_tz_naive(df)

                    combined_df = merge_data(existing_df, df)

                    # Repair high/low before validation
                    combined_df = fix_ohlc_logic(combined_df)

                    is_valid, quality_score, metrics, issues = validator.validate_dataframe(
                        combined_df, pair, tf_name
                    )

                    if not is_valid:
                        if attempt < max_retries - 1:
                            # Exponential backoff (3s, 6s, ...) then retry
                            time.sleep(3 * (2 ** attempt))
                            continue
                        elif option_idx < len(interval_period_options) - 1:
                            # NOTE: this leaves only the retry loop, so the
                            # next ticker variant (if any) is tried before
                            # the next (interval, period) option.
                            break
                        else:
                            # Last option, last attempt: keep the data anyway
                            # and flag it as low quality
                            print(f"  ‚ö†Ô∏è Low quality ({quality_score:.1f}) but saving: {pair} {tf_name}")

                    # Write under the shared lock (worker threads run in parallel)
                    with lock:
                        combined_df.to_csv(filepath)

                    new_hash = file_hash(filepath)
                    changed = (old_hash != new_hash)

                    # A score of 50+ gets the success icon, anything lower
                    # the warning icon
                    status = "‚úÖ" if quality_score >= 50 else "‚ö†Ô∏è"
                    msg = f"{status} {pair} {tf_name} - {len(combined_df)} rows, Q:{quality_score:.0f}"
                    print(f"  {msg}")
                    return msg, str(filepath) if changed else None, quality_score

                except Exception as e:
                    # NOTE(review): the exception is swallowed without being
                    # printed, so the cause of a failed download is never
                    # surfaced.
                    if attempt < max_retries - 1:
                        time.sleep(3 * (2 ** attempt))
                    else:
                        if option_idx < len(interval_period_options) - 1:
                            # Leaves the retry loop only (see note above); on
                            # the last option the loop simply runs out.
                            break

    # Every option, ticker and attempt failed
    return f"‚ùå Failed {pair} {tf_name}", None, 0.0

# ======================================================
# üîü PARALLEL EXECUTION
# ======================================================
print("\n" + "=" * 70)
print("üöÄ Starting YFinance data download...")
print("=" * 70 + "\n")

start_time = time.time()
changed_files = []
results = []
quality_scores = {}

with ThreadPoolExecutor(max_workers=2) as executor:
    tasks = []
    for pair in FX_PAIRS:
        for tf_name, options in TIMEFRAMES.items():
            tasks.append(executor.submit(process_pair_tf, pair, tf_name, options))

    for future in as_completed(tasks):
        try:
            msg, filename, quality = future.result()
            results.append(msg)
            if filename:
                changed_files.append(filename)
                quality_scores[filename] = quality
        except Exception as e:
            results.append(f"‚ùå Error: {e}")

elapsed_time = time.time() - start_time

# ======================================================
# 1Ô∏è‚É£1Ô∏è‚É£ SUMMARY
# ======================================================
print("\n" + "=" * 70)
print("üìä PROCESSING SUMMARY")
print("=" * 70)

for result in results:
    print(result)

success_count = len([r for r in results if "‚úÖ" in r or "‚ö†Ô∏è" in r])
print(f"\nTotal tasks: {len(results)}")
print(f"Successful: {success_count}/{len(results)}")
print(f"Files updated: {len(changed_files)}")
print(f"Time: {elapsed_time/60:.1f} min")

if quality_scores:
    avg_q = sum(quality_scores.values()) / len(quality_scores)
    print(f"Average quality: {avg_q:.1f}/100")

    print("\n" + "=" * 70)
    print("üìä QUALITY REPORT")
    print("=" * 70)
    for fname, score in sorted(quality_scores.items(), key=lambda x: x[1], reverse=True):
        status = "‚úÖ" if score >= 50 else "‚ö†Ô∏è"
        print(f"  {status} {Path(fname).name}: {score:.1f}/100")

# Check quarantine
quarantined = list(QUARANTINE_FOLDER.glob("*.bad"))
if quarantined:
    print(f"\n" + "=" * 70)
    print(f"‚ö†Ô∏è  QUARANTINED FILES: {len(quarantined)}")
    print("=" * 70)
    for qfile in quarantined:
        print(f"  ‚ùå {qfile.stem}")

# ======================================================
# 1Ô∏è‚É£2Ô∏è‚É£ GIT COMMIT & PUSH
# ======================================================
# Publish the refreshed CSVs.  Under GitHub Actions this step is skipped;
# elsewhere the repository is committed and pushed with retries.
if IN_GHA:
    print("\n" + "=" * 70)
    print("ü§ñ GitHub Actions: Skipping git operations")
    print("=" * 70)

elif changed_files:
    print("\n" + "=" * 70)
    print("üöÄ Committing changes to GitHub...")
    print("=" * 70)

    try:
        os.chdir(REPO_FOLDER)

        # NOTE: stages everything in the working tree, not only changed_files.
        subprocess.run(["git", "add", "-A"], check=False)

        commit_msg = f"Update YFinance data - {len(changed_files)} files"
        if quality_scores:
            commit_msg += f" (Avg Q:{avg_q:.0f})"

        result = subprocess.run(
            ["git", "commit", "-m", commit_msg],
            capture_output=True,
            text=True
        )

        if result.returncode == 0:
            print("‚úÖ Changes committed")

            max_push_attempts = 3
            for attempt in range(max_push_attempts):
                print(f"üì§ Pushing to GitHub (attempt {attempt + 1}/{max_push_attempts})...")
                try:
                    result = subprocess.run(
                        ["git", "push", "origin", BRANCH],
                        capture_output=True,
                        text=True,
                        timeout=30
                    )
                except subprocess.TimeoutExpired:
                    # A slow push is retried like any other failed attempt
                    # instead of aborting the whole retry loop.
                    print("‚ö†Ô∏è Push timed out")
                    result = None

                if result is not None and result.returncode == 0:
                    print("‚úÖ Successfully pushed to GitHub")
                    break

                if attempt < max_push_attempts - 1:
                    # The remote may have moved on; rebase onto it and retry.
                    subprocess.run(
                        ["git", "pull", "--rebase", "origin", BRANCH],
                        capture_output=True
                    )
                    time.sleep(3)
            else:
                # Loop finished without `break`: every attempt failed.
                if result is None:
                    push_error = "timed out"
                else:
                    push_error = (result.stderr or "").strip()
                print(f"‚ùå Push failed after {max_push_attempts} attempts: {push_error}")
        else:
            print("‚ÑπÔ∏è  No changes to commit")

    except Exception as e:
        print(f"‚ùå Git error: {e}")
    finally:
        os.chdir(SAVE_FOLDER)

else:
    print("\n‚ÑπÔ∏è No changes to commit")

# ======================================================
# ‚úÖ COMPLETION
# ======================================================
print("\n" + "=" * 70)
print("‚úÖ YFINANCE WORKFLOW COMPLETED")
print("=" * 70)
print(f"Environment: {ENV_NAME}")
print(f"Files updated: {len(changed_files)}")
print(f"Quality validated: ‚úÖ")
if quality_scores:
    print(f"Average quality: {avg_q:.1f}/100")
print(f"Status: {'‚úÖ Success' if success_count == len(results) else '‚ö†Ô∏è Partial'}")
print(f"Rate limiter: {rate_limiter.get_stats()['total_requests']} requests")
print("=" * 70)
print("\nüìÅ Clean File Structure:")
print(f"   YFinance: {CSV_FOLDER}")
print(f"   ‚îî‚îÄ‚îÄ EUR_USD_1d_5y.csv, EUR_USD_1h_2y.csv, etc.")
print(f"   Alpha Vantage: {SAVE_FOLDER / 'data' / 'raw' / 'alpha_vantage'}")
print(f"   ‚îî‚îÄ‚îÄ EUR_USD_daily_av.csv")
print("\nüéØ All data sources in organized folders!")
print("=" * 70)

In [None]:
#!/usr/bin/env python3
"""
FX CSV Combiner + Multi-Type Handler - CLEAN STRUCTURE EDITION
==============================================================
‚úÖ Aligned with clean repo structure (data/raw/, data/processed/)
‚úÖ Combines Alpha Vantage + YFinance data
‚úÖ Full-dataset indicator calculation (not incremental)
‚úÖ ATR preservation (no clipping or scaling)
‚úÖ Quality validation before processing
‚úÖ Multi-environment support (Colab, GHA, Local)
"""

import os
import time
import hashlib
import subprocess
import shutil
import urllib.parse
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed
import threading
import pandas as pd
import numpy as np
from sklearn.preprocessing import RobustScaler
import ta
from ta.momentum import WilliamsRIndicator
from ta.volatility import AverageTrueRange
import warnings

warnings.filterwarnings('ignore')

print("=" * 70)
print("üîß CSV Combiner & Multi-Type Handler - Clean Structure Edition")
print("=" * 70)

# ======================================================
# 1Ô∏è‚É£ ENVIRONMENT DETECTION
# ======================================================
try:
    import google.colab
    IN_COLAB = True
    ENV_NAME = "Google Colab"
except ImportError:
    IN_COLAB = False
    ENV_NAME = "Local"

IN_GHA = "GITHUB_ACTIONS" in os.environ
if IN_GHA:
    ENV_NAME = "GitHub Actions"

print(f"üåç Environment: {ENV_NAME}")

# ======================================================
# 2Ô∏è‚É£ UNIFIED PATH CONFIGURATION (MATCHES CLEAN STRUCTURE!)
# ======================================================
if IN_COLAB:
    print("‚òÅÔ∏è Google Colab detected - using clean structure")
    BASE_FOLDER = Path("/content")
    SAVE_FOLDER = BASE_FOLDER / "forex-ai-models"
    REPO_FOLDER = SAVE_FOLDER
elif IN_GHA:
    print("ü§ñ GitHub Actions detected - using repository root")
    BASE_FOLDER = Path.cwd()
    SAVE_FOLDER = BASE_FOLDER
    REPO_FOLDER = BASE_FOLDER
else:
    print("üíª Local environment detected - using clean structure")
    BASE_FOLDER = Path.cwd()
    SAVE_FOLDER = BASE_FOLDER
    REPO_FOLDER = BASE_FOLDER

# ‚úÖ CREATE ORGANIZED DIRECTORY STRUCTURE
DIRECTORIES = {
    "data_raw_yfinance": SAVE_FOLDER / "data" / "raw" / "yfinance",
    "data_raw_alpha": SAVE_FOLDER / "data" / "raw" / "alpha_vantage",
    "data_processed": SAVE_FOLDER / "data" / "processed",
    "database": SAVE_FOLDER / "database",
    "logs": SAVE_FOLDER / "logs",
    "outputs": SAVE_FOLDER / "outputs",
    "quarantine": SAVE_FOLDER / "data" / "quarantine" / "combiner",
}

# Create all directories
for dir_name, dir_path in DIRECTORIES.items():
    dir_path.mkdir(parents=True, exist_ok=True)

# Export key paths
YFINANCE_CSV_FOLDER = DIRECTORIES["data_raw_yfinance"]
ALPHA_CSV_FOLDER = DIRECTORIES["data_raw_alpha"]
PICKLE_FOLDER = DIRECTORIES["data_processed"]
QUARANTINE_FOLDER = DIRECTORIES["quarantine"]
LOG_FOLDER = DIRECTORIES["logs"]

print(f"üìÇ Base Folder: {BASE_FOLDER}")
print(f"üíæ Save Folder: {SAVE_FOLDER}")
print(f"üì¶ Repo Folder: {REPO_FOLDER}")
print(f"üìä YFinance CSV: {YFINANCE_CSV_FOLDER}")
print(f"üìä Alpha CSV: {ALPHA_CSV_FOLDER}")
print(f"üîß Processed: {PICKLE_FOLDER}")
print(f"üóëÔ∏è Quarantine: {QUARANTINE_FOLDER}")
print("=" * 70)

lock = threading.Lock()

def print_status(msg, level="info"):
    """Print ``msg`` prefixed with the icon registered for ``level``.

    Unknown levels fall back to the "info" icon.
    """
    icons = {"info": "‚ÑπÔ∏è", "success": "‚úÖ", "warn": "‚ö†Ô∏è", "error": "‚ùå", "debug": "üêû"}
    prefix = icons[level] if level in icons else '‚ÑπÔ∏è'
    print(f"{prefix} {msg}")

# ======================================================
# 3Ô∏è‚É£ DATA QUALITY VALIDATOR
# ======================================================
class DataQualityValidator:
    """Quality gate for OHLC frames read back from the raw CSV folders.

    Frames are scored from 0 to 100: share of rows with complete OHLC values
    (30 points), coefficient of variation of the close (40 points) and share
    of distinct closes (30 points).
    """

    MIN_ROWS = 10
    MIN_PRICE_CV = 0.01  # percent: price_cv is std/mean * 100
    MIN_UNIQUE_RATIO = 0.005  # fraction (0.5%) of distinct closes
    MIN_TRUE_RANGE = 1e-10  # not consulted by validate_dataframe
    MIN_QUALITY_SCORE = 20.0  # relaxed from 30

    @staticmethod
    def validate_dataframe(df, filename):
        """Score ``df`` and report whether it passes the quality gate.

        Args:
            df: Frame expected to hold lower-case open/high/low/close columns.
            filename: Source label (not used in the computation).

        Returns:
            Tuple ``(is_valid, quality_score, metrics, issues)``.

        NOTE(review): a frame shorter than MIN_ROWS gets a "Too few rows"
        issue but is not rejected for it, and the unique-price ratio affects
        only the score, not the verdict.  Confirm this is intended.
        """
        limits = DataQualityValidator

        if df is None or df.empty:
            return False, 0.0, {}, ["Empty DataFrame"]

        row_total = len(df)
        metrics = {'row_count': row_total}
        issues = []

        if row_total < limits.MIN_ROWS:
            issues.append(f"Too few rows: {row_total}")

        price_cols = ['open', 'high', 'low', 'close']
        absent = [name for name in price_cols if name not in df.columns]
        if absent:
            issues.append(f"Missing columns: {absent}")
            return False, 0.0, metrics, issues

        complete = df[price_cols].dropna()
        if len(complete) == 0:
            issues.append("No valid OHLC data")
            return False, 0.0, metrics, issues

        metrics['valid_rows'] = len(complete)
        metrics['valid_ratio'] = len(complete) / row_total

        closes = complete['close']
        mean_close = float(closes.mean())
        std_close = float(closes.std())
        metrics['price_mean'] = mean_close
        metrics['price_std'] = std_close
        if mean_close > 0:
            cv_percent = std_close / mean_close * 100
        else:
            cv_percent = 0.0
        metrics['price_cv'] = cv_percent

        metrics['unique_prices'] = closes.nunique()
        unique_ratio = metrics['unique_prices'] / len(closes)
        metrics['unique_ratio'] = unique_ratio

        # True range per bar; the first bar has no previous close, so the
        # wrapped value produced by np.roll is overwritten with high - low.
        highs = complete['high'].values
        lows = complete['low'].values
        shifted_close = np.roll(complete['close'].values, 1)
        true_range = np.maximum.reduce([
            highs - lows,
            np.abs(highs - shifted_close),
            np.abs(lows - shifted_close)
        ])
        true_range[0] = highs[0] - lows[0]
        metrics['true_range_median'] = float(np.median(true_range))

        # Completeness component.
        score = 0.0 + metrics['valid_ratio'] * 30

        # Variability component: full marks from 0.5% CV, pro-rated below.
        if cv_percent >= 0.5:
            score += 40
        elif cv_percent >= limits.MIN_PRICE_CV:
            score += (cv_percent / 0.5) * 40

        # Diversity component: full marks from 10% distinct closes.
        if unique_ratio >= 0.1:
            score += 30
        elif unique_ratio >= limits.MIN_UNIQUE_RATIO:
            score += (unique_ratio / 0.1) * 30

        metrics['quality_score'] = score

        is_valid = (
            score >= limits.MIN_QUALITY_SCORE and
            cv_percent >= limits.MIN_PRICE_CV
        )

        if not is_valid:
            if cv_percent < limits.MIN_PRICE_CV:
                issues.append(f"Low CV: {cv_percent:.4f}%")
            if unique_ratio < limits.MIN_UNIQUE_RATIO:
                issues.append(f"Low unique: {unique_ratio:.3%}")

        return is_valid, score, metrics, issues

validator = DataQualityValidator()

# ======================================================
# 4Ô∏è‚É£ GIT CONFIGURATION
# ======================================================
GIT_USER_NAME = os.environ.get("GIT_USER_NAME", "Forex AI Bot")
GIT_USER_EMAIL = os.environ.get("GIT_USER_EMAIL", "nakatonabira3@gmail.com")
GITHUB_USERNAME = "rahim-dotAI"
GITHUB_REPO = "forex-ai-models"
BRANCH = "main"

FOREX_PAT = os.environ.get("FOREX_PAT")

if not FOREX_PAT and IN_COLAB:
    try:
        from google.colab import userdata
        FOREX_PAT = userdata.get("FOREX_PAT")
        if FOREX_PAT:
            os.environ["FOREX_PAT"] = FOREX_PAT
            print("üîê Loaded FOREX_PAT from Colab secrets")
    except Exception as e:
        print(f"‚ö†Ô∏è Could not access Colab secrets: {e}")

if FOREX_PAT:
    subprocess.run(["git", "config", "--global", "user.name", GIT_USER_NAME],
                   capture_output=True, check=False)
    subprocess.run(["git", "config", "--global", "user.email", GIT_USER_EMAIL],
                   capture_output=True, check=False)
    print(f"‚úÖ Git configured: {GIT_USER_NAME} <{GIT_USER_EMAIL}>")

# ======================================================
# 5Ô∏è‚É£ HELPER FUNCTIONS
# ======================================================
def ensure_tz_naive(df):
    """Give ``df`` a timezone-naive datetime index and return it.

    The index is parsed with ``pd.to_datetime`` (unparseable labels become
    ``NaT``).  A timezone-aware index has its zone stripped with
    ``tz_localize(None)``, which keeps the local wall-clock times.  The frame
    is modified in place.  ``None`` or an empty frame yields a new empty
    ``DataFrame``.
    """
    if df is None or df.empty:
        return pd.DataFrame()

    stamps = pd.to_datetime(df.index, errors='coerce')
    if stamps.tz is not None:
        stamps = stamps.tz_localize(None)
    df.index = stamps

    return df

def safe_numeric(df):
    """Return a cleaned copy of ``df`` with infinities and bad rows removed.

    Positive and negative infinity are turned into NaN.  Rows with a NaN in
    any of the open/high/low/close columns that are present are dropped; if
    none of those columns exist, only rows that are entirely NaN are dropped.
    The input frame is left untouched.
    """
    cleaned = df.copy().replace([np.inf, -np.inf], np.nan)

    price_cols = [name for name in ('open', 'high', 'low', 'close')
                  if name in cleaned.columns]

    if not price_cols:
        return cleaned.dropna(how='all')
    return cleaned.dropna(subset=price_cols)

# ======================================================
# 6Ô∏è‚É£ CSV DISCOVERY
# ======================================================
def discover_csv_files():
    """Collect the ``*.csv`` paths from the YFinance and Alpha Vantage folders.

    YFinance files come first in the returned list, followed by Alpha
    Vantage files.  A debug line is printed for each folder that contains at
    least one CSV.
    """
    sources = (
        (YFINANCE_CSV_FOLDER, "YFinance"),
        (ALPHA_CSV_FOLDER, "Alpha Vantage"),
    )

    collected = []
    for folder, label in sources:
        matches = list(folder.glob("*.csv"))
        if not matches:
            continue
        print_status(f"üìÇ Found {len(matches)} {label} CSV(s)", "debug")
        collected.extend(matches)

    return collected

# ======================================================
# 7Ô∏è‚É£ INDICATOR CALCULATION (FULL DATASET)
# ======================================================
def add_indicators_full(df):
    """Add technical indicators and derived features to an OHLC frame.

    Indicators are computed over the whole frame passed in (not
    incrementally). Each indicator family runs in its own ``try`` block: a
    failure is reported through ``print_status`` as a warning and the
    remaining families are still attempted, so the result may contain only a
    subset of the columns.

    All numeric columns except the price/volume columns, the ``raw_*``
    copies and ``ATR`` are scaled with ``RobustScaler`` at the end.

    Args:
        df: DataFrame with at least ``open``, ``high``, ``low`` and ``close``
            columns. ``volume`` is optional.

    Returns:
        A new, index-sorted DataFrame with the added columns, or ``None``
        when ``df`` is empty, lacks an OHLC column, or has no rows left
        after ``safe_numeric`` cleaning.
    """
    if df.empty:
        return None

    required_cols = ['open', 'high', 'low', 'close']
    if not all(col in df.columns for col in required_cols):
        return None

    # Drop inf/NaN rows first so the indicator windows see clean prices.
    df = safe_numeric(df)
    if df.empty:
        return None

    df = df.copy()
    df.sort_index(inplace=True)

    # Keep untouched copies of the prices; existing raw_* columns are not overwritten.
    for col in ['open', 'high', 'low', 'close']:
        if col in df.columns and f'raw_{col}' not in df.columns:
            df[f'raw_{col}'] = df[col].copy()

    print_status(f"  üîß Calculating indicators on {len(df)} rows", "debug")

    try:
        # Trend indicators: each one is only added when the frame has at
        # least as many rows as the indicator window.
        if len(df) >= 10:
            df['SMA_10'] = ta.trend.sma_indicator(df['close'], 10)
            df['EMA_10'] = ta.trend.ema_indicator(df['close'], 10)

        if len(df) >= 20:
            df['SMA_20'] = ta.trend.sma_indicator(df['close'], 20)
            df['EMA_20'] = ta.trend.ema_indicator(df['close'], 20)

        if len(df) >= 50:
            df['SMA_50'] = ta.trend.sma_indicator(df['close'], 50)
            df['EMA_50'] = ta.trend.ema_indicator(df['close'], 50)

        if len(df) >= 200:
            df['SMA_200'] = ta.trend.sma_indicator(df['close'], 200)

        # MACD with the library's default windows.
        if len(df) >= 26:
            macd = ta.trend.MACD(df['close'])
            df['MACD'] = macd.macd()
            df['MACD_signal'] = macd.macd_signal()
            df['MACD_diff'] = macd.macd_diff()

    except Exception as e:
        print_status(f"  ‚ö†Ô∏è Trend indicator error: {e}", "warn")

    try:
        # Momentum indicators
        if len(df) >= 14:
            df['RSI_14'] = ta.momentum.rsi(df['close'], 14)
            df['Williams_%R'] = WilliamsRIndicator(
                df['high'], df['low'], df['close'], 14
            ).williams_r()
            df['Stoch_K'] = ta.momentum.stoch(df['high'], df['low'], df['close'], 14)
            df['Stoch_D'] = ta.momentum.stoch_signal(df['high'], df['low'], df['close'], 14)

        if len(df) >= 20:
            df['CCI_20'] = ta.trend.cci(df['high'], df['low'], df['close'], 20)
            # ROC uses a 12-period window although it sits behind the 20-row gate.
            df['ROC'] = ta.momentum.roc(df['close'], 12)

    except Exception as e:
        print_status(f"  ‚ö†Ô∏è Momentum indicator error: {e}", "warn")

    try:
        # ATR: stored exactly as computed; it is neither clipped here nor
        # scaled later (see protected_cols below).
        if len(df) >= 14:
            atr_values = AverageTrueRange(
                df['high'], df['low'], df['close'], 14
            ).average_true_range()

            # Missing ATR values become a tiny positive placeholder (1e-10).
            df['ATR'] = atr_values.fillna(1e-10)

            atr_median = df['ATR'].median()
            if pd.notna(atr_median):
                print_status(f"  üìä ATR median: {atr_median:.8f}", "debug")

        # Bollinger Bands (20-period window, 2 standard deviations)
        if len(df) >= 20:
            bb = ta.volatility.BollingerBands(df['close'], 20, 2)
            df['BB_upper'] = bb.bollinger_hband()
            df['BB_middle'] = bb.bollinger_mavg()
            df['BB_lower'] = bb.bollinger_lband()
            df['BB_width'] = bb.bollinger_wband()

    except Exception as e:
        print_status(f"  ‚ö†Ô∏è Volatility indicator error: {e}", "warn")

    try:
        # Derived features
        df['price_change'] = df['close'].pct_change()
        df['price_change_5'] = df['close'].pct_change(5)
        df['high_low_range'] = (df['high'] - df['low']) / df['close']
        df['close_open_range'] = (df['close'] - df['open']) / df['open']

        if 'volume' in df.columns:
            # Cumulative VWAP over the whole frame. A zero cumulative volume
            # gives NaN/inf here; infinities are replaced in the scaling step.
            df['vwap'] = (df['close'] * df['volume']).cumsum() / df['volume'].cumsum()

        if 'SMA_50' in df.columns:
            df['price_vs_sma50'] = (df['close'] - df['SMA_50']) / df['SMA_50']

        if 'RSI_14' in df.columns:
            df['rsi_momentum'] = df['RSI_14'].diff()

    except Exception as e:
        print_status(f"  ‚ö†Ô∏è Derived features error: {e}", "warn")

    try:
        # Scale every numeric feature except prices, volume, raw_* and ATR.
        numeric_cols = df.select_dtypes(include=[np.number]).columns

        protected_cols = [
            'open', 'high', 'low', 'close', 'volume',
            'raw_open', 'raw_high', 'raw_low', 'raw_close',
            'ATR'  # kept in price units, never scaled
        ]

        scalable_cols = [c for c in numeric_cols if c not in protected_cols]

        if scalable_cols:
            df[scalable_cols] = df[scalable_cols].replace([np.inf, -np.inf], np.nan)
            # Columns that are entirely NaN are left untouched.
            cols_with_data = [c for c in scalable_cols if not df[c].isna().all()]

            if cols_with_data:
                # The scaler is fitted on the whole frame; remaining NaNs are
                # zero-filled and offset by 1e-10 before fitting.
                scaler = RobustScaler()
                df[cols_with_data] = scaler.fit_transform(
                    df[cols_with_data].fillna(0) + 1e-10
                )
                print_status(f"  ‚úÖ Scaled {len(cols_with_data)} features (ATR protected)", "debug")

    except Exception as e:
        print_status(f"  ‚ö†Ô∏è Scaling error: {e}", "warn")

    return df

# ======================================================
# 8️⃣ MAIN PROCESSING FUNCTION
# ======================================================
def process_csv_file(csv_file):
    """Validate one CSV, add indicators, and store the result as a gzip pickle.

    Steps:
      1. Load the CSV (first column as a datetime index) and strip timezones.
      2. Score it with ``validator.validate_dataframe``. Files scoring below
         ``validator.MIN_QUALITY_SCORE`` are copied to the quarantine folder
         together with a text report and are not processed further.
      3. Run ``add_indicators_full`` and write ``<stem>.pkl`` to
         ``PICKLE_FOLDER``.

    File writes are performed while holding the module-level ``lock``.

    Args:
        csv_file: ``pathlib.Path`` of the CSV to process.

    Returns:
        tuple: ``(pickle_path_as_str, message)`` on success, otherwise
        ``(None, message)``. Exceptions are caught, reported and converted
        into the failure form. The success message ends with ``ATR:<median>``,
        which the caller parses.
    """
    try:
        print_status(f"üìã Processing: {csv_file.name}", "info")

        # Load CSV
        df = pd.read_csv(csv_file, index_col=0, parse_dates=True)
        df = ensure_tz_naive(df)

        if df.empty:
            msg = f"‚ö†Ô∏è {csv_file.name}: Empty file"
            print_status(msg, "warn")
            return None, msg

        # Validate quality
        is_valid, quality_score, metrics, issues = validator.validate_dataframe(df, csv_file.name)

        print_status(f"  üìä Quality score: {quality_score:.1f}/100", "debug")

        if not is_valid:
            print_status(f"  ‚ö†Ô∏è Quality issues: {'; '.join(issues[:2])}", "warn")

            # Quarantine if too low
            if quality_score < validator.MIN_QUALITY_SCORE:
                print_status(f"  ‚ùå Quarantining low quality file", "error")

                quarantine_file = QUARANTINE_FOLDER / f"{csv_file.name}.bad"
                report_file = QUARANTINE_FOLDER / f"{csv_file.name}.quality.txt"

                report_lines = [
                    f"Quality Report for {csv_file.name}\n",
                    f"{'='*50}\n",
                    f"Quality Score: {quality_score:.1f}/100\n",
                    f"Issues: {'; '.join(issues)}\n",
                    f"\nMetrics:\n",
                ]
                report_lines.extend(f"  {k}: {v}\n" for k, v in metrics.items())

                with lock:
                    df.to_csv(quarantine_file)
                    # Explicit UTF-8: issue/metric text may contain non-ASCII
                    # characters, which the platform default encoding can reject.
                    with open(report_file, 'w', encoding='utf-8') as f:
                        f.writelines(report_lines)

                return None, f"‚ùå {csv_file.name}: Quarantined (Q:{quality_score:.1f})"
            else:
                print_status(f"  ‚ö†Ô∏è Low quality but acceptable", "warn")

        # Add indicators on the full dataset
        processed_df = add_indicators_full(df)

        if processed_df is None:
            msg = f"‚ùå {csv_file.name}: Indicator calculation failed"
            print_status(msg, "error")
            return None, msg

        # Save processed data. The pickle name depends only on the CSV stem,
        # so CSVs with the same stem in different source folders share a pickle.
        pickle_filename = csv_file.stem + ".pkl"
        pickle_path = PICKLE_FOLDER / pickle_filename

        with lock:
            processed_df.to_pickle(pickle_path, compression='gzip', protocol=4)

        atr_median = processed_df['ATR'].median() if 'ATR' in processed_df.columns else 0
        msg = f"‚úÖ {csv_file.name}: {len(processed_df)} rows, Q:{quality_score:.0f}, ATR:{atr_median:.8f}"
        print_status(msg, "success")

        return str(pickle_path), msg

    except Exception as e:
        # Worker boundary: report the failure instead of killing the pool.
        msg = f"‚ùå Failed {csv_file.name}: {e}"
        print_status(msg, "error")
        import traceback
        traceback.print_exc()
        return None, msg

# ======================================================
# 9️⃣ MAIN EXECUTION
# ======================================================
# Announce the discovery phase, then list up to five of the CSVs found.
print("\n" + "=" * 70)
print("üöÄ Discovering CSV files...")
print("=" * 70 + "\n")

csv_files = discover_csv_files()

if not csv_files:
    print_status("‚ö†Ô∏è No CSV files found!", "warn")
    print_status("   Check that data fetchers have run successfully", "warn")
else:
    print_status(f"üìä Total CSV files found: {len(csv_files)}", "success")
    # Preview only the first five files (name and size in KB).
    for csv_file in csv_files[:5]:
        print_status(f"  ‚Ä¢ {csv_file.name} ({csv_file.stat().st_size / 1024:.1f} KB)", "debug")
    if len(csv_files) > 5:
        print_status(f"  ... and {len(csv_files) - 5} more", "debug")

# Filled in by the processing step: pickle paths written, and ATR median per pickle.
changed_files, quality_scores = [], {}

# ======================================================
# 🔟 PROCESS FILES
# ======================================================
# Process every discovered CSV on a small thread pool and collect results.
if csv_files:
    print("\n" + "=" * 70)
    print(f"‚öôÔ∏è Processing {len(csv_files)} CSV file(s)...")
    print("=" * 70 + "\n")

    with ThreadPoolExecutor(max_workers=min(8, len(csv_files))) as executor:
        futures = [executor.submit(process_csv_file, f) for f in csv_files]

        for future in as_completed(futures):
            # process_csv_file catches its own errors and always returns a pair.
            file, msg = future.result()
            if not file:
                continue

            changed_files.append(file)

            # The success message ends with "ATR:<median>"; despite its name,
            # quality_scores stores that ATR median keyed by pickle path.
            if "ATR:" not in msg:
                continue
            try:
                atr_value = float(msg.split("ATR:")[1].strip())
            except (IndexError, ValueError):
                # Unparseable message: skip the statistic, keep the file.
                continue
            # A NaN/inf median would poison the average computed later.
            if np.isfinite(atr_value):
                quality_scores[file] = atr_value

# ======================================================
# 1️⃣1️⃣ QUALITY REPORT
# ======================================================
# Report the ATR median recorded for each processed file, highest first.
if quality_scores:
    print("\n" + "=" * 70)
    print("üìä QUALITY REPORT - ATR VALUES")
    print("=" * 70)

    avg_atr = sum(quality_scores.values()) / len(quality_scores)
    print(f"Average ATR: {avg_atr:.8f}")
    print("\nATR by file:")

    ranked = sorted(quality_scores.items(), key=lambda entry: entry[1], reverse=True)
    for filepath, atr in ranked:
        # Values at or below 1e-6 get the warning marker.
        if atr > 1e-6:
            status = "‚úÖ"
        else:
            status = "‚ö†Ô∏è"
        filename = Path(filepath).stem
        print(f"  {status} {filename}: {atr:.8f}")

    low_atr_files = [path for path, value in quality_scores.items() if value < 1e-6]
    if low_atr_files:
        print(f"\n‚ö†Ô∏è  {len(low_atr_files)} file(s) with suspiciously low ATR")

# List everything currently in quarantine (includes files from earlier runs).
quarantined = list(QUARANTINE_FOLDER.glob("*.bad"))
if quarantined:
    print("\n" + "=" * 70)
    print(f"‚ö†Ô∏è  QUARANTINED FILES: {len(quarantined)}")
    print("=" * 70)
    for qfile in quarantined:
        print(f"  ‚ùå {qfile.stem}")

# ======================================================
# 1️⃣2️⃣ GIT COMMIT & PUSH
# ======================================================
# Commit and push the processed data, unless running inside GitHub Actions.
# Outside Actions this only runs when files changed and a PAT is configured.
if IN_GHA:
    print("\n" + "=" * 70)
    print("ü§ñ GitHub Actions: Skipping git operations")
    print("=" * 70)

elif changed_files and FOREX_PAT:
    print("\n" + "=" * 70)
    print("üöÄ Committing changes to GitHub...")
    print("=" * 70)

    try:
        os.chdir(REPO_FOLDER)

        subprocess.run(["git", "add", "-A"], check=False)

        commit_msg = f"Update processed data - {len(changed_files)} files"
        if quality_scores:
            commit_msg += f" (Avg ATR: {avg_atr:.6f})"

        result = subprocess.run(
            ["git", "commit", "-m", commit_msg],
            capture_output=True,
            text=True
        )

        if result.returncode == 0:
            print_status("‚úÖ Changes committed", "success")

            for attempt in range(3):
                print_status(f"üì§ Pushing (attempt {attempt + 1}/3)...", "info")
                try:
                    result = subprocess.run(
                        ["git", "push", "origin", BRANCH],
                        capture_output=True,
                        text=True,
                        timeout=30
                    )
                    push_ok = result.returncode == 0
                except subprocess.TimeoutExpired:
                    # A hung push counts as one failed attempt, so the
                    # remaining retries still run.
                    print_status("Push timed out after 30s", "warn")
                    push_ok = False

                if push_ok:
                    print_status("‚úÖ Push successful", "success")
                    break

                if attempt < 2:
                    # Remote may have moved on: rebase onto it, then retry.
                    subprocess.run(
                        ["git", "pull", "--rebase", "origin", BRANCH],
                        capture_output=True
                    )
                    time.sleep(3)
            else:
                # Loop finished without a successful push.
                print_status("Push failed after 3 attempts; the commit exists only locally", "error")

        elif "nothing to commit" in result.stdout.lower():
            print_status("‚ÑπÔ∏è No changes to commit", "info")

        else:
            details = (result.stderr or result.stdout).strip()
            print_status(f"Commit failed: {details}", "error")

    except Exception as e:
        print_status(f"‚ùå Git error: {e}", "error")
    finally:
        # Always return to the save folder.
        os.chdir(SAVE_FOLDER)

# ======================================================
# ✅ COMPLETION SUMMARY
# ======================================================
# Final summary of this cell's run.
print("\n" + "=" * 70)
print("‚úÖ CSV COMBINER COMPLETED")
print("=" * 70)

for label, value in (
    ("Environment", ENV_NAME),
    ("CSV files found", len(csv_files)),
    ("Files processed", len(changed_files)),
    ("Files quarantined", len(quarantined)),
):
    print(f"{label}: {value}")

# avg_atr only exists when at least one ATR value was collected.
if quality_scores:
    print("\nüìà ATR Statistics:")
    print(f"   Average: {avg_atr:.8f}")
    print(f"   Files analyzed: {len(quality_scores)}")

print("\nüîß KEY FEATURES:")
for feature in (
    "Full-dataset indicator calculation",
    "ATR preserved (no clipping/scaling)",
    "Quality validation with quarantine",
    "Clean organized structure",
    "Thread-safe processing",
):
    print(f"   ‚úÖ {feature}")

print("\nüìÅ Output Locations:")
print(f"   Processed pickles: {PICKLE_FOLDER}")
print(f"   Quarantine: {QUARANTINE_FOLDER}")

print("=" * 70)

In [None]:
#!/usr/bin/env python3
"""
ULTRA-PERSISTENT SELF-LEARNING FX PIPELINE v5.0
================================================
🎉 ZERO CORRUPTION GUARANTEE - No model files saved!

KEY CHANGES FROM v4.3:
✅ Models rebuilt fresh from data each run (no pickle files)
✅ No file corruption possible (no model file I/O)
✅ No Git conflicts (no model files to commit)
✅ Simpler code, fewer bugs
✅ Always fresh predictions from latest data
✅ Works perfectly in GitHub Actions, Colab, and Local

PERFORMANCE:
- Fast: SGD trains in seconds, RF limited to 50 trees
- Memory efficient: Models exist only during runtime
- Scalable: Processes 24+ pairs in under a minute
"""

import os
import time
import json
import sqlite3
import subprocess
import pickle
import gzip
from pathlib import Path
from datetime import datetime, timezone, timedelta
import pandas as pd
import numpy as np
import warnings

warnings.filterwarnings('ignore')

print("=" * 70)
print("üöÄ Ultra-Persistent FX Pipeline v5.0 - CORRUPTION-FREE")
print("=" * 70)

# ======================================================
# SIMPLE DATA LOADER (NO MODEL SAVING/LOADING)
# ======================================================

class SimpleDataLoader:
    """Reads data pickles from disk; it never writes anything.

    Both gzip-compressed and plain pickle files are supported.
    """

    # First two bytes of every gzip stream.
    _GZIP_MAGIC = b'\x1f\x8b'

    @staticmethod
    def load_data(filepath):
        """Unpickle ``filepath`` and return the object, or ``None`` on failure.

        ``None`` is returned when the file does not exist or when reading or
        unpickling raises; in the latter case a warning line is printed.

        NOTE: ``pickle.load`` can execute arbitrary code, so this must only
        be pointed at files produced by this project.
        """
        if not filepath.exists():
            return None

        try:
            # Sniff the header to decide how the file has to be opened.
            with open(filepath, 'rb') as probe:
                header = probe.read(2)

            opener = gzip.open if header == SimpleDataLoader._GZIP_MAGIC else open
            with opener(filepath, 'rb') as stream:
                return pickle.load(stream)

        except Exception as e:
            print(f"‚ö†Ô∏è  Cannot load {filepath.name}: {e}")
            return None

# Global loader instance
data_loader = SimpleDataLoader()

# ======================================================
# ENVIRONMENT DETECTION
# ======================================================

# Colab is detected by whether its package can be imported.
try:
    import google.colab
except ImportError:
    IN_COLAB = False
    ENV_NAME = "Local"
else:
    IN_COLAB = True
    ENV_NAME = "Google Colab"

# The GitHub Actions label takes precedence over the one chosen above.
IN_GHA = "GITHUB_ACTIONS" in os.environ
if IN_GHA:
    ENV_NAME = "GitHub Actions"

print(f"üåç Environment: {ENV_NAME}")

# Path configuration: Colab works inside /content/forex-ai-models; GitHub
# Actions and local runs use the current working directory for everything.
if IN_COLAB:
    BASE_FOLDER = Path("/content")
    SAVE_FOLDER = BASE_FOLDER / "forex-ai-models"
else:
    BASE_FOLDER = Path.cwd()
    SAVE_FOLDER = BASE_FOLDER
REPO_FOLDER = SAVE_FOLDER

DIRECTORIES = {
    name: SAVE_FOLDER / relative
    for name, relative in (
        ("data_processed", "data/processed"),
        ("database", "database"),
        ("logs", "logs"),
        ("outputs", "outputs"),
    )
}

# Make sure every working directory exists before anything reads or writes.
for dir_path in DIRECTORIES.values():
    dir_path.mkdir(parents=True, exist_ok=True)

PICKLE_FOLDER = DIRECTORIES["data_processed"]
DB_FOLDER = DIRECTORIES["database"]
PERSISTENT_DB = DB_FOLDER / "memory_v85.db"

print(f"üìÇ Base: {BASE_FOLDER}")
print(f"üíæ Save: {SAVE_FOLDER}")
print(f"üìä Data: {PICKLE_FOLDER}")
print(f"üíø Database: {PERSISTENT_DB}")
print("=" * 70)

# ======================================================
# CLEANUP OLD MODEL FILES (ONE-TIME)
# ======================================================

def cleanup_old_model_files():
    """Remove legacy model pickles and the ``corrupted`` folder from PICKLE_FOLDER.

    Deletes every file matching ``*_sgd_model.pkl``, ``*_rf_model.pkl`` or
    ``*_model.pkl``. All failures are ignored (best-effort cleanup). Prints
    how many files were removed.
    """
    print("\nüßπ Cleaning up old model files...")

    patterns = ['*_sgd_model.pkl', '*_rf_model.pkl', '*_model.pkl']
    # Patterns are globbed lazily, one after another, so a file removed by an
    # earlier pattern is no longer seen by a later, broader one.
    candidates = (
        model_file
        for pattern in patterns
        for model_file in PICKLE_FOLDER.glob(pattern)
    )

    deleted = 0
    for model_file in candidates:
        try:
            model_file.unlink()
        except Exception:
            continue
        deleted += 1

    # Clean up corrupted folder
    corrupted_folder = PICKLE_FOLDER / "corrupted"
    if corrupted_folder.exists():
        try:
            import shutil
            shutil.rmtree(corrupted_folder)
        except Exception:
            pass

    if deleted > 0:
        print(f"   ‚úì Cleaned up {deleted} old model files")
    else:
        print("   ‚úì No old model files found")

cleanup_old_model_files()

# ======================================================
# UTILITY FUNCTIONS
# ======================================================

def is_weekend(dt=None):
    """Return True when ``dt`` falls on a Saturday or Sunday.

    Args:
        dt: datetime to test; defaults to the current time in UTC.
    """
    moment = datetime.now(timezone.utc) if dt is None else dt
    # weekday(): Monday is 0, so Saturday/Sunday are 5 and 6.
    return moment.weekday() >= 5

def get_trade_age_hours():
    """Return the trade-age threshold in hours: 0.5 on weekends, 2.0 otherwise."""
    if is_weekend():
        return 0.5
    return 2.0

def is_market_open_for_trading():
    """Return True on weekdays (UTC) and False on Saturday/Sunday."""
    weekend_now = is_weekend()
    return not weekend_now

def print_status(msg, level="info"):
    """Print ``msg`` prefixed with the icon registered for ``level``.

    Unknown levels fall back to the same icon as ``"info"``.
    """
    icons = {
        "info": "‚ÑπÔ∏è",
        "success": "‚úÖ",
        "warn": "‚ö†Ô∏è",
        "debug": "üêû",
        "error": "‚ùå",
        "data": "üìä",
        "weekend": "üèñÔ∏è",
        "trading": "üíπ",
    }
    if level in icons:
        prefix = icons[level]
    else:
        prefix = '‚ÑπÔ∏è'
    print(f"{prefix} {msg}")

# ======================================================
# GIT CONFIGURATION
# ======================================================

# Git identity comes from the environment, with defaults for name and e-mail.
GIT_USER_NAME = os.environ.get("GIT_USER_NAME", "Forex AI Bot")
GIT_USER_EMAIL = os.environ.get("GIT_USER_EMAIL", "nakatonabira3@gmail.com")
FOREX_PAT = os.environ.get("FOREX_PAT")

# The global git identity is only written when a token is available.
if FOREX_PAT:
    for git_key, git_value in (
        ("user.name", GIT_USER_NAME),
        ("user.email", GIT_USER_EMAIL),
    ):
        subprocess.run(
            ["git", "config", "--global", git_key, git_value],
            capture_output=True,
            check=False,
        )
    print_status(f"Git configured: {GIT_USER_NAME}", "success")

# ======================================================
# ML IMPORTS
# ======================================================

try:
    from sklearn.preprocessing import MinMaxScaler
    from sklearn.linear_model import SGDClassifier
    from sklearn.ensemble import RandomForestClassifier
    print_status("ML libraries loaded", "success")
except ImportError as e:
    print_status(f"ML libraries missing: {e}", "error")
    raise

print("=" * 70)

# ======================================================
# FRESH MODEL TRAINING (NO FILE I/O)
# ======================================================

def train_and_predict_fresh(df, pair_name, timeframe):
    """Train SGD and RandomForest classifiers from scratch and predict the next bar.

    Nothing is saved to disk; the models live only for the duration of the call.

    The target for row *t* is whether the close of row *t+1* is higher than
    the close of row *t*. The last row has no known outcome, so it is kept
    out of training and used as the single row to predict. (Labelling row *t*
    with its own bar's direction would leak the answer through features such
    as ``price_change``.)

    Only numeric columns are used as features. Infinities and NaNs are
    replaced with 0 before fitting.

    Args:
        df: DataFrame with a ``close`` column plus feature columns.
        pair_name: e.g. "EUR/USD"; used only in the error message.
        timeframe: e.g. "1h"; used only in the error message.

    Returns:
        tuple: ``(sgd_pred, rf_pred, confidence)`` where each prediction is
        1 (next close higher) or 0, and ``confidence`` is the mean of the two
        votes (1.0 both up, 0.0 both down, 0.5 split).
        ``(None, None, 0.5)`` is returned when there are no usable features,
        fewer than 50 rows, only one class in the training target, or any
        error occurs.
    """
    try:
        # Prices and volume-derived price levels are not used as features.
        exclude_cols = {
            'close', 'raw_close', 'raw_open', 'raw_high', 'raw_low',
            'open', 'high', 'low', 'volume', 'vwap'
        }

        numeric_cols = df.select_dtypes(include=[np.number]).columns
        feature_cols = [c for c in numeric_cols if c not in exclude_cols]

        if not feature_cols or len(df) < 50:
            return None, None, 0.5

        # The estimators reject inf/NaN input, so neutralise both.
        features = df[feature_cols].replace([np.inf, -np.inf], np.nan).fillna(0)

        close = df['close']
        next_bar_up = (close.shift(-1) > close).astype(int)

        # The final row has no "next bar" yet: train on the rest, predict on it.
        X_train = features.iloc[:-1]
        y_train = next_bar_up.iloc[:-1]
        X_latest = features.iloc[[-1]]

        # A classifier needs both outcomes present in the training data.
        if y_train.nunique() < 2:
            return None, None, 0.5

        # Linear model trained with stochastic gradient descent
        sgd = SGDClassifier(
            max_iter=1000,
            tol=1e-3,
            random_state=42,
            warm_start=False
        )
        sgd.fit(X_train, y_train)
        sgd_pred = int(sgd.predict(X_latest)[0])

        # Random forest, limited to 50 trees of depth 10
        rf = RandomForestClassifier(
            n_estimators=50,
            max_depth=10,
            class_weight='balanced',
            random_state=42,
            n_jobs=-1
        )
        rf.fit(X_train, y_train)
        rf_pred = int(rf.predict(X_latest)[0])

        # Mean vote of the two models
        confidence = (sgd_pred + rf_pred) / 2.0

        return sgd_pred, rf_pred, confidence

    except Exception as e:
        print_status(f"Training error for {pair_name} {timeframe}: {e}", "debug")
        return None, None, 0.5

# ======================================================
# PROCESS SINGLE PICKLE FILE
# ======================================================

def process_pickle_file(pickle_path):
    """Load one data pickle and turn it into a trading signal.

    The currency pair and timeframe are parsed from the file name, which is
    expected to start with ``<BASE>_<QUOTE>`` (e.g. ``EUR_USD_1h``).

    Stop-loss and take-profit sit two ATRs from the current price, on the
    side that matches the predicted direction: for LONG the stop is below
    and the target above; for SHORT the stop is above and the target below.
    When the frame has no usable ATR (missing, NaN/inf or not positive),
    1% of the current price is used instead.

    Args:
        pickle_path: Path to data pickle file.

    Returns:
        tuple: ``(pair, {timeframe: signal_data}, "LONG" | "SHORT")`` on
        success. ``(pair, {}, "HOLD")`` when the data cannot be used, and
        ``(None, {}, "HOLD")`` when no known pair is found in the file name.
    """
    filename = pickle_path.stem

    # Extract currency pair: "<BASE>_<QUOTE>..." with two different known codes.
    currencies = ('EUR', 'USD', 'GBP', 'JPY', 'AUD', 'NZD', 'CAD', 'CHF')
    base, _, remainder = filename.partition("_")
    quote = remainder[:3]

    if base not in currencies or quote not in currencies or base == quote:
        return None, {}, "HOLD"

    pair = f"{base}/{quote}"

    # Extract timeframe from filename. Order matters: "15m" must be tested
    # before "5m" because the former contains the latter.
    fname_lower = filename.lower()
    timeframe_tokens = (
        ("1d", "1d"), ("daily", "1d"), ("1h", "1h"),
        ("15m", "15m"), ("5m", "5m"), ("1m", "1m"),
    )
    timeframe = next(
        (label for token, label in timeframe_tokens if token in fname_lower),
        "unknown",
    )

    try:
        # Load data (only disk operation)
        df = data_loader.load_data(pickle_path)

        if df is None or df.empty:
            return pair, {}, "HOLD"

        # Prefer the raw close; fall back to 'close'.
        price_col = 'raw_close' if 'raw_close' in df.columns else 'close'
        current_price = float(df[price_col].iloc[-1])
        if not np.isfinite(current_price):
            # No usable price: levels would be NaN and break the JSON output.
            return pair, {}, "HOLD"

        # Distance of SL/TP from price: 2 x ATR, or 2 x 1% of price as fallback.
        atr = float(df['ATR'].iloc[-1]) if 'ATR' in df.columns else float('nan')
        if not np.isfinite(atr) or atr <= 0:
            atr = current_price * 0.01
        distance = atr * 2.0

        # Train fresh models and predict
        sgd_pred, rf_pred, confidence = train_and_predict_fresh(df, pair, timeframe)

        if sgd_pred is None:
            return pair, {}, "HOLD"

        # With two voters, a split vote resolves to 1 (LONG).
        ensemble_pred = 1 if (sgd_pred + rf_pred) >= 1 else 0

        below = max(0, round(current_price - distance, 5))
        above = round(current_price + distance, 5)
        if ensemble_pred == 1:
            sl, tp = below, above   # LONG: lose below, win above
        else:
            sl, tp = above, below   # SHORT: lose above, win below

        signal_data = {
            "signal": ensemble_pred,
            "sgd_pred": sgd_pred,
            "rf_pred": rf_pred,
            "live": current_price,
            "SL": sl,
            "TP": tp,
            "confidence": confidence,
            "timeframe": timeframe
        }

        # Print signal
        print(f"{'‚úì':2} {pair:8} | {timeframe:3} | Ens:{ensemble_pred} (SGD:{sgd_pred} RF:{rf_pred}) | Price:{current_price:.5f}")

        return pair, {timeframe: signal_data}, "LONG" if ensemble_pred == 1 else "SHORT"

    except Exception as e:
        print_status(f"Error processing {pickle_path.name}: {e}", "error")
        return pair, {}, "HOLD"

# ======================================================
# MAIN PIPELINE EXECUTION
# ======================================================

def main():
    """Run the pipeline over every data pickle in PICKLE_FOLDER.

    Returns:
        dict: ``{pair: {"signals": {timeframe: signal_data}, "aggregated": str}}``.
        ``aggregated`` holds the direction of the last processed file for
        that pair that was not "HOLD". An empty dict is returned when no
        data pickles are found.
    """
    print_status("Starting Ultra-Persistent Pipeline v5.0", "success")
    print()

    # Skip legacy model/cache files; everything else is treated as data.
    legacy_markers = ('_sgd_model', '_rf_model', 'indicator_cache')
    pickle_files = [
        candidate
        for candidate in PICKLE_FOLDER.glob("*.pkl")
        if all(marker not in candidate.name for marker in legacy_markers)
    ]

    if not pickle_files:
        print_status("No data pickles found!", "warn")
        return {}

    print_status(f"Found {len(pickle_files)} data files", "success")
    print()

    signals = {}
    for pkl_file in pickle_files:
        pair, pair_signals, agg = process_pickle_file(pkl_file)

        # Files without a recognised pair or without a signal contribute nothing.
        if not (pair and pair_signals):
            continue

        entry = signals.setdefault(pair, {"signals": {}, "aggregated": "HOLD"})
        entry["signals"].update(pair_signals)

        if agg != "HOLD":
            entry["aggregated"] = agg

    print()
    print_status(f"Generated signals for {len(signals)} pairs", "success")

    return signals

# ======================================================
# ENTRY POINT
# ======================================================

if __name__ == "__main__":
    # Run the pipeline, report the elapsed time and, when there are signals,
    # write them to a timestamped JSON file in the outputs folder.
    try:
        start_time = time.time()
        signals = main()
        elapsed = time.time() - start_time

        print()
        print("=" * 70)
        print(f"‚úÖ Pipeline completed in {elapsed:.2f}s")
        print("üéâ NO CORRUPTION POSSIBLE - Models built fresh from data!")
        print("=" * 70)

        # Optional: keep a JSON snapshot of the signals for reference
        if signals:
            stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            output_file = DIRECTORIES["outputs"] / f"signals_{stamp}.json"
            with open(output_file, 'w') as f:
                json.dump(signals, f, indent=2)
            print(f"üìÑ Signals saved to: {output_file.name}")

    except Exception as e:
        # Top-level boundary: report the error and print the traceback.
        print_status(f"Pipeline error: {e}", "error")
        import traceback
        traceback.print_exc()

In [None]:
#!/usr/bin/env python3
"""
TRADE BEACON v18.1 - PIPELINE v5.0 COMPATIBLE - FIXED WEEKEND LEARNING
=======================================================================
🎉 ZERO CORRUPTION - Compatible with Pipeline v5.0
🧠 Deep Q-Learning with Experience Replay
📊 Learns from Ultra-Persistent Pipeline Database
🛡️ Models built fresh, never saved (just like Pipeline v5.0)
✅ FIXED: Weekend backtest now generates trades
✅ FIXED: Better exploration/exploitation balance
✅ FIXED: Improved confidence thresholds
⚠️ LIVE TRADING - Real money at risk on weekdays!
"""

import os, sys, json, pickle, gzip, random, re, smtplib, subprocess, logging, warnings, shutil, sqlite3
from pathlib import Path
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from datetime import datetime, timezone, timedelta
from collections import defaultdict, deque
from dataclasses import dataclass, field, asdict
from typing import Dict, List, Tuple, Any, Optional
from contextlib import contextmanager
import numpy as np
import pandas as pd
import requests

warnings.filterwarnings('ignore')

print("="*70)
print("üß† TRADE BEACON v18.1 - FIXED WEEKEND LEARNING")
print("="*70)

# Environment Setup
# Colab is detected by import; GitHub Actions is only checked outside Colab.
try:
    import google.colab
except ImportError:
    IN_COLAB = False
    IN_GHA = "GITHUB_ACTIONS" in os.environ
    ENV_NAME = "GitHub Actions" if IN_GHA else "Local"
else:
    IN_COLAB = True
    IN_GHA = False
    ENV_NAME = "Google Colab"

# Colab works in /content/forex-ai-models; every other environment uses the
# current working directory directly.
if IN_COLAB:
    BASE_FOLDER = Path("/content")
else:
    BASE_FOLDER = Path(Path.cwd())

if IN_GHA or not IN_COLAB:
    SAVE_FOLDER = BASE_FOLDER
else:
    SAVE_FOLDER = BASE_FOLDER / "forex-ai-models"

DIRECTORIES = {
    "data_processed": SAVE_FOLDER / "data/processed",
    "database": SAVE_FOLDER / "database",
    "logs": SAVE_FOLDER / "logs",
    "outputs": SAVE_FOLDER / "outputs",
    "omega_state": SAVE_FOLDER / "omega_state",
    "rl_memory": SAVE_FOLDER / "rl_memory",
    "backups": SAVE_FOLDER / "backups",
}

# Create every working directory up front.
for d in DIRECTORIES.values():
    d.mkdir(parents=True, exist_ok=True)

PICKLE_FOLDER = DIRECTORIES["data_processed"]
DATABASE_FOLDER = DIRECTORIES["database"]
OUTPUTS_FOLDER = DIRECTORIES["outputs"]
OMEGA_STATE_FOLDER = DIRECTORIES["omega_state"]
RL_MEMORY_FOLDER = DIRECTORIES["rl_memory"]
BACKUP_FOLDER = DIRECTORIES["backups"]
PIPELINE_DB = DATABASE_FOLDER / "memory_v85.db"

# One log file per run, named after the start time.
logging.basicConfig(
    filename=str(DIRECTORIES["logs"] / ("trade_beacon_" + datetime.now().strftime("%Y%m%d_%H%M%S") + ".log")),
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] %(message)s',
)

def log(msg, lvl="info"):
    """Write ``msg`` to the log and echo it to stdout with an icon.

    ``lvl`` selects both the icon and the ``logging`` function; "warn" maps
    to ``logging.warning``. Levels with no ``logging`` function of that name
    (e.g. "success", "money") are logged at INFO, and levels without an icon
    use the "info" icon.
    """
    icons = {"info":"‚ÑπÔ∏è","success":"‚úÖ","warn":"‚ö†Ô∏è","error":"‚ùå","rocket":"üöÄ","brain":"üß†","money":"üí∞","database":"üíæ"}
    logger_name = "warning" if lvl == "warn" else lvl
    emit = getattr(logging, logger_name, logging.info)
    emit(msg)
    prefix = icons.get(lvl, '‚ÑπÔ∏è')
    print(f"{prefix} {msg}")

# Config
# --- Repository / credentials -------------------------------------------
GITHUB_USERNAME, GITHUB_REPO = "rahim-dotAI", "forex-ai-models"

# Token lookup order: environment variable, then Colab secrets.
FOREX_PAT = os.getenv("FOREX_PAT", "").strip()
if not FOREX_PAT and IN_COLAB:
    try:
        from google.colab import userdata
        # userdata.get may return None; always keep FOREX_PAT a string.
        FOREX_PAT = (userdata.get("FOREX_PAT") or "").strip()
        if FOREX_PAT:
            os.environ["FOREX_PAT"] = FOREX_PAT
    except Exception as exc:
        # Best effort: a missing or inaccessible secret must not stop the run.
        log(f"Could not read FOREX_PAT from Colab secrets: {exc}", "warn")

GMAIL_USER = os.getenv("GMAIL_USER", "nakatonabira3@gmail.com")
# SECURITY: the app password must come from the environment. A credential
# committed to source control is readable by anyone with repository access,
# so no literal fallback is kept here. Any password that was ever committed
# should be treated as compromised and revoked.
GMAIL_APP_PASSWORD = os.getenv("GMAIL_APP_PASSWORD", "").strip()
if not GMAIL_APP_PASSWORD:
    log("GMAIL_APP_PASSWORD is not set; no Gmail app password is available", "warn")
BROWSERLESS_TOKEN = os.getenv("BROWSERLESS_TOKEN", "")

# --- Trading universe and risk limits -----------------------------------
PAIRS = ["EUR/USD", "GBP/USD", "USD/JPY", "AUD/USD"]
ATR_PERIOD, MIN_ATR, EPS = 14, 1e-5, 1e-8
BASE_CAPITAL, MAX_RISK_PER_TRADE, MAX_POSITIONS, MAX_TRADE_CAP = 100, 0.02, 2, 10.0

# --- Q-learning hyper-parameters ----------------------------------------
STATE_SIZE, ACTION_SPACE = 30, 3
LEARNING_RATE, GAMMA = 0.0005, 0.95
EPSILON_START, EPSILON_MIN, EPSILON_DECAY = 1.0, 0.10, 0.995
BATCH_SIZE, MEMORY_SIZE, MIN_REPLAY_SIZE = 64, 15000, 200
TARGET_UPDATE_FREQUENCY = 25

# --- Stop/target multipliers and reward shaping -------------------------
ATR_SL_MULTIPLIER, ATR_TP_MULTIPLIER = 2.0, 3.0
PROFIT_REWARD_SCALE = 500.0
LOSS_PENALTY_SCALE = 100.0
WIN_BONUS = 50.0
LOSS_PENALTY = 10.0
SHARPE_REWARD_SCALE = 30.0

# --- Weekend backtest ----------------------------------------------------
WEEKEND_BACKTEST_STEPS = 500
BACKTEST_EPSILON = 0.2

# --- State files ---------------------------------------------------------
OMEGA_SIGNALS_FILE = OUTPUTS_FOLDER / "omega_signals.json"
OMEGA_ITERATION_FILE = OMEGA_STATE_FOLDER / "omega_iteration.json"
RL_MEMORY_FILE = RL_MEMORY_FOLDER / "experience_replay.json.gz"
RL_LEARNING_STATS_FILE = RL_MEMORY_FOLDER / "learning_stats.json"
TRADE_HISTORY_FILE = RL_MEMORY_FOLDER / "trade_history.json"
PIPELINE_SYNC_FILE = RL_MEMORY_FOLDER / "pipeline_sync.json"
RL_NETWORK_WEIGHTS_FILE = RL_MEMORY_FOLDER / "network_weights.json"

log("üí∞ LIVE TRADING MODE ACTIVE", "money")

class SimplePersistence:
    """JSON persistence with atomic writes and one rolling backup per file.

    ``save_json`` always writes gzip-compressed JSON, whatever the file's
    extension.  ``load_json`` reads gzip-compressed files and, as a
    convenience, also accepts plain-text JSON (detected by the gzip magic
    bytes), so a hand-edited or externally produced file still loads.
    Both methods report failures through ``log`` and never raise.
    """

    @staticmethod
    def _backup_path(filepath: Path) -> Path:
        """Return the backup location used for *filepath*."""
        return BACKUP_FOLDER / f"{filepath.stem}_backup.json.gz"

    @staticmethod
    def _read_json(filepath: Path) -> Any:
        """Parse *filepath* as JSON, transparently handling gzip or plain text."""
        with open(filepath, 'rb') as raw:
            is_gzip = raw.read(2) == b"\x1f\x8b"  # gzip magic number
        opener = gzip.open if is_gzip else open
        with opener(filepath, 'rt', encoding='utf-8') as f:
            return json.load(f)

    @staticmethod
    def save_json(filepath: Path, data: Any) -> bool:
        """Atomically write *data* to *filepath* as gzip-compressed JSON.

        The previous version of the file, if any, is first copied to the
        backup folder (best effort).  Data is written to a temporary sibling
        file and then moved into place.  Returns True on success, False on
        any failure.
        """
        # Defined before the try so the cleanup in the handler can always use it.
        temp_file = filepath.parent / f".tmp_{filepath.name}"
        try:
            if filepath.exists():
                try:
                    shutil.copy2(filepath, SimplePersistence._backup_path(filepath))
                except Exception as backup_err:
                    # A failed backup must not block the save itself.
                    log(f"Backup skipped for {filepath.name}: {backup_err}", "warn")
            with gzip.open(temp_file, 'wt', encoding='utf-8') as f:
                json.dump(data, f, indent=2, default=str)
            temp_file.replace(filepath)
            log(f"üíæ Saved: {filepath.name}", "success")
            return True
        except Exception as e:
            log(f"‚ùå Save failed: {e}", "error")
            try:
                temp_file.unlink(missing_ok=True)
            except OSError:
                pass  # nothing more can be done about a stale temp file
            return False

    @staticmethod
    def load_json(filepath: Path, default=None) -> Any:
        """Load JSON from *filepath*, falling back to its backup, then *default*.

        If the primary file is missing the backup is used directly.  If the
        primary file exists but cannot be parsed, the backup is tried next.
        """
        backup = SimplePersistence._backup_path(filepath)
        if not filepath.exists():
            if not backup.exists():
                return default
            filepath = backup
        try:
            data = SimplePersistence._read_json(filepath)
            log(f"‚úÖ Loaded: {filepath.name}", "success")
            return data
        except Exception as e:
            log(f"‚ö†Ô∏è Load failed: {e}", "warn")
            # Only retry from the backup if it is not the file that just failed.
            if backup.exists() and backup != filepath:
                try:
                    data = SimplePersistence._read_json(backup)
                    log(f"‚úÖ Loaded from backup", "success")
                    return data
                except Exception as backup_err:
                    log(f"Backup load failed: {backup_err}", "warn")
            return default

persistence = SimplePersistence()

def is_weekend():
    """Return True when the local wall-clock date is a Saturday or Sunday."""
    # datetime.weekday(): Monday is 0, so Saturday/Sunday are 5 and 6.
    today = datetime.now().weekday()
    return today >= 5

def get_weekend_mode():
    """Return the run-mode label: weekend learning or live trading."""
    if is_weekend():
        return "WEEKEND_LEARNING"
    return "LIVE_TRADING"

class PipelineDatabase:
    """Thin reader over the pipeline's SQLite database.

    If the database file is missing or cannot be opened, ``conn`` stays
    ``None`` and every query method returns an empty result instead of
    raising.  Query errors are logged and also yield empty results.
    """

    # Column order matters: callers unpack the returned rows positionally.
    _TRADE_COLUMNS = (
        "pair, timeframe, model_used, entry_price, exit_price, "
        "sl_price, tp_price, prediction, hit_tp, pnl, pnl_percent, "
        "duration_hours, created_at, evaluated_at"
    )

    def __init__(self, db_path=PIPELINE_DB):
        """Open *db_path* (a ``Path`` or string) if the file exists."""
        # Path() makes a plain string path acceptable as well.
        self.db_path, self.conn = Path(db_path), None
        if not self.db_path.exists():
            log(f"‚ö†Ô∏è Pipeline database not found: {self.db_path}", "warn")
            return
        try:
            self.conn = sqlite3.connect(str(self.db_path), timeout=30, check_same_thread=False)
            log(f"‚úÖ Connected to pipeline database", "database")
        except Exception as e:
            log(f"‚ùå Failed to connect to pipeline DB: {e}", "error")

    @contextmanager
    def get_cursor(self):
        """Yield a cursor that is closed on exit, or ``None`` when not connected."""
        if not self.conn:
            yield None
            return
        cursor = self.conn.cursor()
        try:
            yield cursor
        finally:
            cursor.close()

    def get_completed_trades(self, since_timestamp=None, limit=1000):
        """Return up to *limit* completed trades, newest ``evaluated_at`` first.

        When *since_timestamp* is truthy only rows with a later
        ``evaluated_at`` are returned.  Rows are tuples in ``_TRADE_COLUMNS``
        order.  Returns ``[]`` when not connected or on any query error.
        """
        if not self.conn:
            return []
        # Only constant SQL fragments are concatenated; values are bound as parameters.
        query = f"SELECT {self._TRADE_COLUMNS} FROM completed_trades"
        params = []
        if since_timestamp:
            query += " WHERE evaluated_at > ?"
            params.append(since_timestamp)
        query += " ORDER BY evaluated_at DESC LIMIT ?"
        params.append(limit)
        try:
            with self.get_cursor() as cursor:
                cursor.execute(query, params)
                return cursor.fetchall()
        except Exception as e:
            log(f"‚ö†Ô∏è Failed to fetch trades: {e}", "warn")
            return []

    def get_pipeline_stats(self):
        """Return aggregate statistics over ``completed_trades``.

        Keys: ``total_trades``, ``wins``, ``total_pnl``, ``avg_pnl``,
        ``win_rate`` (percent) and ``last_trade``.  Returns ``{}`` when not
        connected or on any query error.
        """
        if not self.conn:
            return {}
        try:
            with self.get_cursor() as cursor:
                cursor.execute('''SELECT COUNT(*) as total_trades, SUM(CASE WHEN hit_tp THEN 1 ELSE 0 END) as wins,
                    SUM(pnl) as total_pnl, AVG(pnl) as avg_pnl, MAX(evaluated_at) as last_trade FROM completed_trades''')
                result = cursor.fetchone()
                if result:
                    total, wins, total_pnl, avg_pnl, last_trade = result
                    return {
                        'total_trades': total or 0,
                        'wins': wins or 0,
                        'total_pnl': total_pnl or 0.0,
                        'avg_pnl': avg_pnl or 0.0,
                        # SQL aggregates are NULL on an empty table, hence the guard.
                        'win_rate': (wins / total * 100) if total else 0.0,
                        'last_trade': last_trade,
                    }
        except Exception as e:
            log(f"‚ö†Ô∏è Failed to get stats: {e}", "warn")
        return {}

    def close(self):
        """Close the connection; safe to call repeatedly."""
        if self.conn:
            self.conn.close()
            # Drop the handle so later calls take the "not connected" path
            # instead of failing on a closed connection.
            self.conn = None

def load_iteration_counter():
    """Return the persisted iteration counter, or a fresh one.

    A stored value is used only when it is a non-empty dict; otherwise a new
    counter starting at zero, stamped with the current UTC time, is returned.
    """
    stored = persistence.load_json(OMEGA_ITERATION_FILE, default=None)
    if isinstance(stored, dict) and stored:
        return stored
    started_at = datetime.now(timezone.utc).isoformat()
    return {'total': 0, 'start_date': started_at, 'history': []}

def save_iteration_counter(data):
    """Write the iteration-counter dict to ``OMEGA_ITERATION_FILE``.

    The boolean result of the save is discarded; this function returns None.
    """
    persistence.save_json(filepath=OMEGA_ITERATION_FILE, data=data)

def increment_iteration():
    """Bump the persisted iteration counter and return the new total.

    Appends a history entry (iteration number, UTC timestamp, environment
    name and run mode), keeps only the latest 1000 entries, and saves the
    counter.  Tolerates a stored counter that lacks the ``total`` or
    ``history`` keys.
    """
    data = load_iteration_counter()
    # One clock read so 'last_update' and the history entry agree exactly.
    now_iso = datetime.now(timezone.utc).isoformat()

    data['total'] = (data.get('total') or 0) + 1
    data['last_update'] = now_iso

    history = data.get('history')
    if not isinstance(history, list):
        history = []
    history.append({'iteration': data['total'], 'timestamp': now_iso,
                    'environment': ENV_NAME, 'mode': get_weekend_mode()})
    data['history'] = history[-1000:]  # bound the file size

    save_iteration_counter(data)
    return data['total']

@dataclass
class Experience:
    """A single replay-memory transition.

    ``metadata`` defaults to a fresh empty dict and ``timestamp`` to the
    current UTC time in ISO-8601 form, both evaluated per instance.
    """

    state: List[float]
    action: int
    reward: float
    next_state: List[float]
    done: bool
    metadata: Dict[str, Any] = field(default_factory=dict)
    timestamp: str = field(
        default_factory=lambda: datetime.now(timezone.utc).isoformat()
    )

    def to_dict(self):
        """Return the transition as a plain dict (via ``dataclasses.asdict``)."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data):
        """Build an instance from a mapping whose keys are the field names."""
        return cls(**data)

@dataclass
class TradeOutcome:
    """Result of one closed trade, in the form the RL agent learns from.

    Instances are built from live exits, from the weekend backtest and from
    pipeline-database rows.  Field order is part of the interface because the
    dataclass constructor accepts positional arguments.
    """
    pair: str  # currency pair, e.g. "EUR/USD"
    action: str  # 'BUY' or 'SELL'; the agent maps any other value to action index 2
    entry_price: float
    exit_price: float
    sl: float  # stop-loss price level
    tp: float  # take-profit price level
    position_size: float
    pnl: float  # profit/loss; its sign selects the reward branch
    duration: float  # hours (live exits convert seconds to hours; the backtest counts 1h bars)
    hit_tp: bool  # True when the trade closed at the take-profit level
    timestamp_entry: str
    timestamp_exit: str
    state_at_entry: List[float]  # state vector captured when the trade was opened
    confidence: float
    regime: str  # free-form label, e.g. 'BACKTEST' or 'PIPELINE'
    session: str  # free-form label, e.g. 'WEEKEND'

def calculate_rsi(prices: pd.Series, period: int = 14) -> pd.Series:
    """Return an RSI series (0-100) using simple rolling means of gains and losses.

    Averages are plain rolling means over *period* bars with
    ``min_periods=1``; ``EPS`` in the denominator avoids division by zero
    when there are no losses in the window.
    """
    change = prices.diff()
    ups = change.where(change > 0, 0)
    downs = -change.where(change < 0, 0)
    avg_gain = ups.rolling(window=period, min_periods=1).mean()
    avg_loss = downs.rolling(window=period, min_periods=1).mean()
    relative_strength = avg_gain / (avg_loss + EPS)
    return 100 - (100 / (1 + relative_strength))

def calculate_macd(prices: pd.Series, fast=12, slow=26, signal=9):
    """Return ``(macd_line, signal_line, histogram)`` for *prices*.

    All three are exponential-moving-average based (``adjust=False``):
    the MACD line is EMA(fast) - EMA(slow), the signal line is the EMA of
    the MACD line, and the histogram is their difference.
    """
    def _ema(series, span):
        return series.ewm(span=span, adjust=False).mean()

    macd_line = _ema(prices, fast) - _ema(prices, slow)
    signal_line = _ema(macd_line, signal)
    histogram = macd_line - signal_line
    return macd_line, signal_line, histogram

def calculate_bollinger_bands(prices: pd.Series, period=20, std_dev=2):
    """Return ``(upper, middle, lower)`` Bollinger bands for *prices*.

    The middle band is the rolling mean over *period* bars
    (``min_periods=1``); the outer bands sit *std_dev* rolling standard
    deviations away from it.  The first band value is NaN because the
    standard deviation of a single observation is undefined.
    """
    window = prices.rolling(window=period, min_periods=1)
    middle = window.mean()
    half_width = window.std() * std_dev
    upper = middle + half_width
    lower = middle - half_width
    return upper, middle, lower

def create_state_vector(df_1h: pd.DataFrame, df_1d: pd.DataFrame, pair: str) -> np.ndarray:
    """Build the fixed-length feature vector the Q-network consumes.

    Args:
        df_1h: Hourly bars with ``high``, ``low``, ``close`` and ``atr``
            columns (``volume`` is optional); at least 50 rows are required.
        df_1d: Daily bars with a ``close`` column; at least 30 rows are required.
        pair: Pair name; not used in the computation.

    Returns:
        A ``float32`` array of length ``STATE_SIZE`` with every non-finite
        value replaced by 0.0.  An all-zero array is returned when there is
        too little data or when any feature computation fails (the failure
        is logged).

    Time-of-day features are taken from the timestamp of the last hourly bar
    when the index is datetime-like, so historical slices are encoded with
    their own time rather than the wall clock.  The wall clock is used only
    as a fallback for non-datetime indexes.
    """
    if len(df_1h) < 50 or len(df_1d) < 30:
        return np.zeros(STATE_SIZE)
    features = []
    try:
        close = df_1h['close']
        returns = close.pct_change()  # reused for the last-5 returns and the volatility feature
        close_1h = close.iloc[-1]

        # Position of the last close inside the 20-bar high/low range.
        high_20, low_20 = df_1h['high'].iloc[-20:].max(), df_1h['low'].iloc[-20:].min()
        features.append((close_1h - low_20) / (high_20 - low_20 + EPS))

        # Last five hourly returns.
        features.extend(returns.iloc[-5:].values)

        # RSI on both timeframes, scaled to 0..1.
        rsi_1h = calculate_rsi(close, 14).iloc[-1] / 100.0
        rsi_1d = calculate_rsi(df_1d['close'], 14).iloc[-1] / 100.0
        features.extend([rsi_1h, rsi_1d])

        # MACD and its signal line, squashed with tanh.
        macd, signal, _ = calculate_macd(close)
        features.extend([np.tanh(macd.iloc[-1] * 100), np.tanh(signal.iloc[-1] * 100)])

        # Bollinger position and relative band width.
        upper, middle, lower = calculate_bollinger_bands(close)
        band_span = upper.iloc[-1] - lower.iloc[-1]
        bb_pos = (close_1h - lower.iloc[-1]) / (band_span + EPS)
        bb_width = band_span / middle.iloc[-1]
        features.extend([bb_pos, bb_width])

        # ATR relative to its 20-bar mean, and return volatility.
        atr = df_1h['atr'].iloc[-1]
        atr_ma = df_1h['atr'].rolling(20).mean().iloc[-1]
        features.extend([atr / (atr_ma + EPS), returns.std() * 100])

        # Trend: EMA(12) vs EMA(26) on both timeframes plus a 20-bar slope.
        ema_fast = close.ewm(span=12).mean().iloc[-1]
        ema_slow = close.ewm(span=26).mean().iloc[-1]
        trend_1h = (ema_fast - ema_slow) / ema_slow
        ema_fast_1d = df_1d['close'].ewm(span=12).mean().iloc[-1]
        ema_slow_1d = df_1d['close'].ewm(span=26).mean().iloc[-1]
        trend_1d = (ema_fast_1d - ema_slow_1d) / ema_slow_1d
        slope = (close.iloc[-1] - close.iloc[-20]) / close.iloc[-20]
        features.extend([trend_1h * 10, trend_1d * 10, slope * 10])

        # Recent volume relative to the 50-bar mean (1.0 when volume is absent or all zero).
        vol_ratio = 1.0
        if 'volume' in df_1h.columns and df_1h['volume'].sum() > 0:
            vol_ratio = df_1h['volume'].iloc[-5:].mean() / (df_1h['volume'].iloc[-50:].mean() + EPS)
        features.append(vol_ratio)

        # Session flags and calendar features from the bar's own timestamp.
        # pd.Timestamp subclasses datetime; NaT also does, hence the isna check.
        bar_time = df_1h.index[-1]
        if not isinstance(bar_time, datetime) or pd.isna(bar_time):
            bar_time = datetime.now()
        hour = bar_time.hour
        features.extend([1.0 if 0 <= hour < 8 else 0.0, 1.0 if 8 <= hour < 16 else 0.0, 1.0 if 16 <= hour < 24 else 0.0])
        features.extend([bar_time.weekday() / 6.0, hour / 23.0])

        # Short-horizon momentum / volatility / trend, plus three reserved zeros.
        try:
            closes = close.values[-20:]
            momentum = (closes[-1] - closes[-10]) / (closes[-10] + EPS)
            volatility = np.std(closes) / (np.mean(closes) + EPS)
            trend = (closes[-1] - closes[0]) / (closes[0] + EPS)
            features.extend([np.tanh(momentum * 10), volatility, np.tanh(trend * 10), 0.0, 0.0, 0.0])
        except Exception:
            features.extend([0.0] * 6)

        # Force the exact length: truncate, then zero-pad.
        features = features[:STATE_SIZE]
        features += [0.0] * (STATE_SIZE - len(features))

        # NaN/inf would propagate through the network and corrupt its
        # weights during training, so neutralise them here.
        state = np.array(features, dtype=np.float32)
        return np.nan_to_num(state, nan=0.0, posinf=0.0, neginf=0.0)
    except Exception as e:
        log(f"‚ö†Ô∏è State vector error: {e}", "warn")
        return np.zeros(STATE_SIZE)

class ImprovedQNetwork:
    """Fully-connected Q-network (state -> 128 -> 64 -> 32 -> actions) in NumPy.

    Hidden layers use leaky ReLU; the output layer is linear.  Training is
    plain per-sample gradient descent with clipped errors and clipped weight
    gradients.
    """

    # Serialisation order of the parameter arrays.
    _PARAM_NAMES = ('w1', 'b1', 'w2', 'b2', 'w3', 'b3', 'w4', 'b4')

    def __init__(self, state_size=STATE_SIZE, action_size=ACTION_SPACE):
        """Create randomly initialised weights (scaled by sqrt(2 / fan_in)) and zero biases."""
        self.state_size, self.action_size = state_size, action_size
        hidden1, hidden2, hidden3 = 128, 64, 32
        self.w1 = np.random.randn(state_size, hidden1) * np.sqrt(2 / state_size)
        self.b1 = np.zeros(hidden1)
        self.w2 = np.random.randn(hidden1, hidden2) * np.sqrt(2 / hidden1)
        self.b2 = np.zeros(hidden2)
        self.w3 = np.random.randn(hidden2, hidden3) * np.sqrt(2 / hidden2)
        self.b3 = np.zeros(hidden3)
        self.w4 = np.random.randn(hidden3, action_size) * np.sqrt(2 / hidden3)
        self.b4 = np.zeros(action_size)

    def leaky_relu(self, x, alpha=0.01):
        """Leaky ReLU: ``x`` where positive, ``alpha * x`` elsewhere."""
        return np.where(x > 0, x, alpha * x)

    def _leaky_relu_grad(self, activated, alpha=0.01):
        """Derivative of leaky ReLU evaluated from its *output*.

        Because ``alpha`` is positive, the activation is positive exactly
        when its input is, so the output is enough to pick the slope.
        """
        return np.where(activated > 0, 1.0, alpha)

    def forward(self, state):
        """Return the Q-values for *state* (one state, or a batch of rows)."""
        h1 = self.leaky_relu(np.dot(state, self.w1) + self.b1)
        h2 = self.leaky_relu(np.dot(h1, self.w2) + self.b2)
        h3 = self.leaky_relu(np.dot(h2, self.w3) + self.b3)
        return np.dot(h3, self.w4) + self.b4

    def predict(self, state):
        """Return the Q-values for a single state.

        A 1-D state is evaluated as is; for a 2-D input only the first row
        is evaluated.
        """
        if state.ndim == 1:
            state = state.reshape(1, -1)
        return self.forward(state[0])

    def update(self, states, targets, learning_rate=LEARNING_RATE):
        """Run one gradient step per (state, target) pair, in order.

        The output error is clipped to [-1, 1] and each weight gradient is
        clipped to [-1, 1] before being applied.  The backward pass uses the
        leaky-ReLU slope on the negative side so that it matches the forward
        pass.
        """
        for state, target in zip(states, targets):
            # Forward pass, keeping the hidden activations for backprop.
            h1 = self.leaky_relu(np.dot(state, self.w1) + self.b1)
            h2 = self.leaky_relu(np.dot(h1, self.w2) + self.b2)
            h3 = self.leaky_relu(np.dot(h2, self.w3) + self.b3)
            q_values = np.dot(h3, self.w4) + self.b4

            # Backward pass: every gradient is computed from the
            # pre-update weights before any parameter is modified.
            error = np.clip(q_values - target, -1.0, 1.0)
            dw4, db4 = np.outer(h3, error), error
            dh3 = np.dot(error, self.w4.T) * self._leaky_relu_grad(h3)
            dw3, db3 = np.outer(h2, dh3), dh3
            dh2 = np.dot(dh3, self.w3.T) * self._leaky_relu_grad(h2)
            dw2, db2 = np.outer(h1, dh2), dh2
            dh1 = np.dot(dh2, self.w2.T) * self._leaky_relu_grad(h1)
            dw1, db1 = np.outer(state, dh1), dh1

            self.w4 -= learning_rate * np.clip(dw4, -1.0, 1.0)
            self.b4 -= learning_rate * db4
            self.w3 -= learning_rate * np.clip(dw3, -1.0, 1.0)
            self.b3 -= learning_rate * db3
            self.w2 -= learning_rate * np.clip(dw2, -1.0, 1.0)
            self.b2 -= learning_rate * db2
            self.w1 -= learning_rate * np.clip(dw1, -1.0, 1.0)
            self.b1 -= learning_rate * db1

    def clone(self):
        """Return an independent copy of this network."""
        new_net = ImprovedQNetwork(self.state_size, self.action_size)
        for name in self._PARAM_NAMES:
            setattr(new_net, name, getattr(self, name).copy())
        return new_net

    def to_dict(self):
        """Return all parameters as nested lists, keyed ``w1``, ``b1``, ... ``b4``."""
        return {name: getattr(self, name).tolist() for name in self._PARAM_NAMES}

    def from_dict(self, data):
        """Load parameters produced by ``to_dict``; return True on success.

        Loading is all-or-nothing: the arrays are parsed and checked first
        (layer dimensions must chain from ``state_size`` to ``action_size``
        and each bias must match its layer width) and are assigned only if
        every check passes.  On failure the network is left untouched, a
        warning is logged and False is returned.
        """
        try:
            # float dtype: integer arrays would break the in-place updates in update().
            loaded = {name: np.array(data[name], dtype=float) for name in self._PARAM_NAMES}
            width = self.state_size
            for w_name, b_name in (('w1', 'b1'), ('w2', 'b2'), ('w3', 'b3'), ('w4', 'b4')):
                w, b = loaded[w_name], loaded[b_name]
                if w.ndim != 2 or b.ndim != 1 or w.shape[0] != width or w.shape[1] != b.shape[0]:
                    raise ValueError(f"inconsistent shapes for {w_name}/{b_name}: {w.shape}, {b.shape}")
                width = w.shape[1]
            if width != self.action_size:
                raise ValueError(f"output width {width} != action size {self.action_size}")
        except Exception as e:
            log(f"‚ö†Ô∏è Network load error: {e}", "warn")
            return False
        for name, array in loaded.items():
            setattr(self, name, array)
        return True

class ImprovedConfidence:
    """Turns a vector of Q-values into a confidence score and a trade/no-trade decision."""

    def __init__(self):
        self.min_q_spread = 0.1
        self.temperature = 1.0

    def softmax(self, q_values: np.ndarray) -> np.ndarray:
        """Return the temperature-scaled softmax of *q_values*.

        The maximum is subtracted first for numerical stability.
        """
        shifted = q_values - np.max(q_values)
        weights = np.exp(shifted / self.temperature)
        return weights / np.sum(weights)

    def get_confidence(self, q_values: np.ndarray, epsilon: float, force_trade: bool = False) -> Tuple[bool, float, Dict]:
        """Score the agent's certainty and decide whether to trade.

        Confidence (0-100) blends the best action's softmax probability
        (50%), one minus the normalised entropy (30%) and a tanh of the gap
        between the two highest Q-values (20%).  The required threshold
        rises as *epsilon* falls; *force_trade* lowers it to 5 and always
        yields a positive decision.  A Q-value gap of at least 0.03 also
        triggers a trade on its own.

        Returns:
            ``(should_trade, confidence, metrics)`` where *metrics* holds the
            intermediate values as plain floats.
        """
        ranked = np.sort(q_values)[::-1]
        if len(ranked) > 1:
            q_spread = ranked[0] - ranked[1]
        else:
            q_spread = 0.0

        probabilities = self.softmax(q_values)
        best_action_prob = np.max(probabilities)

        # Clip before the log so zero probabilities cannot produce -inf.
        safe_probs = np.clip(probabilities, 1e-10, 1.0)
        entropy = -np.sum(safe_probs * np.log(safe_probs))
        max_entropy = np.log(len(q_values))
        normalized_entropy = entropy / max_entropy

        confidence = (0.5 * best_action_prob + 0.3 * (1 - normalized_entropy) + 0.2 * np.tanh(q_spread * 5)) * 100
        progress = 1 - (epsilon - EPSILON_MIN) / (EPSILON_START - EPSILON_MIN)

        if force_trade:
            threshold = 5.0
        else:
            threshold = 25.0
            # The first epsilon bound that is exceeded decides the threshold.
            for bound, value in ((0.7, 10.0), (0.5, 15.0), (0.3, 20.0)):
                if epsilon > bound:
                    threshold = value
                    break

        metrics = {
            'q_spread': float(q_spread),
            'best_prob': float(best_action_prob),
            'entropy': float(normalized_entropy),
            'confidence': float(np.clip(confidence, 0, 100)),
            'threshold': float(threshold),
            'progress': float(progress),
        }
        should_trade = confidence >= threshold or q_spread >= 0.03 or force_trade
        return should_trade, confidence, metrics

    def calculate_position_size(self, base_size: float, confidence: float) -> float:
        """Scale *base_size* linearly from 50% (confidence 0) to 100% (confidence 100)."""
        confidence_mult = 0.5 + (confidence / 100) * 0.5
        scaled = base_size * confidence_mult
        return scaled

class ImprovedRLAgent:
    """Q-learning agent with replay memory and a periodically synced target network.

    State (memory, network weights, statistics) is restored from disk on
    construction and written back by ``save_state``.
    """

    def __init__(self):
        self.q_network = ImprovedQNetwork()
        self.target_network = ImprovedQNetwork()
        self.memory = deque(maxlen=MEMORY_SIZE)
        self.epsilon = EPSILON_START
        self.learning_count = 0
        self.trade_count = 0
        self.stats = {'total_updates': 0, 'total_trades': 0, 'profitable_trades': 0, 'total_pnl': 0.0,
                     'win_rate': 0.0, 'avg_reward': 0.0, 'epsilon_history': [], 'q_value_history': [],
                     'pipeline_trades_learned': 0, 'last_pipeline_sync': None}
        self.load_state()
        log(f"üß† RL Agent initialized: {len(self.memory)} experiences", "brain")

    def select_action(self, state, force_greedy=False, backtest_mode=False):
        """Pick an action index (0=BUY, 1=SELL, 2=HOLD) epsilon-greedily.

        In backtest mode a fixed exploration rate is used, half of the
        random picks are restricted to BUY/SELL, and HOLD is suppressed in
        greedy picks when the top two Q-values are closer than 0.1.
        """
        epsilon_to_use = BACKTEST_EPSILON if backtest_mode else self.epsilon
        if not force_greedy and random.random() < epsilon_to_use:
            if backtest_mode and random.random() > 0.5:
                return random.choice([0, 1])
            return random.randint(0, ACTION_SPACE - 1)
        q_values = self.q_network.predict(state)
        if backtest_mode:
            q_spread = np.max(q_values) - np.sort(q_values)[-2]
            if q_spread < 0.1:
                q_values[2] = -999  # rule out HOLD so the backtest produces trades
        return int(np.argmax(q_values))

    def remember(self, experience: Experience):
        """Append *experience* to the bounded replay memory."""
        self.memory.append(experience)

    def learn(self):
        """Train on one random mini-batch once enough experiences exist.

        Targets follow the standard Q-learning rule using the target
        network for the bootstrap value.  Epsilon decays after each call and
        the target network is re-cloned every ``TARGET_UPDATE_FREQUENCY``
        calls.
        """
        if len(self.memory) < MIN_REPLAY_SIZE:
            return
        batch = random.sample(self.memory, min(BATCH_SIZE, len(self.memory)))
        states = np.array([np.array(exp.state) for exp in batch])
        next_states = np.array([np.array(exp.next_state) for exp in batch])
        current_q_batch = np.array([self.q_network.forward(s) for s in states])
        next_q_batch = np.array([self.target_network.forward(s) for s in next_states])

        # Only the taken action's target differs from the current prediction.
        targets = current_q_batch.copy()
        for i, exp in enumerate(batch):
            if exp.done:
                targets[i][exp.action] = exp.reward
            else:
                targets[i][exp.action] = exp.reward + GAMMA * np.max(next_q_batch[i])

        self.q_network.update(states, targets, LEARNING_RATE)
        self.learning_count += 1
        self.stats['total_updates'] += 1
        avg_q = np.mean([np.max(q) for q in current_q_batch])
        self.stats['q_value_history'].append(float(avg_q))
        self.epsilon = max(EPSILON_MIN, self.epsilon * EPSILON_DECAY)
        self.stats['epsilon_history'].append(self.epsilon)
        if self.learning_count % TARGET_UPDATE_FREQUENCY == 0:
            self.target_network = self.q_network.clone()
            log(f"üéØ Target network updated (#{self.learning_count})", "brain")

    def calculate_reward(self, trade: TradeOutcome) -> float:
        """Convert a trade outcome into a scalar reward.

        Combines scaled P&L with a win bonus or loss penalty, a
        risk-adjusted term (P&L divided by the entry-to-stop distance), a
        duration multiplier (x1.1 under 24h, x0.9 over 72h) and an extra
        bonus for hitting take-profit.
        """
        reward = 0.0
        if trade.pnl > 0:
            reward += trade.pnl * PROFIT_REWARD_SCALE
            reward += WIN_BONUS
        else:
            reward += trade.pnl * LOSS_PENALTY_SCALE
            reward -= LOSS_PENALTY
        risk = abs(trade.entry_price - trade.sl) + EPS
        reward += (trade.pnl / risk) * SHARPE_REWARD_SCALE
        if trade.duration < 24:
            reward *= 1.1
        elif trade.duration > 72:
            reward *= 0.9
        if trade.hit_tp:
            reward += WIN_BONUS * 0.5
        return float(reward)

    def record_trade(self, trade_outcome: TradeOutcome):
        """Update statistics, store the trade as a terminal experience and learn."""
        self.trade_count += 1
        self.stats['total_trades'] += 1
        self.stats['total_pnl'] += trade_outcome.pnl
        if trade_outcome.pnl > 0:
            self.stats['profitable_trades'] += 1
        self.stats['win_rate'] = self.stats['profitable_trades'] / self.stats['total_trades']
        reward = self.calculate_reward(trade_outcome)
        # Incremental running mean of the reward.
        self.stats['avg_reward'] = ((self.stats['avg_reward'] * (self.stats['total_trades'] - 1) + reward) /
                                     self.stats['total_trades'])
        action = 0 if trade_outcome.action == 'BUY' else 1 if trade_outcome.action == 'SELL' else 2
        state = trade_outcome.state_at_entry
        state_list = state if isinstance(state, list) else state.tolist()
        # The episode is terminal, so next_state is never used for bootstrapping.
        experience = Experience(
            state=state_list, action=action, reward=reward, next_state=state_list, done=True,
            metadata={'pair': trade_outcome.pair, 'pnl': trade_outcome.pnl,
                      'hit_tp': trade_outcome.hit_tp, 'duration': trade_outcome.duration})
        self.remember(experience)
        if len(self.memory) >= MIN_REPLAY_SIZE:
            self.learn()

    def learn_from_pipeline_trades(self, pipeline_db: PipelineDatabase, data: Dict):
        """Learn from pipeline trades evaluated since the last sync.

        Args:
            pipeline_db: Source of completed trades.
            data: ``{pair: {'1h': DataFrame, '1d': DataFrame}}`` used to build
                the state vector; trades for pairs without both frames are skipped.

        Returns:
            Number of trades turned into experiences.

        NOTE(review): the state is built from the frames as they are now,
        not as they were when the trade was opened.
        """
        sync_data = persistence.load_json(PIPELINE_SYNC_FILE, default={'last_sync': None, 'trades_learned': 0})
        if not isinstance(sync_data, dict):
            sync_data = {'last_sync': None, 'trades_learned': 0}
        last_sync = sync_data.get('last_sync')
        log(f"\nüíæ Syncing with Pipeline Database...", "database")
        completed_trades = pipeline_db.get_completed_trades(since_timestamp=last_sync)
        if not completed_trades:
            log("‚ÑπÔ∏è No new pipeline trades to learn from", "info")
            return 0
        log(f"üìä Found {len(completed_trades)} new pipeline trades", "database")
        trades_learned = 0
        for trade_data in completed_trades:
            try:
                (pair, timeframe, model_used, entry_price, exit_price, sl_price, tp_price,
                 prediction, hit_tp, pnl, pnl_percent, duration_hours, created_at, evaluated_at) = trade_data
                if '/' not in pair:
                    # First and last three characters, so both "EUR_USD" and
                    # "EURUSD" normalise to "EUR/USD".
                    pair = f"{pair[:3]}/{pair[-3:]}"
                if pair not in data or '1h' not in data[pair] or '1d' not in data[pair]:
                    continue
                state = create_state_vector(data[pair]['1h'], data[pair]['1d'], pair)
                action_str = 'BUY' if prediction == 1 else 'SELL'
                outcome = TradeOutcome(pair=pair, action=action_str, entry_price=float(entry_price),
                    exit_price=float(exit_price), sl=float(sl_price), tp=float(tp_price), position_size=1.0,
                    pnl=float(pnl), duration=float(duration_hours), hit_tp=bool(hit_tp),
                    timestamp_entry=created_at, timestamp_exit=evaluated_at, state_at_entry=state.tolist(),
                    confidence=70.0, regime='PIPELINE', session=timeframe)
                self.record_trade(outcome)
                trades_learned += 1
            except Exception as e:
                log(f"‚ö†Ô∏è Failed to process pipeline trade: {e}", "warn")
                continue
        # Advance the sync marker; rows without a timestamp cannot be compared.
        evaluated = [trade[13] for trade in completed_trades if trade[13] is not None]
        if evaluated:
            sync_data['last_sync'] = max(evaluated)
        sync_data['trades_learned'] = sync_data.get('trades_learned', 0) + trades_learned
        persistence.save_json(PIPELINE_SYNC_FILE, sync_data)
        self.stats['pipeline_trades_learned'] = sync_data.get('trades_learned', 0)
        self.stats['last_pipeline_sync'] = datetime.now(timezone.utc).isoformat()
        log(f"‚úÖ Learned from {trades_learned} pipeline trades (Total: {self.stats['pipeline_trades_learned']})", "success")
        return trades_learned

    def save_state(self):
        """Persist replay memory, both networks and the statistics."""
        try:
            memory_list = [exp.to_dict() for exp in self.memory]
            persistence.save_json(RL_MEMORY_FILE, memory_list)
            network_data = {'q_network': self.q_network.to_dict(), 'target_network': self.target_network.to_dict()}
            persistence.save_json(RL_NETWORK_WEIGHTS_FILE, network_data)
            persistence.save_json(RL_LEARNING_STATS_FILE, self.stats)
            log(f"üíæ RL state saved: {len(self.memory)} experiences, {self.stats['total_trades']} trades", "success")
        except Exception as e:
            log(f"‚ö†Ô∏è Failed to save RL state: {e}", "warn")

    def load_state(self):
        """Restore memory, networks and statistics from disk.

        The three parts are loaded independently, so a corrupt replay file
        does not prevent the learned weights or the statistics from being
        restored.  Loaded statistics are merged over the defaults so keys
        missing from an older file still exist.
        """
        try:
            memory_data = persistence.load_json(RL_MEMORY_FILE, default=None)
            if memory_data:
                experiences = [Experience.from_dict(exp) for exp in memory_data]
                self.memory.extend(experiences)
                log(f"‚úÖ Loaded {len(experiences)} experiences", "success")
        except Exception as e:
            log(f"‚ö†Ô∏è Could not load RL state: {e}", "warn")

        try:
            network_data = persistence.load_json(RL_NETWORK_WEIGHTS_FILE, default=None)
            networks_loaded = False
            if network_data:
                q_loaded = self.q_network.from_dict(network_data.get('q_network', {}))
                t_loaded = self.target_network.from_dict(network_data.get('target_network', {}))
                networks_loaded = q_loaded and t_loaded
            if networks_loaded:
                log("‚úÖ Loaded Q-networks", "success")
            else:
                log("üîÑ New Q-networks initialized", "info")
        except Exception as e:
            log(f"‚ö†Ô∏è Could not load RL state: {e}", "warn")

        try:
            loaded_stats = persistence.load_json(RL_LEARNING_STATS_FILE, default=None)
            if isinstance(loaded_stats, dict) and loaded_stats:
                self.stats = {**self.stats, **loaded_stats}
                if self.stats.get('epsilon_history'):
                    # Resume exploration where the previous run stopped.
                    self.epsilon = self.stats['epsilon_history'][-1]
                log(f"‚úÖ Loaded stats: {self.stats['total_trades']} trades", "success")
        except Exception as e:
            log(f"‚ö†Ô∏è Could not load RL state: {e}", "warn")

class TradingEnvironment:
    """Tracks open trades in memory and keeps a persisted history of closed ones."""

    def __init__(self):
        self.active_trades = {}
        self.trade_history = persistence.load_json(TRADE_HISTORY_FILE, default=[])
        if self.trade_history:
            log(f"‚úÖ Loaded {len(self.trade_history)} historical trades", "success")

    def save_trade_history(self):
        """Write the closed-trade history to ``TRADE_HISTORY_FILE``."""
        persistence.save_json(TRADE_HISTORY_FILE, self.trade_history)

    def execute_trade(self, pair: str, action: str, price: float, sl: float,
                     tp: float, size: float, state: np.ndarray, metadata: Dict) -> str:
        """Register a new open trade and return its id.

        The id is the pair name plus the local time to the second; the entry
        time stored on the trade is UTC in ISO-8601 form.
        """
        trade_id = f"{pair}_{datetime.now():%Y%m%d_%H%M%S}"
        record = {
            'pair': pair,
            'action': action,
            'entry_price': price,
            'sl': sl,
            'tp': tp,
            'size': size,
            'entry_time': datetime.now(timezone.utc).isoformat(),
            'state_at_entry': state.tolist(),
            'metadata': metadata,
        }
        self.active_trades[trade_id] = record
        log(f"üí∞ Trade: {trade_id} - {action} {pair} @ {price:.5f}", "money")
        return trade_id

    def check_exits(self, current_prices: Dict[str, float]) -> List[TradeOutcome]:
        """Close every open trade whose stop-loss or take-profit has been reached.

        Trades for pairs missing from *current_prices* are left open.  A
        closed trade exits at its TP or SL level (TP wins if both are
        flagged), has trading costs deducted from its P&L, is appended to
        the history and removed from the active set.  The history is saved
        when at least one trade closed.

        Returns:
            The outcomes of the trades closed by this call.
        """
        closed = []
        # Iterate over a snapshot because closed trades are deleted in the loop.
        for trade_id, trade in list(self.active_trades.items()):
            pair = trade['pair']
            if pair not in current_prices:
                continue
            price_now = current_prices[pair]
            is_buy = trade['action'] == 'BUY'
            if is_buy:
                hit_tp = price_now >= trade['tp']
                hit_sl = price_now <= trade['sl']
            else:
                hit_tp = price_now <= trade['tp']
                hit_sl = price_now >= trade['sl']
            if not (hit_tp or hit_sl):
                continue

            exit_price = trade['tp'] if hit_tp else trade['sl']
            if is_buy:
                pnl = (exit_price - trade['entry_price']) * trade['size']
            else:
                pnl = (trade['entry_price'] - exit_price) * trade['size']
            # Costs: a flat 0.03% of the exit price plus 0.05% of the traded notional.
            pnl -= exit_price * 0.0003 + exit_price * trade['size'] * 0.0005

            opened_at = datetime.fromisoformat(trade['entry_time'])
            exit_time = datetime.now(timezone.utc)
            duration = (exit_time - opened_at).total_seconds() / 3600.0

            meta = trade['metadata']
            outcome = TradeOutcome(
                pair=pair,
                action=trade['action'],
                entry_price=trade['entry_price'],
                exit_price=exit_price,
                sl=trade['sl'],
                tp=trade['tp'],
                position_size=trade['size'],
                pnl=pnl,
                duration=duration,
                hit_tp=hit_tp,
                timestamp_entry=trade['entry_time'],
                timestamp_exit=exit_time.isoformat(),
                state_at_entry=trade['state_at_entry'],
                confidence=meta.get('confidence', 0),
                regime=meta.get('regime', 'UNKNOWN'),
                session=meta.get('session', 'UNKNOWN'),
            )
            closed.append(outcome)
            self.trade_history.append({
                'trade_id': trade_id,
                'pair': pair,
                'action': trade['action'],
                'entry': trade['entry_price'],
                'exit': exit_price,
                'pnl': pnl,
                'result': 'WIN' if hit_tp else 'LOSS',
                'duration_hours': duration,
                'timestamp': exit_time.isoformat(),
            })
            del self.active_trades[trade_id]
            log(f"‚úÖ Closed: {trade_id} - {'WIN' if hit_tp else 'LOSS'} | ${pnl:.2f}",
                "success" if pnl > 0 else "warn")

        if closed:
            self.save_trade_history()
        return closed

def _daily_history_before(df_1d, bar_time, fallback_rows):
    """Return the daily bars dated before the calendar day of *bar_time*.

    Used so a historical hourly bar only sees daily bars that were complete
    at that time.  When the indexes are not comparable timestamps
    (non-datetime index, or tz-aware vs tz-naive) the first *fallback_rows*
    rows are returned instead.
    """
    if isinstance(bar_time, pd.Timestamp) and isinstance(df_1d.index, pd.DatetimeIndex):
        try:
            return df_1d.loc[df_1d.index < bar_time.normalize()]
        except TypeError:
            pass  # tz-aware and tz-naive timestamps cannot be ordered
    return df_1d.iloc[:max(0, fallback_rows)]


def run_weekend_backtest(data: Dict, agent: ImprovedRLAgent, confidence_system: ImprovedConfidence):
    """Replay recent hourly bars and let the agent learn from simulated trades.

    For each pair, up to ``WEEKEND_BACKTEST_STEPS`` entry points (every third
    bar of the most recent ~1500) are simulated.  The agent picks an action
    from the state built on data before the entry bar; BUY/SELL trades are
    opened at the bar's close with ATR-based stop-loss and take-profit and
    closed on the first later close that reaches either level, or after 100
    bars.  Each outcome is passed to ``agent.record_trade``.

    Sample points whose state is the all-zero "no data" vector are skipped,
    so the agent does not learn from placeholder states.

    Args:
        data: ``{pair: {'1h': DataFrame, '1d': DataFrame}}``; the hourly
            frame needs ``close`` and ``atr`` columns.
        agent: Agent that selects actions and records the outcomes.
        confidence_system: Supplies the confidence value stored on each trade.

    Returns:
        Number of simulated trades the agent learned from.
    """
    log("\nüéì WEEKEND LEARNING MODE: Running backtest (FIXED)...", "brain")
    trades_learned = 0
    errors_encountered = 0
    skipped_empty_states = 0
    trades_by_pair = {pair: 0 for pair in PAIRS}
    action_map = {0: 'BUY', 1: 'SELL', 2: 'HOLD'}
    for pair in PAIRS:
        if pair not in data or '1h' not in data[pair] or '1d' not in data[pair]:
            log(f"  ‚ö†Ô∏è {pair}: Missing data", "warn")
            continue
        df_1h = data[pair]['1h']
        df_1d = data[pair]['1d']
        start_idx = max(60, len(df_1h) - 1500)
        end_idx = len(df_1h) - 5
        sample_points = list(range(start_idx, end_idx, 3))[-WEEKEND_BACKTEST_STEPS:]
        log(f"  üìä {pair}: Testing {len(sample_points)} sample points", "brain")
        for i in sample_points:
            try:
                # The daily frame must be cut by time, not by the hourly
                # position: slicing it with an hourly index would hand
                # future daily bars to a historical state.
                daily_history = _daily_history_before(df_1d, df_1h.index[i], i - 24)
                state = create_state_vector(df_1h.iloc[:i], daily_history, pair)
                if not np.any(state):
                    skipped_empty_states += 1
                    continue
                best_action = agent.select_action(state, force_greedy=False, backtest_mode=True)
                direction = action_map[best_action]
                if direction == 'HOLD':
                    continue
                q_values = agent.q_network.predict(state)
                _, confidence, _ = confidence_system.get_confidence(q_values, agent.epsilon, force_trade=True)

                entry_price = df_1h['close'].iloc[i]
                atr = df_1h['atr'].iloc[i]
                if direction == 'BUY':
                    sl = entry_price - (atr * ATR_SL_MULTIPLIER)
                    tp = entry_price + (atr * ATR_TP_MULTIPLIER)
                else:
                    sl = entry_price + (atr * ATR_SL_MULTIPLIER)
                    tp = entry_price - (atr * ATR_TP_MULTIPLIER)

                # Walk forward until TP or SL is reached (TP is checked first).
                hit_tp = hit_sl = False
                exit_idx = i + 1
                for j in range(i + 1, min(i + 100, len(df_1h))):
                    current_price = df_1h['close'].iloc[j]
                    if direction == 'BUY':
                        reached_tp, reached_sl = current_price >= tp, current_price <= sl
                    else:
                        reached_tp, reached_sl = current_price <= tp, current_price >= sl
                    if reached_tp or reached_sl:
                        hit_tp = bool(reached_tp)
                        hit_sl = not hit_tp
                        exit_idx = j
                        break

                if hit_tp or hit_sl:
                    exit_price = tp if hit_tp else sl
                else:
                    # Neither level reached: close at the end of the look-ahead window.
                    exit_idx = min(i + 100, len(df_1h) - 1)
                    exit_price = df_1h['close'].iloc[exit_idx]

                position_size = 1.0
                if direction == 'BUY':
                    pnl = (exit_price - entry_price) * position_size
                else:
                    pnl = (entry_price - exit_price) * position_size
                duration = (exit_idx - i) * 1.0  # hourly bars, so bars == hours
                outcome = TradeOutcome(pair=pair, action=direction, entry_price=entry_price, exit_price=exit_price,
                    sl=sl, tp=tp, position_size=position_size, pnl=pnl, duration=duration, hit_tp=hit_tp,
                    timestamp_entry=str(df_1h.index[i]), timestamp_exit=str(df_1h.index[exit_idx]),
                    state_at_entry=state.tolist(), confidence=confidence, regime='BACKTEST', session='WEEKEND')
                agent.record_trade(outcome)
                trades_learned += 1
                trades_by_pair[pair] += 1
            except Exception as e:
                errors_encountered += 1
                # Only the first few errors are logged to keep the output readable.
                if errors_encountered <= 5:
                    log(f"‚ö†Ô∏è Backtest error at index {i}: {e}", "warn")
                continue
    log(f"\nüìä Weekend Backtest Results by Pair:", "brain")
    for pair, count in trades_by_pair.items():
        if count > 0:
            log(f"  {pair}: {count} trades", "brain")
    log(f"‚úÖ Weekend backtest: {trades_learned} trades learned", "success")
    if errors_encountered > 0:
        log(f"‚ö†Ô∏è Encountered {errors_encountered} errors during backtest", "warn")
    if skipped_empty_states > 0:
        log(f"Skipped {skipped_empty_states} sample points without a usable state", "info")
    if trades_learned > 0:
        # Every recorded trade appended one experience, so the newest
        # entries in memory are the ones produced by this backtest.
        recent_exp = list(agent.memory)[-trades_learned:]
        wins = sum(1 for exp in recent_exp if exp.metadata.get('pnl', 0) > 0)
        win_rate = (wins / trades_learned) * 100
        avg_reward = sum(exp.reward for exp in recent_exp) / trades_learned
        total_pnl = sum(exp.metadata.get('pnl', 0) for exp in recent_exp)
        log(f"üìä Backtest Performance:", "brain")
        log(f"  Wins: {wins}/{trades_learned} ({win_rate:.1f}%)", "brain")
        log(f"  Avg Reward: {avg_reward:.2f}", "brain")
        log(f"  Total P&L: ${total_pnl:.5f}", "brain")
    return trades_learned

def fetch_price(pair, timeout=10):
    """Scrape the current exchange rate for ``pair`` from x-rates.com via Browserless.

    Args:
        pair: Currency pair written as ``"BASE/QUOTE"`` (for example ``"EUR/USD"``).
        timeout: Seconds to wait for the Browserless HTTP request.

    Returns:
        The scraped rate as a ``float``, or ``None`` when no Browserless token
        is configured, the pair cannot be split into two codes, the request
        fails, or the rate is not found in the returned page.
    """
    if not BROWSERLESS_TOKEN:
        return None
    try:
        fc, tc = pair.split("/")
        url = f"https://production-sfo.browserless.io/content?token={BROWSERLESS_TOKEN}"
        target = f"https://www.x-rates.com/calculator/?from={fc}&to={tc}&amount=1"
        r = requests.post(url, json={"url": target}, timeout=timeout)
        # The rate is the text right after the element carrying "ccOutputRslt".
        m = re.search(r'ccOutputRslt[^>]*>([\d,.]+)', r.text)
        return float(m.group(1).replace(",", "")) if m else None
    except Exception:
        # Best-effort lookup: callers treat None as "no price available".
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
        return None

def ensure_atr(df):
    """Return ``df`` with an ``atr`` column that is never NaN and never below ``MIN_ATR``.

    If ``df`` already has an ``atr`` column whose median exceeds ``MIN_ATR``,
    that column is kept (NaNs replaced by ``MIN_ATR``, values floored at
    ``MIN_ATR``) and a *new* frame is returned.  Otherwise the column is
    recomputed as the rolling mean (window ``ATR_PERIOD``, ``min_periods=1``)
    of the true range and written into ``df`` in place.

    Args:
        df: DataFrame with ``high``, ``low`` and ``close`` columns.

    Returns:
        A DataFrame carrying the cleaned or recomputed ``atr`` column.
    """
    if "atr" in df.columns and df["atr"].median() > MIN_ATR:
        return df.assign(atr=df["atr"].fillna(MIN_ATR).clip(lower=MIN_ATR))
    high, low, close = df["high"].values, df["low"].values, df["close"].values
    prev_close = np.roll(close, 1)
    tr = np.maximum.reduce([high - low, np.abs(high - prev_close), np.abs(low - prev_close)])
    if len(tr) > 0:
        # np.roll wraps the last close into slot 0; the first bar has no
        # previous close, so its true range is just high - low.  Guarding the
        # assignment (not only the value) keeps an empty frame from raising.
        tr[0] = high[0] - low[0]
    df["atr"] = (
        pd.Series(tr, index=df.index)
        .rolling(ATR_PERIOD, min_periods=1)
        .mean()
        .fillna(MIN_ATR)
        .clip(lower=MIN_ATR)
    )
    return df

def update_pickle_data():
    """Append the latest scraped price as a new bar to each pair's pickled OHLC frames.

    For every pair in ``PAIRS`` a price is fetched with ``fetch_price``; pairs
    without a positive price are skipped.  Each matching ``*.pkl`` in
    ``PICKLE_FOLDER`` (excluding model, indicator-cache and backup files) that
    holds a DataFrame with at least 10 rows and the four OHLC columns gets one
    flat bar (open = high = low = close = price, volume 0) stamped with the
    current minute, provided that minute is later than the last index entry.
    The frame is trimmed to its last 5000 rows, gap-filled, passed through
    ``ensure_atr`` and written back gzip-compressed.

    Returns:
        int: Number of pickle files rewritten.
    """
    log("üîÑ Updating pickle data...", "info")
    updated_count = 0
    skip_markers = ('_model', 'indicator_cache', '.bak')
    ohlc_columns = ('open', 'high', 'low', 'close')
    for pair in PAIRS:
        latest_price = fetch_price(pair)
        if not latest_price or latest_price <= 0:
            continue
        price = float(latest_price)
        pair_key = pair.replace("/", "_")
        for pkl_file in PICKLE_FOLDER.glob(f"{pair_key}*.pkl"):
            if any(marker in pkl_file.name for marker in skip_markers):
                continue
            try:
                # NOTE(review): unpickling runs arbitrary code; these files must be trusted.
                try:
                    df = pd.read_pickle(pkl_file, compression='gzip')
                except Exception:
                    # Not gzip-compressed: retry as a plain pickle.
                    df = pd.read_pickle(pkl_file, compression=None)
                if not isinstance(df, pd.DataFrame) or len(df) < 10:
                    continue
                if not all(c in df.columns for c in ohlc_columns):
                    continue
                last_time = df.index[-1]
                # Match the index's timezone: comparing a naive "now" with a
                # tz-aware timestamp raises TypeError, which previously meant
                # such files were silently never updated.
                new_time = datetime.now(getattr(last_time, "tzinfo", None)).replace(second=0, microsecond=0)
                if new_time > last_time:
                    new_row = pd.DataFrame(
                        {'open': [price], 'high': [price], 'low': [price], 'close': [price], 'volume': [0]},
                        index=[new_time],
                    )
                    df = pd.concat([df, new_row]).tail(5000).ffill().bfill()
                    df = ensure_atr(df)
                    df.to_pickle(pkl_file, compression='gzip')
                    updated_count += 1
            except Exception as e:
                # Still best-effort per file, but no longer silent.
                log(f"Could not update {pkl_file.name}: {e}", "warn")
    log(f"‚úÖ Updated {updated_count} files", "success")
    return updated_count

def load_data(folder):
    """Load per-pair OHLC DataFrames from the pickle files in ``folder``.

    Files are considered when their stem starts with two known currency codes
    separated by ``_`` (``EUR_USD...``) and the resulting pair is in ``PAIRS``;
    model, indicator-cache and backup files are ignored.  A file is classified
    as timeframe ``"1d"`` when its stem contains ``"1d"`` or ``"daily"`` and as
    ``"1h"`` otherwise.  Frames need at least 50 rows and the four OHLC
    columns; they are gap-filled, given a tz-naive datetime index (rows whose
    index cannot be parsed are dropped) and passed through ``ensure_atr``.

    Args:
        folder: ``pathlib.Path`` of the directory to scan.

    Returns:
        dict: ``{"BASE/QUOTE": {"1h": DataFrame, "1d": DataFrame}}`` with only
        the timeframes that loaded successfully; empty when ``folder`` is
        missing or nothing usable was found.
    """
    log(f"üìÇ Loading data from: {folder}", "info")
    if not folder.exists():
        return {}
    skip_markers = ('_model', 'indicator_cache', '.bak')
    currencies = {"EUR", "GBP", "USD", "AUD", "NZD", "CAD", "CHF", "JPY"}
    ohlc_columns = ['open', 'high', 'low', 'close']
    pair_files = defaultdict(list)
    # Sorted so that when several files map to the same pair/timeframe the
    # one that wins (the last processed) does not depend on filesystem order.
    for pkl in sorted(folder.glob("*.pkl")):
        if any(marker in pkl.name for marker in skip_markers):
            continue
        parts = pkl.stem.split('_')
        if len(parts) >= 2 and parts[0] in currencies and parts[1] in currencies:
            pair_files[f"{parts[0]}/{parts[1]}"].append(pkl)
    combined = {}
    for pair, files in pair_files.items():
        if pair not in PAIRS:
            continue
        pair_data = {}
        for pkl in files:
            try:
                # NOTE(review): unpickling runs arbitrary code; these files must be trusted.
                try:
                    df = pd.read_pickle(pkl, compression='gzip')
                except Exception:
                    # Not gzip-compressed: retry as a plain pickle.
                    df = pd.read_pickle(pkl, compression=None)
                if not isinstance(df, pd.DataFrame) or len(df) < 50:
                    continue
                if not all(c in df.columns for c in ohlc_columns):
                    continue
                df = df.ffill().bfill().dropna(subset=ohlc_columns)
                df.index = pd.to_datetime(df.index, errors="coerce")
                if df.index.tz is not None:
                    df.index = df.index.tz_localize(None)
                df = df[df.index.notna()]
                tf = "1d" if "1d" in pkl.stem or "daily" in pkl.stem else "1h"
                df = ensure_atr(df)
                pair_data[tf] = df
                log(f"‚úÖ {pair} [{tf}]: {len(df)} rows", "success")
            except Exception as e:
                # Skip the broken file but say so instead of failing silently.
                log(f"Could not load {pkl.name}: {e}", "warn")
        if pair_data:
            combined[pair] = pair_data
    log(f"‚úÖ Loaded {len(combined)} pairs", "success")
    return combined

def send_email(signals, iteration, rl_stats, mode, pipeline_stats):
    """Email an HTML summary of the current cycle to ``GMAIL_USER``.

    The message is sent from and to ``GMAIL_USER`` over SMTP-SSL
    (smtp.gmail.com:465).  Nothing is sent when ``GMAIL_APP_PASSWORD`` is
    empty.  Any failure while building or sending is logged, not raised.

    Args:
        signals: Mapping of pair -> signal dict; entries whose ``direction``
            is not ``'HOLD'`` are counted as active signals.
        iteration: Iteration number shown in the subject and header.
        rl_stats: Agent statistics dict (``total_trades``, ``win_rate``,
            ``total_pnl``, ``epsilon_history``, ``pipeline_trades_learned``).
        mode: ``"WEEKEND_LEARNING"`` selects the weekend badge; any other
            value is shown as live trading.
        pipeline_stats: Accepted for interface compatibility; not used here.
    """
    if not GMAIL_APP_PASSWORD:
        log("‚ö†Ô∏è Email skipped: No password", "warn")
        return
    try:
        msg = MIMEMultipart('alternative')
        msg['Subject'] = f"üß† BEACON v18.1 [{mode}] - Iter #{iteration}"
        msg['From'] = GMAIL_USER
        msg['To'] = GMAIL_USER
        active_signals = sum(1 for s in signals.values() if s.get('direction') != 'HOLD')
        # Latest epsilon if any history exists, otherwise the starting value.
        epsilon_history = rl_stats.get('epsilon_history')
        epsilon = epsilon_history[-1] if epsilon_history else EPSILON_START
        mode_badge = "WEEKEND LEARNING" if mode == "WEEKEND_LEARNING" else "LIVE TRADING"
        mode_color = "#f59e0b" if mode == "WEEKEND_LEARNING" else "#10b981"
        # The header labels the timestamp "UTC", so it must be taken in UTC
        # rather than in the machine's local time.
        html = f"""<!DOCTYPE html><html><head><style>
body{{font-family:-apple-system,sans-serif;background:#0f172a;margin:0;padding:20px}}
.container{{max-width:1000px;margin:0 auto;background:white;border-radius:12px;box-shadow:0 10px 40px rgba(0,0,0,0.4)}}
.header{{background:linear-gradient(135deg,#7c3aed,#4c1d95);color:white;padding:50px;text-align:center}}
.header h1{{margin:0;font-size:38px;font-weight:900}}
.mode-badge{{background:{mode_color};padding:12px 24px;border-radius:30px;margin-top:18px;font-weight:800}}
.stats{{background:#fef3c7;padding:25px;margin:25px;border-radius:10px}}
.stat-grid{{display:grid;grid-template-columns:repeat(auto-fit,minmax(150px,1fr));gap:15px}}
.stat-item{{background:white;padding:15px;border-radius:8px;text-align:center}}
.stat-value{{font-size:28px;font-weight:900;color:#7c3aed}}
.stat-label{{font-size:12px;color:#6b7280;margin-top:5px}}
</style></head><body><div class="container">
<div class="header"><h1>üß† TRADE BEACON v18.1</h1><div class="mode-badge">{mode_badge}</div>
<p style="margin:20px 0 0">Iteration #{iteration} | {datetime.now(timezone.utc):%Y-%m-%d %H:%M UTC}</p></div>
<div class="stats"><div style="font-size:20px;font-weight:800;margin-bottom:15px">üß† RL Agent Stats</div>
<div class="stat-grid">
<div class="stat-item"><div class="stat-value">{rl_stats.get('total_trades',0)}</div><div class="stat-label">TRADES</div></div>
<div class="stat-item"><div class="stat-value">{rl_stats.get('win_rate',0)*100:.1f}%</div><div class="stat-label">WIN RATE</div></div>
<div class="stat-item"><div class="stat-value">${rl_stats.get('total_pnl',0):.2f}</div><div class="stat-label">TOTAL P&L</div></div>
<div class="stat-item"><div class="stat-value">{epsilon:.3f}</div><div class="stat-label">EPSILON</div></div>
<div class="stat-item"><div class="stat-value">{active_signals}</div><div class="stat-label">SIGNALS</div></div>
<div class="stat-item"><div class="stat-value">{rl_stats.get('pipeline_trades_learned',0)}</div><div class="stat-label">PIPELINE</div></div>
</div></div></div></body></html>"""
        msg.attach(MIMEText(html, 'html'))
        with smtplib.SMTP_SSL('smtp.gmail.com', 465, timeout=30) as srv:
            srv.login(GMAIL_USER, GMAIL_APP_PASSWORD)
            srv.send_message(msg)
        log(f"‚úÖ Email sent", "success")
    except Exception as e:
        log(f"‚ùå Email failed: {e}", "error")

def push_git(files, msg):
    """Stage ``files``, commit them with ``msg`` and push to ``origin/main``.

    Does nothing (returns ``False``) inside GitHub Actions or when no
    ``FOREX_PAT`` is configured.  The repository is ``SAVE_FOLDER`` when it
    contains a ``.git`` directory, otherwise ``BASE_FOLDER``; if neither is a
    repository it is cloned first.  Only files that exist under the repository
    are staged.  The process working directory is changed to the repository
    while git runs and is set to ``SAVE_FOLDER`` afterwards.

    Args:
        files: Iterable of paths relative to the repository root.
        msg: Commit message.

    Returns:
        bool: ``True`` only when ``git push`` exits with status 0; ``False``
        when pushing is disabled or any step raises.
    """
    if IN_GHA or not FOREX_PAT:
        return False
    try:
        repo_url = f"https://{GITHUB_USERNAME}:{FOREX_PAT}@github.com/{GITHUB_USERNAME}/{GITHUB_REPO}.git"
        repo_path = SAVE_FOLDER if (SAVE_FOLDER / ".git").exists() else BASE_FOLDER
        if not (repo_path / ".git").exists():
            subprocess.run(["git", "clone", repo_url, str(repo_path)], capture_output=True, timeout=60, check=True)
        os.chdir(repo_path)
        for f in files:
            if (repo_path / f).exists():
                subprocess.run(["git", "add", str(f)], check=False)
        # Commit and pull are allowed to fail (nothing to commit, already up
        # to date); only the push result decides the return value.
        subprocess.run(["git", "commit", "-m", msg], capture_output=True, check=False)
        subprocess.run(["git", "pull", "--rebase", "origin", "main"], capture_output=True, check=False)
        result = subprocess.run(["git", "push", "origin", "main"], capture_output=True, timeout=30)
        return result.returncode == 0
    except Exception:
        # Publishing is best-effort, but Ctrl-C / SystemExit must not be
        # swallowed, hence Exception rather than a bare except.
        return False
    finally:
        try:
            os.chdir(SAVE_FOLDER)
        except OSError:
            pass

def print_diagnostics(agent: ImprovedRLAgent, trade_history: list):
    """Log a health report for the RL agent.

    Reads ``agent.stats``, ``agent.epsilon`` and ``agent.memory`` and emits
    every line through ``log``; nothing is returned.  ``trade_history`` is
    accepted but not read.
    """
    def report_tier(value, first_cutoff, second_cutoff, below, between, above):
        # Emit exactly one of three verdicts depending on where value falls;
        # the two lower tiers are warnings, the top tier a success.
        if value < first_cutoff:
            log(below, "warn")
        elif value < second_cutoff:
            log(between, "warn")
        else:
            log(above, "success")

    rule = "=" * 70
    log("\n" + rule, "brain")
    log("üìä RL AGENT DIAGNOSTICS", "brain")
    log(rule, "brain")

    stats = agent.stats

    # Headline performance numbers.
    log(f"\nüéØ Overall Performance:", "brain")
    log(f"  Total Trades: {stats['total_trades']}", "info")
    log(f"  Win Rate: {stats['win_rate']*100:.1f}%", "info")
    log(f"  Total P&L: ${stats['total_pnl']:.2f}", "money")
    log(f"  Avg Reward: {stats['avg_reward']:.2f}", "info")
    log(f"  Profitable Trades: {stats['profitable_trades']}", "success")

    # Exploration and replay-memory state.
    log(f"\nüß† Learning Status:", "brain")
    log(f"  Current Epsilon: {agent.epsilon:.4f}", "info")
    log(f"  Exploration Rate: {agent.epsilon*100:.1f}%", "info")
    log(f"  Total Updates: {stats['total_updates']}", "info")
    held = len(agent.memory)
    fill_pct = held / MEMORY_SIZE * 100
    log(f"  Memory: {held}/{MEMORY_SIZE} ({fill_pct:.1f}%)", "info")
    q_history = stats.get('q_value_history')
    if q_history:
        # Summarise only the 100 most recent Q-values.
        window = q_history[-100:]
        log(f"  Q-Values: Œº={np.mean(window):.4f}, œÉ={np.std(window):.4f}", "info")

    # Three-tier verdicts on win rate, P&L and experience.
    log(f"\nüí° Health Check:", "brain")
    report_tier(
        stats['win_rate'], 0.25, 0.35,
        "  ‚ö†Ô∏è Win rate < 25% - Continue training",
        "  ‚ö° Win rate 25-35% - Getting better",
        "  ‚úÖ Win rate > 35% - Ready for live",
    )
    report_tier(
        stats['total_pnl'], -50, 0,
        "  ‚ö†Ô∏è Significant losses - Review strategy",
        "  ‚ö° Negative P&L - Continue learning",
        "  ‚úÖ Positive P&L - Strategy profitable",
    )
    report_tier(
        stats['total_trades'], 500, 1000,
        "  ‚ö†Ô∏è < 500 trades - Need more data",
        "  ‚ö° 500-1000 trades - Building foundation",
        "  ‚úÖ > 1000 trades - Good experience",
    )
    log(rule, "brain")

def _collect_current_prices(mode, data):
    """Return ``{pair: price}`` for every pair in ``PAIRS`` with a usable price.

    In ``WEEKEND_LEARNING`` mode the last 1h close from ``data`` is used.  In
    any other mode ``fetch_price`` is tried first and the last 1h close is the
    fallback; pairs that still have no truthy price are left out.
    """
    prices = {}
    for pair in PAIRS:
        has_hourly = pair in data and '1h' in data[pair]
        if mode == "WEEKEND_LEARNING":
            if has_hourly:
                prices[pair] = data[pair]['1h'].iloc[-1]['close']
        else:
            price = fetch_price(pair)
            if not price and has_hourly:
                price = data[pair]['1h'].iloc[-1]['close']
            if price:
                prices[pair] = price
    return prices


def _sl_tp_levels(direction, price, atr):
    """Return ``(sl, tp)`` at ATR multiples around ``price``; both equal ``price`` for HOLD."""
    if direction == 'BUY':
        return price - (atr * ATR_SL_MULTIPLIER), price + (atr * ATR_TP_MULTIPLIER)
    if direction == 'SELL':
        return price + (atr * ATR_SL_MULTIPLIER), price - (atr * ATR_TP_MULTIPLIER)
    return price, price


def main():
    """Run one full cycle: load data, learn, generate signals, persist and publish.

    Steps, in order: determine the mode and iteration; refresh pickles (live
    mode only) and load them; learn from pipeline trades when the pipeline
    database is connected; run the weekend backtest in weekend mode; collect
    current prices; close and learn from finished trades; produce one signal
    per pair (opening trades only in live mode while below ``MAX_POSITIONS``);
    save the signals JSON and agent state; print diagnostics; email (live mode
    only) and push the output files to git.

    Raises:
        ValueError: When no market data could be loaded.
        Exception: Any other error from the cycle is logged and re-raised.
            The pipeline database is closed in all cases.
    """
    log("=" * 70, "rocket")
    log("üß† TRADE BEACON v18.1 - FIXED WEEKEND LEARNING", "brain")
    log("=" * 70, "rocket")
    mode = get_weekend_mode()
    log(f"üìÖ Mode: {mode}", "info")
    iteration = increment_iteration()
    agent = ImprovedRLAgent()
    env = TradingEnvironment()
    confidence_system = ImprovedConfidence()
    pipeline_db = PipelineDatabase()
    try:
        log(f"\nüìä Iteration #{iteration} | {ENV_NAME} | {mode}", "info")
        if mode == "LIVE_TRADING":
            update_pickle_data()
        data = load_data(PICKLE_FOLDER)
        if not data:
            raise ValueError("‚ùå No data loaded")

        # Learn from trades recorded by the pipeline, when its database is reachable.
        if pipeline_db.conn:
            pipeline_trades_learned = agent.learn_from_pipeline_trades(pipeline_db, data)
            pipeline_stats = pipeline_db.get_pipeline_stats()
            log(f"\nüíæ Pipeline Stats:", "database")
            log(f"  Total Trades: {pipeline_stats.get('total_trades', 0)}", "database")
            log(f"  Win Rate: {pipeline_stats.get('win_rate', 0):.1f}%", "database")
            log(f"  Total P&L: ${pipeline_stats.get('total_pnl', 0.0):.5f}", "database")
            log(f"  Trades Learned by RL: {agent.stats.get('pipeline_trades_learned', 0)}", "database")
        else:
            pipeline_trades_learned = 0
            pipeline_stats = {}
            log("‚ö†Ô∏è Pipeline database not available", "warn")

        if mode == "WEEKEND_LEARNING":
            backtest_trades = run_weekend_backtest(data, agent, confidence_system)
            log(f"üéì Weekend: {backtest_trades} backtest + {pipeline_trades_learned} pipeline = {backtest_trades + pipeline_trades_learned} total", "brain")

        log("\nüíπ Fetching prices...", "info")
        current_prices = _collect_current_prices(mode, data)

        log("\nüîç Checking trades...", "info")
        completed_trades = env.check_exits(current_prices)
        if completed_trades:
            log(f"\nüéì Learning from {len(completed_trades)} completed trades...", "brain")
            for trade_outcome in completed_trades:
                agent.record_trade(trade_outcome)

        log("\nüß† Generating signals...", "brain")
        action_map = {0: 'BUY', 1: 'SELL', 2: 'HOLD'}
        signals = {}
        for pair in PAIRS:
            # Both timeframes are needed to build the state vector.
            if pair not in data or '1h' not in data[pair] or '1d' not in data[pair]:
                signals[pair] = {'direction': 'HOLD', 'last_price': current_prices.get(pair, 0)}
                continue
            state = create_state_vector(data[pair]['1h'], data[pair]['1d'], pair)
            q_values = agent.q_network.predict(state)
            should_trade, confidence, metrics = confidence_system.get_confidence(q_values, agent.epsilon)
            best_action = np.argmax(q_values)
            direction = action_map[best_action]
            if not should_trade:
                direction = 'HOLD'
            price = current_prices.get(pair, 0)
            if direction != 'HOLD' and not price > 0:
                # A missing, zero or NaN price would give meaningless SL/TP
                # levels and a division by zero when sizing a live position.
                log(f"  {pair}: no usable price - forcing HOLD", "warn")
                direction = 'HOLD'
            atr = data[pair]['1h']['atr'].iloc[-1]
            sl, tp = _sl_tp_levels(direction, price, atr)
            signals[pair] = {'direction': direction, 'last_price': price, 'SL': float(sl), 'TP': float(tp),
                'confidence': confidence, 'threshold': metrics['threshold'],
                'timestamp': datetime.now(timezone.utc).isoformat()}
            log(f"  {pair}: Q={q_values[best_action]:.3f}, Conf={confidence:.1f}%, Thresh={metrics['threshold']:.1f}%, {direction}", "brain")
            if direction != 'HOLD' and len(env.active_trades) < MAX_POSITIONS and mode == "LIVE_TRADING":
                # Risk-based size, scaled by confidence, then capped by notional.
                base_size = (BASE_CAPITAL * MAX_RISK_PER_TRADE) / (abs(price - sl) + EPS)
                position_size = confidence_system.calculate_position_size(base_size, confidence)
                position_size = min(position_size, MAX_TRADE_CAP / price)
                env.execute_trade(pair, direction, price, sl, tp, position_size, state,
                    {'confidence': confidence, 'regime': 'RL', 'session': 'LIVE'})

        log("\nüíæ Saving...", "info")
        output = {'timestamp': datetime.now(timezone.utc).isoformat(), 'iteration': iteration,
            'version': 'v18.1-fixed-weekend-learning', 'mode': mode, 'signals': signals,
            'rl_stats': agent.stats, 'active_trades': len(env.active_trades), 'pipeline_stats': pipeline_stats}
        persistence.save_json(OMEGA_SIGNALS_FILE, output)
        agent.save_state()
        print_diagnostics(agent, env.trade_history)
        if mode == "LIVE_TRADING":
            send_email(signals, iteration, agent.stats, mode, pipeline_stats)
        else:
            log("üìß Email skipped (weekend mode)", "info")

        # Paths are relative to the repository root used by push_git.
        files = [f"outputs/{OMEGA_SIGNALS_FILE.name}", f"omega_state/{OMEGA_ITERATION_FILE.name}",
            f"rl_memory/{RL_MEMORY_FILE.name}", f"rl_memory/{RL_LEARNING_STATS_FILE.name}",
            f"rl_memory/{TRADE_HISTORY_FILE.name}", f"rl_memory/{PIPELINE_SYNC_FILE.name}",
            f"rl_memory/{RL_NETWORK_WEIGHTS_FILE.name}"]
        commit_msg = f"üß† v18.1 #{iteration} [{mode}] WR={agent.stats['win_rate']*100:.1f}% P&L=${agent.stats['total_pnl']:.2f}"
        push_git(files, commit_msg)

        log("\n" + "=" * 70, "success")
        log("‚úÖ CYCLE COMPLETE", "success")
        log("=" * 70, "success")
        log(f"Iteration: #{iteration} ({ENV_NAME})", "info")
        log(f"Mode: {mode}", "info")
        log(f"RL Trades: {agent.stats['total_trades']}", "brain")
        log(f"Pipeline Trades: {agent.stats.get('pipeline_trades_learned', 0)}", "database")
        log(f"Win Rate: {agent.stats['win_rate']*100:.1f}%", "info")
        log(f"Total P&L: ${agent.stats['total_pnl']:.2f}", "money")
        log(f"Active Trades: {len(env.active_trades)}", "info")
        log(f"Epsilon: {agent.epsilon:.3f}", "info")
        log(f"Memory: {len(agent.memory)} samples", "brain")
        if agent.stats['total_trades'] > 100:
            if agent.stats['win_rate'] >= 0.35:
                log("\n‚úÖ Agent ready for cautious live trading", "success")
            elif agent.stats['win_rate'] >= 0.25:
                log("\n‚ö° Continue weekend training before live", "warn")
            else:
                log("\n‚ö†Ô∏è More training needed", "warn")
    except Exception as e:
        log(f"\n‚ùå Error: {e}", "error")
        logging.exception("Fatal error")
        raise
    finally:
        # Runs on success and failure alike so the database handle is released.
        if pipeline_db.conn:
            pipeline_db.close()
        log(f"\nüß† Cycle complete (Iteration #{iteration})", "brain")

# Entry point: run one cycle only when this module is executed directly,
# not when it is imported.
if __name__ == "__main__":
    main()