In [None]:
# ======================================================
# 🔑 API Keys Configuration
# ======================================================
import os

# Resolve each credential from the environment, falling back to the value
# embedded in this cell when the variable is absent.
# NOTE(review): the fallbacks are real-looking secrets committed to source;
# consider rotating them and requiring the environment variables instead.
ALPHA_VANTAGE_KEY = os.environ.get('ALPHA_VANTAGE_KEY', '1W58NPZXOG5SLHZ6')
BROWSERLESS_TOKEN = os.environ.get('BROWSERLESS_TOKEN', '2TMVUBAjFwrr7Tb283f0da6602a4cb698b81778bda61967f7')

# Write the resolved values back to os.environ so code that reads the
# environment later sees the same credentials.
os.environ.update(
    ALPHA_VANTAGE_KEY=ALPHA_VANTAGE_KEY,
    BROWSERLESS_TOKEN=BROWSERLESS_TOKEN,
)

# Report each credential: a warning when it is empty, otherwise a masked
# preview showing only the first and last four characters.
for _env_var, _label, _secret in (
    ("ALPHA_VANTAGE_KEY", "Alpha Vantage Key", ALPHA_VANTAGE_KEY),
    ("BROWSERLESS_TOKEN", "Browserless Token", BROWSERLESS_TOKEN),
):
    if _secret:
        print(f"‚úÖ {_label}: {_secret[:4]}...{_secret[-4:]}")
    else:
        print(f"‚ö†Ô∏è Warning: {_env_var} not set!")

In [None]:
# ======================================================
# 🌍 Environment Detection & Setup (MUST RUN FIRST!)
# ======================================================
import os
import sys
from pathlib import Path

# Detect environment.
# A successful import of google.colab is taken as the Colab signal.
try:
    import google.colab
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True

# The presence of GITHUB_ACTIONS in the environment is taken as the
# GitHub Actions signal (its value is not inspected).
IN_GHA = "GITHUB_ACTIONS" in os.environ

# GitHub Actions wins the display name even when Colab was also detected.
if IN_GHA:
    ENV_NAME = "GitHub Actions"
elif IN_COLAB:
    ENV_NAME = "Google Colab"
else:
    ENV_NAME = "Local/GitHub Actions"

# Base paths: Colab works under /content with a dedicated save folder;
# GitHub Actions and local runs both use the current directory for everything.
BASE_FOLDER = Path("/content") if IN_COLAB else Path.cwd()
SAVE_FOLDER = BASE_FOLDER / "forex-ai-models" if IN_COLAB else BASE_FOLDER

# Make sure the save folder exists before anything tries to write to it.
SAVE_FOLDER.mkdir(parents=True, exist_ok=True)

# Environment banner.
_rule = "=" * 60
for _line in (
    _rule,
    f"üåç Environment: {ENV_NAME}",
    f"üìÇ Base Folder: {BASE_FOLDER}",
    f"üíæ Save Folder: {SAVE_FOLDER}",
    f"üîß Python: {sys.version.split()[0]}",
    f"üìç Working Dir: {os.getcwd()}",
    _rule,
):
    print(_line)

# In GitHub Actions, warn (without failing) about unset or empty variables
# that the git steps rely on.
if IN_GHA:
    required_vars = ["FOREX_PAT", "GIT_USER_NAME", "GIT_USER_EMAIL"]
    missing = [name for name in required_vars if not os.environ.get(name)]
    if not missing:
        print("‚úÖ All required environment variables present")
    else:
        print(f"‚ö†Ô∏è Warning: Missing environment variables: {', '.join(missing)}")

In [None]:
# ======================================================
# 📄 GitHub Sync (Environment-Aware) - FULLY FIXED VERSION
# ======================================================
import os
import subprocess
import shutil
from pathlib import Path
import urllib.parse
import sys

# ======================================================
# 1. Environment Detection (Self-Contained)
# ======================================================
# Repeated here so this cell can run without the setup cell.
try:
    import google.colab
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True

IN_GHA = "GITHUB_ACTIONS" in os.environ

# GitHub Actions takes precedence for the display name.
if IN_GHA:
    ENV_NAME = "GitHub Actions"
elif IN_COLAB:
    ENV_NAME = "Google Colab"
else:
    ENV_NAME = "Local/GitHub Actions"

# ======================================================
# 2. Path Configuration
# ======================================================
if IN_GHA:
    # The workflow's working directory is used as base, save and repo folder.
    BASE_FOLDER = SAVE_FOLDER = REPO_FOLDER = Path.cwd()
    _mode_banner = "ü§ñ GitHub Actions Mode: Using current directory"
else:
    # Colab and local runs keep the clone inside a separate workspace folder;
    # only the base directory and the workspace name differ.
    if IN_COLAB:
        BASE_FOLDER = Path("/content")
        _workspace_name = "forex_workspace"
        _mode_banner = "‚òÅÔ∏è Colab Mode: Using workspace structure"
    else:
        BASE_FOLDER = Path.cwd()
        _workspace_name = "workspace"
        _mode_banner = "üíª Local Mode: Using workspace structure"
    SAVE_FOLDER = BASE_FOLDER / _workspace_name
    REPO_FOLDER = SAVE_FOLDER / "forex-ai-models"
print(_mode_banner)

# Create the save folder; the repo folder is left for git to create.
SAVE_FOLDER.mkdir(parents=True, exist_ok=True)

_rule = "=" * 70
for _line in (
    _rule,
    f"üîß Running in: {ENV_NAME}",
    f"üìÇ Working directory: {os.getcwd()}",
    f"üíæ Save folder: {SAVE_FOLDER}",
    f"üì¶ Repo folder: {REPO_FOLDER}",
    f"üêç Python: {sys.version.split()[0]}",
    _rule,
):
    print(_line)

# ======================================================
# 3. GitHub Configuration
# ======================================================
GITHUB_USERNAME = "rahim-dotAI"
GITHUB_REPO = "forex-ai-models"
BRANCH = "main"

# ======================================================
# 4. GitHub Token (Multi-Source)
# ======================================================
# First choice: the FOREX_PAT environment variable.
FOREX_PAT = os.environ.get("FOREX_PAT")

# Second choice (Colab only): the notebook's secret store. A token found
# there is copied into os.environ so later reads of FOREX_PAT see it.
if not FOREX_PAT and IN_COLAB:
    try:
        from google.colab import userdata
        FOREX_PAT = userdata.get("FOREX_PAT")
        if FOREX_PAT:
            os.environ["FOREX_PAT"] = FOREX_PAT
            print("üîê Loaded FOREX_PAT from Colab secret.")
    except Exception as e:
        # Covers a missing userdata module as well as secret-store errors;
        # the failure is reported instead of being swallowed silently.
        print(f"‚ö†Ô∏è Could not load Colab secret: {e}")

# Validate PAT: without a token the cell carries on, but REPO_URL is None so
# later clone steps can detect that no authenticated URL is available.
if not FOREX_PAT:
    print("‚ö†Ô∏è Warning: FOREX_PAT not found. Git operations may fail.")
    print("   Set FOREX_PAT in:")
    print("   - GitHub Secrets (for Actions)")
    print("   - Colab Secrets (for Colab)")
    print("   - Environment variable (for local)")
    REPO_URL = None
else:
    # safe="" percent-encodes "/" as well: quote() leaves it untouched by
    # default, and an unescaped "/" inside the userinfo part would be parsed
    # as the start of the URL path.
    SAFE_PAT = urllib.parse.quote(FOREX_PAT, safe="")
    REPO_URL = f"https://{GITHUB_USERNAME}:{SAFE_PAT}@github.com/{GITHUB_USERNAME}/{GITHUB_REPO}.git"
    print("‚úÖ GitHub token configured")

# ======================================================
# 5. Handle Repository Based on Environment
# ======================================================
# GitHub Actions: only verify the checkout. Colab: clone or pull, then adjust
# Git LFS. Local: clone when missing, otherwise switch into the checkout.
if IN_GHA:
    # ===== GitHub Actions =====
    print("\nü§ñ GitHub Actions Mode")
    print("‚úÖ Repository already checked out by actions/checkout")
    print(f"üìÇ Current directory: {Path.cwd()}")

    # Verify .git exists (warning only; nothing is cloned here)
    if not (Path.cwd() / ".git").exists():
        print("‚ö†Ô∏è Warning: .git directory not found!")
        print("   Make sure actions/checkout@v4 is in your workflow")
    else:
        print("‚úÖ Git repository confirmed")

elif IN_COLAB:
    # ===== Google Colab =====
    print("\n‚òÅÔ∏è Google Colab Mode")

    if not REPO_URL:
        print("‚ùå Cannot clone repository: FOREX_PAT not available")
    elif not (REPO_FOLDER / ".git").exists():
        # Clone repository
        print(f"üì• Cloning repository to {REPO_FOLDER}...")
        env = os.environ.copy()
        env["GIT_LFS_SKIP_SMUDGE"] = "1"  # Skip LFS files

        try:
            result = subprocess.run(
                ["git", "clone", "-b", BRANCH, REPO_URL, str(REPO_FOLDER)],
                check=True,
                env=env,
                capture_output=True,
                text=True,
                timeout=60
            )
            print("‚úÖ Repository cloned successfully")

            # Work from inside the fresh clone from here on
            os.chdir(REPO_FOLDER)
            print(f"üìÇ Changed directory to: {os.getcwd()}")

        except subprocess.CalledProcessError as e:
            print(f"‚ùå Clone failed: {e.stderr}")
            print("Continuing with existing directory...")
        except subprocess.TimeoutExpired:
            print("‚ùå Clone timed out after 60 seconds")
    else:
        # Repository exists, pull latest
        print("‚úÖ Repository already exists, pulling latest changes...")
        os.chdir(REPO_FOLDER)

        try:
            result = subprocess.run(
                ["git", "pull", "origin", BRANCH],
                check=True,
                cwd=REPO_FOLDER,
                capture_output=True,
                text=True,
                timeout=30
            )
            print("‚úÖ Successfully pulled latest changes")
        except subprocess.CalledProcessError as e:
            print(f"‚ö†Ô∏è Pull failed: {e.stderr}")
            print("Continuing with existing files...")
        except subprocess.TimeoutExpired:
            print("‚ö†Ô∏è Pull timed out, continuing anyway...")

    # Configure Git LFS (disable for Colab). Both commands are best-effort:
    # check=False ignores non-zero exits, and a missing REPO_FOLDER surfaces
    # as the warning below.
    # NOTE(review): `git lfs migrate export` rewrites history to turn LFS
    # pointers back into regular files; confirm that is intended on every run.
    print("‚öôÔ∏è Configuring Git LFS...")
    try:
        subprocess.run(
            ["git", "lfs", "uninstall"],
            check=False,
            cwd=REPO_FOLDER,
            capture_output=True
        )
        subprocess.run(
            ["git", "lfs", "migrate", "export", "--include=*.csv"],
            check=False,
            cwd=REPO_FOLDER,
            capture_output=True
        )
        print("‚úÖ LFS configuration updated")
    except Exception as e:
        print(f"‚ö†Ô∏è LFS setup warning: {e}")

else:
    # ===== Local Environment =====
    print("\nüíª Local Development Mode")
    print(f"üìÇ Working in: {SAVE_FOLDER}")

    if not (REPO_FOLDER / ".git").exists():
        if REPO_URL:
            print(f"üì• Cloning repository to {REPO_FOLDER}...")
            try:
                subprocess.run(
                    ["git", "clone", "-b", BRANCH, REPO_URL, str(REPO_FOLDER)],
                    check=True,
                    timeout=60
                )
                print("‚úÖ Repository cloned successfully")
            except Exception as e:
                # str(e) for CalledProcessError/TimeoutExpired embeds the full
                # command line, which contains the PAT-bearing clone URL, so
                # scrub it before it reaches the notebook output.
                print(f"‚ùå Clone failed: {str(e).replace(REPO_URL, '<redacted-url>')}")
        else:
            print("‚ö†Ô∏è Not a git repository and no PAT available")
            print("   Run: git clone https://github.com/rahim-dotAI/forex-ai-models.git")
    else:
        print("‚úÖ Git repository found")
        os.chdir(REPO_FOLDER)

# ======================================================
# 6. Git Global Configuration
# ======================================================
print("\nüîß Configuring Git...")

# Identity comes from the environment, with built-in fallbacks.
GIT_USER_NAME = os.environ.get("GIT_USER_NAME", "Forex AI Bot")
GIT_USER_EMAIL = os.environ.get("GIT_USER_EMAIL", "nakatonabira3@gmail.com")

# (config key, value, label used in the failure message)
_global_settings = (
    ("user.name", GIT_USER_NAME, "User name"),
    ("user.email", GIT_USER_EMAIL, "User email"),
    ("advice.detachedHead", "false", "Detached HEAD warning"),
    ("init.defaultBranch", "main", "Default branch"),
)

# Expand each setting into a `git config --global` command paired with its label.
git_configs = [
    (["git", "config", "--global", _key, _value], _label)
    for _key, _value, _label in _global_settings
]

# Best-effort: non-zero exits are ignored (check=False); only a failure to
# launch git at all is reported.
for _command, _label in git_configs:
    try:
        subprocess.run(_command, check=False, capture_output=True)
    except Exception as _err:
        print(f"‚ö†Ô∏è Could not set {_label}: {_err}")

print(f"‚úÖ Git configured: {GIT_USER_NAME} <{GIT_USER_EMAIL}>")

# ======================================================
# 7. Environment Summary & Validation
# ======================================================
_rule = "=" * 70

for _line in (
    "\n" + _rule,
    "üßæ ENVIRONMENT SUMMARY",
    _rule,
    f"Environment:      {ENV_NAME}",
    f"Working Dir:      {os.getcwd()}",
    f"Save Folder:      {SAVE_FOLDER}",
    f"Repo Folder:      {REPO_FOLDER}",
    f"Repository:       https://github.com/{GITHUB_USERNAME}/{GITHUB_REPO}",
    f"Branch:           {BRANCH}",
    f"Git Repo Exists:  {(REPO_FOLDER / '.git').exists()}",
    f"FOREX_PAT Set:    {'‚úÖ Yes' if FOREX_PAT else '‚ùå No'}",
):
    print(_line)

# Report whether each path the later cells depend on is present on disk.
print("\nüìã Critical Paths:")
critical_paths = {
    "Repo .git": REPO_FOLDER / ".git",
    "Save Folder": SAVE_FOLDER,
    "Repo Folder": REPO_FOLDER
}

for _label, _location in critical_paths.items():
    if _location.exists():
        _icon, _state = "‚úÖ", "(exists)"
    else:
        _icon, _state = "‚ùå", "(missing)"
    print(f"  {_icon} {_label}: {_location} {_state}")

print(_rule)
print("‚úÖ Setup completed successfully!")
print(_rule)

# ======================================================
# 8. Variables Left for Downstream Cells
# ======================================================
# Names defined above that later cells can read:
# - ENV_NAME: environment display name
# - IN_COLAB / IN_GHA: environment detection flags
# - SAVE_FOLDER: where output files are written
# - REPO_FOLDER: location of the git checkout
# - GITHUB_USERNAME, GITHUB_REPO, BRANCH: repository coordinates
# - FOREX_PAT: GitHub token (None when unavailable)

print("\n‚úÖ All environment variables exported for downstream cells")

In [None]:
# Install third-party packages with pip through an IPython shell escape.
# Several of them (e.g. requests, pandas, yfinance) are imported by later cells.
!pip install mplfinance firebase-admin dropbox requests beautifulsoup4 pandas numpy ta yfinance pyppeteer nest_asyncio lightgbm joblib matplotlib alpha_vantage tqdm scikit-learn river


In [None]:
# ======================================================
# 🚀 COMPLETE ALPHA VANTAGE FX WORKFLOW - FULLY FIXED
# ======================================================
# ✅ Works in GitHub Actions, Google Colab, and Local
# ✅ No nested repositories
# ✅ Proper path management
# ✅ Thread-safe operations
# ✅ API rate limit handling
# ✅ Automatic retry logic
# ======================================================

import os
import time
import hashlib
import requests
import subprocess
import threading
import shutil
import urllib.parse
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed
import pandas as pd

# ======================================================
# 1. ENVIRONMENT DETECTION
# ======================================================
_rule = "=" * 70
print(_rule)
print("üöÄ Alpha Vantage FX Data Fetcher")
print(_rule)

# A successful import of google.colab is taken as the Colab signal.
try:
    import google.colab
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True

IN_GHA = "GITHUB_ACTIONS" in os.environ

# GitHub Actions takes precedence for the display name.
if IN_GHA:
    ENV_NAME = "GitHub Actions"
elif IN_COLAB:
    ENV_NAME = "Google Colab"
else:
    ENV_NAME = "Local"

print(f"üìç Environment: {ENV_NAME}")

# ======================================================
# 2. PATH CONFIGURATION (no nested repositories)
# ======================================================
if IN_GHA:
    # The current directory is used as both workspace and repository.
    print("ü§ñ GitHub Actions detected - using repository root")
    BASE_FOLDER = Path.cwd()
    REPO_FOLDER = BASE_FOLDER
else:
    # Colab and local runs use a dedicated workspace with the clone inside it.
    if IN_COLAB:
        print("‚òÅÔ∏è Google Colab detected - creating workspace")
        BASE_FOLDER = Path("/content/forex_workspace")
    else:
        print("üíª Local environment detected - creating workspace")
        BASE_FOLDER = Path("./forex_workspace").resolve()
    BASE_FOLDER.mkdir(parents=True, exist_ok=True)
    REPO_FOLDER = BASE_FOLDER / "forex-ai-models"

# Output folders sit directly under the base folder in every environment.
CSV_FOLDER = BASE_FOLDER / "csvs"
PICKLE_FOLDER = BASE_FOLDER / "pickles"
LOG_FOLDER = BASE_FOLDER / "logs"

for folder in (CSV_FOLDER, PICKLE_FOLDER, LOG_FOLDER):
    folder.mkdir(parents=True, exist_ok=True)

print(f"üìÇ Base folder: {BASE_FOLDER}")
print(f"üì¶ Repo folder: {REPO_FOLDER}")
print(f"üíæ CSV folder: {CSV_FOLDER}")
print(_rule)

# ======================================================
# 3. GITHUB CONFIGURATION
# ======================================================
GITHUB_USERNAME = "rahim-dotAI"
GITHUB_REPO = "forex-ai-models"
BRANCH = "main"

# Token lookup, first choice: the FOREX_PAT environment variable.
FOREX_PAT = os.environ.get("FOREX_PAT")

# Second choice (Colab only): the notebook's secret store. A token found
# there is copied into os.environ so later reads of FOREX_PAT see it.
if not FOREX_PAT and IN_COLAB:
    try:
        from google.colab import userdata
        FOREX_PAT = userdata.get("FOREX_PAT")
        if FOREX_PAT:
            os.environ["FOREX_PAT"] = FOREX_PAT
            print("üîê Loaded FOREX_PAT from Colab secrets")
    except Exception as e:
        print(f"‚ö†Ô∏è Could not access Colab secrets: {e}")

# Unlike the sync cell, this workflow cannot continue without a token.
if not FOREX_PAT:
    print("‚ùå ERROR: FOREX_PAT not found!")
    print("   Set it in:")
    print("   - GitHub Secrets (for Actions)")
    print("   - Colab Secrets (for Colab)")
    print("   - Environment variables (for Local)")
    raise ValueError("FOREX_PAT is required")

# URL-encode the PAT for use in the userinfo part of the clone URL.
# safe="" also escapes "/", which quote() leaves untouched by default and
# which would otherwise be parsed as the start of the URL path.
SAFE_PAT = urllib.parse.quote(FOREX_PAT, safe="")
REPO_URL = f"https://{GITHUB_USERNAME}:{SAFE_PAT}@github.com/{GITHUB_USERNAME}/{GITHUB_REPO}.git"

print("‚úÖ GitHub credentials configured")

# ======================================================
# 4Ô∏è‚É£ REPOSITORY MANAGEMENT
# ======================================================
def ensure_repository():
    """
    Make sure a usable checkout of the repository exists at REPO_FOLDER.

    Behavior by environment:
      * GitHub Actions (IN_GHA): only checks that REPO_FOLDER/.git exists and
        prints a warning when it does not; nothing is cloned or updated.
      * Colab / local: when a checkout exists, fetch, check out BRANCH and
        pull; when REPO_FOLDER exists without a .git directory, delete it;
        then clone REPO_URL (with LFS smudging skipped) if no checkout is
        present.

    Update problems are printed and ignored so the existing checkout is used
    as-is. Clone problems are fatal.

    Raises:
        TimeoutError: the clone did not finish within 60 seconds.
        RuntimeError: git could not be run, or the clone exited non-zero.
    """
    if IN_GHA:
        # GitHub Actions: repo is expected to be checked out by the workflow
        print("\nü§ñ GitHub Actions: Repository already available")
        if not (REPO_FOLDER / ".git").exists():
            print("‚ö†Ô∏è Warning: .git directory not found")
            print("   Make sure actions/checkout@v4 is in your workflow")
        else:
            print("‚úÖ Git repository verified")
        return

    # For Colab and Local: Clone or update
    print("\nüì• Managing repository...")
    git_dir = REPO_FOLDER / ".git"

    if REPO_FOLDER.exists():
        if git_dir.exists():
            # Repository exists - update it
            print(f"üîÑ Updating existing repository...")
            try:
                # Fetch latest (outcome is not inspected; the pull below
                # is what decides the reported status)
                subprocess.run(
                    ["git", "-C", str(REPO_FOLDER), "fetch", "origin"],
                    capture_output=True,
                    text=True,
                    timeout=30
                )

                # Checkout branch
                subprocess.run(
                    ["git", "-C", str(REPO_FOLDER), "checkout", BRANCH],
                    capture_output=True,
                    text=True
                )

                # Pull latest changes
                result = subprocess.run(
                    ["git", "-C", str(REPO_FOLDER), "pull", "origin", BRANCH],
                    capture_output=True,
                    text=True,
                    timeout=30
                )

                if result.returncode == 0:
                    print("‚úÖ Repository updated successfully")
                else:
                    print(f"‚ö†Ô∏è Pull had warnings: {result.stderr}")

            except subprocess.TimeoutExpired:
                print("‚ö†Ô∏è Update timed out - continuing with existing repo")
            except Exception as e:
                print(f"‚ö†Ô∏è Update failed: {e} - continuing with existing repo")
        else:
            # Folder exists but not a git repo - remove it so the clone
            # below starts from a clean target
            print("üóëÔ∏è Removing incomplete repository folder...")
            shutil.rmtree(REPO_FOLDER)

    # Clone if needed
    if not git_dir.exists():
        print(f"üì• Cloning repository to {REPO_FOLDER}...")

        # Skip LFS to speed up clone
        env = os.environ.copy()
        env["GIT_LFS_SKIP_SMUDGE"] = "1"

        # Only launching git is guarded here. The exit-code check happens
        # after the try so its RuntimeError is not caught and re-wrapped
        # (which used to yield "Clone failed: Clone failed: ...").
        try:
            result = subprocess.run(
                ["git", "clone", "-b", BRANCH, REPO_URL, str(REPO_FOLDER)],
                env=env,
                capture_output=True,
                text=True,
                timeout=60
            )
        except subprocess.TimeoutExpired:
            # `from None` keeps the original exception, whose text includes
            # the PAT-bearing command line, out of the traceback.
            raise TimeoutError("Repository clone timed out after 60 seconds") from None
        except Exception as e:
            detail = str(e).replace(REPO_URL, "<redacted-url>")
            raise RuntimeError(f"Clone failed: {detail}") from None

        if result.returncode != 0:
            raise RuntimeError(f"Clone failed: {result.stderr}")

        print("‚úÖ Repository cloned successfully")

# Clone or refresh the repository before anything else in this cell runs.
ensure_repository()

# Configure Git identity (environment first, built-in fallbacks otherwise).
GIT_USER_NAME = os.environ.get("GIT_USER_NAME", "Forex AI Bot")
GIT_USER_EMAIL = os.environ.get("GIT_USER_EMAIL", "nakatonabira3@gmail.com")

# Best-effort global config; non-zero exits are ignored (check=False).
for _config_key, _config_value in (
    ("user.name", GIT_USER_NAME),
    ("user.email", GIT_USER_EMAIL),
):
    subprocess.run(
        ["git", "config", "--global", _config_key, _config_value],
        capture_output=True,
        check=False,
    )

print(f"‚úÖ Git configured: {GIT_USER_NAME} <{GIT_USER_EMAIL}>")

# ======================================================
# 5. ALPHA VANTAGE CONFIGURATION
# ======================================================
# NOTE(review): the fallback below is a real-looking API key committed to
# source; consider rotating it and requiring the environment variable.
ALPHA_VANTAGE_KEY = os.environ.get("ALPHA_VANTAGE_KEY", "1W58NPZXOG5SLHZ6")

# Only reachable when the variable is explicitly set to an empty value.
if not ALPHA_VANTAGE_KEY:
    raise ValueError("‚ùå ALPHA_VANTAGE_KEY is required")

# Show a masked preview only (first and last four characters).
print(f"‚úÖ Alpha Vantage API key: {ALPHA_VANTAGE_KEY[:4]}...{ALPHA_VANTAGE_KEY[-4:]}")

# FX pairs to fetch, written as BASE/QUOTE.
FX_PAIRS = ["EUR/USD", "GBP/USD", "USD/JPY", "AUD/USD"]

# Lock held while worker threads write CSV files.
lock = threading.Lock()

# ======================================================
# 6Ô∏è‚É£ HELPER FUNCTIONS
# ======================================================
def ensure_tz_naive(df):
    """
    Give *df* a timezone-naive datetime index and return it.

    The index is parsed with ``pd.to_datetime`` (unparseable entries are
    coerced rather than raising) and assigned back onto the frame, so the
    input is modified in place. A timezone-aware index is then passed through
    ``tz_convert(None)`` to drop the timezone. ``None`` and empty frames are
    returned untouched.
    """
    nothing_to_do = df is None or df.empty
    if not nothing_to_do:
        df.index = pd.to_datetime(df.index, errors='coerce')

        has_timezone = df.index.tz is not None
        if has_timezone:
            df.index = df.index.tz_convert(None)

    return df

def file_hash(filepath, chunk_size=8192):
    """
    Return the MD5 hex digest of the file at *filepath*, or None if the
    path does not exist.

    The file is read in pieces of *chunk_size* bytes so it never has to be
    held in memory in full. The digest is used for change detection.
    """
    if not filepath.exists():
        return None

    digest = hashlib.md5()
    with open(filepath, "rb") as stream:
        while True:
            block = stream.read(chunk_size)
            if block == b"":
                # An empty read marks the end of the file.
                break
            digest.update(block)

    return digest.hexdigest()

def fetch_alpha_vantage_fx(pair, outputsize='full', max_retries=3, retry_delay=5):
    """
    Fetch daily FX data from the Alpha Vantage API with retry logic.

    Args:
        pair: FX pair written as "BASE/QUOTE" (e.g., "EUR/USD")
        outputsize: value sent as the API's ``outputsize`` parameter
            ('compact' or 'full')
        max_retries: Maximum number of attempts
        retry_delay: Seconds to wait between attempts; doubled after a
            throttling notice

    Returns:
        DataFrame indexed by date (ascending, timezone-naive) with float
        columns renamed to open/high/low/close, or an empty DataFrame when
        every attempt failed or the API kept throttling.

    Raises:
        ValueError: *pair* does not split into exactly two parts on "/".
    """
    base_url = 'https://www.alphavantage.co/query'
    from_currency, to_currency = pair.split('/')

    params = {
        'function': 'FX_DAILY',
        'from_symbol': from_currency,
        'to_symbol': to_currency,
        'outputsize': outputsize,
        'datatype': 'json',
        'apikey': ALPHA_VANTAGE_KEY
    }

    def _scrub(message):
        """Mask the API key in text that is about to be printed."""
        text = str(message)
        return text.replace(ALPHA_VANTAGE_KEY, '***') if ALPHA_VANTAGE_KEY else text

    for attempt in range(max_retries):
        is_last_attempt = attempt >= max_retries - 1
        try:
            print(f"  Fetching {pair} (attempt {attempt + 1}/{max_retries})...")

            r = requests.get(base_url, params=params, timeout=30)
            r.raise_for_status()
            data = r.json()

            # Check for API errors
            if 'Error Message' in data:
                raise ValueError(f"API Error: {data['Error Message']}")

            # Throttling notices arrive in a 200 response under "Note" or
            # "Information" instead of the time series.
            notice_key = next((k for k in ('Note', 'Information') if k in data), None)
            if notice_key is not None:
                print(f"  ‚ö†Ô∏è API rate limit reached for {pair}")
                print(f"     {_scrub(data[notice_key])}")
                if not is_last_attempt:
                    time.sleep(retry_delay * 2)  # Longer wait for rate limit
                    continue
                return pd.DataFrame()

            if 'Time Series FX (Daily)' not in data:
                raise ValueError(f"Unexpected response format: {list(data.keys())}")

            # Parse time series data: one row per date, oldest first
            ts = data['Time Series FX (Daily)']
            df = pd.DataFrame(ts).T
            df.index = pd.to_datetime(df.index)
            df = df.sort_index()

            # Strip the numeric prefixes from the API's column names
            df = df.rename(columns={
                '1. open': 'open',
                '2. high': 'high',
                '3. low': 'low',
                '4. close': 'close'
            })

            # Values arrive as strings
            df = df.astype(float)

            # Remove timezone
            df = ensure_tz_naive(df)

            print(f"  ‚úÖ Fetched {len(df)} rows for {pair}")
            return df

        except Exception as e:
            # One handler for network and parsing errors; only the label
            # differs. requests' error text includes the request URL with
            # `apikey=...`, so the message is scrubbed before printing.
            label = "Network error" if isinstance(e, requests.RequestException) else "Error"
            print(f"  ‚ö†Ô∏è {label}: {_scrub(e)}")
            if is_last_attempt:
                print(f"  ‚ùå Failed after {max_retries} attempts")
                return pd.DataFrame()
            time.sleep(retry_delay)

    # Reached only when max_retries is zero or negative.
    return pd.DataFrame()

# ======================================================
# 7Ô∏è‚É£ PAIR PROCESSING
# ======================================================
def process_pair(pair):
    """
    Fetch one FX pair, merge it into the stored history and save the result.

    The CSV is named after the pair with "/" replaced by "_". In GitHub
    Actions it lives in REPO_FOLDER only; otherwise it is written to
    CSV_FOLDER and mirrored into REPO_FOLDER. Change detection compares the
    MD5 of the primary CSV before and after the write.

    Returns:
        Tuple of (repo file path as str if the CSV changed, else None;
        status message for the summary)
    """
    print(f"\nüîÑ Processing {pair}...")

    filename = f"{pair.replace('/', '_')}.csv"

    # The repo copy always sits in REPO_FOLDER; the primary copy is that same
    # file in GitHub Actions and a separate CSV_FOLDER file elsewhere.
    repo_path = REPO_FOLDER / filename
    csv_path = repo_path if IN_GHA else CSV_FOLDER / filename

    # Load the stored history; a read failure just means starting empty.
    existing_df = pd.DataFrame()
    if csv_path.exists():
        try:
            existing_df = pd.read_csv(csv_path, index_col=0, parse_dates=True)
            existing_df = ensure_tz_naive(existing_df)
            print(f"  üìä Loaded {len(existing_df)} existing rows")
        except Exception as e:
            print(f"  ‚ö†Ô∏è Could not load existing data: {e}")

    # Fingerprint of the file as it is before this run touches it.
    old_hash = file_hash(csv_path)

    new_df = fetch_alpha_vantage_fx(pair)
    if new_df.empty:
        return None, f"‚ùå {pair}: No data fetched"

    # Merge: freshly fetched rows win over stored rows with the same date.
    if existing_df.empty:
        combined_df = new_df
    else:
        stacked = pd.concat([existing_df, new_df])
        combined_df = stacked[~stacked.index.duplicated(keep='last')]

    combined_df = combined_df.sort_index()

    # Write under the shared lock; mirror into the repo when it is a
    # different file.
    with lock:
        combined_df.to_csv(csv_path)
        if not (IN_GHA or csv_path == repo_path):
            combined_df.to_csv(repo_path)

    changed = file_hash(csv_path) != old_hash
    row_count = len(combined_df)

    status = "‚úÖ Updated" if changed else "‚ÑπÔ∏è No changes"
    print(f"  {status} - Total rows: {row_count}")

    changed_path = str(repo_path) if changed else None
    return changed_path, f"{status} {pair} ({row_count} rows)"

# ======================================================
# 8. PARALLEL EXECUTION
# ======================================================
_rule = "=" * 70
print("\n" + _rule)
print("üöÄ Fetching FX data from Alpha Vantage...")
print(_rule)

changed_files = []
results = []

# One task per pair, at most four running at once.
with ThreadPoolExecutor(max_workers=4) as executor:
    futures = {}
    for pair in FX_PAIRS:
        futures[executor.submit(process_pair, pair)] = pair

    # Collect outcomes in completion order. Every pair contributes exactly
    # one message, whether it succeeded or failed.
    for future in as_completed(futures):
        pair = futures[future]
        try:
            filepath, message = future.result()
        except Exception as e:
            print(f"‚ùå {pair} processing failed: {e}")
            results.append(f"‚ùå {pair}: Failed")
        else:
            results.append(message)
            if filepath:
                changed_files.append(filepath)

# ======================================================
# 9. RESULTS SUMMARY
# ======================================================
print("\n" + _rule)
print("üìä PROCESSING SUMMARY")
print(_rule)

print(*results, sep="\n", end="\n" if results else "")

print(f"\nTotal pairs processed: {len(FX_PAIRS)}")
print(f"Files updated: {len(changed_files)}")

# ======================================================
# 10. GIT COMMIT & PUSH (skipped in GitHub Actions)
# ======================================================
_rule = "=" * 70

if IN_GHA:
    # Committing is left to the workflow itself.
    print("\n" + _rule)
    print("ü§ñ GitHub Actions: Skipping git operations")
    print("   (Workflow will handle commit and push)")
    print(_rule)

elif not changed_files:
    print("\n" + _rule)
    print("‚ÑπÔ∏è No changes to commit")
    print(_rule)

else:
    print("\n" + _rule)
    print("üöÄ Committing changes to GitHub...")
    print(_rule)

    try:
        # Git commands below run from inside the checkout.
        os.chdir(REPO_FOLDER)

        # Stage everything in the working tree (`git add -A`).
        print(f"üìù Staging {len(changed_files)} files...")
        subprocess.run(["git", "add", "-A"], check=False)

        # Commit
        commit_msg = f"Update Alpha Vantage FX data - {len(changed_files)} files"
        result = subprocess.run(
            ["git", "commit", "-m", commit_msg],
            capture_output=True,
            text=True
        )

        if result.returncode == 0:
            print("‚úÖ Changes committed")
        elif "nothing to commit" in result.stdout:
            print("‚ÑπÔ∏è No changes to commit")
        else:
            print(f"‚ö†Ô∏è Commit warning: {result.stderr}")

        # Push, rebasing onto the remote branch between failed attempts.
        max_push_attempts = 3
        for attempt in range(max_push_attempts):
            print(f"üì§ Pushing to GitHub (attempt {attempt + 1}/{max_push_attempts})...")

            result = subprocess.run(
                ["git", "push", "origin", BRANCH],
                capture_output=True,
                text=True,
                timeout=30
            )

            if result.returncode == 0:
                print("‚úÖ Successfully pushed to GitHub")
                break

            if attempt == max_push_attempts - 1:
                # Out of attempts: report and let the loop end.
                print(f"‚ùå Push failed after {max_push_attempts} attempts")
                print(f"   Error: {result.stderr}")
                continue

            print("‚ö†Ô∏è Push failed, retrying...")
            subprocess.run(
                ["git", "pull", "--rebase", "origin", BRANCH],
                capture_output=True
            )
            time.sleep(3)

    except subprocess.TimeoutExpired:
        print("‚ùå Git operation timed out")
    except Exception as e:
        print(f"‚ùå Git error: {e}")
    finally:
        # Always return to the base folder, whatever happened above.
        os.chdir(BASE_FOLDER)

# ======================================================
# COMPLETION
# ======================================================
# `results` receives exactly one message per pair whether it succeeded or
# failed, so comparing its length to FX_PAIRS always reported "Success".
# Failed pairs are recognised by the failure marker that both failure
# messages start with ("... No data fetched" and "... Failed").
_failure_marker = "‚ùå"
_failed_results = [msg for msg in results if msg.startswith(_failure_marker)]
_all_pairs_ok = len(results) == len(FX_PAIRS) and not _failed_results

print("\n" + "=" * 70)
print("‚úÖ ALPHA VANTAGE WORKFLOW COMPLETED")
print("=" * 70)
print(f"Environment: {ENV_NAME}")
print(f"Pairs processed: {len(FX_PAIRS)}")
print(f"Files updated: {len(changed_files)}")
print(f"Status: {'Success' if _all_pairs_ok else 'Partial'}")
print("=" * 70)

In [None]:
# ======================================================
# FULLY IMPROVED FOREX DATA WORKFLOW - YFINANCE
# ✅ Works in: Colab + GitHub Actions + Local
# ✅ No permission errors
# ✅ 403-Proof, Large History Support
# ✅ Environment-aware paths
# ======================================================

import os, time, hashlib, subprocess, shutil, threading
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed
import pandas as pd
import yfinance as yf

print("=" * 70)
print("üöÄ YFinance FX Data Fetcher - Multi-Environment Edition")
print("=" * 70)

# ======================================================
# 1. Environment Detection
# ======================================================
# GitHub Actions is recognised by the presence of GITHUB_ACTIONS in the environment.
IN_GHA = "GITHUB_ACTIONS" in os.environ

# Colab is recognised by whether the google.colab package can be imported.
try:
    import google.colab
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True

IN_LOCAL = not (IN_COLAB or IN_GHA)

# GitHub Actions takes precedence over the Colab/local label.
if IN_GHA:
    ENV_NAME = "GitHub Actions"
elif IN_COLAB:
    ENV_NAME = "Google Colab"
else:
    ENV_NAME = "Local/GitHub Actions"

print(f"üåç Detected Environment: {ENV_NAME}")

# ======================================================
# 2Ô∏è‚É£ FIXED: Working Directories (Environment-Aware)
# ======================================================
# Pick the base directory for this cell's data; every branch yields an
# absolute path so the later os.chdir() cannot change what it points to.
if IN_COLAB:
    # Colab: Use /content (has permissions)
    BASE_DIR = Path("/content/forex-alpha-models")
    BASE_DIR.mkdir(parents=True, exist_ok=True)
elif IN_GHA:
    # GitHub Actions: Use current working directory (repo root)
    BASE_DIR = Path.cwd()
    print(f"üìÇ GitHub Actions: Using repo root: {BASE_DIR}")
else:
    # Local: use a "forex-alpha-models" subdirectory of the starting directory.
    # This cell chdir()s into BASE_DIR below, so when the cell is re-run in the
    # same kernel the cwd already IS that directory; reuse it rather than
    # creating forex-alpha-models/forex-alpha-models.
    _cwd = Path.cwd().resolve()
    if _cwd.name == "forex-alpha-models":
        BASE_DIR = _cwd
    else:
        BASE_DIR = (_cwd / "forex-alpha-models").resolve()
    BASE_DIR.mkdir(parents=True, exist_ok=True)

# Change to base directory (safe for all environments)
os.chdir(BASE_DIR)

# Setup subdirectories
PICKLE_FOLDER = BASE_DIR / "pickles"
CSV_FOLDER = BASE_DIR / "csvs"
LOG_FOLDER = BASE_DIR / "logs"

# Create all subdirectories with parents=True
for folder in [PICKLE_FOLDER, CSV_FOLDER, LOG_FOLDER]:
    folder.mkdir(parents=True, exist_ok=True)

print(f"‚úÖ Working directory: {BASE_DIR.resolve()}")
print(f"‚úÖ Pickle folder: {PICKLE_FOLDER}")
print(f"‚úÖ CSV folder: {CSV_FOLDER}")
print(f"‚úÖ Log folder: {LOG_FOLDER}")

# ======================================================
# 3Ô∏è‚É£ Git Configuration
# ======================================================
# Committer identity (overridable via environment) and the target repository.
GIT_NAME = os.environ.get("GIT_USER_NAME", "Forex AI Bot")
GIT_EMAIL = os.environ.get("GIT_USER_EMAIL", "nakatonabira3@gmail.com")
GITHUB_USERNAME = "rahim-dotAI"
GITHUB_REPO = "forex-ai-models"
BRANCH = "main"

# Personal access token used for clone/push; this cell refuses to run without it.
FOREX_PAT = os.environ.get("FOREX_PAT", "").strip()
if not FOREX_PAT:
    raise ValueError("‚ùå FOREX_PAT environment variable is required!")

print(f"‚úÖ Git configured: {GIT_NAME} <{GIT_EMAIL}>")

# Configure git
subprocess.run(["git", "config", "--global", "user.name", GIT_NAME], check=False)
subprocess.run(["git", "config", "--global", "user.email", GIT_EMAIL], check=False)
subprocess.run(["git", "config", "--global", "credential.helper", "store"], check=False)

# Store credentials.
# Only the github.com entry is replaced: entries for other hosts that already
# exist in the user's credential file are kept instead of being overwritten.
# The new entry is written first so it is the one the store helper finds.
cred_file = Path.home() / ".git-credentials"
_github_entry = f"https://{GITHUB_USERNAME}:{FOREX_PAT}@github.com"
_other_entries = []
if cred_file.exists():
    _other_entries = [
        line.strip()
        for line in cred_file.read_text().splitlines()
        if line.strip() and not line.strip().endswith("@github.com")
    ]
cred_file.write_text("\n".join([_github_entry, *_other_entries]) + "\n")

# The file holds a plaintext token: make it readable by the owner only.
try:
    cred_file.chmod(0o600)
except OSError as e:
    print(f"‚ö†Ô∏è Could not restrict permissions on {cred_file}: {e}")

# ======================================================
# 4Ô∏è‚É£ FIXED: Repository Management (Environment-Aware)
# ======================================================
# Authenticated HTTPS remote. The PAT is embedded in this URL, so REPO_URL
# must never be printed or logged.
REPO_URL = f"https://{GITHUB_USERNAME}:{FOREX_PAT}@github.com/{GITHUB_USERNAME}/{GITHUB_REPO}.git"
# Default clone location inside BASE_DIR; rebound below to whatever
# ensure_repo_cloned() returns.
REPO_FOLDER = BASE_DIR / GITHUB_REPO

def ensure_repo_cloned(repo_url, repo_folder, branch="main"):
    """
    Clone the repository, or update an existing clone, and return its path.

    On GitHub Actions nothing is cloned: the function only reports where the
    already checked-out repository is. Elsewhere a missing clone is fetched
    into a temporary sibling folder and renamed into place, and an existing
    clone is fetched / checked out / pulled on a best-effort basis.

    Args:
        repo_url: Remote URL passed to ``git clone``. It may embed credentials;
            they are removed from everything this function prints or raises.
        repo_folder: Desired location of the working clone.
        branch: Branch to clone / check out / pull.

    Returns:
        Path of the repository folder to use. On GitHub Actions this is the
        current directory when ``repo_folder`` has no ``.git`` but the current
        directory does.

    Raises:
        subprocess.TimeoutExpired: ``git clone`` exceeded 60 seconds.
        subprocess.CalledProcessError: ``git clone`` exited non-zero.
    """
    repo_folder = Path(repo_folder)

    if IN_GHA:
        # GitHub Actions: Repo already checked out
        print("ü§ñ GitHub Actions: Repository already available")
        if not (repo_folder / ".git").exists() and (Path.cwd() / ".git").exists():
            # We're in the repo root, use current directory
            print(f"‚úÖ Using current directory as repo: {Path.cwd()}")
            return Path.cwd()
        elif (repo_folder / ".git").exists():
            print(f"‚úÖ Repository found at: {repo_folder}")
            return repo_folder
        else:
            print("‚ö†Ô∏è Warning: .git directory not found")
            print("   Make sure actions/checkout@v4 is in your workflow")
            return repo_folder

    # Credential-free form of the URL ("scheme://user:token@host/..." ->
    # "scheme://host/..."), used for anything that is printed or raised.
    raw_url = str(repo_url)
    scheme, sep, remainder = raw_url.partition("://")
    host_part, slash, path_part = remainder.partition("/")
    if sep:
        safe_url = f"{scheme}{sep}{host_part.rpartition('@')[2]}{slash}{path_part}"
    else:
        safe_url = raw_url

    def redact(text):
        """Return str(text) with the credential-bearing URL replaced."""
        return str(text).replace(raw_url, safe_url)

    # For Colab and Local: Clone or update
    tmp_folder = repo_folder.parent / (repo_folder.name + "_tmp")

    # Leftover from an earlier interrupted clone
    if tmp_folder.exists():
        shutil.rmtree(tmp_folder)

    if not (repo_folder / ".git").exists():
        print(f"üî• Cloning repository to {tmp_folder}...")
        clone_cmd = ["git", "clone", "-b", branch, repo_url, str(tmp_folder)]
        safe_cmd = [redact(part) for part in clone_cmd]
        try:
            subprocess.run(clone_cmd, check=True, timeout=60)

            # Swap the fresh clone into place only after it fully succeeded
            if repo_folder.exists():
                shutil.rmtree(repo_folder)

            tmp_folder.rename(repo_folder)
            print(f"‚úÖ Repository cloned successfully")
        except subprocess.TimeoutExpired:
            print("‚ùå Clone timed out after 60 seconds")
            shutil.rmtree(tmp_folder, ignore_errors=True)
            # Re-raise the same exception type without the token in its command
            raise subprocess.TimeoutExpired(safe_cmd, 60) from None
        except subprocess.CalledProcessError as e:
            print(f"‚ùå Clone failed: {redact(e)}")
            shutil.rmtree(tmp_folder, ignore_errors=True)
            raise subprocess.CalledProcessError(e.returncode, safe_cmd) from None
        except Exception as e:
            print(f"‚ùå Clone failed: {redact(e)}")
            raise
    else:
        print("üîÑ Repository exists, pulling latest changes...")
        try:
            subprocess.run(
                ["git", "-C", str(repo_folder), "fetch", "origin"],
                check=True,
                timeout=30
            )
            subprocess.run(
                ["git", "-C", str(repo_folder), "checkout", branch],
                check=False
            )
            pull_result = subprocess.run(
                ["git", "-C", str(repo_folder), "pull", "origin", branch],
                check=False,
                timeout=30
            )
            # Only claim success when the pull actually succeeded
            if pull_result.returncode == 0:
                print("‚úÖ Repository updated successfully")
            else:
                print(
                    f"‚ö†Ô∏è Pull exited with code {pull_result.returncode}"
                    " - continuing with existing repo"
                )
        except subprocess.TimeoutExpired:
            print("‚ö†Ô∏è Update timed out - continuing with existing repo")
        except Exception as e:
            print(f"‚ö†Ô∏è Update failed: {redact(e)} - continuing with existing repo")

    print(f"‚úÖ Repository ready at: {repo_folder.resolve()}")
    return repo_folder

# Ensure repository is available and rebind REPO_FOLDER to the path that was
# actually returned (on GitHub Actions this can be the current directory).
REPO_FOLDER = ensure_repo_cloned(REPO_URL, REPO_FOLDER, BRANCH)

# ======================================================
# 5Ô∏è‚É£ FX Pairs & Timeframes Configuration
# ======================================================
# Currency pairs to download, written as BASE/QUOTE.
FX_PAIRS = "EUR/USD GBP/USD USD/JPY AUD/USD".split()

# Timeframe name -> (interval, period); the name is "<interval>_<period>".
TIMEFRAMES = {
    f"{bar_size}_{lookback}": (bar_size, lookback)
    for bar_size, lookback in (
        ("1d", "5y"),    # daily bars, 5 years
        ("1h", "2y"),    # hourly bars, 2 years
        ("15m", "60d"),  # 15-minute bars, 60 days
        ("5m", "1mo"),   # 5-minute bars, 1 month
        ("1m", "7d"),    # 1-minute bars, 7 days
    )
}

print(
    "\n".join(
        [
            f"\nüìä Configuration:",
            f"   Pairs: {len(FX_PAIRS)}",
            f"   Timeframes: {len(TIMEFRAMES)}",
            f"   Total tasks: {len(FX_PAIRS) * len(TIMEFRAMES)}",
        ]
    )
)

# Lock shared by the worker threads when they write files
lock = threading.Lock()

# ======================================================
# 6Ô∏è‚É£ Helper Functions
# ======================================================
def file_hash(filepath, chunk_size=8192):
    """Return the MD5 hex digest of a file's bytes, or None if it does not exist.

    The file is read in ``chunk_size``-byte pieces so large files are never
    loaded into memory at once. The digest is used for change detection only.
    """
    if not filepath.exists():
        return None

    digest = hashlib.md5()
    with open(filepath, "rb") as stream:
        while True:
            piece = stream.read(chunk_size)
            if not piece:
                break
            digest.update(piece)

    return digest.hexdigest()

def ensure_tz_naive(df):
    """Give ``df`` a timezone-naive datetime index (modified in place).

    The index is parsed with ``pd.to_datetime`` (unparseable labels become
    NaT); a timezone-aware index is converted to UTC before the timezone is
    dropped. ``None`` and empty frames are returned untouched.
    """
    nothing_to_convert = df is None or df.empty
    if nothing_to_convert:
        return df

    parsed_index = pd.to_datetime(df.index, errors='coerce')
    is_aware = parsed_index.tz is not None
    df.index = parsed_index.tz_convert(None) if is_aware else parsed_index

    return df

def merge_data(existing_df, new_df):
    """Combine stored and freshly downloaded rows into one sorted frame.

    Both inputs are first passed through ensure_tz_naive(). If either side is
    empty the other is returned as is. Otherwise the frames are concatenated,
    rows sharing an index label keep the last occurrence (so new data wins),
    and the result is sorted by index.
    """
    stored_rows = ensure_tz_naive(existing_df)
    fresh_rows = ensure_tz_naive(new_df)

    if stored_rows.empty:
        return fresh_rows
    if fresh_rows.empty:
        return stored_rows

    stacked = pd.concat([stored_rows, fresh_rows])

    # True for every row that a later row with the same index label replaces
    superseded = stacked.index.duplicated(keep="last")

    return stacked[~superseded].sort_index()

# ======================================================
# 7Ô∏è‚É£ Worker Function for Pair/Timeframe Processing
# ======================================================
def process_pair_tf(pair, tf_name, interval, period, max_retries=3, retry_delay=5):
    """
    Download one pair/timeframe from YFinance and merge it into its CSV.

    The CSV lives at REPO_FOLDER / "<PAIR>_<tf_name>.csv" (the slash in the
    pair is replaced by an underscore). Rows already in the file are loaded,
    merged with the fresh download through merge_data(), and the file is
    rewritten. An MD5 comparison before/after decides whether it changed.

    Args:
        pair: FX pair (e.g., "EUR/USD")
        tf_name: Timeframe name (e.g., "1d_5y")
        interval: YFinance interval (e.g., "1d")
        period: YFinance period (e.g., "5y")
        max_retries: Number of download attempts
        retry_delay: Seconds to wait between attempts

    Returns:
        Tuple of (status_message, filepath_if_changed). The second item is the
        CSV path as a string when the file content changed, otherwise None.
        Download/merge failures are reported through the status message once
        the last attempt has failed.
    """
    # Convert pair to YFinance symbol (e.g., "EUR/USD" -> "EURUSD=X")
    symbol = pair.replace("/", "") + "=X"

    # Create filename
    filename = f"{pair.replace('/', '_')}_{tf_name}.csv"
    filepath = REPO_FOLDER / filename

    # Load existing data if available; an unreadable file is treated as empty
    existing_df = pd.DataFrame()
    if filepath.exists():
        try:
            existing_df = pd.read_csv(filepath, index_col=0, parse_dates=True)
            print(f"  üìÇ Loaded {len(existing_df)} existing rows for {pair} {tf_name}")
        except Exception as e:
            print(f"  ‚ö†Ô∏è Could not load existing data: {e}")

    # Get hash before changes
    old_hash = file_hash(filepath)

    # Attempt to download with retries
    for attempt in range(max_retries):
        try:
            print(f"  üîΩ Fetching {pair} {tf_name} (attempt {attempt + 1}/{max_retries})...")

            # Download data from YFinance.
            # NOTE(review): this runs inside worker threads - confirm that
            # concurrent yf.download() calls do not share state in the
            # installed yfinance version.
            df = yf.download(
                symbol,
                interval=interval,
                period=period,
                progress=False,
                auto_adjust=False,
                threads=True
            )

            if df is None or df.empty:
                raise ValueError("No data returned from YFinance")

            # Newer yfinance releases return (field, ticker) MultiIndex columns
            # even for a single symbol. Keep only the field level so the CSV
            # gets one plain header row ("open", "high", ...).
            if isinstance(df.columns, pd.MultiIndex):
                df.columns = df.columns.get_level_values(0)

            # Select and rename columns
            available_cols = [c for c in ['Open', 'High', 'Low', 'Close', 'Volume'] if c in df.columns]
            if not available_cols:
                raise ValueError("No OHLCV columns in YFinance response")
            # rename() returns a new frame, so the selection is never mutated in place
            df = df[available_cols].rename(columns=str.lower)

            # Remove timezone information
            df = ensure_tz_naive(df)

            # Merge with existing data
            combined_df = merge_data(existing_df, df)

            # Save to CSV (thread-safe)
            with lock:
                combined_df.to_csv(filepath)

            # Check if file changed
            new_hash = file_hash(filepath)
            changed = (old_hash != new_hash)

            if changed:
                print(f"  ‚úÖ Updated {pair} {tf_name} - Total rows: {len(combined_df)}")
                return f"üìà Updated {pair} {tf_name} ({len(combined_df)} rows)", str(filepath)
            else:
                print(f"  ‚ÑπÔ∏è No changes {pair} {tf_name}")
                return f"‚úÖ No changes {pair} {tf_name}", None

        except Exception as e:
            print(f"  ‚ö†Ô∏è Attempt {attempt + 1}/{max_retries} failed for {pair} {tf_name}: {e}")

            if attempt < max_retries - 1:
                print(f"  ‚è≥ Waiting {retry_delay} seconds before retry...")
                time.sleep(retry_delay)
            else:
                print(f"  ‚ùå All attempts failed for {pair} {tf_name}")
                return f"‚ùå Failed {pair} {tf_name}: {e}", None

    # Only reached when max_retries is zero or negative (loop never ran)
    return f"‚ùå Failed {pair} {tf_name}", None

# ======================================================
# 8Ô∏è‚É£ Parallel Execution
# ======================================================
# Fan the (pair, timeframe) downloads out over a thread pool, then collect
# status messages and the paths of CSVs whose content changed.
print("\n" + "=" * 70)
print("üöÄ Starting parallel data download...")
print("=" * 70 + "\n")

changed_files = []  # CSV paths (strings) whose content changed
results = []        # one status message per finished task, success or failure
tasks = []          # futures, in submission order

# Submit one task per pair/timeframe combination
with ThreadPoolExecutor(max_workers=8) as executor:
    for pair in FX_PAIRS:
        for tf_name, (interval, period) in TIMEFRAMES.items():
            tasks.append(executor.submit(process_pair_tf, pair, tf_name, interval, period))

    # Process results as they complete (completion order, not submission order)
    for future in as_completed(tasks):
        try:
            msg, filename = future.result()
            results.append(msg)
            # filename is None when the task left its CSV unchanged or failed
            if filename:
                changed_files.append(filename)
        except Exception as e:
            # A crashed task is still recorded so the summary lists every task
            print(f"‚ùå Task failed with error: {e}")
            results.append(f"‚ùå Task failed: {e}")

# ======================================================
# 9. Results Summary
# ======================================================
print("\n" + "=" * 70)
print("üìä PROCESSING SUMMARY")
print("=" * 70)

for result in results:
    print(result)

# "Total tasks" counts every recorded message, including failures
print(f"\nTotal tasks: {len(results)}")
print(f"Files updated: {len(changed_files)}")

# ======================================================
# üîü Git Commit & Push (Skip in GitHub Actions)
# ======================================================
# Commit and push the updated CSVs. Nothing is done on GitHub Actions, and
# nothing is done elsewhere unless at least one file changed.
if IN_GHA:
    print("\n" + "=" * 70)
    print("ü§ñ GitHub Actions: Skipping git operations")
    print("   (Workflow will handle commit and push)")
    print("=" * 70)

elif changed_files:
    print("\n" + "=" * 70)
    print("üöÄ Committing changes to GitHub...")
    print("=" * 70)

    try:
        # Change to repo directory so the git commands below act on the clone
        os.chdir(REPO_FOLDER)

        # `git add -A` stages every change in the repo working tree, not only
        # the paths listed in changed_files
        print(f"üìù Staging {len(changed_files)} files...")
        subprocess.run(["git", "add", "-A"], check=False)

        # Commit
        commit_msg = f"Update YFinance FX data - {len(changed_files)} files"
        result = subprocess.run(
            ["git", "commit", "-m", commit_msg],
            capture_output=True,
            text=True
        )

        # A failed commit is only reported; the push below is attempted anyway
        if result.returncode == 0:
            print("‚úÖ Changes committed")
        elif "nothing to commit" in result.stdout:
            print("‚ÑπÔ∏è No changes to commit")
        else:
            print(f"‚ö†Ô∏è Commit warning: {result.stderr}")

        # Push with retry logic
        max_push_attempts = 3
        for attempt in range(max_push_attempts):
            print(f"üì§ Pushing to GitHub (attempt {attempt + 1}/{max_push_attempts})...")

            result = subprocess.run(
                ["git", "push", "origin", BRANCH],
                capture_output=True,
                text=True,
                timeout=30
            )

            if result.returncode == 0:
                print("‚úÖ Successfully pushed to GitHub")
                break
            else:
                if attempt < max_push_attempts - 1:
                    print(f"‚ö†Ô∏è Push failed, retrying...")
                    # Rebase onto the remote branch before the next attempt.
                    # This call has no timeout and its exit code is not checked.
                    subprocess.run(
                        ["git", "pull", "--rebase", "origin", BRANCH],
                        capture_output=True
                    )
                    time.sleep(3)
                else:
                    print(f"‚ùå Push failed after {max_push_attempts} attempts")
                    print(f"   Error: {result.stderr}")

    except subprocess.TimeoutExpired:
        # Raised by the push call, the only one given a timeout
        print("‚ùå Git operation timed out")
    except Exception as e:
        print(f"‚ùå Git error: {e}")
    finally:
        # Return to base folder even when a git step failed
        os.chdir(BASE_DIR)

else:
    print("\n" + "=" * 70)
    print("‚ÑπÔ∏è No changes to commit")
    print("=" * 70)

# ======================================================
# ‚úÖ Completion
# ======================================================
# Final report for the YFinance cell.
# A task that ran out of retries (or crashed) still appends a message to
# `results`, so the number of entries alone cannot reveal failures. Failure
# messages are the ones that start with the error marker.
_failed_tasks = [r for r in results if str(r).startswith("‚ùå")]
_expected_tasks = len(FX_PAIRS) * len(TIMEFRAMES)
_all_ok = len(results) == _expected_tasks and not _failed_tasks

print("\n" + "=" * 70)
print("‚úÖ YFINANCE WORKFLOW COMPLETED")
print("=" * 70)
print(f"Environment: {ENV_NAME}")
print(f"Pairs processed: {len(FX_PAIRS)}")
print(f"Timeframes per pair: {len(TIMEFRAMES)}")
print(f"Files updated: {len(changed_files)}")
print(f"Tasks failed: {len(_failed_tasks)}")
print(f"Status: {'Success' if _all_ok else 'Partial'}")
print("=" * 70)

# Only claim full coverage when every task reported back without failing
if _all_ok:
    print("\nüéØ All FX pairs & timeframes processed with maximum historical data!")
else:
    print(f"\n‚ö†Ô∏è {len(_failed_tasks)} task(s) failed - see the processing summary above")

In [None]:
# ======================================================
# FX CSV Combine + Incremental Indicators Pipeline v3.7
# ✅ FIXED: Looks for CSVs in correct location
# ✅ FIXED: No nested paths for GitHub Actions
# ✅ Works in: Colab + GitHub Actions + Local
# ✅ Thread-safe, timezone-safe, Git-push-safe
# ======================================================

import os, time, hashlib, subprocess, shutil
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed
import threading
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import ta
from ta.momentum import WilliamsRIndicator
from ta.volatility import AverageTrueRange

print("=" * 70)
print("üîß CSV Combiner & Indicator Generator v3.7 - FIXED FILE DISCOVERY")
print("=" * 70)

# ======================================================
# 0. Environment Detection
# ======================================================
# GitHub Actions is recognised by the presence of GITHUB_ACTIONS in the environment.
IN_GHA = "GITHUB_ACTIONS" in os.environ

# Colab is recognised by whether the google.colab package can be imported.
try:
    import google.colab
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True

# GitHub Actions takes precedence over the Colab/local label.
if IN_GHA:
    ENV_NAME = "GitHub Actions"
elif IN_COLAB:
    ENV_NAME = "Google Colab"
else:
    ENV_NAME = "Local/GitHub Actions"

print(f"üåç Detected Environment: {ENV_NAME}")

# ======================================================
# 1Ô∏è‚É£ FIXED: Path Setup (NO NESTED DIRECTORIES)
# ======================================================
# Choose the root data directory and the repository folder for this cell.
if IN_COLAB:
    # Colab: Use /content
    ROOT_DIR = Path("/content/forex-alpha-models")
    ROOT_DIR.mkdir(parents=True, exist_ok=True)
    REPO_FOLDER = ROOT_DIR / "forex-ai-models"
elif IN_GHA:
    # GitHub Actions: Use current directory (NO NESTING)
    ROOT_DIR = Path.cwd()
    REPO_FOLDER = ROOT_DIR  # No nested folder
    print(f"üìÇ GitHub Actions: Using repo root: {ROOT_DIR}")
else:
    # Local: use a "forex-alpha-models" subdirectory of the starting directory.
    # The YFinance cell chdir()s into that directory, so when this cell runs
    # after it in the same kernel the cwd already IS the root; reuse it rather
    # than resolving to forex-alpha-models/forex-alpha-models.
    _cwd = Path.cwd().resolve()
    if _cwd.name == "forex-alpha-models":
        ROOT_DIR = _cwd
    else:
        ROOT_DIR = _cwd / "forex-alpha-models"
    ROOT_DIR.mkdir(parents=True, exist_ok=True)
    REPO_FOLDER = ROOT_DIR / "forex-ai-models"

# Setup subdirectories
CSV_FOLDER = ROOT_DIR / "csvs"
PICKLE_FOLDER = ROOT_DIR / "pickles"
LOGS_FOLDER = ROOT_DIR / "logs"

for folder in [CSV_FOLDER, PICKLE_FOLDER, LOGS_FOLDER, REPO_FOLDER]:
    folder.mkdir(parents=True, exist_ok=True)

print(f"‚úÖ Root directory: {ROOT_DIR}")
print(f"‚úÖ Repo folder: {REPO_FOLDER}")
print(f"‚úÖ CSV folder: {CSV_FOLDER}")
print(f"‚úÖ Pickle folder: {PICKLE_FOLDER}")
print(f"‚úÖ Logs folder: {LOGS_FOLDER}")

# Thread lock for file operations
lock = threading.Lock()

def print_status(msg, level="info"):
    """Print ``msg`` prefixed with the icon for ``level``.

    Known levels are info, success, warn, error and debug; any other level
    falls back to the info icon.
    """
    icon_by_level = {
        "info": "‚ÑπÔ∏è",
        "success": "‚úÖ",
        "warn": "‚ö†Ô∏è",
        "error": "‚ùå",
        "debug": "üêû",
    }
    if level in icon_by_level:
        icon = icon_by_level[level]
    else:
        icon = '‚ÑπÔ∏è'
    print(f"{icon} {msg}")

# ======================================================
# 2Ô∏è‚É£ Git Configuration
# ======================================================
# Committer identity and target repository, all overridable via environment.
GIT_NAME = os.environ.get("GIT_USER_NAME", "Forex AI Bot")
GIT_EMAIL = os.environ.get("GIT_USER_EMAIL", "nakatonabira3@gmail.com")
GITHUB_USERNAME = os.environ.get("GITHUB_USERNAME", "rahim-dotAI")
GITHUB_REPO = os.environ.get("GITHUB_REPO", "forex-ai-models")
# Optional here: without a token the credential setup and the final push are skipped
FOREX_PAT = os.environ.get("FOREX_PAT", "").strip()
BRANCH = "main"

print(f"‚úÖ Git configured: {GIT_NAME} <{GIT_EMAIL}>")

if FOREX_PAT and not IN_GHA:
    subprocess.run(["git", "config", "--global", "user.name", GIT_NAME], check=False)
    subprocess.run(["git", "config", "--global", "user.email", GIT_EMAIL], check=False)
    subprocess.run(["git", "config", "--global", "credential.helper", "store"], check=False)

    # Only the github.com entry is replaced: entries for other hosts that
    # already exist in the user's credential file are kept instead of being
    # overwritten. The new entry goes first so the store helper finds it.
    cred_file = Path.home() / ".git-credentials"
    _github_entry = f"https://{GITHUB_USERNAME}:{FOREX_PAT}@github.com"
    _other_entries = []
    if cred_file.exists():
        _other_entries = [
            line.strip()
            for line in cred_file.read_text().splitlines()
            if line.strip() and not line.strip().endswith("@github.com")
        ]
    cred_file.write_text("\n".join([_github_entry, *_other_entries]) + "\n")

    # The file holds a plaintext token: make it readable by the owner only.
    try:
        cred_file.chmod(0o600)
    except OSError as e:
        print(f"‚ö†Ô∏è Could not restrict permissions on {cred_file}: {e}")

# ======================================================
# 3Ô∏è‚É£ Repository Management (COMPLETE)
# ======================================================
def ensure_repo():
    """Make sure REPO_FOLDER holds an up-to-date clone of the repository.

    On GitHub Actions nothing is done. Elsewhere, a folder without ``.git`` is
    removed and replaced by a fresh clone of BRANCH; an existing clone is
    fetched, checked out and pulled on a best-effort basis (failures there are
    reported but never raised).

    Raises:
        subprocess.TimeoutExpired: ``git clone`` exceeded 60 seconds.
        subprocess.CalledProcessError: ``git clone`` exited non-zero.
    """
    if IN_GHA:
        # GitHub Actions: Repo already checked out
        print_status("ü§ñ GitHub Actions: Repository already available", "info")
        return

    # For Colab and Local: Clone or update
    REPO_URL = f"https://{GITHUB_USERNAME}:{FOREX_PAT}@github.com/{GITHUB_USERNAME}/{GITHUB_REPO}.git"

    def redact(text):
        """Return str(text) with the access token masked."""
        text = str(text)
        return text.replace(FOREX_PAT, "***") if FOREX_PAT else text

    if not (REPO_FOLDER / ".git").exists():
        # git refuses to clone into a non-empty folder, so start clean
        if REPO_FOLDER.exists():
            shutil.rmtree(REPO_FOLDER)

        print_status(f"Cloning repo into {REPO_FOLDER}...", "info")
        clone_cmd = ["git", "clone", "-b", BRANCH, REPO_URL, str(REPO_FOLDER)]
        safe_cmd = [redact(part) for part in clone_cmd]
        try:
            subprocess.run(clone_cmd, check=True, timeout=60)
            print_status("‚úÖ Repository cloned successfully", "success")
        except subprocess.TimeoutExpired:
            print_status("‚ùå Clone timed out after 60 seconds", "error")
            # Re-raise the same exception type without the token in its command
            raise subprocess.TimeoutExpired(safe_cmd, 60) from None
        except subprocess.CalledProcessError as e:
            print_status(f"‚ùå Clone failed: {redact(e)}", "error")
            raise subprocess.CalledProcessError(e.returncode, safe_cmd) from None
        except Exception as e:
            print_status(f"‚ùå Clone failed: {redact(e)}", "error")
            raise
    else:
        print_status("Repo exists, pulling latest...", "info")
        try:
            subprocess.run(
                ["git", "-C", str(REPO_FOLDER), "fetch", "origin"],
                check=False,
                timeout=30
            )
            subprocess.run(
                ["git", "-C", str(REPO_FOLDER), "checkout", BRANCH],
                check=False
            )
            pull_result = subprocess.run(
                ["git", "-C", str(REPO_FOLDER), "pull", "origin", BRANCH],
                check=False,
                timeout=30
            )
            # Only claim success when the pull actually succeeded
            if pull_result.returncode == 0:
                print_status("‚úÖ Repo synced successfully", "success")
            else:
                print_status(
                    f"‚ö†Ô∏è Pull exited with code {pull_result.returncode} - continuing",
                    "warn"
                )
        except subprocess.TimeoutExpired:
            print_status("‚ö†Ô∏è Update timed out - continuing", "warn")
        except Exception as e:
            print_status(f"‚ö†Ô∏è Update failed: {redact(e)} - continuing", "warn")

# Execute repository setup now, before any CSV is read from or written to REPO_FOLDER
ensure_repo()

# ======================================================
# 4Ô∏è‚É£ Helper Functions
# ======================================================
def ensure_tz_naive(df):
    """Give ``df`` a timezone-naive datetime index (modified in place).

    The index is parsed with ``pd.to_datetime`` (unparseable labels become
    NaT). A timezone-aware index is converted to UTC before the timezone is
    dropped, matching the helper of the same name in the YFinance cell, so
    timestamps from both cells line up when they are merged.

    Returns:
        The same frame with a naive index, or a new empty DataFrame when
        ``df`` is None or empty.
    """
    if df is None or df.empty:
        return pd.DataFrame()

    df.index = pd.to_datetime(df.index, errors='coerce')

    if df.index.tz is not None:
        # tz_convert(None) shifts to UTC first; tz_localize(None) would keep
        # the local wall-clock time and misalign rows against UTC-based data
        df.index = df.index.tz_convert(None)

    return df

def safe_numeric(df):
    """Turn +/-inf into NaN and drop unusable rows; ``df`` is modified in place.

    When any of open/high/low/close is present, rows with NaN in one of those
    columns are dropped. When none is present, only rows that are NaN in
    every column are dropped. The same frame is returned.
    """
    df.replace([np.inf, -np.inf], np.nan, inplace=True)

    price_columns = [name for name in ('open', 'high', 'low', 'close') if name in df.columns]

    # Restrict the NaN check to the price columns when there are any
    drop_options = {"subset": price_columns} if price_columns else {"how": "all"}
    df.dropna(inplace=True, **drop_options)

    return df

# ======================================================
# 5Ô∏è‚É£ CSV Combine
# ======================================================
def combine_csv(csv_path):
    """Merge the rows of ``csv_path`` into the same-named CSV in REPO_FOLDER.

    Nothing is written here; the merged frame is only returned.

    Returns:
        Tuple of (combined_df, target_file). ``combined_df`` holds existing
        and new rows, de-duplicated on the index (last occurrence wins) and
        sorted. If the new file cannot be read, the existing rows are
        returned unchanged.
    """
    target_file = REPO_FOLDER / csv_path.name

    # Rows already stored in the repo copy; an unreadable file counts as empty
    existing_df = pd.DataFrame()
    if target_file.exists():
        try:
            stored = ensure_tz_naive(pd.read_csv(target_file, index_col=0, parse_dates=True))
            print_status(f"  üìÇ Loaded {len(stored)} existing rows", "debug")
            existing_df = stored
        except Exception as e:
            print_status(f"  ‚ö†Ô∏è Could not load existing: {e}", "warn")
            existing_df = pd.DataFrame()

    # Rows from the incoming file
    try:
        incoming = ensure_tz_naive(pd.read_csv(csv_path, index_col=0, parse_dates=True))
    except Exception as e:
        print_status(f"  ‚ùå Could not load new data: {e}", "error")
        return existing_df, target_file

    # Stack, let the incoming row win on duplicate timestamps, then order by time
    stacked = pd.concat([existing_df, incoming])
    superseded = stacked.index.duplicated(keep="last")
    combined_df = stacked[~superseded].sort_index()

    return combined_df, target_file

# ======================================================
# 6Ô∏è‚É£ Incremental Indicators
# ======================================================
def add_indicators_incremental(existing_df, combined_df):
    """Compute indicators for the rows of ``combined_df`` not yet in ``existing_df``.

    Args:
        existing_df: Previously saved indicator frame (may be empty).
        combined_df: Full price frame with open/high/low/close columns.

    Returns:
        A new frame holding only the new rows, with indicator columns added
        and every numeric non-OHLCV column min-max scaled, or None when there
        are no new rows, OHLC columns are missing, or cleaning leaves nothing.

    NOTE(review): indicators and the scaler are fitted on the new rows alone,
    so rolling windows start without history and the scaling range differs
    between batches - confirm this is intended.
    """
    if not existing_df.empty:
        # Explicit copy: the rows are cleaned and extended in place below, and
        # that must not act on (or warn about) a slice of the caller's frame.
        new_rows = combined_df.loc[~combined_df.index.isin(existing_df.index)].copy()
    else:
        new_rows = combined_df.copy()

    if new_rows.empty:
        return None

    # Validate OHLC columns
    required_cols = ['open', 'high', 'low', 'close']
    if not all(col in new_rows.columns for col in required_cols):
        print_status(f"‚ö†Ô∏è Missing OHLC columns", "warn")
        return None

    new_rows = safe_numeric(new_rows)

    if new_rows.empty:
        return None

    new_rows.sort_index(inplace=True)

    # Calculate indicators; each group needs at least its window length of rows.
    # The first failure skips the remaining indicators (reported as a warning).
    try:
        # Moving Averages
        if len(new_rows) >= 10:
            new_rows['SMA_10'] = ta.trend.sma_indicator(new_rows['close'], 10)
            new_rows['EMA_10'] = ta.trend.ema_indicator(new_rows['close'], 10)

        if len(new_rows) >= 50:
            new_rows['SMA_50'] = ta.trend.sma_indicator(new_rows['close'], 50)
            new_rows['EMA_50'] = ta.trend.ema_indicator(new_rows['close'], 50)

        # RSI
        if len(new_rows) >= 14:
            new_rows['RSI_14'] = ta.momentum.rsi(new_rows['close'], 14)
            new_rows['Williams_%R'] = WilliamsRIndicator(
                new_rows['high'], new_rows['low'], new_rows['close'], 14
            ).williams_r()

        # ATR
        if len(new_rows) >= 14:
            new_rows['ATR'] = AverageTrueRange(
                new_rows['high'], new_rows['low'], new_rows['close'], 14
            ).average_true_range()

        # MACD
        if len(new_rows) >= 26:
            new_rows['MACD'] = ta.trend.macd(new_rows['close'])

    except Exception as e:
        print_status(f"‚ö†Ô∏è Indicator error: {e}", "warn")

    # Scale (protect OHLC): raw prices and volume keep their original values
    numeric_cols = new_rows.select_dtypes(include=[np.number]).columns
    protected_cols = ['open', 'high', 'low', 'close', 'volume']
    scalable_cols = [c for c in numeric_cols if c not in protected_cols]

    if scalable_cols and not new_rows[scalable_cols].dropna(how='all').empty:
        # Fill gaps first (forward, then backward, then zero) so the scaler
        # never sees inf or NaN
        new_rows[scalable_cols] = new_rows[scalable_cols].replace([np.inf, -np.inf], np.nan)
        new_rows[scalable_cols] = new_rows[scalable_cols].ffill().bfill().fillna(0)

        scaler = MinMaxScaler()
        try:
            new_rows[scalable_cols] = scaler.fit_transform(new_rows[scalable_cols])
        except Exception as e:
            print_status(f"‚ö†Ô∏è Scaling warning: {e}", "warn")

    return new_rows

# ======================================================
# 7Ô∏è‚É£ Worker Function
# ======================================================
def process_csv_file(csv_file):
    """Combine one CSV into the repo copy and extend its indicator pickle.

    Args:
        csv_file: Path of the CSV to process.

    Returns:
        Tuple of (pickle_path_or_None, message). The first item is the
        indicator pickle path as a string when new rows were written,
        otherwise None. Errors are caught and reported in the message.
    """
    try:
        # Combine CSV
        combined_df, target_file = combine_csv(csv_file)

        # Validate
        required_cols = ['open', 'high', 'low', 'close']
        if not all(col in combined_df.columns for col in required_cols):
            msg = f"‚ö†Ô∏è Skipped {csv_file.name}: Missing OHLC"
            print_status(msg, "warn")
            return None, msg

        # Check for existing indicators
        existing_pickle = PICKLE_FOLDER / f"{csv_file.stem}_indicators.pkl"

        existing_df = pd.DataFrame()
        if existing_pickle.exists():
            try:
                # NOTE: unpickling can execute arbitrary code; acceptable only
                # because these files are written by this pipeline itself.
                existing_df = pd.read_pickle(existing_pickle)
            except Exception as e:
                # An unreadable pickle is rebuilt from scratch, but say so
                # instead of hiding the reason.
                print_status(
                    f"‚ö†Ô∏è Could not read {existing_pickle.name}: {e} - rebuilding",
                    "warn"
                )
                existing_df = pd.DataFrame()

        # Add indicators for new rows
        new_indicators = add_indicators_incremental(existing_df, combined_df)

        if new_indicators is not None:
            # Combine
            updated_df = pd.concat([existing_df, new_indicators]).sort_index()

            # Save (thread-safe)
            with lock:
                updated_df.to_pickle(existing_pickle, protocol=4)
                combined_df.to_csv(target_file)

            msg = f"‚úÖ {csv_file.name} updated: {len(new_indicators)} new rows"
            print_status(msg, "success")
            return str(existing_pickle), msg
        else:
            msg = f"‚ÑπÔ∏è {csv_file.name} no new rows"
            print_status(msg, "info")
            return None, msg

    except Exception as e:
        msg = f"‚ùå Failed {csv_file.name}: {e}"
        print_status(msg, "error")
        return None, msg

# ======================================================
# 8Ô∏è‚É£ FIXED: Enhanced CSV Discovery
# ======================================================
# Collect the CSV files to process from the csvs folder, the root directory
# and the repo folder (top level of each only, no recursion).
print("\n" + "=" * 70)
print("üöÄ Processing CSV files...")
print("=" * 70 + "\n")

# Paths of every CSV found, filled by the loop below
csv_files = []

# One "<folder>/*.csv" pattern per location; each is split back into
# folder (.parent) and glob (.name) in the loop
search_patterns = [
    CSV_FOLDER / "*.csv",           # Standard location
    ROOT_DIR / "*.csv",             # Root directory
    REPO_FOLDER / "*.csv",          # Repo folder
]

print_status(f"üîç Searching for CSV files in multiple locations...", "info")

for pattern in search_patterns:
    found = list(pattern.parent.glob(pattern.name))
    if found:
        print_status(f"  üìÇ Found {len(found)} CSV(s) in: {pattern.parent}", "debug")
        csv_files.extend(found)

# Remove duplicate paths (ROOT_DIR and REPO_FOLDER are the same directory on
# GitHub Actions). The set() round trip does not preserve discovery order.
# NOTE(review): files with the same name in different folders stay separate
# entries - confirm that processing both is intended.
csv_files = list(set(csv_files))

# Diagnostic output: what was found, or where the search looked
if csv_files:
    print_status(f"üìä Total unique CSV files found: {len(csv_files)}", "success")
    for csv_file in csv_files[:5]:  # Show first 5
        print_status(f"  ‚Ä¢ {csv_file.name} ({csv_file.stat().st_size / 1024:.1f} KB)", "debug")
    if len(csv_files) > 5:
        print_status(f"  ... and {len(csv_files) - 5} more", "debug")
else:
    print_status("‚ö™ No CSV files found in any location", "warn")
    print_status("üîç Searched in:", "info")
    print_status(f"  ‚Ä¢ {CSV_FOLDER}", "debug")
    print_status(f"  ‚Ä¢ {ROOT_DIR}", "debug")
    print_status(f"  ‚Ä¢ {REPO_FOLDER}", "debug")

    # List what is actually in CSV_FOLDER (first 10 entries) to help debugging
    if CSV_FOLDER.exists():
        all_files = list(CSV_FOLDER.glob("*"))
        if all_files:
            print_status(f"üìÇ Files in CSV folder ({len(all_files)}):", "debug")
            for f in all_files[:10]:
                print_status(f"  ‚Ä¢ {f.name}", "debug")
        else:
            print_status("üìÇ CSV folder is empty", "debug")

# Pickle paths (strings) updated by the processing step below
changed_files = []

# ======================================================
# 9Ô∏è‚É£ Process Files
# ======================================================
# Run process_csv_file() over every discovered CSV in a thread pool and
# record the indicator pickles that were updated.
if csv_files:
    print("\n" + "=" * 70)
    print(f"‚öôÔ∏è Processing {len(csv_files)} CSV file(s)...")
    print("=" * 70 + "\n")

    # At most 8 workers, and never more workers than files
    with ThreadPoolExecutor(max_workers=min(8, len(csv_files))) as executor:
        futures = [executor.submit(process_csv_file, f) for f in csv_files]

        for future in as_completed(futures):
            # process_csv_file() catches its own errors and returns
            # (pickle_path_or_None, message)
            file, msg = future.result()
            if file:
                changed_files.append(file)

# ======================================================
# üîü Git Push (Skip in GitHub Actions)
# ======================================================
# Commit and push the updated CSVs. Nothing is done on GitHub Actions, and
# nothing is done elsewhere unless something changed and a token is available.
if IN_GHA:
    print("\n" + "=" * 70)
    print("ü§ñ GitHub Actions: Skipping git operations")
    print("   (Workflow will handle commit and push)")
    print("=" * 70)

elif changed_files and FOREX_PAT:
    print("\n" + "=" * 70)
    print("üöÄ Committing changes to GitHub...")
    print("=" * 70)

    try:
        # Stage every change inside the repo working tree.
        # changed_files holds pickle paths under PICKLE_FOLDER, which lies
        # outside REPO_FOLDER in Colab/local; handing those paths to `git add`
        # makes it abort, so the updated CSVs in the repo were never staged.
        subprocess.run(
            ["git", "-C", str(REPO_FOLDER), "add", "-A"],
            check=False
        )

        # Commit (exits non-zero when there is nothing to commit; not fatal)
        subprocess.run(
            ["git", "-C", str(REPO_FOLDER), "commit", "-m", "üìà Auto-update CSVs & indicators"],
            check=False
        )

        # Push with retry
        for attempt in range(3):
            print_status(f"üì§ Pushing (attempt {attempt + 1}/3)...", "info")

            result = subprocess.run(
                ["git", "-C", str(REPO_FOLDER), "push", "origin", BRANCH],
                capture_output=True,
                text=True,
                timeout=30
            )

            if result.returncode == 0:
                print_status("‚úÖ Push successful", "success")
                break

            if attempt < 2:
                # Rebase onto the remote branch before the next attempt
                subprocess.run(
                    ["git", "-C", str(REPO_FOLDER), "pull", "--rebase", "origin", BRANCH],
                    check=False
                )
                time.sleep(5)
            else:
                print_status(f"‚ùå Push failed", "error")
                # Show why, with the access token masked in case git echoes the remote URL
                push_error = (result.stderr or "").strip().replace(FOREX_PAT, "***")
                if push_error:
                    print_status(f"   Error: {push_error}", "error")

    except Exception as e:
        print_status(f"‚ùå Git error: {e}", "error")

# ======================================================
# ‚úÖ Completion
# ======================================================
# Final report for the CSV combiner cell.
print("\n" + "=" * 70)
print("‚úÖ CSV COMBINER WORKFLOW COMPLETED")
print("=" * 70)
print(f"Environment: {ENV_NAME}")
print(f"CSV files found: {len(csv_files)}")
# Same count as "found": every discovered file is submitted for processing,
# whether or not its processing succeeded
print(f"CSV files processed: {len(csv_files)}")
print(f"Pickle files updated: {len(changed_files)}")
print("=" * 70)

# The closing line depends only on whether any CSV was found, not on
# per-file success
if csv_files:
    print("\nüéØ All CSVs combined with incremental indicators!")
else:
    print("\n‚ö†Ô∏è No CSV files found - check data source cells!")

In [None]:
#!/usr/bin/env python3
"""
VERSION 3.7 ‚Äì ULTRA-PERSISTENT SELF-LEARNING HYBRID FX PIPELINE (ENHANCED)
===========================================================================
‚úÖ FIXED: No nested paths for GitHub Actions
‚úÖ Uses memory_v85.db in repo folder (no nesting)
‚úÖ Full production features with comprehensive analytics
"""

import os, time, json, re, shutil, subprocess, pickle, filecmp, sqlite3
from pathlib import Path
from datetime import datetime, timezone, timedelta
import pandas as pd
import numpy as np
import requests
import ta
import logging
from logging.handlers import RotatingFileHandler
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.exceptions import NotFittedError
from collections import defaultdict
from contextlib import contextmanager
import threading

# ======================================================
# 0Ô∏è‚É£ FIXED: Environment Detection & Path Setup
# ======================================================

# Detect environment
try:
    # Importability of google.colab is used as the Colab marker.
    import google.colab
except ImportError:
    IN_COLAB, ENV_NAME = False, "Local/GitHub Actions"
else:
    IN_COLAB, ENV_NAME = True, "Google Colab"

# The presence (not the value) of GITHUB_ACTIONS marks a GitHub Actions run,
# and it takes precedence for the displayed environment name.
IN_GHA = "GITHUB_ACTIONS" in os.environ
ENV_NAME = "GitHub Actions" if IN_GHA else ENV_NAME

print(f"üåç Detected Environment: {ENV_NAME}")

# ‚úÖ FIXED: Set paths based on environment (NO NESTING IN GHA)
if IN_COLAB or not IN_GHA:
    # Colab and local runs share one layout: a dedicated root directory with
    # the repo nested inside it. Only the root location differs.
    ROOT_DIR = Path("/content/forex-alpha-models" if IN_COLAB else "./forex-alpha-models")
    ROOT_DIR.mkdir(parents=True, exist_ok=True)
    REPO_FOLDER = ROOT_DIR / "forex-ai-models"
else:
    # GitHub Actions: the current working directory is used as both the
    # root and the repo folder, so nothing is nested.
    ROOT_DIR = Path.cwd()
    REPO_FOLDER = ROOT_DIR
    print(f"üìÇ GitHub Actions: Using repo root: {ROOT_DIR}")

# Working subdirectories always live directly under ROOT_DIR.
CSV_FOLDER, PICKLE_FOLDER, LOGS_FOLDER, BACKUP_FOLDER = (
    ROOT_DIR / _subdir for _subdir in ("csvs", "pickles", "logs", "backups")
)

for folder in (CSV_FOLDER, PICKLE_FOLDER, LOGS_FOLDER, BACKUP_FOLDER, REPO_FOLDER):
    folder.mkdir(parents=True, exist_ok=True)

# Echo the resolved layout.
for _label, _location in (
    ("Root Directory", ROOT_DIR),
    ("Repo Folder", REPO_FOLDER),
    ("CSV Folder", CSV_FOLDER),
    ("Pickle Folder", PICKLE_FOLDER),
    ("Logs Folder", LOGS_FOLDER),
    ("Backup Folder", BACKUP_FOLDER),
):
    print(f"‚úÖ {_label}: {_location}")

# Enhanced logging setup with rotation
log_formatter = logging.Formatter("%(asctime)s | %(levelname)s | %(message)s")

# pipeline.log: rotates at 10 MiB, keeps 5 rotated files.
main_handler = RotatingFileHandler(
    LOGS_FOLDER / "pipeline.log", maxBytes=10 * 1024 * 1024, backupCount=5
)

# errors.log: rotates at 5 MiB, keeps 3 rotated files, ERROR and above only.
error_handler = RotatingFileHandler(
    LOGS_FOLDER / "errors.log", maxBytes=5 * 1024 * 1024, backupCount=3
)
error_handler.setLevel(logging.ERROR)

# Both files share the same line format.
for _file_handler in (main_handler, error_handler):
    _file_handler.setFormatter(log_formatter)

# Attach both handlers to the root logger at INFO.
# NOTE(review): running this cell again creates and attaches fresh handler
# objects, so log lines would then be written more than once - confirm
# whether re-execution is expected.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
for _file_handler in (main_handler, error_handler):
    logger.addHandler(_file_handler)

def print_status(msg, level="info"):
    """Log a message through the logging module and echo it to stdout.

    Args:
        msg: Text to log and print.
        level: One of "info", "success", "warn", "debug", "error" or
            "performance". "warn" is logged as a warning and "performance"
            as info; any level without a matching ``logging`` function is
            logged as info. Unknown levels are printed with the info icon.
    """
    icon_by_level = {
        "info": "‚ÑπÔ∏è",
        "success": "‚úÖ",
        "warn": "‚ö†Ô∏è",
        "debug": "üêû",
        "error": "‚ùå",
        "performance": "‚ö°"
    }

    # Translate the display level into the name of a logging function.
    if level == "warn":
        logging_name = "warning"
    elif level == "performance":
        logging_name = "info"
    else:
        logging_name = level

    emit = getattr(logging, logging_name, logging.info)
    emit(msg)
    print(f"{icon_by_level.get(level, '‚ÑπÔ∏è')} {msg}")

# Report the detected environment and the process working directory through
# print_status (which both logs and prints).
print_status(f"Environment: {ENV_NAME}", "success")
print_status(f"Working Directory: {os.getcwd()}", "info")

# ======================================================
# üÜï ENHANCED DATABASE - v3.7 (memory_v85.db)
# ======================================================
# SQLite file used as the default db_path of EnhancedTradeMemoryDatabase.
# It sits directly inside REPO_FOLDER.
PERSISTENT_DB = REPO_FOLDER / "memory_v85.db"  # ‚úÖ In repo folder (no nesting)

class EnhancedTradeMemoryDatabase:
    """
    ENHANCED VERSION v3.7 - Production-ready database

    ‚úÖ Fixed: No nested paths for GitHub Actions
    ‚úÖ Uses memory_v85.db for backward compatibility
    ‚úÖ Full production features with comprehensive analytics
    """

    def __init__(self, db_path=PERSISTENT_DB, max_retries=3):
        """Prepare instance state and open/initialize the database.

        Args:
            db_path: Location of the SQLite file, as a ``Path`` or a plain
                string. Its parent directory is created if missing.
            max_retries: Attempt budget used by ``_execute_with_retry``.

        Raises:
            sqlite3.Error: propagated from ``initialize_database``.
        """
        # Coerce so callers may pass a str; .parent/.exists()/.name below and
        # in initialize_database require a Path.
        self.db_path = Path(db_path)
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self.conn = None
        # Re-entrant lock guarding the multi-statement write paths.
        self.lock = threading.RLock()
        # Pending trades younger than this are not evaluated yet.
        self.min_age_hours = 1
        self.max_retries = max_retries
        self.performance_metrics = defaultdict(list)

        print_status(f"üìÅ Database path: {self.db_path}", "info")
        self.initialize_database()

    @contextmanager
    def get_cursor(self):
        """Context manager for database cursor with auto-commit"""
        cursor = self.conn.cursor()
        try:
            yield cursor
            self.conn.commit()
        except Exception as e:
            self.conn.rollback()
            raise e
        finally:
            cursor.close()

    def _execute_with_retry(self, operation, *args, **kwargs):
        """Execute database operation with retry logic"""
        for attempt in range(self.max_retries):
            try:
                return operation(*args, **kwargs)
            except sqlite3.OperationalError as e:
                if attempt < self.max_retries - 1:
                    wait_time = (2 ** attempt) * 0.1
                    print_status(
                        f"‚ö†Ô∏è Database busy, retrying in {wait_time:.1f}s... "
                        f"(attempt {attempt + 1}/{self.max_retries})",
                        "warn"
                    )
                    time.sleep(wait_time)
                else:
                    raise e

    def initialize_database(self):
        """Open the SQLite connection, apply PRAGMA tuning and create the schema.

        Creates, if missing, the five tables used by this class
        (pending_trades, completed_trades, model_stats_cache, execution_log,
        performance_metrics) together with their indexes, then runs the
        integrity check and the optimization pass.

        Raises:
            sqlite3.Error: re-raised after being reported if connecting or any
                schema statement fails.
        """
        try:
            # Only used to pick the "existing" vs "new" status message below.
            db_exists = self.db_path.exists()

            self.conn = sqlite3.connect(
                str(self.db_path),
                timeout=30,
                check_same_thread=False
            )

            # Optimized PRAGMA settings.
            # page_size and auto_vacuum only apply to a database that has no
            # content yet, and the page size cannot be changed once the file
            # is in WAL mode, so both are issued BEFORE journal_mode=WAL.
            # On an already-populated database they are harmless no-ops.
            pragmas = [
                "PRAGMA page_size=4096",
                "PRAGMA auto_vacuum=INCREMENTAL",
                "PRAGMA journal_mode=WAL",
                "PRAGMA synchronous=NORMAL",
                "PRAGMA cache_size=-64000",
                "PRAGMA temp_store=MEMORY",
                "PRAGMA mmap_size=30000000000"
            ]

            for pragma in pragmas:
                self.conn.execute(pragma)

            # All DDL runs in one get_cursor scope: committed together on
            # success, rolled back together on failure.
            with self.get_cursor() as cursor:
                # ===== TABLE 1: Pending trades =====
                cursor.execute('''
                    CREATE TABLE IF NOT EXISTS pending_trades (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        created_at TEXT NOT NULL,
                        iteration INTEGER NOT NULL,
                        pair TEXT NOT NULL,
                        timeframe TEXT NOT NULL,
                        sgd_prediction INTEGER,
                        rf_prediction INTEGER,
                        ensemble_prediction INTEGER,
                        entry_price REAL NOT NULL,
                        sl_price REAL NOT NULL,
                        tp_price REAL NOT NULL,
                        confidence REAL,
                        evaluated BOOLEAN DEFAULT 0,
                        retry_count INTEGER DEFAULT 0,
                        last_error TEXT
                    )
                ''')

                # Create indexes
                indexes = [
                    "CREATE INDEX IF NOT EXISTS idx_pending_eval ON pending_trades(evaluated, created_at)",
                    "CREATE INDEX IF NOT EXISTS idx_pending_pair ON pending_trades(pair, evaluated)",
                    "CREATE INDEX IF NOT EXISTS idx_pending_iteration ON pending_trades(iteration, evaluated)"
                ]

                for index_sql in indexes:
                    cursor.execute(index_sql)

                # ===== TABLE 2: Completed trades =====
                cursor.execute('''
                    CREATE TABLE IF NOT EXISTS completed_trades (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        pending_trade_id INTEGER,
                        created_at TEXT NOT NULL,
                        evaluated_at TEXT NOT NULL,
                        iteration_created INTEGER,
                        iteration_evaluated INTEGER,
                        pair TEXT NOT NULL,
                        timeframe TEXT NOT NULL,
                        model_used TEXT NOT NULL,
                        entry_price REAL NOT NULL,
                        exit_price REAL NOT NULL,
                        sl_price REAL NOT NULL,
                        tp_price REAL NOT NULL,
                        prediction INTEGER,
                        hit_tp BOOLEAN NOT NULL,
                        pnl REAL NOT NULL,
                        pnl_percent REAL,
                        duration_hours REAL,
                        price_movement REAL,
                        FOREIGN KEY (pending_trade_id) REFERENCES pending_trades(id)
                    )
                ''')

                # Create indexes
                indexes = [
                    "CREATE INDEX IF NOT EXISTS idx_completed_model ON completed_trades(model_used, evaluated_at)",
                    "CREATE INDEX IF NOT EXISTS idx_completed_pair ON completed_trades(pair, model_used, evaluated_at)",
                    "CREATE INDEX IF NOT EXISTS idx_completed_timestamp ON completed_trades(evaluated_at)",
                    "CREATE INDEX IF NOT EXISTS idx_completed_pnl ON completed_trades(model_used, pnl)"
                ]

                for index_sql in indexes:
                    cursor.execute(index_sql)

                # ===== TABLE 3: Model performance cache =====
                # One row per (pair, model_name, days); a conflicting insert
                # replaces the existing row.
                cursor.execute('''
                    CREATE TABLE IF NOT EXISTS model_stats_cache (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        updated_at TEXT NOT NULL,
                        pair TEXT NOT NULL,
                        model_name TEXT NOT NULL,
                        days INTEGER NOT NULL,
                        total_trades INTEGER DEFAULT 0,
                        winning_trades INTEGER DEFAULT 0,
                        losing_trades INTEGER DEFAULT 0,
                        accuracy_pct REAL DEFAULT 0.0,
                        total_pnl REAL DEFAULT 0.0,
                        avg_pnl REAL DEFAULT 0.0,
                        max_pnl REAL DEFAULT 0.0,
                        min_pnl REAL DEFAULT 0.0,
                        sharpe_ratio REAL DEFAULT 0.0,
                        max_drawdown REAL DEFAULT 0.0,
                        avg_duration_hours REAL DEFAULT 0.0,
                        UNIQUE(pair, model_name, days) ON CONFLICT REPLACE
                    )
                ''')

                cursor.execute('''
                    CREATE INDEX IF NOT EXISTS idx_stats_lookup
                    ON model_stats_cache(pair, model_name, days)
                ''')

                # ===== TABLE 4: Pipeline execution log =====
                cursor.execute('''
                    CREATE TABLE IF NOT EXISTS execution_log (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        timestamp TEXT NOT NULL,
                        iteration INTEGER NOT NULL,
                        status TEXT NOT NULL,
                        trades_stored INTEGER DEFAULT 0,
                        trades_evaluated INTEGER DEFAULT 0,
                        duration_seconds REAL,
                        memory_usage_mb REAL,
                        error_message TEXT
                    )
                ''')

                # ===== TABLE 5: Performance metrics =====
                cursor.execute('''
                    CREATE TABLE IF NOT EXISTS performance_metrics (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        timestamp TEXT NOT NULL,
                        operation TEXT NOT NULL,
                        duration_ms REAL NOT NULL,
                        rows_affected INTEGER DEFAULT 0,
                        success BOOLEAN DEFAULT 1
                    )
                ''')

            if db_exists:
                print_status(f"‚úÖ Connected to existing database: {self.db_path.name}", "success")
            else:
                print_status(f"‚úÖ Created new database: {self.db_path.name}", "success")

            print_status("‚úÖ Enhanced Database v3.7 initialized", "success")
            self._verify_database_integrity()
            self._optimize_database()

        except sqlite3.Error as e:
            print_status(f"‚ùå Database initialization failed: {e}", "error")
            raise

    def _verify_database_integrity(self):
        """Run SQLite's integrity check and report on the expected tables.

        Reports a warning if ``PRAGMA integrity_check`` returns anything other
        than 'ok', then reports the row count of each expected table or an
        error if it is missing. Any exception is reported as a warning and
        not propagated.
        """
        required_tables = (
            'pending_trades', 'completed_trades',
            'model_stats_cache', 'execution_log',
            'performance_metrics'
        )

        try:
            with self.get_cursor() as cursor:
                # Check integrity
                cursor.execute("PRAGMA integrity_check")
                verdict = cursor.fetchone()[0]
                if verdict != 'ok':
                    print_status(f"‚ö†Ô∏è Database integrity issue: {verdict}", "warn")

                # Collect the names of the tables that actually exist
                cursor.execute("""
                    SELECT name FROM sqlite_master
                    WHERE type='table'
                """)
                present = [row[0] for row in cursor.fetchall()]

                for table in required_tables:
                    if table not in present:
                        print_status(f"  ‚úó Table '{table}' missing!", "error")
                        continue

                    # Table names come from the fixed tuple above, so the
                    # interpolation never sees external input.
                    cursor.execute(f"SELECT COUNT(*) FROM {table}")
                    count = cursor.fetchone()[0]
                    print_status(f"  ‚úì Table '{table}' exists ({count} rows)", "debug")

        except Exception as e:
            print_status(f"‚ö†Ô∏è Database verification warning: {e}", "warn")

    def _optimize_database(self):
        """Refresh query-planner statistics and reclaim free pages if worthwhile.

        Runs ``ANALYZE``, then compares the freelist size with the total page
        count and runs an incremental vacuum when more than 10% of the pages
        are free. Any exception is reported as a warning and not propagated.
        """
        try:
            with self.get_cursor() as cursor:
                # Analyze tables for query optimization
                cursor.execute("ANALYZE")

                # Check if vacuum is needed
                cursor.execute("PRAGMA page_count")
                page_count = cursor.fetchone()[0]

                cursor.execute("PRAGMA freelist_count")
                freelist_count = cursor.fetchone()[0]

                # Vacuum if more than 10% free pages
                if page_count > 0 and (freelist_count / page_count) > 0.1:
                    print_status("üîß Running database vacuum...", "info")
                    # incremental_vacuum releases pages as the statement is
                    # stepped; execute() alone does not run it to completion,
                    # so drain the result to release every free page.
                    cursor.execute("PRAGMA incremental_vacuum")
                    cursor.fetchall()
                    print_status("‚úÖ Database optimized", "success")

        except Exception as e:
            print_status(f"‚ö†Ô∏è Database optimization warning: {e}", "warn")

    def _track_performance(self, operation, duration_ms, rows_affected=0, success=True):
        """Track operation performance metrics"""
        try:
            with self.get_cursor() as cursor:
                cursor.execute('''
                    INSERT INTO performance_metrics
                    (timestamp, operation, duration_ms, rows_affected, success)
                    VALUES (?, ?, ?, ?, ?)
                ''', (
                    datetime.now(timezone.utc).isoformat(),
                    operation,
                    duration_ms,
                    rows_affected,
                    success
                ))
        except Exception as e:
            print_status(f"‚ö†Ô∏è Metrics tracking failed: {e}", "debug")

    def store_new_signals(self, aggregated_signals, current_iteration):
        """Insert one pending trade per valid (pair, timeframe) signal.

        A signal is valid when its 'live', 'SL' and 'TP' values are all
        greater than zero. Missing, ``None`` or non-numeric values count as
        invalid and the signal is tallied as failed instead of raising.

        Args:
            aggregated_signals: Mapping of pair -> payload dict whose
                'signals' entry maps timeframe name -> signal dict. The signal
                dict is read for 'live', 'SL', 'TP', 'sgd_pred', 'rf_pred',
                'signal' and 'confidence'.
            current_iteration: Pipeline iteration number stored with each row.

        Returns:
            int: number of rows inserted; 0 when there was nothing valid to
            store or the insert failed.
        """
        if not aggregated_signals:
            print_status("‚ö†Ô∏è No signals to store", "warn")
            return 0

        def _is_positive(value):
            """Return whether value compares greater than zero."""
            try:
                return value > 0
            except TypeError:
                # None, strings and other non-comparable payloads are not
                # usable prices; reject them rather than crash the batch.
                return False

        start_time = time.time()
        stored_count = 0
        failed_count = 0
        required_fields = ('live', 'SL', 'TP')

        # Prepare batch data
        batch_data = []

        for pair, pair_data in aggregated_signals.items():
            # Tolerate a None payload or a None 'signals' entry.
            signals = (pair_data or {}).get('signals') or {}

            for tf_name, signal_data in signals.items():
                if not signal_data:
                    continue

                # Validate required fields
                if not all(_is_positive(signal_data.get(f, 0)) for f in required_fields):
                    failed_count += 1
                    continue

                batch_data.append((
                    datetime.now(timezone.utc).isoformat(),
                    current_iteration,
                    pair,
                    tf_name,
                    signal_data.get('sgd_pred'),
                    signal_data.get('rf_pred'),
                    signal_data.get('signal'),
                    signal_data.get('live', 0),
                    signal_data.get('SL', 0),
                    signal_data.get('TP', 0),
                    signal_data.get('confidence', 0.5)
                ))

        if not batch_data:
            print_status("‚ö†Ô∏è No valid signals to store", "warn")
            return 0

        try:
            # The rows and the execution_log entry are written in a single
            # transaction under the instance lock.
            with self.lock, self.get_cursor() as cursor:
                # Batch insert
                cursor.executemany('''
                    INSERT INTO pending_trades
                    (created_at, iteration, pair, timeframe,
                     sgd_prediction, rf_prediction, ensemble_prediction,
                     entry_price, sl_price, tp_price, confidence)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                ''', batch_data)

                stored_count = len(batch_data)

                # Log execution
                cursor.execute('''
                    INSERT INTO execution_log
                    (timestamp, iteration, status, trades_stored, duration_seconds)
                    VALUES (?, ?, 'signals_stored', ?, ?)
                ''', (
                    datetime.now(timezone.utc).isoformat(),
                    current_iteration,
                    stored_count,
                    time.time() - start_time
                ))

            duration_ms = (time.time() - start_time) * 1000
            self._track_performance('store_signals', duration_ms, stored_count, True)

            print_status(
                f"üíæ Stored {stored_count} trades in {duration_ms:.0f}ms "
                f"({failed_count} failed)",
                "success"
            )
            return stored_count

        except sqlite3.Error as e:
            print_status(f"‚ùå Batch insert failed: {e}", "error")
            self._track_performance('store_signals', 0, 0, False)
            return 0

    def evaluate_pending_trades(self, current_prices, current_iteration):
        """Check aged pending trades against current prices and record outcomes.

        Loads up to 1000 unevaluated pending trades older than
        ``self.min_age_hours`` (oldest first). Each trade is checked once per
        model (SGD, RandomForest, Ensemble) against the current price of its
        pair. Every model whose take-profit or stop-loss level has been
        reached yields one completed_trades row.

        Args:
            current_prices: Mapping of pair -> current price. Pairs that are
                missing or have a non-positive price are skipped.
            current_iteration: Pipeline iteration number stored with the
                completed rows and the execution log entry.

        Returns:
            dict: model name -> {'closed_trades', 'wins', 'losses',
            'total_pnl', 'trades', 'accuracy'}. An empty dict is returned when
            no prices were given, the fetch failed, no trade is old enough, or
            the batch insert failed.
        """
        if not current_prices:
            print_status("‚ö†Ô∏è No current prices provided", "warn")
            return {}

        start_time = time.time()
        # ISO timestamp cutoff: only trades created before this are eligible.
        min_age = (datetime.now(timezone.utc) - timedelta(hours=self.min_age_hours)).isoformat()

        try:
            with self.lock, self.get_cursor() as cursor:
                cursor.execute('''
                    SELECT id, pair, timeframe, sgd_prediction, rf_prediction,
                           ensemble_prediction, entry_price, sl_price, tp_price,
                           created_at, iteration
                    FROM pending_trades
                    WHERE evaluated = 0 AND created_at < ?
                    ORDER BY created_at ASC
                    LIMIT 1000
                ''', (min_age,))

                pending_trades = cursor.fetchall()

        except sqlite3.Error as e:
            print_status(f"‚ùå Failed to fetch pending trades: {e}", "error")
            return {}

        if not pending_trades:
            print_status(
                f"‚ÑπÔ∏è No trades old enough to evaluate (need {self.min_age_hours}+ hours)",
                "info"
            )
            return {}

        print_status(
            f"üîç Evaluating {len(pending_trades)} trades from previous iteration(s)",
            "info"
        )

        # Per-model accumulator, created lazily on first closed trade.
        results_by_model = defaultdict(lambda: {
            'closed_trades': 0,
            'wins': 0,
            'losses': 0,
            'total_pnl': 0.0,
            'trades': []
        })

        evaluated_count = 0
        skipped_count = 0
        completed_trades_batch = []
        evaluated_ids = []

        for trade in pending_trades:
            (trade_id, pair, timeframe, sgd_pred, rf_pred, ensemble_pred,
             entry_price, sl_price, tp_price, created_at, created_iteration) = trade

            current_price = current_prices.get(pair, 0)

            # Skipped trades are not added to evaluated_ids, so they stay pending.
            if current_price <= 0:
                skipped_count += 1
                continue

            # Validate prices
            if not self._validate_trade_prices(entry_price, sl_price, tp_price, current_price):
                skipped_count += 1
                continue

            # Evaluate for each model
            for model_name, prediction in [
                ('SGD', sgd_pred),
                ('RandomForest', rf_pred),
                ('Ensemble', ensemble_pred)
            ]:
                if prediction is None:
                    continue

                # Check if TP or SL was hit
                hit_tp, hit_sl, exit_price = self._evaluate_trade_outcome(
                    prediction, current_price, tp_price, sl_price
                )

                # If trade closed, record result
                if exit_price:
                    pnl = self._calculate_pnl(prediction, entry_price, exit_price)
                    pnl_percent = (pnl / entry_price) * 100
                    duration_hours = self._calculate_duration_hours(created_at)
                    price_movement = abs(exit_price - entry_price) / entry_price * 100

                    # Tuple order must match the INSERT column list below.
                    completed_trades_batch.append((
                        trade_id, created_at, datetime.now(timezone.utc).isoformat(),
                        created_iteration, current_iteration,
                        pair, timeframe, model_name, entry_price, exit_price,
                        sl_price, tp_price, prediction, hit_tp, pnl, pnl_percent,
                        duration_hours, price_movement
                    ))

                    # Accumulate results
                    results_by_model[model_name]['closed_trades'] += 1
                    results_by_model[model_name]['total_pnl'] += pnl

                    # A closed trade that did not hit TP is counted as a loss.
                    if hit_tp:
                        results_by_model[model_name]['wins'] += 1
                    else:
                        results_by_model[model_name]['losses'] += 1

                    results_by_model[model_name]['trades'].append({
                        'pair': pair,
                        'timeframe': timeframe,
                        'pnl': pnl,
                        'hit_tp': hit_tp
                    })

                    status = "WIN ‚úÖ" if hit_tp else "LOSS ‚ùå"
                    print_status(
                        f"{status} {model_name}: {pair} {timeframe} "
                        f"P&L=${pnl:.5f} ({pnl_percent:+.2f}%) [{duration_hours:.1f}h]",
                        "success" if hit_tp else "warn"
                    )

            # NOTE(review): the id is collected whether or not any model closed,
            # but the UPDATE that marks ids as evaluated only runs when this
            # call produced at least one completed row. So a trade with no
            # closed model is marked evaluated or left pending depending on
            # other trades in the batch - confirm this is intended.
            evaluated_ids.append(trade_id)
            evaluated_count += 1

        # Batch insert completed trades
        if completed_trades_batch:
            try:
                # Insert, mark-as-evaluated and log entry share one transaction.
                with self.lock, self.get_cursor() as cursor:
                    cursor.executemany('''
                        INSERT INTO completed_trades
                        (pending_trade_id, created_at, evaluated_at,
                         iteration_created, iteration_evaluated,
                         pair, timeframe, model_used, entry_price, exit_price,
                         sl_price, tp_price, prediction, hit_tp, pnl, pnl_percent,
                         duration_hours, price_movement)
                        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                    ''', completed_trades_batch)

                    # Mark as evaluated
                    if evaluated_ids:
                        # Only '?' placeholders are interpolated; the ids
                        # themselves are bound as parameters.
                        placeholders = ','.join('?' * len(evaluated_ids))
                        cursor.execute(f'''
                            UPDATE pending_trades
                            SET evaluated = 1
                            WHERE id IN ({placeholders})
                        ''', evaluated_ids)

                    # Log execution
                    cursor.execute('''
                        INSERT INTO execution_log
                        (timestamp, iteration, status, trades_evaluated, duration_seconds)
                        VALUES (?, ?, 'trades_evaluated', ?, ?)
                    ''', (
                        datetime.now(timezone.utc).isoformat(),
                        current_iteration,
                        evaluated_count,
                        time.time() - start_time
                    ))

                duration_ms = (time.time() - start_time) * 1000
                self._track_performance('evaluate_trades', duration_ms, evaluated_count, True)

                print_status(
                    f"‚úÖ Evaluated {evaluated_count} trades in {duration_ms:.0f}ms "
                    f"({skipped_count} skipped)",
                    "success"
                )

            except sqlite3.Error as e:
                print_status(f"‚ùå Evaluation batch insert failed: {e}", "error")
                return {}

        # Calculate accuracies
        for model_name, results in results_by_model.items():
            if results['closed_trades'] > 0:
                results['accuracy'] = (results['wins'] / results['closed_trades']) * 100
            else:
                results['accuracy'] = 0.0

        # Update model stats cache
        self._update_stats_cache()

        # Return a plain dict so callers do not create entries by lookup.
        return dict(results_by_model)

    def _validate_trade_prices(self, entry, sl, tp, current):
        """Enhanced price validation"""
        try:
            if any(p <= 0 for p in [entry, sl, tp, current]):
                return False
            if any(not np.isfinite(p) for p in [entry, sl, tp, current]):
                return False
            prices = [entry, sl, tp, current]
            avg_price = sum(prices) / len(prices)
            for price in prices:
                if abs(price - avg_price) / avg_price > 0.5:
                    return False
            sl_distance = abs(sl - entry) / entry
            tp_distance = abs(tp - entry) / entry
            if sl_distance > 0.2 or tp_distance > 0.5:
                return False
            return True
        except:
            return False

    def _evaluate_trade_outcome(self, prediction, current_price, tp_price, sl_price):
        """Determine if trade hit TP or SL"""
        hit_tp = False
        hit_sl = False
        exit_price = None

        try:
            if prediction == 1:  # Long
                if current_price >= tp_price:
                    hit_tp = True
                    exit_price = tp_price
                elif current_price <= sl_price:
                    hit_sl = True
                    exit_price = sl_price
            elif prediction == 0:  # Short
                if current_price <= tp_price:
                    hit_tp = True
                    exit_price = tp_price
                elif current_price >= sl_price:
                    hit_sl = True
                    exit_price = sl_price
        except Exception as e:
            print_status(f"‚ö†Ô∏è Trade evaluation error: {e}", "warn")

        return hit_tp, hit_sl, exit_price

    def _calculate_pnl(self, prediction, entry_price, exit_price):
        """Calculate profit/loss"""
        try:
            if prediction == 1:  # Long
                return exit_price - entry_price
            else:  # Short
                return entry_price - exit_price
        except:
            return 0.0

    def _calculate_duration_hours(self, created_at):
        """Calculate trade duration in hours"""
        try:
            created_dt = datetime.fromisoformat(created_at.replace('Z', '+00:00'))
            duration = (datetime.now(timezone.utc) - created_dt).total_seconds() / 3600
            return max(0, duration)
        except:
            return 0.0

    def _update_stats_cache(self):
        """Recompute model_stats_cache from completed_trades.

        For every (pair, model) combination found in completed_trades and for
        each look-back window of 7, 30 and 90 days, aggregates trade counts,
        P&L figures and average duration, derives a Sharpe-style ratio
        (mean P&L / standard deviation of P&L) and the maximum drawdown of
        the cumulative P&L, and upserts one row into model_stats_cache.
        Combinations with no trades in a window are skipped.

        SQLite errors are reported as a warning and not propagated.
        """
        try:
            with self.lock, self.get_cursor() as cursor:
                cursor.execute('SELECT DISTINCT pair FROM completed_trades')
                pairs = [row[0] for row in cursor.fetchall()]

                cursor.execute('SELECT DISTINCT model_used FROM completed_trades')
                models = [row[0] for row in cursor.fetchall()]

                for pair in pairs:
                    for model in models:
                        for days in [7, 30, 90]:
                            # Window start as an ISO string, compared
                            # against the stored evaluated_at text.
                            since = (datetime.now(timezone.utc) - timedelta(days=days)).isoformat()

                            cursor.execute('''
                                SELECT
                                    COUNT(*) as total,
                                    SUM(CASE WHEN hit_tp THEN 1 ELSE 0 END) as wins,
                                    SUM(CASE WHEN NOT hit_tp THEN 1 ELSE 0 END) as losses,
                                    SUM(pnl) as total_pnl,
                                    AVG(pnl) as avg_pnl,
                                    MAX(pnl) as max_pnl,
                                    MIN(pnl) as min_pnl,
                                    AVG(duration_hours) as avg_duration
                                FROM completed_trades
                                WHERE pair = ? AND model_used = ? AND evaluated_at > ?
                            ''', (pair, model, since))

                            result = cursor.fetchone()
                            if not result or not result[0]:
                                continue

                            total, wins, losses, total_pnl, avg_pnl, max_pnl, min_pnl, avg_duration = result
                            accuracy = (wins / total * 100) if total > 0 else 0.0

                            # Drawdown is computed on the cumulative P&L, so
                            # the series must be chronological. Without an
                            # ORDER BY the row order is unspecified.
                            cursor.execute('''
                                SELECT pnl FROM completed_trades
                                WHERE pair = ? AND model_used = ? AND evaluated_at > ?
                                ORDER BY evaluated_at ASC, id ASC
                            ''', (pair, model, since))

                            pnls = [row[0] for row in cursor.fetchall()]
                            sharpe_ratio = 0.0
                            max_drawdown = 0.0

                            # Both metrics need at least two trades.
                            if len(pnls) > 1:
                                pnl_std = np.std(pnls)
                                if pnl_std > 0:
                                    sharpe_ratio = float((avg_pnl or 0) / pnl_std)
                                cumulative_pnl = np.cumsum(pnls)
                                # Largest drop of the cumulative P&L below its
                                # running peak.
                                running_max = np.maximum.accumulate(cumulative_pnl)
                                drawdown = running_max - cumulative_pnl
                                max_drawdown = float(np.max(drawdown)) if len(drawdown) > 0 else 0.0

                            # The table's UNIQUE(pair, model_name, days)
                            # constraint makes this an upsert.
                            cursor.execute('''
                                INSERT OR REPLACE INTO model_stats_cache
                                (updated_at, pair, model_name, days, total_trades,
                                 winning_trades, losing_trades, accuracy_pct,
                                 total_pnl, avg_pnl, max_pnl, min_pnl,
                                 sharpe_ratio, max_drawdown, avg_duration_hours)
                                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                            ''', (
                                datetime.now(timezone.utc).isoformat(),
                                pair, model, days, total, wins or 0, losses or 0,
                                accuracy, total_pnl or 0.0, avg_pnl or 0.0,
                                max_pnl or 0.0, min_pnl or 0.0,
                                sharpe_ratio, max_drawdown, avg_duration or 0.0
                            ))

                print_status("‚úÖ Stats cache updated", "debug")

        except sqlite3.Error as e:
            print_status(f"‚ö†Ô∏è Stats cache update failed: {e}", "warn")

    def get_model_performance(self, pair, model_name, days=7):
        """Return the cached performance metrics for one pair/model/window.

        Reads the matching row of ``model_stats_cache``.

        Args:
            pair: Value matched against the ``pair`` column.
            model_name: Value matched against the ``model_name`` column.
            days: Value matched against the ``days`` column.

        Returns:
            dict: Metrics keyed by name.  A cache hit also carries
            ``'updated_at'``; a cache miss or an ``sqlite3.Error`` yields
            the all-zero metrics, which do not include that key.
        """
        def zeroed_metrics():
            # Built fresh on every call so no two callers share one dict.
            return {
                'total_trades': 0,
                'winning_trades': 0,
                'losing_trades': 0,
                'accuracy': 0.0,
                'total_pnl': 0.0,
                'avg_pnl': 0.0,
                'max_pnl': 0.0,
                'min_pnl': 0.0,
                'sharpe_ratio': 0.0,
                'max_drawdown': 0.0,
                'avg_duration_hours': 0.0
            }

        # Result keys, listed in the same order as the SELECT columns below.
        field_names = (
            'total_trades', 'winning_trades', 'losing_trades', 'accuracy',
            'total_pnl', 'avg_pnl', 'max_pnl', 'min_pnl',
            'sharpe_ratio', 'max_drawdown', 'avg_duration_hours', 'updated_at',
        )

        try:
            with self.get_cursor() as cursor:
                cursor.execute('''
                    SELECT total_trades, winning_trades, losing_trades,
                           accuracy_pct, total_pnl, avg_pnl, max_pnl, min_pnl,
                           sharpe_ratio, max_drawdown, avg_duration_hours, updated_at
                    FROM model_stats_cache
                    WHERE pair = ? AND model_name = ? AND days = ?
                ''', (pair, model_name, days))

                row = cursor.fetchone()
                if not row:
                    return zeroed_metrics()
                return dict(zip(field_names, row))

        except sqlite3.Error as e:
            print_status(f"‚ö†Ô∏è Failed to get model performance: {e}", "warn")
            return zeroed_metrics()

    def get_best_model(self, pair, days=7, min_trades=3):
        """Pick the top-ranked cached model for a pair.

        Candidates are rows of ``model_stats_cache`` for ``pair`` and
        ``days`` with at least ``min_trades`` trades, ranked by accuracy,
        then Sharpe ratio, then total PnL (all descending).

        Returns:
            str: The winning ``model_name``, or ``'Ensemble'`` when no row
            qualifies or the query raises ``sqlite3.Error``.
        """
        try:
            with self.get_cursor() as cursor:
                cursor.execute('''
                    SELECT model_name, accuracy_pct, total_trades,
                           total_pnl, sharpe_ratio, avg_pnl
                    FROM model_stats_cache
                    WHERE pair = ? AND days = ? AND total_trades >= ?
                    ORDER BY
                        accuracy_pct DESC,
                        sharpe_ratio DESC,
                        total_pnl DESC
                    LIMIT 1
                ''', (pair, days, min_trades))

                top_row = cursor.fetchone()

                if top_row:
                    # Trade count and average PnL are selected but not reported.
                    winner, acc_pct, _trade_count, pnl_sum, sharpe_val, _mean_pnl = top_row
                    summary = (
                        f"üèÜ Best model for {pair}: {winner} "
                        f"(Acc: {acc_pct:.1f}%, Sharpe: {sharpe_val:.2f}, "
                        f"PnL: ${pnl_sum:.5f})"
                    )
                    print_status(summary, "performance")
                    return winner

        except sqlite3.Error as e:
            print_status(f"‚ö†Ô∏è Failed to get best model: {e}", "warn")

        # Fallback when nothing qualified or the lookup failed.
        return 'Ensemble'

    def get_database_stats(self):
        """Collect headline statistics about the trade database.

        Returns:
            dict: Keys are added in this order as each query succeeds:
            ``pending_trades``, ``completed_trades``, ``total_pnl``,
            ``overall_accuracy``, ``avg_duration_hours``, ``best_model``,
            ``best_model_trades``, ``best_model_wins``, ``db_size_mb``,
            ``trades_last_24h``.  If any step raises, a warning is logged
            and the keys gathered so far are returned.
        """
        stats = {}

        try:
            with self.get_cursor() as cursor:
                def first_row(sql, params=()):
                    # Run one statement and hand back its first result row.
                    cursor.execute(sql, params)
                    return cursor.fetchone()

                stats['pending_trades'] = first_row(
                    'SELECT COUNT(*) FROM pending_trades WHERE evaluated = 0')[0]
                stats['completed_trades'] = first_row(
                    'SELECT COUNT(*) FROM completed_trades')[0]

                # SUM/AVG yield NULL on an empty table; report those as 0.0.
                stats['total_pnl'] = first_row(
                    'SELECT SUM(pnl) FROM completed_trades')[0] or 0.0

                tally = first_row('''
                    SELECT
                        COUNT(*) as total,
                        SUM(CASE WHEN hit_tp THEN 1 ELSE 0 END) as wins
                    FROM completed_trades
                ''')
                if tally and tally[0] > 0:
                    stats['overall_accuracy'] = (tally[1] / tally[0]) * 100
                else:
                    stats['overall_accuracy'] = 0.0

                stats['avg_duration_hours'] = first_row(
                    'SELECT AVG(duration_hours) FROM completed_trades')[0] or 0.0

                # "Best" here means the model with the most take-profit hits.
                leader = first_row('''
                    SELECT model_used, COUNT(*) as trades,
                           SUM(CASE WHEN hit_tp THEN 1 ELSE 0 END) as wins
                    FROM completed_trades
                    GROUP BY model_used
                    ORDER BY wins DESC
                    LIMIT 1
                ''')
                best_name, best_trades, best_wins = leader if leader else ('None', 0, 0)
                stats['best_model'] = best_name
                stats['best_model_trades'] = best_trades
                stats['best_model_wins'] = best_wins

                stats['db_size_mb'] = (
                    self.db_path.stat().st_size / (1024 * 1024)
                    if self.db_path.exists()
                    else 0.0
                )

                day_ago = (datetime.now(timezone.utc) - timedelta(days=1)).isoformat()
                stats['trades_last_24h'] = first_row('''
                    SELECT COUNT(*) FROM completed_trades
                    WHERE evaluated_at > ?
                ''', (day_ago,))[0]

        except Exception as e:
            print_status(f"‚ö†Ô∏è Failed to get database stats: {e}", "warn")

        return stats

    def cleanup_old_data(self, days_to_keep=90):
        """Purge rows older than the retention window, after taking a backup.

        Deletes evaluated ``pending_trades`` rows plus ``execution_log`` and
        ``performance_metrics`` rows whose timestamp precedes the cutoff
        (now, in UTC, minus ``days_to_keep`` days), reports the row counts
        and then calls ``_optimize_database``.  An ``sqlite3.Error`` is
        logged rather than raised.
        """
        retention = timedelta(days=days_to_keep)
        cutoff_date = (datetime.now(timezone.utc) - retention).isoformat()
        cutoff_params = (cutoff_date,)

        try:
            self.create_backup()

            with self.lock, self.get_cursor() as cursor:
                # Only trades already marked evaluated are eligible.
                cursor.execute('''
                    DELETE FROM pending_trades
                    WHERE evaluated = 1 AND created_at < ?
                ''', cutoff_params)
                removed_pending = cursor.rowcount

                cursor.execute('''
                    DELETE FROM execution_log
                    WHERE timestamp < ?
                ''', cutoff_params)
                removed_logs = cursor.rowcount

                cursor.execute('''
                    DELETE FROM performance_metrics
                    WHERE timestamp < ?
                ''', cutoff_params)
                removed_metrics = cursor.rowcount

                summary = (
                    f"üßπ Cleanup complete: Removed {removed_pending} pending trades, "
                    f"{removed_logs} logs, {removed_metrics} metrics"
                )
                print_status(summary, "success")

                self._optimize_database()

        except sqlite3.Error as e:
            print_status(f"‚ùå Cleanup failed: {e}", "error")

    def create_backup(self):
        """Copy the database file into ``BACKUP_FOLDER`` and prune old copies.

        Does nothing when the database file is absent.  Otherwise writes a
        timestamped ``db_backup_*.db`` copy and removes the
        lowest-sorting backups until at most five remain.  Any failure is
        logged as a warning instead of being raised.
        """
        try:
            if not self.db_path.exists():
                return

            stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            backup_path = BACKUP_FOLDER / f"db_backup_{stamp}.db"
            shutil.copy2(self.db_path, backup_path)

            # The timestamp format sorts chronologically, so the surplus
            # entries at the front of the sorted list are the oldest.
            existing = sorted(BACKUP_FOLDER.glob("db_backup_*.db"))
            surplus = max(len(existing) - 5, 0)
            for stale in existing[:surplus]:
                stale.unlink()
                print_status(f"üóëÔ∏è Removed old backup: {stale.name}", "debug")

            print_status(f"üíæ Backup created: {backup_path.name}", "success")

        except Exception as e:
            print_status(f"‚ö†Ô∏è Backup failed: {e}", "warn")

    def get_performance_report(self, days=7):
        """Build a per-model / per-pair summary from ``model_stats_cache``.

        Args:
            days: Value matched against the cache's ``days`` column.

        Returns:
            dict: ``timestamp`` (UTC ISO string), ``period_days``,
            ``models`` (totals, unweighted mean accuracy and per-pair
            entries for each model), ``pairs`` (trade total and the most
            accurate model for each pair) and ``overall`` (the result of
            ``get_database_stats``).  On any exception a warning is logged
            and whatever was assembled so far is returned.
        """
        report = {
            'timestamp': datetime.now(timezone.utc).isoformat(),
            'period_days': days,
            'models': {},
            'pairs': {},
            'overall': {}
        }
        by_model = report['models']
        by_pair = report['pairs']

        try:
            with self.get_cursor() as cursor:
                cursor.execute('''
                    SELECT pair, model_name, total_trades, accuracy_pct,
                           total_pnl, sharpe_ratio, max_drawdown
                    FROM model_stats_cache
                    WHERE days = ?
                    ORDER BY total_trades DESC
                ''', (days,))

                # Sharpe ratio and drawdown are selected but not used here.
                for pair, model, trades, acc, pnl, _sharpe, _drawdown in cursor.fetchall():
                    model_entry = by_model.setdefault(model, {
                        'total_trades': 0,
                        'total_pnl': 0.0,
                        'avg_accuracy': 0.0,
                        'pairs': []
                    })
                    model_entry['total_trades'] += trades
                    model_entry['total_pnl'] += pnl
                    model_entry['pairs'].append({
                        'pair': pair,
                        'accuracy': acc,
                        'pnl': pnl
                    })

                    pair_entry = by_pair.setdefault(pair, {
                        'total_trades': 0,
                        'best_model': None,
                        'best_accuracy': 0.0
                    })
                    pair_entry['total_trades'] += trades
                    # Strict comparison: a model at exactly 0.0 never wins.
                    if acc > pair_entry['best_accuracy']:
                        pair_entry['best_model'] = model
                        pair_entry['best_accuracy'] = acc

                for entry in by_model.values():
                    per_pair = entry['pairs']
                    if per_pair:
                        entry['avg_accuracy'] = sum(p['accuracy'] for p in per_pair) / len(per_pair)

                report['overall'] = self.get_database_stats()

        except Exception as e:
            print_status(f"‚ö†Ô∏è Failed to generate report: {e}", "warn")

        return report

    def close(self):
        """Optimise and close the database connection; safe to call repeatedly.

        The connection handle is cleared afterwards, so a second call (for
        example the one made by ``__del__`` after an explicit ``close()``)
        is a no-op instead of operating on an already-closed connection.
        Instances that never got a ``conn`` attribute are tolerated as well.
        Errors are logged as warnings, never raised.
        """
        conn = getattr(self, 'conn', None)
        if not conn:
            return

        try:
            try:
                self._optimize_database()
            finally:
                # Release the handle even when the optimisation step fails.
                conn.close()
            print_status("‚úÖ Database connection closed", "success")
        except Exception as e:
            print_status(f"‚ö†Ô∏è Error closing database: {e}", "warn")
        finally:
            # Mark the instance as closed so later calls return early.
            self.conn = None

    def __del__(self):
        """Destructor: delegate to ``close()`` when the instance is collected."""
        # NOTE(review): this also runs after an explicit close() call, so
        # close() ends up being invoked twice for such instances.
        self.close()


# ======================================================
# 🎯 USAGE EXAMPLE & TEST
# ======================================================

if __name__ == "__main__":
    # Smoke test for EnhancedTradeMemoryDatabase: stores one synthetic
    # signal, evaluates it, then prints cached performance and a report.
    # NOTE(review): this writes a synthetic EUR/USD signal into whatever
    # database EnhancedTradeMemoryDatabase() opens (PERSISTENT_DB is printed
    # at the end) - confirm it is meant to run against the real file.
    print_status("="*60, "info")
    print_status("ENHANCED FX PIPELINE v3.7 - Database Test", "success")
    print_status("="*60, "info")

    # Initialize database
    db = EnhancedTradeMemoryDatabase()

    # Get database stats
    stats = db.get_database_stats()
    print_status("\nüìä Current Database Statistics:", "info")
    for key, value in stats.items():
        print(f"  {key}: {value}")

    # Example: Store test signals
    # Layout: pair -> 'signals' -> timeframe -> signal fields.
    test_signals = {
        'EUR/USD': {
            'signals': {
                'H1': {
                    'live': 1.0950,
                    'SL': 1.0920,
                    'TP': 1.1000,
                    'signal': 1,
                    'sgd_pred': 1,
                    'rf_pred': 1,
                    'confidence': 0.85
                }
            }
        }
    }

    stored = db.store_new_signals(test_signals, current_iteration=1)
    print_status(f"\n‚úÖ Stored {stored} test signals", "success")

    # Example: Evaluate trades
    # 1.0980 lies between the SL (1.0920) and TP (1.1000) stored above.
    current_prices = {
        'EUR/USD': 1.0980
    }

    results = db.evaluate_pending_trades(current_prices, current_iteration=2)
    if results:
        print_status("\nüìà Evaluation Results:", "info")
        for model, data in results.items():
            print(f"  {model}: {data['wins']}/{data['closed_trades']} wins "
                  f"({data['accuracy']:.1f}% accuracy)")

    # Get model performance
    # Models without cached trades return total_trades == 0 and are skipped.
    print_status("\nüìä Model Performance (Last 7 days):", "info")
    for model in ['SGD', 'RandomForest', 'Ensemble']:
        perf = db.get_model_performance('EUR/USD', model, days=7)
        if perf['total_trades'] > 0:
            print(f"  {model}:")
            print(f"    Trades: {perf['total_trades']}")
            print(f"    Accuracy: {perf['accuracy']:.1f}%")
            print(f"    Total PnL: ${perf['total_pnl']:.5f}")
            print(f"    Sharpe Ratio: {perf['sharpe_ratio']:.2f}")

    # Get best model
    # get_best_model returns 'Ensemble' when no cached row has >= min_trades.
    best = db.get_best_model('EUR/USD', days=7, min_trades=3)
    print_status(f"\nüèÜ Best Model: {best}", "performance")

    # Generate performance report
    report = db.get_performance_report(days=7)
    print_status("\nüìã Performance Report:", "info")
    print(f"  Total Models: {len(report['models'])}")
    print(f"  Total Pairs: {len(report['pairs'])}")

    for model_name, model_data in report['models'].items():
        print(f"\n  {model_name}:")
        print(f"    Total Trades: {model_data['total_trades']}")
        print(f"    Total PnL: ${model_data['total_pnl']:.5f}")
        print(f"    Avg Accuracy: {model_data['avg_accuracy']:.1f}%")

    # Cleanup and close
    # cleanup_old_data takes a backup first, then deletes rows older than 90 days.
    print_status("\nüßπ Running cleanup...", "info")
    db.cleanup_old_data(days_to_keep=90)
    db.close()

    print_status("\n‚úÖ Enhanced Database v3.7 Test Complete!", "success")
    print_status(f"‚úÖ Database location: {PERSISTENT_DB}", "success")
    print_status(f"‚úÖ Fixed for GitHub Actions (no nested paths)", "success")

In [None]:
#!/usr/bin/env python3
"""
VERSION 3.7 – Unified Loader + Merge Pickles (Production Ready)
✅ UPDATED: Paths for GitHub Actions compatibility
✅ UPDATED: Environment detection (Colab/GHA/Local)
✅ KEPT: All original logic from Document 6
"""

from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
import pandas as pd
import numpy as np
import json
import warnings
import ta
from ta.momentum import WilliamsRIndicator
from ta.volatility import AverageTrueRange
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime

# ======================================================
# 0️⃣ UPDATED: Environment Detection & Path Setup
# ======================================================
# `os` is required for the GITHUB_ACTIONS check below.  Import it here so the
# cell works on its own instead of depending on an earlier cell's import.
import os

# Colab is detected by whether its helper package can be imported.
try:
    import google.colab
    IN_COLAB = True
    ENV_NAME = "Google Colab"
except ImportError:
    IN_COLAB = False
    ENV_NAME = "Local/GitHub Actions"

# Only the presence of the variable is checked, not its value.
IN_GHA = "GITHUB_ACTIONS" in os.environ

if IN_GHA:
    ENV_NAME = "GitHub Actions"

print(f"üåç Detected Environment: {ENV_NAME}")

# Choose the root and repo folders for the detected environment.
# Colab is checked first, so it wins if both flags happen to be set.
if IN_COLAB:
    ROOT_DIR = Path("/content/forex-alpha-models")
    ROOT_DIR.mkdir(parents=True, exist_ok=True)
    REPO_FOLDER = ROOT_DIR / "forex-ai-models"
elif IN_GHA:
    # GitHub Actions: Use current directory
    ROOT_DIR = Path.cwd()
    REPO_FOLDER = ROOT_DIR
    print(f"üìÇ GitHub Actions: Using repo root: {ROOT_DIR}")
else:
    # Local: Use subdirectory
    ROOT_DIR = Path("./forex-alpha-models")
    ROOT_DIR.mkdir(parents=True, exist_ok=True)
    REPO_FOLDER = ROOT_DIR / "forex-ai-models"

# Working folders, all directly under ROOT_DIR.
CSV_FOLDER = ROOT_DIR / "csvs"
PICKLE_FOLDER = ROOT_DIR / "pickles"  # previously "merged_data_pickles"
TEMP_PICKLE_FOLDER = ROOT_DIR / "temp_pickles"
LOGS_FOLDER = ROOT_DIR / "logs"

for folder in [CSV_FOLDER, PICKLE_FOLDER, TEMP_PICKLE_FOLDER, LOGS_FOLDER, REPO_FOLDER]:
    folder.mkdir(parents=True, exist_ok=True)

print(f"‚úÖ Root Directory: {ROOT_DIR}")
print(f"‚úÖ CSV Folder: {CSV_FOLDER}")
print(f"‚úÖ Pickle Folder: {PICKLE_FOLDER}")
print(f"‚úÖ Temp Folder: {TEMP_PICKLE_FOLDER}")
print(f"‚úÖ Repo Folder: {REPO_FOLDER}")

# Signals file that run_unified_pipeline reads in its first step.
JSON_FILE = REPO_FOLDER / "latest_signals.json"

print(f"‚úÖ JSON File: {JSON_FILE}")

# ======================================================
# 1️⃣ Safe Indicator Generator (UNCHANGED from Document 6)
# ======================================================
def add_indicators(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with technical indicators added.

    Steps:
      1. Missing ``open``/``high``/``low``/``close`` columns are created as
         0.0, then only rows whose four prices are all strictly positive are
         kept.  A frame that lacked any of those columns therefore comes
         back empty.
      2. The surviving prices are duplicated into ``raw_*`` columns.
      3. SMA/EMA (10 and 50), RSI-14, Williams %R-14 and ATR-14 are added
         when enough rows exist; ATR falls back to ``1e-4`` otherwise.
      4. Every numeric column except the price and ``raw_*`` columns is
         min-max scaled in place (this includes the indicators and ATR).

    Args:
        df: Input frame; it is never modified.

    Returns:
        pd.DataFrame: The enriched frame, possibly empty.
    """
    price_cols = ["open", "high", "low", "close"]

    df = df.copy()
    for col in price_cols:
        if col not in df.columns:
            df[col] = 0.0

    # .copy() detaches the filtered frame from its parent so the column
    # assignments below do not raise pandas' SettingWithCopyWarning.
    df = df[(df[price_cols] > 0).all(axis=1)].copy()
    if df.empty:
        return df

    # --- Preserve raw OHLC prices for GA ---
    for col in price_cols:
        df[f"raw_{col}"] = df[col].copy()

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=RuntimeWarning)
        warnings.simplefilter("ignore", category=UserWarning)

        n_rows = len(df)

        try:
            if n_rows >= 10:
                df['SMA_10'] = ta.trend.sma_indicator(df['close'], 10)
                df['EMA_10'] = ta.trend.ema_indicator(df['close'], 10)
            if n_rows >= 50:
                df['SMA_50'] = ta.trend.sma_indicator(df['close'], 50)
                df['EMA_50'] = ta.trend.ema_indicator(df['close'], 50)
            if n_rows >= 14:
                df['RSI_14'] = ta.momentum.rsi(df['close'], 14)
                df['Williams_%R'] = WilliamsRIndicator(df['high'], df['low'], df['close'], 14).williams_r()
        except Exception as e:
            # Best effort: indicators computed before the failure are kept.
            print(f"‚ö†Ô∏è Indicator calculation failed: {e}")

        # --- Safe ATR ---
        try:
            window = 14
            if n_rows >= window:
                df['ATR'] = AverageTrueRange(
                    df['high'], df['low'], df['close'], window=window
                ).average_true_range().fillna(1e-5).clip(lower=1e-4)
            else:
                df['ATR'] = 1e-4
        except Exception as e:
            df['ATR'] = 1e-4
            print(f"‚ö†Ô∏è ATR calculation failed: {e}")

        # --- Scale only non-price numeric columns ---
        numeric_cols = [c for c in df.select_dtypes(include=[np.number]).columns if not df[c].isna().all()]
        protected_cols = [
            "open", "high", "low", "close",
            "raw_open", "raw_high", "raw_low", "raw_close"
        ]
        numeric_cols = [c for c in numeric_cols if c not in protected_cols]

        if numeric_cols:
            scaler = MinMaxScaler()
            df[numeric_cols] = scaler.fit_transform(df[numeric_cols].fillna(0) + 1e-8)

    return df

# ======================================================
# 2️⃣ Safe CSV Processing (UNCHANGED from Document 6)
# ======================================================
def process_csv_file(csv_file: Path, save_folder: Path):
    """Convert one CSV file into an indicator-enriched pickle.

    The CSV is read with its first column as a parsed date index, column
    names are normalised (trimmed, lower-cased, spaces to underscores) and
    the frame is passed through ``add_indicators``.

    Args:
        csv_file: CSV to read.
        save_folder: Folder that receives ``<csv stem>.pkl``.

    Returns:
        The written pickle path, or ``None`` when the CSV is empty, has no
        rows left after price filtering, or any step raises.
    """
    try:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=pd.errors.ParserWarning)
            frame = pd.read_csv(csv_file, index_col=0, parse_dates=True)

        if frame.empty:
            print(f"‚ö™ Skipped empty CSV: {csv_file.name}")
            return None

        normalised = [name.strip().lower().replace(" ", "_") for name in frame.columns]
        frame.columns = normalised

        enriched = add_indicators(frame)
        if enriched.empty:
            print(f"‚ö™ Skipped CSV after filtering invalid prices: {csv_file.name}")
            return None

        target = save_folder / f"{csv_file.stem}.pkl"
        enriched.to_pickle(target)
        print(f"‚úÖ Processed CSV {csv_file.name} ‚Üí {target.name}")
        return target

    except Exception as e:
        # Any failure (missing file, parse error, write error) skips the file.
        print(f"‚ùå Failed CSV {csv_file.name}: {e}")
        return None

# ======================================================
# 3️⃣ JSON Processing (UNCHANGED from Document 6)
# ======================================================
def _is_valid_price(value) -> bool:
    """Return True only for a real number that is strictly positive."""
    # bool is a subclass of int, so it is excluded explicitly; the `> 0`
    # comparison also rejects NaN.
    return (
        isinstance(value, (int, float))
        and not isinstance(value, bool)
        and value > 0
    )


def process_json_file(json_file: Path, save_folder: Path):
    """Turn a signals JSON file into one pickle per currency pair.

    Expected layout: ``{"timestamp": ..., "pairs": {pair: {"signals":
    {timeframe: {"live", "SL", "TP", "signal"}}}}}``.  A timeframe entry is
    skipped unless ``live``, ``SL`` and ``TP`` are all positive numbers;
    non-numeric values (for example prices stored as strings) are skipped
    rather than crashing the run.  Null or malformed ``pairs``/``signals``
    sections are treated as empty.

    NOTE(review): the frames built here hold ``live``/``SL``/``TP`` but no
    OHLC columns, and ``add_indicators`` in this file zero-fills missing
    OHLC columns before dropping non-positive rows - so every frame appears
    to come back empty and no pickle is written.  Confirm the intended
    column mapping.

    Args:
        json_file: Path of the signals JSON.
        save_folder: Folder that receives ``<pair with '/' as '_'>.pkl``.

    Returns:
        list: Paths of the pickles written (empty when the file cannot be
        loaded or nothing valid was found).
    """
    try:
        with open(json_file, "r") as f:
            data = json.load(f)
    except Exception as e:
        print(f"‚ùå Failed to load JSON: {e}")
        return []

    if not isinstance(data, dict):
        print(f"‚ùå Failed to load JSON: expected an object, got {type(data).__name__}")
        return []

    signals_data = data.get("pairs") or {}
    if not isinstance(signals_data, dict):
        signals_data = {}
    timestamp = pd.to_datetime(data.get("timestamp"), utc=True)
    processed_files = []

    print(f"üìä Processing JSON signals for {len(signals_data)} pairs...")

    for pair, info in signals_data.items():
        signals = info.get("signals") if isinstance(info, dict) else None
        if not isinstance(signals, dict):
            signals = {}
        dfs = []

        for tf_name, tf_info in signals.items():
            if not isinstance(tf_info, dict):
                tf_info = {}

            # Validate price data
            live = tf_info.get("live")
            sl = tf_info.get("SL")
            tp = tf_info.get("TP")

            if not all(_is_valid_price(v) for v in (live, sl, tp)):
                print(f"‚ö†Ô∏è No valid price data after filtering")
                continue

            df = pd.DataFrame({
                "live": [live],
                "SL": [sl],
                "TP": [tp],
                "signal": [tf_info.get("signal")]
            }, index=[timestamp])
            df["timeframe"] = tf_name
            df = add_indicators(df)
            if not df.empty:
                dfs.append(df)

        if dfs:
            df_pair = pd.concat(dfs)
            out_file = save_folder / f"{pair.replace('/', '_')}.pkl"
            df_pair.to_pickle(out_file)
            print(f"‚úÖ Processed JSON {pair} ‚Üí {out_file.name}")
            processed_files.append(out_file)

    return processed_files

# ======================================================
# 4️⃣ Safe Pickle Merger (UNCHANGED from Document 6)
# ======================================================
def merge_pickles(temp_folder: Path, final_folder: Path, keep_last: int = 5):
    """Merge temporary pickles into one ``<pair>_2244.pkl`` file per pair.

    Files are grouped by the part of their stem before the first ``.``.
    Grouping uses that exact key, so ``EUR_USD_1h`` is never folded into
    ``EUR_USD`` merely because the names share a prefix.  Each group is
    concatenated, sorted by index and de-duplicated before being written.

    Afterwards, files matching ``<pair>_*.pkl`` beyond the ``keep_last``
    most recently modified are removed.  Merged outputs that belong to
    other pairs of the current run are excluded from that cleanup, because
    the glob also matches them whenever one pair name is a prefix of
    another.

    Args:
        temp_folder: Folder holding the temporary ``*.pkl`` inputs.
        final_folder: Destination folder (created when missing).
        keep_last: Number of matching files to keep per pair.
    """
    pickles = list(temp_folder.glob("*.pkl"))
    if not pickles:
        print("‚ö™ No temporary pickles to merge.")
        return

    final_folder.mkdir(parents=True, exist_ok=True)

    # Sorted input makes group order and merge order deterministic.
    grouped = {}
    for pkl in sorted(pickles):
        grouped.setdefault(pkl.stem.split('.')[0], []).append(pkl)

    # Every output this run can produce; cleanup must not delete them.
    run_outputs = {final_folder / f"{pair}_2244.pkl" for pair in grouped}

    for pair, pair_files in grouped.items():
        dfs = [pd.read_pickle(p) for p in pair_files if p.exists() and p.stat().st_size > 0]

        if not dfs:
            print(f"‚ö™ Skipped {pair} (no valid pickles)")
            continue

        merged_df = pd.concat(dfs, ignore_index=False).sort_index().drop_duplicates()
        # Create final merged pickle with _2244 suffix
        merged_file = final_folder / f"{pair}_2244.pkl"
        merged_df.to_pickle(merged_file)
        print(f"üîó Merged {len(pair_files)} files ‚Üí {merged_file.name}")

        # Clean up old versions (keep only last N), never touching another
        # pair's output from this run.
        candidates = [
            f for f in final_folder.glob(f"{pair}_*.pkl")
            if f == merged_file or f not in run_outputs
        ]
        candidates.sort(key=lambda x: x.stat().st_mtime, reverse=True)
        for old_file in candidates[keep_last:]:
            try:
                old_file.unlink()
                print(f"üßπ Removed old file: {old_file.name}")
            except Exception as e:
                print(f"‚ö†Ô∏è Could not remove {old_file.name}: {e}")

# ======================================================
# 5️⃣ Unified Pipeline Runner
# ======================================================
def run_unified_pipeline():
    """Run the JSON -> CSV -> merge pipeline and report what was produced.

    Step 1 converts ``JSON_FILE`` (when present) into temporary pickles,
    step 2 converts every ``*.csv`` found in ``CSV_FOLDER`` or ``ROOT_DIR``
    on a four-worker thread pool, and step 3 merges the temporary pickles
    into ``PICKLE_FOLDER``.  The merged ``*_2244.pkl`` files are then
    listed with their row counts.

    Returns:
        Path: ``PICKLE_FOLDER``.
    """
    rule = "=" * 70

    print("\n" + rule)
    print("üöÄ UNIFIED PICKLE MERGER v3.7")
    print(rule)
    print(f"Environment: {ENV_NAME}")
    print(f"Root: {ROOT_DIR}")
    print(f"CSV Folder: {CSV_FOLDER}")
    print(f"Output Folder: {PICKLE_FOLDER}")
    print(rule + "\n")

    temp_files = []

    # ===== Step 1: Process JSON signals =====
    print("üìã Step 1: Processing JSON signals...")
    if not JSON_FILE.exists():
        print(f"‚ö™ No JSON file found at {JSON_FILE}")
    else:
        temp_files += process_json_file(JSON_FILE, TEMP_PICKLE_FOLDER)
        print(f"‚úÖ JSON processing complete: {len(temp_files)} files")

    # ===== Step 2: Process CSV files =====
    print("\nüìã Step 2: Processing CSV files...")
    # CSVs may sit in the dedicated folder or directly in the root; the set
    # removes any path that was found more than once.
    csv_files = list({
        found
        for location in (CSV_FOLDER, ROOT_DIR)
        for found in location.glob("*.csv")
    })

    if not csv_files:
        print("‚ö™ No CSV files found")
    else:
        print(f"üìä Found {len(csv_files)} CSV files")
        with ThreadPoolExecutor(max_workers=4) as executor:
            futures = [
                executor.submit(process_csv_file, path, TEMP_PICKLE_FOLDER)
                for path in csv_files
            ]
            outcomes = (fut.result() for fut in as_completed(futures))
            temp_files.extend(outcome for outcome in outcomes if outcome)

    # ===== Step 3: Merge all pickles =====
    print("\nüìã Step 3: Merging pickle files...")
    merge_pickles(TEMP_PICKLE_FOLDER, PICKLE_FOLDER)

    # ===== Final verification =====
    print("\n" + rule)
    print("üìä FINAL OUTPUT VERIFICATION")
    print(rule)

    final_pickles = list(PICKLE_FOLDER.glob("*_2244.pkl"))
    if not final_pickles:
        print("‚ö†Ô∏è No merged pickle files created!")
    else:
        print(f"‚úÖ Created {len(final_pickles)} merged pickle files:")
        for merged in final_pickles:
            row_count = len(pd.read_pickle(merged))
            print(f"  ‚Ä¢ {merged.name}: {row_count} rows")

    print(rule)
    print("üéØ Unified pipeline complete!")
    print(rule)

    print(f"\n‚úÖ Pipeline completed successfully!")
    print(f"üìÅ Final pickles saved in: {PICKLE_FOLDER}")

    return PICKLE_FOLDER

# ======================================================
# 6️⃣ Execute
# ======================================================
if __name__ == "__main__":
    # NOTE(review): IN_GHA is evaluated at module level earlier in this cell
    # (it reads os.environ), so this import runs too late to serve that check.
    import os  # Import here for IN_GHA check
    # Run the whole pipeline; the returned value is PICKLE_FOLDER.
    final_folder = run_unified_pipeline()

In [None]:
# TAG: pipeline_main

#!/usr/bin/env python3
"""
Ultimate Forex Pipeline v8.5.2 - FIXED FOR GITHUB ACTIONS
==========================================================
✅ FIXED: No nested paths (forex-alpha-models/forex-ai-models)
✅ FIXED: All files save to repo root in GitHub Actions
✅ Enhanced Git operations with proper error handling
✅ Memory system using pickle (lightweight)
✅ All v8.5.1 features preserved + path corrections
"""

import os
import sys
import json
import pickle
import random
import re
import smtplib
import subprocess
import time
import logging
from pathlib import Path
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from datetime import datetime, timedelta, timezone
from collections import defaultdict
from dataclasses import dataclass

import numpy as np
import pandas as pd
import requests

# ======================================================
# FIXED CONFIGURATION & PATH SETUP
# ======================================================
# Root logger: INFO and above go to forex_pipeline_v85.log.  The filename is
# relative, so the log is created in the current working directory.
logging.basicConfig(
    filename='forex_pipeline_v85.log',
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] %(message)s'
)

def print_status(msg, level="info"):
    """Log ``msg`` through the ``logging`` module and echo it with an icon.

    ``level`` selects both the module-level logging function of the same
    name (``"warn"`` is mapped to ``logging.warning``; names the module
    does not define fall back to ``logging.info``) and the icon printed in
    front of the message (unknown levels use the info icon).
    """
    icons = {"info": "‚ÑπÔ∏è", "success": "‚úÖ", "warn": "‚ö†Ô∏è", "error": "‚ùå",
             "rocket": "üöÄ", "chart": "üìä", "brain": "üß†", "money": "üí∞"}

    log_name = "warning" if level == "warn" else level
    emit = getattr(logging, log_name, logging.info)
    emit(msg)

    icon = icons.get(level, '‚ÑπÔ∏è')
    print(f"{icon} {msg}")

# ‚úÖ FIXED: Environment detection
# Colab is detected by import; when it is present IN_GHA is forced to False
# without consulting the environment.
try:
    import google.colab
    IN_COLAB = True
    IN_GHA = False
except ImportError:
    IN_COLAB = False
    # Only the presence of the variable is checked, not its value.
    IN_GHA = "GITHUB_ACTIONS" in os.environ

ENV_NAME = "GitHub Actions" if IN_GHA else ("Colab" if IN_COLAB else "Local")
print_status(f"üåç Environment: {ENV_NAME}", "info")

# Path setup: in GitHub Actions the repo folder is the working directory
# itself; elsewhere it is a "forex-ai-models" folder under the root.
if IN_GHA:
    # GitHub Actions: Use current directory directly
    ROOT_PATH = Path.cwd()
    REPO_FOLDER = ROOT_PATH  # No nesting!
    PICKLE_FOLDER = ROOT_PATH / "pickles"
    print_status(f"ü§ñ GitHub Actions: Using {ROOT_PATH}", "info")
elif IN_COLAB:
    # Colab: Use standard path
    ROOT_PATH = Path("/content/forex-alpha-models")
    REPO_FOLDER = ROOT_PATH / "forex-ai-models"
    PICKLE_FOLDER = ROOT_PATH / "pickles"
else:
    # Local: Use relative path
    ROOT_PATH = Path("./forex-alpha-models")
    REPO_FOLDER = ROOT_PATH / "forex-ai-models"
    PICKLE_FOLDER = ROOT_PATH / "pickles"

# Create directories
for folder in [PICKLE_FOLDER, REPO_FOLDER]:
    folder.mkdir(parents=True, exist_ok=True)

print_status(f"‚úÖ Root: {ROOT_PATH}", "success")
print_status(f"‚úÖ Repo: {REPO_FOLDER}", "success")
print_status(f"‚úÖ Pickles: {PICKLE_FOLDER}", "success")

# Git configuration
# Each setting is read from the environment and falls back to the literal
# default shown here.
GIT_NAME = os.environ.get("GIT_USER_NAME", "Forex AI Bot")
GIT_EMAIL = os.environ.get("GIT_USER_EMAIL", "nakatonabira3@gmail.com")
GITHUB_USERNAME = os.environ.get("GITHUB_USERNAME", "rahim-dotAI")
GITHUB_REPO = os.environ.get("GITHUB_REPO", "forex-ai-models")
# Empty string when FOREX_PAT is unset; surrounding whitespace is stripped.
FOREX_PAT = os.environ.get("FOREX_PAT", "").strip()

# Outside GitHub Actions this rewrites the user's *global* git identity.
# check=False: a failing git command does not raise here.
if not IN_GHA:
    subprocess.run(["git", "config", "--global", "user.name", GIT_NAME], check=False)
    subprocess.run(["git", "config", "--global", "user.email", GIT_EMAIL], check=False)

# Email configuration
GMAIL_USER = os.environ.get("GMAIL_USER", "nakatonabira3@gmail.com")
# Defaults to an empty string when GMAIL_APP_PASSWORD is unset.
GMAIL_APP_PASSWORD = os.environ.get("GMAIL_APP_PASSWORD", "")
LOGO_URL = "https://raw.githubusercontent.com/rahim-dotAI/forex-ai-models/main/IMG_1599.jpeg"

# Trading parameters
# Currency pairs, written as "BASE/QUOTE".
PAIRS = ["EUR/USD", "GBP/USD", "USD/JPY", "AUD/USD"]
ATR_PERIOD = 14
MIN_ATR = 1e-5
BASE_CAPITAL = 100
MAX_POSITION_FRACTION = 0.1
# 5% of BASE_CAPITAL.
MAX_TRADE_CAP = BASE_CAPITAL * 0.05
# NOTE(review): presumably a small epsilon guarding divisions - confirm at the use sites.
EPS = 1e-8
MAX_ATR_SL = 3.0
MAX_ATR_TP = 3.0
TOURNAMENT_SIZE = 3
# NOTE(review): the *_PCT values look like fractions (0.0001 = 0.01%), not percentages - confirm.
SLIPPAGE_PCT = 0.0001
COMMISSION_PCT = 0.0002

# ‚úÖ FIXED: File paths - All in REPO_FOLDER (no nesting)
# All state files live directly in REPO_FOLDER.
SIGNALS_JSON_PATH = REPO_FOLDER / "broker_signals.json"
ENSEMBLE_SIGNALS_FILE = REPO_FOLDER / "ensemble_signals.json"
LEARNING_FILE = REPO_FOLDER / "learning_v85.pkl"
ITERATION_FILE = REPO_FOLDER / "iteration_v85.pkl"
MEMORY_FILE = REPO_FOLDER / "memory_v85.pkl"  # Using pickle instead of sqlite
WEIGHTS_FILE = REPO_FOLDER / "weights_v85.pkl"
MONDAY_FILE = REPO_FOLDER / "monday_runs.pkl"

# Ensure parent directories exist
# (every listed path has REPO_FOLDER as its parent)
for file_path in [SIGNALS_JSON_PATH, LEARNING_FILE, ITERATION_FILE, MEMORY_FILE]:
    file_path.parent.mkdir(parents=True, exist_ok=True)

print_status(f"üìÅ Memory file: {MEMORY_FILE}", "info")
print_status(f"üìÅ Learning file: {LEARNING_FILE}", "info")
print_status(f"üìÅ Iteration file: {ITERATION_FILE}", "info")

# Model configurations
# One entry per competing model, keyed by display name.  Every entry has the
# same fields: display colour (icon + hex), a strategy description, four
# (low, high) ranges and three search settings.
# NOTE(review): pop_size / generations / mutation_rate look like genetic-
# algorithm settings - confirm against the code that consumes this dict.
COMPETITION_MODELS = {
    "Alpha Momentum": {
        "color": "üî¥", "hex_color": "#E74C3C",
        "strategy": "Aggressive momentum with adaptive stops",
        "atr_sl_range": (1.5, 2.5), "atr_tp_range": (2.0, 3.5),
        "risk_range": (0.015, 0.03), "confidence_range": (0.3, 0.5),
        "pop_size": 15, "generations": 20, "mutation_rate": 0.3
    },
    "Beta Conservative": {
        "color": "üîµ", "hex_color": "#3498DB",
        "strategy": "Conservative mean reversion",
        "atr_sl_range": (1.0, 1.8), "atr_tp_range": (1.5, 2.5),
        "risk_range": (0.005, 0.015), "confidence_range": (0.5, 0.7),
        "pop_size": 12, "generations": 15, "mutation_rate": 0.2
    },
    "Gamma Adaptive": {
        "color": "üü¢", "hex_color": "#2ECC71",
        "strategy": "Adaptive volatility trading",
        "atr_sl_range": (1.2, 2.2), "atr_tp_range": (1.8, 3.0),
        "risk_range": (0.01, 0.025), "confidence_range": (0.4, 0.6),
        "pop_size": 18, "generations": 22, "mutation_rate": 0.25
    }
}

# ======================================================
# ITERATION COUNTER
# ======================================================
class IterationCounter:
    """Persistent run counter backed by a pickle file.

    State is a dict with three keys:
      * ``total``   - number of recorded runs
      * ``start``   - ISO-8601 timestamp of when the counter was created
      * ``history`` - list of per-run records (capped at the latest 1000)

    Args:
        file: ``pathlib.Path`` of the pickle file holding the state.
    """

    def __init__(self, file=ITERATION_FILE):
        self.file = file
        self.data = self._load()
        print_status(f"üìä Iteration counter: Total runs = {self.data['total']}", "info")

    @staticmethod
    def _fresh_state():
        """Return a brand-new counter state stamped with the current UTC time."""
        return {'total': 0, 'start': datetime.now(timezone.utc).isoformat(), 'history': []}

    def _load(self):
        """Load the state from disk, falling back to a fresh state on any failure.

        Keys missing from an older or partially written file are filled with
        defaults so that ``increment`` and ``get_stats`` cannot hit a KeyError
        later on.
        """
        if self.file.exists():
            try:
                # NOTE: pickle can execute arbitrary code while loading; this
                # file must only ever be produced by this pipeline.
                with open(self.file, 'rb') as f:
                    data = pickle.load(f)
                for key, default in self._fresh_state().items():
                    data.setdefault(key, default)
                print_status(f"‚úÖ Loaded: {data['total']} iterations", "success")
                return data
            except Exception as e:
                print_status(f"‚ö†Ô∏è Load failed: {e}, creating new", "warn")
        return self._fresh_state()

    def increment(self, success=True):
        """Record one more run and persist the state.

        Args:
            success: whether the run completed successfully.

        Returns:
            The new total number of runs. A failed save is reported but does
            not raise, so the in-memory total is still returned.
        """
        self.data['total'] += 1
        self.data['history'].append({
            'iteration': self.data['total'],
            'time': datetime.now(timezone.utc).isoformat(),
            'success': success
        })
        # Bound the history so the pickle does not grow without limit.
        if len(self.data['history']) > 1000:
            self.data['history'] = self.data['history'][-1000:]
        try:
            with open(self.file, 'wb') as f:
                pickle.dump(self.data, f, protocol=4)
            print_status(f"‚úÖ Saved: Iteration #{self.data['total']}", "success")
        except Exception as e:
            print_status(f"‚ö†Ô∏è Save failed: {e}", "error")
        return self.data['total']

    def get_stats(self):
        """Return total runs, elapsed whole days (minimum 1) and runs per day."""
        start = datetime.fromisoformat(self.data['start'])
        if start.tzinfo is None:
            # A naive timestamp cannot be subtracted from an aware "now";
            # treat it as UTC, matching how this class writes timestamps.
            start = start.replace(tzinfo=timezone.utc)
        days = max(1, (datetime.now(timezone.utc) - start).days)
        return {
            'total': self.data['total'],
            'days': days,
            'per_day': self.data['total'] / days,
            'start': self.data['start']
        }

# Module-level counter instance; constructing it loads (or initializes) the
# state stored at ITERATION_FILE and prints the current total.
COUNTER = IterationCounter()

# ======================================================
# MEMORY SYSTEM - PICKLE-BASED (Lightweight)
# ======================================================
class MemorySystem:
    """Lightweight pickle-based memory system.

    State is a dict with ``signals`` (list), ``trades`` (list),
    ``performance`` (dict), ``created_at`` and, after the first save,
    ``updated_at`` (ISO-8601 strings).

    Args:
        file: ``pathlib.Path`` of the pickle file holding the state.
    """

    def __init__(self, file=MEMORY_FILE):
        self.file = file
        self.data = self._load()
        print_status(f"üìÅ Memory system initialized: {len(self.data['signals'])} signals", "info")

    @staticmethod
    def _empty_state():
        """Return a brand-new, empty memory state."""
        return {
            'signals': [],
            'trades': [],
            'performance': {},
            'created_at': datetime.now(timezone.utc).isoformat()
        }

    def _load(self):
        """Load the state from disk, or start empty when missing or unreadable.

        Missing keys are filled in so that callers can index ``signals`` and
        ``trades`` directly even when the file was written by an older version.
        """
        if self.file.exists():
            try:
                # NOTE: pickle can execute arbitrary code while loading; this
                # file must only ever be produced by this pipeline.
                with open(self.file, 'rb') as f:
                    data = pickle.load(f)
                for key, default in self._empty_state().items():
                    data.setdefault(key, default)
                print_status(f"‚úÖ Loaded memory: {len(data.get('signals', []))} signals", "success")
                return data
            except Exception as e:
                print_status(f"‚ö†Ô∏è Memory load failed: {e}", "warn")

        return self._empty_state()

    def _save(self):
        """Stamp ``updated_at`` and write the state; failures are reported, not raised."""
        try:
            self.data['updated_at'] = datetime.now(timezone.utc).isoformat()
            with open(self.file, 'wb') as f:
                pickle.dump(self.data, f, protocol=4)
        except Exception as e:
            print_status(f"‚ö†Ô∏è Memory save failed: {e}", "error")

    def store_signals(self, signals_by_model, timestamp):
        """Store the non-HOLD signals from all models and persist the state.

        Args:
            signals_by_model: ``{model_name: {pair: signal_dict}}`` where each
                signal dict provides ``direction``, ``last_price``, ``SL``,
                ``TP`` and ``score_1_100``.
            timestamp: datetime applied to every stored record.
        """
        stamp = timestamp.isoformat()
        for model_name, signals in signals_by_model.items():
            for pair, sig in signals.items():
                if sig['direction'] == 'HOLD':
                    continue
                self.data['signals'].append({
                    'timestamp': stamp,
                    'model': model_name,
                    'pair': pair,
                    'direction': sig['direction'],
                    'entry': sig['last_price'],
                    'sl': sig['SL'],
                    'tp': sig['TP'],
                    'confidence': sig['score_1_100']
                })

        # Keep only last 1000 signals
        if len(self.data['signals']) > 1000:
            self.data['signals'] = self.data['signals'][-1000:]

        self._save()

    def get_history(self, model_name, days=7):
        """Summarize the stored trades of one model over the last ``days`` days.

        Timestamps are compared as ISO-8601 strings, which orders correctly
        only when all of them share the same format and UTC offset.
        NOTE(review): nothing in this section appends to ``trades`` - confirm
        which component records them.

        Returns:
            dict with ``total_trades``, ``wins``, ``accuracy`` (percent),
            ``total_pnl`` and ``avg_pnl``.
        """
        since = (datetime.now(timezone.utc) - timedelta(days=days)).isoformat()

        trades = [t for t in self.data.get('trades', [])
                  if t.get('model') == model_name and t.get('timestamp', '') > since]

        total = len(trades)
        wins = sum(1 for t in trades if t.get('hit_tp', False))
        total_pnl = sum(t.get('pnl', 0) for t in trades)

        return {
            'total_trades': total,
            'wins': wins,
            'accuracy': (wins / total * 100) if total > 0 else 0,
            'total_pnl': total_pnl,
            'avg_pnl': total_pnl / total if total > 0 else 0
        }

    def close(self):
        """Save and close"""
        self._save()
        print_status("‚úÖ Memory system saved", "success")

# Module-level memory instance; constructing it loads (or initializes) the
# state stored at MEMORY_FILE.
MEMORY = MemorySystem()

# ======================================================
# LEARNING SYSTEM
# ======================================================
class LearningSystem:
    """Pickle-backed record of profitable chromosomes and a rolling PnL curve.

    State keys:
      * ``iterations``          - number of recorded iterations
      * ``successful_patterns`` - ``{"<model>_success": [pattern, ...]}``,
        at most the 50 highest-PnL patterns per model
      * ``learning_curve``      - last 100 per-iteration PnL sums
      * ``adaptation_score``    - 50 + mean of the last 10 curve points,
        clamped to 0..100

    Args:
        file: ``pathlib.Path`` of the pickle file holding the state.
    """

    def __init__(self, file=LEARNING_FILE):
        self.file = file
        print_status(f"üìö Learning system: {self.file}", "info")
        self.data = self._load()

    @staticmethod
    def _empty_state():
        """Return a brand-new learning state."""
        return {
            'iterations': 0,
            'successful_patterns': {},
            'learning_curve': [],
            'adaptation_score': 0.0
        }

    def _load(self):
        """Load the state from disk, or start empty when missing or unreadable.

        Missing keys are filled with defaults so later methods can index the
        state directly.
        """
        if self.file.exists():
            try:
                # NOTE: pickle can execute arbitrary code while loading; this
                # file must only ever be produced by this pipeline.
                with open(self.file, 'rb') as f:
                    data = pickle.load(f)
                for key, default in self._empty_state().items():
                    data.setdefault(key, default)
                print_status(f"‚úÖ Loaded learning: {data['iterations']} iterations", "success")
                return data
            except Exception as e:
                print_status(f"‚ö†Ô∏è Learning load failed: {e}", "warn")
        return self._empty_state()

    def record_iteration(self, results, outcomes=None):
        """Record one pipeline iteration and persist the state.

        Args:
            results: ``{model: {'chromosome': ..., 'metrics': {...}}}``.
                Entries that are empty or lack ``metrics`` are ignored.
            outcomes: optional ``{model: {'total_pnl': ..., 'accuracy': ...}}``.
                When a model has an outcome it takes precedence over that
                model's backtest metrics.

        A chromosome is stored as a successful pattern when its PnL is
        positive and its accuracy is at least 50%.
        """
        self.data['iterations'] += 1

        for model, result in results.items():
            if not result or 'metrics' not in result:
                continue

            if outcomes and model in outcomes:
                pnl = outcomes[model]['total_pnl']
                accuracy = outcomes[model]['accuracy']
            else:
                # No outcome for this model: fall back to the backtest metrics
                # for BOTH values. Using 0 for accuracy here would make the
                # success test below impossible to pass.
                pnl = result['metrics']['total_pnl']
                accuracy = result['metrics'].get('accuracy', 0)

            if pnl > 0 and accuracy >= 50:
                key = f"{model}_success"
                patterns = self.data['successful_patterns'].setdefault(key, [])
                patterns.append({
                    'chromosome': result.get('chromosome'),
                    'pnl': pnl,
                    'accuracy': accuracy,
                    'time': datetime.now(timezone.utc).isoformat()
                })

                # Keep only the 50 most profitable patterns per model.
                if len(patterns) > 50:
                    self.data['successful_patterns'][key] = sorted(
                        patterns,
                        key=lambda x: x['pnl'],
                        reverse=True
                    )[:50]

        # The learning curve tracks outcome PnL only; without outcomes the
        # point for this iteration is 0.
        self.data['learning_curve'].append(sum(outcomes[m]['total_pnl'] for m in outcomes) if outcomes else 0)
        if len(self.data['learning_curve']) > 100:
            self.data['learning_curve'] = self.data['learning_curve'][-100:]

        if len(self.data['learning_curve']) >= 10:
            recent = np.mean(self.data['learning_curve'][-10:])
            self.data['adaptation_score'] = min(100, max(0, 50 + recent))

        try:
            with open(self.file, 'wb') as f:
                pickle.dump(self.data, f, protocol=4)
            print_status(f"‚úÖ Learning saved: Iteration {self.data['iterations']}", "success")
        except Exception as e:
            logging.error(f"Learning save failed: {e}")

    def get_best_chromosomes(self, model, top_n=3):
        """Return up to ``top_n`` stored chromosomes for ``model``, highest PnL first.

        Patterns without a chromosome are dropped after the top-N cut, so
        fewer than ``top_n`` items may be returned.
        """
        key = f"{model}_success"
        patterns = self.data['successful_patterns'].get(key, [])
        return [p['chromosome'] for p in sorted(patterns, key=lambda x: x['pnl'], reverse=True)[:top_n] if p.get('chromosome')]

    def get_report(self):
        """Return iteration count, adaptation score, success count and a trend label."""
        total_success = sum(len(p) for p in self.data['successful_patterns'].values())
        return {
            'iterations': self.data['iterations'],
            'adaptation_score': self.data['adaptation_score'],
            'total_successes': total_success,
            'trend': "üìà Improving" if self.data['adaptation_score'] > 50 else "üìâ Adjusting"
        }

# Module-level learning instance; constructing it loads (or initializes) the
# state stored at LEARNING_FILE. run_ga() reads seed chromosomes from it.
LEARNING = LearningSystem()

# ======================================================
# MODE MANAGER
# ======================================================
class ModeManager:
    """Decide the run mode (weekend replay, Monday replay, normal) from today's date.

    Dates and weekdays come from ``datetime.now()``, i.e. the machine's local
    time. NOTE(review): the rest of this file stamps everything in UTC -
    confirm that local time is intended here.
    """

    def __init__(self):
        self.monday_data = self._load_monday()

    def _load_monday(self):
        """Return today's run-count record from MONDAY_FILE.

        A missing file, an unreadable file, or a record stamped with another
        date all yield a fresh ``{'count': 0, 'date': <today>}``.
        NOTE(review): nothing in this section increments or saves ``count`` -
        confirm where that happens.
        """
        today = datetime.now().strftime('%Y-%m-%d')
        if MONDAY_FILE.exists():
            try:
                # Context manager so the handle is closed even if unpickling fails.
                with open(MONDAY_FILE, "rb") as f:
                    data = pickle.load(f)
                if data.get('date') == today:
                    return data
            except Exception as e:
                # Best effort: report the problem and start from a clean record.
                print_status(f"Monday run state load failed: {e}", "warn")
        return {'count': 0, 'date': today}

    def get_mode(self):
        """Return ``"weekend_replay"``, ``"monday_replay"`` or ``"normal"``.

        Saturday/Sunday are always replay; Monday is replay only while
        today's count is below 1.
        """
        weekday = datetime.now().weekday()
        if weekday in (5, 6):
            return "weekend_replay"
        if weekday == 0 and self.monday_data.get('count', 0) < 1:
            return "monday_replay"
        return "normal"

    def should_send_email(self):
        """E-mails are only sent in normal mode."""
        return self.get_mode() == "normal"

# Module-level mode manager; constructing it reads MONDAY_FILE when present.
# send_email() and main() consult it for the current run mode.
MODE_MANAGER = ModeManager()

# ======================================================
# UTILITY FUNCTIONS
# ======================================================
def ensure_atr(df):
    """Make sure ``df`` has a usable ``atr`` column and return ``df``.

    The frame is modified in place. When an ``atr`` column already holds at
    least one value, gaps are filled with MIN_ATR and values are floored at
    MIN_ATR. Otherwise the column is computed as the rolling mean (window
    ATR_PERIOD, min_periods=1) of the true range built from ``high``,
    ``low`` and ``close``, with the same MIN_ATR floor.

    Args:
        df: DataFrame with ``high``, ``low`` and ``close`` columns.

    Returns:
        The same DataFrame, with ``atr`` populated.
    """
    if "atr" in df.columns and not df["atr"].isna().all():
        df["atr"] = df["atr"].fillna(MIN_ATR).clip(lower=MIN_ATR)
        return df

    high, low, close = df["high"].values, df["low"].values, df["close"].values
    prev_close = np.roll(close, 1)
    tr = np.maximum.reduce([
        high - low,
        np.abs(high - prev_close),
        np.abs(low - prev_close)
    ])
    # np.roll wraps the last close around to position 0, so the first bar has
    # no real previous close: use its plain high-low range instead. Guarded so
    # an empty frame does not raise IndexError.
    if len(tr) > 0:
        tr[0] = high[0] - low[0]
    df["atr"] = pd.Series(tr, index=df.index).rolling(ATR_PERIOD, min_periods=1).mean().fillna(MIN_ATR).clip(lower=MIN_ATR)
    return df

def seed_hybrid_signal(df):
    """Give ``df`` a ``hybrid_signal`` column when it has no usable one.

    A column counts as usable when it exists and its absolute values do not
    sum to zero. Otherwise it is (re)built in place as the 10-bar mean of
    ``close`` minus the 50-bar mean (both with min_periods=1), NaN -> 0.
    The same DataFrame is returned either way.
    """
    already_seeded = (
        "hybrid_signal" in df.columns
        and df["hybrid_signal"].abs().sum() != 0
    )
    if already_seeded:
        return df

    closes = df["close"]
    fast_ma = closes.rolling(10, min_periods=1).mean()
    slow_ma = closes.rolling(50, min_periods=1).mean()
    df["hybrid_signal"] = (fast_ma - slow_ma).fillna(0)
    return df

def load_data(folder):
    """Load the pickled price frames for every pair in PAIRS.

    For each pair, files matching ``<PAIR_WITH_UNDERSCORE>*.pkl`` in
    ``folder`` are read. Files that are not DataFrames or have fewer than 50
    rows are skipped. Accepted frames get a datetime index (timezone
    removed), an ``atr`` column and a ``hybrid_signal`` column.

    Args:
        folder: ``pathlib.Path`` of the directory holding the pickles.

    Returns:
        ``{pair: {timeframe: DataFrame}}``. Every pair in PAIRS is present as
        a key, with an empty dict when nothing usable was found. The
        timeframe label is the file name minus the pair prefix and ``.pkl``,
        or ``"merged"`` when nothing remains.
    """
    combined = {}
    for pair in PAIRS:
        combined[pair] = {}
        prefix = pair.replace("/", "_")
        # Escape the prefix so it is always matched literally.
        label_pattern = re.compile(rf"{re.escape(prefix)}_?|\.pkl")
        for pf in sorted(folder.glob(f"{prefix}*.pkl")):
            try:
                # NOTE: read_pickle can execute arbitrary code; only load
                # files produced by this pipeline.
                df = pd.read_pickle(pf)
                if not isinstance(df, pd.DataFrame) or len(df) < 50:
                    continue
                df.index = pd.to_datetime(df.index, errors="coerce")
                if df.index.tz is not None:
                    df.index = df.index.tz_convert(None)
                df = ensure_atr(df)
                df = seed_hybrid_signal(df)
                tf = label_pattern.sub("", pf.name).strip("_") or "merged"
                combined[pair][tf] = df
            except Exception as e:
                # Best effort: one bad file must not stop the others, but say
                # so instead of dropping it silently.
                print_status(f"Skipping {pf.name}: {e}", "warn")
                continue
    return combined

def fetch_live_rate(pair):
    """Fetch the current rate for ``pair`` ("BASE/QUOTE") via Browserless.

    The x-rates.com calculator page is rendered through the Browserless
    ``/content`` endpoint and the rate is scraped from the ``ccOutputRslt``
    element.

    Args:
        pair: currency pair such as ``"EUR/USD"``.

    Returns:
        The rate as a float, or ``0.0`` when BROWSERLESS_TOKEN is unset, the
        pair is malformed, the request fails, or the page does not match.
    """
    token = os.environ.get("BROWSERLESS_TOKEN", "")
    if not token:
        return 0.0
    try:
        # Inside the guard so a malformed pair also yields the 0.0 sentinel.
        from_c, to_c = pair.split("/")
        r = requests.post(
            f"https://production-sfo.browserless.io/content?token={token}",
            json={"url": f"https://www.x-rates.com/calculator/?from={from_c}&to={to_c}&amount=1"},
            timeout=8
        )
        match = re.search(r'ccOutputRslt[^>]*>([\d,.]+)', r.text)
        return float(match.group(1).replace(",", "")) if match else 0.0
    except Exception:
        # Deliberately best-effort, but Exception (not a bare except) so that
        # KeyboardInterrupt and SystemExit still propagate.
        return 0.0

def build_tf_map(data):
    """Map each pair to the list of timeframe labels it has data for.

    ``data`` is ``{pair: {timeframe: frame}}``; the result keeps the pair
    order and the timeframe order of the input.
    """
    tf_map = {}
    for pair, frames in data.items():
        tf_map[pair] = [*frames.keys()]
    return tf_map

def create_chromosome(tf_map, config):
    """Build a random chromosome for one model.

    Layout: four scalar genes (stop-loss ATR multiple, take-profit ATR
    multiple, risk fraction, confidence threshold), each drawn uniformly from
    the matching range in ``config``, followed by one block of timeframe
    weights per pair in PAIRS. Each block is a Dirichlet sample whose length
    is the number of timeframes for that pair (at least 1).
    """
    gene_ranges = (
        config['atr_sl_range'],
        config['atr_tp_range'],
        config['risk_range'],
        config['confidence_range'],
    )
    genes = [float(random.uniform(low, high)) for low, high in gene_ranges]

    for pair in PAIRS:
        n_timeframes = max(1, len(tf_map.get(pair, [])))
        genes += np.random.dirichlet(np.ones(n_timeframes)).tolist()
    return genes

def decode_chromosome(chrom, tf_map):
    """Split a chromosome into trading parameters and per-pair timeframe weights.

    Returns ``(atr_sl, atr_tp, risk, conf, tf_w)``. The two ATR multiples are
    clipped to ``[1.0, MAX_ATR_SL]`` and ``[1.0, MAX_ATR_TP]``. ``tf_w`` is
    ``{pair: {timeframe: weight}}``; each pair's weights are normalized to
    sum to (about) 1, or made uniform when the raw weights do not sum to a
    positive number.
    """
    atr_sl = np.clip(chrom[0], 1.0, MAX_ATR_SL)
    atr_tp = np.clip(chrom[1], 1.0, MAX_ATR_TP)
    risk = chrom[2]
    conf = chrom[3]

    tf_w = {}
    cursor = 4  # weight genes start right after the four scalar genes
    for pair in PAIRS:
        timeframes = tf_map.get(pair, [])
        # A pair without timeframes still occupies one gene slot.
        n = max(1, len(timeframes))
        raw = np.array(chrom[cursor:cursor + n], dtype=float)
        if raw.sum() > 0:
            normalized = raw / (raw.sum() + EPS)
        else:
            normalized = np.ones(n) / n
        tf_w[pair] = {tf: float(w) for tf, w in zip(timeframes, normalized)}
        cursor += n

    return atr_sl, atr_tp, risk, conf, tf_w

def calculate_sharpe(equity_curve):
    """Return mean/std of the step-to-step returns of an equity curve.

    No annualization or risk-free rate is applied. Curves with fewer than two
    points, or with zero return variance, give ``0.0``.
    """
    if len(equity_curve) < 2:
        return 0.0

    curve = np.array(equity_curve, dtype=float)
    step_returns = np.diff(curve) / (curve[:-1] + EPS)
    if len(step_returns) == 0:
        return 0.0

    spread = np.std(step_returns)
    if spread == 0:
        return 0.0

    return float(np.mean(step_returns) / (spread + EPS))

# ======================================================
# BACKTESTING
# ======================================================
def backtest_strategy(data, tf_map, chromosome):
    """Replay a chromosome over the historical data and summarize the result.

    At most one position is open at a time. Walking every timestamp that
    appears in any frame, the function first checks the open position for a
    take-profit or stop-loss hit, then (if flat) looks for a new entry.
    No commission or spread is applied here, and a position still open at the
    end is not counted.

    Args:
        data: ``{pair: {timeframe: DataFrame}}`` with ``close`` and, where
            available, ``hybrid_signal`` and ``atr`` columns.
        tf_map: ``{pair: [timeframe, ...]}``.
        chromosome: gene list as produced by ``create_chromosome``.

    Returns:
        dict with ``total_trades``, ``winning_trades``, ``accuracy``
        (percent), ``total_pnl`` and ``sharpe``.
    """
    atr_sl, atr_tp, risk, conf, tf_w = decode_chromosome(chromosome, tf_map)

    equity = BASE_CAPITAL
    equity_curve = [equity]
    trades = []
    position = None

    # Sorted union of every index value across all pairs and timeframes.
    all_times = sorted(set().union(*[df.index for tfs in data.values() for df in tfs.values()]))

    for t in all_times:
        if position:
            pair = position['pair']
            price = 0
            # Use the close of the first timeframe (in tf_map order) that has a bar at t.
            for tf in tf_map.get(pair, []):
                if tf in data.get(pair, {}) and t in data[pair][tf].index:
                    price = data[pair][tf].loc[t, 'close']
                    break

            if price > 0:
                # Exits are detected on the close only; highs and lows are not consulted.
                hit_tp = (position['dir'] == 'BUY' and price >= position['tp']) or (position['dir'] == 'SELL' and price <= position['tp'])
                hit_sl = (position['dir'] == 'BUY' and price <= position['sl']) or (position['dir'] == 'SELL' and price >= position['sl'])

                if hit_tp or hit_sl:
                    # The fill is booked at the TP/SL level itself, not at the close that triggered it.
                    exit_price = position['tp'] if hit_tp else position['sl']
                    pnl = (exit_price - position['entry']) * position['size'] if position['dir'] == 'BUY' else (position['entry'] - exit_price) * position['size']
                    equity += pnl
                    equity_curve.append(equity)
                    trades.append({'pnl': pnl, 'correct': hit_tp})
                    position = None

        # Evaluated after the exit check, so a new trade can open on the same
        # timestamp that closed the previous one.
        if position is None:
            for pair in PAIRS:
                signal = 0
                price = 0
                atr = MIN_ATR

                # Weighted sum of hybrid_signal over the timeframes that have a bar at t.
                # price and atr end up being those of the last such timeframe iterated.
                for tf, weight in tf_w.get(pair, {}).items():
                    if tf in data.get(pair, {}) and t in data[pair][tf].index:
                        row = data[pair][tf].loc[t]
                        signal += row.get('hybrid_signal', 0) * weight
                        price = row['close']
                        atr = max(row.get('atr', MIN_ATR), MIN_ATR)

                if abs(signal) > conf and price > 0:
                    direction = 'BUY' if signal > 0 else 'SELL'
                    # Size is chosen so that a stop-out loses the risked amount:
                    # size * (atr * atr_sl) == min(equity * risk, MAX_TRADE_CAP).
                    size = min(equity * risk, MAX_TRADE_CAP) / (atr * atr_sl)

                    if direction == 'BUY':
                        sl = price - (atr * atr_sl)
                        tp = price + (atr * atr_tp)
                    else:
                        sl = price + (atr * atr_sl)
                        tp = price - (atr * atr_tp)

                    position = {'pair': pair, 'dir': direction, 'entry': price, 'sl': sl, 'tp': tp, 'size': size}
                    # First qualifying pair in PAIRS order takes the single position slot.
                    break

    total = len(trades)
    wins = sum(1 for t in trades if t['correct'])
    return {
        'total_trades': total,
        'winning_trades': wins,
        'accuracy': (wins / total * 100) if total > 0 else 0,
        'total_pnl': sum(t['pnl'] for t in trades),
        # The equity curve only gains a point when a trade closes.
        'sharpe': calculate_sharpe(equity_curve)
    }

# ======================================================
# GENETIC ALGORITHM
# ======================================================
def run_ga(data, tf_map, model_name, config):
    """Evolve a chromosome for one model with a simple genetic algorithm.

    The population is seeded with up to three historically successful
    chromosomes from LEARNING (only those whose length matches the current
    timeframe layout) and topped up with random ones. Each generation keeps
    the top 20% as elites and fills the rest with tournament-selected,
    single-point-crossover children that are mutated gene by gene.

    Fitness is ``total_pnl + (accuracy / 100) * 10`` from ``backtest_strategy``.

    Args:
        data: ``{pair: {timeframe: DataFrame}}``.
        tf_map: ``{pair: [timeframe, ...]}``.
        model_name: key used for logging and for the LEARNING lookup.
        config: model configuration (ranges, ``pop_size``, ``generations``,
            ``mutation_rate``, ``color``).

    Returns:
        ``{'chromosome': best_chromosome, 'metrics': backtest_metrics}``.

    Raises:
        Any exception from the run is logged and re-raised.
    """
    print_status(f"{config['color']} Training {model_name}...", "info")

    pop_size = config['pop_size']
    generations = config['generations']
    mutation_rate = config['mutation_rate']

    def fitness_of(chrom):
        """Score a chromosome by backtesting it."""
        metrics = backtest_strategy(data, tf_map, chrom)
        return metrics['total_pnl'] + (metrics['accuracy'] / 100) * 10

    try:
        # (clip bounds, mutation sigma) for the four scalar genes, by position.
        scalar_genes = (
            (config['atr_sl_range'], 0.3),
            (config['atr_tp_range'], 0.3),
            (config['risk_range'], 0.005),
            (config['confidence_range'], 0.1),
        )
        # Length every chromosome must have for the current tf_map; mirrors
        # the layout used by create_chromosome / decode_chromosome.
        expected_len = 4 + sum(max(1, len(tf_map.get(p, []))) for p in PAIRS)

        population = []
        best_hist = LEARNING.get_best_chromosomes(model_name, top_n=3)
        for chrom in best_hist:
            if not chrom:
                continue
            if len(chrom) != expected_len:
                # Saved under a different pair/timeframe layout: its weight
                # genes would be read against the wrong timeframes.
                print_status(
                    f"  Skipping stored chromosome of length {len(chrom)} (expected {expected_len})",
                    "warn"
                )
                continue
            population.append((fitness_of(chrom), chrom))

        while len(population) < pop_size:
            chrom = create_chromosome(tf_map, config)
            population.append((fitness_of(chrom), chrom))

        population.sort(reverse=True, key=lambda x: x[0])

        for gen in range(generations):
            new_pop = []
            elite_count = max(1, int(pop_size * 0.2))
            new_pop.extend(population[:elite_count])

            # random.sample raises if asked for more items than exist.
            tournament = min(TOURNAMENT_SIZE, len(population))

            while len(new_pop) < pop_size:
                parent1 = max(random.sample(population, tournament), key=lambda x: x[0])[1]
                parent2 = max(random.sample(population, tournament), key=lambda x: x[0])[1]

                # Single-point crossover.
                point = random.randint(1, len(parent1) - 1)
                child = [float(x) for x in parent1[:point]] + [float(x) for x in parent2[point:]]

                for i, gene in enumerate(child):
                    if random.random() >= mutation_rate:
                        continue
                    if i < 4:
                        bounds, sigma = scalar_genes[i]
                        child[i] = float(np.clip(gene + random.gauss(0, sigma), *bounds))
                    else:
                        # Weight genes stay strictly positive so that
                        # normalization in decode_chromosome is well defined.
                        child[i] = float(max(0.01, gene + random.gauss(0, 0.2)))

                new_pop.append((fitness_of(child), child))

            population = sorted(new_pop, reverse=True, key=lambda x: x[0])

            if (gen + 1) % 5 == 0:
                print_status(f"  Gen {gen+1}/{generations}: Best={population[0][0]:.4f}", "info")

        best_chrom = population[0][1]
        final_metrics = backtest_strategy(data, tf_map, best_chrom)

        print_status(
            f"  ‚úÖ {model_name}: {final_metrics['accuracy']:.1f}% accuracy | "
            f"${final_metrics['total_pnl']:.4f} PnL | {final_metrics['total_trades']} trades",
            "success"
        )

        return {'chromosome': best_chrom, 'metrics': final_metrics}

    except Exception:
        logging.exception(f"{model_name} GA error")
        raise

# ======================================================
# SIGNAL GENERATION
# ======================================================
def generate_signals(data, tf_map, chromosome, model_name, current_time):
    """Turn a chromosome into one live signal per pair, using each frame's last row.

    For every pair in PAIRS the weighted ``hybrid_signal`` of the latest bar
    of each timeframe is summed. When its magnitude exceeds the chromosome's
    confidence threshold (and a price is available) a BUY/SELL signal with
    ATR-based stop-loss and take-profit is emitted; otherwise HOLD with
    ``SL == TP == last_price``.

    Args:
        data: ``{pair: {timeframe: DataFrame}}``.
        tf_map: ``{pair: [timeframe, ...]}``.
        chromosome: gene list as produced by ``create_chromosome``.
        model_name: stored in each signal.
        current_time: datetime stored (ISO-8601) in each signal.

    Returns:
        ``{pair: signal_dict}`` with keys ``direction``, ``last_price``,
        ``SL``, ``TP``, ``atr``, ``score_1_100``, ``model``, ``timestamp``.
        ``score_1_100`` is ``|signal| * 100`` capped at 100, and 0 when the
        signal is NaN.
    """
    atr_sl, atr_tp, risk, conf, tf_w = decode_chromosome(chromosome, tf_map)
    signals = {}

    for pair in PAIRS:
        signal_strength = 0
        price = 0
        atr = MIN_ATR

        # price and atr end up being those of the last timeframe iterated.
        for tf, weight in tf_w.get(pair, {}).items():
            if tf in data.get(pair, {}):
                df = data[pair][tf]
                if len(df) > 0:
                    row = df.iloc[-1]
                    signal_strength += row.get('hybrid_signal', 0) * weight
                    price = row['close']
                    atr = max(row.get('atr', MIN_ATR), MIN_ATR)

        direction = 'HOLD'
        sl = tp = price

        if abs(signal_strength) > conf and price > 0:
            direction = 'BUY' if signal_strength > 0 else 'SELL'

            if direction == 'BUY':
                sl = price - (atr * atr_sl)
                tp = price + (atr * atr_tp)
            else:
                sl = price + (atr * atr_sl)
                tp = price - (atr * atr_tp)

        # int(NaN) raises ValueError, and the score is displayed as "/100",
        # so guard against NaN and cap the value.
        raw_score = abs(signal_strength) * 100
        score = 0 if np.isnan(raw_score) else int(min(raw_score, 100))

        signals[pair] = {
            'direction': direction,
            'last_price': float(price),
            'SL': float(sl),
            'TP': float(tp),
            'atr': float(atr),
            'score_1_100': score,
            'model': model_name,
            'timestamp': current_time.isoformat()
        }

    return signals

# ======================================================
# EMAIL SYSTEM
# ======================================================
def send_email(signals_by_model, iteration_stats, learning_report):
    """E-mail an HTML summary of the current signals to GMAIL_USER.

    Nothing is sent in replay modes or when GMAIL_APP_PASSWORD is empty.
    Failures are reported with a warning and never raised.

    Args:
        signals_by_model: ``{model_name: {pair: signal_dict}}``; only non-HOLD
            signals are listed. Every model name must be a key of
            COMPETITION_MODELS.
        iteration_stats: dict with ``iteration`` and ``total_iterations``.
        learning_report: dict with ``adaptation_score`` and ``trend``.
    """
    if not MODE_MANAGER.should_send_email() or not GMAIL_APP_PASSWORD:
        print_status("Email skipped (replay mode or no credentials)", "info")
        return

    try:
        # The header labels this time as UTC, so it must really be UTC rather
        # than the machine's local time.
        sent_at = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')

        msg = MIMEMultipart('alternative')
        msg['Subject'] = f"ü§ñ Forex AI Signals - Iteration #{iteration_stats['iteration']}"
        msg['From'] = GMAIL_USER
        msg['To'] = GMAIL_USER

        html = f"""
<!DOCTYPE html>
<html>
<head>
<style>
body {{font-family: Arial, sans-serif; background: #f4f4f4; margin: 0; padding: 20px;}}
.container {{max-width: 800px; margin: 0 auto; background: white; border-radius: 10px; overflow: hidden; box-shadow: 0 2px 10px rgba(0,0,0,0.1);}}
.header {{background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 30px; text-align: center;}}
.header h1 {{margin: 0; font-size: 28px;}}
.stats {{display: flex; justify-content: space-around; padding: 20px; background: #f8f9fa;}}
.stat {{text-align: center;}}
.stat-value {{font-size: 24px; font-weight: bold; color: #667eea;}}
.model-section {{padding: 20px; border-bottom: 1px solid #eee;}}
.signal {{padding: 15px; background: #f8f9fa; border-radius: 5px; margin: 10px 0;}}
.signal-buy {{border-left: 4px solid #28a745;}}
.signal-sell {{border-left: 4px solid #dc3545;}}
</style>
</head>
<body>
<div class="container">
    <div class="header">
        <h1>ü§ñ Forex AI Trading Signals</h1>
        <p>Iteration #{iteration_stats['iteration']} | {sent_at}</p>
    </div>
    <div class="stats">
        <div class="stat">
            <div class="stat-value">{iteration_stats['total_iterations']}</div>
            <div>Total Iterations</div>
        </div>
        <div class="stat">
            <div class="stat-value">{learning_report['adaptation_score']:.1f}/100</div>
            <div>Adaptation Score</div>
        </div>
        <div class="stat">
            <div class="stat-value">{learning_report['trend']}</div>
            <div>Trend</div>
        </div>
    </div>
"""

        # One section per model, listing only actionable (non-HOLD) signals.
        for model_name, signals in signals_by_model.items():
            config = COMPETITION_MODELS[model_name]
            html += f"""
    <div class="model-section">
        <div style="font-size: 20px; font-weight: bold;">{config['color']} {model_name}</div>
        <div style="color: #666; margin: 10px 0;">{config['strategy']}</div>
"""

            for pair, sig in signals.items():
                if sig['direction'] != 'HOLD':
                    # Selects the .signal-buy / .signal-sell CSS rule.
                    direction_class = sig['direction'].lower()
                    html += f"""
        <div class="signal signal-{direction_class}">
            <div style="font-weight: bold;">{pair}: {sig['direction']} @ {sig['last_price']:.5f}</div>
            <div style="color: #666; margin-top: 5px;">
                SL: {sig['SL']:.5f} | TP: {sig['TP']:.5f} | Confidence: {sig['score_1_100']}/100
            </div>
        </div>
"""

            html += "    </div>"

        html += """
    <div style="padding: 20px; text-align: center; background: #f8f9fa; color: #666;">
        Powered by Advanced AI Trading System | v8.5.2
    </div>
</div>
</body>
</html>
"""

        msg.attach(MIMEText(html, 'html'))

        # Timeout so an unreachable mail server cannot hang the pipeline.
        with smtplib.SMTP_SSL('smtp.gmail.com', 465, timeout=30) as server:
            server.login(GMAIL_USER, GMAIL_APP_PASSWORD)
            server.send_message(msg)

        print_status("‚úÖ Email sent successfully", "success")

    except Exception as e:
        print_status(f"‚ö†Ô∏è Email failed: {e}", "warn")

# ======================================================
# ENHANCED GIT OPERATIONS (Skip in GitHub Actions)
# ======================================================
def push_to_github(files, message):
    """Commit the given files and push them to ``origin main``.

    Skipped (returning True) inside GitHub Actions, where the workflow does
    the push itself. Skipped (returning False) when FOREX_PAT is not set.

    Args:
        files: file names relative to REPO_FOLDER; missing files are ignored.
        message: commit message.

    Returns:
        True when the push succeeded or there was nothing to do, False
        otherwise. Errors are reported and never raised.

    Side effects:
        Changes the working directory to REPO_FOLDER while running and to
        ROOT_PATH afterwards.
    """

    # Skip Git operations in GitHub Actions (the workflow handles them)
    if IN_GHA:
        print_status("ü§ñ GitHub Actions: Skipping Git push (workflow handles it)", "info")
        return True

    if not FOREX_PAT:
        print_status("‚ö†Ô∏è No FOREX_PAT - skipping Git push", "warn")
        return False

    try:
        REPO_URL = f"https://{GITHUB_USERNAME}:{FOREX_PAT}@github.com/{GITHUB_USERNAME}/{GITHUB_REPO}.git"

        # Ensure repo exists
        if not (REPO_FOLDER / ".git").exists():
            print_status("Cloning repository...", "info")
            subprocess.run(
                ["git", "clone", REPO_URL, str(REPO_FOLDER)],
                capture_output=True,
                timeout=60,
                check=True
            )

        os.chdir(REPO_FOLDER)

        # Stage files
        print_status("Staging files...", "info")
        files_added = 0
        for f in files:
            file_path = REPO_FOLDER / f
            if file_path.exists():
                subprocess.run(["git", "add", str(f)], check=False)
                files_added += 1

        if files_added == 0:
            print_status("No files to stage", "warn")
            return True

        # Commit (a non-zero exit, e.g. "nothing to commit", is tolerated)
        subprocess.run(
            ["git", "commit", "-m", message],
            capture_output=True,
            check=False
        )

        # Pull before push; bounded so a stalled network cannot hang the run
        subprocess.run(
            ["git", "pull", "--rebase", "origin", "main"],
            capture_output=True,
            timeout=60,
            check=False
        )

        # Push with retry
        for attempt in range(3):
            print_status(f"Pushing (attempt {attempt + 1}/3)...", "info")

            try:
                result = subprocess.run(
                    ["git", "push", "origin", "main"],
                    capture_output=True,
                    timeout=30
                )
            except subprocess.TimeoutExpired:
                # A timed-out attempt counts as a failed attempt, not as a
                # reason to abandon the remaining retries.
                result = None

            if result is not None and result.returncode == 0:
                print_status("‚úÖ Successfully pushed to GitHub", "success")
                return True

            if attempt < 2:
                time.sleep(2)

        print_status("‚ùå Push failed after 3 attempts", "error")
        return False

    except Exception as e:
        # CalledProcessError / TimeoutExpired include the full command line,
        # which for the clone contains the token-bearing URL. Scrub the token
        # before the message reaches the logs.
        safe_error = str(e).replace(FOREX_PAT, "***")
        print_status(f"‚ùå Git error: {safe_error}", "error")
        return False
    finally:
        try:
            os.chdir(ROOT_PATH)
        except Exception:
            # Best effort: failing to restore the directory must not mask the result.
            pass

# ======================================================
# MAIN EXECUTION
# ======================================================
def main():
    """Run one full pipeline iteration.

    Steps: load data, train every model in COMPETITION_MODELS and generate
    its signals, store the signals in MEMORY, update LEARNING, write the two
    JSON signal files, send the e-mail report and push the JSON files to
    GitHub. A model that fails is reported and skipped.

    Whatever happens, the iteration counter is incremented (recording
    success or failure) and MEMORY is saved before returning. Exceptions are
    reported, not propagated.
    """
    print_status("=" * 70, "rocket")
    print_status("üöÄ FOREX PIPELINE v8.5.2 - FIXED PATHS EDITION", "rocket")
    print_status("=" * 70, "rocket")

    success = False

    try:
        # Display stats
        current_iter = COUNTER.data['total'] + 1
        stats = COUNTER.get_stats()
        mode = MODE_MANAGER.get_mode()

        print_status(f"\nüìä Iteration #{current_iter} | Mode: {mode.upper()}", "info")
        print_status(f"Total Runs: {stats['total']} | Days: {stats['days']} | Avg/Day: {stats['per_day']:.1f}", "info")

        # Load data
        print_status("\nüì¶ Loading data...", "info")
        data = load_data(PICKLE_FOLDER)

        # load_data returns an entry for every pair even when no file was
        # found, so an emptiness check on the dict alone never fires: look
        # for at least one pair that actually has a timeframe loaded.
        if not data or not any(data.values()):
            raise ValueError("‚ùå No data loaded - check PICKLE_FOLDER")

        print_status(f"‚úÖ Loaded {len(data)} pairs", "success")

        tf_map = build_tf_map(data)

        # Run competition
        print_status("\nüèÜ Running Competition...", "chart")
        competition_results = {}
        signals_by_model = {}

        for model_name, config in COMPETITION_MODELS.items():
            try:
                result = run_ga(data, tf_map, model_name, config)
                competition_results[model_name] = result

                # Generate signals
                signals = generate_signals(
                    data, tf_map, result['chromosome'],
                    model_name, datetime.now(timezone.utc)
                )
                signals_by_model[model_name] = signals

            except Exception as e:
                # One failing model must not stop the others.
                print_status(f"‚ùå {model_name} failed: {e}", "error")

        # Store signals in memory
        MEMORY.store_signals(signals_by_model, datetime.now(timezone.utc))

        # Update learning
        LEARNING.record_iteration(competition_results)
        learning_report = LEARNING.get_report()

        print_status(
            f"\nüß† Learning: {learning_report['trend']} | "
            f"Score: {learning_report['adaptation_score']:.1f}/100",
            "brain"
        )

        # Save signals to JSON
        print_status("\nüíæ Saving signals...", "info")

        # Ensure parent directories exist
        SIGNALS_JSON_PATH.parent.mkdir(parents=True, exist_ok=True)

        with open(SIGNALS_JSON_PATH, 'w') as f:
            json.dump(signals_by_model, f, indent=2, default=str)
        print_status(f"‚úÖ Saved: {SIGNALS_JSON_PATH}", "success")

        with open(ENSEMBLE_SIGNALS_FILE, 'w') as f:
            json.dump({
                'timestamp': datetime.now(timezone.utc).isoformat(),
                'iteration': current_iter,
                'models': signals_by_model
            }, f, indent=2, default=str)
        print_status(f"‚úÖ Saved: {ENSEMBLE_SIGNALS_FILE}", "success")

        # Send email
        iteration_stats = {
            'iteration': current_iter,
            'total_iterations': stats['total']
        }
        send_email(signals_by_model, iteration_stats, learning_report)

        # Push to GitHub (skipped in GHA)
        print_status("\nüîÑ Git operations...", "info")
        files = [
            SIGNALS_JSON_PATH.name,
            ENSEMBLE_SIGNALS_FILE.name
        ]
        # The commit message labels the time as UTC, so take it in UTC.
        commit_time = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')
        push_to_github(
            files,
            f"ü§ñ Auto-update: Iteration #{current_iter} - {commit_time}"
        )

        # Summary
        print_status("\n" + "=" * 70, "success")
        print_status("‚úÖ PIPELINE COMPLETED SUCCESSFULLY", "success")
        print_status("=" * 70, "success")
        print_status(f"Iteration: #{current_iter}", "info")
        print_status(f"Models: {len(competition_results)}", "info")
        print_status(
            f"Signals: {sum(1 for m in signals_by_model.values() for s in m.values() if s['direction'] != 'HOLD')}",
            "info"
        )

        success = True

    except KeyboardInterrupt:
        print_status("\n‚ö†Ô∏è Shutdown requested", "warn")
    except Exception as e:
        print_status(f"\n‚ùå Fatal error: {e}", "error")
        logging.exception("Fatal error")
        import traceback
        traceback.print_exc()
    finally:
        # Always record the run and flush memory, even after a failure.
        COUNTER.increment(success=success)
        MEMORY.close()
        print_status("Cleanup complete", "info")

# Entry point: run the pipeline once, then log that the process is finishing.
# main() handles its own exceptions, so the final message is printed even
# after a failed run.
if __name__ == "__main__":
    main()
    print_status("Pipeline shutdown complete", "info")