In [None]:
import os

SAVE_DIR = "forex_ai_outputs"

# The working directory persists for the lifetime of the kernel, so running
# this cell twice used to create and enter forex_ai_outputs/forex_ai_outputs/...
# Only create/enter the folder when we are not already inside it.
if os.path.basename(os.getcwd()) != SAVE_DIR:
    os.makedirs(SAVE_DIR, exist_ok=True)
    os.chdir(SAVE_DIR)
print("Working directory:", os.getcwd())



In [None]:
!pip install mplfinance firebase-admin dropbox requests beautifulsoup4 pandas numpy ta yfinance pyppeteer nest_asyncio lightgbm joblib matplotlib alpha_vantage tqdm scikit-learn


In [None]:
# -------------------------------
# Fully Automatic Fresh-Run GitHub Workflow in Colab / GitHub Actions
# -------------------------------

import os
import subprocess
import shutil

# -------------------------------
# 0️⃣ User Config
# -------------------------------
GITHUB_USERNAME = "rahim-dotAI"
GITHUB_REPO = "forex-ai-models"
REPO_FOLDER = GITHUB_REPO  # Local folder

# SECURITY: a personal access token must never be stored in the notebook.
# The token that used to be hardcoded on this line has been exposed and must
# be revoked on GitHub. Supply a fresh one through the GITHUB_PAT environment
# variable (CI secret, Colab secret exported to os.environ, ...); an
# interactive prompt is used only as a fallback.
GITHUB_PAT = os.environ.get("GITHUB_PAT", "")
if not GITHUB_PAT:
    from getpass import getpass
    GITHUB_PAT = getpass("GitHub personal access token: ")
GIT_USER_EMAIL = "nakatonabira3@gmail.com"

# -------------------------------
# 1️⃣ Install Git and Git LFS (Safe across environments)
# -------------------------------
print("⚙️ Checking Git and Git LFS...")

def safe_run(cmd):
    """Run a shell command, logging instead of raising when it fails.

    Args:
        cmd: Command line handed to the shell (``shell=True``), so shell
            operators such as ``&&`` and ``||`` are honoured. Only pass
            strings authored in this notebook, never untrusted input.

    Returns:
        bool: True when the command exited with status 0, False when it
        exited non-zero. Callers that ignore the result behave as before;
        callers that need to know whether a step worked can now check it.
    """
    try:
        subprocess.run(cmd, shell=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"⚠️ Skipped or failed: {cmd}\n   Reason: {e}")
        return False
    return True

USE_SUDO = shutil.which("sudo") is not None

# "sudo a && b" elevates only `a`: the shell runs `b` as the current user.
# The prefix therefore has to be part of every apt-get invocation in a chain
# rather than being prepended once to the joined command string.
APT_GET = "sudo apt-get" if USE_SUDO else "apt-get"

if shutil.which("git") is None:
    safe_run(f"{APT_GET} update -qq && {APT_GET} install -y git")
else:
    print("✅ Git already installed.")

if shutil.which("git-lfs") is None:
    # Refresh the package index here as well: when git is preinstalled the
    # branch above is skipped and the index may never have been downloaded.
    safe_run(f"{APT_GET} update -qq && {APT_GET} install -y git-lfs")
else:
    print("✅ Git LFS already installed.")

safe_run("git lfs install")

# -------------------------------
# 2️⃣ Configure secure Git credentials
# -------------------------------
print("🔐 Configuring secure GitHub credentials...")
safe_run("git config --global credential.helper store")
cred_path = os.path.expanduser("~/.git-credentials")
# The file holds the token in clear text. Create it owner-only (0600) instead
# of relying on the process umask, and tighten the mode afterwards in case the
# file already existed with looser permissions.
cred_fd = os.open(cred_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
with os.fdopen(cred_fd, "w") as f:
    f.write(f"https://{GITHUB_USERNAME}:{GITHUB_PAT}@github.com\n")
os.chmod(cred_path, 0o600)

# -------------------------------
# 3️⃣ Remove local repo (fresh run)
# -------------------------------
if os.path.exists(REPO_FOLDER):
    print(f"🗑️ Removing existing local repo '{REPO_FOLDER}' for a fresh run...")
    shutil.rmtree(REPO_FOLDER)

# -------------------------------
# 4️⃣ Configure Git identity
# -------------------------------
print("🔧 Configuring Git identity...")
safe_run(f'git config --global user.name "{GITHUB_USERNAME}"')
safe_run(f'git config --global user.email "{GIT_USER_EMAIL}"')

# -------------------------------
# 5️⃣ Clone repo fresh
# -------------------------------
print(f"📥 Cloning repo '{REPO_FOLDER}' from GitHub securely...")
safe_run(f"git clone https://github.com/{GITHUB_USERNAME}/{GITHUB_REPO}.git")

# safe_run() only logs a failed command, so confirm the clone really produced
# a repository before entering it. The exception type matches what os.chdir()
# raised previously; only the message is more helpful.
if not os.path.isdir(os.path.join(REPO_FOLDER, ".git")):
    raise FileNotFoundError(
        f"Clone of '{GITHUB_REPO}' did not produce '{REPO_FOLDER}/.git'; "
        "check the git output above (network access, credentials, repository name)."
    )
os.chdir(REPO_FOLDER)

# -------------------------------
# 6️⃣ Track CSV/PKL files with Git LFS
# -------------------------------
print("📌 Tracking CSV/PKL files with Git LFS...")
for lfs_pattern in ("*.csv", "*.pkl"):
    safe_run(f"git lfs track '{lfs_pattern}'")
safe_run("git add .gitattributes")
safe_run('git commit -m "Track CSV/PKL files with Git LFS" || echo "No .gitattributes changes"')

# -------------------------------
# 7️⃣ Stage, commit, and push changes (Safe)
# -------------------------------
print("📂 Staging all new/modified files...")
safe_run("git add -A")
safe_run('git commit -m "Auto-update: new or modified files" || echo "No new changes to commit"')

# Guard against pushing from a suspiciously small checkout.
# NOTE(review): the walk starts at '.' and is not pruned, so every file under
# .git/ is counted as well — confirm the threshold of 10 is still meaningful.
file_count = 0
for _dirpath, _dirnames, filenames in os.walk('.'):
    file_count += len(filenames)

if file_count >= 10:
    print("🚀 Pushing changes to GitHub...")
    safe_run("git push origin main")
else:
    print("⚠️ Too few files detected — possible clone failure. Skipping push.")

# -------------------------------
# 8️⃣ List LFS-tracked files
# -------------------------------
print("📋 LFS-tracked files:")
safe_run("git lfs ls-files")

print("✅ Fresh-run GitHub repo workflow complete!")


In [None]:
import os

# SECURITY: API keys must not live in the notebook source. The values that
# used to be hardcoded here have been exposed and should be rotated with the
# providers. Keys are now taken from the environment; a hidden prompt is used
# only when a key is missing (session-only, nothing is written to disk).
from getpass import getpass

for _secret_name in ("ALPHA_VANTAGE_KEY", "BROWSERLESS_TOKEN"):
    if not os.environ.get(_secret_name):
        os.environ[_secret_name] = getpass(f"{_secret_name}: ")

# Confirm the keys are present without echoing them into the cell output,
# which is saved (and shared) together with the notebook.
print("Alpha Vantage Key:", "set" if os.environ.get('ALPHA_VANTAGE_KEY') else "MISSING")
print("Browserless Token:", "set" if os.environ.get('BROWSERLESS_TOKEN') else "MISSING")




In [None]:
import os
import pandas as pd
import requests
import hashlib
from pathlib import Path
import subprocess

# -----------------------------
# CONFIGURATION
# -----------------------------
SAVE_FOLDER = "/content/forex-ai-models"  # Cloned GitHub repo folder
Path(SAVE_FOLDER).mkdir(parents=True, exist_ok=True)

GIT_NAME = "Abdul Rahim"
GIT_EMAIL = "nakatonabira3@gmail.com"

# -----------------------------
# GitHub Auth & Repo Info
# -----------------------------
GITHUB_USERNAME = "rahim-dotAI"
GITHUB_REPO = "forex-ai-models"
# SECURITY: never hardcode the token. The one previously committed on this
# line has been exposed and must be revoked. Provide a fresh token through the
# GITHUB_TOKEN environment variable; a hidden prompt is the fallback.
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", "")
if not GITHUB_TOKEN:
    from getpass import getpass
    GITHUB_TOKEN = getpass("GitHub personal access token: ")
BRANCH = "main"
# The token is embedded in the remote URL; avoid printing REPO_URL.
REPO_URL = f"https://{GITHUB_TOKEN}@github.com/{GITHUB_USERNAME}/{GITHUB_REPO}.git"

# -----------------------------
# Setup Git identity (once per session)
# -----------------------------
subprocess.run(["git", "config", "--global", "user.name", GIT_NAME], check=True)
subprocess.run(["git", "config", "--global", "user.email", GIT_EMAIL], check=True)

# -----------------------------
# Helper functions
# -----------------------------
def fetch_alpha_vantage_fx(pair, outputsize='compact'):
    """Fetch daily FX candles for one currency pair from Alpha Vantage.

    Args:
        pair: Currency pair written as ``"BASE/QUOTE"``, e.g. ``"EUR/USD"``.
        outputsize: Forwarded as the ``outputsize`` query parameter.

    Returns:
        pandas.DataFrame indexed by date (ascending) with float columns
        ``open``, ``high``, ``low`` and ``close``. An empty DataFrame is
        returned when the request fails, the body is not JSON, or the payload
        has no ``'Time Series FX (Daily)'`` section.

    Raises:
        KeyError: if the ``ALPHA_VANTAGE_KEY`` environment variable is unset.
        ValueError: if ``pair`` does not contain exactly one ``/``.
    """
    base_url = 'https://www.alphavantage.co/query'
    from_currency, to_currency = pair.split('/')
    params = {
        'function': 'FX_DAILY',
        'from_symbol': from_currency,
        'to_symbol': to_currency,
        'outputsize': outputsize,
        'datatype': 'json',
        'apikey': os.environ['ALPHA_VANTAGE_KEY']
    }
    try:
        response = requests.get(base_url, params=params, timeout=30)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as exc:
        # Report only the exception type: the message of a requests error can
        # contain the full request URL, which includes the API key.
        print(f"Failed to fetch {pair}: {type(exc).__name__}")
        return pd.DataFrame()

    if not isinstance(data, dict) or 'Time Series FX (Daily)' not in data:
        # Error and rate-limit responses arrive as JSON without the series.
        print(f"Failed to fetch {pair}: {data}")
        return pd.DataFrame()

    ts = data['Time Series FX (Daily)']
    # The payload maps date -> {field: value}; transpose so dates are rows.
    df = pd.DataFrame(ts).T
    df.index = pd.to_datetime(df.index)
    df = df.sort_index()
    return df.rename(columns={
        '1. open': 'open',
        '2. high': 'high',
        '3. low': 'low',
        '4. close': 'close'
    }).astype(float)

def file_hash(filepath, chunk_size=8192):
    """Return the hex MD5 digest of a file, or None if the path does not exist.

    The file is read ``chunk_size`` bytes at a time so that large files never
    have to be held in memory as a whole.
    """
    if os.path.exists(filepath):
        digest = hashlib.md5()
        with open(filepath, "rb") as stream:
            while True:
                block = stream.read(chunk_size)
                if not block:
                    break
                digest.update(block)
        return digest.hexdigest()
    return None

def ensure_repo_cloned(repo_url, repo_folder, branch="main"):
    """Make sure ``repo_folder`` holds an up-to-date checkout of ``repo_url``.

    If ``repo_folder`` has no ``.git`` entry, the folder (if any) is removed
    and the repository is cloned, first with ``-b branch`` and, if that
    fails, without a branch argument. Otherwise the existing checkout is
    fetched, switched to ``branch`` and pulled.

    The process working directory is never changed: git is pointed at the
    checkout with ``-C``. The previous ``chdir(repo_folder)`` / ``chdir("..")``
    pair did not restore the caller's directory and left it inside the
    checkout whenever ``git fetch`` raised.

    Args:
        repo_url: Remote URL handed to ``git clone``.
        repo_folder: Local checkout directory.
        branch: Branch to clone / check out / pull.

    Raises:
        subprocess.CalledProcessError: if removing the stale folder, the
            fallback clone, or ``git fetch`` fails.
    """
    if not os.path.exists(os.path.join(repo_folder, ".git")):
        if os.path.exists(repo_folder):
            subprocess.run(["rm", "-rf", repo_folder], check=True)
        print("📥 Cloning repo...")
        try:
            subprocess.run(["git", "clone", "-b", branch, repo_url, repo_folder], check=True)
        except subprocess.CalledProcessError:
            print(f"⚠️ Branch '{branch}' not found. Cloning default branch...")
            subprocess.run(["git", "clone", repo_url, repo_folder], check=True)
    else:
        print("🔄 Repo exists, pulling latest changes...")
        subprocess.run(["git", "-C", repo_folder, "fetch", "origin"], check=True)
        # Checkout/pull stay best-effort, as before.
        subprocess.run(["git", "-C", repo_folder, "checkout", branch], check=False)
        subprocess.run(["git", "-C", repo_folder, "pull", "origin", branch], check=False)

def ensure_repo_initialized(repo_folder):
    """Clone/update the repository and create its first commit if it has none.

    A repository counts as initialised when ``HEAD`` resolves to a commit.
    The earlier test searched ``git status`` output for "nothing to commit",
    which is also printed for a brand-new empty repository (so the initial
    commit was never created) and is absent whenever the working tree has
    changes (so an empty "Initial commit" was pushed onto a repository that
    already had history).

    The working directory of the process is left untouched; git is pointed at
    the checkout with ``-C``.

    Args:
        repo_folder: Local checkout directory.
    """
    ensure_repo_cloned(REPO_URL, repo_folder, BRANCH)
    head = subprocess.run(
        ["git", "-C", repo_folder, "rev-parse", "--verify", "--quiet", "HEAD"],
        capture_output=True,
        text=True,
    )
    if head.returncode == 0:
        return  # Already initialized
    # Unborn branch: create the root commit and publish it (best-effort).
    subprocess.run(["git", "-C", repo_folder, "commit", "--allow-empty", "-m", "Initial commit"], check=False)
    subprocess.run(["git", "-C", repo_folder, "branch", "-M", "main"], check=False)
    subprocess.run(["git", "-C", repo_folder, "push", "-u", "origin", "main"], check=False)

# -----------------------------
# Initialize repo (first time only)
# -----------------------------
ensure_repo_initialized(SAVE_FOLDER)

# -----------------------------
# List of currency pairs
# -----------------------------
pairs = ["EUR/USD", "GBP/USD", "USD/JPY", "AUD/USD"]

# -----------------------------
# Fetch, merge, save, commit & push
# -----------------------------
# NOTE(review): data is written to "<PAIR>.csv" (e.g. EUR_USD.csv), whereas the
# cells that combine Alpha Vantage data with yfinance data read
# "<PAIR>_daily.csv". Confirm which name is intended before relying on the
# combined output containing Alpha Vantage rows.
for pair in pairs:
    filename = pair.replace("/", "_") + ".csv"
    filepath = os.path.join(SAVE_FOLDER, filename)

    # Load existing data (only if file exists)
    if os.path.exists(filepath):
        existing_df = pd.read_csv(filepath, index_col=0, parse_dates=True)
        print(f"Loaded {pair}: {existing_df.shape[0]} rows")
    else:
        existing_df = pd.DataFrame()
        print(f"No existing data for {pair}")

    old_hash = file_hash(filepath)

    # Fetch new data
    new_df = fetch_alpha_vantage_fx(pair)
    if new_df.empty:
        print(f"No new data fetched for {pair}")
        continue

    # Merge: on duplicate dates the freshly fetched row wins (keep='last').
    if not existing_df.empty:
        combined_df = pd.concat([existing_df, new_df])
        combined_df = combined_df[~combined_df.index.duplicated(keep='last')]
    else:
        combined_df = new_df

    combined_df = combined_df.sort_index()
    combined_df.to_csv(filepath)

    new_hash = file_hash(filepath)

    # Commit & push only if the file content changed
    if old_hash != new_hash:
        print(f"🔄 Updating {pair} on GitHub...")
        # `git -C` instead of chdir()/chdir(".."): the working directory of the
        # notebook is no longer moved around. Argument lists instead of a
        # shell string: the token-bearing URL is never parsed by a shell.
        subprocess.run(["git", "-C", SAVE_FOLDER, "add", filename], check=False)
        subprocess.run(
            ["git", "-C", SAVE_FOLDER, "commit", "-m", f"Update {pair} historical FX data"],
            check=False,
        )
        subprocess.run(["git", "-C", SAVE_FOLDER, "push", REPO_URL, BRANCH], check=False)
    else:
        print(f"No changes for {pair}, skipping push.")

print("✅ All pairs processed.")


In [None]:
import os
import hashlib
import pandas as pd
import yfinance as yf
from pathlib import Path
import subprocess
from concurrent.futures import ThreadPoolExecutor, as_completed

# -----------------------------
# CONFIGURATION
# -----------------------------
SAVE_FOLDER = "/content/forex-ai-models"  # Cloned GitHub repo folder
Path(SAVE_FOLDER).mkdir(parents=True, exist_ok=True)

GIT_NAME = "Abdul Rahim"
GIT_EMAIL = "nakatonabira3@gmail.com"

# -----------------------------
# GitHub Auth & Repo Info
# -----------------------------
GITHUB_USERNAME = "rahim-dotAI"
GITHUB_REPO = "forex-ai-models"
# SECURITY: never hardcode the token. The one previously committed on this
# line has been exposed and must be revoked. Provide a fresh token through the
# GITHUB_TOKEN environment variable; a hidden prompt is the fallback.
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", "")
if not GITHUB_TOKEN:
    from getpass import getpass
    GITHUB_TOKEN = getpass("GitHub personal access token: ")
BRANCH = "main"
# The token is embedded in the remote URL; avoid printing REPO_URL.
REPO_URL = f"https://{GITHUB_TOKEN}@github.com/{GITHUB_USERNAME}/{GITHUB_REPO}.git"

# -----------------------------
# Setup Git identity (once per session)
# -----------------------------
subprocess.run(["git", "config", "--global", "user.name", GIT_NAME], check=False)
subprocess.run(["git", "config", "--global", "user.email", GIT_EMAIL], check=False)

# -----------------------------
# Helper function: file hash
# -----------------------------
def file_hash(filepath, chunk_size=8192):
    """MD5 hex digest of ``filepath`` read in ``chunk_size``-byte pieces.

    Returns None when ``filepath`` does not exist.
    """
    if not os.path.exists(filepath):
        return None
    hasher = hashlib.md5()
    with open(filepath, "rb") as handle:
        piece = handle.read(chunk_size)
        while piece:
            hasher.update(piece)
            piece = handle.read(chunk_size)
    return hasher.hexdigest()

# -----------------------------
# Ensure repo exists, pull latest, setup LFS
# -----------------------------
def ensure_repo():
    """Clone or refresh the checkout in SAVE_FOLDER and set up Git LFS tracking.

    Steps:
      1. Clone BRANCH of REPO_URL when SAVE_FOLDER has no ``.git`` entry
         (removing a stale folder first); otherwise fetch, check out and pull.
      2. Install the ``git-lfs`` package only if the executable is missing.
         It used to be reinstalled through apt-get on every run.
      3. Track ``*.csv`` and ``*.pkl`` with LFS and commit ``.gitattributes``.

    Raises:
        subprocess.CalledProcessError: if removing the stale folder, the
            clone, or ``git fetch`` fails. All other git steps are best-effort.
    """
    import shutil  # local import: this cell does not import shutil itself

    if not os.path.exists(os.path.join(SAVE_FOLDER, ".git")):
        if os.path.exists(SAVE_FOLDER):
            subprocess.run(["rm", "-rf", SAVE_FOLDER], check=True)
        print("📥 Cloning repo...")
        subprocess.run(["git", "clone", "-b", BRANCH, REPO_URL, SAVE_FOLDER], check=True)
    else:
        print("🔄 Repo exists, pulling latest changes...")
        subprocess.run(["git", "-C", SAVE_FOLDER, "fetch", "origin"], check=True)
        subprocess.run(["git", "-C", SAVE_FOLDER, "checkout", BRANCH], check=False)
        subprocess.run(["git", "-C", SAVE_FOLDER, "pull", "origin", BRANCH], check=False)

    # Install LFS (only when absent) and track CSV/PKL
    if shutil.which("git-lfs") is None:
        print("⚙️ Installing Git LFS...")
        subprocess.run("apt-get update && apt-get install git-lfs -y", shell=True)
    subprocess.run(["git", "lfs", "install"], check=False)
    subprocess.run(["git", "-C", SAVE_FOLDER, "lfs", "track", "*.csv"], check=False)
    subprocess.run(["git", "-C", SAVE_FOLDER, "lfs", "track", "*.pkl"], check=False)
    subprocess.run(["git", "-C", SAVE_FOLDER, "add", ".gitattributes"], check=False)
    # Argument list instead of an f-string shell command: SAVE_FOLDER is no
    # longer subject to shell word-splitting. A non-zero exit is treated the
    # same way the former `|| echo ...` fallback treated it.
    commit = subprocess.run(
        ["git", "-C", SAVE_FOLDER, "commit", "-m", "Track CSV/PKL files with Git LFS"],
        check=False,
    )
    if commit.returncode != 0:
        print("No changes to commit for .gitattributes")

ensure_repo()

# -----------------------------
# FX pairs and timeframes
# -----------------------------
# Currency pairs to download, written as "BASE/QUOTE".
fx_pairs = [
    "EUR/USD",
    "GBP/USD",
    "USD/JPY",
    "AUD/USD",
]

# Maps "<interval>_<period>" to the (interval, period) pair that is passed to
# the downloader for that timeframe.
timeframes = {
    f"{interval}_{period}": (interval, period)
    for interval, period in (
        ("1m", "7d"),
        ("5m", "1mo"),
        ("15m", "60d"),
        ("1h", "2y"),
        ("1d", "5y"),
    )
}

# -----------------------------
# Worker function for each pair/timeframe
# -----------------------------
import threading

# NOTE(review): yf.download() is reported to keep per-call state in
# module-level globals inside yfinance, so overlapping calls from several
# worker threads can mix up or lose results. Only the download is serialised;
# reading, merging and writing the CSV files still run in parallel.
_YF_DOWNLOAD_LOCK = threading.Lock()


def process_pair_tf(pair, tf_name, interval, period):
    """Download one pair/timeframe from yfinance and merge it into its CSV.

    Args:
        pair: Currency pair as ``"BASE/QUOTE"``; the yfinance symbol is the
            pair without the slash plus ``"=X"``.
        tf_name: Timeframe label used in the file name.
        interval: Candle interval passed to ``yf.download``.
        period: Look-back period passed to ``yf.download``.

    Returns:
        tuple ``(message, filename)``. ``filename`` is the CSV's base name
        when the file content changed, otherwise None (no change, no data, or
        download failure).
    """
    symbol = pair.replace("/", "") + "=X"
    filename = f"{pair.replace('/', '_')}_{tf_name}.csv"
    filepath = os.path.join(SAVE_FOLDER, filename)

    # Load existing (an empty file is treated like a missing one)
    if os.path.exists(filepath) and os.path.getsize(filepath) > 0:
        existing_df = pd.read_csv(filepath, index_col=0, parse_dates=True)
        print(f"Loaded existing {pair} {tf_name}, {existing_df.shape[0]} rows")
    else:
        existing_df = pd.DataFrame()
        print(f"No existing data for {pair} {tf_name}")

    old_hash = file_hash(filepath)

    # Fetch new data
    try:
        with _YF_DOWNLOAD_LOCK:
            df = yf.download(
                symbol,
                period=period,
                interval=interval,
                progress=False,
                auto_adjust=False,
                threads=True
            )
    except Exception as e:
        return f"❌ Failed to fetch {pair} {tf_name}: {e}", None

    if df.empty:
        return f"⚠️ Skipped {pair} {tf_name}: No data", None

    # Standardize: keep OHLC only and use lower-case column names
    df = df[['Open', 'High', 'Low', 'Close']]
    df.columns = ['open', 'high', 'low', 'close']
    df.index = pd.to_datetime(df.index)

    # Merge with existing: on duplicate timestamps the new row wins
    if not existing_df.empty:
        combined_df = pd.concat([existing_df, df])
        combined_df = combined_df[~combined_df.index.duplicated(keep="last")]
    else:
        combined_df = df

    combined_df = combined_df.sort_index()
    combined_df.to_csv(filepath)

    new_hash = file_hash(filepath)

    # Only mark for commit if the file content changed
    if old_hash != new_hash:
        return f"📌 Updated {pair} {tf_name}", filename
    else:
        return f"✅ No changes for {pair} {tf_name}", None

# -----------------------------
# Run all downloads in parallel
# -----------------------------
changed_files = []
tasks = []
with ThreadPoolExecutor(max_workers=8) as executor:
    for pair in fx_pairs:
        for tf_name, (interval, period) in timeframes.items():
            tasks.append(executor.submit(process_pair_tf, pair, tf_name, interval, period))

# The executor has been shut down at this point, so every future is finished;
# as_completed() simply yields them for result collection.
for future in as_completed(tasks):
    msg, filename = future.result()
    print(msg)
    if filename:
        changed_files.append(filename)

# -----------------------------
# Commit & push once if needed
# -----------------------------
if changed_files:
    print(f"🚀 Committing {len(changed_files)} updated files in one push...")
    subprocess.run(["git", "-C", SAVE_FOLDER, "add"] + changed_files, check=False)
    subprocess.run(
        ["git", "-C", SAVE_FOLDER, "commit", "-m", "Update multiple FX files"],
        check=False
    )
    # Argument list instead of a shell string: the token-bearing URL is no
    # longer interpolated into a command line that a shell has to parse.
    subprocess.run(["git", "-C", SAVE_FOLDER, "push", REPO_URL, BRANCH], check=False)
else:
    print("✅ No changes detected, nothing to push.")

print("🎯 All yfinance pairs & timeframes processed (parallel, single push).")


In [None]:
import os
import pandas as pd
import hashlib
from pathlib import Path
import subprocess
from concurrent.futures import ThreadPoolExecutor, as_completed

# -----------------------------
# CONFIGURATION
# -----------------------------
SAVE_FOLDER = "/content/forex-ai-models"
Path(SAVE_FOLDER).mkdir(parents=True, exist_ok=True)

GIT_NAME = "Abdul Rahim"
GIT_EMAIL = "nakatonabira3@gmail.com"

# Setup Git identity
subprocess.run(["git", "config", "--global", "user.name", GIT_NAME], check=False)
subprocess.run(["git", "config", "--global", "user.email", GIT_EMAIL], check=False)

# -----------------------------
# GitHub repo info
# -----------------------------
GITHUB_USERNAME = "rahim-dotAI"
GITHUB_REPO = "forex-ai-models"
# SECURITY: never hardcode the token. The one previously committed on this
# line has been exposed and must be revoked. Provide a fresh token through the
# GITHUB_TOKEN environment variable; a hidden prompt is the fallback.
GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN", "")
if not GITHUB_TOKEN:
    from getpass import getpass
    GITHUB_TOKEN = getpass("GitHub personal access token: ")
BRANCH = "main"
# The token is embedded in the remote URL; avoid printing REPO_URL.
REPO_URL = f"https://{GITHUB_TOKEN}@github.com/{GITHUB_USERNAME}/{GITHUB_REPO}.git"

# -----------------------------
# Ensure repo exists, pull latest, track LFS
# -----------------------------
def ensure_repo():
    """Bring the checkout in SAVE_FOLDER up to date and LFS-track combined CSVs.

    An existing checkout (one containing ``.git``) is fetched, switched to
    BRANCH and pulled. Otherwise any stale folder is removed and BRANCH of
    REPO_URL is cloned. Afterwards ``*_combined.csv`` is registered with Git
    LFS and ``.gitattributes`` is committed.
    """
    def _git(*args, check=False):
        # Run a git sub-command against the checkout without changing cwd.
        return subprocess.run(["git", "-C", SAVE_FOLDER, *args], check=check)

    has_checkout = os.path.exists(os.path.join(SAVE_FOLDER, ".git"))
    if has_checkout:
        print("🔄 Repo exists, pulling latest changes...")
        _git("fetch", "origin", check=True)
        _git("checkout", BRANCH)
        _git("pull", "origin", BRANCH)
    else:
        if os.path.exists(SAVE_FOLDER):
            subprocess.run(["rm", "-rf", SAVE_FOLDER], check=True)
        print("📥 Cloning repo...")
        subprocess.run(["git", "clone", "-b", BRANCH, REPO_URL, SAVE_FOLDER], check=True)

    # Track combined CSVs with Git LFS
    _git("lfs", "track", "*_combined.csv")
    _git("add", ".gitattributes")
    commit_cmd = f'git -C {SAVE_FOLDER} commit -m "Track combined CSVs with Git LFS" || echo "No changes for .gitattributes"'
    subprocess.run(commit_cmd, shell=True)

ensure_repo()

# -----------------------------
# Helper: file hash
# -----------------------------
def file_hash(filepath, chunk_size=8192):
    """Hash a file with MD5, streaming it in ``chunk_size``-byte reads.

    Returns the hexadecimal digest, or None if ``filepath`` does not exist.
    """
    path_missing = not os.path.exists(filepath)
    if path_missing:
        return None

    checksum = hashlib.md5()
    with open(filepath, "rb") as source:
        read_chunk = lambda: source.read(chunk_size)
        for data in iter(read_chunk, b""):
            checksum.update(data)
    return checksum.hexdigest()

# -----------------------------
# Helper: combine AV daily and YF higher-frequency data
# -----------------------------
def combine_fx_data(av_df, yf_df):
    """Merge Alpha Vantage candles into a yfinance frame on the index.

    The two frames are outer-joined on their index. For every row the
    yfinance ``open/high/low/close`` values are kept and only missing values
    are filled from the Alpha Vantage columns. Rows that still lack any of
    the four prices are dropped and the result is sorted by index.

    Neither input is modified: timezone-aware indexes are converted on a
    copy, whereas the index of the caller's frames used to be replaced in
    place.

    Args:
        av_df: Alpha Vantage frame with ``open, high, low, close`` columns,
            or None/empty.
        yf_df: yfinance frame with at least the same four columns, or
            None/empty.

    Returns:
        The other frame unchanged when one input is None or empty; otherwise
        a new DataFrame with a timezone-naive index and float OHLC columns.
    """
    if av_df is None or av_df.empty:
        return yf_df
    if yf_df is None or yf_df.empty:
        return av_df

    # DataFrame.tz_convert returns a new object, so the inputs stay untouched.
    if av_df.index.tz is not None:
        av_df = av_df.tz_convert(None)
    if yf_df.index.tz is not None:
        yf_df = yf_df.tz_convert(None)

    price_cols = ['open', 'high', 'low', 'close']
    combined_df = pd.merge(yf_df, av_df[price_cols],
                           left_index=True, right_index=True,
                           how='outer', suffixes=('', '_av'))

    for col in price_cols:
        combined_df[col] = combined_df[col].fillna(combined_df[f'{col}_av'])
    combined_df = combined_df.drop(columns=[f'{col}_av' for col in price_cols], errors='ignore')
    combined_df = combined_df.sort_index()
    combined_df = combined_df.dropna(subset=price_cols)
    for col in price_cols:
        combined_df[col] = combined_df[col].astype(float)
    return combined_df

# -----------------------------
# FX pairs and timeframes
# -----------------------------
# Currency pairs ("BASE/QUOTE") and the timeframe labels used in file names.
pairs = [
    "EUR/USD",
    "GBP/USD",
    "USD/JPY",
    "AUD/USD",
]
timeframes = [
    "1m_7d",
    "5m_1mo",
    "15m_60d",
    "1h_2y",
    "1d_5y",
]

# -----------------------------
# Worker function for one pair/timeframe
# -----------------------------
def process_pair_tf(pair, tf_name):
    """Combine the stored daily and timeframe CSVs of one pair into one CSV.

    Reads ``<PAIR>_daily.csv`` and ``<PAIR>_<tf_name>.csv`` from SAVE_FOLDER
    (a missing file counts as empty), merges them with ``combine_fx_data`` and
    writes ``<PAIR>_<tf_name>_combined.csv``.

    NOTE(review): the Alpha Vantage cell of this notebook writes
    ``<PAIR>.csv`` without the ``_daily`` suffix — confirm the daily file read
    here is actually produced somewhere.

    Returns:
        tuple ``(message, path)``; ``path`` is the combined file when its
        content changed, otherwise None. The file is written in both cases.
    """
    slug = pair.replace("/", "_")

    def _read_or_empty(csv_path):
        # A missing input is represented by an empty frame.
        if os.path.exists(csv_path):
            return pd.read_csv(csv_path, index_col=0, parse_dates=True)
        return pd.DataFrame()

    av_df = _read_or_empty(os.path.join(SAVE_FOLDER, slug + "_daily.csv"))
    yf_df = _read_or_empty(os.path.join(SAVE_FOLDER, f"{slug}_{tf_name}.csv"))

    combined_df = combine_fx_data(av_df, yf_df)
    if combined_df.empty:
        return f"No data to combine for {pair} {tf_name}", None

    combined_file = os.path.join(SAVE_FOLDER, f"{slug}_{tf_name}_combined.csv")
    hash_before = file_hash(combined_file)
    combined_df.to_csv(combined_file)
    hash_after = file_hash(combined_file)

    if hash_before == hash_after:
        return f"No changes for {pair} {tf_name}, skipping save", None
    return f"Changes detected for {pair} {tf_name}, saving combined CSV", combined_file

# -----------------------------
# Run all combinations in parallel
# -----------------------------
changed_files = []
tasks = []
with ThreadPoolExecutor(max_workers=8) as executor:
    for pair in pairs:
        for tf_name in timeframes:
            job = executor.submit(process_pair_tf, pair, tf_name)
            tasks.append(job)

# Collect worker results; only files whose content changed are staged later.
for future in as_completed(tasks):
    msg, filename = future.result()
    print(msg)
    if not filename:
        continue
    changed_files.append(filename)

# -----------------------------
# Commit & push once if any files changed
# -----------------------------
if not changed_files:
    print("✅ No combined files changed, nothing to push.")
else:
    print(f"🚀 Committing {len(changed_files)} combined files in one push...")
    for git_args in (
        ["add"] + changed_files,
        ["commit", "-m", "Update combined FX data"],
        ["push", "origin", "main"],
    ):
        subprocess.run(["git", "-C", SAVE_FOLDER] + git_args, check=False)

print("✅ All FX pairs processed and combined (parallel, single push).")


In [None]:
import os
import requests
import re

def fetch_live_rate(pair):
    """Fetch a live FX rate by scraping the X-Rates calculator via Browserless.

    Args:
        pair: Currency pair written as ``"BASE/QUOTE"``.

    Returns:
        float rate parsed from the rendered page, or ``0`` when the request
        fails or the rate cannot be found in the page.

    Raises:
        ValueError: if the ``BROWSERLESS_TOKEN`` environment variable is
            unset, or ``pair`` does not contain exactly one ``/``.
    """
    from_currency, to_currency = pair.split('/')
    browserless_token = os.environ.get('BROWSERLESS_TOKEN')
    if not browserless_token:
        raise ValueError("Set BROWSERLESS_TOKEN in your environment variables")

    url = f"https://production-sfo.browserless.io/content?token={browserless_token}"
    payload = {"url": f"https://www.x-rates.com/calculator/?from={from_currency}&to={to_currency}&amount=1"}

    try:
        # A timeout keeps a stalled connection from hanging the notebook.
        res = requests.post(url, json=payload, timeout=30)
        # Regex to extract the FX value
        match = re.search(r'ccOutputRslt[^>]*>([\d,.]+)', res.text)
        return float(match.group(1).replace(',', '')) if match else 0
    except Exception as e:
        # Print only the exception type: request errors can embed the request
        # URL, and the URL contains the Browserless token.
        print(f"Failed to fetch {pair}: {type(e).__name__}")
        return 0

# --- Fetch live prices for all pairs ---
pairs = ["EUR/USD", "GBP/USD", "USD/JPY", "AUD/USD"]

# One scrape per pair; insertion order follows `pairs`.
live_prices = dict((pair, fetch_live_rate(pair)) for pair in pairs)

for pair, price in live_prices.items():
    print(f"{pair}: {price}")


In [None]:
import os
import pandas as pd
import hashlib
import numpy as np
import ta
from ta.momentum import WilliamsRIndicator
from sklearn.preprocessing import MinMaxScaler
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed
import subprocess

# -----------------------------
# CONFIGURATION
# -----------------------------
SAVE_FOLDER = "/content/forex-ai-models"
Path(SAVE_FOLDER).mkdir(parents=True, exist_ok=True)

GIT_NAME = "Abdul Rahim"
GIT_EMAIL = "nakatonabira3@gmail.com"
# Set the global git identity (best-effort; failures are ignored).
for _git_key, _git_value in (("user.name", GIT_NAME), ("user.email", GIT_EMAIL)):
    subprocess.run(["git", "config", "--global", _git_key, _git_value], check=False)

# Output folder for the pickled frames with indicators.
combined_save_path = os.path.join(SAVE_FOLDER, "combined_with_indicators")
Path(combined_save_path).mkdir(exist_ok=True, parents=True)

pairs = [
    "EUR/USD",
    "GBP/USD",
    "USD/JPY",
    "AUD/USD",
]

# -----------------------------
# Helper Functions
# -----------------------------
def file_hash(filepath, chunk_size=8192):
    """Compute the MD5 hex digest of ``filepath`` without loading it whole.

    Returns None for a path that does not exist.
    """
    if not os.path.exists(filepath):
        return None

    md5_state = hashlib.md5()
    with open(filepath, "rb") as infile:
        while True:
            buf = infile.read(chunk_size)
            if buf == b"":
                break
            md5_state.update(buf)
    return md5_state.hexdigest()

def combine_fx_data(av_df, yf_df):
    """Merge AlphaVantage + Yahoo Finance data on the index.

    The frames are outer-joined on their index; yfinance prices take
    precedence and Alpha Vantage prices only fill gaps. Rows without a
    complete open/high/low/close set are dropped and the result is sorted.

    The inputs are not modified: timezone-aware indexes are converted on a
    copy, whereas the index of the caller's frames used to be replaced in
    place.

    Args:
        av_df: Alpha Vantage frame with ``open, high, low, close`` columns,
            or None/empty.
        yf_df: yfinance frame with at least the same columns, or None/empty.

    Returns:
        The other frame unchanged when one input is None or empty; otherwise
        a new DataFrame with a timezone-naive index and float OHLC columns.
    """
    if av_df is None or av_df.empty:
        return yf_df
    if yf_df is None or yf_df.empty:
        return av_df
    # DataFrame.tz_convert returns a new frame, leaving the arguments intact.
    if av_df.index.tz is not None:
        av_df = av_df.tz_convert(None)
    if yf_df.index.tz is not None:
        yf_df = yf_df.tz_convert(None)
    ohlc = ['open', 'high', 'low', 'close']
    combined_df = pd.merge(yf_df, av_df[ohlc],
                           left_index=True, right_index=True,
                           how='outer', suffixes=('', '_av'))
    for col in ohlc:
        combined_df[col] = combined_df[col].fillna(combined_df[f'{col}_av'])
    combined_df = combined_df.drop(columns=[f'{col}_av' for col in ohlc], errors='ignore')
    combined_df = combined_df.sort_index()
    combined_df = combined_df.dropna(subset=ohlc)
    for col in ohlc:
        combined_df[col] = combined_df[col].astype(float)
    return combined_df

def add_all_indicators(df):
    """Add trend, momentum, volatility and support/resistance features.

    Works on a copy of ``df``. After the indicator columns are added,
    infinities and NaNs are filled (forward, backward, then 0), numeric
    columns are min-max scaled to [0, 1], and ``long_score`` /
    ``short_score`` are derived from the crossover flags, ADX and RSI.

    The four 0/1 crossover flags are excluded from scaling: min-max scaling
    maps a constant column to 0, so a flag that was 1 for every row used to
    be turned into 0 and counted towards ``short_score`` instead of
    ``long_score``.

    NOTE(review): back-filling and fitting the scaler on the whole frame both
    use values from later rows — confirm this is acceptable for how the
    output is consumed.

    Args:
        df: Frame with ``open, high, low, close`` columns (``volume``
            optional), or None/empty.

    Returns:
        ``df`` itself when it is None or empty, otherwise a new DataFrame
        with the added columns.
    """
    if df is None or df.empty:
        return df
    df = df.copy()

    # Trend indicators
    trend_indicators = {
        'SMA_10': lambda df: ta.trend.sma_indicator(df['close'], window=10),
        'SMA_50': lambda df: ta.trend.sma_indicator(df['close'], window=50),
        'SMA_200': lambda df: ta.trend.sma_indicator(df['close'], window=200),
        'EMA_10': lambda df: ta.trend.ema_indicator(df['close'], window=10),
        'EMA_50': lambda df: ta.trend.ema_indicator(df['close'], window=50),
        'EMA_200': lambda df: ta.trend.ema_indicator(df['close'], window=200),
        'MACD': lambda df: ta.trend.macd(df['close']),
        'MACD_signal': lambda df: ta.trend.macd_signal(df['close']),
        'ADX': lambda df: ta.trend.adx(df['high'], df['low'], df['close'], window=14)
    }

    # Momentum indicators
    momentum_indicators = {
        'RSI_14': lambda df: ta.momentum.rsi(df['close'], window=14),
        'StochRSI': lambda df: ta.momentum.stochrsi(df['close'], window=14),
        'CCI': lambda df: ta.trend.cci(df['high'], df['low'], df['close'], window=20),
        'ROC': lambda df: ta.momentum.roc(df['close'], window=12),
        'Williams_%R': lambda df: WilliamsRIndicator(df['high'], df['low'], df['close'], lbp=14).williams_r()
    }

    # Volatility indicators
    volatility_indicators = {
        'Bollinger_High': lambda df: ta.volatility.bollinger_hband(df['close'], window=20, window_dev=2),
        'Bollinger_Low': lambda df: ta.volatility.bollinger_lband(df['close'], window=20, window_dev=2),
        'ATR': lambda df: ta.volatility.average_true_range(df['high'], df['low'], df['close'], window=14),
        'STDDEV_20': lambda df: df['close'].rolling(window=20).std(),
    }

    # Volume indicators (optional)
    volume_indicators = {}
    if 'volume' in df.columns:
        volume_indicators = {
            'OBV': lambda df: ta.volume.on_balance_volume(df['close'], df['volume']),
            'MFI': lambda df: ta.volume.money_flow_index(df['high'], df['low'], df['close'], df['volume'], window=14)
        }

    # Combine all indicators
    all_indicators = {**trend_indicators, **momentum_indicators, **volatility_indicators, **volume_indicators}
    for name, func in all_indicators.items():
        try:
            df[name] = func(df)
        except Exception as exc:
            # Keep the pipeline running (the column is still created), but
            # say which indicator failed instead of hiding it. A bare
            # `except:` would also swallow KeyboardInterrupt/SystemExit.
            print(f"⚠️ Indicator {name} failed: {exc}")
            df[name] = np.nan

    # Crossovers (1 when the faster average is above the slower one)
    crossover_cols = [
        'EMA_10_cross_EMA_50',
        'EMA_50_cross_EMA_200',
        'SMA_10_cross_SMA_50',
        'SMA_50_cross_SMA_200',
    ]
    df['EMA_10_cross_EMA_50'] = (df['EMA_10'] > df['EMA_50']).astype(int)
    df['EMA_50_cross_EMA_200'] = (df['EMA_50'] > df['EMA_200']).astype(int)
    df['SMA_10_cross_SMA_50'] = (df['SMA_10'] > df['SMA_50']).astype(int)
    df['SMA_50_cross_SMA_200'] = (df['SMA_50'] > df['SMA_200']).astype(int)

    # Support/Resistance
    df['recent_high'] = df['high'].rolling(20).max()
    df['recent_low'] = df['low'].rolling(20).min()
    df['pivot_point'] = (df['high'] + df['low'] + df['close'])/3
    df['support_1'] = 2*df['pivot_point'] - df['high']
    df['resistance_1'] = 2*df['pivot_point'] - df['low']

    # Fill NaNs and normalize
    df = df.replace([np.inf, -np.inf], np.nan)
    df = df.ffill()
    df = df.bfill()
    df = df.fillna(0)

    # Scale every numeric column except the already-normalised 0/1 flags.
    numeric_cols = [
        col for col in df.select_dtypes(include=[np.number]).columns
        if col not in crossover_cols
    ]
    if len(numeric_cols) > 0:
        scaler = MinMaxScaler()
        df[numeric_cols] = scaler.fit_transform(df[numeric_cols])

    # Long/Short scores for GA
    df['long_score'] = df['EMA_10_cross_EMA_50'] + df['EMA_50_cross_EMA_200'] + df['SMA_10_cross_SMA_50']*0.5 + df['SMA_50_cross_SMA_200']*0.5 + df['ADX'] + df['RSI_14']
    df['short_score'] = (1 - df['EMA_10_cross_EMA_50']) + (1 - df['EMA_50_cross_EMA_200']) + (1 - df['SMA_10_cross_SMA_50'])*0.5 + (1 - df['SMA_50_cross_SMA_200'])*0.5 + (1 - df['ADX']) + (1 - df['RSI_14'])

    return df

# -----------------------------
# Worker function
# -----------------------------
def process_pair_file(pair, tf_file):
    """Combine one timeframe CSV with the daily CSV, add indicators, pickle it.

    Args:
        pair: Currency pair as ``"BASE/QUOTE"``.
        tf_file: File name (inside SAVE_FOLDER) of the timeframe CSV.

    Returns:
        tuple ``(path, label)``. ``path`` is the pickle written to
        ``combined_save_path`` when its bytes changed, otherwise None;
        ``label`` is ``"<pair> <tf_name>"`` (with ``" (skipped)"`` appended
        when the timeframe CSV could not be read).
    """
    slug = pair.replace('/', '_')

    # The daily file is optional: any read problem yields an empty frame.
    daily_path = os.path.join(SAVE_FOLDER, f"{slug}_daily.csv")
    try:
        av_df = pd.read_csv(daily_path, index_col=0, parse_dates=True)
    except Exception:
        av_df = pd.DataFrame()

    # Timeframe label = file name without extension and without pair prefix.
    tf_name = tf_file.replace(".csv", "").replace(f"{slug}_", "")

    tf_path = os.path.join(SAVE_FOLDER, tf_file)
    try:
        yf_df = pd.read_csv(tf_path, index_col=0, parse_dates=True)
    except Exception:
        print(f"⚠️ Failed to read {tf_file}")
        return None, f"{pair} {tf_name} (skipped)"

    combined_df = add_all_indicators(combine_fx_data(av_df, yf_df))

    save_file = os.path.join(combined_save_path, f"{slug}_{tf_name}_combined.pkl")
    hash_before = file_hash(save_file)
    combined_df.to_pickle(save_file, protocol=4)
    content_changed = file_hash(save_file) != hash_before

    label = f"{pair} {tf_name}"
    if content_changed:
        return save_file, label
    return None, label

# -----------------------------
# Run in parallel
# -----------------------------
changed_files = []
tasks = []

# Work list of (pair, file name) tuples. A file qualifies when its name starts
# with the pair's slug and it is neither a daily file nor an already combined
# CSV/pickle.
files_to_process = []
for pair in pairs:
    for tf_file in os.listdir(SAVE_FOLDER):
        belongs_to_pair = tf_file.startswith(pair.replace('/','_'))
        is_excluded = tf_file.endswith(("daily.csv", "_combined.csv", "_combined.pkl"))
        if belongs_to_pair and not is_excluded:
            files_to_process.append((pair, tf_file))

# Worker count: at most 8, at most one per file, at most 2 per CPU, at least 1.
cpu_based_cap = (os.cpu_count() or 4) * 2
max_workers = max(1, min(8, len(files_to_process), cpu_based_cap))

with ThreadPoolExecutor(max_workers=max_workers) as executor:
    for pair, tf_file in files_to_process:
        job = executor.submit(process_pair_file, pair, tf_file)
        tasks.append(job)

    for future in as_completed(tasks):
        changed_file, msg = future.result()
        print(msg)
        if not changed_file:
            continue
        changed_files.append(changed_file)

# -----------------------------
# Commit & push once
# -----------------------------
if not changed_files:
    print("✅ No changes detected, nothing to push.")
else:
    print(f"🚀 Committing {len(changed_files)} files in one push...")
    for git_args in (
        ["add"] + changed_files,
        ["commit", "-m", "Update combined data + indicators"],
        ["push", "origin", "main"],
    ):
        subprocess.run(["git", "-C", SAVE_FOLDER] + git_args, check=False)

print("✅ All FX pairs processed, combined, indicators added, and saved (parallel, single push).")


In [None]:
import os
import json
import numpy as np
import pandas as pd
import yfinance as yf
import ta
from ta.momentum import WilliamsRIndicator
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import MinMaxScaler
from pathlib import Path
import joblib
import datetime as dt
from concurrent.futures import ThreadPoolExecutor, as_completed
import requests
import re

# -----------------------------
# CONFIG
# -----------------------------
# FX pairs processed by this cell, written as "BASE/QUOTE".
PAIRS = ["EUR/USD", "GBP/USD", "USD/JPY", "AUD/USD"]
# Timeframe label -> (interval, period); both values are passed straight to
# fetch_data(), which forwards them to yf.download().
TIMEFRAMES = {
    "1m_7d": ("1m", "7d"),
    "5m_1mo": ("5m", "1mo"),
    "15m_60d": ("15m", "60d"),
    "1h_2y": ("1h", "2y"),
    "1d_5y": ("1d", "5y"),
}
# Default number of trailing candles overwritten by inject_live_price().
INJECT_CANDLES = 5
# Trained models are written here, relative to the current working directory.
MODEL_DIR = Path("models")
MODEL_DIR.mkdir(exist_ok=True)

# Output files written by run_hybrid(): the latest snapshot (overwritten each
# run) and an append-only CSV log.
BROKER_JSON = "broker_signals.json"
BROKER_LOG = "broker_signals_log.csv"

# -----------------------------
# Delete old models to prevent feature mismatch
# -----------------------------
# Every *.pkl directly inside MODEL_DIR is removed each time this cell runs,
# so models are always retrained from scratch.
for f in MODEL_DIR.glob("*.pkl"):
    f.unlink()

# -----------------------------
# LIVE PRICE FETCH
# -----------------------------
def fetch_live_rate(pair, timeout=8):
    """Fetch the current rate for ``pair`` ("BASE/QUOTE") via Browserless.

    The x-rates.com calculator page is rendered through the Browserless
    ``/content`` endpoint and the rate is extracted with a regex.

    Args:
        pair: Currency pair such as "EUR/USD".
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout a stalled connection would block the notebook
            indefinitely.

    Returns:
        The parsed rate as a float, or 0 when BROWSERLESS_TOKEN is unset,
        the page cannot be parsed, or the request fails.
    """
    from_currency, to_currency = pair.split('/')
    browserless_token = os.environ.get('BROWSERLESS_TOKEN')
    if not browserless_token:
        return 0
    url = f"https://production-sfo.browserless.io/content?token={browserless_token}"
    payload = {"url": f"https://www.x-rates.com/calculator/?from={from_currency}&to={to_currency}&amount=1"}
    try:
        res = requests.post(url, json=payload, timeout=timeout)
        match = re.search(r'ccOutputRslt[^>]*>([\d,.]+)', res.text)
        return float(match.group(1).replace(',', '')) if match else 0
    except Exception as e:
        # Best-effort fetch: report the reason instead of hiding it, then
        # fall back to the "no price" sentinel.
        print(f"⚠️ fetch_live_rate error for {pair}: {e}")
        return 0

# One live quote per configured pair, fetched sequentially in PAIRS order.
live_prices = dict(zip(PAIRS, map(fetch_live_rate, PAIRS)))

# -----------------------------
# DATA & INDICATORS
# -----------------------------
def fetch_data(symbol, interval, period):
    """Download OHLC history for ``symbol`` ("BASE/QUOTE") from yfinance.

    Rows containing NaN are dropped, and MultiIndex columns are flattened to
    their first level.
    """
    ticker = symbol.replace('/', '') + "=X"
    history = yf.download(
        ticker,
        interval=interval,
        period=period,
        progress=False,
        auto_adjust=True,
    )
    history.dropna(inplace=True)
    has_multiindex_columns = isinstance(history.columns, pd.MultiIndex)
    if has_multiindex_columns:
        history.columns = [levels[0] for levels in history.columns]
    return history

def inject_live_price(df, live_price, n_candles=INJECT_CANDLES):
    """Return a copy of ``df`` whose last candles carry the live price.

    Args:
        df: OHLC frame; only the columns Open/High/Low/Close that exist are
            touched.
        live_price: Latest quote. fetch_live_rate() returns 0 on failure, so
            any non-positive, NaN or None value is treated as "no quote" and
            the data is returned unchanged rather than being zeroed out.
        n_candles: How many trailing rows to overwrite.

    Returns:
        A new DataFrame; the input is never modified.
    """
    df_copy = df.copy()
    n_inject = min(n_candles, len(df_copy))

    # `nan > 0` is False, so NaN quotes are rejected here as well.
    price_is_usable = live_price is not None and live_price > 0
    # n_inject == 0 must short-circuit: iloc[-0:] would select EVERY row.
    if not price_is_usable or n_inject <= 0:
        return df_copy

    for col in ["Open","High","Low","Close"]:
        if col in df_copy.columns:
            df_copy.iloc[-n_inject:, df_copy.columns.get_loc(col)] = live_price
    return df_copy

def add_all_indicators(df):
    """Add technical indicators and simple ML features, then min-max scale.

    Args:
        df: Frame with "High", "Low" and "Close" columns. ``None`` or an
            empty frame is returned unchanged.

    Returns:
        A copy of ``df`` with the indicator columns added, inf/NaN replaced
        by 0 and every numeric column scaled with a MinMaxScaler fitted on
        this frame.
    """
    if df is None or df.empty:
        return df
    df = df.copy()

    # Classic indicators
    df["SMA_10"] = ta.trend.sma_indicator(df["Close"], 10)
    df["SMA_50"] = ta.trend.sma_indicator(df["Close"], 50)
    df["EMA_10"] = ta.trend.ema_indicator(df["Close"], 10)
    df["EMA_50"] = ta.trend.ema_indicator(df["Close"], 50)
    df["EMA_200"] = ta.trend.ema_indicator(df["Close"], 200)
    df["MACD"] = ta.trend.macd(df["Close"])
    df["MACD_signal"] = ta.trend.macd_signal(df["Close"])
    df["ADX"] = ta.trend.adx(df["High"], df["Low"], df["Close"], window=14)
    df["WilliamsR"] = WilliamsRIndicator(df["High"], df["Low"], df["Close"], lbp=14).williams_r()
    df["Bollinger_High"] = ta.volatility.bollinger_hband(df["Close"], window=20)
    df["Bollinger_Low"] = ta.volatility.bollinger_lband(df["Close"], window=20)
    df["ATR"] = ta.volatility.average_true_range(df["High"], df["Low"], df["Close"], window=14)

    # ML features
    # The helper Series below carry a fresh 0..n-1 index. Assigning them to
    # `df` directly would align on index labels and, for a frame indexed by
    # timestamps, yield all-NaN columns. `.to_numpy()` makes the assignment
    # positional instead.
    close = df["Close"].values
    df["return"] = np.concatenate([[0], (close[1:] - close[:-1])/close[:-1]])
    close_series = pd.Series(close)
    df["ma_fast"] = close_series.rolling(5).mean().to_numpy()
    df["ma_slow"] = close_series.rolling(20).mean().to_numpy()
    delta = np.diff(close, prepend=close[0])
    gain = np.where(delta>0, delta, 0)
    loss = np.where(delta<0, -delta, 0)
    avg_gain = pd.Series(gain).rolling(14).mean()
    avg_loss = pd.Series(loss).rolling(14).mean()
    # A zero average loss is replaced by a tiny value to avoid division by zero.
    rs = avg_gain / avg_loss.replace(0, 1e-9)
    df["rsi"] = (100 - (100/(1+rs))).to_numpy()

    # Fill NaNs and scale
    df.replace([np.inf, -np.inf], np.nan, inplace=True)
    df.fillna(0, inplace=True)
    numeric_cols = df.select_dtypes(include=[np.number]).columns
    if len(numeric_cols) > 0:
        scaler = MinMaxScaler()
        df[numeric_cols] = scaler.fit_transform(df[numeric_cols])
    return df

def generate_features(df):
    """Return the numeric columns of the indicator-enriched version of ``df``."""
    enriched = add_all_indicators(df)
    return enriched.select_dtypes(include=[np.number])

def make_labels(df):
    """Label each bar by the direction of the next close.

    Returns an integer array with 1 (next close higher), -1 (lower) or
    0 (equal); the final bar has no successor and is dropped, so the result
    has ``len(df) - 1`` entries.
    """
    current_close = df["Close"]
    next_close = current_close.shift(-1)
    moves_up = (next_close > current_close).to_numpy()
    moves_down = (next_close < current_close).to_numpy()
    direction = moves_up.astype(int) - moves_down.astype(int)
    return direction[:-1]

# -----------------------------
# MODEL TRAIN/LOAD
# -----------------------------
def train_or_load_model(features, labels, model_path):
    """Train an SGD classifier on ``features``/``labels`` and persist it.

    Despite the name, this function never loads an existing model: it always
    trains a fresh one and writes it to ``model_path``.

    Args:
        features: DataFrame of numeric features, one row per label.
        labels: Array-like of class labels.
        model_path: Destination for the joblib dump, which stores a dict
            with keys "model" and "features".

    Returns:
        ``(model, feature_names)``, or ``(None, [])`` when there is nothing
        to train on. hybrid_signal() treats a ``None`` model as "all hold".
    """
    if features.empty or len(labels)==0:
        return None, []
    # A classifier needs at least two classes; fitting on a single class
    # raises ValueError and would take down the whole pair/timeframe task.
    if len(np.unique(labels)) < 2:
        print(f"⚠️ Only one label class available, skipping training for {model_path}")
        return None, []
    trained_features = list(features.columns)
    model = SGDClassifier(loss="log_loss", max_iter=1000, tol=1e-3)
    model.fit(features, labels)
    joblib.dump({"model": model, "features": trained_features}, model_path)
    return model, trained_features

def hybrid_signal(model, features, trained_features):
    """Predict a signal for every row of ``features``.

    Args:
        model: Fitted estimator exposing ``predict``; ``None`` yields an
            all-zero series.
        features: DataFrame of candidate features. It is not modified.
        trained_features: Column names the model was trained on, in order.

    Returns:
        A Series of predictions indexed like ``features``.
    """
    if model is None or features.empty:
        return pd.Series([0]*len(features), index=features.index)
    # reindex() selects the trained columns in training order and creates any
    # missing one filled with 0, without adding columns to the caller's frame
    # (which may be a slice of a larger DataFrame).
    features_aligned = features.reindex(columns=trained_features, fill_value=0).fillna(0)
    preds = model.predict(features_aligned)
    return pd.Series(preds, index=features.index)

# -----------------------------
# PROCESS ONE PAIR/TIMEFRAME
# -----------------------------
def process_pair_tf(pair, tf_name, interval, period, live_price):
    """Run fetch -> inject -> train -> predict for one pair and timeframe.

    Returns ``(summary, raw_df, scored_df)``, or ``(None, None, None)`` when
    fewer than two rows of history are available. ``summary`` holds the
    long/short/hold counts, the last predicted signal and the live price.
    """
    raw = fetch_data(pair, interval, period)
    if len(raw) < 2:
        return None, None, None

    injected = inject_live_price(raw, live_price)
    all_features = generate_features(injected)
    labels = make_labels(injected)
    # Labels are one shorter than the data (the last bar has no successor).
    features = all_features.iloc[:len(labels)]

    pair_slug = pair.replace('/','_')
    model_path = MODEL_DIR / f"{pair_slug}_{tf_name}.pkl"
    model, trained_features = train_or_load_model(features, labels, model_path)
    predictions = hybrid_signal(model, features, trained_features)

    scored = injected.iloc[:len(predictions)].copy()
    scored["hybrid_signal"] = predictions.values
    signal_col = scored["hybrid_signal"]

    summary = {
        "pair": pair,
        "timeframe": tf_name,
        "long": int((signal_col == 1).sum()),
        "short": int((signal_col == -1).sum()),
        "hold": int((signal_col == 0).sum()),
        "latest_signal": int(signal_col.iloc[-1]),
        "live_price": live_price,
    }
    return summary, raw, scored

# -----------------------------
# RUN HYBRID PIPELINE
# -----------------------------
def run_hybrid():
    """Process every pair/timeframe in parallel and persist the signals.

    Writes the latest snapshot to BROKER_JSON (overwritten) and appends one
    row per successful pair/timeframe to BROKER_LOG.

    Returns:
        The dict that was written to BROKER_JSON.
    """
    broker_output = {"timestamp": dt.datetime.now(dt.timezone.utc).isoformat(), "pairs": {}}
    log_rows = []
    # future -> (pair, timeframe) so a failure can be attributed.
    tasks = {}
    with ThreadPoolExecutor(max_workers=6) as executor:
        for pair in PAIRS:
            live_price = live_prices[pair]
            broker_output["pairs"][pair] = {}
            for tf_name, (interval, period) in TIMEFRAMES.items():
                future = executor.submit(process_pair_tf, pair, tf_name, interval, period, live_price)
                tasks[future] = (pair, tf_name)
        for future in as_completed(tasks):
            task_pair, task_tf = tasks[future]
            # One failing download or fit must not discard the results of
            # every other pair/timeframe.
            try:
                result, df, df_live = future.result()
            except Exception as exc:
                print(f"⚠️ {task_pair} {task_tf} failed: {exc}")
                continue
            if result is None:
                continue
            pair, tf_name = result["pair"], result["timeframe"]
            broker_output["pairs"][pair][tf_name] = {
                "long": result["long"],
                "short": result["short"],
                "hold": result["hold"],
                "latest_signal": result["latest_signal"],
                "live_price": float(result["live_price"])
            }
            log_rows.append({
                "timestamp": dt.datetime.now(dt.timezone.utc),
                "pair": pair,
                "timeframe": tf_name,
                **{k: result[k] for k in ["long","short","hold","latest_signal"]},
                "live_price": result["live_price"]
            })
    # Save JSON
    with open(BROKER_JSON, "w") as f:
        json.dump(broker_output, f, indent=2)
    print(f"💾 Broker JSON saved: {BROKER_JSON}")
    # Append CSV log
    if log_rows:
        log_df = pd.DataFrame(log_rows)
        # Write the header when the log is new or empty; an empty frame is
        # never written, because it would create a header-less file that
        # later appends could not repair.
        needs_header = (not os.path.exists(BROKER_LOG)) or os.path.getsize(BROKER_LOG) == 0
        log_df.to_csv(BROKER_LOG, mode="a", header=needs_header, index=False)
        print(f"📑 Signals logged to CSV: {BROKER_LOG}")
    else:
        print("⚠️ No signals produced; CSV log left untouched.")
    return broker_output

# -----------------------------
# RUN SCRIPT
# -----------------------------
# Entry point: run the hybrid pipeline and echo the resulting signals as
# indented JSON. `signals` is left at module level after the run.
if __name__ == "__main__":
    signals = run_hybrid()
    print("\n📊 Latest Signals JSON:\n", json.dumps(signals, indent=2))


In [None]:
import os
import pandas as pd
from pathlib import Path

# -----------------------------
# CONFIG
# -----------------------------
# NOTE(review): absolute path — presumably the repository checkout location
# in Colab; confirm before running elsewhere.
CSV_FOLDER = "/content/forex-ai-models"  # folder searched for *_combined.csv inputs
SAVE_FOLDER = "./combined_data"          # output folder for backtest-ready .pkl files
# Created relative to the current working directory if it does not exist.
Path(SAVE_FOLDER).mkdir(parents=True, exist_ok=True)

# Pairs to convert, written as "BASE/QUOTE"; "/" is replaced by "_" when
# matching file names.
pairs = ["EUR/USD","GBP/USD","USD/JPY","AUD/USD"]

# -----------------------------
# Helper function: temporary hybrid signal
# -----------------------------
def generate_temp_signal(df, fast=5, slow=20):
    """Placeholder signal from a fast/slow moving-average crossover.

    Returns a Series indexed like ``df``: 1 where the fast MA of ``close`` is
    above the slow MA, -1 where it is below, and 0 where they are equal or
    either average is not yet defined. Frames shorter than ``slow`` rows
    produce all zeros.
    """
    if len(df) < slow:
        return pd.Series([0]*len(df), index=df.index)
    close = df['close']
    spread = close.rolling(fast).mean() - close.rolling(slow).mean()
    # Comparisons against NaN are False on both sides, so warm-up rows map to 0.
    direction = (spread > 0).astype(int) - (spread < 0).astype(int)
    return direction.fillna(0)

# -----------------------------
# Process all CSVs
# -----------------------------
for pair in pairs:
    csv_files = list(Path(CSV_FOLDER).glob(f"{pair.replace('/','_')}_*_combined.csv"))

    if not csv_files:
        print(f"⚠️ No CSV files found for {pair}, skipping.")
        continue

    for csv_file in csv_files:
        try:
            df = pd.read_csv(csv_file, index_col=0, parse_dates=True)

            # Check required OHLC columns. The check is done up front so that
            # `continue` skips the FILE; a `continue` inside the column loop
            # would only move on to the next column.
            missing_cols = [col for col in ['open','high','low','close'] if col not in df.columns]
            if missing_cols:
                for col in missing_cols:
                    print(f"⚠️ {csv_file} missing column {col}, skipping.")
                continue

            # Generate temporary hybrid_signal based on MA
            df['hybrid_signal'] = generate_temp_signal(df)

            # Compute ATR if missing
            if 'atr' not in df.columns:
                high = df['high'].values
                low = df['low'].values
                close = df['close'].values
                # True range: the first bar has no previous close, so it
                # falls back to high - low.
                tr = pd.Series(
                    [max(h-l, abs(h-close[i-1]), abs(l-close[i-1])) if i>0 else h-l
                     for i,(h,l) in enumerate(zip(high,low))]
                )
                # `tr` has a 0..n-1 index while `df` is indexed by the CSV's
                # first column; assign positionally so the values are not
                # lost to index alignment.
                df['atr'] = tr.rolling(14).mean().fillna(1e-5).clip(lower=1e-5).to_numpy()

            # Save as pickle
            pkl_file = Path(SAVE_FOLDER) / f"{csv_file.stem}.pkl"
            df.to_pickle(pkl_file)
            print(f"✅ Saved {pkl_file} with temporary hybrid_signal")

        except Exception as e:
            print(f"❌ Failed to process {csv_file}: {e}")

print("🎯 All CSVs processed, hybrid_signal generated, and converted to .pkl for backtest.")


In [None]:
#!/usr/bin/env python3
"""
Hybrid Vectorized Backtest + GA + Momentum-aware Live Browserless Signals + Email
- Multi-timeframe calculations preserved
- GA loads previous population and continues from last generation
- Live signals with SL, TP, and 1-100 scoring
- High-confidence trade flags included
- Signals sent via Gmail (hardcoded App password)
- GA evaluation fully parallelized
"""

import os, json, pickle, random, re
from pathlib import Path
import numpy as np
import pandas as pd
import requests
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from joblib import Parallel, delayed

# -----------------------------
# CONFIG
# -----------------------------
# Folder scanned for "<PAIR>_<timeframe>_combined.pkl" inputs.
SAVE_FOLDER = "/content/forex-ai-models/combined_with_indicators"
Path(SAVE_FOLDER).mkdir(parents=True, exist_ok=True)
# GA state persisted between runs.
BEST_CHROM_FILE = "/content/forex-ai-models/best_chromosome.pkl"
TRADE_MEMORY_FILE = "/content/forex-ai-models/trade_memory.pkl"
POPULATION_FILE = "/content/forex-ai-models/population.pkl"
GEN_COUNT_FILE = "/content/forex-ai-models/generation_count.pkl"
SIGNALS_JSON_PATH = "/content/forex-ai-models/broker_signals.json"

pairs = ['EUR/USD', 'GBP/USD', 'USD/JPY', 'AUD/USD']

ATR_PERIOD = 14                # rolling window used by ensure_atr()
MIN_ATR = 1e-5                 # floor applied to every ATR value
BASE_CAPITAL = 100             # starting capital passed to the backtest
MAX_POSITION_FRACTION = 0.1    # position size cap as a fraction of capital

POPULATION_SIZE = 12
GENERATIONS = 10
MUTATION_RATE = 0.2
EARLY_STOPPING = 5             # generations without improvement before stopping
TOURNAMENT_SIZE = 3
EPS = 1e-8                     # guard against division by zero

# -----------------------------
# Gmail credentials
# -----------------------------
# Credentials are read from the environment (like BROWSERLESS_TOKEN) instead
# of being committed with the notebook. The app password that used to be
# stored here has been exposed in source control and should be revoked.
GMAIL_USER = os.environ.get("GMAIL_USER", "nakatonabira3@gmail.com")
GMAIL_APP_PASSWORD = os.environ.get("GMAIL_APP_PASSWORD", "")
if not GMAIL_APP_PASSWORD:
    print("⚠️ GMAIL_APP_PASSWORD is not set; email delivery will fail until it is provided.")

# -----------------------------
# Browserless fetch
# -----------------------------
def fetch_live_rate(pair: str, timeout: int = 8) -> float:
    """Return the live rate for ``pair`` ("BASE/QUOTE") scraped via Browserless.

    Yields 0.0 when BROWSERLESS_TOKEN is unset, when the rate cannot be found
    in the returned page, or when the request raises (the error is printed).
    """
    base_ccy, quote_ccy = pair.split('/')
    api_token = os.environ.get('BROWSERLESS_TOKEN')
    if not api_token:
        return 0.0

    endpoint = f"https://production-sfo.browserless.io/content?token={api_token}"
    body = {"url": f"https://www.x-rates.com/calculator/?from={base_ccy}&to={quote_ccy}&amount=1"}
    try:
        response = requests.post(endpoint, json=body, timeout=timeout)
        found = re.search(r'ccOutputRslt[^>]*>([\d,.]+)', response.text)
        if not found:
            return 0.0
        # Strip thousands separators before converting.
        return float(found.group(1).replace(',', ''))
    except Exception as e:
        print(f"⚠️ fetch_live_rate error for {pair}: {e}")
        return 0.0

# -----------------------------
# Data prep
# -----------------------------
def make_index_tz_naive(df):
    """Strip timezone information from a tz-aware DatetimeIndex.

    Frames whose index is not a DatetimeIndex, or is already naive, are
    returned as-is (same object).
    """
    index = df.index
    is_tz_aware = isinstance(index, pd.DatetimeIndex) and index.tz is not None
    if not is_tz_aware:
        return df
    return df.tz_convert(None)

def seed_hybrid_signal_if_needed(df):
    """Ensure ``df`` has a usable float ``hybrid_signal`` column (in place).

    When the column is missing or sums to zero in absolute value, it is
    seeded with the difference between the 10- and 50-period rolling means
    of ``close`` (both with ``min_periods=1``). NaNs are replaced by 0.
    """
    if 'hybrid_signal' not in df.columns:
        df['hybrid_signal'] = 0.0

    signal_is_flat = df['hybrid_signal'].abs().sum() == 0
    if signal_is_flat:
        close = df['close']
        ma_fast = close.rolling(10, min_periods=1).mean()
        ma_slow = close.rolling(50, min_periods=1).mean()
        df['hybrid_signal'] = (ma_fast - ma_slow).fillna(0)

    df['hybrid_signal'] = df['hybrid_signal'].fillna(0).astype(float)
    return df

def ensure_atr(df):
    """Guarantee a positive ``atr`` column on ``df`` (modified in place).

    An existing column that is not entirely null is kept, with NaNs replaced
    by MIN_ATR and values floored at MIN_ATR. Otherwise the ATR is computed
    as the rolling mean (window ATR_PERIOD, ``min_periods=1``) of the true
    range derived from ``high``, ``low`` and ``close``.

    Returns:
        The same DataFrame.
    """
    if 'atr' in df.columns and not df['atr'].isnull().all():
        df['atr'] = df['atr'].fillna(MIN_ATR).clip(lower=MIN_ATR)
        return df
    high, low, close = df['high'].values, df['low'].values, df['close'].values
    prev_close = np.roll(close, 1)
    tr = np.maximum.reduce([
        high - low,
        np.abs(high - prev_close),
        np.abs(low - prev_close)
    ])
    # np.roll wraps the last close into position 0, so the first bar uses
    # high - low only. The guard avoids indexing into an empty array.
    if len(tr) > 0:
        tr[0] = high[0] - low[0]
    df['atr'] = pd.Series(tr, index=df.index).rolling(ATR_PERIOD, min_periods=1).mean().fillna(MIN_ATR).clip(lower=MIN_ATR)
    return df

def load_combined_data(folder):
    """Load every "<PAIR>_<timeframe>_combined.pkl" under ``folder``.

    Each frame is made tz-naive, given a ``hybrid_signal`` column if needed
    and an ``atr`` column. Frames lacking open/high/low/close columns and
    files that fail to load are skipped with a message.

    Returns:
        ``{pair: {timeframe: DataFrame}}`` with an entry (possibly empty)
        for every pair in ``pairs``.
    """
    combined_data = {}
    for pair in pairs:
        combined_data[pair] = {}
        prefix = pair.replace('/','_')
        for p in Path(folder).glob(f"{prefix}_*_combined.pkl"):
            tf_name = p.name.replace(f"{prefix}_","").replace("_combined.pkl","")
            try:
                # NOTE(review): unpickling executes arbitrary code; only load
                # files produced by this pipeline.
                df = pd.read_pickle(p)
                df = make_index_tz_naive(df)
                if not all(c in df.columns for c in ['open','high','low','close']):
                    print(f"⚠️ Skipping {p}: missing one of open/high/low/close")
                    continue
                df = seed_hybrid_signal_if_needed(df)
                df = ensure_atr(df)
                combined_data[pair][tf_name] = df
            except Exception as e:
                # Still best-effort, but a dropped timeframe is now visible.
                print(f"⚠️ Skipping {p}: {e}")
    return combined_data

# -----------------------------
# Vectorized Backtest
# -----------------------------
def run_vector_backtest_vectorized(combined_data, capital, base_risk, atr_sl, atr_tp, conf_mult, tf_weights_per_pair, trade_memory=None):
    """Score one parameter set across all pairs and timeframes.

    Args:
        combined_data: ``{pair: {timeframe: DataFrame}}``; each frame is read
            for its ``close``, ``hybrid_signal`` and ``atr`` columns.
        capital: Starting equity used for every pair.
        base_risk: Multiplier applied to capital when sizing positions.
        atr_sl: ATR multiple in the position-size denominator.
        atr_tp: ATR multiple in the per-bar PnL term.
        conf_mult: Multiplier on the mean absolute signal; aggregated signals
            whose magnitude is below the resulting threshold are zeroed.
        tf_weights_per_pair: ``{pair: {timeframe: weight}}``; missing entries
            count as weight 0.0.
        trade_memory: Optional ``{pair: [ {'equity', 'pnl'}, ... ]}``. It is
            mutated in place: one entry is appended per evaluated pair and
            each list is trimmed to its last 200 entries.

    Returns:
        ``(score, results, trade_memory)`` where ``score`` is
        ``total_pnl / (1 + max_drawdown)`` over all pairs and ``results``
        maps each pair to its equity curve, total PnL and max drawdown.
    """
    if trade_memory is None: trade_memory = {pair: [] for pair in combined_data.keys()}
    results = {}
    epsilon = 1e-8

    # Precompute signals, prices, atr for all pairs
    precomputed = {}
    for pair, tfs in combined_data.items():
        # Pairs without any timeframe get a flat, single-point result.
        if not tfs:
            results[pair] = {'equity_curve': pd.Series([capital]), 'total_pnl':0,'max_drawdown':0}
            continue
        # Union of every timeframe's index; each series is forward-filled
        # onto it so all timeframes share one timeline.
        all_idxs = sorted(set().union(*[set(df.index) for df in tfs.values()]))
        df_all = pd.DataFrame(index=all_idxs)
        for tf_name, df in tfs.items():
            df_all[f'close_{tf_name}'] = df['close'].reindex(df_all.index).ffill()
            df_all[f'signal_{tf_name}'] = df['hybrid_signal'].reindex(df_all.index).ffill().fillna(0.0)
            df_all[f'atr_{tf_name}'] = df['atr'].reindex(df_all.index).ffill().fillna(MIN_ATR).clip(lower=MIN_ATR)
        # Price and ATR are the row-wise means across timeframes.
        df_all['price'] = df_all[[c for c in df_all.columns if c.startswith('close_')]].mean(axis=1).clip(lower=epsilon)
        df_all['atr'] = df_all[[c for c in df_all.columns if c.startswith('atr_')]].mean(axis=1).clip(lower=MIN_ATR)
        precomputed[pair] = df_all

    # Evaluate
    for pair, df_all in precomputed.items():
        tfs = combined_data.get(pair, {})
        if not tfs:
            continue
        # Weighted sum of the per-timeframe signals.
        agg_signal = sum([df_all[f'signal_{tf}']*tf_weights_per_pair.get(pair,{}).get(tf,0.0) for tf in tfs.keys()])
        mean_abs_signal = np.mean([df_all[f'signal_{tf}'].abs().mean() for tf in tfs.keys()]) if tfs else 0.0
        conf_threshold = conf_mult * (mean_abs_signal + EPS)
        # Signals weaker than the confidence threshold are treated as flat.
        df_all['agg_signal'] = np.where(np.abs(agg_signal)>=conf_threshold, agg_signal, 0.0)

        price, atr, agg_signal = df_all['price'].values, df_all['atr'].values, df_all['agg_signal'].values
        n = len(price)
        if n<=1:
            results[pair] = {'equity_curve': pd.Series([capital]), 'total_pnl':0,'max_drawdown':0}
            continue

        # Scale position size by the accumulated PnL recorded for this pair,
        # never below 0.2.
        memory_factor = 1.0
        if trade_memory.get(pair):
            total_prev_pnl = sum([float(tr.get('pnl',0)) for tr in trade_memory[pair]])
            memory_factor = max(0.2, 1.0 + total_prev_pnl / max(1.0, capital*10.0))

        size = (capital*base_risk*np.abs(agg_signal))/(atr_sl*(atr/price)+EPS)
        size = np.minimum(size*memory_factor, capital*MAX_POSITION_FRACTION)
        size = np.nan_to_num(size,nan=0.0,posinf=capital*MAX_POSITION_FRACTION)
        direction = np.sign(agg_signal)
        # NOTE(review): the per-bar PnL is built from signal direction, size
        # and atr/price only; no realized price change enters this
        # expression — confirm this is the intended scoring model.
        pnl = direction*size*(atr_tp*atr/price)
        # Running equity; pnl[0] is not applied because equity[0] is the
        # starting capital.
        equity = np.zeros(n,dtype=float)
        equity[0]=capital
        for i in range(1,n): equity[i] = equity[i-1]+float(pnl[i])
        trade_memory.setdefault(pair,[]).append({'equity':float(equity[-1]), 'pnl':float(equity[-1]-capital)})
        if len(trade_memory[pair])>200: trade_memory[pair] = trade_memory[pair][-200:]
        equity_series = pd.Series(equity,index=df_all.index)
        results[pair] = {'equity_curve': equity_series,'total_pnl':float(equity[-1]-capital),'max_drawdown':float((equity_series.cummax()-equity_series).max())}

    # Aggregate: summed PnL penalised by the worst single-pair drawdown.
    total_pnl = sum([r['total_pnl'] for r in results.values()])
    max_dd = max([r['max_drawdown'] for r in results.values()] or [0.0])
    score = total_pnl/(1.0+max_dd) if (1.0+max_dd)!=0 else total_pnl
    return score, results, trade_memory

# -----------------------------
# GA functions
# -----------------------------
def build_tf_names(combined_data):
    """Map each pair in ``pairs`` to the sorted list of its timeframe names."""
    tf_names = {}
    for pair in pairs:
        tf_names[pair] = sorted(combined_data[pair].keys())
    return tf_names

def create_chromosome(tf_names_map):
    """Create a random chromosome.

    Layout: four genes drawn uniformly from fixed ranges (decoded as atr_sl,
    atr_tp, base_risk, conf), followed by one Dirichlet-distributed weight
    vector per pair. A pair with no timeframes still contributes one weight.
    """
    gene_ranges = ((1.0, 2.0), (2.0, 4.0), (0.005, 0.02), (0.3, 0.7))
    chrom = [random.uniform(low, high) for low, high in gene_ranges]
    for pair in pairs:
        tf_count = max(1, len(tf_names_map.get(pair, [])))
        weights = np.random.dirichlet(np.ones(tf_count))
        chrom += weights.tolist()
    return chrom

def decode_chromosome(chrom,tf_names_map):
    """Split a chromosome into its parameters and per-pair timeframe weights.

    Returns ``(atr_sl, atr_tp, base_risk, conf, tf_weights_per_pair)``. Each
    pair's weights are normalised to sum to (approximately) 1; a
    non-positive sum falls back to uniform weights.
    """
    atr_sl, atr_tp, base_risk, conf = chrom[:4]
    tf_weights_per_pair = {}
    cursor = 4
    for pair in pairs:
        tf_list = tf_names_map.get(pair, [])
        # Mirrors create_chromosome(): at least one gene per pair.
        width = max(1, len(tf_list))
        raw = np.array(chrom[cursor:cursor + width], dtype=float)
        if raw.sum() <= 0:
            weights = np.ones_like(raw) / float(len(raw))
        else:
            weights = raw / (raw.sum() + EPS)
        if tf_list:
            tf_weights_per_pair[pair] = dict(zip(tf_list, map(float, weights)))
        else:
            tf_weights_per_pair[pair] = {}
        cursor += width
    return atr_sl, atr_tp, base_risk, conf, tf_weights_per_pair

def tournament_selection(scored_population,k=TOURNAMENT_SIZE):
    """Pick ``k`` random ``(score, chromosome)`` entries and return the
    chromosome of the highest-scoring one."""
    contenders = random.sample(scored_population, k)
    ranked = sorted(contenders, key=lambda entry: entry[0], reverse=True)
    _, winner = ranked[0]
    return winner

# -----------------------------
# GA runner (parallelized)
# -----------------------------
def _load_pickle_or_default(path, default):
    """Return the object pickled at ``path``, or ``default`` if the file is
    missing or cannot be read. The file handle is always closed."""
    if not os.path.exists(path):
        return default
    try:
        # NOTE(review): unpickling executes arbitrary code; these files must
        # only ever be written by this notebook.
        with open(path, 'rb') as fh:
            return pickle.load(fh)
    except Exception as e:
        print(f"⚠️ Could not load {path}: {e}")
        return default


def _save_pickle(obj, path):
    """Pickle ``obj`` to ``path``, closing the file so the data is flushed."""
    with open(path, 'wb') as fh:
        pickle.dump(obj, fh)


def run_ga_vectorized_parallel(combined_data, generations=GENERATIONS, population_size=POPULATION_SIZE, mutation_rate=MUTATION_RATE):
    """Evolve backtest parameters with a genetic algorithm.

    Previous population, trade memory and generation counter are resumed
    from disk when available. Each generation is evaluated in parallel with
    joblib, the best chromosome is carried over unchanged, and the rest of
    the next population is bred by tournament selection, averaging crossover
    and ±5% mutation. State is saved after every completed generation.

    Args:
        combined_data: ``{pair: {timeframe: DataFrame}}``.
        generations: Maximum number of generations to run in this call.
        population_size: Size of each new population.
        mutation_rate: Per-gene probability of mutation.

    Returns:
        ``(best_chromosome, trade_memory)``.
    """
    tf_names_map=build_tf_names(combined_data)

    # A saved population is only reusable if every chromosome has the length
    # implied by the current timeframe layout; otherwise decode_chromosome()
    # would silently slice the wrong genes.
    expected_len = 4 + sum(max(1, len(tf_names_map.get(pair, []))) for pair in pairs)
    population = _load_pickle_or_default(POPULATION_FILE, None)
    population_ok = (
        isinstance(population, list)
        and len(population) > 0
        and all(isinstance(c, (list, tuple)) and len(c) == expected_len for c in population)
    )
    if not population_ok:
        if population is not None:
            print("⚠️ Saved population does not match the current chromosome layout; starting a fresh population.")
        population = [create_chromosome(tf_names_map) for _ in range(population_size)]

    trade_memory = _load_pickle_or_default(TRADE_MEMORY_FILE, {})
    if not isinstance(trade_memory, dict):
        trade_memory = {}
    last_gen = _load_pickle_or_default(GEN_COUNT_FILE, 0)
    if not isinstance(last_gen, int):
        last_gen = 0

    best_score_ever=-np.inf
    best_chrom_ever=None
    early_stop_counter=0

    def evaluate_chrom(chrom):
        atr_sl, atr_tp, base_risk, conf, tf_weights_per_pair = decode_chromosome(chrom, tf_names_map)
        score, _, _ = run_vector_backtest_vectorized(combined_data, BASE_CAPITAL, base_risk, atr_sl, atr_tp, conf, tf_weights_per_pair, trade_memory)
        return score, chrom

    for gen in range(last_gen+1,last_gen+1+generations):
        scored_population = Parallel(n_jobs=-1)(delayed(evaluate_chrom)(c) for c in population)
        scored_population.sort(reverse=True,key=lambda x:x[0])
        best_score,best_chrom=scored_population[0]
        # Never let the reported best regress below the best seen so far.
        if best_score<best_score_ever:
            best_score=best_score_ever
            best_chrom=best_chrom_ever

        print(f"=== Generation {gen} === Best Score: {best_score:.2f}")
        if best_score>best_score_ever:
            best_score_ever=best_score
            best_chrom_ever=best_chrom
            early_stop_counter=0
        else:
            early_stop_counter+=1
            if early_stop_counter>=EARLY_STOPPING:
                print("⚠️ Early stopping triggered.")
                break

        # Generate next population (the best chromosome is kept unchanged)
        next_population=[best_chrom]
        while len(next_population)<population_size:
            parent1=tournament_selection(scored_population)
            parent2=tournament_selection(scored_population)
            child=[(p1+p2)/2.0 for p1,p2 in zip(parent1,parent2)]
            child=[c*random.uniform(0.95,1.05) if random.random()<mutation_rate else c for c in child]
            next_population.append(child)
        population=next_population

        # Save state
        _save_pickle(population, POPULATION_FILE)
        _save_pickle(trade_memory, TRADE_MEMORY_FILE)
        _save_pickle(best_chrom_ever, BEST_CHROM_FILE)
        _save_pickle(gen, GEN_COUNT_FILE)

    print("✅ GA complete. Best chromosome saved.")
    return best_chrom_ever, trade_memory

# -----------------------------
# Live signals with high-confidence flag
# -----------------------------
def generate_live_signals_with_sl_tp(best_chrom, combined_data):
    """Build live BUY/SELL/HOLD signals with stop-loss and take-profit levels.

    For each pair the latest ``hybrid_signal`` of every timeframe is combined
    using the chromosome's weights, blended with the previously saved
    strength when the sign differs from it, and turned into a direction, a
    1-100 score and SL/TP prices around the live (or last known) price. The
    result is written to SIGNALS_JSON_PATH.

    Args:
        best_chrom: Chromosome as produced by the GA.
        combined_data: ``{pair: {timeframe: DataFrame}}``.

    Returns:
        ``{pair: {...}}`` with direction, strength, score_1_100, last_price,
        SL, TP and high_confidence.
    """
    tf_names_map = build_tf_names(combined_data)
    atr_sl, atr_tp, base_risk, conf, tf_weights_per_pair = decode_chromosome(best_chrom, tf_names_map)

    live_signals = {}
    prev_signals = {}

    if os.path.exists(SIGNALS_JSON_PATH):
        try:
            with open(SIGNALS_JSON_PATH, 'r') as prev_file:
                prev_data = json.load(prev_file)
            prev_signals = {pair: data.get('strength', 0.0) for pair, data in prev_data.get("pairs", {}).items()}
        except (OSError, ValueError, AttributeError, TypeError) as e:
            # Unreadable, malformed or differently shaped file: start without
            # history rather than failing.
            print(f"⚠️ Ignoring previous signals in {SIGNALS_JSON_PATH}: {e}")
            prev_signals = {}

    for pair in pairs:
        tfs = combined_data.get(pair, {})

        price = fetch_live_rate(pair)
        if price <= 0:
            # No live quote: fall back to the mean of the last closes.
            price = np.mean([df['close'].iloc[-1] for df in tfs.values()]) if tfs else 1.0

        signal_strength = sum([
            tf_weights_per_pair.get(pair, {}).get(tf, 0.0) * tfs[tf]['hybrid_signal'].iloc[-1]
            for tf in tf_names_map.get(pair, [])
        ])

        # Momentum smoothing: when the sign differs from the stored strength
        # (including a stored strength of 0), blend 70% previous / 30% new.
        prev_strength = prev_signals.get(pair, 0.0)
        if np.sign(signal_strength) != np.sign(prev_strength):
            signal_strength = 0.7 * prev_strength + 0.3 * signal_strength

        direction = "BUY" if signal_strength > 0 else "SELL" if signal_strength < 0 else "HOLD"

        recent_atr = np.mean([tfs[tf]['atr'].iloc[-1] for tf in tf_names_map.get(pair, [])]) if tfs else 1.0

        score_100 = min(max(int(100*(abs(signal_strength)/(recent_atr+EPS))**0.5), 1), 100)

        sl_multiplier = 0.5
        tp_multiplier = 1.0
        # NOTE(review): HOLD takes the same SL/TP branch as SELL.
        SL = price - atr_sl * recent_atr * sl_multiplier if direction == "BUY" else price + atr_sl * recent_atr * sl_multiplier
        TP = price + atr_tp * recent_atr * tp_multiplier if direction == "BUY" else price - atr_tp * recent_atr * tp_multiplier

        # High-confidence flag
        high_confidence = score_100 >= 80

        live_signals[pair] = {
            "direction": direction,
            "strength": float(signal_strength),
            "score_1_100": score_100,
            "last_price": float(price),
            "SL": float(SL),
            "TP": float(TP),
            "high_confidence": high_confidence
        }

    with open(SIGNALS_JSON_PATH, 'w') as f:
        json.dump({"timestamp": pd.Timestamp.now().isoformat(), "pairs": live_signals}, f, indent=2)

    print(f"📡 Live signals with SL/TP generated and saved to {SIGNALS_JSON_PATH}")
    return live_signals

# -----------------------------
# Email builder with high-confidence visual flag
# -----------------------------
def send_forex_email(signals, recipient="nakatonabira3@gmail.com"):
    """Email ``signals`` as an HTML table through Gmail over SMTP-SSL.

    ``signals`` maps "BASE/QUOTE" to a dict with last_price, direction,
    score_1_100, SL, TP and optionally high_confidence. Delivery errors are
    printed, not raised.
    """
    def format_price(price, pair=""):
        # Falsy prices (0, None) render as a dash; JPY pairs use 3 decimals.
        if not price:
            return "-"
        decimals = 3 if "JPY" in pair else 4
        return f"{price:.{decimals}f}"

    today = pd.Timestamp.now().strftime("%Y-%m-%d")
    flags_map = {"USD":"🇺🇸","EUR":"🇪🇺","GBP":"🇬🇧","JPY":"🇯🇵","AUD":"🇦🇺"}

    row_chunks = []
    for pair, data in signals.items():
        base_ccy, quote_ccy = pair.split("/")
        flag_str = f"{flags_map.get(base_ccy,'')} {flags_map.get(quote_ccy,'')}"
        high_conf = "🔥" if data.get("high_confidence") else ""
        row_chunks.append(f"""
        <tr>
          <td>{flag_str} {pair}</td>
          <td>{format_price(data['last_price'], pair)}</td>
          <td>{data['direction']}</td>
          <td>{data['score_1_100']} {high_conf}</td>
          <td>SL:{format_price(data['SL'], pair)} | TP:{format_price(data['TP'], pair)}</td>
        </tr>
        """)
    table_rows = "".join(row_chunks)

    html = f"""
    <html>
      <body>
        <h2>Forex Signals - {today}</h2>
        <table border="1" style="border-collapse:collapse;text-align:center;">
          <tr><th>Instrument</th><th>Price</th><th>Signal</th><th>Score</th><th>SL/TP</th></tr>
          {table_rows}
        </table>
      </body>
    </html>
    """

    message = MIMEMultipart("alternative")
    message['From'] = f"Forex Bot <{GMAIL_USER}>"
    message['To'] = recipient
    message['Subject'] = f"Forex Signals - {today}"
    message.attach(MIMEText(html, "html"))

    try:
        with smtplib.SMTP_SSL("smtp.gmail.com", 465) as smtp:
            smtp.login(GMAIL_USER, GMAIL_APP_PASSWORD)
            smtp.sendmail(GMAIL_USER, recipient, message.as_string())
        print(f"📧 Email sent to {recipient}")
    except Exception as e:
        print(f"⚠️ Email send failed: {e}")

# -----------------------------
# MAIN
# -----------------------------
# Entry point: load data -> run the GA -> generate live signals -> email them.
if __name__=="__main__":
    print("Loading combined data...")
    combined_data = load_combined_data(SAVE_FOLDER)

    print("🎯 Running GA + vectorized backtest (parallelized)...")
    # trade_memory is returned but not used again in this cell.
    best_chrom, trade_memory = run_ga_vectorized_parallel(combined_data)

    print("📡 Generating live signals with SL/TP, 1-100 score, and high-confidence flag...")
    signals = generate_live_signals_with_sl_tp(best_chrom, combined_data)

    # Echo the signals for the notebook log before emailing them.
    print(json.dumps(signals, indent=2))

    print("📨 Sending email with signals...")
    send_forex_email(signals, recipient="nakatonabira3@gmail.com")
