In [1]:
# ======================================================
# Notebook Initialization - Safe for Colab & GitHub Actions
# ======================================================
import os
from pathlib import Path
import sys

# -----------------------------
# 1️⃣ Detect environment
# -----------------------------
try:
    import google.colab
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

IN_GHA = "GITHUB_ACTIONS" in os.environ

# -----------------------------
# 2️⃣ Safe working folder
# -----------------------------
if IN_COLAB:
    SAVE_FOLDER = Path("/content/forex-alpha-models")
else:
    # GitHub Actions or local
    SAVE_FOLDER = Path("./forex-alpha-models")

SAVE_FOLDER.mkdir(parents=True, exist_ok=True)
os.chdir(SAVE_FOLDER)
print(f"✅ Working directory set to: {SAVE_FOLDER.resolve()}")

# -----------------------------
# 3️⃣ Git config (headless-safe)
# -----------------------------
GIT_NAME = os.environ.get("GIT_USER_NAME", "Forex AI Bot")
GIT_EMAIL = os.environ.get("GIT_USER_EMAIL", "nakatonabira3@gmail.com")

os.system(f'git config --global user.name "{GIT_NAME}"')
os.system(f'git config --global user.email "{GIT_EMAIL}"')
print(f"✅ Git configured: {GIT_NAME} <{GIT_EMAIL}>")

# -----------------------------
# 4️⃣ Read tokens safely
# -----------------------------
FOREX_PAT = os.environ.get("FOREX_PAT")
BROWSERLESS_TOKEN = os.environ.get("BROWSERLESS_TOKEN")

if not FOREX_PAT and IN_GHA:
    print("⚠️ Warning: FOREX_PAT not found in GitHub Actions secrets")
if not BROWSERLESS_TOKEN:
    print("⚠️ Warning: BROWSERLESS_TOKEN not found")

# -----------------------------
# 5️⃣ Optional: safe repo paths for outputs & pickles
# -----------------------------
PICKLE_FOLDER = SAVE_FOLDER / "pickles"
PICKLE_FOLDER.mkdir(parents=True, exist_ok=True)  # ✅ parents=True fixes FileNotFoundError

CSV_FOLDER = SAVE_FOLDER / "csvs"
CSV_FOLDER.mkdir(parents=True, exist_ok=True)     # ✅ same here

print(f"✅ Output folders ready: {PICKLE_FOLDER}, {CSV_FOLDER}")

# -----------------------------
# 6️⃣ Python environment info (debug)
# -----------------------------
print(f"Python version: {sys.version}")
print(f"Current directory: {os.getcwd()}")


✅ Working directory set to: /home/runner/work/forex-ai-models/forex-ai-models/forex-alpha-models/forex-alpha-models
✅ Git configured: Rahim AI Bot <nakatonabira3@gmail.com>
✅ Output folders ready: forex-alpha-models/pickles, forex-alpha-models/csvs
Python version: 3.11.13 (main, Jun  4 2025, 04:12:12) [GCC 13.3.0]
Current directory: /home/runner/work/forex-ai-models/forex-ai-models/forex-alpha-models


In [2]:
!pip install mplfinance firebase-admin dropbox requests beautifulsoup4 pandas numpy ta yfinance pyppeteer nest_asyncio lightgbm joblib matplotlib alpha_vantage tqdm scikit-learn












In [3]:
# =========================================
# Fully Automatic Fresh-Run GitHub Workflow (PAT-safe)
# =========================================

import os
import subprocess
import shutil

# -----------------------------
# 0️⃣ User Config
# -----------------------------
GITHUB_USERNAME = "rahim-dotAI"
GITHUB_REPO = "forex-ai-models"
REPO_FOLDER = GITHUB_REPO  # Local folder
GIT_USER_EMAIL = "nakatonabira3@gmail.com"

# Use environment variable for token
GITHUB_PAT = os.environ.get("FOREX_PAT")
if not GITHUB_PAT:
    raise ValueError("❌ Token not set! Define environment variable FOREX_PAT in Colab or GitHub Actions secret.")

BRANCH = "main"

# -----------------------------
# 1️⃣ Helper function to run shell safely
# -----------------------------
def safe_run(cmd, shell=True, check=True):
    """Run shell command safely with logging."""
    try:
        subprocess.run(cmd, shell=shell, check=check)
    except subprocess.CalledProcessError as e:
        print(f"⚠️ Command failed: {cmd}\n   Reason: {e}")

# -----------------------------
# 2️⃣ Detect sudo availability
# -----------------------------
USE_SUDO = shutil.which("sudo") is not None

# -----------------------------
# 3️⃣ Install Git and Git LFS if missing
# -----------------------------
if shutil.which("git") is None:
    cmd = "apt-get update -qq && apt-get install -y git"
    if USE_SUDO:
        cmd = "sudo " + cmd
    safe_run(cmd)
else:
    print("✅ Git already installed.")

if shutil.which("git-lfs") is None:
    cmd = "apt-get install -y git-lfs"
    if USE_SUDO:
        cmd = "sudo " + cmd
    safe_run(cmd)
else:
    print("✅ Git LFS already installed.")

safe_run("git lfs install")

# -----------------------------
# 4️⃣ Remove local repo for fresh run
# -----------------------------
if os.path.exists(REPO_FOLDER):
    print(f"🗑️ Removing existing local repo '{REPO_FOLDER}' for a fresh run...")
    shutil.rmtree(REPO_FOLDER)

# -----------------------------
# 5️⃣ Configure Git identity
# -----------------------------
print("🔧 Configuring Git identity...")
safe_run(f'git config --global user.name "{GITHUB_USERNAME}"')
safe_run(f'git config --global user.email "{GIT_USER_EMAIL}"')

# -----------------------------
# 6️⃣ Clone repo fresh (PAT-safe)
# -----------------------------
REPO_URL = f"https://{GITHUB_USERNAME}:{GITHUB_PAT}@github.com/{GITHUB_USERNAME}/{GITHUB_REPO}.git"
print(f"📥 Cloning repo '{REPO_FOLDER}' from GitHub...")
safe_run(f"git clone {REPO_URL}")

orig_dir = os.getcwd()
os.chdir(REPO_FOLDER)

# -----------------------------
# 6️⃣a Pull LFS-tracked files
# -----------------------------
print("📦 Pulling Git LFS-tracked files...")
safe_run(f"git lfs pull https://{GITHUB_USERNAME}:{GITHUB_PAT}@github.com/{GITHUB_USERNAME}/{GITHUB_REPO}.git")

# -----------------------------
# 7️⃣ Track CSV/PKL files with Git LFS
# -----------------------------
print("📌 Tracking CSV/PKL files with Git LFS...")
safe_run("git lfs track '*.csv'")
safe_run("git lfs track '*.pkl'")
safe_run("git add .gitattributes")
safe_run('git commit -m "Track CSV/PKL files with Git LFS" || echo "No .gitattributes changes"')

# -----------------------------
# 8️⃣ Stage, commit, and push changes safely (PAT-safe)
# -----------------------------
print("📂 Staging all new/modified files...")
safe_run("git add -A")

# Only commit if there are changes
status_result = subprocess.run("git status --porcelain", shell=True, capture_output=True, text=True)
if status_result.stdout.strip():
    safe_run('git commit -m "Auto-update: new or modified files"')
    print("🚀 Pushing changes to GitHub (PAT-safe)...")
    safe_run(f"git push https://{GITHUB_USERNAME}:{GITHUB_PAT}@github.com/{GITHUB_USERNAME}/{GITHUB_REPO}.git {BRANCH}")
else:
    print("✅ No changes detected, skipping commit/push.")

# -----------------------------
# 9️⃣ List LFS-tracked files
# -----------------------------
print("📋 LFS-tracked files:")
safe_run("git lfs ls-files")

# -----------------------------
# 10️⃣ Return to original directory
# -----------------------------
os.chdir(orig_dir)
print("✅ Fresh-run GitHub repo workflow complete!")


✅ Git already installed.
✅ Git LFS already installed.
Updated Git hooks.
Git LFS initialized.
🗑️ Removing existing local repo 'forex-ai-models' for a fresh run...
🔧 Configuring Git identity...


📥 Cloning repo 'forex-ai-models' from GitHub...


Cloning into 'forex-ai-models'...


📦 Pulling Git LFS-tracked files...
📌 Tracking CSV/PKL files with Git LFS...
"*.csv" already supported
"*.pkl" already supported
On branch main
Your branch is up to date with 'origin/main'.

nothing to commit, working tree clean
No .gitattributes changes
📂 Staging all new/modified files...
✅ No changes detected, skipping commit/push.
📋 LFS-tracked files:
✅ Fresh-run GitHub repo workflow complete!


In [4]:
import os

# Set your keys (only for this session)
os.environ['ALPHA_VANTAGE_KEY'] = '1W58NPZXOG5SLHZ6'
os.environ['BROWSERLESS_TOKEN'] = '2St0qUktyKsA0Bsb5b510553885cae26942e44c26c0f19c3d'

# Test if they work
print("Alpha Vantage Key:", os.environ.get('ALPHA_VANTAGE_KEY'))
print("Browserless Token:", os.environ.get('BROWSERLESS_TOKEN'))




Alpha Vantage Key: 1W58NPZXOG5SLHZ6
Browserless Token: 2St0qUktyKsA0Bsb5b510553885cae26942e44c26c0f19c3d


In [5]:
import os
import pandas as pd
import requests
import hashlib
from pathlib import Path
import subprocess
from concurrent.futures import ThreadPoolExecutor, as_completed
import threading
import time

# ======================================================
# CONFIGURATION
# ======================================================
SAVE_FOLDER = Path("/content/forex-alpha-models") if "google.colab" in str(os.sys.modules) else Path("./forex-alpha-models")
SAVE_FOLDER.mkdir(parents=True, exist_ok=True)

PICKLE_FOLDER = SAVE_FOLDER / "pickles"
PICKLE_FOLDER.mkdir(parents=True, exist_ok=True)
CSV_FOLDER = SAVE_FOLDER / "csvs"
CSV_FOLDER.mkdir(parents=True, exist_ok=True)

GIT_NAME = os.environ.get("GIT_USER_NAME", "Abdul Rahim")
GIT_EMAIL = os.environ.get("GIT_USER_EMAIL", "nakatonabira3@gmail.com")
GITHUB_USERNAME = "rahim-dotAI"
GITHUB_REPO = "forex-ai-models"
GITHUB_TOKEN = os.environ.get("FOREX_PAT")
BRANCH = "main"

ALPHA_VANTAGE_KEY = os.environ.get("ALPHA_VANTAGE_KEY")
if not ALPHA_VANTAGE_KEY:
    raise ValueError("❌ Missing ALPHA_VANTAGE_KEY environment variable!")

# ======================================================
# SAFE GIT + LFS SETUP
# ======================================================
subprocess.run(["git", "config", "--global", "user.name", GIT_NAME], check=False)
subprocess.run(["git", "config", "--global", "user.email", GIT_EMAIL], check=False)
subprocess.run(["git", "config", "--global", "credential.helper", "store"], check=False)

cred_file = Path.home() / ".git-credentials"
cred_file.write_text(f"https://{GITHUB_USERNAME}:{GITHUB_TOKEN}@github.com\n")

subprocess.run(["git", "lfs", "install"], check=False)

# ======================================================
# HELPERS
# ======================================================
def ensure_tz_naive(df):
    if df is None or df.empty:
        return df
    df.index = pd.to_datetime(df.index, errors='coerce')
    if df.index.tz is not None:
        df.index = df.index.tz_convert(None)
    return df

def file_hash(filepath, chunk_size=8192):
    if not filepath.exists():
        return None
    md5 = hashlib.md5()
    with open(filepath, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            md5.update(chunk)
    return md5.hexdigest()

def fetch_alpha_vantage_fx(pair, outputsize='compact', max_retries=3, retry_delay=5):
    base_url = 'https://www.alphavantage.co/query'
    from_currency, to_currency = pair.split('/')
    params = {
        'function': 'FX_DAILY',
        'from_symbol': from_currency,
        'to_symbol': to_currency,
        'outputsize': outputsize,
        'datatype': 'json',
        'apikey': ALPHA_VANTAGE_KEY
    }
    for attempt in range(max_retries):
        try:
            r = requests.get(base_url, params=params, timeout=30)
            r.raise_for_status()
            data = r.json()
            if 'Time Series FX (Daily)' not in data:
                raise ValueError(f"Unexpected API response: {data}")
            ts = data['Time Series FX (Daily)']
            df = pd.DataFrame(ts).T
            df.index = pd.to_datetime(df.index)
            df = df.sort_index()
            df = df.rename(columns={
                '1. open':'open',
                '2. high':'high',
                '3. low':'low',
                '4. close':'close'
            }).astype(float)
            return ensure_tz_naive(df)
        except Exception as e:
            print(f"⚠️ Attempt {attempt+1} failed fetching {pair}: {e}")
            time.sleep(retry_delay)
    print(f"❌ Failed to fetch {pair} after {max_retries} retries")
    return pd.DataFrame()

# ======================================================
# REPO ENSURE
# ======================================================
def ensure_repo_cloned(repo_url, repo_folder, branch="main"):
    repo_folder = Path(repo_folder)
    if not (repo_folder / ".git").exists():
        if repo_folder.exists():
            subprocess.run(["rm", "-rf", str(repo_folder)], check=True)
        print("📥 Cloning repo...")
        subprocess.run(["git", "clone", "-b", branch, repo_url, str(repo_folder)], check=True)
    else:
        print("🔄 Repo exists, pulling latest...")
        subprocess.run(["git", "-C", str(repo_folder), "fetch", "origin"], check=True)
        subprocess.run(["git", "-C", str(repo_folder), "checkout", branch], check=False)
        subprocess.run(["git", "-C", str(repo_folder), "pull", "origin", branch], check=False)
        subprocess.run(["git", "-C", str(repo_folder), "lfs", "pull"], check=False)

# ======================================================
# THREAD-SAFE FX PROCESSING
# ======================================================
lock = threading.Lock()

def process_pair(pair):
    filename = f"{pair.replace('/', '_')}.csv"
    filepath = SAVE_FOLDER / filename

    existing_df = pd.read_csv(filepath, index_col=0, parse_dates=True) if filepath.exists() else pd.DataFrame()
    old_hash = file_hash(filepath)
    new_df = fetch_alpha_vantage_fx(pair)
    if new_df.empty:
        return None, f"No new data for {pair}"

    combined_df = pd.concat([existing_df, new_df]) if not existing_df.empty else new_df
    combined_df = combined_df[~combined_df.index.duplicated(keep='last')]
    combined_df.sort_index(inplace=True)

    with lock:
        combined_df.to_csv(filepath)

    new_hash = file_hash(filepath)
    changed = old_hash != new_hash
    return str(filepath) if changed else None, f"{pair} {'updated' if changed else 'no changes'}"

# ======================================================
# MAIN EXECUTION
# ======================================================
pairs = ["EUR/USD", "GBP/USD", "USD/JPY", "AUD/USD"]
REPO_URL = f"https://{GITHUB_USERNAME}:{GITHUB_TOKEN}@github.com/{GITHUB_USERNAME}/{GITHUB_REPO}.git"

ensure_repo_cloned(REPO_URL, SAVE_FOLDER, BRANCH)

changed_files = []
tasks = []

with ThreadPoolExecutor(max_workers=8) as executor:
    for pair in pairs:
        tasks.append(executor.submit(process_pair, pair))
    for future in as_completed(tasks):
        filepath, msg = future.result()
        print(msg)
        if filepath:
            changed_files.append(filepath)

if changed_files:
    print(f"🚀 Committing {len(changed_files)} updated files...")
    subprocess.run(["git", "-C", str(SAVE_FOLDER), "add"] + changed_files, check=False)
    subprocess.run(["git", "-C", str(SAVE_FOLDER), "commit", "-m", "Update Alpha Vantage FX data"], check=False)
    subprocess.run(["git", "-C", str(SAVE_FOLDER), "push", "origin", BRANCH], check=False)
else:
    print("✅ No changes to commit.")

print("✅ All FX pairs processed, saved, and pushed (LFS-ready, 403-proof).")


Updated Git hooks.
Git LFS initialized.
🔄 Repo exists, pulling latest...


Already on 'main'


Your branch is up to date with 'origin/main'.


From https://github.com/rahim-dotAI/forex-ai-models
 * branch            main       -> FETCH_HEAD


Already up to date.
GBP/USD no changes


EUR/USD no changes
AUD/USD no changes


USD/JPY no changes
✅ No changes to commit.
✅ All FX pairs processed, saved, and pushed (LFS-ready, 403-proof).


In [6]:
import os
import time
import hashlib
import subprocess
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed
import pandas as pd
import yfinance as yf

# ============================================================
# 1️⃣ SAFE WORKING FOLDER (Colab & GitHub Actions)
# ============================================================
try:
    import google.colab
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

IN_GHA = "GITHUB_ACTIONS" in os.environ

SAVE_FOLDER = Path("/content/forex-alpha-models") if IN_COLAB else Path("./forex-alpha-models")
SAVE_FOLDER.mkdir(parents=True, exist_ok=True)
os.chdir(SAVE_FOLDER)
print(f"✅ Working directory: {SAVE_FOLDER.resolve()}")

# ============================================================
# 2️⃣ GIT CONFIG
# ============================================================
GIT_NAME = os.environ.get("GIT_USER_NAME", "Abdul Rahim")
GIT_EMAIL = os.environ.get("GIT_USER_EMAIL", "nakatonabira3@gmail.com")
GITHUB_USERNAME = "rahim-dotAI"
GITHUB_REPO = "forex-ai-models"
BRANCH = "main"

subprocess.run(["git", "config", "--global", "user.name", GIT_NAME], check=False)
subprocess.run(["git", "config", "--global", "user.email", GIT_EMAIL], check=False)
subprocess.run(["git", "config", "--global", "credential.helper", "store"], check=False)

# ============================================================
# 3️⃣ TOKENS
# ============================================================
GITHUB_TOKEN = os.environ.get("FOREX_PAT")
if not GITHUB_TOKEN:
    raise ValueError("❌ FOREX_PAT missing! Set environment variable.")

cred_file = Path.home() / ".git-credentials"
cred_file.write_text(f"https://{GITHUB_USERNAME}:{GITHUB_TOKEN}@github.com\n")

subprocess.run(["git", "lfs", "install"], check=False)

# ============================================================
# 4️⃣ REPO ENSURE
# ============================================================
def ensure_repo():
    if not (SAVE_FOLDER / ".git").exists():
        if SAVE_FOLDER.exists():
            subprocess.run(["rm", "-rf", str(SAVE_FOLDER)], check=True)
        print("📥 Cloning fresh repo...")
        subprocess.run([
            "git", "clone", "-b", BRANCH,
            f"https://{GITHUB_USERNAME}:{GITHUB_TOKEN}@github.com/{GITHUB_USERNAME}/{GITHUB_REPO}.git",
            str(SAVE_FOLDER)
        ], check=True)
    else:
        print("🔄 Repo exists, pulling latest...")
        subprocess.run(["git", "-C", str(SAVE_FOLDER), "fetch", "origin"], check=True)
        subprocess.run(["git", "-C", str(SAVE_FOLDER), "checkout", BRANCH], check=False)
        subprocess.run(["git", "-C", str(SAVE_FOLDER), "pull", "origin", BRANCH], check=False)
    subprocess.run(["git", "-C", str(SAVE_FOLDER), "lfs", "pull"], check=False)

ensure_repo()

# ============================================================
# 5️⃣ FX CONFIG
# ============================================================
FX_PAIRS = ["EUR/USD", "GBP/USD", "USD/JPY", "AUD/USD"]
TIMEFRAMES = {
    "1m_7d": ("1m", "7d"),
    "5m_1mo": ("5m", "1mo"),
    "15m_60d": ("15m", "60d"),
    "1h_2y": ("1h", "2y"),
    "1d_5y": ("1d", "5y")
}

# ============================================================
# 6️⃣ HELPERS
# ============================================================
def file_hash(filepath, chunk_size=8192):
    if not filepath.exists():
        return None
    md5 = hashlib.md5()
    with open(filepath, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            md5.update(chunk)
    return md5.hexdigest()

def ensure_tz_naive(df):
    if df is None or df.empty:
        return df
    df.index = pd.to_datetime(df.index, errors='coerce')
    if df.index.tz is not None:
        df.index = df.index.tz_convert(None)
    return df

def merge_data(existing_df, new_df):
    existing_df = ensure_tz_naive(existing_df)
    new_df = ensure_tz_naive(new_df)
    if existing_df.empty:
        return new_df
    if new_df.empty:
        return existing_df
    combined_df = pd.concat([existing_df, new_df])
    combined_df = combined_df[~combined_df.index.duplicated(keep="last")]
    combined_df.sort_index(inplace=True)
    return combined_df

# ============================================================
# 7️⃣ WORKER FUNCTION
# ============================================================
def process_pair_tf(pair, tf_name, interval, period, max_retries=3, retry_delay=5):
    symbol = pair.replace("/", "") + "=X"
    filename = f"{pair.replace('/', '_')}_{tf_name}.csv"
    filepath = SAVE_FOLDER / filename

    existing_df = pd.read_csv(filepath, index_col=0, parse_dates=True) if filepath.exists() else pd.DataFrame()
    old_hash = file_hash(filepath)

    for attempt in range(max_retries):
        try:
            df = yf.download(symbol, interval=interval, period=period, progress=False, auto_adjust=False, threads=True)
            if df.empty:
                raise ValueError("No data returned")
            df = df[[c for c in ['Open','High','Low','Close','Volume'] if c in df.columns]].copy()
            df.rename(columns=lambda x: x.lower(), inplace=True)
            df = ensure_tz_naive(df)

            combined_df = merge_data(existing_df, df)
            combined_df.to_csv(filepath)
            if old_hash != file_hash(filepath):
                return f"📈 Updated {pair} {tf_name}", str(filepath)
            else:
                return f"✅ No changes {pair} {tf_name}", None
        except Exception as e:
            print(f"⚠️ Attempt {attempt+1}/{max_retries} failed for {pair} {tf_name}: {e}")
            if attempt < max_retries:
                time.sleep(retry_delay)
            else:
                return f"❌ Failed {pair} {tf_name}", None

# ============================================================
# 8️⃣ PARALLEL EXECUTION (Snake-safe with glob)
# ============================================================
changed_files = []
tasks = []

with ThreadPoolExecutor(max_workers=8) as executor:
    for pair in FX_PAIRS:
        for tf_name, (interval, period) in TIMEFRAMES.items():
            tasks.append(executor.submit(process_pair_tf, pair, tf_name, interval, period))

for future in as_completed(tasks):
    msg, filename = future.result()
    print(msg)
    if filename:
        changed_files.append(filename)

# ============================================================
# 9️⃣ COMMIT & PUSH
# ============================================================
if changed_files:
    print(f"🚀 Committing {len(changed_files)} updated files...")
    subprocess.run(["git", "-C", str(SAVE_FOLDER), "add"] + changed_files, check=False)
    subprocess.run(["git", "-C", str(SAVE_FOLDER), "commit", "-m", "Update multiple FX files"], check=False)
    subprocess.run(["git", "-C", str(SAVE_FOLDER), "push", "origin", BRANCH], check=False)
else:
    print("✅ No changes detected, nothing to push.")

print("🎯 All FX pairs & timeframes processed safely (Colab & GHA compatible, LFS & 403-proof).")


✅ Working directory: /home/runner/work/forex-ai-models/forex-ai-models/forex-alpha-models/forex-alpha-models/forex-alpha-models


Updated Git hooks.
Git LFS initialized.
🔄 Repo exists, pulling latest...


Already on 'main'


Your branch is up to date with 'origin/main'.


From https://github.com/rahim-dotAI/forex-ai-models
 * branch            main       -> FETCH_HEAD
  existing_df = pd.read_csv(filepath, index_col=0, parse_dates=True) if filepath.exists() else pd.DataFrame()
  existing_df = pd.read_csv(filepath, index_col=0, parse_dates=True) if filepath.exists() else pd.DataFrame()
  existing_df = pd.read_csv(filepath, index_col=0, parse_dates=True) if filepath.exists() else pd.DataFrame()
  existing_df = pd.read_csv(filepath, index_col=0, parse_dates=True) if filepath.exists() else pd.DataFrame()
  existing_df = pd.read_csv(filepath, index_col=0, parse_dates=True) if filepath.exists() else pd.DataFrame()
  existing_df = pd.read_csv(filepath, index_col=0, parse_dates=True) if filepath.exists() else pd.DataFrame()
  existing_df = pd.read_csv(filepath, index_col=0, parse_dates=True) if filepath.exists() else pd.DataFrame()
  existing_df = pd.read_csv(filepath, index_col=0, parse_dates=True) if filepath.exists() else pd.DataFrame()


Already up to date.


  df.index = pd.to_datetime(df.index, errors='coerce')
  df.index = pd.to_datetime(df.index, errors='coerce')
  df.index = pd.to_datetime(df.index, errors='coerce')
  df.index = pd.to_datetime(df.index, errors='coerce')
  df.index = pd.to_datetime(df.index, errors='coerce')
  df.index = pd.to_datetime(df.index, errors='coerce')
  df.index = pd.to_datetime(df.index, errors='coerce')
  df.index = pd.to_datetime(df.index, errors='coerce')


  existing_df = pd.read_csv(filepath, index_col=0, parse_dates=True) if filepath.exists() else pd.DataFrame()
  existing_df = pd.read_csv(filepath, index_col=0, parse_dates=True) if filepath.exists() else pd.DataFrame()
  existing_df = pd.read_csv(filepath, index_col=0, parse_dates=True) if filepath.exists() else pd.DataFrame()
  existing_df = pd.read_csv(filepath, index_col=0, parse_dates=True) if filepath.exists() else pd.DataFrame()
  existing_df = pd.read_csv(filepath, index_col=0, parse_dates=True) if filepath.exists() else pd.DataFrame()
  existing_df = pd.read_csv(filepath, index_col=0, parse_dates=True) if filepath.exists() else pd.DataFrame()
  existing_df = pd.read_csv(filepath, index_col=0, parse_dates=True) if filepath.exists() else pd.DataFrame()
  existing_df = pd.read_csv(filepath, index_col=0, parse_dates=True) if filepath.exists() else pd.DataFrame()
  df.index = pd.to_datetime(df.index, errors='coerce')
  df.index = pd.to_datetime(df.index, errors='coerce')


  df.index = pd.to_datetime(df.index, errors='coerce')
  df.index = pd.to_datetime(df.index, errors='coerce')
  df.index = pd.to_datetime(df.index, errors='coerce')
  df.index = pd.to_datetime(df.index, errors='coerce')
  df.index = pd.to_datetime(df.index, errors='coerce')
  df.index = pd.to_datetime(df.index, errors='coerce')


  existing_df = pd.read_csv(filepath, index_col=0, parse_dates=True) if filepath.exists() else pd.DataFrame()
  existing_df = pd.read_csv(filepath, index_col=0, parse_dates=True) if filepath.exists() else pd.DataFrame()
  existing_df = pd.read_csv(filepath, index_col=0, parse_dates=True) if filepath.exists() else pd.DataFrame()
  existing_df = pd.read_csv(filepath, index_col=0, parse_dates=True) if filepath.exists() else pd.DataFrame()
  df.index = pd.to_datetime(df.index, errors='coerce')
  df.index = pd.to_datetime(df.index, errors='coerce')
  df.index = pd.to_datetime(df.index, errors='coerce')
  df.index = pd.to_datetime(df.index, errors='coerce')


📈 Updated USD/JPY 5m_1mo
📈 Updated AUD/USD 1m_7d
📈 Updated AUD/USD 1d_5y
📈 Updated GBP/USD 1h_2y
📈 Updated USD/JPY 1m_7d
📈 Updated EUR/USD 5m_1mo
📈 Updated USD/JPY 1d_5y
📈 Updated AUD/USD 1h_2y
📈 Updated GBP/USD 15m_60d
📈 Updated USD/JPY 1h_2y
📈 Updated AUD/USD 15m_60d
📈 Updated GBP/USD 1d_5y
📈 Updated GBP/USD 1m_7d
📈 Updated EUR/USD 15m_60d
📈 Updated GBP/USD 5m_1mo
📈 Updated AUD/USD 5m_1mo
📈 Updated USD/JPY 15m_60d
📈 Updated EUR/USD 1m_7d
📈 Updated EUR/USD 1h_2y
📈 Updated EUR/USD 1d_5y
🚀 Committing 20 updated files...
On branch main
Your branch is up to date with 'origin/main'.

Untracked files:
  (use "git add <file>..." to include in what will be committed)
	AUD_USD_15m_60d.csv
	AUD_USD_1d_5y.csv
	AUD_USD_1h_2y.csv
	AUD_USD_1m_7d.csv
	AUD_USD_5m_1mo.csv
	EUR_USD_15m_60d.csv
	EUR_USD_1d_5y.csv
	EUR_USD_1h_2y.csv
	EUR_USD_1m_7d.csv
	EUR_USD_5m_1mo.csv
	GBP_USD_15m_60d.csv
	GBP_USD_1d_5y.csv
	GBP_USD_1h_2y.csv
	GBP_USD_1m_7d.csv
	GBP_USD_5m_1mo.csv
	USD_JPY_15m_60d.csv
	USD_JPY_1d_5y.c

fatal: pathspec 'forex-alpha-models/USD_JPY_5m_1mo.csv' did not match any files


🎯 All FX pairs & timeframes processed safely (Colab & GHA compatible, LFS & 403-proof).


Everything up-to-date


In [7]:
# ======================================================
# FX Data Processing & Git Automation - Updated
# ======================================================
import os
import pandas as pd
import hashlib
from pathlib import Path
import subprocess
from concurrent.futures import ThreadPoolExecutor, as_completed
import threading
import time
import sys

# -----------------------------
# 0️⃣ Environment Detection & Safe Folder
# -----------------------------
try:
    import google.colab
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

IN_GHA = "GITHUB_ACTIONS" in os.environ

if IN_COLAB:
    SAVE_FOLDER = Path("/content/forex-alpha-models")
else:
    SAVE_FOLDER = Path("./forex-alpha-models")

SAVE_FOLDER.mkdir(parents=True, exist_ok=True)
os.chdir(SAVE_FOLDER)
print(f"✅ Working directory: {SAVE_FOLDER.resolve()}")

# -----------------------------
# 1️⃣ Git Config & Repo Info
# -----------------------------
GIT_NAME = os.environ.get("GIT_USER_NAME", "Abdul Rahim")
GIT_EMAIL = os.environ.get("GIT_USER_EMAIL", "nakatonabira3@gmail.com")
GITHUB_USERNAME = os.environ.get("GITHUB_USERNAME", "rahim-dotAI")
GITHUB_REPO = os.environ.get("GITHUB_REPO", "forex-ai-models")
GITHUB_TOKEN = os.environ.get("FOREX_PAT", "").strip()

if not GITHUB_TOKEN and IN_GHA:
    raise ValueError("❌ Token not set! Define environment variable FOREX_PAT.")

BRANCH = "main"
REPO_URL = f"https://{GITHUB_TOKEN}@github.com/{GITHUB_USERNAME}/{GITHUB_REPO}.git"

# Configure Git
subprocess.run(["git", "config", "--global", "user.name", GIT_NAME], check=False)
subprocess.run(["git", "config", "--global", "user.email", GIT_EMAIL], check=False)
print(f"✅ Git configured: {GIT_NAME} <{GIT_EMAIL}>")

# -----------------------------
# 2️⃣ Ensure Repo & Git LFS
# -----------------------------
def ensure_repo():
    """Clone or update repo safely with retries and Git LFS."""
    if not (SAVE_FOLDER / ".git").exists():
        if SAVE_FOLDER.exists():
            subprocess.run(["rm", "-rf", str(SAVE_FOLDER)], check=True)
        for attempt in range(3):
            try:
                print(f"📥 Cloning repo (attempt {attempt+1})...")
                subprocess.run(["git", "clone", "-b", BRANCH, REPO_URL, str(SAVE_FOLDER)], check=True)
                break
            except subprocess.CalledProcessError:
                time.sleep(5)
    else:
        print("🔄 Repo exists, pulling latest changes...")
        subprocess.run(["git", "-C", str(SAVE_FOLDER), "fetch", "origin"], check=True)
        subprocess.run(["git", "-C", str(SAVE_FOLDER), "checkout", BRANCH], check=False)
        subprocess.run(["git", "-C", str(SAVE_FOLDER), "pull", "origin", BRANCH], check=False)

    # Git LFS setup
    if IN_COLAB:
        subprocess.run("apt-get update && apt-get install git-lfs -y", shell=True)
    subprocess.run(["git", "lfs", "install"], check=False)
    subprocess.run(["git", "-C", str(SAVE_FOLDER), "lfs", "track", "*_combined.csv"], check=False)
    subprocess.run(["git", "-C", str(SAVE_FOLDER), "add", ".gitattributes"], check=False)
    subprocess.run(f'git -C {SAVE_FOLDER} commit -m "Track combined CSVs with Git LFS" || echo "No changes"', shell=True)

ensure_repo()

# -----------------------------
# 3️⃣ Output Folders
# -----------------------------
PICKLE_FOLDER = SAVE_FOLDER / "pickles"
CSV_FOLDER = SAVE_FOLDER / "csvs"
for folder in [PICKLE_FOLDER, CSV_FOLDER]:
    folder.mkdir(parents=True, exist_ok=True)
print(f"✅ Output folders ready: {PICKLE_FOLDER}, {CSV_FOLDER}")

# -----------------------------
# 4️⃣ Helper Functions
# -----------------------------
def ensure_tz_naive(df):
    if df is None or df.empty:
        return df
    df.index = pd.to_datetime(df.index, errors='coerce').tz_localize(None)
    return df

def file_hash(filepath, chunk_size=8192):
    if not os.path.exists(filepath):
        return None
    md5 = hashlib.md5()
    with open(filepath, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            md5.update(chunk)
    return md5.hexdigest()

lock = threading.Lock()

def combine_fx_data(av_df, yf_df):
    av_df = ensure_tz_naive(av_df)
    yf_df = ensure_tz_naive(yf_df)
    if av_df.empty and yf_df.empty:
        return pd.DataFrame()
    combined_df = pd.merge(
        yf_df, av_df[['open','high','low','close']],
        left_index=True, right_index=True, how='outer', suffixes=('','_av')
    )
    for col in ['open','high','low','close']:
        combined_df[col] = combined_df[col].fillna(combined_df[f'{col}_av'])
    combined_df.drop(columns=[f'{col}_av' for col in ['open','high','low','close']], errors='ignore', inplace=True)
    combined_df.sort_index(inplace=True)
    combined_df.dropna(subset=['open','high','low','close'], inplace=True)
    combined_df = combined_df[~combined_df.index.duplicated(keep="last")]
    return combined_df

# -----------------------------
# 5️⃣ Worker Function
# -----------------------------
def process_pair_tf(pair, tf_name, max_retries=3):
    for attempt in range(max_retries):
        try:
            av_file = CSV_FOLDER / f"{pair.replace('/','_')}_daily.csv"
            yf_file = CSV_FOLDER / f"{pair.replace('/','_')}_{tf_name}.csv"

            av_df = ensure_tz_naive(pd.read_csv(av_file, index_col=0, parse_dates=True)) if av_file.exists() else pd.DataFrame()
            yf_df = ensure_tz_naive(pd.read_csv(yf_file, index_col=0, parse_dates=True)) if yf_file.exists() else pd.DataFrame()

            combined_df = combine_fx_data(av_df, yf_df)
            if combined_df.empty:
                return f"⚪ No data to combine for {pair} {tf_name}", None

            combined_file = CSV_FOLDER / f"{pair.replace('/','_')}_{tf_name}_combined.csv"
            old_hash = file_hash(combined_file)

            with lock:
                combined_df.to_csv(combined_file)

            new_hash = file_hash(combined_file)
            if old_hash != new_hash:
                return f"📌 Updated {pair} {tf_name}", combined_file
            else:
                return f"✅ No changes for {pair} {tf_name}", None
        except Exception as e:
            print(f"⚠️ Attempt {attempt+1} failed for {pair} {tf_name}: {e}")
            time.sleep(3)
    return f"❌ Failed to combine {pair} {tf_name}", None

# -----------------------------
# 6️⃣ Parallel Execution
# -----------------------------
pairs = ["EUR/USD","GBP/USD","USD/JPY","AUD/USD"]
timeframes = ["1m_7d","5m_1mo","15m_60d","1h_2y","1d_5y"]

changed_files = []
with ThreadPoolExecutor(max_workers=8) as executor:
    futures = [executor.submit(process_pair_tf, p, tf) for p in pairs for tf in timeframes]
    for future in as_completed(futures):
        msg, file = future.result()
        print(msg)
        if file:
            changed_files.append(str(file))

# -----------------------------
# 7️⃣ Commit & Push Changes
# -----------------------------
if changed_files:
    print(f"🚀 Committing {len(changed_files)} files...")
    subprocess.run(["git", "-C", str(SAVE_FOLDER), "add"] + changed_files, check=False)
    subprocess.run(["git", "-C", str(SAVE_FOLDER), "commit", "-m", "Update combined FX data"], check=False)

    push_cmd = f"git -C {SAVE_FOLDER} push {REPO_URL} {BRANCH}"
    for attempt in range(3):
        if subprocess.run(push_cmd, shell=True).returncode == 0:
            print("✅ Push successful.")
            break
        else:
            print(f"⚠️ Push attempt {attempt+1} failed, retrying...")
            time.sleep(5)
else:
    print("✅ No combined files changed, nothing to push.")

print("🎯 All FX pairs processed and combined (parallel, single push).")


✅ Working directory: /home/runner/work/forex-ai-models/forex-ai-models/forex-alpha-models/forex-alpha-models/forex-alpha-models/forex-alpha-models
✅ Git configured: Rahim AI Bot <nakatonabira3@gmail.com>
🔄 Repo exists, pulling latest changes...


Already on 'main'


Your branch is up to date with 'origin/main'.


Already up to date.
Updated Git hooks.
Git LFS initialized.
"*_combined.csv" already supported
On branch main
Your branch is up to date with 'origin/main'.

Untracked files:
  (use "git add <file>..." to include in what will be committed)
	forex-alpha-models/

nothing added to commit but untracked files present (use "git add" to track)
No changes
✅ Output folders ready: forex-alpha-models/pickles, forex-alpha-models/csvs
⚪ No data to combine for GBP/USD 5m_1mo
⚪ No data to combine for USD/JPY 1d_5y
⚪ No data to combine for GBP/USD 15m_60d
⚪ No data to combine for USD/JPY 15m_60d
⚪ No data to combine for EUR/USD 15m_60d
⚪ No data to combine for USD/JPY 1m_7d
⚪ No data to combine for EUR/USD 5m_1mo
⚪ No data to combine for USD/JPY 1h_2y
⚪ No data to combine for EUR/USD 1h_2y
⚪ No data to combine for EUR/USD 1d_5y
⚪ No data to combine for USD/JPY 5m_1mo
⚪ No data to combine for GBP/USD 1h_2y
⚪ No data to combine for GBP/USD 1m_7d
⚪ No data to combine for EUR/USD 1m_7d
⚪ No data to combine

From https://github.com/rahim-dotAI/forex-ai-models
 * branch            main       -> FETCH_HEAD


In [8]:
import os
import requests
import re

def fetch_live_rate(pair):
    """
    Fetch live FX rate from X-Rates using Browserless.
    """
    from_currency, to_currency = pair.split('/')
    browserless_token = os.environ.get('BROWSERLESS_TOKEN')
    if not browserless_token:
        raise ValueError("Set BROWSERLESS_TOKEN in your environment variables")

    url = f"https://production-sfo.browserless.io/content?token={browserless_token}"
    payload = {"url": f"https://www.x-rates.com/calculator/?from={from_currency}&to={to_currency}&amount=1"}

    try:
        res = requests.post(url, json=payload)
        # Regex to extract the FX value
        match = re.search(r'ccOutputRslt[^>]*>([\d,.]+)', res.text)
        return float(match.group(1).replace(',', '')) if match else 0
    except Exception as e:
        print(f"Failed to fetch {pair}: {e}")
        return 0

# --- Fetch live prices for all pairs ---
pairs = ["EUR/USD", "GBP/USD", "USD/JPY", "AUD/USD"]
live_prices = {pair: fetch_live_rate(pair) for pair in pairs}

for pair, price in live_prices.items():
    print(f"{pair}: {price}")


EUR/USD: 1.163
GBP/USD: 1.34
USD/JPY: 152.6
AUD/USD: 0.6587


In [9]:
# ======================================================
# FX Data + Indicators Processing - Updated for Environment Awareness
# ======================================================
import os
import pandas as pd
import hashlib
import numpy as np
import ta
from ta.momentum import WilliamsRIndicator
from sklearn.preprocessing import MinMaxScaler
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed
import subprocess
import threading
import time
import sys

# -----------------------------
# 0️⃣ Environment Detection & Safe Folders
# -----------------------------
try:
    import google.colab
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

IN_GHA = "GITHUB_ACTIONS" in os.environ

if IN_COLAB:
    SAVE_FOLDER = Path("/content/forex-alpha-models")
else:
    SAVE_FOLDER = Path("./forex-alpha-models")

SAVE_FOLDER.mkdir(parents=True, exist_ok=True)
os.chdir(SAVE_FOLDER)
print(f"✅ Working directory: {SAVE_FOLDER.resolve()}")

combined_save_path = SAVE_FOLDER / "combined_with_indicators"
combined_save_path.mkdir(parents=True, exist_ok=True)

pairs = ["EUR/USD", "GBP/USD", "USD/JPY", "AUD/USD"]

# -----------------------------
# 1️⃣ Git Config & Repo Info
# -----------------------------
GIT_NAME = os.environ.get("GIT_USER_NAME", "Abdul Rahim")
GIT_EMAIL = os.environ.get("GIT_USER_EMAIL", "nakatonabira3@gmail.com")
GITHUB_USERNAME = os.environ.get("GITHUB_USERNAME", "rahim-dotAI")
GITHUB_REPO = os.environ.get("GITHUB_REPO", "forex-ai-models")
GITHUB_TOKEN = os.environ.get("FOREX_PAT", "").strip()

if not GITHUB_TOKEN and IN_GHA:
    raise ValueError("❌ FOREX_PAT environment variable not found")

BRANCH = "main"
REPO_URL = f"https://{GITHUB_TOKEN}@github.com/{GITHUB_USERNAME}/{GITHUB_REPO}.git"

subprocess.run(["git", "config", "--global", "user.name", GIT_NAME], check=False)
subprocess.run(["git", "config", "--global", "user.email", GIT_EMAIL], check=False)
print(f"✅ Git configured: {GIT_NAME} <{GIT_EMAIL}>")

# -----------------------------
# 2️⃣ Ensure Repo Exists or Synced
# -----------------------------
def ensure_repo():
    if not (SAVE_FOLDER / ".git").exists():
        print("📥 Cloning fresh repository...")
        if SAVE_FOLDER.exists():
            subprocess.run(["rm", "-rf", str(SAVE_FOLDER)], check=False)
        subprocess.run(["git", "clone", "-b", BRANCH, REPO_URL, str(SAVE_FOLDER)], check=True)
    else:
        print("🔄 Repo found, updating...")
        subprocess.run(["git", "-C", str(SAVE_FOLDER), "fetch", "origin"], check=False)
        subprocess.run(["git", "-C", str(SAVE_FOLDER), "checkout", BRANCH], check=False)
        subprocess.run(["git", "-C", str(SAVE_FOLDER), "pull", "origin", BRANCH], check=False)

ensure_repo()

# -----------------------------
# 3️⃣ Utilities
# -----------------------------
def file_hash(filepath, chunk_size=8192):
    if not os.path.exists(filepath):
        return None
    md5 = hashlib.md5()
    with open(filepath, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            md5.update(chunk)
    return md5.hexdigest()

def ensure_tz_naive(df):
    if df is None or df.empty:
        return df
    df.index = pd.to_datetime(df.index, errors='coerce').tz_localize(None)
    return df

def safe_numeric(df, columns=None):
    if columns is None:
        columns = ['open','high','low','close']
    for col in columns:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')
    df.replace([np.inf, -np.inf], np.nan, inplace=True)
    df.dropna(subset=columns, inplace=True)
    return df

def combine_fx_data(av_df, yf_df):
    av_df = ensure_tz_naive(av_df)
    yf_df = ensure_tz_naive(yf_df)
    if av_df is None or av_df.empty:
        return safe_numeric(yf_df)
    if yf_df is None or yf_df.empty:
        return safe_numeric(av_df)
    combined_df = pd.merge(
        yf_df,
        av_df[['open','high','low','close']],
        left_index=True, right_index=True,
        how='outer',
        suffixes=('','_av')
    )
    for col in ['open','high','low','close']:
        combined_df[col] = combined_df[col].fillna(combined_df[f'{col}_av'])
    combined_df.drop(columns=[f'{col}_av' for col in ['open','high','low','close']], errors='ignore', inplace=True)
    combined_df.sort_index(inplace=True)
    combined_df = safe_numeric(combined_df)
    combined_df = ensure_tz_naive(combined_df)
    return combined_df

# -----------------------------
# 4️⃣ Add Indicators
# -----------------------------
def add_all_indicators(df):
    df = ensure_tz_naive(df)
    if df is None or df.empty:
        return df
    df = safe_numeric(df.copy())
    df = df.sort_index()

    # Trend indicators
    trend = {
        'SMA_10': lambda d: ta.trend.sma_indicator(d['close'],10),
        'SMA_50': lambda d: ta.trend.sma_indicator(d['close'],50),
        'SMA_200': lambda d: ta.trend.sma_indicator(d['close'],200),
        'EMA_10': lambda d: ta.trend.ema_indicator(d['close'],10),
        'EMA_50': lambda d: ta.trend.ema_indicator(d['close'],50),
        'EMA_200': lambda d: ta.trend.ema_indicator(d['close'],200),
        'MACD': lambda d: ta.trend.macd(d['close']),
        'MACD_signal': lambda d: ta.trend.macd_signal(d['close']),
        'ADX': lambda d: ta.trend.adx(d['high'], d['low'], d['close'],14)
    }

    # Momentum indicators
    momentum = {
        'RSI_14': lambda d: ta.momentum.rsi(d['close'],14),
        'StochRSI': lambda d: ta.momentum.stochrsi(d['close'],14),
        'CCI': lambda d: ta.trend.cci(d['high'],d['low'],d['close'],20),
        'ROC': lambda d: ta.momentum.roc(d['close'],12),
        'Williams_%R': lambda d: WilliamsRIndicator(d['high'],d['low'],d['close'],14).williams_r()
    }

    # Volatility indicators
    volatility = {
        'Bollinger_High': lambda d: ta.volatility.bollinger_hband(d['close'],20,2),
        'Bollinger_Low': lambda d: ta.volatility.bollinger_lband(d['close'],20,2),
        'ATR': lambda d: ta.volatility.average_true_range(d['high'],d['low'],d['close'],14),
        'STDDEV_20': lambda d: d['close'].rolling(20).std()
    }

    # Volume indicators
    volume = {}
    if 'volume' in df.columns:
        volume = {
            'OBV': lambda d: ta.volume.on_balance_volume(d['close'],d['volume']),
            'MFI': lambda d: ta.volume.money_flow_index(d['high'],d['low'],d['close'],d['volume'],14)
        }

    indicators = {**trend,**momentum,**volatility,**volume}
    for name, func in indicators.items():
        try:
            df[name] = func(df)
        except Exception:
            df[name] = np.nan

    # Derived signals
    df['EMA_10_cross_EMA_50'] = (df['EMA_10'] > df['EMA_50']).astype(int)
    df['EMA_50_cross_EMA_200'] = (df['EMA_50'] > df['EMA_200']).astype(int)
    df['SMA_10_cross_SMA_50'] = (df['SMA_10'] > df['SMA_50']).astype(int)
    df['SMA_50_cross_SMA_200'] = (df['SMA_50'] > df['SMA_200']).astype(int)

    df.replace([np.inf,-np.inf], np.nan, inplace=True)
    df.ffill(inplace=True)
    df.bfill(inplace=True)
    df.fillna(0,inplace=True)

    numeric_cols = df.select_dtypes(include=[np.number]).columns
    if len(numeric_cols) > 0:
        scaler = MinMaxScaler()
        df[numeric_cols] = scaler.fit_transform(df[numeric_cols])

    return df

# -----------------------------
# 5️⃣ Worker Function
# -----------------------------
lock = threading.Lock()

def process_pair_file(pair, tf_file, max_retries=3):
    av_file = SAVE_FOLDER / f"{pair.replace('/','_')}_daily.csv"

    def safe_read_csv(path):
        for attempt in range(max_retries):
            try:
                if path.exists():
                    return ensure_tz_naive(pd.read_csv(path, index_col=0, parse_dates=True))
                else:
                    return pd.DataFrame()
            except Exception as e:
                print(f"⚠️ Retry {attempt+1} reading {path}: {e}")
                time.sleep(2)
        return pd.DataFrame()

    av_df = safe_read_csv(av_file)
    yf_path = SAVE_FOLDER / tf_file
    yf_df = safe_read_csv(yf_path)

    if yf_df.empty and av_df.empty:
        return None, f"{pair} ({tf_file}) skipped - no data"

    combined_df = combine_fx_data(av_df, yf_df)
    combined_df = add_all_indicators(combined_df)

    save_file = combined_save_path / f"{pair.replace('/','_')}_{tf_file.replace('.csv','')}_combined.pkl"
    old_hash = file_hash(save_file)

    for attempt in range(max_retries):
        try:
            with lock:
                combined_df.to_pickle(save_file, protocol=4)
            break
        except Exception as e:
            print(f"⚠️ Retry {attempt+1} writing {save_file}: {e}")
            time.sleep(2)

    new_hash = file_hash(save_file)
    changed = old_hash != new_hash
    return save_file if changed else None, f"{pair} ({tf_file}) {'updated' if changed else 'no change'}"

# -----------------------------
# 6️⃣ Execute in Parallel
# -----------------------------
changed_files = []
tasks = []
files_to_process = []

for pair in pairs:
    for tf_file in list(SAVE_FOLDER.glob(f"{pair.replace('/','_')}*.csv")):
        tf_file_name = tf_file.name
        if any(tf_file_name.endswith(x) for x in ["daily.csv", "_combined.csv", "_combined.pkl"]):
            continue
        files_to_process.append((pair, tf_file_name))

max_workers = max(1, min(8, len(files_to_process), (os.cpu_count() or 4)*2))
with ThreadPoolExecutor(max_workers=max_workers) as executor:
    for pair, tf_file in files_to_process:
        tasks.append(executor.submit(process_pair_file, pair, tf_file))
    for future in as_completed(tasks):
        changed_file, msg = future.result()
        print(msg)
        if changed_file:
            changed_files.append(str(changed_file))

# -----------------------------
# 7️⃣ Commit & Push Changes
# -----------------------------
if changed_files:
    print(f"🚀 Committing {len(changed_files)} modified files...")
    subprocess.run(["git", "-C", str(SAVE_FOLDER), "add"] + changed_files, check=False)
    subprocess.run(["git", "-C", str(SAVE_FOLDER), "commit", "-m", "📈 Auto update combined indicators"], check=False)
    try:
        subprocess.run(
            f"git -C {SAVE_FOLDER} push https://{GITHUB_TOKEN}@github.com/{GITHUB_USERNAME}/{GITHUB_REPO}.git {BRANCH}",
            shell=True, check=False
        )
        print("✅ Push complete.")
    except Exception as e:
        print(f"⚠️ Push failed: {e}")
else:
    print("✅ No data changes detected — skipping push.")

print("✅ All FX pairs processed and saved successfully.")


✅ Working directory: /home/runner/work/forex-ai-models/forex-ai-models/forex-alpha-models/forex-alpha-models/forex-alpha-models/forex-alpha-models/forex-alpha-models
✅ Git configured: Rahim AI Bot <nakatonabira3@gmail.com>
🔄 Repo found, updating...


Already on 'main'


Your branch is up to date with 'origin/main'.


Already up to date.
✅ No data changes detected — skipping push.
✅ All FX pairs processed and saved successfully.


From https://github.com/rahim-dotAI/forex-ai-models
 * branch            main       -> FETCH_HEAD


In [10]:
# ======================================================
# Cell 10 - Hybrid Signal Pipeline + Safe Environment
# ======================================================
import os
import sys
import json
import re
import requests
import numpy as np
import pandas as pd
import yfinance as yf
import ta
from ta.momentum import WilliamsRIndicator
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import MinMaxScaler
from pathlib import Path
import joblib
import datetime as dt
from concurrent.futures import ThreadPoolExecutor, as_completed

# -----------------------------
# 1️⃣ Detect environment
# -----------------------------
try:
    import google.colab
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

IN_GHA = "GITHUB_ACTIONS" in os.environ

# -----------------------------
# 2️⃣ Safe working folder
# -----------------------------
if IN_COLAB:
    SAVE_FOLDER = Path("/content/forex-alpha-models")
else:
    SAVE_FOLDER = Path("./forex-alpha-models")

SAVE_FOLDER.mkdir(parents=True, exist_ok=True)
os.chdir(SAVE_FOLDER)
print(f"✅ Working directory: {SAVE_FOLDER.resolve()}")

# -----------------------------
# 3️⃣ Git config
# -----------------------------
GIT_NAME = os.environ.get("GIT_USER_NAME", "Forex AI Bot")
GIT_EMAIL = os.environ.get("GIT_USER_EMAIL", "nakatonabira3@gmail.com")
os.system(f'git config --global user.name "{GIT_NAME}"')
os.system(f'git config --global user.email "{GIT_EMAIL}"')
print(f"✅ Git configured: {GIT_NAME} <{GIT_EMAIL}>")

# -----------------------------
# 4️⃣ Read tokens
# -----------------------------
FOREX_PAT = os.environ.get("FOREX_PAT")
BROWSERLESS_TOKEN = os.environ.get("BROWSERLESS_TOKEN")

if not FOREX_PAT and IN_GHA:
    print("⚠️ Warning: FOREX_PAT not found in GitHub Actions secrets")
if not BROWSERLESS_TOKEN:
    print("⚠️ Warning: BROWSERLESS_TOKEN not found")

# -----------------------------
# 5️⃣ Safe subfolders for outputs
# -----------------------------
MODEL_DIR = SAVE_FOLDER / "models"
PICKLE_FOLDER = SAVE_FOLDER / "pickles"
CSV_FOLDER = SAVE_FOLDER / "csvs"

for folder in [MODEL_DIR, PICKLE_FOLDER, CSV_FOLDER]:
    folder.mkdir(parents=True, exist_ok=True)

BROKER_JSON = SAVE_FOLDER / "broker_signals.json"
BROKER_LOG = SAVE_FOLDER / "broker_signals_log.csv"

print(f"✅ Output folders ready: {MODEL_DIR}, {PICKLE_FOLDER}, {CSV_FOLDER}")

# -----------------------------
# 6️⃣ Python environment info
# -----------------------------
print(f"Python version: {sys.version}")
print(f"Current directory: {os.getcwd()}")

# -----------------------------
# 7️⃣ CONFIG
# -----------------------------
PAIRS = ["EUR/USD", "GBP/USD", "USD/JPY", "AUD/USD"]
TIMEFRAMES = {
    "1m_7d": ("1m", "7d"),
    "5m_1mo": ("5m", "1mo"),
    "15m_60d": ("15m", "60d"),
    "1h_2y": ("1h", "2y"),
    "1d_5y": ("1d", "5y"),
}
INJECT_CANDLES = 5

# Delete old models
for f in MODEL_DIR.glob("*.pkl"):
    f.unlink()

# -----------------------------
# 8️⃣ Live price fetch
# -----------------------------
def fetch_live_rate(pair):
    from_currency, to_currency = pair.split('/')
    if not BROWSERLESS_TOKEN:
        print(f"⚠️ BROWSERLESS_TOKEN not set, skipping live rates for {pair}")
        return 0
    url = f"https://production-sfo.browserless.io/content?token={BROWSERLESS_TOKEN}"
    payload = {"url": f"https://www.x-rates.com/calculator/?from={from_currency}&to={to_currency}&amount=1"}
    try:
        res = requests.post(url, json=payload)
        match = re.search(r'ccOutputRslt[^>]*>([\d,.]+)', res.text)
        return float(match.group(1).replace(',', '')) if match else 0
    except Exception as e:
        print(f"⚠️ Live price fetch failed for {pair}: {e}")
        return 0

live_prices = {pair: fetch_live_rate(pair) for pair in PAIRS}

# -----------------------------
# 9️⃣ Data & feature functions
# -----------------------------
def fetch_data(symbol, interval, period):
    df = yf.download(symbol.replace('/', '') + "=X", interval=interval, period=period,
                     progress=False, auto_adjust=True)
    df.dropna(inplace=True)
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = [col[0] for col in df.columns]
    df.index = pd.to_datetime(df.index, errors='coerce').tz_localize(None)
    return df

def inject_live_price(df, live_price, n_candles=INJECT_CANDLES):
    df_copy = df.copy()
    n_inject = min(n_candles, len(df_copy))
    for col in ["Open","High","Low","Close"]:
        if col in df_copy.columns:
            df_copy.iloc[-n_inject:, df_copy.columns.get_loc(col)] = live_price
    df_copy.index = pd.to_datetime(df_copy.index, errors='coerce').tz_localize(None)
    return df_copy

def add_all_indicators(df):
    if df is None or df.empty:
        return df
    df = df.copy()
    df.index = pd.to_datetime(df.index, errors='coerce').tz_localize(None)

    df["SMA_10"] = ta.trend.sma_indicator(df["Close"], 10)
    df["SMA_50"] = ta.trend.sma_indicator(df["Close"], 50)
    df["EMA_10"] = ta.trend.ema_indicator(df["Close"], 10)
    df["EMA_50"] = ta.trend.ema_indicator(df["Close"], 50)
    df["EMA_200"] = ta.trend.ema_indicator(df["Close"], 200)
    df["MACD"] = ta.trend.macd(df["Close"])
    df["MACD_signal"] = ta.trend.macd_signal(df["Close"])
    df["ADX"] = ta.trend.adx(df["High"], df["Low"], df["Close"], window=14)
    df["WilliamsR"] = WilliamsRIndicator(df["High"], df["Low"], df["Close"], lbp=14).williams_r()
    df["Bollinger_High"] = ta.volatility.bollinger_hband(df["Close"], window=20)
    df["Bollinger_Low"] = ta.volatility.bollinger_lband(df["Close"], window=20)
    df["ATR"] = ta.volatility.average_true_range(df["High"], df["Low"], df["Close"], window=14)

    close = df["Close"].values
    df["return"] = np.concatenate([[0], (close[1:] - close[:-1])/close[:-1]])
    df["ma_fast"] = pd.Series(close).rolling(5).mean()
    df["ma_slow"] = pd.Series(close).rolling(20).mean()
    delta = np.diff(close, prepend=close[0])
    gain = np.where(delta>0, delta, 0)
    loss = np.where(delta<0, -delta, 0)
    avg_gain = pd.Series(gain).rolling(14).mean()
    avg_loss = pd.Series(loss).rolling(14).mean()
    rs = avg_gain / avg_loss.replace(0, 1e-9)
    df["rsi"] = 100 - (100/(1+rs))

    df.replace([np.inf, -np.inf], np.nan, inplace=True)
    df.fillna(0, inplace=True)
    numeric_cols = df.select_dtypes(include=[np.number]).columns
    if len(numeric_cols) > 0:
        scaler = MinMaxScaler()
        df[numeric_cols] = scaler.fit_transform(df[numeric_cols])
    return df

def generate_features(df):
    return add_all_indicators(df).select_dtypes(include=[np.number])

def make_labels(df):
    future = df["Close"].shift(-1)
    signal = np.where(future>df["Close"], 1, np.where(future<df["Close"], -1, 0))
    return signal[:-1]

def train_or_load_model(features, labels, model_path):
    if features.empty or len(labels)==0:
        return None, []
    model = SGDClassifier(loss="log_loss", max_iter=1000, tol=1e-3)
    model.fit(features, labels)
    joblib.dump({"model": model, "features": list(features.columns)}, model_path)
    return model, list(features.columns)

def hybrid_signal(model, features, trained_features):
    if model is None or features.empty:
        return pd.Series([0]*len(features), index=features.index)
    for col in trained_features:
        if col not in features.columns:
            features[col] = 0
    features_aligned = features[trained_features].copy()
    features_aligned.fillna(0, inplace=True)
    preds = model.predict(features_aligned)
    return pd.Series(preds, index=features.index)

# -----------------------------
# 10️⃣ Process one pair/timeframe
# -----------------------------
def process_pair_tf(pair, tf_name, interval, period, live_price):
    df = fetch_data(pair, interval, period)
    if len(df)<2:
        return None, None, None
    df_live = inject_live_price(df, live_price)
    features = generate_features(df_live)
    labels = make_labels(df_live)
    features = features.iloc[:len(labels)]
    model_path = MODEL_DIR / f"{pair.replace('/','_')}_{tf_name}.pkl"
    model, trained_features = train_or_load_model(features, labels, model_path)
    signals = hybrid_signal(model, features, trained_features)
    df_live = df_live.iloc[:len(signals)].copy()
    df_live["hybrid_signal"] = signals.values
    df_live.index = pd.to_datetime(df_live.index, errors='coerce').tz_localize(None)
    latest_signal = int(df_live["hybrid_signal"].iloc[-1])
    long_count = int((df_live["hybrid_signal"]==1).sum())
    short_count = int((df_live["hybrid_signal"]==-1).sum())
    hold_count = int((df_live["hybrid_signal"]==0).sum())
    result = {
        "pair": pair,
        "timeframe": tf_name,
        "long": long_count,
        "short": short_count,
        "hold": hold_count,
        "latest_signal": latest_signal,
        "live_price": live_price
    }
    return result, df, df_live

# -----------------------------
# 11️⃣ Run hybrid pipeline
# -----------------------------
def run_hybrid():
    broker_output = {"timestamp": dt.datetime.now(dt.timezone.utc).isoformat(), "pairs": {}}
    log_rows = []
    tasks = []
    with ThreadPoolExecutor(max_workers=6) as executor:
        for pair in PAIRS:
            live_price = live_prices[pair]
            broker_output["pairs"][pair] = {}
            for tf_name, (interval, period) in TIMEFRAMES.items():
                tasks.append(executor.submit(process_pair_tf, pair, tf_name, interval, period, live_price))
        for future in as_completed(tasks):
            result, df, df_live = future.result()
            if result is None:
                continue
            pair, tf_name = result["pair"], result["timeframe"]
            broker_output["pairs"][pair][tf_name] = {
                "long": result["long"],
                "short": result["short"],
                "hold": result["hold"],
                "latest_signal": result["latest_signal"],
                "live_price": float(result["live_price"])
            }
            log_rows.append({
                "timestamp": dt.datetime.now(dt.timezone.utc),
                "pair": pair,
                "timeframe": tf_name,
                **{k: result[k] for k in ["long","short","hold","latest_signal"]},
                "live_price": result["live_price"]
            })
    with open(BROKER_JSON, "w") as f:
        json.dump(broker_output, f, indent=2)
    log_df = pd.DataFrame(log_rows)
    if not BROKER_LOG.exists():
        log_df.to_csv(BROKER_LOG, index=False)
    else:
        log_df.to_csv(BROKER_LOG, mode="a", header=False, index=False)
    print(f"💾 Broker JSON saved: {BROKER_JSON}")
    print(f"📑 Signals logged to CSV: {BROKER_LOG}")
    return broker_output

# -----------------------------
# 12️⃣ Run script
# -----------------------------
if __name__ == "__main__":
    signals = run_hybrid()
    print("\n📊 Latest Signals JSON:\n", json.dumps(signals, indent=2))


✅ Working directory: /home/runner/work/forex-ai-models/forex-ai-models/forex-alpha-models/forex-alpha-models/forex-alpha-models/forex-alpha-models/forex-alpha-models/forex-alpha-models
✅ Git configured: Rahim AI Bot <nakatonabira3@gmail.com>
✅ Output folders ready: forex-alpha-models/models, forex-alpha-models/pickles, forex-alpha-models/csvs
Python version: 3.11.13 (main, Jun  4 2025, 04:12:12) [GCC 13.3.0]
Current directory: /home/runner/work/forex-ai-models/forex-ai-models/forex-alpha-models/forex-alpha-models/forex-alpha-models/forex-alpha-models/forex-alpha-models


💾 Broker JSON saved: forex-alpha-models/broker_signals.json
📑 Signals logged to CSV: forex-alpha-models/broker_signals_log.csv

📊 Latest Signals JSON:
 {
  "timestamp": "2025-10-08T22:32:10.747666+00:00",
  "pairs": {
    "EUR/USD": {
      "1h_2y": {
        "long": 321,
        "short": 5711,
        "hold": 170,
        "latest_signal": 1,
        "live_price": 1.163
      },
      "15m_60d": {
        "long": 374,
        "short": 5599,
        "hold": 229,
        "latest_signal": 1,
        "live_price": 1.163
      },
      "1d_5y": {
        "long": 1715,
        "short": 4123,
        "hold": 364,
        "latest_signal": 1,
        "live_price": 1.163
      },
      "1m_7d": {
        "long": 3859,
        "short": 2327,
        "hold": 16,
        "latest_signal": 1,
        "live_price": 1.163
      },
      "5m_1mo": {
        "long": 1291,
        "short": 1799,
        "hold": 3112,
        "latest_signal": 0,
        "live_price": 1.163
      }
    },
    "GBP/USD": {
 

In [11]:
# ======================================================
# Cell 11 - Initialization & CSV → PKL with Hybrid Signals
# ======================================================
import os
import pandas as pd
from pathlib import Path

# -----------------------------
# 1️⃣ Detect environment
# -----------------------------
try:
    import google.colab
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

IN_GHA = "GITHUB_ACTIONS" in os.environ

# -----------------------------
# 2️⃣ Safe working folders
# -----------------------------
if IN_COLAB:
    CSV_FOLDER = Path("/content/forex-alpha-models")  # folder where fetched CSVs are
    SAVE_FOLDER = Path("/content/combined_data")       # folder for backtest-ready .pkl files
else:
    CSV_FOLDER = Path("./forex-alpha-models")
    SAVE_FOLDER = Path("./combined_data")

CSV_FOLDER.mkdir(parents=True, exist_ok=True)
SAVE_FOLDER.mkdir(parents=True, exist_ok=True)

print(f"✅ CSV folder: {CSV_FOLDER.resolve()}")
print(f"✅ Save folder: {SAVE_FOLDER.resolve()}")

# -----------------------------
# 3️⃣ Git config (optional)
# -----------------------------
GIT_NAME = os.environ.get("GIT_USER_NAME", "Forex AI Bot")
GIT_EMAIL = os.environ.get("GIT_USER_EMAIL", "nakatonabira3@gmail.com")

os.system(f'git config --global user.name "{GIT_NAME}"')
os.system(f'git config --global user.email "{GIT_EMAIL}"')
print(f"✅ Git configured: {GIT_NAME} <{GIT_EMAIL}>")

# -----------------------------
# 4️⃣ Tokens check
# -----------------------------
FOREX_PAT = os.environ.get("FOREX_PAT")
BROWSERLESS_TOKEN = os.environ.get("BROWSERLESS_TOKEN")

if not FOREX_PAT and IN_GHA:
    print("⚠️ Warning: FOREX_PAT not found in GitHub Actions secrets")
if not BROWSERLESS_TOKEN:
    print("⚠️ Warning: BROWSERLESS_TOKEN not found")

# -----------------------------
# 5️⃣ FX Pairs
# -----------------------------
pairs = ["EUR/USD", "GBP/USD", "USD/JPY", "AUD/USD"]

# -----------------------------
# 6️⃣ Helper: temporary hybrid signal
# -----------------------------
def generate_temp_signal(df, fast=5, slow=20):
    if len(df) < slow:
        return pd.Series([0]*len(df), index=df.index)
    fast_ma = df['close'].rolling(fast).mean()
    slow_ma = df['close'].rolling(slow).mean()
    signal = (fast_ma - slow_ma).apply(lambda x: 1 if x > 0 else (-1 if x < 0 else 0))
    return signal.fillna(0)

# -----------------------------
# 7️⃣ Process all CSVs
# -----------------------------
for pair in pairs:
    csv_files = list(CSV_FOLDER.glob(f"{pair.replace('/','_')}_*_combined.csv"))
    if not csv_files:
        print(f"⚠️ No CSV files found for {pair}, skipping.")
        continue

    for csv_file in csv_files:
        try:
            # Load CSV
            df = pd.read_csv(csv_file, index_col=0, parse_dates=True)

            # Normalize timestamps
            df.index = pd.to_datetime(df.index, errors='coerce').tz_localize(None)

            # Check required OHLC columns
            if not all(col in df.columns for col in ['open', 'high', 'low', 'close']):
                print(f"⚠️ {csv_file} missing OHLC columns, skipping.")
                continue

            # Generate temporary hybrid_signal
            df['hybrid_signal'] = generate_temp_signal(df)

            # Compute ATR if missing
            if 'atr' not in df.columns:
                high, low, close = df['high'].values, df['low'].values, df['close'].values
                tr = pd.Series(
                    [max(h-l, abs(h-close[i-1]), abs(l-close[i-1])) if i>0 else h-l
                     for i, (h, l) in enumerate(zip(high, low))],
                    index=df.index
                )
                df['atr'] = tr.rolling(14).mean().fillna(1e-5).clip(lower=1e-5)

            # Save as pickle
            pkl_file = SAVE_FOLDER / f"{csv_file.stem}.pkl"
            df.to_pickle(pkl_file)
            print(f"✅ Saved {pkl_file} with temporary hybrid_signal")

        except Exception as e:
            print(f"❌ Failed to process {csv_file}: {e}")

print("🎯 All CSVs processed, hybrid_signal generated, and converted to .pkl for backtest.")


✅ CSV folder: /home/runner/work/forex-ai-models/forex-ai-models/forex-alpha-models/forex-alpha-models/forex-alpha-models/forex-alpha-models/forex-alpha-models/forex-alpha-models
✅ Save folder: /home/runner/work/forex-ai-models/forex-ai-models/forex-alpha-models/forex-alpha-models/forex-alpha-models/forex-alpha-models/forex-alpha-models/combined_data
✅ Git configured: Rahim AI Bot <nakatonabira3@gmail.com>
⚠️ No CSV files found for EUR/USD, skipping.
⚠️ No CSV files found for GBP/USD, skipping.
⚠️ No CSV files found for USD/JPY, skipping.
⚠️ No CSV files found for AUD/USD, skipping.
🎯 All CSVs processed, hybrid_signal generated, and converted to .pkl for backtest.


In [12]:
#!/usr/bin/env python3
"""
Hybrid Vectorized Backtest + GA + Momentum-aware Live Browserless Signals + Email
- Multi-timeframe calculations preserved
- GA loads previous population and continues from last generation
- Live signals with SL, TP, and 1-100 scoring
- High-confidence trade flags included
- Signals sent via Gmail (hardcoded App password)
- GA evaluation fully parallelized
- Safe for GitHub Actions and Colab (path-agnostic)
"""

# ======================================================
# Initialization
# ======================================================
import os
import sys
import json
import pickle
import random
import re
from pathlib import Path
import numpy as np
import pandas as pd
import requests
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from joblib import Parallel, delayed
import ta  # technical indicators

# -----------------------------
# 1️⃣ Detect environment
# -----------------------------
try:
    import google.colab
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

IN_GHA = "GITHUB_ACTIONS" in os.environ

# -----------------------------
# 2️⃣ Safe working folder
# -----------------------------
ROOT_PATH = Path("/content") if IN_COLAB else Path(".")
SAVE_FOLDER = ROOT_PATH / "forex-alpha-models" / "combined_with_indicators"
SAVE_FOLDER.mkdir(parents=True, exist_ok=True)
os.chdir(SAVE_FOLDER)
print(f"✅ Working directory: {SAVE_FOLDER.resolve()}")

# -----------------------------
# 3️⃣ Git config
# -----------------------------
GIT_NAME = os.environ.get("GIT_USER_NAME", "Forex AI Bot")
GIT_EMAIL = os.environ.get("GIT_USER_EMAIL", "nakatonabira3@gmail.com")
os.system(f'git config --global user.name "{GIT_NAME}"')
os.system(f'git config --global user.email "{GIT_EMAIL}"')
print(f"✅ Git configured: {GIT_NAME} <{GIT_EMAIL}>")

# -----------------------------
# 4️⃣ Read tokens
# -----------------------------
FOREX_PAT = os.environ.get("FOREX_PAT")
BROWSERLESS_TOKEN = os.environ.get("BROWSERLESS_TOKEN")

if not FOREX_PAT and IN_GHA:
    print("⚠️ Warning: FOREX_PAT not found in GitHub Actions secrets")
if not BROWSERLESS_TOKEN:
    print("⚠️ Warning: BROWSERLESS_TOKEN not found")

# -----------------------------
# 5️⃣ Safe subfolders for outputs
# -----------------------------
BEST_CHROM_FILE = SAVE_FOLDER / "best_chromosome.pkl"
TRADE_MEMORY_FILE = SAVE_FOLDER / "trade_memory.pkl"
POPULATION_FILE = SAVE_FOLDER / "population.pkl"
GEN_COUNT_FILE = SAVE_FOLDER / "generation_count.pkl"
SIGNALS_JSON_PATH = SAVE_FOLDER / "broker_signals.json"

for path in [SAVE_FOLDER, BEST_CHROM_FILE, TRADE_MEMORY_FILE, POPULATION_FILE, GEN_COUNT_FILE, SIGNALS_JSON_PATH]:
    path.parent.mkdir(parents=True, exist_ok=True)

print(f"✅ Output paths ready: {SAVE_FOLDER}")

# -----------------------------
# 6️⃣ Python environment info
# -----------------------------
print(f"Python version: {sys.version}")
print(f"Current directory: {os.getcwd()}")

# -----------------------------
# CONFIG
# -----------------------------
pairs = ['EUR/USD', 'GBP/USD', 'USD/JPY', 'AUD/USD']
ATR_PERIOD = 14
MIN_ATR = 1e-5
BASE_CAPITAL = 100
MAX_POSITION_FRACTION = 0.1
POPULATION_SIZE = 12
GENERATIONS = 10
MUTATION_RATE = 0.2
EARLY_STOPPING = 5
TOURNAMENT_SIZE = 3
EPS = 1e-8

# -----------------------------
# Gmail Config
# -----------------------------
GMAIL_USER = "nakatonabira3@gmail.com"
GMAIL_APP_PASSWORD = "gmwohahtltmcewug"

# -----------------------------
# Browserless Fetch
# -----------------------------
def fetch_live_rate(pair: str, timeout: int = 8) -> float:
    from_currency, to_currency = pair.split('/')
    token = BROWSERLESS_TOKEN
    if not token:
        print(f"⚠️ No BROWSERLESS_TOKEN found for {pair}.")
        return 0.0
    url = f"https://production-sfo.browserless.io/content?token={token}"
    payload = {"url": f"https://www.x-rates.com/calculator/?from={from_currency}&to={to_currency}&amount=1"}
    try:
        res = requests.post(url, json=payload, timeout=timeout)
        match = re.search(r'ccOutputRslt[^>]*>([\d,.]+)', res.text)
        return float(match.group(1).replace(',', '')) if match else 0.0
    except Exception as e:
        print(f"⚠️ fetch_live_rate error for {pair}: {e}")
        return 0.0

# -----------------------------
# Data Preparation Helpers
# -----------------------------
def make_index_tz_naive(df):
    if isinstance(df.index, pd.DatetimeIndex):
        df.index = pd.to_datetime(df.index, errors='coerce')
        if df.index.tz is not None:
            df.index = df.index.tz_convert(None)
    return df

def seed_hybrid_signal_if_needed(df):
    if 'hybrid_signal' not in df.columns:
        df['hybrid_signal'] = 0.0
    # Stronger seeding with RSI + SMA
    if df['hybrid_signal'].abs().sum() < 1e-6:
        rsi = ta.momentum.rsi(df['close'], window=14)
        sma_fast = df['close'].rolling(10, min_periods=1).mean()
        sma_slow = df['close'].rolling(50, min_periods=1).mean()
        df['hybrid_signal'] = (np.sign(sma_fast - sma_slow) + np.sign(rsi - 50)) / 2.0
    df['hybrid_signal'] = df['hybrid_signal'].fillna(0).astype(float)
    return df

def ensure_atr(df):
    if 'atr' in df.columns and not df['atr'].isnull().all():
        df['atr'] = df['atr'].fillna(MIN_ATR).clip(lower=MIN_ATR)
        return df
    high, low, close = df['high'].values, df['low'].values, df['close'].values
    tr = np.maximum.reduce([
        high - low,
        np.abs(high - np.roll(close, 1)),
        np.abs(low - np.roll(close, 1))
    ])
    tr[0] = high[0] - low[0] if len(tr) > 0 else MIN_ATR
    df['atr'] = pd.Series(tr, index=df.index).rolling(ATR_PERIOD, min_periods=1).mean().fillna(MIN_ATR).clip(lower=MIN_ATR)
    return df

def load_combined_data(folder):
    combined_data = {}
    for pair in pairs:
        combined_data[pair] = {}
        prefix = pair.replace('/', '_')
        for p in Path(folder).glob(f"{prefix}_*_combined.pkl"):
            tf_name = p.name.replace(f"{prefix}_", "").replace("_combined.pkl", "")
            try:
                df = pd.read_pickle(p)
                df = make_index_tz_naive(df)
                if not all(c in df.columns for c in ['open', 'high', 'low', 'close']):
                    continue
                df = seed_hybrid_signal_if_needed(df)
                df = ensure_atr(df)
                combined_data[pair][tf_name] = df
            except Exception as e:
                print(f"⚠️ Failed to load {p}: {e}")
    return combined_data

# -----------------------------
# Vectorized Backtest
# -----------------------------
def run_vector_backtest_vectorized(combined_data, capital, base_risk, atr_sl, atr_tp, conf_mult, tf_weights_per_pair, trade_memory=None):
    if trade_memory is None:
        trade_memory = {pair: [] for pair in combined_data.keys()}
    results = {}
    precomputed = {}
    for pair, tfs in combined_data.items():
        if not tfs:
            results[pair] = {'equity_curve': pd.Series([capital]), 'total_pnl': 0, 'max_drawdown': 0}
            continue
        all_idxs = sorted(set().union(*[set(pd.to_datetime(df.index, errors='coerce')) for df in tfs.values()]))
        df_all = pd.DataFrame(index=all_idxs)
        for tf_name, df in tfs.items():
            df_all[f'close_{tf_name}'] = df['close'].reindex(df_all.index).ffill()
            df_all[f'signal_{tf_name}'] = df['hybrid_signal'].reindex(df_all.index).ffill().fillna(0.0)
            df_all[f'atr_{tf_name}'] = df['atr'].reindex(df_all.index).ffill().fillna(MIN_ATR).clip(lower=MIN_ATR)
        df_all['price'] = df_all[[c for c in df_all.columns if c.startswith('close_')]].mean(axis=1).clip(lower=EPS)
        df_all['atr'] = df_all[[c for c in df_all.columns if c.startswith('atr_')]].mean(axis=1).clip(lower=MIN_ATR)
        precomputed[pair] = df_all

    for pair, df_all in precomputed.items():
        tfs = combined_data.get(pair, {})
        if not tfs:
            continue
        agg_signal = sum([df_all[f'signal_{tf}'] * tf_weights_per_pair.get(pair, {}).get(tf, 0.0) for tf in tfs.keys()])
        mean_abs_signal = np.mean([df_all[f'signal_{tf}'].abs().mean() for tf in tfs.keys()]) if tfs else 0.0
        conf_threshold = conf_mult * (mean_abs_signal + EPS)
        df_all['agg_signal'] = np.where(np.abs(agg_signal) >= conf_threshold, agg_signal, 0.0)
        price, atr, agg_signal = df_all['price'].values, df_all['atr'].values, df_all['agg_signal'].values
        n = len(price)
        if n <= 1:
            results[pair] = {'equity_curve': pd.Series([capital]), 'total_pnl': 0, 'max_drawdown': 0}
            continue
        memory_factor = 1.0
        if trade_memory.get(pair):
            total_prev_pnl = sum([float(tr.get('pnl', 0)) for tr in trade_memory[pair]])
            memory_factor = max(0.2, 1.0 + total_prev_pnl / max(1.0, capital * 10.0))
        size = (capital * base_risk * np.abs(agg_signal)) / (atr_sl * (atr / price) + EPS)
        size = np.minimum(size * memory_factor, capital * MAX_POSITION_FRACTION)
        size = np.nan_to_num(size, nan=0.0, posinf=capital * MAX_POSITION_FRACTION)
        direction = np.sign(agg_signal)
        pnl = direction * size * (atr_tp * atr / price)
        equity = np.zeros(n, dtype=float)
        equity[0] = capital
        for i in range(1, n):
            equity[i] = equity[i - 1] + float(pnl[i])
        trade_memory.setdefault(pair, []).append({'equity': float(equity[-1]), 'pnl': float(equity[-1] - capital)})
        if len(trade_memory[pair]) > 200:
            trade_memory[pair] = trade_memory[pair][-200:]
        equity_series = pd.Series(equity, index=df_all.index)
        results[pair] = {
            'equity_curve': equity_series,
            'total_pnl': float(equity[-1] - capital),
            'max_drawdown': float((equity_series.cummax() - equity_series).max()),
        }
    total_pnl = sum([r['total_pnl'] for r in results.values()])
    max_dd = max([r['max_drawdown'] for r in results.values()] or [0.0])
    score = total_pnl / (1.0 + max_dd) if (1.0 + max_dd) != 0 else total_pnl
    return score, results, trade_memory

# -----------------------------
# GA Functions
# -----------------------------
def build_tf_names(combined_data):
    return {pair: sorted(list(combined_data[pair].keys())) for pair in pairs}

def create_chromosome(tf_names_map):
    chrom = [random.uniform(1.0, 2.0), random.uniform(2.0, 4.0), random.uniform(0.005, 0.02), random.uniform(0.3, 0.7)]
    for pair in pairs:
        n = max(1, len(tf_names_map.get(pair, [])))
        w = np.random.dirichlet(np.ones(n)).tolist()
        chrom.extend(w)
    return chrom

def decode_chromosome(chrom, tf_names_map):
    atr_sl, atr_tp, base_risk, conf = chrom[:4]
    tf_weights_per_pair = {}
    idx = 4
    for pair in pairs:
        n = max(1, len(tf_names_map.get(pair, [])))
        w = np.array(chrom[idx:idx + n], dtype=float)
        if w.sum() <= 0:
            w = np.ones_like(w) / float(len(w))
        else:
            w = w / (w.sum() + EPS)
        tf_list = tf_names_map.get(pair, [])
        tf_weights_per_pair[pair] = {tf: float(weight) for tf, weight in zip(tf_list, w)} if tf_list else {}
        idx += n
    return atr_sl, atr_tp, base_risk, conf, tf_weights_per_pair

def tournament_selection(scored_population, k=TOURNAMENT_SIZE):
    selected = random.sample(scored_population, k)
    selected.sort(reverse=True, key=lambda x: x[0])
    return selected[0][1]

def run_ga_vectorized_parallel(combined_data, generations=GENERATIONS, population_size=POPULATION_SIZE, mutation_rate=MUTATION_RATE):
    tf_names_map = build_tf_names(combined_data)

    if os.path.exists(POPULATION_FILE):
        try:
            population = pickle.load(open(POPULATION_FILE, 'rb'))
        except:
            population = [create_chromosome(tf_names_map) for _ in range(population_size)]
    else:
        population = [create_chromosome(tf_names_map) for _ in range(population_size)]

    trade_memory = {}
    if os.path.exists(TRADE_MEMORY_FILE):
        try:
            trade_memory = pickle.load(open(TRADE_MEMORY_FILE, 'rb'))
        except:
            trade_memory = {}

    last_gen = 0
    if os.path.exists(GEN_COUNT_FILE):
        try:
            last_gen = pickle.load(open(GEN_COUNT_FILE, 'rb'))
        except:
            last_gen = 0

    best_score_ever = -np.inf
    best_chrom_ever = None
    early_stop_counter = 0

    def evaluate_chrom(chrom):
        atr_sl, atr_tp, base_risk, conf, tf_weights_per_pair = decode_chromosome(chrom, tf_names_map)
        score, _, _ = run_vector_backtest_vectorized(combined_data, BASE_CAPITAL, base_risk, atr_sl, atr_tp, conf, tf_weights_per_pair, trade_memory)
        return score, chrom

    for gen in range(last_gen + 1, last_gen + 1 + generations):
        scored_population = Parallel(n_jobs=-1)(delayed(evaluate_chrom)(c) for c in population)
        scored_population.sort(reverse=True, key=lambda x: x[0])
        best_score, best_chrom = scored_population[0]

        if best_score < best_score_ever:
            best_score, best_chrom = best_score_ever, best_chrom_ever

        print(f"=== Generation {gen} === Best Score: {best_score:.2f}")

        if best_score > best_score_ever:
            best_score_ever, best_chrom_ever, early_stop_counter = best_score, best_chrom, 0
        else:
            early_stop_counter += 1
            if early_stop_counter >= EARLY_STOPPING:
                print("⚠️ Early stopping triggered.")
                break

        next_population = [best_chrom]
        while len(next_population) < population_size:
            p1, p2 = tournament_selection(scored_population), tournament_selection(scored_population)
            child = [(a + b) / 2 for a, b in zip(p1, p2)]
            child = [c * random.uniform(0.95, 1.05) if random.random() < mutation_rate else c for c in child]
            next_population.append(child)
        population = next_population

        # Save population
        for path, data in [(POPULATION_FILE, population), (TRADE_MEMORY_FILE, trade_memory), (BEST_CHROM_FILE, best_chrom_ever), (GEN_COUNT_FILE, gen)]:
            path.parent.mkdir(parents=True, exist_ok=True)
            pickle.dump(data, open(path, 'wb'))

    print("✅ GA complete. Best chromosome saved.")
    return best_chrom_ever, trade_memory

# -----------------------------
# Live Signal Generation
# -----------------------------
def generate_live_signals_with_sl_tp(best_chrom, combined_data):
    tf_names_map = build_tf_names(combined_data)
    atr_sl, atr_tp, base_risk, conf, tf_weights_per_pair = decode_chromosome(best_chrom, tf_names_map)
    live_signals = {}
    prev_signals = {}

    if os.path.exists(SIGNALS_JSON_PATH):
        try:
            prev_data = json.load(open(SIGNALS_JSON_PATH, 'r'))
            prev_signals = {pair: data.get('strength', 0.0) for pair, data in prev_data.get("pairs", {}).items()}
        except:
            prev_signals = {}

    for pair in pairs:
        tfs = combined_data.get(pair, {})
        price = fetch_live_rate(pair)
        if price <= 0:
            price = np.mean([df['close'].iloc[-1] for df in tfs.values()]) if tfs else 1.0
        signal_strength = sum([tf_weights_per_pair.get(pair, {}).get(tf, 0.0) * tfs[tf]['hybrid_signal'].iloc[-1] for tf in tf_names_map.get(pair, [])])
        prev_strength = prev_signals.get(pair, 0.0)
        if np.sign(signal_strength) != np.sign(prev_strength):
            signal_strength = 0.7 * prev_strength + 0.3 * signal_strength
        direction = "BUY" if signal_strength > 0 else "SELL" if signal_strength < 0 else "HOLD"
        recent_atr = np.mean([tfs[tf]['atr'].iloc[-1] for tf in tf_names_map.get(pair, [])]) if tfs else 1.0
        score_100 = min(max(int(100 * (abs(signal_strength) / (recent_atr + EPS)) ** 0.5), 1), 100)
        SL = price - atr_sl * recent_atr * 0.5 if direction == "BUY" else price + atr_sl * recent_atr * 0.5
        TP = price + atr_tp * recent_atr * 1.0 if direction == "BUY" else price - atr_tp * recent_atr * 1.0
        high_conf = score_100 >= 80
        live_signals[pair] = {"direction": direction, "strength": float(signal_strength), "score_1_100": score_100, "last_price": float(price), "SL": float(SL), "TP": float(TP), "high_confidence": high_conf}

    with open(SIGNALS_JSON_PATH, 'w') as f:
        json.dump({"timestamp": pd.Timestamp.now().isoformat(), "pairs": live_signals}, f, indent=2)

    print(f"📡 Live signals saved to {SIGNALS_JSON_PATH}")
    return live_signals

# -----------------------------
# Email Function
# -----------------------------
def send_forex_email(signals, recipient="nakatonabira3@gmail.com"):
    def fmt(price, pair=""):
        decimals = 3 if "JPY" in pair else 4
        return f"{price:.{decimals}f}" if price else "-"

    today = pd.Timestamp.now().strftime("%Y-%m-%d")
    flags = {"USD": "🇺🇸", "EUR": "🇪🇺", "GBP": "🇬🇧", "JPY": "🇯🇵", "AUD": "🇦🇺"}
    rows = ""
    for pair, d in signals.items():
        f1, f2 = pair.split("/")
        flag_str = f"{flags.get(f1, '')} {flags.get(f2, '')}"
        conf = "🔥" if d.get("high_confidence") else ""
        rows += f"""<tr><td>{flag_str} {pair}</td><td>{fmt(d['last_price'], pair)}</td><td>{d['direction']}</td><td>{d['score_1_100']} {conf}</td><td>SL:{fmt(d['SL'], pair)} | TP:{fmt(d['TP'], pair)}</td></tr>"""

    html = f"""<html><body><h2>Forex Signals - {today}</h2><table border="1" style="border-collapse:collapse;text-align:center;"><tr><th>Instrument</th><th>Price</th><th>Signal</th><th>Score</th><th>SL/TP</th></tr>{rows}</table></body></html>"""
    msg = MIMEMultipart("alternative")
    msg['From'] = f"Forex Bot <{GMAIL_USER}>"
    msg['To'] = recipient
    msg['Subject'] = f"Forex Signals - {today}"
    msg.attach(MIMEText(html, "html"))

    try:
        with smtplib.SMTP_SSL("smtp.gmail.com", 465) as s:
            s.login(GMAIL_USER, GMAIL_APP_PASSWORD)
            s.sendmail(GMAIL_USER, recipient, msg.as_string())
        print(f"📧 Email sent to {recipient}")
    except Exception as e:
        print(f"⚠️ Email send failed: {e}")

# -----------------------------
# MAIN
# -----------------------------
if __name__ == "__main__":
    print("Loading combined data...")
    combined_data = load_combined_data(SAVE_FOLDER)

    print("🎯 Running GA optimization...")
    best_chrom, trade_memory = run_ga_vectorized_parallel(combined_data)

    print("📡 Generating live signals...")
    signals = generate_live_signals_with_sl_tp(best_chrom, combined_data)

    print(json.dumps(signals, indent=2))

    print("📨 Sending email...")
    send_forex_email(signals)


✅ Working directory: /home/runner/work/forex-ai-models/forex-ai-models/forex-alpha-models/forex-alpha-models/forex-alpha-models/forex-alpha-models/forex-alpha-models/forex-alpha-models/combined_with_indicators/forex-alpha-models/combined_with_indicators
✅ Git configured: Rahim AI Bot <nakatonabira3@gmail.com>
✅ Output paths ready: forex-alpha-models/combined_with_indicators
Python version: 3.11.13 (main, Jun  4 2025, 04:12:12) [GCC 13.3.0]
Current directory: /home/runner/work/forex-ai-models/forex-ai-models/forex-alpha-models/forex-alpha-models/forex-alpha-models/forex-alpha-models/forex-alpha-models/forex-alpha-models/combined_with_indicators
Loading combined data...
🎯 Running GA optimization...


=== Generation 1 === Best Score: 0.00
=== Generation 2 === Best Score: 0.00
=== Generation 3 === Best Score: 0.00
=== Generation 4 === Best Score: 0.00
=== Generation 5 === Best Score: 0.00
=== Generation 6 === Best Score: 0.00
⚠️ Early stopping triggered.
✅ GA complete. Best chromosome saved.
📡 Generating live signals...


📡 Live signals saved to forex-alpha-models/combined_with_indicators/broker_signals.json
{
  "EUR/USD": {
    "direction": "HOLD",
    "strength": 0.0,
    "score_1_100": 1,
    "last_price": 1.163,
    "SL": 2.044641891626124,
    "TP": -1.0761722004366863,
    "high_confidence": false
  },
  "GBP/USD": {
    "direction": "HOLD",
    "strength": 0.0,
    "score_1_100": 1,
    "last_price": 1.34,
    "SL": 2.221641891626124,
    "TP": -0.8991722004366862,
    "high_confidence": false
  },
  "USD/JPY": {
    "direction": "HOLD",
    "strength": 0.0,
    "score_1_100": 1,
    "last_price": 152.6,
    "SL": 153.4816418916261,
    "TP": 150.3608277995633,
    "high_confidence": false
  },
  "AUD/USD": {
    "direction": "HOLD",
    "strength": 0.0,
    "score_1_100": 1,
    "last_price": 0.6587,
    "SL": 1.5403418916261238,
    "TP": -1.5804722004366862,
    "high_confidence": false
  }
}
📨 Sending email...


📧 Email sent to nakatonabira3@gmail.com
