# ETF Analysis Pipeline

Runs the full analysis pipeline via thin wrappers over `notebooks/scripts/s1-s6`.

| Section | Script | Description |
|---------|--------|-------------|
| 0 | - | Setup & Configuration |
| 1 | s1_universe | Universe Discovery (~5,000 ETFs) |
| 2 | s2_collect | Historical Data Collection (IB + yfinance) |
| 3 | s3_factors | Factor Scoring (Mom/Qual/Val/Vol) |
| 4 | s4_optimize | Portfolio Construction |
| 5 | s5_backtest | Backtesting |
| 6 | s6_trades | Trade Recommendations |

Output: `~/trading/live_portfolio/trade_plan.csv` → used by `02_execute_trades.ipynb`

---
## Section 0: Setup

In [None]:
# Standard-library helpers used by the setup cell below.
import sys
import warnings
from pathlib import Path

# nest_asyncio patches asyncio so an already-running event loop can be re-entered.
# NOTE(review): presumably needed because the IB client code drives its own loop
# inside the notebook kernel's loop — confirm.
import nest_asyncio
nest_asyncio.apply()
# Silences every warning category for the rest of the session, so deprecation and
# numerical warnings from any library will not be shown; narrow the filter if
# those matter.
warnings.filterwarnings("ignore")

# --- Project layout ---------------------------------------------------------
# PROJECT_ROOT is the parent of the current working directory.
PROJECT_ROOT = Path.cwd().parent
# Prepend both import roots in one step. "notebooks" lands ahead of "src",
# the same order two successive insert(0, ...) calls produce.
sys.path[:0] = [str(PROJECT_ROOT / "notebooks"), str(PROJECT_ROOT / "src")]

# --- Data and output locations (all under the user's home directory) --------
DATA_DIR = Path.home().joinpath("trade_data", "ETFTrader")
RAW_DIR = DATA_DIR.joinpath("raw")
PROCESSED_DIR = DATA_DIR.joinpath("processed")
IB_CACHE_DIR = DATA_DIR.joinpath("ib_historical")
LIVE_DIR = Path.home().joinpath("trading", "live_portfolio")

# Create the directories this notebook writes to. RAW_DIR is not created here.
for directory in (PROCESSED_DIR, LIVE_DIR, IB_CACHE_DIR):
    directory.mkdir(parents=True, exist_ok=True)

# --- Strategy parameters ----------------------------------------------------
FACTOR_WEIGHTS = {
    "momentum": 0.35,
    "quality": 0.30,
    "value": 0.15,
    "volatility": 0.20,
}
NUM_POSITIONS = 20
OPTIMIZER_TYPE = "rankbased"
REBALANCE_FREQ = "bimonthly"

# --- IB connection settings -------------------------------------------------
IB_HOST = "127.0.0.1"
IB_PORT = 4001
IB_CLIENT_ID = 5

# Echo the key settings so the run's configuration is visible in the output.
print(f"Data:       {DATA_DIR}")
print(f"Output:     {LIVE_DIR}")
print(f"Positions:  {NUM_POSITIONS}")
print(f"Rebalance:  {REBALANCE_FREQ}")

---
## Section 1: Universe Discovery

In [None]:
from scripts.s1_universe import discover_universe

# discover_universe yields three values: the ticker collection, the category
# data and a universe table. All three are kept as notebook globals.
(
    all_tickers,
    categories,
    universe_df,
) = discover_universe(PROJECT_ROOT)

universe_size = len(all_tickers)
print(f"Universe: {universe_size} ETFs")

---
## Section 2: Data Collection

Set `RUN_COLLECTION = True` to connect to IB Gateway.

In [None]:
# Leave False to skip the live IB Gateway connection; flip to True to connect.
RUN_COLLECTION = False  # Set True to connect to IB

from scripts.s2_collect import collect_data, apply_quality_filter

prices = collect_data(
    tickers=all_tickers,
    ib_cache_dir=IB_CACHE_DIR,
    processed_dir=PROCESSED_DIR,
    ib_host=IB_HOST, ib_port=IB_PORT, ib_client_id=IB_CLIENT_ID,
    run_collection=RUN_COLLECTION,
)

if prices is not None:
    # Filter first so the reported shape reflects what later sections receive.
    prices = apply_quality_filter(prices)
    print(f"Prices: {prices.shape[1]} tickers x {prices.shape[0]} days")
else:
    # collect_data can return None. Say so here instead of letting Section 3
    # fail later on prices=None. print is used rather than warnings.warn
    # because the setup cell silences all warnings.
    print("Prices: none returned by collect_data - Section 3 onwards needs price data")

---
## Section 3: Factor Scoring

In [None]:
from scripts.s3_factors import score_factors

# score_factors returns the combined factor scores together with a price
# table (prices_basic) that the portfolio and backtest cells consume.
combined_scores, prices_basic = score_factors(
    prices,
    factor_weights=FACTOR_WEIGHTS,
    categories=categories,
    raw_dir=RAW_DIR,
)

scored_count = len(combined_scores)
print(f"Scores: {scored_count} tickers")
# Trailing expression: the notebook renders the first 20 rows.
combined_scores.head(20)

---
## Section 4: Portfolio Construction

In [None]:
from scripts.s4_optimize import build_portfolio

# Build the target weights from the factor scores and price table.
target_weights = build_portfolio(
    combined_scores,
    prices_basic,
    num_positions=NUM_POSITIONS,
    optimizer_type=OPTIMIZER_TYPE,
)

# Persist the latest target portfolio in the live-portfolio directory.
latest_portfolio_path = LIVE_DIR / "target_portfolio_latest.csv"
target_weights.to_csv(latest_portfolio_path, header=True)

position_count = len(target_weights)
print(f"Portfolio: {position_count} positions")
# Trailing expression: the notebook renders the weights.
target_weights

---
## Section 5: Backtesting

In [None]:
from scripts.s5_backtest import run_backtest

results = run_backtest(
    prices_basic,
    combined_scores,
    rebalance_frequency=REBALANCE_FREQ,
    num_positions=NUM_POSITIONS,
)

# Headline metrics; any metric missing from the dict is reported as 0.
m = results["metrics"]
print(f"CAGR:     {m.get('cagr', 0):.1%}")
print(f"Sharpe:   {m.get('sharpe_ratio', 0):.2f}")
print(f"Max DD:   {m.get('max_drawdown', 0):.1%}")

# Annualise the rebalance count. The history length in rows is divided by 252
# (presumably trading days per year — confirm) and floored at one year so
# short histories do not inflate the rate.
years_covered = max(1, len(prices_basic) / 252)
rebalances_per_year = m.get("num_rebalances", 0) / years_covered
print(f"Rebal/yr: {rebalances_per_year:.1f}")

---
## Section 6: Trade Recommendations

Connects to IB, pulls live positions, generates trade plan with $70k cash reserve.

In [None]:
from scripts.s6_trades import generate_trades

# Per the section description, this connects to IB, pulls live positions and
# generates the trade plan; LIVE_DIR is passed as the output location.
trades = generate_trades(
    target_weights, LIVE_DIR,
    ib_host=IB_HOST, ib_port=IB_PORT,
)

# A bare expression nested inside an `if` block is never rendered by the
# notebook; only a cell's trailing top-level expression is. Build the table
# conditionally, then leave it as the final expression so it is displayed.
# When there are no trades the value is None and nothing is shown.
trades_df = None
if trades:
    import pandas as pd
    trades_df = pd.DataFrame(trades)
trades_df

---

**Next step:** Open `02_execute_trades.ipynb` to review, edit, and execute the trade plan.