## Multi-Asset Directional Change Optimization via NSGA-II

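**Objective:**  
Find a single **universal scaling factor** $k$ that sets each asset's directional-change threshold $\theta_i$:

$$
\theta_i = k \cdot \sigma_i
$$

where $\sigma_i$ is the volatility of asset *i*, measured as the standard deviation of its log returns.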

**Notes:**
- Set `SQUASH_VALUE` to your own calibrated squash parameter before running.
- The objectives are dimensionless scores, which keeps the optimization robust across assets.

In [1]:
import numpy as np
import random
import plotly.express as px
import pandas as pd
from deap import base, creator, tools, algorithms
from multiprocessing import Pool
from functools import partial

from core.data import load_all_klines
from core.utils import select_least_correlated_with
from core.dc import compute_directional_change_events, attach_OSV_EXT_to_runs
from core.opt import event_density_score, up_down_asymmetry

# --------------------------------------------------------------------------
# Load and prepare data
# --------------------------------------------------------------------------
# Load the hourly kline data set. The filtering implied by `min_years` is
# implemented inside `load_all_klines` (not visible here).
df = load_all_klines(
    root="data/data/spot/monthly/klines/",
    interval="1h",
    range_folder="2017-01-01_2025-10-08",
    min_years=4,
)

# Keep a subset of symbols chosen relative to BTCUSDT; the selection rule
# lives in `select_least_correlated_with` (presumably correlation-based, as
# the name suggests -- confirm in core.utils).
df_btc_diverse, _ = select_least_correlated_with(
    df, reference_symbol="BTCUSDT", quantile=0.5
)
symbols = df_btc_diverse.index.get_level_values("Symbol").unique().tolist()
print(f"Total symbols in filtered dataset: {len(symbols)}")

# Per-symbol close-price arrays and log-return volatilities.
all_prices, volatilities = {}, {}
for symbol in symbols:
    prices = df_btc_diverse.loc[symbol, "Close"].dropna().to_numpy()
    # Too few observations for a meaningful volatility estimate.
    if len(prices) < 50:
        continue
    sigma = np.std(np.diff(np.log(prices)))
    # Skip (near-)constant series, and also NaN/inf volatilities produced by
    # non-positive prices going through np.log: `nan <= 1e-8` is False, so
    # without the explicit finiteness check a NaN sigma would be kept and
    # later turn into a NaN threshold theta = k * sigma.
    if not np.isfinite(sigma) or sigma <= 1e-8:
        continue
    all_prices[symbol], volatilities[symbol] = prices, sigma

# Squash parameter forwarded to the asymmetry score during optimization.
SQUASH_VALUE = 1.76


# --------------------------------------------------------------------------
# Evaluation function (top-level, picklable)
# --------------------------------------------------------------------------
def evaluate_k(individual, all_prices, volatilities, d_target=0.002, squash=4.0):
    """Score one candidate scaling factor k across every asset.

    For each symbol the threshold ``theta = k * sigma`` is used to extract
    directional-change events and runs. Assets that produce no events, or
    fewer than two runs carrying an ``OSV_EXT`` value, are left out.

    Args:
        individual: Sequence whose first element is the candidate k.
        all_prices: Mapping of symbol -> price series.
        volatilities: Mapping of symbol -> volatility (sigma) of that series.
        d_target: Forwarded to ``event_density_score``.
        squash: Forwarded to ``up_down_asymmetry``.

    Returns:
        Tuple ``(mean density score, mean asymmetry score)`` over the assets
        that were scored, or ``(0.0, 0.0)`` when none could be scored.
    """
    scale = individual[0]
    density_total = 0
    asymmetry_total = 0
    n_scored = 0

    for symbol in all_prices:
        series = all_prices[symbol]
        vol = volatilities[symbol]
        threshold = scale * vol

        events, raw_runs = compute_directional_change_events(series, threshold)
        if not events:
            continue

        # Only runs that received an OSV_EXT value take part in the scoring.
        usable_runs = [
            run
            for run in attach_OSV_EXT_to_runs(raw_runs, threshold)
            if run.get("OSV_EXT") is not None
        ]
        if len(usable_runs) < 2:
            continue

        density = event_density_score(
            series, events, d_target=d_target, alpha=2.0, beta=1.5
        )
        up_score, down_score = up_down_asymmetry(
            usable_runs, threshold, squash=squash
        )

        density_total += density
        asymmetry_total += (up_score + down_score) / 2
        n_scored += 1

    if n_scored == 0:
        return 0.0, 0.0
    return density_total / n_scored, asymmetry_total / n_scored


# --------------------------------------------------------------------------
# NSGA-II optimization
# --------------------------------------------------------------------------
def run_nsga2_global_k(
    all_prices,
    volatilities,
    d_target=0.003,
    squash=4.0,
    k_low=8.0,
    k_high=52.0,
    ngen=40,
    pop_size=50,
    seed=42,
    n_jobs=8,
):
    """Search for a universal scaling factor k using NSGA-II selection.

    A population of single-gene individuals (the gene is k) is evolved with
    DEAP's (mu + lambda) loop; both objectives returned by ``evaluate_k``
    (mean event-density score, mean asymmetry score) are maximized.
    Fitness evaluations are distributed over a multiprocessing pool.

    Args:
        all_prices: Mapping of symbol -> price series, passed to ``evaluate_k``.
        volatilities: Mapping of symbol -> sigma, passed to ``evaluate_k``.
        d_target: Forwarded to ``evaluate_k``.
        squash: Forwarded to ``evaluate_k``.
        k_low: Lower bound of the search interval for k.
        k_high: Upper bound of the search interval for k.
        ngen: Number of generations.
        pop_size: Population size (mu); 2 * pop_size offspring are produced
            per generation.
        seed: Seed for ``random`` and ``numpy.random``.
        n_jobs: Number of worker processes.

    Returns:
        Tuple ``(pareto_ks, pareto_scores)``: an array with the k of every
        individual on the Pareto front and an array with their fitness values.
    """
    random.seed(seed)
    np.random.seed(seed)

    # DEAP stores created classes on the `creator` module; remove leftovers
    # from a previous call so they are rebuilt cleanly.
    if hasattr(creator, "FitnessMulti"):
        del creator.FitnessMulti
    if hasattr(creator, "Individual"):
        del creator.Individual

    creator.create("FitnessMulti", base.Fitness, weights=(1.0, 1.0))
    creator.create("Individual", list, fitness=creator.FitnessMulti)

    toolbox = base.Toolbox()
    toolbox.register("attr_k", lambda: random.uniform(k_low, k_high))
    toolbox.register(
        "individual", tools.initRepeat, creator.Individual, toolbox.attr_k, n=1
    )
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)

    # Bind static args to top-level evaluate function
    toolbox.register(
        "evaluate",
        partial(
            evaluate_k,
            all_prices=all_prices,
            volatilities=volatilities,
            d_target=d_target,
            squash=squash,
        ),
    )

    def mate_within_bounds(ind1, ind2):
        """Blend crossover followed by clamping genes to [k_low, k_high].

        cxBlend extrapolates beyond the parents, so without clamping a child
        can leave the search interval (even become negative, which would
        yield a non-positive threshold theta = k * sigma).
        """
        children = tools.cxBlend(ind1, ind2, alpha=0.5)
        for child in children:
            for i, gene in enumerate(child):
                child[i] = min(max(gene, k_low), k_high)
        return children

    # Crossover runs in the parent process only, so a closure is fine here.
    toolbox.register("mate", mate_within_bounds)
    toolbox.register(
        "mutate", tools.mutPolynomialBounded, low=k_low, up=k_high, eta=20.0, indpb=1.0
    )
    toolbox.register("select", tools.selNSGA2)

    # Multiprocessing pool; always released, even if the evolution fails.
    pool = Pool(processes=n_jobs)
    try:
        toolbox.register("map", pool.map)

        pop = toolbox.population(n=pop_size)
        hof = tools.ParetoFront()

        print("Starting NSGA-II optimization...")
        algorithms.eaMuPlusLambda(
            population=pop,
            toolbox=toolbox,
            mu=pop_size,
            lambda_=2 * pop_size,
            cxpb=0.7,
            mutpb=0.3,
            ngen=ngen,
            halloffame=hof,
            verbose=True,
        )
    except BaseException:
        # Do not wait for outstanding work when aborting.
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        pool.join()

    pareto_ks = np.array([ind[0] for ind in hof])
    pareto_scores = np.array([ind.fitness.values for ind in hof])
    return pareto_ks, pareto_scores


# --------------------------------------------------------------------------
# Run optimization
# --------------------------------------------------------------------------
# Search settings for this run, collected in one place.
nsga2_settings = {
    "d_target": 0.0035,
    "squash": SQUASH_VALUE,
    "k_low": 4.0,
    "k_high": 64.0,
    "ngen": 80,  # 80 generations
    "pop_size": 150,  # larger population
    "seed": 42,
    "n_jobs": 32,  # if you have 32 threads/cores
}

print("\nRunning global NSGA-II optimization over k...")
pareto_ks, pareto_scores = run_nsga2_global_k(
    all_prices, volatilities, **nsga2_settings
)

# An empty Pareto front means there is nothing to recommend.
if not len(pareto_ks):
    raise RuntimeError("No Pareto-optimal solutions found!")

# --------------------------------------------------------------------------
# Select best solution and print results
# --------------------------------------------------------------------------
# Pick the Pareto point with the largest sum of both objective scores.
totals = pareto_scores.sum(axis=1)
best_idx = np.argmax(totals)
best_k = pareto_ks[best_idx]
best_density, best_asym = pareto_scores[best_idx]

# Assemble the report first, then emit it in one go.
rule = "=" * 60
report_lines = [
    "",
    rule,
    "✅ FINAL RECOMMENDATION",
    rule,
    f"Global squash parameter:       {SQUASH_VALUE:.2f}",
    f"Optimal universal k:           {best_k:.2f}",
    f"  → Avg event density score:   {best_density:.4f}",
    f"  → Avg asymmetry score:       {best_asym:.4f}",
    "",
    "Use for any asset:",
    f"  θ = {best_k:.2f} × asset_volatility",
    rule,
]
print("\n".join(report_lines))

# --------------------------------------------------------------------------
# Plot Pareto front with Plotly Express (interactive)
# --------------------------------------------------------------------------

# Prepare data frame for convenience
# One row per Pareto-front individual: both objective scores and its k.
pareto_df = pd.DataFrame(
    {
        "Event Density Score": pareto_scores[:, 0],
        "Asymmetry Score": pareto_scores[:, 1],
        "k": pareto_ks,
    }
)

# Mark the best individual
pareto_df["is_best"] = False
pareto_df.loc[best_idx, "is_best"] = True

# Base plot: color by 'k', use smaller markers
fig = px.scatter(
    pareto_df,
    x="Event Density Score",
    y="Asymmetry Score",
    color="k",
    color_continuous_scale="plasma",
    hover_data={"k": True, "Event Density Score": ":.4f", "Asymmetry Score": ":.4f"},
    labels={"k": "k (θ = k·σ)"},
    title="Pareto Front: Global k Optimization",
)

# Style the base trace BEFORE adding the highlight. The selector
# mode="markers" also matches the highlight trace, so running this after
# add_scatter would overwrite its size=14 with size=7.
fig.update_traces(marker=dict(size=7, opacity=0.85), selector=dict(mode="markers"))

# Highlight best point in red
best_point = pareto_df.loc[best_idx]
fig.add_scatter(
    x=[best_point["Event Density Score"]],
    y=[best_point["Asymmetry Score"]],
    mode="markers",
    marker=dict(color="red", size=14, line=dict(color="black", width=1)),
    name=f"Selected Optimum (k = {best_k:.2f})",
    hovertemplate=(
        f"<b>Selected Optimum</b><br>"
        f"k = {best_k:.2f}<br>"
        f"Event Density: {best_point['Event Density Score']:.4f}<br>"
        f"Asymmetry: {best_point['Asymmetry Score']:.4f}<extra></extra>"
    ),
)

# Adjust layout aesthetics
fig.update_layout(
    width=900,
    height=700,
    legend=dict(yanchor="bottom", y=0.02, xanchor="right", x=0.98),
    template="plotly_white",
)

fig.show()


Dropped 3 symbols with < 4 years of data:
  GASUSDT: 928 days
  GLMUSDT: 928 days
  SNTUSDT: 697 days

Loaded 32 symbols | Shape: (1795894, 11)
Correlation threshold vs BTCUSDT (q=0.50): 0.177
Selected symbols: ['BTCUSDT', 'BATUSDT', 'DASHUSDT', 'DCRUSDT', 'DGBUSDT', 'GNOUSDT', 'IOTAUSDT', 'LRCUSDT', 'MANAUSDT', 'NEOUSDT', 'NMRUSDT', 'QTUMUSDT', 'SCUSDT', 'XTZUSDT', 'XVGUSDT', 'ZECUSDT', 'ZENUSDT']
Total symbols in filtered dataset: 17

Running global NSGA-II optimization over k...
Starting NSGA-II optimization...
gen	nevals
0  	150   
1  	300   
2  	300   
3  	300   
4  	300   
5  	300   
6  	300   
7  	300   
8  	300   
9  	300   
10 	300   
11 	300   
12 	300   
13 	300   
14 	300   
15 	300   
16 	300   
17 	300   
18 	300   
19 	300   
20 	300   
21 	300   
22 	300   
23 	300   
24 	300   
25 	300   
26 	300   
27 	300   
28 	300   
29 	300   
30 	300   
31 	300   
32 	300   
33 	300   
34 	300   
35 	300   
36 	300   
37 	300   
38 	300   
39 	300   
40 	300   
41 	300   
42 	30