# Troubleshooting brute-force baseline
# Goal: reproduce the FVU (fraction of variance unexplained) statistics of the brute-force search on a simple symbolic-regression (SR) toy problem, and inspect the ranked candidates.

In [1]:
import numpy as np
import pandas as pd
from simplipy import SimpliPyEngine

from flash_ansr import SkeletonPool
from flash_ansr.baselines import BruteForceModel

In [2]:
# Toy symbolic-regression setup: the target is y = 2*x1 + 3, and x2 is a second feature
# that does not appear in the target.
engine = SimpliPyEngine.load("dev_7-3", install=True)

# Both priors are "uniform"; the n_support prior has low == high == 16.
support_sampler_config = dict(
    support_prior=dict(name="uniform", kwargs=dict(low=-1, high=1, min_value=-1, max_value=1)),
    n_support_prior=dict(name="uniform", kwargs=dict(low=16, high=16, min_value=16, max_value=16)),
)

# Small sampling budget: at most two operators and a maximum length of four.
sample_strategy = dict(
    n_operator_distribution="equiprobable_lengths",
    min_operators=0,
    max_operators=2,
    power=1,
    max_length=4,
    max_tries=1,
    independent_dimensions=True,
)

# Seed the pool with one single-token skeleton per variable plus a bare constant.
pool = SkeletonPool.from_dict(
    skeletons={(token,) for token in ("x1", "x2", "<constant>")},
    simplipy_engine=engine,
    sample_strategy=sample_strategy,
    literal_prior=dict(name="normal", kwargs=dict(loc=0, scale=1)),
    variables=["x1", "x2"],
    support_sampler_config=support_sampler_config,
)

# Synthetic data: 64 rows, 2 features drawn uniformly from [-2, 2) with a fixed seed.
rng = np.random.default_rng(0)
X = rng.uniform(low=-2, high=2, size=(64, 2)).astype(np.float32)
# Column-indexing with a list keeps y two-dimensional, shape (64, 1).
y = 3.0 + 2.0 * X[:, [0]]

model = BruteForceModel(
    skeleton_pool=pool,
    simplipy_engine=engine,
    max_length=3,
    max_expressions=64,
    include_constant_token=True,
    ignore_holdouts=True,
    n_restarts=2,
    refiner_p0_noise=None,
)

model.fit(X, y)
print(f"fit results: {len(model._results)} expressions explored")

fit results: 63 expressions explored


In [3]:
# Inspect the candidates ranked by score (lower score ranks first), with FVU and complexity.
cols = ["expression", "fvu", "score", "complexity"]
summary = model.results[cols].copy()
# Candidates can tie on score. pandas' default sort algorithm (quicksort) is not stable,
# so the order of tied rows -- including which one lands at position 0 -- is not guaranteed.
# A stable sort keeps tied rows in the order `model.results` provides them.
summary_sorted = (
    summary
    .sort_values("score", ascending=True, kind="stable")
    .reset_index(drop=True)
)
summary_sorted.head(12)

Unnamed: 0,expression,fvu,score,complexity
0,"[pow1_2, x2]",,-15.55356,2
1,"[pow1_2, x1]",,-15.55356,2
2,"[pow1_4, x2]",,-15.55356,2
3,"[pow1_4, x1]",,-15.55356,2
4,"[asin, x2]",,-15.55356,2
5,"[asin, x1]",,-15.55356,2
6,"[acos, x2]",,-15.55356,2
7,"[acos, x1]",,-15.55356,2
8,[<constant>],0.984375,0.043161,1
9,"[neg, <constant>]",0.984375,0.093161,2


In [4]:
# Show the best and worst ranked entries and which expression the model actually selected.
best = summary_sorted.iloc[0]
worst = summary_sorted.iloc[-1]
chosen_expr = model.get_expression()
print("Best expression:", chosen_expr)
# The selected expression comes from the model, while the FVU/score below come from the
# sorted table. Print the table's top row too so any disagreement between them is visible.
print("Top-ranked row expression:", best.expression)
print("Best FVU:", best.fvu, "Score:", best.score)
print("Worst FVU:", worst.fvu, "Score:", worst.score, "Expr:", worst.expression)

# Evaluate the best prediction on the training data.
# y is 2-D with shape (n, 1). If predict() returned a flat (n,) array, `y - best_pred`
# would broadcast to (n, n) and silently give a wrong variance, so align the shapes first.
# NOTE(review): the output shape of predict() is not visible here -- the reshape is a no-op
# if it already matches y, and raises if the number of elements differs.
best_pred = np.asarray(model.predict(X, nth_best=0)).reshape(y.shape)

# Non-finite predictions (e.g. an operator evaluated outside its domain) turn the variance
# into NaN; report them explicitly instead of leaving an unexplained NaN FVU.
n_nonfinite = int(np.count_nonzero(~np.isfinite(best_pred)))
if n_nonfinite:
    print(f"Warning: {n_nonfinite}/{best_pred.size} predictions are non-finite; manual FVU will be NaN")

# FVU = Var(residuals) / Var(y), both with the sample (ddof=1) estimator.
residual_var = np.var(y - best_pred, ddof=1)
orig_var = np.var(y, ddof=1)
fvu_manual = float(residual_var / orig_var)
print("Manual FVU of best prediction:", fvu_manual)

Best expression: pow1_2(x2)
Best FVU: nan Score: -15.553559774527022
Worst FVU: 17201.85326942853 Score: 4.335575238845225 Expr: ['inv', 'x1']
Manual FVU of best prediction: nan


  return x ** 0.5
