In [1]:
import os
from pathlib import Path

# Run the notebook from the repository root so relative paths used below
# (".artifacts/...", "src") resolve regardless of where Jupyter was launched.
# Only a *trailing* notebooks/research component is stripped, and the check is
# done on path parts so it does not depend on the platform's path separator.
_start_dir = Path.cwd()
if _start_dir.parts[-2:] == ("notebooks", "research"):
    cwd = str(_start_dir.parents[1])
else:
    cwd = str(_start_dir)
os.chdir(cwd)

In [2]:
# --- Robust notebook shim for legacy joblib artifacts expecting `encoders.*` ---
import sys, types, numpy as np

# Create/replace a lightweight 'encoders' module in sys.modules
# Empty module object; the shim classes are attached to it further down and it
# is then registered in sys.modules under the name "encoders".
enc_mod = types.ModuleType("encoders")

# sentence-transformers is optional at import time: when the import fails the
# name is bound to None and the failure is only reported here. The shim raises
# a RuntimeError later, when an encoder is actually needed.
try:
    from sentence_transformers import SentenceTransformer
except Exception as e:
    SentenceTransformer = None
    print("NOTE: sentence-transformers not available:", e)

class _SBERTBase:
    """
    Compat shim implementing the sklearn Transformer API expected by saved Pipelines.
    Handles pickles that don't call __init__ and are missing attributes.
    Provides both class names: SBERTEncoder and SBERTFeaturizer.
    """
    # NOTE: __init__ might not be called during unpickle; use _ensure_attrs() everywhere.
    def __init__(self, model="sentence-transformers/all-MiniLM-L6-v2", **kwargs):
        self.model_name = model
        self._enc = None
        self._kwargs = kwargs

    def _ensure_attrs(self):
        # Add any attributes that might be missing from legacy pickles
        if not hasattr(self, "model_name") or self.model_name is None:
            self.model_name = "sentence-transformers/all-MiniLM-L6-v2"
        if not hasattr(self, "_enc"):
            self._enc = None
        if not hasattr(self, "_kwargs"):
            self._kwargs = {}

    def _ensure_encoder(self):
        self._ensure_attrs()
        if self._enc is None:
            if SentenceTransformer is None:
                raise RuntimeError(
                    "sentence-transformers not installed in this kernel; "
                    "pip install sentence-transformers && restart kernel"
                )
            self._enc = SentenceTransformer(self.model_name)

    # sklearn API
    def fit(self, X, y=None):
        self._ensure_attrs()
        return self

    def transform(self, X):
        self._ensure_encoder()
        return np.asarray(self._enc.encode(list(X), show_progress_bar=False))

    # some older code may call .encode directly; alias it
    def encode(self, X):
        return self.transform(X)

# Expose both legacy names on the encoders module
# Two empty subclasses: they add nothing to _SBERTBase and exist only so that
# both class names can be looked up on the "encoders" module.
class SBERTEncoder(_SBERTBase): ...
class SBERTFeaturizer(_SBERTBase): ...

# Attach the classes to the synthetic module and register it, replacing any
# "encoders" entry already present in sys.modules.
enc_mod.SBERTEncoder = SBERTEncoder
enc_mod.SBERTFeaturizer = SBERTFeaturizer
sys.modules["encoders"] = enc_mod

# Make sure your package code is importable too (if needed)
import pathlib

# Resolve ./src once (relative to the current working directory) and append it
# to sys.path unless that exact entry is already present.
_src_dir = str(pathlib.Path("src").resolve())
if _src_dir not in sys.path:
    sys.path.append(_src_dir)
print("encoders shim ready (SBERTEncoder + SBERTFeaturizer) and sys.path configured")

encoders shim ready (SBERTEncoder + SBERTFeaturizer) and sys.path configured


In [3]:
import joblib
from pathlib import Path

def load_mapper():
    """Load the first mapper artifact that exists under ``.artifacts/``.

    Candidates are tried in a fixed order; the resolved path of the one that
    is loaded is printed.

    Returns:
        Whatever object ``joblib.load`` reconstructs from the artifact.

    Raises:
        FileNotFoundError: if none of the candidate files exists.
    """
    candidates = (".artifacts/defi_mapper.joblib", ".artifacts/defi_mapper_embed.joblib")
    resolved = (Path(name).resolve() for name in candidates)
    found = next((p for p in resolved if p.exists()), None)
    if found is None:
        raise FileNotFoundError("No mapper artifact found in .artifacts/")
    print("Loading:", found.as_posix())
    # NOTE: joblib artifacts are pickle-based; only load files you trust.
    return joblib.load(found.as_posix())

# Load the saved pipeline and print its repr to see which steps it contains.
pipe = load_mapper()
print(pipe)


Loading: /Users/ian_moore/repos/micro-lm/.artifacts/defi_mapper.joblib
Pipeline(steps=[('sbertencoder', <__main__.SBERTEncoder object at 0x3178fb370>),
                ('calibratedclassifiercv',
                 CalibratedClassifierCV(cv=3,
                                        estimator=LogisticRegression(C=8.0,
                                                                     class_weight='balanced',
                                                                     max_iter=2000,
                                                                     random_state=0),
                                        method='isotonic'))])


In [4]:
# Smoke test: classify one prompt and show the three classes with the highest
# predicted probability.
prompt = "supply 7.0245 SOL to maker"
pred  = pipe.predict([prompt])[0]
probs = pipe.predict_proba([prompt])[0]
print("Predicted:", pred)
# Pair each class label with its probability and sort descending by probability.
print("Top-3:", sorted(zip(pipe.classes_, probs), key=lambda t: t[1], reverse=True)[:3])

  return forward_call(*args, **kwargs)


Predicted: deposit_asset
Top-3: [('deposit_asset', 1.0), ('borrow_asset', 0.0), ('claim_rewards', 0.0)]


  return forward_call(*args, **kwargs)


### 1) Imports (from ngeodesic.core)

In [5]:
from ngeodesic.core.pca_warp import pca3_and_warp
from ngeodesic.core.parser import (
    geodesic_parse_report,
    geodesic_parse_with_prior,
    geodesic_parse_report_conf,
)
from ngeodesic.core.matched_filter import half_sine_proto, nxcorr, null_threshold
from ngeodesic.core.denoise import TemporalDenoiser
from ngeodesic.core.energies import perpendicular_energy


### 2) Get token-time hidden states (SBERT → Transformers)

In [6]:
import torch
from transformers import AutoTokenizer, AutoModel

# Same checkpoint name the SBERT shim defaults to, loaded through transformers
# so that per-layer hidden states can be requested.
BASE = "sentence-transformers/all-MiniLM-L6-v2"
tok = AutoTokenizer.from_pretrained(BASE)
# output_hidden_states=True makes the forward pass expose `hidden_states`;
# .eval() switches the module to inference mode.
mdl = AutoModel.from_pretrained(BASE, output_hidden_states=True).eval()

def get_hidden_states(text: str, layer_offset: int = -4):
    """Return one layer's token-level hidden states for ``text`` as numpy.

    Args:
        text: Prompt passed to the module-level tokenizer ``tok``.
        layer_offset: Index into the model's ``hidden_states`` tuple. It is
            clamped to the range ``[-(len(hs) - 1), -1]`` (the original comment
            describes this as staying on non-embedding layers).

    Returns:
        The selected layer with the batch axis squeezed out, converted to a
        float numpy array on CPU (annotated ``[T, H]`` in the original).
    """
    encoded = tok(text, return_tensors="pt")
    with torch.no_grad():
        out = mdl(**encoded)
    hs = out.hidden_states
    lowest = -(len(hs) - 1)
    k = min(layer_offset, -1)
    if k < lowest:
        k = lowest
    return hs[k].squeeze(0).float().cpu().numpy()


### 3) Fit the Stage-11 PCA warp once, then apply

In [7]:
import numpy as np

# (a) fit on a calibration batch
# Calibration prompts whose token-level hidden states are used to fit the warp.
cal_texts = [
    "supply 7.0245 SOL to maker",
    "swap 10 ETH to USDC on uniswap",
    "borrow 500 USDC with ETH collateral at 70% ltv",
]
# Stack the per-prompt hidden-state matrices row-wise into one matrix.
H = np.vstack([ get_hidden_states(t) for t in cal_texts ])   # (ΣT, H)
# Cells below read proj["mean"], proj["pcs"] and proj["scales"].
proj = pca3_and_warp(H)  # -> dict(mean, pcs, scales, center)

# (b) helper to apply the warp to a single prompt
def apply_warp_to_text(text: str):
    """Project the hidden states of ``text`` with the fitted global warp ``proj``.

    The hidden states are centred with ``proj["mean"]``, projected onto
    ``proj["pcs"]`` and divided by ``proj["scales"]`` (plus a small epsilon).

    Returns:
        The whitened projection (annotated ``[T, 3]`` in the original).
    """
    hidden = get_hidden_states(text)               # [T, H]
    centered = hidden - proj["mean"]
    projected = centered @ proj["pcs"].T           # [T, 3]
    return projected / (proj["scales"] + 1e-8)


  return forward_call(*args, **kwargs)


### 4) Run the consolidated Stage-11 parser (no priors)

In [8]:
# Prompt under test. (A second prompt used to be assigned first and was
# immediately overwritten; that dead assignment has been removed.)
text = "supply 7.0245 SOL to maker"
Yw   = apply_warp_to_text(text)            # [T, 3]
traces = [Yw[:,0], Yw[:,1], Yw[:,2]]       # three channels
# Fixed windows here; later cells derive sigma / proto_width from the trace length.
keep, order = geodesic_parse_report(traces, sigma=9, proto_width=160)
keep, order

(['0'], ['0'])

In [9]:
# Same parse as the previous cell, using the variant that also returns a debug
# dict; its "channels" entry is displayed as the cell output.
keep, order, dbg = geodesic_parse_report_conf(traces, sigma=9, proto_width=160)
dbg["channels"]

{'0': {'peak_idx': 80,
  'z_res': 3.9721450552825166,
  'z_raw': 4.24521414157031,
  'z_cm': -0.5420038174461331,
  'score': 5.670230711910641},
 '1': {'peak_idx': 78,
  'z_res': -3.939877753110114,
  'z_raw': 4.151917132317742,
  'z_cm': -0.5420038174461331,
  'score': -2.279110900183017},
 '2': {'peak_idx': 78,
  'z_res': -2.948766826521682,
  'z_raw': -2.7206724989994737,
  'z_cm': -0.5420038174461331,
  'score': -4.037035826121472}}

### 5a) Fix A (simple & correct): use an identity 3D projection

In [10]:
import numpy as np
from ngeodesic.core.funnel_profile import attach_projection_info
from ngeodesic.core.parser import geodesic_parse_with_prior

# Identity projection: zero mean, identity components, unit scales, origin centre.
proj_feat = {
    "mean":   np.zeros(3),     # (3,)
    "pcs":    np.eye(3),       # (3,3)
    "scales": np.ones(3),      # (3,)
    "center": np.array([0.0, 0.0]),  # (2,) for (PC1,PC2)
}
# NOTE(review): `priors` is read on the right-hand side here, but no cell above
# this one assigns it. The recorded run of this cell stopped on this line with
# "NameError: name 'priors' is not defined". A priors object has to be built
# before this cell can run.
priors = attach_projection_info(priors, proj_feat)

keep_p, order_p = geodesic_parse_with_prior(traces, priors=priors, sigma=9, proto_width=160)
# The two bare expressions below are not the last statement of the cell, so
# they display nothing.
keep_p, order_p
passed = bool(keep_p)
passed

# Last expression of the cell: this is the value that gets displayed.
keep_p, order_p

NameError: name 'priors' is not defined

### 5b) Fix B (optional): fit a feature-space 3D warp from calibration

In [None]:
import numpy as np
from ngeodesic.core.parser import moving_average  # for a quick _features clone here
from ngeodesic.core.matched_filter import half_sine_proto, nxcorr
from ngeodesic.core.funnel_profile import attach_projection_info, priors_from_profile, fit_radial_profile, analytic_core_template, blend_profiles

def _features(x, w=160):
    """Return a 3-vector of summary features for one trace.

    Features: (relative position of the moving-average peak of the positive
    part, positive area in a window of half-width ``w // 2`` around that peak,
    mean of the positive part).
    """
    pos   = np.maximum(0.0, x)
    ma    = moving_average(pos, k=w)
    j     = int(np.argmax(ma))
    halfw = max(1, w // 2)
    area  = float(pos[max(0, j - halfw): j + halfw + 1].sum())
    meanp = float(pos.mean())
    return np.array([j / max(1, len(x) - 1), area, meanp], dtype=float)  # (3,)

def _mf_peak(x, w=160):
    """Largest non-negative normalized cross-correlation of ``x`` with a half-sine."""
    q = half_sine_proto(width=w)
    c = nxcorr(x, q, mode="same")
    return float(np.maximum(0.0, c).max())

# 1) collect 3D features (and a matched-filter target) per channel over the
#    calibration prompts. A dedicated loop variable is used so the `Yw` / `traces`
#    of the prompt under test are not clobbered.
F, Z = [], []  # F will be (N,3), Z will be (N,)
for t in cal_texts:
    Yw_cal = apply_warp_to_text(t)           # [T,3] PCA channels over time
    Sraw = [moving_average(Yw_cal[:, i], k=9) for i in range(3)]
    for ch in Sraw:
        F.append(_features(ch, w=160))
        Z.append(_mf_peak(ch, w=160))
F = np.asarray(F, float)  # (N,3)
Z = np.asarray(Z, float)  # (N,)

# 2) fit a 3->3 whitened PCA in feature space
mu = F.mean(axis=0)
Fc = F - mu
U, S, Vt = np.linalg.svd(Fc, full_matrices=False)
pcs = Vt[:3, :]                 # (3,3)
Y   = Fc @ pcs.T                # (N,3)
scales = Y.std(axis=0, ddof=1) + 1e-8
center = np.array([0.0, 0.0])

proj_feat = {
    "mean":   mu,               # (3,)
    "pcs":    pcs,              # (3,3)
    "scales": scales,           # (3,)
    "center": center,
}

# 3) build the priors instead of reading an undefined `priors` name. The recipe
#    (radial profile -> analytic core -> blend -> priors) and its constants are
#    the ones used by the per-action prior builder elsewhere in this notebook.
# NOTE(review): radii are measured in the whitened feature projection around
# `center`; confirm this matches how the parser applies `proj_feat`.
R = np.linalg.norm((Y / scales)[:, :2] - center[None, :], axis=1)
r_grid, z_data = fit_radial_profile(R, Z, n_r=220, fit_quantile=0.65)
z_core  = analytic_core_template(r_grid, k=0.18, p=1.7, r0_frac=0.14)
z_blend = blend_profiles(z_data, z_core, blend_core=0.25)
priors  = attach_projection_info(priors_from_profile(r_grid, z_blend), proj_feat)

# 4) proceed as before (`traces` is the prompt prepared in an earlier cell)
keep_p, order_p = geodesic_parse_with_prior(traces, priors=priors, sigma=9, proto_width=160)
passed = bool(keep_p)

keep_p, order_p

In [None]:
from ngeodesic.core.parser import geodesic_parse_with_prior  # Stage-11 parser

def ngf_pass(traces, priors, *, sigma=9, proto_width=160, **knobs):
    """Run the prior-based parser and reduce its result to pass / abstain.

    Args:
        traces: Channel traces handed to ``geodesic_parse_with_prior``.
        priors: Priors object handed to the parser.
        sigma, proto_width: Window settings forwarded to the parser.
        **knobs: Extra parser keywords, forwarded unchanged
            (the call site uses z, rel_floor, alpha, beta_s, q_s).

    Returns:
        ``(passed, info)`` where ``passed`` is ``bool(keep)`` and ``info``
        holds the parser's ``keep`` and ``order`` outputs.
    """
    parser_kwargs = dict(priors=priors, sigma=sigma, proto_width=proto_width, **knobs)
    keep, order = geodesic_parse_with_prior(traces, **parser_kwargs)
    return bool(keep), {"keep": keep, "order": order}

# Requires `traces` and `priors` to already exist in the kernel.
passed, info = ngf_pass(traces, priors, z=2.2, rel_floor=0.70, alpha=0.05, beta_s=0.25, q_s=2.0)
print("PASS" if passed else "ABSTAIN", info)

### 6) Another example 

In [11]:
text = "borrow 500 USDC with ETH collateral at 70% ltv"
Yw   = apply_warp_to_text(text)            # [T, 3]
traces = [Yw[:,0], Yw[:,1], Yw[:,2]]       # three channels

# Derive the parser windows from the trace length instead of using fixed values.
T = len(traces[0])  # tokens in your PCA channel
proto_w = max(11, min(int(0.6 * T), 61))  # 60% of T, clamped to [11, 61]
sigma   = max(3, min(int(T / 10), 9))     # ~ T/10, clamped to [3, 9]

keep, order, dbg = geodesic_parse_report_conf(traces, sigma=sigma, proto_width=proto_w)
print(keep, order)
print(dbg["channels"])

['1'] ['1']
{'0': {'peak_idx': 3, 'z_res': -1.820862128793049, 'z_raw': -1.3018366857035226, 'z_cm': -0.2763791283445771, 'score': -2.341596803074458}, '1': {'peak_idx': 5, 'z_res': 2.930356015228629, 'z_raw': 3.3201549913281116, 'z_cm': -0.2763791283445771, 'score': 4.258418011759874}, '2': {'peak_idx': 2, 'z_res': -1.93899111437996, 'z_raw': -1.9461171041916485, 'z_cm': -0.2763791283445771, 'score': -2.7174379560566195}}


In [12]:
from ngeodesic.core.parser import geodesic_parse_report_conf, geodesic_parse_with_prior

def ngf_pass_from_traces(traces, *, priors=None, T=None):
    """Pass / abstain decision with windows adapted to the trace length.

    Args:
        traces: Channel traces; ``len(traces[0])`` is used when ``T`` is falsy.
        priors: When given, the prior-based parser is used; otherwise the
            report parser with its debug output.
        T: Optional length override.

    Returns:
        ``(passed, info)``. ``info`` always holds keep, order, sigma and
        proto_w; without priors it also holds the parser's ``dbg`` dict.
    """
    n_steps = T or len(traces[0])
    # 60% of the length clamped to [11, 61]; ~length/10 clamped to [3, 9].
    proto_w = max(11, min(int(0.6 * n_steps), 61))
    sigma = max(3, min(int(n_steps / 10), 9))
    windows = {"sigma": sigma, "proto_w": proto_w}

    if priors is None:
        keep, order, dbg = geodesic_parse_report_conf(traces, sigma=sigma, proto_width=proto_w)
        # Pass only when something was kept and at least one kept channel scores > 0.
        has_positive = any(dbg["channels"][name]["score"] > 0 for name in keep)
        passed = bool(keep) and has_positive
        return passed, {"keep": keep, "order": order, **windows, "dbg": dbg}

    keep, order = geodesic_parse_with_prior(
        traces, priors=priors, sigma=sigma, proto_width=proto_w,
        # gentle, practical defaults for short prompts:
        z=2.0, rel_floor=0.65, alpha=0.08, beta_s=0.35, q_s=2.0
    )
    return bool(keep), {"keep": keep, "order": order, **windows}

def normalize_protocols(text: str) -> str:
    """Replace near-miss spellings of known protocol names with the canonical name.

    Each whitespace-separated token is handled on its own:
      * a token that already contains a known name is kept untouched;
      * otherwise the token is replaced by its closest vocabulary entry when
        ``difflib`` similarity is at least 0.75.

    Previously a single correctly spelled protocol anywhere in the text made
    the function return early, so other misspelled protocols in the same
    prompt were never repaired. The old results are otherwise preserved: text
    that contains a known name and needs no repair is returned unchanged, and
    text without any known name is returned re-joined with single spaces.

    Args:
        text: Raw prompt.

    Returns:
        The prompt with misspelled protocol names repaired.
    """
    import difflib

    # tiny lexicon with similarity-based fallback
    vocab = ["uniswap","maker","aave","compound","curve"]

    fixed = []
    has_known = False   # some token already contains a vocabulary word
    changed = False     # some token was replaced
    for t in text.split():
        low = t.lower()
        if any(w in low for w in vocab):
            has_known = True
            fixed.append(t)
            continue
        cand = difflib.get_close_matches(low, vocab, n=1, cutoff=0.75)
        if cand:
            changed = True
            fixed.append(cand[0])
        else:
            fixed.append(t)

    if has_known and not changed:
        return text
    return " ".join(fixed)

In [13]:
from ngeodesic.core.parser import geodesic_parse_report_conf, geodesic_parse_with_prior
from ngeodesic.core.matched_filter import half_sine_proto, nxcorr, null_threshold
import numpy as np

def ngf_pass_strict(traces, *, priors=None, T=None, mapper=None, allow=("deposit","swap"), text=None):
    """Stricter pass / abstain gate with an optional intent check.

    Args:
        traces: Channel traces; ``len(traces[0])`` is used when ``T`` is falsy.
        priors: When given, the prior-based parser decides; otherwise the
            report parser is followed by an area floor and a null-threshold test.
        T: Optional length override. Lengths below 6 abstain immediately.
        mapper: Optional classifier with ``predict``; consulted only after a pass.
        allow: Labels that are allowed to pass the intent check.
        text: Prompt handed to ``mapper.predict``. When omitted, the previous
            placeholder input ``" "`` is used so existing calls behave as before.

    Returns:
        ``(passed, info)``; ``info`` carries keep/order/sigma/proto_w/mode, or a
        ``reason`` entry for the ``too_short`` and ``intent_mismatch`` exits.
    """
    T = T or len(traces[0])
    if T < 6:
        return False, {"reason":"too_short", "T": T}

    proto_w = max(11, min(int(0.6 * T), 61))
    sigma   = max(3,  min(int(T / 10), 9))

    def _area_ok(x, j, w=proto_w, min_area=6.0):
        # Positive area inside a window of half-width w//2 around index j.
        pos = np.maximum(0.0, x)
        half = max(1, w // 2)
        return float(pos[max(0, j - half): j + half + 1].sum()) >= min_area

    if priors is not None:
        keep, order = geodesic_parse_with_prior(
            traces, priors=priors, sigma=sigma, proto_width=proto_w,
            z=2.2, rel_floor=0.70, alpha=0.08, beta_s=0.35, q_s=2.0
        )
        passed = bool(keep)
        info   = {"keep": keep, "order": order, "sigma": sigma, "proto_w": proto_w, "mode":"prior"}
    else:
        keep, order, dbg = geodesic_parse_report_conf(traces, sigma=sigma, proto_width=proto_w)
        passed = False
        if keep:
            # area floor on the first kept channel
            k = keep[0]
            ch = int(k)
            peak = dbg["channels"][k]["peak_idx"]
            if _area_ok(traces[ch], peak):
                # Threshold test on a quick residual approximation: each channel
                # is centred by its own mean along axis 0.
                # NOTE(review): no cross-channel mean is subtracted here —
                # confirm whether the parser's residual view does that.
                X = np.stack(traces, 1)
                Xc = X - X.mean(0, keepdims=True)
                resid = Xc[:, ch]
                q = half_sine_proto(width=proto_w)
                c = nxcorr(resid, q, mode="same")
                thr = null_threshold(resid, q, shifts=600, z=2.4, mode="perm")
                passed = float(c.max()) >= float(thr)
        info = {"keep": keep, "order": order, "sigma": sigma, "proto_w": proto_w, "mode":"report"}

    if mapper is not None and passed:
        # Classify the actual prompt when the caller supplied it.
        query = text if text is not None else " "
        lbl = mapper.predict([query])[0]
        # NOTE(review): the mapper loaded earlier in this notebook printed labels
        # such as 'deposit_asset'; the default allow-list would reject those —
        # confirm the expected label names.
        if lbl not in allow:
            return False, {**info, "reason":"intent_mismatch", "label": lbl}

    return passed, info


In [14]:
# Run the adaptive-window gate without priors on a single prompt.
text = "borrow 500 USDC with ETH collateral at 70% ltv"
Yw = apply_warp_to_text(text)
traces = [Yw[:,0], Yw[:,1], Yw[:,2]]
passed, info = ngf_pass_from_traces(traces, priors=None)  # or priors=your_funnel_priors
print("PASS" if passed else "ABSTAIN", info)

PASS {'keep': ['1'], 'order': ['1'], 'sigma': 3, 'proto_w': 11, 'dbg': {'channels': {'0': {'peak_idx': 3, 'z_res': -1.820862128793049, 'z_raw': -1.3018366857035226, 'z_cm': -0.2763791283445771, 'score': -2.341596803074458}, '1': {'peak_idx': 5, 'z_res': 2.930356015228629, 'z_raw': 3.3201549913281116, 'z_cm': -0.2763791283445771, 'score': 4.258418011759874}, '2': {'peak_idx': 2, 'z_res': -1.93899111437996, 'z_raw': -1.9461171041916485, 'z_cm': -0.2763791283445771, 'score': -2.7174379560566195}}, 'smax': 4.258418011760874}}


In [15]:
text = "swap 10 ETH to USDC on uniswap"

# Repair protocol-name typos before computing the traces.
text = normalize_protocols(text)
Yw = apply_warp_to_text(text)

# Split the projected matrix into its three channel traces.
traces = [Yw[:,0], Yw[:,1], Yw[:,2]]
passed, info = ngf_pass_strict(traces, priors=None)  # or priors=your_funnel_priors
print("PASS" if passed else "ABSTAIN", info)

ABSTAIN {'keep': ['1'], 'order': ['1'], 'sigma': 3, 'proto_w': 11, 'mode': 'report'}


In [16]:
# Off-topic prompt: the recorded output of this cell is ABSTAIN.
text = "thats a wrap"
Yw = apply_warp_to_text(text)
traces = [Yw[:,0], Yw[:,1], Yw[:,2]]
passed, info = ngf_pass_strict(traces, priors=None)  # or priors=your_funnel_priors
print("PASS" if passed else "ABSTAIN", info)

ABSTAIN {'keep': ['0', '1'], 'order': ['1', '0'], 'sigma': 3, 'proto_w': 11, 'mode': 'report'}


In [17]:
text = "borrow 500 USDC with ETH collateral at 70% ltv"
Yw   = apply_warp_to_text(text)            # [T, 3]
traces = [Yw[:,0], Yw[:,1], Yw[:,2]]       # three channels
# NOTE(review): this line needs both `priors` and `proj_feat` to exist in the
# kernel already. The recorded run failed here with
# "NameError: name 'priors' is not defined".
priors = attach_projection_info(priors, proj_feat)
# NOTE(review): tau_rel / tau_abs_q / null_K / seed are not passed by any other
# call to this parser in the notebook (those use z / rel_floor) — confirm the
# parser accepts these keywords.
keep, order = geodesic_parse_with_prior(
    traces, priors,
    sigma=9, proto_width=160,
    alpha=0.05, beta_s=0.25, q_s=2,
    tau_rel=0.60, tau_abs_q=0.93, null_K=40, seed=42
)
keep, order

NameError: name 'priors' is not defined

In [18]:
# Same prompt and same adaptive-window parse as the earlier "Another example" cell.
text = "borrow 500 USDC with ETH collateral at 70% ltv"
Yw   = apply_warp_to_text(text)            # [T, 3]
traces = [Yw[:,0], Yw[:,1], Yw[:,2]]       # three channels

T = len(traces[0])  # tokens in your PCA channel
proto_w = max(11, min(int(0.6 * T), 61))  # 60% of T, clamped to [11, 61]
sigma   = max(3, min(int(T / 10), 9))     # ~ T/10, clamped to [3, 9]

keep, order, dbg = geodesic_parse_report_conf(traces, sigma=sigma, proto_width=proto_w)
print(keep, order)
print(dbg["channels"])

['1'] ['1']
{'0': {'peak_idx': 3, 'z_res': -1.820862128793049, 'z_raw': -1.3018366857035226, 'z_cm': -0.2763791283445771, 'score': -2.341596803074458}, '1': {'peak_idx': 5, 'z_res': 2.930356015228629, 'z_raw': 3.3201549913281116, 'z_cm': -0.2763791283445771, 'score': 4.258418011759874}, '2': {'peak_idx': 2, 'z_res': -1.93899111437996, 'z_raw': -1.9461171041916485, 'z_cm': -0.2763791283445771, 'score': -2.7174379560566195}}


### WDD Doctrine

In [19]:
# === Action-specific WDD: deposit uses earlier layer & its own warp/priors ===
import os, re, difflib, numpy as np, torch, joblib
from transformers import AutoTokenizer, AutoModel

# ---------------- base encoders ----------------
# Rebinds the module-level `tok` / `mdl` used by get_hidden_states.
BASE_MODEL = "sentence-transformers/all-MiniLM-L6-v2"

tok = AutoTokenizer.from_pretrained(BASE_MODEL)
# output_hidden_states=True exposes per-layer states; .eval() = inference mode.
mdl = AutoModel.from_pretrained(BASE_MODEL, output_hidden_states=True).eval()

def get_hidden_states(text: str, layer_offset: int) -> np.ndarray:
    """Return one layer's token-level hidden states for ``text``.

    ``layer_offset`` indexes the model's ``hidden_states`` tuple and is clamped
    to ``[-(len(hs) - 1), -1]``. The batch axis is squeezed out and the result
    is returned as a float numpy array on CPU (annotated ``[T,H]`` originally).
    """
    encoded = tok(text, return_tensors="pt")
    with torch.no_grad():
        out = mdl(**encoded)
    hs = out.hidden_states
    lowest = -(len(hs) - 1)
    k = min(layer_offset, -1)
    if k < lowest:
        k = lowest
    return hs[k].squeeze(0).float().cpu().numpy()

# ---------------- PCA warp H->3 ----------------
def fit_token_warp(hiddens, d=3, whiten=True):
    """Fit a PCA projection onto the top ``d`` components of stacked rows.

    Args:
        hiddens: Sequence of 2-D arrays with the same number of columns; they
            are stacked row-wise before fitting.
        d: Number of leading right-singular vectors to keep.
        whiten: When true, ``scales`` is the per-component sample standard
            deviation (ddof=1) plus 1e-8; otherwise it is all ones.

    Returns:
        Dict with ``mean`` (column means), ``pcs`` (kept components as rows)
        and ``scales``.
    """
    stacked = np.vstack(hiddens)
    mu = stacked.mean(0)
    centered = stacked - mu
    _, _, vt = np.linalg.svd(centered, full_matrices=False)
    pcs = vt[:d, :]
    if whiten:
        scales = (centered @ pcs.T).std(0, ddof=1) + 1e-8
    else:
        scales = np.ones(d)
    return {"mean": mu, "pcs": pcs, "scales": scales}

def apply_token_warp(Z, warp):
    """Centre ``Z`` with ``warp["mean"]``, project onto ``warp["pcs"]`` and
    divide by ``warp["scales"]`` (plus 1e-8 to avoid division by zero)."""
    centered = Z - warp["mean"]
    denom = warp["scales"] + 1e-8
    return (centered @ warp["pcs"].T) / denom

def traces_from_text(warp, text, layer_offset):
    """Turn a prompt into one trace per warp component.

    The prompt's hidden states at ``layer_offset`` are projected with ``warp``
    and the resulting matrix is split column-wise. The number of traces
    follows the warp's component count (three for the warps fitted in this
    notebook) instead of being hard-coded.

    Returns:
        List of 1-D arrays, one per projected component.
    """
    Z = get_hidden_states(text, layer_offset=layer_offset)
    Yw = apply_token_warp(Z, warp)   # [T,d]
    return [Yw[:, i] for i in range(Yw.shape[1])]

# ---------------- NGF bits ----------------
from ngeodesic.core.parser import moving_average, geodesic_parse_with_prior
from ngeodesic.core.matched_filter import half_sine_proto, nxcorr
from ngeodesic.core.funnel_profile import (
    fit_radial_profile, analytic_core_template, blend_profiles,
    priors_from_profile, attach_projection_info
)

def normalize_protocols(text: str) -> str:
    """Replace tokens that closely match a known protocol name with that name.

    Every whitespace-separated token is lower-cased and compared against the
    vocabulary (difflib similarity cutoff 0.75); tokens without a close match
    are kept as written. The result is re-joined with single spaces.
    """
    vocab = ["uniswap","maker","aave","compound","curve","balancer"]

    def _canonical(word):
        hits = difflib.get_close_matches(word.lower(), vocab, n=1, cutoff=0.75)
        return hits[0] if hits else word

    return " ".join(_canonical(word) for word in text.split())

def infer_action(text: str) -> str:
    """Keyword-based action guess: "deposit", "swap" or "unknown".

    Rules are checked in order on the lower-cased text using whole-word
    matches, so a prompt containing both kinds of keyword is a "deposit".
    """
    lowered = text.lower()
    rules = (
        ("deposit", r"\b(supply|deposit)\b"),
        ("swap", r"\b(swap|exchange|trade)\b"),
    )
    for label, pattern in rules:
        if re.search(pattern, lowered):
            return label
    return "unknown"

def adaptive_windows_short(T: int):
    """Return ``(sigma, proto_w)`` window sizes for a trace of length ``T``.

    ``proto_w`` is ``int(0.7 * T)`` clamped to [13, 61]; ``sigma`` is
    ``int(T / 8)`` clamped to [4, 9].
    """
    proto_w = min(int(0.7*T), 61)
    if proto_w < 13:
        proto_w = 13
    sigma = min(int(T/8), 9)
    if sigma < 4:
        sigma = 4
    return sigma, proto_w

def parser_features(x, w):
    """Summarise one trace as a 3-vector.

    Returns ``[relative peak position, windowed positive area, mean positive
    value]`` where the peak is the argmax of the ``w``-point moving average of
    the positive part of ``x`` and the window has half-width ``max(1, w // 2)``.
    """
    pos = np.maximum(0.0, x)
    smoothed = moving_average(pos, k=w)
    j = int(np.argmax(smoothed))
    halfw = max(1, w // 2)
    lo, hi = max(0, j - halfw), j + halfw + 1
    area = float(pos[lo:hi].sum())
    meanp = float(pos.mean())
    rel_pos = j / max(1, len(x) - 1)
    return np.array([rel_pos, area, meanp], float)

def mf_peak(x, proto_w):
    """Largest non-negative value of ``nxcorr`` between ``x`` and a half-sine
    prototype of width ``proto_w`` (negative correlations count as zero)."""
    template = half_sine_proto(width=proto_w)
    corr = nxcorr(x, template, mode="same")
    return float(np.clip(corr, 0.0, None).max())

def build_priors_feature_MFpeak(warp, texts, layer_offset, proto_w=160):
    """Build a priors object from calibration prompts.

    For every channel trace of every prompt (lightly smoothed), a feature
    vector and a matched-filter peak are collected. Radii are the distances of
    the first two features from their median; a radial profile of peak vs.
    radius is fitted, blended with an analytic core template and converted to
    priors. An identity projection centred on that median is attached.

    Args:
        warp: Token warp used to produce the traces.
        texts: Calibration prompts.
        layer_offset: Hidden-state layer used for the traces.
        proto_w: Window / prototype width used for features and peaks.

    Returns:
        The value returned by ``attach_projection_info``.
    """
    feats, targets = [], []
    for prompt in texts:
        for ch in traces_from_text(warp, prompt, layer_offset=layer_offset):
            # smoothing window: len//6 clamped to [3, 9]
            smooth = moving_average(ch, k=min(9, max(3, len(ch)//6)))
            feats.append(parser_features(smooth, w=proto_w))
            targets.append(mf_peak(smooth, proto_w))
    F = np.asarray(feats)
    Zs = np.asarray(targets)

    plane = F[:, :2]
    center = np.median(plane, axis=0)
    R = np.linalg.norm(plane - center[None, :], axis=1)

    r_grid, z_data = fit_radial_profile(R, Zs, n_r=220, fit_quantile=0.65)
    z_core = analytic_core_template(r_grid, k=0.18, p=1.7, r0_frac=0.14)
    z_blend = blend_profiles(z_data, z_core, blend_core=0.25)
    pri = priors_from_profile(r_grid, z_blend)

    proj = {
        "mean": np.zeros(3),
        "pcs": np.eye(3),
        "scales": np.ones(3),
        "center": center.astype(float),
    }
    return attach_projection_info(pri, proj)

def wdd_prior_pass(traces, priors, *, z=1.8, rel_floor=0.58, alpha=0.14, beta_s=0.50, q_s=2.0):
    """Prior-based pass / abstain decision with length-adapted windows.

    Traces shorter than 6 samples abstain immediately with a ``too_short``
    reason. Otherwise the parser is run and the decision is ``bool(keep)``.

    Returns:
        ``(passed, info)``.
    """
    n_steps = len(traces[0])
    if n_steps < 6:
        return False, {"reason":"too_short","T":n_steps}

    sigma, proto_w = adaptive_windows_short(n_steps)
    gates = dict(z=z, rel_floor=rel_floor, alpha=alpha, beta_s=beta_s, q_s=q_s)
    keep, order = geodesic_parse_with_prior(
        traces, priors=priors, sigma=sigma, proto_width=proto_w, **gates
    )
    info = {"keep": keep, "order": order, "sigma": sigma, "proto_w": proto_w, "mode":"prior"}
    return bool(keep), info

# ---------------- Build per-action warps & priors ----------------
# Make sure the artifact directory exists before anything is dumped into it.
os.makedirs(".artifacts", exist_ok=True)

# SWAP: keep your current setup at layer -4
SWAP_LAYER  = -4
SWAP_WARP   = ".artifacts/wdd_warp_swap_L-4.joblib"
SWAP_PRIORS = ".artifacts/wdd_priors_swap_L-4.joblib"

# DEPOSIT: build at earlier layer -6 for stronger token dynamics on short prompts
DEP_LAYER  = -6
DEP_WARP   = ".artifacts/wdd_warp_deposit_L-6.joblib"
DEP_PRIORS = ".artifacts/wdd_priors_deposit_L-6.joblib"

# Calibration sets (expand as needed)
deposit_cal = [
    "supply 7.0245 SOL to maker",
    "deposit 3 WBTC into vault",
    "supply 150 USDC to aave",
    "deposit 2 ETH to compound",
    "supply 0.5 WETH to maker",
    "deposit 200 DAI into vault",
    "supply 10 SOL to aave",
    "deposit 25 USDC to maker",
    "supply 3 ETH to maker",
]
swap_cal = [
    "swap 10 ETH to USDC on uniswap",
    "swap 2000 USDC to ETH on uniswap",
    "swap 1 WBTC for WETH on curve",
    "swap 50 SOL to USDC on uniswap",
    "swap 0.75 ETH to DAI on balancer",
    "swap 250 DAI to USDC on uniswap",
]

# Build/load warps
# Warps are cached on disk: an existing file is loaded as-is, so it is NOT
# refitted when the calibration list above changes (delete the file to refit).
if os.path.exists(SWAP_WARP):
    warp_swap = joblib.load(SWAP_WARP)
else:
    Hs = [get_hidden_states(t, layer_offset=SWAP_LAYER) for t in swap_cal]
    warp_swap = fit_token_warp(Hs, d=3, whiten=True); joblib.dump(warp_swap, SWAP_WARP)

if os.path.exists(DEP_WARP):
    warp_dep = joblib.load(DEP_WARP)
else:
    Hd = [get_hidden_states(t, layer_offset=DEP_LAYER) for t in deposit_cal]
    warp_dep = fit_token_warp(Hd, d=3, whiten=True); joblib.dump(warp_dep, DEP_WARP)

# Build priors with MF-peak targets
# Unlike the warps, the priors are rebuilt on every run and the files are overwritten.
priors_swap = build_priors_feature_MFpeak(warp_swap, swap_cal,    layer_offset=SWAP_LAYER, proto_w=160)
priors_dep  = build_priors_feature_MFpeak(warp_dep,  deposit_cal, layer_offset=DEP_LAYER,  proto_w=160)
joblib.dump(priors_swap, SWAP_PRIORS); joblib.dump(priors_dep, DEP_PRIORS)

# ---------------- Test set ----------------
tests = [
    "supply 7.0245 SOL to maker",
    "swap 10 ETH to USDC on uniswap",
    "swap 10 ETH to USDC on uniswa",          # typo
    "attempt a borrow with low health factor",
    "that's a wrap",
]

# The status column must fit "ABSTAIN" (7 characters); a width of 6 pushed the
# following columns out of alignment on abstaining rows.
STATUS_W = 7

print(f"{'prompt'.ljust(40)} | {'prior'.ljust(STATUS_W)} | keep | sigma | proto_w | which_prior")
print("-"*100)
for raw in tests:
    # Normalise protocol typos, pick the action by keyword, then gate with the
    # warp / priors that belong to that action.
    text = normalize_protocols(raw)
    act  = infer_action(text)
    if act == "swap":
        traces = traces_from_text(warp_swap, text, layer_offset=SWAP_LAYER)
        ok, info = wdd_prior_pass(traces, priors_swap, z=1.8, rel_floor=0.58, alpha=0.12, beta_s=0.48)
        which = "swap(L-4)"
    elif act == "deposit":
        traces = traces_from_text(warp_dep, text, layer_offset=DEP_LAYER)
        ok, info = wdd_prior_pass(traces, priors_dep,  z=1.8, rel_floor=0.58, alpha=0.16, beta_s=0.52)
        which = "deposit(L-6)"
    else:
        # No recognised action keyword: abstain without running the parser.
        ok, info, which = False, {"keep":[],"sigma":None,"proto_w":None}, "unknown"

    status  = "PASS" if ok else "ABSTAIN"
    keep_s  = ",".join(info.get("keep", [])) or "-"
    sigma_s = info.get("sigma") if info.get("sigma") is not None else "-"
    proto_s = info.get("proto_w") if info.get("proto_w") is not None else "-"
    print(f"{raw.ljust(40)} | {status:>{STATUS_W}} | {keep_s:^4} |"
          f" {sigma_s:^5} |"
          f" {proto_s:^7} | {which}")


prompt                                   | prior  | keep | sigma | proto_w | which_prior
----------------------------------------------------------------------------------------------------
supply 7.0245 SOL to maker               | ABSTAIN |  -   |   4   |   13    | deposit(L-6)
swap 10 ETH to USDC on uniswap           |   PASS |  2   |   4   |   13    | swap(L-4)
swap 10 ETH to USDC on uniswa            |   PASS |  2   |   4   |   13    | swap(L-4)
attempt a borrow with low health factor  | ABSTAIN |  -   |   -   |    -    | unknown
that's a wrap                            | ABSTAIN |  -   |   -   |    -    | unknown


In [24]:
# === Make deposits PASS: auto-layer search (-5,-7), rebuild warp+prior, gentle gates, MF fallback ===
import os, re, difflib, numpy as np, torch, joblib
from transformers import AutoTokenizer, AutoModel

# ---------------- base encoder ----------------
# Rebinds the module-level `tok` / `mdl` used by get_hidden_states.
BASE_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
tok = AutoTokenizer.from_pretrained(BASE_MODEL)
# output_hidden_states=True exposes per-layer states; .eval() = inference mode.
mdl = AutoModel.from_pretrained(BASE_MODEL, output_hidden_states=True).eval()

def get_hidden_states(text: str, layer_offset: int) -> np.ndarray:
    """Return one layer's token-level hidden states for ``text``.

    ``layer_offset`` indexes the model's ``hidden_states`` tuple and is clamped
    to ``[-(len(hs) - 1), -1]``. The batch axis is squeezed out and the result
    is returned as a float numpy array on CPU (annotated ``[T,H]`` originally).
    """
    encoded = tok(text, return_tensors="pt")
    with torch.no_grad():
        out = mdl(**encoded)
    hs = out.hidden_states
    lowest = -(len(hs) - 1)
    k = min(layer_offset, -1)
    if k < lowest:
        k = lowest
    return hs[k].squeeze(0).float().cpu().numpy()

# ---------------- PCA warp H->3 ----------------
def fit_token_warp(hiddens, d=3, whiten=True):
    """Fit a PCA projection onto the top ``d`` components of stacked rows.

    Args:
        hiddens: Sequence of 2-D arrays with the same number of columns; they
            are stacked row-wise before fitting.
        d: Number of leading right-singular vectors to keep.
        whiten: When true, ``scales`` is the per-component sample standard
            deviation (ddof=1) plus 1e-8; otherwise it is all ones.

    Returns:
        Dict with ``mean`` (column means), ``pcs`` (kept components as rows)
        and ``scales``.
    """
    stacked = np.vstack(hiddens)
    mu = stacked.mean(0)
    centered = stacked - mu
    _, _, vt = np.linalg.svd(centered, full_matrices=False)
    pcs = vt[:d, :]
    if whiten:
        scales = (centered @ pcs.T).std(0, ddof=1) + 1e-8
    else:
        scales = np.ones(d)
    return {"mean": mu, "pcs": pcs, "scales": scales}

def apply_token_warp(Z, warp):
    """Centre ``Z`` with ``warp["mean"]``, project onto ``warp["pcs"]`` and
    divide by ``warp["scales"]`` (plus 1e-8 to avoid division by zero)."""
    centered = Z - warp["mean"]
    denom = warp["scales"] + 1e-8
    return (centered @ warp["pcs"].T) / denom

def traces_from_text(warp, text, layer_offset):
    """Turn a prompt into one trace per warp component.

    The prompt's hidden states at ``layer_offset`` are projected with ``warp``
    and the resulting matrix is split column-wise. The number of traces
    follows the warp's component count (three for the warps fitted in this
    notebook) instead of being hard-coded.

    Returns:
        List of 1-D arrays, one per projected component.
    """
    Z = get_hidden_states(text, layer_offset=layer_offset)
    Yw = apply_token_warp(Z, warp)   # [T,d]
    return [Yw[:, i] for i in range(Yw.shape[1])]

# ---------------- NGF bits ----------------
from ngeodesic.core.parser import moving_average, geodesic_parse_with_prior
from ngeodesic.core.matched_filter import half_sine_proto, nxcorr
from ngeodesic.core.funnel_profile import (
    fit_radial_profile, analytic_core_template, blend_profiles,
    priors_from_profile, attach_projection_info
)

def normalize_protocols(text: str) -> str:
    """Repair protocol-name typos and unify "maker" to "makerdao".

    Tokens that closely match a vocabulary entry (difflib cutoff 0.75 on the
    lower-cased token) are replaced by that entry; the tokens are re-joined
    with single spaces and any standalone "maker" (case-insensitive) is then
    rewritten to "makerdao".
    """
    vocab = ["uniswap","maker","makerdao","aave","compound","curve","balancer"]

    def _canonical(word):
        hits = difflib.get_close_matches(word.lower(), vocab, n=1, cutoff=0.75)
        return hits[0] if hits else word

    joined = " ".join(_canonical(word) for word in text.split())
    # unify "maker" → "makerdao" for consistency
    return re.sub(r"\bmaker\b", "makerdao", joined, flags=re.I)

def infer_action(text: str) -> str:
    """Keyword-based action guess: "deposit", "swap" or "unknown".

    Rules are checked in order on the lower-cased text using whole-word
    matches, so a prompt containing both kinds of keyword is a "deposit".
    """
    lowered = text.lower()
    rules = (
        ("deposit", r"\b(supply|deposit)\b"),
        ("swap", r"\b(swap|exchange|trade)\b"),
    )
    for label, pattern in rules:
        if re.search(pattern, lowered):
            return label
    return "unknown"

def adaptive_windows_short(T: int):
    """Return ``(sigma, proto_w)`` window sizes for a trace of length ``T``.

    ``proto_w`` is ``int(0.7 * T)`` clamped to [13, 61]; ``sigma`` is
    ``int(T / 8)`` clamped to [4, 9].
    """
    proto_w = min(int(0.7*T), 61)
    if proto_w < 13:
        proto_w = 13
    sigma = min(int(T/8), 9)
    if sigma < 4:
        sigma = 4
    return sigma, proto_w

def parser_features(x, w):
    """Summarise one trace as a 3-vector.

    Returns ``[relative peak position, windowed positive area, mean positive
    value]`` where the peak is the argmax of the ``w``-point moving average of
    the positive part of ``x`` and the window has half-width ``max(1, w // 2)``.
    """
    pos = np.maximum(0.0, x)
    smoothed = moving_average(pos, k=w)
    j = int(np.argmax(smoothed))
    halfw = max(1, w // 2)
    lo, hi = max(0, j - halfw), j + halfw + 1
    area = float(pos[lo:hi].sum())
    meanp = float(pos.mean())
    rel_pos = j / max(1, len(x) - 1)
    return np.array([rel_pos, area, meanp], float)

def mf_peak(x, proto_w):
    """Largest non-negative value of ``nxcorr`` between ``x`` and a half-sine
    prototype of width ``proto_w`` (negative correlations count as zero)."""
    template = half_sine_proto(width=proto_w)
    corr = nxcorr(x, template, mode="same")
    return float(np.clip(corr, 0.0, None).max())

def build_priors_feature_MFpeak(warp, texts, layer_offset, proto_w=160):
    """Build a priors object from calibration prompts.

    For every channel trace of every prompt (lightly smoothed), a feature
    vector and a matched-filter peak are collected. Radii are the distances of
    the first two features from their median; a radial profile of peak vs.
    radius is fitted, blended with an analytic core template and converted to
    priors. An identity projection centred on that median is attached.

    Args:
        warp: Token warp used to produce the traces.
        texts: Calibration prompts.
        layer_offset: Hidden-state layer used for the traces.
        proto_w: Window / prototype width used for features and peaks.

    Returns:
        The value returned by ``attach_projection_info``.
    """
    feats, targets = [], []
    for prompt in texts:
        for ch in traces_from_text(warp, prompt, layer_offset=layer_offset):
            # smoothing window: len//6 clamped to [3, 9]
            smooth = moving_average(ch, k=min(9, max(3, len(ch)//6)))
            feats.append(parser_features(smooth, w=proto_w))
            targets.append(mf_peak(smooth, proto_w))
    F = np.asarray(feats)
    Zs = np.asarray(targets)

    plane = F[:, :2]
    center = np.median(plane, axis=0)
    R = np.linalg.norm(plane - center[None, :], axis=1)

    r_grid, z_data = fit_radial_profile(R, Zs, n_r=220, fit_quantile=0.65)
    z_core = analytic_core_template(r_grid, k=0.18, p=1.7, r0_frac=0.14)
    z_blend = blend_profiles(z_data, z_core, blend_core=0.25)
    pri = priors_from_profile(r_grid, z_blend)

    proj = {
        "mean": np.zeros(3),
        "pcs": np.eye(3),
        "scales": np.ones(3),
        "center": center.astype(float),
    }
    return attach_projection_info(pri, proj)

def wdd_prior_pass(traces, priors, *, z=1.7, rel_floor=0.55, alpha=0.16, beta_s=0.54, q_s=2.0):
    """Prior-based pass / abstain decision with length-adapted windows.

    Traces shorter than 6 samples abstain immediately with a ``too_short``
    reason. Otherwise the parser is run and the decision is ``bool(keep)``.

    Returns:
        ``(passed, info)``.
    """
    n_steps = len(traces[0])
    if n_steps < 6:
        return False, {"reason":"too_short","T":n_steps}

    sigma, proto_w = adaptive_windows_short(n_steps)
    gates = dict(z=z, rel_floor=rel_floor, alpha=alpha, beta_s=beta_s, q_s=q_s)
    keep, order = geodesic_parse_with_prior(
        traces, priors=priors, sigma=sigma, proto_width=proto_w, **gates
    )
    info = {"keep": keep, "order": order, "sigma": sigma, "proto_w": proto_w, "mode":"prior"}
    return bool(keep), info

# ---------------- Existing swap path (kept at L=-4) ----------------
SWAP_LAYER = -4
# Calibration prompts used when the swap warp / priors have to be (re)built.
swap_cal = [
    "swap 10 ETH to USDC on uniswap",
    "swap 2000 USDC to ETH on uniswap",
    "swap 1 WBTC for WETH on curve",
    "swap 50 SOL to USDC on uniswap",
    "swap 0.75 ETH to DAI on balancer",
    "swap 250 DAI to USDC on uniswap",
]
SWAP_WARP   = ".artifacts/wdd_warp_swap_L-4.joblib"
SWAP_PRIORS = ".artifacts/wdd_priors_swap_L-4.joblib"

# Both artifacts are cached: an existing file is loaded as-is and only a
# missing one is rebuilt and written.
# NOTE(review): this cell does not create ".artifacts" itself; the dumps assume
# the directory already exists.
if os.path.exists(SWAP_WARP):
    warp_swap = joblib.load(SWAP_WARP)
else:
    Hs = [get_hidden_states(t, layer_offset=SWAP_LAYER) for t in swap_cal]
    warp_swap = fit_token_warp(Hs, d=3, whiten=True); joblib.dump(warp_swap, SWAP_WARP)

if os.path.exists(SWAP_PRIORS):
    priors_swap = joblib.load(SWAP_PRIORS)
else:
    priors_swap = build_priors_feature_MFpeak(warp_swap, swap_cal, SWAP_LAYER, proto_w=160)
    joblib.dump(priors_swap, SWAP_PRIORS)

# ---------------- Deposit path: auto-pick best layer ----------------
deposit_cal = [
    "supply 7.0245 SOL to makerdao",
    "deposit 3 WBTC into vault",
    "supply 150 USDC to aave",
    "deposit 2 ETH to compound",
    "supply 0.5 WETH to makerdao",
    "deposit 200 DAI into vault",
    "supply 10 SOL to aave",
    "deposit 25 USDC to makerdao",
    "supply 3 ETH to makerdao",
]
DEP_CAND_LAYERS = [-5, -6, -7]

def avg_mf_on_cal(layer):
    """Score a candidate layer on the deposit calibration prompts.

    A warp is fitted on the hidden states of ``deposit_cal`` at ``layer``. For
    each prompt the best matched-filter peak over its (lightly smoothed)
    channels is taken, and the mean of those per-prompt maxima is the score.

    Returns:
        ``(mean_peak, warp)``.
    """
    hiddens = [get_hidden_states(t, layer_offset=layer) for t in deposit_cal]
    w = fit_token_warp(hiddens, d=3, whiten=True)

    def _best_peak(prompt):
        channel_peaks = []
        for ch in traces_from_text(w, prompt, layer_offset=layer):
            n = len(ch)
            proto_w = max(13, min(int(0.7*n), 61))
            smooth = moving_average(ch, k=min(9, max(3, n//6)))
            channel_peaks.append(mf_peak(smooth, proto_w))
        return max(channel_peaks)

    peaks = [_best_peak(t) for t in deposit_cal]
    return np.mean(peaks), w

# Pick the candidate layer with the highest calibration score. The first
# candidate wins ties because the comparison is strict.
# NOTE(review): if no score exceeds -1 (e.g. every score is NaN), best_layer
# and best_warp stay None and the dumps below would store None.
best_layer, best_warp, best_score = None, None, -1
for L in DEP_CAND_LAYERS:
    score, w = avg_mf_on_cal(L)
    if score > best_score:
        best_score, best_layer, best_warp = score, L, w

# persist the best deposit warp + priors
# File names embed the chosen layer; the files are written on every run.
DEP_LAYER  = best_layer
DEP_WARP   = f".artifacts/wdd_warp_deposit_L{DEP_LAYER}.joblib"
DEP_PRIORS = f".artifacts/wdd_priors_deposit_L{DEP_LAYER}.joblib"
joblib.dump(best_warp, DEP_WARP)
priors_dep = build_priors_feature_MFpeak(best_warp, deposit_cal, DEP_LAYER, proto_w=160)
joblib.dump(priors_dep, DEP_PRIORS)

# ---------------- Test set (+ deposit fallback) ----------------
def deposit_fallback_pass(traces, floor=0.18):
    """Fallback acceptance check for deposit prompts on which the prior abstained.

    Every channel in ``traces`` is smoothed with ``moving_average`` and scored
    with ``mf_peak``; the prompt is accepted when the best score reaches
    ``floor``.

    Args:
        traces: sequence of channels, each a sequence supporting ``len``.
        floor: minimum peak score required to accept.

    Returns:
        ``(accepted, best_peak)``. ``best_peak`` is never below 0.0 because the
        running maximum starts there. Empty ``traces`` gives ``(False, 0.0)``.
    """
    # No channels means no evidence: abstain instead of failing on traces[0].
    if len(traces) == 0:
        return False, 0.0

    # `proto_w` is derived from the first channel's length only; the smoothing
    # window below is derived from each channel's own length.
    T = len(traces[0]); proto_w = max(13, min(int(0.7*T), 61))
    mx = 0.0
    for ch in traces:
        k = min(9, max(3, len(ch)//6))
        mx = max(mx, mf_peak(moving_average(ch, k=k), proto_w))
    return mx >= floor, mx

# Prompts run through the prior gate below. Each one is protocol-normalized and
# routed by infer_action before the matching (swap or deposit) priors are applied.
tests = [
    "supply 7.0245 SOL to maker",        # normalizes to makerdao
    "swap 10 ETH to USDC on uniswap",
    "swap 10 ETH to USDC on uniswa",     # protocol name cut short ("uniswa")
    "attempt a borrow with low health factor",
    # The next three contain no amount or token; presumably intended as
    # negatives that should abstain (confirm).
    "that's a wrap",
    "sing a swap",
    "trade a pop",
    "trade 5.6456 WETH for AAVE on sushiswap (optimism)",
    "trade 5.9195 ETH for ARB on sushiswap (arbitrum)",
    "market swap 4709.1849 ARB->WBTC using uniswap on ethereum",
]

# Report table: one row per test prompt with the gate decision and diagnostics.
# The prompt column grows to fit the longest prompt and the status column is
# 7 wide (len("ABSTAIN")), so long prompts and ABSTAIN rows stay aligned.
prompt_w = max([42, *(len(t) for t in tests)])
header = (f"{'prompt'.ljust(prompt_w)} | {'prior':<7} | keep | sigma | proto_w | "
          f"which_prior      | note")
print(header)
print("-" * max(120, len(header)))
for raw in tests:
    text = normalize_protocols(raw)
    act  = infer_action(text)
    note = ""  # <-- ensure defined for all branches

    if act == "swap":
        traces = traces_from_text(warp_swap, text, layer_offset=SWAP_LAYER)
        ok, info = wdd_prior_pass(traces, priors_swap, z=1.7, rel_floor=0.55, alpha=0.14, beta_s=0.50)
        # Derive the label from SWAP_LAYER, as the deposit branch does with
        # DEP_LAYER, so the printed layer cannot drift from the one used.
        which = f"swap(L{SWAP_LAYER})"

    elif act == "deposit":
        traces = traces_from_text(best_warp, text, layer_offset=DEP_LAYER)
        ok, info = wdd_prior_pass(traces, priors_dep,  z=1.7, rel_floor=0.55, alpha=0.16, beta_s=0.54)
        which = f"deposit(L{DEP_LAYER})"
        if not ok:
            # fallback: MF floor
            ok_fallback, mx = deposit_fallback_pass(traces, floor=0.18)
            if ok_fallback:
                ok = True
                info.setdefault("keep", [])
                note = f"fallback: MF_peak={mx:.2f}"
            else:
                note = f"mf={mx:.2f}"
    else:
        ok, info, which = False, {"keep":[], "sigma":None, "proto_w":None}, "unknown"

    status = "PASS" if ok else "ABSTAIN"
    # Tolerate a missing/None "keep" and non-string entries when joining.
    keep = info.get("keep")
    keep_cell = ",".join(str(item) for item in ([] if keep is None else keep)) or "-"
    sigma_cell = info.get("sigma") if info.get("sigma") is not None else "-"
    proto_cell = info.get("proto_w") if info.get("proto_w") is not None else "-"

    print(f"{raw.ljust(prompt_w)} | {status:>7} | {keep_cell:^4} | "
          f"{sigma_cell:^5} | "
          f"{proto_cell:^7} | "
          f"{which:<16} | {note}")

prompt                                     | prior  | keep | sigma | proto_w | which_prior      | note
------------------------------------------------------------------------------------------------------------------------
supply 7.0245 SOL to maker                 |   PASS |  0   |   4   |   13    | deposit(L-6)     | 
swap 10 ETH to USDC on uniswap             |   PASS |  2   |   4   |   13    | swap(L-4)        | 
swap 10 ETH to USDC on uniswa              |   PASS |  2   |   4   |   13    | swap(L-4)        | 
attempt a borrow with low health factor    | ABSTAIN |  -   |   -   |    -    | unknown          | 
that's a wrap                              | ABSTAIN |  -   |   -   |    -    | unknown          | 
sing a swap                                | ABSTAIN |  -   |   -   |    -    | swap(L-4)        | 
trade a pop                                | ABSTAIN |  -   |   -   |    -    | swap(L-4)        | 
trade 5.6456 WETH for AAVE on sushiswap (optimism) |   PASS |  2   |   4   |   

In [None]:
from micro_lm.verify.wdd_helpers import make_trace_encoder, build_and_save_warp, build_priors_from_texts
import joblib

# Encoder handed to both helper calls below (warp fitting and prior building).
enc = make_trace_encoder()
# Combined calibration set: ten deposit-style prompts followed by ten swap-style
# prompts. One warp and one priors object are built over all of them.
# NOTE(review): "supply 7.0245 SOL to maker" is passed as-is here, whereas the
# evaluation loop normalizes protocols first; confirm the helpers normalize
# internally or that raw text is intended.
cal_texts = [
    # deposit
    "supply 7.0245 SOL to maker",
    "deposit 16.104 WETH into stargate on base",
    "supply 34.3849 AVAX to lido",
    "fund compound with 3877.0728 ARB (optimism) — this minute",
    "add liquidity: deposit 21761 USDC to sushiswap (arbitrum)",
    "deposit 19057 USDT into pendle on polygon",
    "add liquidity: deposit 504.9005 ARB to yearn (polygon)",
    "supply 1394.564 ARB to yearn — today",
    "supply 1777.8921 ARB to curve — ok with higher gas",
    "fund uniswap with 33.9314 SOL (optimism)",
    # swap
    "swap 10 ETH to USDC on uniswap",
    "trade 38.4142 WETH for AAVE on balancer (polygon) with slippage 1%",
    "market swap 15289 USDT->OP using balancer on avalanche",
    "trade 4047.6346 LINK for ETH on sushiswap (solana) with slippage 0.5% — asap",
    "trade 5.6456 WETH for AAVE on sushiswap (optimism)",
    "trade 5.9195 ETH for ARB on sushiswap (arbitrum)",
    "market swap 4709.1849 ARB->WBTC using uniswap on ethereum",
    "market swap 27.444 SOL->AAVE using uniswap on ethereum — asap",
    "convert 751.3489 AAVE into ARB via curve on solana with slippage 0.2%",
    "trade 14.6456 WETH for OP on sushiswap (arbitrum) — asap, high yield mode"
]

# Build the warp from the calibration prompts; the helper receives the target
# path and, going by its name, presumably also writes the warp there.
warp = build_and_save_warp(enc, cal_texts, path=".artifacts/wdd_token_warp.joblib")
# Build priors over the same prompts using that warp, then persist them.
priors = build_priors_from_texts(enc, warp, cal_texts)
joblib.dump(priors, ".artifacts/wdd_priors.joblib")

# Disabled alternative kept for reference: the same build with an explicit
# proto_w=160, writing to the same priors path.
# priors = build_priors_from_texts(enc, warp, cal_texts, proto_w=160)  # cal_texts include swaps + deposits
# joblib.dump(priors, ".artifacts/wdd_priors.joblib")
