In [6]:
import numpy as np
import csv
from math import sqrt
from scipy.special import erfc

In [7]:
# ------------------------
# Utilities
# ------------------------

# PDG codes of the particle species counted as visible (neutrinos and any
# other code not listed here are treated as invisible by the selection).
VISIBLE_PDGS = np.array(
    [211, -211]        # pi+ / pi-
    + [111]            # pi0
    + [321, -321]      # K+ / K-
    + [2212, -2212]    # proton / antiproton
    + [2112, -2112]    # neutron / antineutron
    + [130]            # K_L0
    + [22]             # photon
    + [11, -11]        # e- / e+
    + [13, -13],       # mu- / mu+
    dtype=int,
)

# Charged subset used for track counting: each species followed by its
# antiparticle (pi, K, p, e, mu).
CHARGED_PDGS = np.array(
    [sign * code for code in (211, 321, 2212, 11, 13) for sign in (1, -1)],
    dtype=int,
)

# Electron and muon codes (both charges) used by the lepton veto.
LEPTON_PDGS = {sign * code for code in (11, 13) for sign in (1, -1)}

def compute_thrust_event_aleph_like(px, py, pz, n_iter=10, n_random=5, rng=None):
    """
    ALEPH-like thrust finder.

    Runs the iterative sign-flip thrust maximisation from a small set of seed
    directions and keeps the seed that ends with the largest thrust. The seeds
    are the direction of the particle with the largest |p|, the particle with
    the largest |pz|, and up to ``n_random`` randomly chosen particles. Because
    only a few seeds are tried, the result is a local maximum and is not
    guaranteed to be the global thrust.

    Parameters
    ----------
    px, py, pz : array-like, shape (n,)
        Momentum components of the particles entering the thrust.
    n_iter : int
        Maximum number of refinement iterations per seed.
    n_random : int
        Maximum number of randomly chosen seed particles (drawn without
        replacement).
    rng : None, int, or numpy random generator, optional
        Source of randomness for the random seeds. ``None`` (default) uses the
        global ``np.random`` state, exactly as before this parameter existed.
        An int is turned into ``np.random.default_rng(int)``. Any object with a
        ``choice(n, size, replace=...)`` method (``Generator``,
        ``RandomState``) is used as given. Pass a seeded generator to make the
        result reproducible.

    Returns
    -------
    thrust : float
        Best thrust value found (0.0 if the total momentum is zero).
    cos_theta_thrust : float
        |z-component| of the returned axis (1.0 if the total momentum is zero).
    axis : np.ndarray, shape (3,)
        Unit thrust axis; its overall sign is arbitrary.
    """
    p = np.column_stack((px, py, pz)).astype(float)
    p_mag = np.linalg.norm(p, axis=1)
    total_mag = np.sum(p_mag)

    # Covers both an empty event and an event of all-zero momenta.
    if total_mag == 0:
        return 0.0, 1.0, np.array([0., 0., 1.])

    if rng is None:
        rng = np.random                      # legacy: global NumPy state
    elif isinstance(rng, (int, np.integer)):
        rng = np.random.default_rng(rng)

    seeds = []

    # 1. Seed from largest |p| (non-zero because total_mag > 0)
    idx_maxp = np.argmax(p_mag)
    seeds.append(p[idx_maxp] / p_mag[idx_maxp])

    # 2. Seed from largest |pz|
    idx_maxpz = np.argmax(np.abs(p[:, 2]))
    if p_mag[idx_maxpz] > 0:
        seeds.append(p[idx_maxpz] / p_mag[idx_maxpz])

    # 3. Random seeds drawn from the input particles
    n_available = len(p)
    if n_available > 0:
        n_rand = min(n_random, n_available)
        rand_idx = rng.choice(n_available, n_rand, replace=False)
        for i in rand_idx:
            if p_mag[i] > 0:
                seeds.append(p[i] / p_mag[i])

    best_thrust = -1.0
    best_axis = np.array([0., 0., 1.])

    # Iterate thrust maximization from every seed
    for axis in seeds:
        prev_thrust = -1.0
        for _ in range(n_iter):
            # Flip each momentum into the hemisphere of the current axis and
            # take the direction of the summed vector as the next axis.
            dots = p @ axis
            signs = np.sign(dots)
            new_axis = np.sum(signs[:, None] * p, axis=0)
            norm = np.linalg.norm(new_axis)
            if norm == 0:
                break
            axis = new_axis / norm
            thrust_val = np.sum(np.abs(p @ axis)) / total_mag
            if abs(thrust_val - prev_thrust) < 1e-6:
                break
            prev_thrust = thrust_val

        # Final thrust for this seed
        thrust_val = np.sum(np.abs(p @ axis)) / total_mag
        if thrust_val > best_thrust:
            best_thrust = thrust_val
            best_axis = axis

    cos_theta_thrust = abs(best_axis[2])
    return best_thrust, cos_theta_thrust, best_axis

def compute_thrust_with_angle2(px, py, pz, n_iter=20, tol=1e-8):
    """
    Single-seed iterative thrust, started from the highest-|p| particle.

    Each iteration flips every momentum into the hemisphere of the current
    axis (zero projections count as positive), sums the flipped momenta and
    normalises the sum to get the next axis. Iteration stops after ``n_iter``
    steps, when the summed vector vanishes, or when consecutive axes agree up
    to an overall sign to within ``tol``.

    Returns
    -------
    thrust_val : float
    cos_theta_thrust : float
    axis : np.ndarray shape (3,)
    """
    momenta = np.column_stack((px, py, pz)).astype(float)
    magnitudes = np.linalg.norm(momenta, axis=1)
    total = magnitudes.sum()
    if total == 0.0:
        # No momentum at all: return the conventional z-axis placeholder.
        return 0.0, 1.0, np.array([0.0, 0.0, 1.0])

    # Starting direction: the leading particle, or z if it has zero length.
    seed = momenta[np.argmax(magnitudes)]
    seed_len = np.linalg.norm(seed)
    if seed_len > 0.0:
        axis = seed / seed_len
    else:
        axis = np.array([0.0, 0.0, 1.0])

    iteration = 0
    while iteration < n_iter:
        iteration += 1

        projections = momenta @ axis
        # sign(p_i . n), with exact zeros assigned to the + hemisphere
        hemi = np.where(projections == 0.0, 1.0, np.sign(projections))

        candidate = (hemi[:, None] * momenta).sum(axis=0)
        candidate_len = np.linalg.norm(candidate)
        if candidate_len == 0.0:
            break
        candidate /= candidate_len

        # |n . n'| -> 1 means the axis stopped moving (sign-insensitive)
        settled = 1.0 - abs(np.dot(axis, candidate)) < tol
        axis = candidate
        if settled:
            break

    thrust_val = np.sum(np.abs(momenta @ axis)) / total
    cos_theta_thrust = abs(axis[2])
    return thrust_val, cos_theta_thrust, axis


def has_identified_lepton(pdg_h, E_h, p_h, p_min=2.0):
    """
    Report whether any particle is an e/mu with momentum above ``p_min``.

    The three sequences are walked in lockstep. ``E_h`` takes part in the
    pairing but its values are not used in the decision.
    """
    return any(
        abs(code) in LEPTON_PDGS and momentum > p_min
        for code, _energy, momentum in zip(pdg_h, E_h, p_h)
    )

In [8]:
def _vt_unit(px, py, eps=1e-12):
    pT = np.hypot(px, py)
    vx = np.divide(px, pT, out=np.zeros_like(px, dtype=float), where=pT>eps)
    vy = np.divide(py, pT, out=np.zeros_like(py, dtype=float), where=pT>eps)
    return vx, vy, pT

def _sigma_d0_um(px, py, pz, a_um=25.0, b_um=95.0):
    """ALEPH-like σ(d0): sqrt(25^2 + (95/p)^2) in microns."""
    p = np.sqrt(px**2 + py**2 + pz**2) + 1e-12
    return np.sqrt(a_um**2 + (b_um/p)**2)

def _erfc_half_from_absS(S_abs):
    """
    Return 0.5*erfc(S/sqrt(2)) for array-like S_abs using
    Abramowitz–Stegun 7.1.26 (no SciPy/np.erfc needed).
    """
    S_abs = np.asarray(S_abs, float)
    y = S_abs / np.sqrt(2.0)
    # A&S constants
    p  = 0.3275911
    a1 = 0.254829592; a2 = -0.284496736; a3 = 1.421413741
    a4 = -1.453152027; a5 =  1.061405429
    t = 1.0/(1.0 + p*y)
    poly = (((((a5*t + a4)*t + a3)*t + a2)*t + a1)*t)
    erfc_y = poly * np.exp(-y*y)
    return 0.5 * erfc_y

def track_pv_probability_simple(x_mm, y_mm, px, py, pz,
                                a_um=25.0, b_um=95.0,
                                sigma_scale=1.3977865, S_cap=5.0):
    """
    Primary-vertex compatibility probability of one track, from transverse d0.

    Scalar inputs: x, y in mm; momentum in GeV; the resolution is evaluated in
    microns. The significance S = d0/sigma is capped at ``S_cap`` and converted
    to the one-sided tail 0.5*erfc(S/sqrt(2)) with the Abramowitz-Stegun 7.1.26
    approximation (no SciPy). A track with zero total or zero transverse
    momentum returns 1.0.
    """
    p_total = sqrt(px*px + py*py + pz*pz)
    if p_total <= 0.0:
        return 1.0
    p_trans = sqrt(px*px + py*py)
    if p_trans <= 0.0:
        return 1.0

    # Transverse direction and the resulting |d0| of the production point
    ux = px / p_trans
    uy = py / p_trans
    d0_mm = abs(x_mm*uy - y_mm*ux)

    # Resolution in microns, both terms inflated by the global scale factor
    sigma_um = sqrt((a_um*sigma_scale)**2 + (b_um*sigma_scale/p_total)**2)
    significance = (1e3 * d0_mm) / max(sigma_um, 1e-3)   # mm -> um
    significance = min(abs(significance), S_cap)         # cap so one track cannot dominate

    # 0.5*erfc(S/sqrt(2)) via A&S 7.1.26, Horner form starting from a5
    arg = significance / sqrt(2.0)
    t = 1.0/(1.0 + 0.3275911*arg)
    horner = 1.061405429
    for coeff in (-1.453152027, 1.421413741, -0.284496736, 0.254829592):
        horner = horner*t + coeff
    erfc_arg = (horner*t) * np.exp(-arg*arg)
    return float(0.5 * erfc_arg)

def hemisphere_btag_aleph_simple(
    x_mm, y_mm, px, py, pz, pdg,
    alpha_hemi_cut=0.001, min_tracks=7, pmin=0.5,
    a_um=25.0, b_um=95.0, ip_cap_mm=4.0,
    sigma_scale=1.3977865, S_cap=5.0, use_topk=4
):
    """
    Impact-parameter b-tag for one hemisphere that the caller already selected.

    A track is "good" when |PDG| is in CHARGED_PDGS, its momentum exceeds
    ``pmin`` and its transverse |d0| is below ``ip_cap_mm``. Every good track
    gets a PV probability from ``track_pv_probability_simple``. The hemisphere
    variable is the product of the ``use_topk`` smallest probabilities, or of
    all of them when ``use_topk`` is falsy or not smaller than the number of
    good tracks.

    Returns
    -------
    (is_tagged, alpha_hemi, n_good, probs)
        ``is_tagged`` is ``alpha_hemi < alpha_hemi_cut``; ``probs`` is the list
        of clipped per-track probabilities. With fewer than ``min_tracks`` good
        tracks the result is ``(False, 1.0, n_good, [])``.
    """
    xs = np.asarray(x_mm, float)
    ys = np.asarray(y_mm, float)
    px = np.asarray(px, float)
    py = np.asarray(py, float)
    pz = np.asarray(pz, float)
    codes = np.asarray(pdg)

    # Transverse impact parameter of each production point
    ux, uy, _ = _vt_unit(px, py)
    d0_mm = np.abs(xs*uy - ys*ux)

    # Good-track selection: charged species, momentum floor, d0 sanity cap
    momentum = np.sqrt(px**2 + py**2 + pz**2)
    selected = (
        np.isin(np.abs(codes), CHARGED_PDGS)
        & (momentum > pmin)
        & (d0_mm < ip_cap_mm)
    )

    good_idx = np.flatnonzero(selected)
    n_good = int(good_idx.size)
    if n_good < min_tracks:
        return False, 1.0, n_good, []

    # Per-track PV probabilities, clipped away from zero so log() stays finite
    raw_probs = [
        track_pv_probability_simple(
            xs[k], ys[k], px[k], py[k], pz[k],
            a_um=a_um, b_um=b_um,
            sigma_scale=sigma_scale, S_cap=S_cap,
        )
        for k in good_idx
    ]
    probs = np.clip(np.array(raw_probs, float), 1e-300, 1.0)

    # Keep only the K most displaced tracks (smallest probabilities) if asked
    restrict = bool(use_topk) and probs.size > use_topk
    chosen = np.partition(probs, use_topk-1)[:use_topk] if restrict else probs
    alpha_hemi = float(np.exp(np.sum(np.log(chosen))))

    return (alpha_hemi < alpha_hemi_cut), alpha_hemi, n_good, probs.tolist()

In [9]:
def compute_missing_energy(px, py, pz, E, pdg, mask1, mask2,
                           sqrt_s=91.2, method="simple", tol=1e-6,
                           visible_pdgs=None):
    """
    ALEPH-style missing energy per hemisphere.

    For each hemisphere, sums the energy of the visible particles only (any
    PDG code not in the visible list, e.g. neutrinos, is excluded), builds the
    visible invariant mass squared, and returns ``E_true - E_vis``.

    Parameters
    ----------
    px, py, pz, E : array-like, shape (n,)
        Particle kinematics.
    pdg : array-like of int, shape (n,)
        PDG codes. Codes may repeat, as they do in any real event.
    mask1, mask2 : array-like of bool, shape (n,)
        Hemisphere assignments.
    sqrt_s : float
        CM energy [GeV].
    method : {"simple", "aleph"}
        How the expected hemisphere energy is defined. "simple" uses the beam
        energy ``sqrt_s/2`` for both. "aleph" uses
        ``(s + m_own^2 - m_other^2) / (2*sqrt_s)`` with the visible masses.
    tol : float
        Numerical tolerance for negative m^2: values in ``[-tol, 0)`` are
        clipped to 0, more negative values are returned unchanged.
    visible_pdgs : array-like of int, optional
        PDG codes counted as visible. Defaults to the module-level
        ``VISIBLE_PDGS``.

    Returns
    -------
    (E_vis1, E_miss1, m1_sq), (E_vis2, E_miss2, m2_sq)

    Raises
    ------
    ValueError
        If ``method`` is not "simple" or "aleph".
    """
    if method not in ("simple", "aleph"):
        raise ValueError("method must be 'simple' or 'aleph'")

    px = np.asarray(px, float)
    py = np.asarray(py, float)
    pz = np.asarray(pz, float)
    E = np.asarray(E, float)
    pdg = np.asarray(pdg)
    mask1 = np.asarray(mask1, bool)
    mask2 = np.asarray(mask2, bool)

    s = sqrt_s**2
    E_beam = sqrt_s / 2.0

    # --- Visible hemisphere masks (exclude neutrinos).
    # `assume_unique=True` must NOT be used here: pdg contains repeated codes,
    # and under that flag NumPy's sort-based path can report a duplicated
    # invisible code as visible.
    if visible_pdgs is None:
        visible_pdgs = VISIBLE_PDGS
    is_visible = np.isin(pdg, np.asarray(visible_pdgs))
    vis_mask1 = mask1 & is_visible
    vis_mask2 = mask2 & is_visible

    E_vis1 = np.sum(E[vis_mask1])
    E_vis2 = np.sum(E[vis_mask2])

    # --- Invariant mass squared of the visible system in one hemisphere
    def _mass_sq(mask):
        E_h = E[mask].sum()
        px_h = px[mask].sum()
        py_h = py[mask].sum()
        pz_h = pz[mask].sum()
        val = E_h**2 - (px_h**2 + py_h**2 + pz_h**2)
        # safeguard: clip only tiny negatives, keep genuine negatives visible
        if val < -tol:
            return val
        return max(val, 0.0)

    m1_sq = _mass_sq(vis_mask1)
    m2_sq = _mass_sq(vis_mask2)

    # --- E_true depending on method
    if method == "simple":
        E_true1 = E_beam
        E_true2 = E_beam
    else:  # "aleph": two-body kinematics with the visible hemisphere masses
        E_true1 = (s + m1_sq - m2_sq) / (2 * sqrt_s)
        E_true2 = (s + m2_sq - m1_sq) / (2 * sqrt_s)

    # --- Missing energies
    E_miss1 = E_true1 - E_vis1
    E_miss2 = E_true2 - E_vis2

    return (E_vis1, E_miss1, m1_sq), (E_vis2, E_miss2, m2_sq)

In [10]:
# ------------------------
# Main function
# ------------------------

def select_missing_energy_events_fast_with_btag(
    input_path,
    sqrt_s=91.2,
    min_tracks=7,
    progress_step=100_000,
    energy_method="aleph",
    btag_mode="aleph",
    max_events=None,
):
    """
    Stream an event file and apply the missing-energy + b-tag selection.

    NOTE: a later cell of this notebook defines another function with the same
    name. When the cells run top to bottom, that later definition replaces
    this one.

    Input format: one event per non-empty line ('#' lines are skipped). Each
    line is a flat list of numbers in groups of 9 per particle:
    ``pdg px py pz E x y z r``. x and y are handed to the b-tag helper as
    millimetres.

    Cuts, in order (each ``cut_counts`` entry is the number of events that
    survived up to and including that cut):

    1. ``thrust``    - thrust of the visible particles > 0.85
    2. ``cos``       - |cos(theta_thrust)| < 0.7
    3. ``missing_E`` - at least one hemisphere with E_miss > 20 GeV
    4. ``btag``      - opposite hemisphere tagged by
                       ``hemisphere_btag_aleph_simple`` (only when
                       ``btag_mode == "aleph"``; otherwise the step is skipped
                       and this counter stays 0)
    5. ``opp_veto``  - opposite hemisphere E_miss < 20 GeV
    6. ``tracks``    - at least ``min_tracks`` charged particles with
                       E > 1 GeV in the opposite hemisphere
    7. ``lep_veto``  - no e/mu above the ``has_identified_lepton`` threshold
                       in the signal hemisphere

    The signal hemisphere is the one with the larger missing energy.

    The thrust finder draws random seeds; with its defaults they come from the
    global NumPy random state, so call ``np.random.seed(...)`` beforehand for a
    reproducible cutflow.

    Parameters
    ----------
    input_path : str or path-like
        Event file to read.
    sqrt_s : float
        CM energy [GeV] passed to ``compute_missing_energy``.
    min_tracks : int
        Multiplicity requirement of the ``tracks`` cut.
    progress_step : int
        Print a progress line every this many events (0/None disables it).
    energy_method : {"simple", "aleph"}
        Forwarded to ``compute_missing_energy``.
    btag_mode : str
        "aleph" enables the b-tag cut; any other value skips it.
    max_events : int, optional
        Stop after this many events have been scanned (None = whole file).

    Returns
    -------
    cut_counts : dict
    interval_counts : dict
        Number of kept events per signal-hemisphere missing-energy bin.

    Raises
    ------
    ValueError
        If a line's token count is not a multiple of 9.
    """
    scanned = 0
    kept = 0

    cut_counts = {
        "total": 0,
        "thrust": 0,
        "cos": 0,
        "missing_E": 0,
        "btag": 0,
        "opp_veto": 0,
        "tracks": 0,
        "lep_veto": 0,
    }

    # --- missing-energy bins (exclusive) ---
    interval_edges = [20, 25, 30, 35, 40, 45]
    interval_counts = {f"{lo}-{hi}": 0 for lo, hi in zip(interval_edges[:-1], interval_edges[1:])}
    interval_counts[">=45"] = 0

    # --- b-tag working point ---
    B_ALPHA_CUT   = 0.001
    B_MIN_TRACKS  = 7         # multiplicity requirement for the tag hemisphere
    B_PMIN        = 0.5       # GeV for good tracks (b-tag definition)
    B_A_UM        = 25.0
    B_B_UM        = 95.0
    B_IP_CAP_MM   = 4.0
    B_SIG_SCALE   = 1.3977865 # scale applied to both resolution terms
    B_S_CAP       = 5.0
    B_USE_TOPK    = 4         # product of 4 most displaced tracks

    with open(input_path, "r") as fin:
        for raw in fin:
            line = raw.strip()
            if not line or line.startswith("#"):
                continue
            if max_events is not None and scanned >= max_events:
                break

            tk = line.split()
            if len(tk) % 9 != 0:
                raise ValueError(f"Line has {len(tk)} tokens (not multiple of 9).")

            arr = np.fromiter((float(x) for x in tk), dtype=float).reshape(-1, 9)
            pdg = arr[:, 0].astype(int)
            px, py, pz, E = arr[:, 1:5].T
            # columns 7 and 8 (z, r) are present in the file but not used here
            x_prod, y_prod = arr[:, 5], arr[:, 6]

            scanned += 1
            cut_counts["total"] += 1
            if progress_step and scanned % progress_step == 0:
                print(f"Processed {scanned:,} events... kept {kept:,}")

            # --- visible mask (exclude neutrinos, etc.) ---
            # No assume_unique: pdg holds repeated codes, and that flag can
            # make np.isin mark duplicated invisible codes as visible.
            vis_mask = np.isin(pdg, VISIBLE_PDGS)

            # --- thrust and |cos(theta)| cuts (ALEPH-like) ---
            thrust, cos_theta, axis = compute_thrust_event_aleph_like(
                px[vis_mask], py[vis_mask], pz[vis_mask]
            )
            if thrust <= 0.85:
                continue
            cut_counts["thrust"] += 1

            if cos_theta >= 0.7:
                continue
            cut_counts["cos"] += 1

            # --- hemispheres (using axis from visible set) ---
            dots  = px * axis[0] + py * axis[1] + pz * axis[2]
            mask1 = dots > 0
            mask2 = ~mask1

            # --- missing energies per hemisphere ---
            (_, E_miss1, _), (_, E_miss2, _) = compute_missing_energy(
                px, py, pz, E, pdg, mask1, mask2, sqrt_s=sqrt_s, method=energy_method
            )
            E_miss = [E_miss1, E_miss2]

            # --- missing-E region cut (either hemisphere > 20 GeV) ---
            if not (E_miss[0] > 20 or E_miss[1] > 20):
                continue
            cut_counts["missing_E"] += 1

            # --- identify signal vs opposite hemisphere ---
            sig_idx = 0 if E_miss[0] > E_miss[1] else 1
            opp_idx = 1 - sig_idx
            opp_mask = mask1 if opp_idx == 0 else mask2

            # --- b-tag on the OPPOSITE hemisphere, visible particles only ---
            if btag_mode == "aleph":
                final_mask = vis_mask & opp_mask
                is_b_tagged, _, _, _ = hemisphere_btag_aleph_simple(
                    x_mm=x_prod[final_mask], y_mm=y_prod[final_mask],
                    px=px[final_mask],       py=py[final_mask],       pz=pz[final_mask],
                    pdg=pdg[final_mask],
                    alpha_hemi_cut=B_ALPHA_CUT,
                    min_tracks=B_MIN_TRACKS, pmin=B_PMIN,
                    a_um=B_A_UM, b_um=B_B_UM, ip_cap_mm=B_IP_CAP_MM,
                    sigma_scale=B_SIG_SCALE, S_cap=B_S_CAP, use_topk=B_USE_TOPK
                )
                if not is_b_tagged:
                    continue
                cut_counts["btag"] += 1
            # else: if btag_mode != "aleph", skip b-tagging

            # --- opposite-hemisphere missing-E veto ---
            if E_miss[opp_idx] >= 20:
                continue
            cut_counts["opp_veto"] += 1

            # --- tracks cut on the opposite hemisphere (E > 1 GeV definition) ---
            #     (this is separate from the b-tag's momentum-based good-track def)
            E_h   = E[opp_mask]
            pdg_h = pdg[opp_mask]
            track_mask = np.isin(pdg_h, CHARGED_PDGS) & (E_h > 1.0)
            if np.count_nonzero(track_mask) < min_tracks:
                continue
            cut_counts["tracks"] += 1

            # --- semileptonic veto on the SIGNAL hemisphere ---
            sig_mask = mask1 if sig_idx == 0 else mask2
            if has_identified_lepton(
                pdg[sig_mask],
                E[sig_mask],
                np.sqrt(px[sig_mask]**2 + py[sig_mask]**2 + pz[sig_mask]**2)
            ):
                continue
            cut_counts["lep_veto"] += 1

            # --- fill missing-energy bins (exclusive) using the SIGNAL hemisphere ---
            E_sig = E_miss[sig_idx]
            placed = False
            for lo, hi in zip(interval_edges[:-1], interval_edges[1:]):
                if lo <= E_sig < hi:
                    interval_counts[f"{lo}-{hi}"] += 1
                    placed = True
                    break
            if not placed and E_sig >= interval_edges[-1]:
                interval_counts[">=45"] += 1

            kept += 1

    # --- cutflow summary ---
    n_total = cut_counts["total"]
    print("\n--- Cutflow summary ---")
    for step, n in cut_counts.items():
        frac = 100.0 * n / n_total if n_total > 0 else 0.0
        print(f"{step:12s}: {n:,}  ({frac:.6f}%)")

    # Guarded like the loop above so an empty input does not raise.
    kept_frac = 100.0 * kept / n_total if n_total > 0 else 0.0
    print(f"\nFinal kept events: {kept:,} ({kept_frac:.6f}%)")

    print("\n--- Events per missing-energy bin (exclusive) ---")
    for k, v in interval_counts.items():
        print(f"{k:>6s}: {v}")

    return cut_counts, interval_counts


In [12]:
# ------------------------
# Main function
# ------------------------

def select_missing_energy_events_fast_with_btag(
    input_path,
    sqrt_s=91.2,
    min_tracks=7,
    progress_step=100_000,
    energy_method="aleph",
    btag_mode="aleph",
    max_events=None,
):
    """
    Stream an event file and apply the missing-energy selection with a
    jet-probability + topology b-tag on the opposite hemisphere.

    NOTE: this redefines a function of the same name from an earlier cell of
    the notebook. After both cells have run, this is the version that a call
    resolves to.

    Input format: one event per non-empty line ('#' lines are skipped). Each
    line is a flat list of numbers in groups of 9 per particle:
    ``pdg px py pz E x y z r``. x and y are handed to the b-tag helper as
    millimetres.

    Cuts, in order (each ``cut_counts`` entry is the number of events that
    survived up to and including that cut):

    1. ``thrust``    - thrust of the visible particles > 0.85
    2. ``cos``       - |cos(theta_thrust)| < 0.7
    3. ``missing_E`` - at least one hemisphere with E_miss > 20 GeV
    4. ``btag``      - opposite hemisphere passes (A) the multiplicity-
                       corrected jet-probability cut and (B) the displaced-
                       track topology requirement (only when
                       ``btag_mode == "aleph"``; otherwise the step is skipped
                       and this counter stays 0)
    5. ``opp_veto``  - opposite hemisphere E_miss < 25 GeV
    6. ``tracks``    - at least ``min_tracks`` charged particles with
                       E > 1 GeV in the opposite hemisphere
    7. ``lep_veto``  - no e/mu above the ``has_identified_lepton`` threshold
                       in the signal hemisphere

    The signal hemisphere is the one with the larger missing energy.

    The thrust finder draws random seeds; with its defaults they come from the
    global NumPy random state, so call ``np.random.seed(...)`` beforehand for a
    reproducible cutflow.

    Parameters
    ----------
    input_path : str or path-like
        Event file to read.
    sqrt_s : float
        CM energy [GeV] passed to ``compute_missing_energy``.
    min_tracks : int
        Multiplicity requirement of the ``tracks`` cut.
    progress_step : int
        Print a progress line every this many events (0/None disables it).
    energy_method : {"simple", "aleph"}
        Forwarded to ``compute_missing_energy``.
    btag_mode : str
        "aleph" enables the b-tag cut; any other value skips it.
    max_events : int, optional
        Stop after this many events have been scanned (None = whole file).

    Returns
    -------
    cut_counts : dict
    interval_counts : dict
        Number of kept events per signal-hemisphere missing-energy bin.

    Raises
    ------
    ValueError
        If a line's token count is not a multiple of 9.
    """
    scanned = 0
    kept = 0

    cut_counts = {
        "total": 0,
        "thrust": 0,
        "cos": 0,
        "missing_E": 0,
        "btag": 0,
        "opp_veto": 0,
        "tracks": 0,
        "lep_veto": 0,
    }

    # --- missing-energy bins (exclusive) ---
    interval_edges = [20, 25, 30, 35, 40, 45]
    interval_counts = {f"{lo}-{hi}": 0 for lo, hi in zip(interval_edges[:-1], interval_edges[1:])}
    interval_counts[">=45"] = 0

    # --- b-tag working point ---
    JP_ALPHA_CUT   = 0.001       # Jet-Probability hemisphere CL cut (tight)
    B_MIN_TRACKS   = 7           # multiplicity on the tag hemisphere
    B_PMIN         = 0.5         # GeV (good tracks for b-tag)
    B_A_UM         = 25.0
    B_B_UM         = 95.0
    B_IP_CAP_MM    = 4.0
    B_SIG_SCALE    = 1.3978
    B_S_CAP        = 5.0         # cap |S|
    # topology
    POS_S_MIN      = 3.0         # need displaced positive tracks above this
    N_POS_MIN      = 2
    POS_S_HARD     = 3.7         # at least one hard positive displacement

    # --- opposite-hemisphere missing-E veto threshold [GeV] ---
    # NOTE(review): the comment that used to sit on this cut said
    # "align to 20 GeV", but the value in the code is 25. The code value is
    # kept unchanged here; confirm which of the two is intended.
    OPP_EMISS_MAX  = 25

    with open(input_path, "r") as fin:
        for raw in fin:
            line = raw.strip()
            if not line or line.startswith("#"):
                continue
            if max_events is not None and scanned >= max_events:
                break

            tk = line.split()
            if len(tk) % 9 != 0:
                raise ValueError(f"Line has {len(tk)} tokens (not multiple of 9).")

            arr = np.fromiter((float(x) for x in tk), dtype=float).reshape(-1, 9)
            pdg = arr[:, 0].astype(int)
            px, py, pz, E = arr[:, 1:5].T
            # columns 7 and 8 (z, r) are present in the file but not used here
            x_prod, y_prod = arr[:, 5], arr[:, 6]

            scanned += 1
            cut_counts["total"] += 1
            if progress_step and scanned % progress_step == 0:
                print(f"Processed {scanned:,} events... kept {kept:,}")

            # --- visible mask (exclude neutrinos, etc.) ---
            # No assume_unique: pdg holds repeated codes, and that flag can
            # make np.isin mark duplicated invisible codes as visible.
            vis_mask = np.isin(pdg, VISIBLE_PDGS)

            # --- thrust and |cos(theta)| cuts ---
            thrust, cos_theta, axis = compute_thrust_event_aleph_like(
                px[vis_mask], py[vis_mask], pz[vis_mask]
            )
            if thrust <= 0.85:
                continue
            cut_counts["thrust"] += 1

            if cos_theta >= 0.7:
                continue
            cut_counts["cos"] += 1

            # --- hemispheres ---
            dots  = px * axis[0] + py * axis[1] + pz * axis[2]
            mask1 = dots > 0
            mask2 = ~mask1

            # --- missing energies ---
            (_, E_miss1, _), (_, E_miss2, _) = compute_missing_energy(
                px, py, pz, E, pdg, mask1, mask2, sqrt_s=sqrt_s, method=energy_method
            )
            E_miss = [E_miss1, E_miss2]

            # --- missing-E region cut (either hemi > 20) ---
            if not (E_miss[0] > 20 or E_miss[1] > 20):
                continue
            cut_counts["missing_E"] += 1

            # --- choose signal vs opposite hemisphere ---
            sig_idx = 0 if E_miss[0] > E_miss[1] else 1
            opp_idx = 1 - sig_idx
            opp_mask = mask1 if opp_idx == 0 else mask2

            # -----------------------
            #        B-TAGGING
            # -----------------------
            if btag_mode == "aleph":
                # visible-only particles in the OPPOSITE hemisphere
                final_mask = vis_mask & opp_mask

                # Per-track PV probabilities from the simple tag helper.
                # use_topk=0 makes it return/use ALL good tracks; its own
                # tag decision and alpha are not used here.
                _, _, n_good_btag, track_probs = hemisphere_btag_aleph_simple(
                    x_mm=x_prod[final_mask], y_mm=y_prod[final_mask],
                    px=px[final_mask],       py=py[final_mask],       pz=pz[final_mask],
                    pdg=pdg[final_mask],
                    alpha_hemi_cut=1.0,
                    min_tracks=B_MIN_TRACKS, pmin=B_PMIN,
                    a_um=B_A_UM, b_um=B_B_UM, ip_cap_mm=B_IP_CAP_MM,
                    sigma_scale=B_SIG_SCALE, S_cap=B_S_CAP, use_topk=0
                )
                if n_good_btag < B_MIN_TRACKS:
                    continue

                # --- (A) Jet-Probability hemisphere CL (multiplicity-corrected) ---
                #     P = Pi * sum_{j=0}^{N-1} (-ln Pi)^j / j!,  Pi = prod(p_i)
                # T = -ln(Pi) is taken from the log-sum directly so that an
                # underflowing product never reaches log(0).
                probs = np.clip(np.asarray(track_probs, float), 1e-300, 1.0)
                T = -float(np.sum(np.log(probs)))
                alpha_all = float(np.exp(-T))
                jp_sum, term = 1.0, 1.0
                for j in range(1, n_good_btag):
                    term *= T / j
                    jp_sum += term
                jp_cl = alpha_all * jp_sum
                if jp_cl >= JP_ALPHA_CUT:
                    continue  # fail JP requirement

                # --- (B) Topology: >=2 positive displaced (|S|>3.0) AND >=1 hard hit (|S|>3.7) ---
                # NOTE(review): this runs over every visible particle of the
                # opposite hemisphere (final_mask), not only the charged,
                # p > B_PMIN, d0-capped tracks used for the probabilities
                # above - confirm that is intended.
                px_o = px[final_mask]; py_o = py[final_mask]
                x_o  = x_prod[final_mask]; y_o = y_prod[final_mask]
                pT   = np.hypot(px_o, py_o)
                vx   = np.divide(px_o, pT, out=np.zeros_like(px_o), where=pT>1e-12)
                vy   = np.divide(py_o, pT, out=np.zeros_like(py_o), where=pT>1e-12)
                d0_mm = np.abs(x_o*vy - y_o*vx)

                p3 = np.sqrt(px_o**2 + py_o**2 + pz[final_mask]**2) + 1e-12
                sigma_um = np.sqrt((B_A_UM * B_SIG_SCALE)**2 + (B_B_UM * B_SIG_SCALE / p3)**2)
                S_abs = np.minimum(1e3 * d0_mm / np.maximum(sigma_um, 1e-3), B_S_CAP)  # mm -> um, cap

                # NOTE(review): the sign is sign(x*py - y*px) * sign(axis_z);
                # confirm this is the intended lifetime-sign convention.
                sgn = np.sign(x_o*py_o - y_o*px_o) * np.sign(axis[2])
                pos_disp = (sgn > 0) & (S_abs > POS_S_MIN)
                hard_hit = (sgn > 0) & (S_abs > POS_S_HARD)
                if (np.count_nonzero(pos_disp) < N_POS_MIN) or (not np.any(hard_hit)):
                    continue

                cut_counts["btag"] += 1
            # -----------------------

            # --- opposite-hemisphere missing-E veto ---
            if E_miss[opp_idx] >= OPP_EMISS_MAX:
                continue
            cut_counts["opp_veto"] += 1

            # --- tracks cut on the OPPOSITE hemisphere (E > 1 GeV definition) ---
            E_h   = E[opp_mask]
            pdg_h = pdg[opp_mask]
            track_mask = np.isin(pdg_h, CHARGED_PDGS) & (E_h > 1.0)
            if np.count_nonzero(track_mask) < min_tracks:
                continue
            cut_counts["tracks"] += 1

            # --- semileptonic veto on the SIGNAL hemisphere ---
            sig_mask = mask1 if sig_idx == 0 else mask2
            if has_identified_lepton(
                pdg[sig_mask],
                E[sig_mask],
                np.sqrt(px[sig_mask]**2 + py[sig_mask]**2 + pz[sig_mask]**2)
            ):
                continue
            cut_counts["lep_veto"] += 1

            # --- fill missing-energy bins (exclusive) using the SIGNAL hemisphere ---
            E_sig = E_miss[sig_idx]
            placed = False
            for lo, hi in zip(interval_edges[:-1], interval_edges[1:]):
                if lo <= E_sig < hi:
                    interval_counts[f"{lo}-{hi}"] += 1
                    placed = True
                    break
            if not placed and E_sig >= interval_edges[-1]:
                interval_counts[">=45"] += 1

            kept += 1

    # --- cutflow summary ---
    n_total = cut_counts["total"]
    print("\n--- Cutflow summary ---")
    for step, n in cut_counts.items():
        frac = 100.0 * n / n_total if n_total > 0 else 0.0
        print(f"{step:12s}: {n:,}  ({frac:.6f}%)")

    # Guarded like the loop above so an empty input does not raise.
    kept_frac = 100.0 * kept / n_total if n_total > 0 else 0.0
    print(f"\nFinal kept events: {kept:,} ({kept_frac:.6f}%)")

    print("\n--- Events per missing-energy bin (exclusive) ---")
    for k, v in interval_counts.items():
        print(f"{k:>6s}: {v}")

    return cut_counts, interval_counts



In [15]:
# Input event file, given as a path relative to the working directory.
filename = "z-decay-products-BSM.txt"

# ALEPH-style missing energy + ALEPH-style b-tag (btag_mode is left at its
# default, "aleph"). The call is the last expression of the cell, so the
# returned (cut_counts, interval_counts) tuple is echoed as the cell output
# after the printed cutflow.
select_missing_energy_events_fast_with_btag(
    filename, 
    energy_method="aleph"
)

Processed 100,000 events... kept 6,008
Processed 200,000 events... kept 11,972
Processed 300,000 events... kept 17,837
Processed 400,000 events... kept 23,650

--- Cutflow summary ---
total       : 490,249  (100.000000%)
thrust      : 425,932  (86.880748%)
cos         : 259,738  (52.980832%)
missing_E   : 93,596  (19.091523%)
btag        : 40,452  (8.251317%)
opp_veto    : 40,421  (8.244994%)
tracks      : 28,905  (5.895983%)
lep_veto    : 28,841  (5.882929%)

Final kept events: 28,841 (5.882929%)

--- Events per missing-energy bin (exclusive) ---
 20-25: 10837
 25-30: 8974
 30-35: 6366
 35-40: 2445
 40-45: 219
  >=45: 0


({'total': 490249,
  'thrust': 425932,
  'cos': 259738,
  'missing_E': 93596,
  'btag': 40452,
  'opp_veto': 40421,
  'tracks': 28905,
  'lep_veto': 28841},
 {'20-25': 10837,
  '25-30': 8974,
  '30-35': 6366,
  '35-40': 2445,
  '40-45': 219,
  '>=45': 0})