In [1]:
import numpy as np
import csv
from math import sqrt
from scipy.special import erfc

In [2]:
 # ------------------------
# Utilities
# ------------------------

# PDG Monte Carlo codes of the species treated as "visible" by the selection
# (the missing-energy code uses this list to exclude everything else).
VISIBLE_PDGS = np.array(
    [
        211, -211,      # charged pions
        111,            # neutral pion
        321, -321,      # charged kaons
        2212, -2212,    # proton / antiproton
        2112, -2112,    # neutron / antineutron
        130,            # K-long
        22,             # photon
        11, -11,        # electron / positron
        13, -13,        # muon / antimuon
    ],
    dtype=int,
)

# Subset of the codes above that is used to count / select charged tracks.
CHARGED_PDGS = np.array(
    [
        211, -211,      # charged pions
        321, -321,      # charged kaons
        2212, -2212,    # proton / antiproton
        11, -11,        # electron / positron
        13, -13,        # muon / antimuon
    ],
    dtype=int,
)

# Codes looked up by the lepton veto (has_identified_lepton).
LEPTON_PDGS = {11, -11, 13, -13}

def compute_thrust_event_aleph_like(px, py, pz, n_iter=10, n_random=5, rng=None):
    """
    Iterative thrust finder started from a handful of seed directions.

    Seeds are the direction of the particle with the largest |p|, the
    direction of the particle with the largest |pz|, and up to ``n_random``
    randomly chosen particle directions. Each seed is refined by repeatedly
    replacing the axis with the normalised sum of sign(p_i . n) * p_i, and
    the refined axis with the largest thrust value is kept. Because only a
    few seeds are tried, the result is a local maximum and may be below the
    true thrust.

    Parameters
    ----------
    px, py, pz : array-like
        Momentum components, one entry per particle.
    n_iter : int
        Maximum number of refinement iterations per seed.
    n_random : int
        Number of random seed particles (capped at the particle count;
        negative values are treated as 0).
    rng : None, int, or object with a ``choice`` method
        Source of randomness for the random seeds. ``None`` (default) keeps
        the historical behaviour and uses the global ``np.random`` state.
        An object exposing ``choice`` (``np.random.Generator`` or
        ``np.random.RandomState``) is used directly; anything else is passed
        to ``np.random.default_rng`` (e.g. an integer seed) so a run can be
        reproduced exactly.

    Returns
    -------
    thrust : float
        sum(|p_i . n|) / sum(|p_i|) for the best axis found.
    cos_theta_thrust : float
        |n_z| of the best axis.
    axis : np.ndarray, shape (3,)
        Unit vector of the best axis. For an event with zero total |p| the
        function returns (0.0, 1.0, [0, 0, 1]).
    """
    p = np.column_stack((px, py, pz))
    p_mag = np.linalg.norm(p, axis=1)
    total_mag = np.sum(p_mag)

    # No particles, or only zero-momentum particles: nothing to maximise.
    if total_mag == 0:
        return 0.0, 1.0, np.array([0., 0., 1.])

    seeds = []

    # 1. Seed from largest |p| (non-zero because total_mag > 0)
    idx_maxp = np.argmax(p_mag)
    seeds.append(p[idx_maxp] / p_mag[idx_maxp])

    # 2. Seed from largest |pz|
    idx_maxpz = np.argmax(np.abs(p[:, 2]))
    if p_mag[idx_maxpz] > 0:
        seeds.append(p[idx_maxpz] / p_mag[idx_maxpz])

    # 3. Random seeds drawn (without replacement) from the input particles
    if rng is None:
        chooser = np.random                      # legacy global state
    elif hasattr(rng, "choice"):
        chooser = rng                            # Generator / RandomState
    else:
        chooser = np.random.default_rng(rng)     # e.g. an integer seed

    n_available = len(p)
    n_rand = max(0, min(n_random, n_available))
    rand_idx = chooser.choice(n_available, n_rand, replace=False)
    for i in rand_idx:
        if p_mag[i] > 0:
            seeds.append(p[i] / p_mag[i])

    best_thrust = -1.0
    best_axis = np.array([0., 0., 1.])

    # Iterate thrust maximization independently for every seed
    for axis in seeds:
        prev_thrust = -1.0
        for _ in range(n_iter):
            dots = p @ axis
            signs = np.sign(dots)
            new_axis = np.sum(signs[:, None] * p, axis=0)
            norm = np.linalg.norm(new_axis)
            if norm == 0:
                # Signed momenta cancel exactly; keep the current axis.
                break
            axis = new_axis / norm
            thrust_val = np.sum(np.abs(p @ axis)) / total_mag
            # Stop once the thrust value no longer changes appreciably.
            if abs(thrust_val - prev_thrust) < 1e-6:
                break
            prev_thrust = thrust_val

        # Final thrust for this seed
        thrust_val = np.sum(np.abs(p @ axis)) / total_mag
        if thrust_val > best_thrust:
            best_thrust = thrust_val
            best_axis = axis

    cos_theta_thrust = abs(best_axis[2])
    return best_thrust, cos_theta_thrust, best_axis


def compute_thrust_with_angle2(px, py, pz, n_iter=5, tol=1e-3):
    """
    Single-seed iterative thrust estimate.

    The axis starts along the particle with the largest |p| and is updated
    to the normalised sum of s_i * p_i, with s_i = sign(p_i . n) and exact
    zeros counted as +1. Iteration ends after ``n_iter`` updates, when the
    signed sum vanishes, or as soon as consecutive axes are parallel or
    anti-parallel to within ``tol`` (measured as 1 - |n . n_new|).

    Returns
    -------
    thrust_val : float
        sum(|p_i . n|) / sum(|p_i|); 0.0 when the total |p| is zero.
    cos_theta_thrust : float
        |n_z|; 1.0 when the total |p| is zero.
    axis : np.ndarray shape (3,)
        Final unit axis; [0, 0, 1] when the total |p| is zero.
    """
    z_hat = [0.0, 0.0, 1.0]

    momenta = np.column_stack((px, py, pz)).astype(float)
    magnitudes = np.linalg.norm(momenta, axis=1)
    scalar_sum = magnitudes.sum()
    if scalar_sum == 0.0:
        return 0.0, 1.0, np.array(z_hat)

    # Starting direction: the hardest particle
    leader = momenta[np.argmax(magnitudes)]
    leader_len = np.linalg.norm(leader)
    if leader_len > 0.0:
        axis = leader / leader_len
    else:
        axis = np.array(z_hat)

    for _ in range(n_iter):
        # Orientation of every particle relative to the current axis
        orient = np.sign(momenta @ axis)
        orient[orient == 0.0] = 1.0

        candidate = (orient[:, None] * momenta).sum(axis=0)
        candidate_len = np.linalg.norm(candidate)
        if candidate_len == 0.0:
            break
        candidate /= candidate_len

        # Compare with the previous axis before replacing it; a global
        # sign flip counts as "unchanged".
        settled = 1.0 - abs(np.dot(axis, candidate)) < tol
        axis = candidate
        if settled:
            break

    thrust_val = np.sum(np.abs(momenta @ axis)) / scalar_sum
    return thrust_val, abs(axis[2]), axis


def has_identified_lepton(pdg_h, E_h, p_h, p_min=2.0):
    """
    Report whether any particle is a lepton above the momentum threshold.

    A particle counts when ``abs(pdg)`` is in ``LEPTON_PDGS`` and its entry
    in ``p_h`` is strictly greater than ``p_min``. ``E_h`` is iterated in
    step with the other inputs but its values are not used.
    """
    return any(
        abs(code) in LEPTON_PDGS and momentum > p_min
        for code, _energy, momentum in zip(pdg_h, E_h, p_h)
    )

In [3]:
def _vt_unit(px, py, eps=1e-12):
    """
    Unit vector of the transverse momentum, plus its magnitude.

    Returns ``(vx, vy, pT)`` with ``pT = hypot(px, py)``. Entries whose
    ``pT`` is not above ``eps`` get ``vx = vy = 0`` instead of a division
    by (almost) zero.
    """
    pT = np.hypot(px, py)
    usable = pT > eps

    # Pre-filled with zeros so masked-out entries stay at 0.
    vx = np.zeros_like(px, dtype=float)
    vy = np.zeros_like(py, dtype=float)
    np.divide(px, pT, out=vx, where=usable)
    np.divide(py, pT, out=vy, where=usable)
    return vx, vy, pT

def _sigma_d0_um(px, py, pz, a_um=25.0, b_um=95.0):
    """
    Impact-parameter resolution sqrt(a_um^2 + (b_um / p)^2).

    ``p`` is the momentum magnitude built from ``px, py, pz``; 1e-12 is
    added to it so a zero-momentum entry does not divide by zero. With the
    defaults this is sqrt(25^2 + (95/p)^2), quoted in microns.
    """
    momentum = np.sqrt(px**2 + py**2 + pz**2) + 1e-12
    constant_sq = a_um**2
    momentum_dependent_sq = (b_um/momentum)**2
    return np.sqrt(constant_sq + momentum_dependent_sq)

def _erfc_half_from_absS(S_abs):
    """
    Evaluate 0.5 * erfc(S / sqrt(2)) element-wise for array-like ``S_abs``.

    erfc is approximated with the Abramowitz-Stegun 7.1.26 polynomial, so
    neither SciPy nor a NumPy erfc is required.
    """
    arg = np.asarray(S_abs, float) / np.sqrt(2.0)
    t = 1.0/(1.0 + 0.3275911*arg)

    # A&S 7.1.26 coefficients a5..a1, consumed highest order first (Horner).
    horner = 0.0
    for coeff in (1.061405429, -1.453152027, 1.421413741,
                  -0.284496736, 0.254829592):
        horner = (horner + coeff)*t

    return 0.5 * (horner * np.exp(-arg*arg))

def track_pv_probability_simple(x_mm, y_mm, px, py, pz,
                                a_um=25.0, b_um=95.0,
                                sigma_scale=1.0, S_cap=5.0):
    """
    Primary-vertex compatibility of a single track from its transverse d0.

    d0 = |x * vy - y * vx| with (vx, vy) the unit transverse momentum, and
    sigma(d0) = sigma_scale * sqrt(a_um^2 + (b_um / p)^2). The significance
    S = d0 / sigma is capped at ``S_cap`` and mapped to 0.5 * erfc(S / sqrt(2))
    using the Abramowitz-Stegun 7.1.26 approximation. Scalar inputs only;
    x, y are documented in mm, momenta in GeV, sigma in microns.

    Returns 1.0 when the total or the transverse momentum is not positive.
    """
    p_tot = sqrt(px*px + py*py + pz*pz)
    if p_tot <= 0.0:
        return 1.0
    p_trans = sqrt(px*px + py*py)
    if p_trans <= 0.0:
        return 1.0

    # Transverse impact parameter w.r.t. the origin
    ux = px/p_trans
    uy = py/p_trans
    d0_mm = abs(x_mm*uy - y_mm*ux)

    # Resolution in microns, both terms inflated by the global scale
    sigma_um = sqrt((a_um*sigma_scale)**2 + (b_um*sigma_scale/p_tot)**2)

    # mm -> microns, then cap so one extreme track cannot dominate
    signif = (1e3 * d0_mm) / max(sigma_um, 1e-3)
    signif = min(abs(signif), S_cap)

    # 0.5 * erfc(S / sqrt(2)), A&S 7.1.26 with coefficients a5..a1 (Horner)
    arg = signif / sqrt(2.0)
    t = 1.0/(1.0 + 0.3275911*arg)
    horner = 0.0
    for coeff in (1.061405429, -1.453152027, 1.421413741,
                  -0.284496736, 0.254829592):
        horner = (horner + coeff)*t
    erfc_val = horner * np.exp(-arg*arg)
    return float(0.5 * erfc_val)

def hemisphere_btag_aleph_simple(
    x_mm, y_mm, px, py, pz, pdg,
    alpha_hemi_cut=0.01, min_tracks=6, pmin=0.5,
    a_um=25.0, b_um=95.0, ip_cap_mm=4.0,
    sigma_scale=1.0, S_cap=5.0, use_topk=2
):
    """
    Lifetime tag for one hemisphere whose particles the caller has already
    selected.

    A track is "good" when |pdg| is in CHARGED_PDGS, its momentum exceeds
    ``pmin`` and its transverse impact parameter is below ``ip_cap_mm``.
    Every good track gets a primary-vertex probability from
    ``track_pv_probability_simple``; the hemisphere value ``alpha_hemi`` is
    the product of the ``use_topk`` smallest probabilities, or of all of
    them when ``use_topk`` is falsy or not smaller than the track count.

    Returns
    -------
    (is_tagged, alpha_hemi, n_good, probs)
        ``is_tagged`` is ``alpha_hemi < alpha_hemi_cut``; ``probs`` is the
        list of per-track probabilities (clipped to [1e-300, 1]). With
        fewer than ``min_tracks`` good tracks the result is
        ``(False, 1.0, n_good, [])``.
    """
    x_arr = np.asarray(x_mm, float)
    y_arr = np.asarray(y_mm, float)
    px = np.asarray(px, float)
    py = np.asarray(py, float)
    pz = np.asarray(pz, float)
    pdg = np.asarray(pdg)

    # Transverse impact parameter of every candidate
    ux, uy, _ = _vt_unit(px, py)
    d0_mm = np.abs(x_arr*uy - y_arr*ux)

    # Species + momentum requirement, then drop tracks with a huge d0
    momentum = np.sqrt(px**2 + py**2 + pz**2)
    selected = np.isin(np.abs(pdg), CHARGED_PDGS) & (momentum > pmin)
    selected &= (d0_mm < ip_cap_mm)

    track_ids = np.flatnonzero(selected)
    n_good = int(track_ids.size)
    if n_good < min_tracks:
        return False, 1.0, n_good, []

    # One primary-vertex probability per good track
    probs = np.empty(n_good, float)
    for slot, i in enumerate(track_ids):
        probs[slot] = track_pv_probability_simple(
            x_arr[i], y_arr[i], px[i], py[i], pz[i],
            a_um=a_um, b_um=b_um,
            sigma_scale=sigma_scale, S_cap=S_cap,
        )
    probs = np.clip(probs, 1e-300, 1.0)

    # Keep only the most displaced tracks (smallest probabilities) if asked
    if use_topk and probs.size > use_topk:
        factors = np.partition(probs, use_topk-1)[:use_topk]
    else:
        factors = probs
    alpha_hemi = float(np.exp(np.sum(np.log(factors))))

    return (alpha_hemi < alpha_hemi_cut), alpha_hemi, n_good, probs.tolist()

In [4]:
def compute_missing_energy(px, py, pz, E, pdg, mask1, mask2,
                           sqrt_s=91.2, method="simple", tol=1e-6):
    """
    Per-hemisphere visible energy, missing energy and invariant mass squared.

    Only particles whose PDG code is in ``VISIBLE_PDGS`` contribute. For
    each hemisphere the missing energy is ``E_true - E_vis`` where
    ``E_true`` depends on ``method``:

    - ``"simple"``: ``E_true = sqrt_s / 2`` for both hemispheres.
    - ``"aleph"``:  ``E_true_1 = (s + m1^2 - m2^2) / (2 sqrt_s)`` and the
      same with 1 <-> 2 for the other hemisphere, with ``s = sqrt_s^2`` and
      ``m^2`` the visible invariant masses squared.

    Parameters
    ----------
    px, py, pz, E : arrays
        Particle kinematics.
    pdg : array
        PDG codes (may contain repeated values).
    mask1, mask2 : bool arrays
        Hemisphere assignments.
    sqrt_s : float
        CM energy [GeV].
    method : {"simple", "aleph"}
        Method for computing E_true.
    tol : float
        Values of m^2 in [-tol, 0) are treated as round-off and returned as
        0.0; more negative values are returned unchanged.

    Returns
    -------
    ((E_vis1, E_miss1, m1_sq), (E_vis2, E_miss2, m2_sq))
        Note that the third entry of each tuple is a mass *squared*.

    Raises
    ------
    ValueError
        If ``method`` is neither "simple" nor "aleph".
    """
    if method not in ("simple", "aleph"):
        raise ValueError("method must be 'simple' or 'aleph'")

    px = np.asarray(px)
    py = np.asarray(py)
    pz = np.asarray(pz)
    E = np.asarray(E)
    mask1 = np.asarray(mask1, dtype=bool)
    mask2 = np.asarray(mask2, dtype=bool)

    s = sqrt_s**2
    E_beam = sqrt_s / 2.0

    # --- Visible particles only (everything not in VISIBLE_PDGS is dropped).
    # `pdg` holds one code per particle and therefore repeats values, so
    # `assume_unique=True` must not be passed here.
    is_visible = np.isin(pdg, VISIBLE_PDGS)
    vis_mask1 = mask1 & is_visible
    vis_mask2 = mask2 & is_visible

    E_vis1 = np.sum(E[vis_mask1])
    E_vis2 = np.sum(E[vis_mask2])

    # --- Invariant mass squared of the summed four-momentum
    def m2(mask):
        E_h = E[mask].sum()
        px_h = px[mask].sum()
        py_h = py[mask].sum()
        pz_h = pz[mask].sum()
        val = E_h**2 - (px_h**2 + py_h**2 + pz_h**2)
        if val < -tol:
            return val  # keep genuine negatives visible to the caller
        return max(val, 0.0)  # clip round-off negatives to zero

    m1_sq = m2(vis_mask1)
    m2_sq = m2(vis_mask2)

    # --- E_true depending on method
    if method == "simple":
        E_true1 = E_beam
        E_true2 = E_beam
    else:  # "aleph"
        E_true1 = (s + m1_sq - m2_sq) / (2 * sqrt_s)
        E_true2 = (s + m2_sq - m1_sq) / (2 * sqrt_s)

    # --- Missing energies
    E_miss1 = E_true1 - E_vis1
    E_miss2 = E_true2 - E_vis2

    return (E_vis1, E_miss1, m1_sq), (E_vis2, E_miss2, m2_sq)



In [7]:
# ------------------------
# Main selection function
# ------------------------

# NOTE(review): NMAX is not referenced by any code visible in this notebook;
# presumably a leftover event cap - confirm before relying on it.
NMAX = 2_000_000

def select_missing_energy_events_fast_with_btag(
    input_path,
    output_events_path,
    output_summary_path,
    output_allhemis_path="all_hemispheres_HEtail.csv",
    sqrt_s=91.2,
    min_tracks=7,                 # align with final WP
    progress_step=100_000,
    energy_method="aleph",
    btag_mode="aleph",
    e_miss_min=-20.0
):
    """
    Stream an event file through the missing-energy + b-tag selection.

    Input format: one event per line; blank lines and lines starting with
    ``#`` are skipped. Each line is a whitespace-separated list of numbers
    in groups of nine per particle: ``pdg px py pz E x y z r`` (the last
    two columns are read but not used).

    Cut sequence (an event is dropped at the first failing step):

    1. thrust > 0.85 and |cos(theta_thrust)| < 0.7, thrust computed from the
       particles in ``VISIBLE_PDGS`` with ``compute_thrust_with_angle2``;
    2. at least one hemisphere with ``E_miss > e_miss_min``;
    3. if ``btag_mode == "aleph"``: b-tag of the hemisphere opposite to the
       one with the larger ``E_miss`` (jet-probability cut plus a
       displaced-track topology requirement). Any other ``btag_mode`` value
       skips this step entirely;
    4. ``E_miss`` of the opposite hemisphere < 20;
    5. at least ``min_tracks`` charged particles with E > 1 in the opposite
       hemisphere;
    6. no lepton with p > 2 (``has_identified_lepton`` default) in the
       signal hemisphere.

    Outputs
    -------
    - ``output_events_path``: the raw input line of every selected event.
    - ``output_summary_path``: CSV with thrust, axis and per-hemisphere
      E_vis / E_miss / track counts of every selected event.
    - ``output_allhemis_path``: CSV with E_miss of both hemispheres for
      every event that passes step 1.

    Parameters
    ----------
    sqrt_s : float
        Centre-of-mass energy handed to ``compute_missing_energy``.
    min_tracks : int
        Track multiplicity required in step 5.
    progress_step : int
        Print a progress line every this many events.
    energy_method : {"simple", "aleph"}
        Forwarded to ``compute_missing_energy``.
    btag_mode : str
        ``"aleph"`` enables step 3.
    e_miss_min : float
        Threshold of step 2. The default (-20) reproduces the previously
        hard-coded value.

    Returns
    -------
    dict
        Number of events surviving each step, keyed by step name.

    Raises
    ------
    ValueError
        If an event line does not contain a multiple of nine tokens.
    """

    scanned = 0
    kept = 0

    cut_counts = {
        "total": 0,
        "thrust_cos": 0,
        "missing_E": 0,
        "btag": 0,
        "opp_veto": 0,
        "tracks": 0,
        "lep_veto": 0,
    }

    # --- b-tag working point (FINAL) ---
    JP_ALPHA_CUT = 0.001     # Jet-Probability CL cut (tight)
    B_MIN_TRACKS = 7         # multiplicity on tag hemisphere
    B_PMIN       = 0.5       # GeV
    B_A_UM       = 25.0
    B_B_UM       = 95.0
    B_IP_CAP_MM  = 4.0
    B_SIG_SCALE  = 1.3978
    B_S_CAP      = 5.0

    POS_S_MIN    = 3.0       # displaced threshold (positive sign)
    POS_S_HARD   = 3.7       # at least one hard positive track

    with open(input_path, "r") as fin, \
         open(output_events_path, "w") as fout_events, \
         open(output_summary_path, "w", newline="") as fout_summary, \
         open(output_allhemis_path, "w", newline="") as fout_all:

        writer = csv.writer(fout_summary)
        writer.writerow([
            "thrust", "cos_theta_thrust",
            "thrust_x", "thrust_y", "thrust_z",
            "E_vis_h1", "E_miss_h1", "tracks_h1",
            "E_vis_h2", "E_miss_h2", "tracks_h2"
        ])

        writer_all = csv.writer(fout_all)
        writer_all.writerow(["E_miss", "hemisphere", "event_id", "stage"])

        # ev_id is the line index in the file (skipped lines are counted too)
        for ev_id, raw in enumerate(fin):
            line = raw.strip()
            if not line or line.startswith("#"):
                continue

            tk = line.split()
            if len(tk) % 9 != 0:
                raise ValueError(f"Line has {len(tk)} tokens (not multiple of 9).")

            arr = np.fromiter((float(x) for x in tk), dtype=float).reshape(-1, 9)
            pdg = arr[:, 0].astype(int)
            px, py, pz, E = arr[:, 1:5].T
            # Columns 7 and 8 (z, r) are not needed by the selection.
            x_prod, y_prod = arr[:, 5], arr[:, 6]

            scanned += 1
            cut_counts["total"] += 1
            if scanned % progress_step == 0:
                print(f"Processed {scanned:,} events... kept {kept:,}")

            # --- visible mask ---
            # pdg repeats values (many pions per event), so assume_unique
            # must not be used with np.isin here.
            vis_mask = np.isin(pdg, VISIBLE_PDGS)

            # --- thrust + cosθ cuts ---
            thrust, cos_theta, axis = compute_thrust_with_angle2(px[vis_mask], py[vis_mask], pz[vis_mask])
            if thrust <= 0.85 or cos_theta >= 0.7:
                continue
            cut_counts["thrust_cos"] += 1

            # --- hemispheres (all particles, split by the sign of p . axis) ---
            dot_products = px * axis[0] + py * axis[1] + pz * axis[2]
            mask1 = dot_products > 0
            mask2 = ~mask1

            # --- missing energies ---
            (E_vis1, E_miss1, m1_sq), (E_vis2, E_miss2, m2_sq) = compute_missing_energy(
                px, py, pz, E, pdg, mask1, mask2, sqrt_s=sqrt_s, method=energy_method
            )
            E_vis  = [E_vis1, E_vis2]
            E_miss = [E_miss1, E_miss2]

            # dump after thrust (background spectrum)
            writer_all.writerow([E_miss1, 0, ev_id, "thrust"])
            writer_all.writerow([E_miss2, 1, ev_id, "thrust"])

            # --- good charged tracks per hemi (E>1 GeV def; for later tracks cut) ---
            good_tracks = []
            for mask in (mask1, mask2):
                E_h = E[mask]
                pdg_h = pdg[mask]
                track_mask = np.isin(pdg_h, CHARGED_PDGS) & (E_h > 1.0)
                good_tracks.append(np.count_nonzero(track_mask))

            # --- missing-E region cut (HE tail) ---
            if not (E_miss[0] > e_miss_min or E_miss[1] > e_miss_min):
                continue
            cut_counts["missing_E"] += 1

            # --- choose signal vs opposite hemisphere ---
            sig_idx = 0 if E_miss[0] > E_miss[1] else 1
            opp_idx = 1 - sig_idx
            opp_mask = mask1 if opp_idx == 0 else mask2

            # =======================
            #        B-TAGGING
            # =======================
            if btag_mode == "aleph":
                final_mask = vis_mask & opp_mask  # visible-only tracks in opposite hemi

                # get ALL good-track PV probabilities (no decision inside helper:
                # alpha_hemi_cut=1.0 and use_topk=0 make it return every prob)
                _, _, n_good_btag, track_probs = hemisphere_btag_aleph_simple(
                    x_mm=x_prod[final_mask], y_mm=y_prod[final_mask],
                    px=px[final_mask],     py=py[final_mask],     pz=pz[final_mask],
                    pdg=pdg[final_mask],
                    alpha_hemi_cut=1.0,          # return probs only
                    min_tracks=B_MIN_TRACKS, pmin=B_PMIN,
                    a_um=B_A_UM, b_um=B_B_UM, ip_cap_mm=B_IP_CAP_MM,
                    sigma_scale=B_SIG_SCALE, S_cap=B_S_CAP, use_topk=0
                )
                if n_good_btag < B_MIN_TRACKS:
                    continue

                # (A) Jet-Probability CL over ALL good tracks:
                #     CL = alpha * sum_{j<N} (-ln alpha)^j / j!
                # T is taken from the log-sum directly; going through
                # exp() and log() again would give T = inf (and CL = NaN,
                # which slips past the cut) once the product underflows.
                probs = np.clip(np.asarray(track_probs, float), 1e-300, 1.0)
                log_alpha = float(np.sum(np.log(probs)))
                T = -log_alpha
                alpha_all = float(np.exp(log_alpha))
                jp_sum, term = 1.0, 1.0
                for j in range(1, n_good_btag):
                    term *= T / j
                    jp_sum += term
                jp_cl = alpha_all * jp_sum
                if jp_cl >= JP_ALPHA_CUT:
                    continue

                # (B) Topology: ≥2 positive displaced (|S|>3.0) AND ≥1 hard hit (|S|>3.7)
                # NOTE(review): this runs over every visible particle in the
                # opposite hemisphere, not only the "good" charged tracks
                # selected by the helper above - confirm this is intended.
                px_o = px[final_mask]; py_o = py[final_mask]
                x_o  = x_prod[final_mask]; y_o = y_prod[final_mask]
                pT   = np.hypot(px_o, py_o)
                vx   = np.divide(px_o, pT, out=np.zeros_like(px_o), where=pT>1e-12)
                vy   = np.divide(py_o, pT, out=np.zeros_like(py_o), where=pT>1e-12)
                d0_mm = np.abs(x_o*vy - y_o*vx)

                p3 = np.sqrt(px_o**2 + py_o**2 + pz[final_mask]**2) + 1e-12
                sigma_um = np.sqrt((B_A_UM * B_SIG_SCALE)**2 + (B_B_UM * B_SIG_SCALE / p3)**2)
                S_abs = np.minimum(1e3 * d0_mm / np.maximum(sigma_um, 1e-3), B_S_CAP)  # mm→μm, cap

                sgn = np.sign(x_o*py_o - y_o*px_o) * np.sign(axis[2])
                pos_disp = (sgn > 0) & (S_abs > POS_S_MIN)
                hard_hit = (sgn > 0) & (S_abs > POS_S_HARD)
                if (np.count_nonzero(pos_disp) < 2) or (not np.any(hard_hit)):
                    continue

                cut_counts["btag"] += 1
            # =======================

            # --- opposite hemisphere veto (align to 20 GeV) ---
            if E_miss[opp_idx] >= 20:
                continue
            cut_counts["opp_veto"] += 1

            # --- tracks cut (your E>1 GeV good-track count) ---
            if good_tracks[opp_idx] < min_tracks:
                continue
            cut_counts["tracks"] += 1

            # --- semileptonic veto on the signal hemisphere ---
            sig_mask = mask1 if sig_idx == 0 else mask2
            if has_identified_lepton(
                pdg[sig_mask],
                E[sig_mask],
                np.sqrt(px[sig_mask]**2 + py[sig_mask]**2 + pz[sig_mask]**2)
            ):
                continue
            cut_counts["lep_veto"] += 1

            # --- store selected event ---
            fout_events.write(line + "\n")
            writer.writerow([
                thrust, cos_theta,
                axis[0], axis[1], axis[2],
                E_vis[0], E_miss[0], good_tracks[0],
                E_vis[1], E_miss[1], good_tracks[1]
            ])
            kept += 1

    # --- cutflow summary ---
    total = cut_counts["total"]
    print("\n--- Cutflow summary ---")
    for step, n in cut_counts.items():
        frac = 100.0 * n / total if total > 0 else 0.0
        print(f"{step:12s}: {n:,}  ({frac:.6f}%)")

    # Guarded: an input without any event line must not raise ZeroDivisionError.
    kept_frac = 100 * kept / total if total > 0 else 0.0
    print(f"\nFinal kept events: {kept:,} ({kept_frac:.6f}%)")
    return cut_counts


In [6]:
# Input sample read by the selection run below.
filename = "selected_hadronic_smeared.txt"

# ALEPH-style missing-energy estimate combined with the ALEPH-style b-tag;
# the returned cutflow dict is the cell output.
select_missing_energy_events_fast_with_btag(
    input_path=filename,
    output_events_path="selected_aleph_HEtail.txt",
    output_summary_path="summary_alephHE_tail.csv",
    energy_method="aleph",
    btag_mode="aleph",
)



Processed 100,000 events... kept 0
Processed 200,000 events... kept 1
Processed 300,000 events... kept 1
Processed 400,000 events... kept 1
Processed 500,000 events... kept 1
Processed 600,000 events... kept 2
Processed 700,000 events... kept 2
Processed 800,000 events... kept 2
Processed 900,000 events... kept 2
Processed 1,200,000 events... kept 3
Processed 1,300,000 events... kept 3
Processed 1,400,000 events... kept 3
Processed 1,500,000 events... kept 4
Processed 1,600,000 events... kept 5
Processed 1,700,000 events... kept 6
Processed 1,800,000 events... kept 6
Processed 1,900,000 events... kept 7
Processed 2,000,000 events... kept 8
Processed 2,100,000 events... kept 9
Processed 2,200,000 events... kept 9
Processed 2,300,000 events... kept 9
Processed 2,400,000 events... kept 9
Processed 2,500,000 events... kept 9
Processed 2,600,000 events... kept 9
Processed 2,700,000 events... kept 9
Processed 2,800,000 events... kept 9
Processed 2,900,000 events... kept 9
Processed 3,000,000

{'total': 4176778,
 'thrust_cos': 2286715,
 'missing_E': 212,
 'btag': 38,
 'opp_veto': 38,
 'tracks': 24,
 'lep_veto': 12}

In [9]:
filename = "selected_hadronic_smeared.txt"

# Same ALEPH-style missing energy + ALEPH-style b-tag as the run that writes
# "selected_aleph_HEtail.txt", but with the selected events and the summary
# written to the "*_all" output files.
# NOTE(review): output_allhemis_path is left at its default here, so this run
# writes "all_hemispheres_HEtail.csv" as well and overwrites the file produced
# by any earlier run that also used the default - confirm this is intended.
select_missing_energy_events_fast_with_btag(
    filename, "selected_aleph_all.txt", "summary_aleph_all.csv",
    energy_method="aleph",
    btag_mode="aleph"
)

Processed 100,000 events... kept 3,561
Processed 200,000 events... kept 7,169
Processed 300,000 events... kept 10,759
Processed 400,000 events... kept 14,447
Processed 500,000 events... kept 17,913
Processed 600,000 events... kept 21,467
Processed 700,000 events... kept 25,129
Processed 800,000 events... kept 28,669
Processed 900,000 events... kept 32,130
Processed 1,000,000 events... kept 35,685
Processed 1,100,000 events... kept 39,221
Processed 1,200,000 events... kept 42,823
Processed 1,300,000 events... kept 46,378
Processed 1,400,000 events... kept 49,834
Processed 1,500,000 events... kept 53,360
Processed 1,600,000 events... kept 56,951
Processed 1,700,000 events... kept 60,442
Processed 1,800,000 events... kept 64,001
Processed 1,900,000 events... kept 67,556
Processed 2,000,000 events... kept 71,070
Processed 2,100,000 events... kept 74,660
Processed 2,200,000 events... kept 78,195
Processed 2,300,000 events... kept 81,724
Processed 2,400,000 events... kept 85,323
Processed 2,

{'total': 4176778,
 'thrust_cos': 2286715,
 'missing_E': 2286715,
 'btag': 330575,
 'opp_veto': 330541,
 'tracks': 234662,
 'lep_veto': 148697}