In [2]:
import numpy as np
import csv

# PDG particle codes. The two lookup tables below are integer NumPy arrays so
# that np.isin membership tests on per-event PDG arrays stay fast.

# Species treated as visible; codes not listed here (e.g. neutrinos) are not.
VISIBLE_PDGS = np.array(
    [
        211, -211,      # charged pions
        111,            # neutral pion
        321, -321,      # charged kaons
        2212, -2212,    # proton / antiproton
        2112, -2112,    # neutron / antineutron
        130,            # K-long
        22,             # photon
        11, -11,        # electron / positron
        13, -13,        # muon / antimuon
    ],
    dtype=int,
)

# Charged subset of the visible species.
CHARGED_PDGS = np.array(
    [
        211, -211,      # charged pions
        321, -321,      # charged kaons
        2212, -2212,    # proton / antiproton
        11, -11,        # electron / positron
        13, -13,        # muon / antimuon
    ],
    dtype=int,
)

# Electrons and muons of either charge (plain set, used for scalar lookups).
LEPTON_PDGS = {11, -11, 13, -13}


def compute_thrust_event_aleph_like(px, py, pz, n_iter=10, n_random=5, rng=None):
    """
    ALEPH-like thrust finder:
    - Uses |p|, |pz|, and a few random seeds
    - Less 'perfect' than all-seeds, closer to ALEPH reconstruction

    Every seed direction is refined with the iteration
    n -> sum_i sign(p_i . n) p_i (normalised); the refined axis with the
    largest thrust is returned.

    Parameters
    ----------
    px, py, pz : array-like, equal length
        Momentum components of the particles entering the thrust.
    n_iter : int
        Maximum number of refinement iterations per seed.
    n_random : int
        Maximum number of randomly chosen particles used as extra seeds
        (capped at the number of particles).
    rng : None, int, or NumPy random generator, optional
        Source of randomness for the random seeds.  ``None`` (default) draws
        from NumPy's global random state, exactly as before this parameter
        existed.  An object exposing ``choice`` (``np.random.Generator`` or
        ``np.random.RandomState``) is used directly; anything else is passed
        to ``np.random.default_rng`` as a seed.  Supplying it makes the
        result reproducible without touching global state.

    Returns
    -------
    thrust : float
        Best thrust value found; 0.0 if the summed |p| is zero.
    cos_theta_thrust : float
        |z-component| of the returned axis; 1.0 if the summed |p| is zero.
    axis : np.ndarray, shape (3,)
        Unit thrust axis; (0, 0, 1) if the summed |p| is zero.
    """
    p = np.column_stack((px, py, pz))
    p_mag = np.linalg.norm(p, axis=1)
    total_mag = np.sum(p_mag)

    # No particles, or only zero-momentum ones: nothing to maximise.
    if total_mag == 0:
        return 0.0, 1.0, np.array([0., 0., 1.])

    # Resolve the random source; None keeps the legacy global-state behaviour.
    if rng is None:
        sampler = np.random
    elif hasattr(rng, "choice"):
        sampler = rng
    else:
        sampler = np.random.default_rng(rng)

    seeds = []

    # 1. Seed from largest |p| (non-zero because total_mag > 0)
    idx_maxp = np.argmax(p_mag)
    seeds.append(p[idx_maxp] / p_mag[idx_maxp])

    # 2. Seed from largest |pz|
    idx_maxpz = np.argmax(np.abs(p[:, 2]))
    if p_mag[idx_maxpz] > 0:
        seeds.append(p[idx_maxpz] / p_mag[idx_maxpz])

    # 3. Random seeds drawn (without replacement) from the input particles
    n_available = len(p)
    n_rand = min(n_random, n_available)
    rand_idx = sampler.choice(n_available, n_rand, replace=False)
    for i in rand_idx:
        if p_mag[i] > 0:
            seeds.append(p[i] / p_mag[i])

    best_thrust = -1.0
    best_axis = np.array([0., 0., 1.])

    # Iterate thrust maximization from every seed
    for axis in seeds:
        prev_thrust = -1.0
        for _ in range(n_iter):
            dots = p @ axis
            # Particles exactly perpendicular to the axis get sign 0 and
            # therefore do not contribute to the update.
            signs = np.sign(dots)
            new_axis = np.sum(signs[:, None] * p, axis=0)
            norm = np.linalg.norm(new_axis)
            if norm == 0:
                break
            axis = new_axis / norm
            thrust_val = np.sum(np.abs(p @ axis)) / total_mag
            # Stop once the thrust value no longer changes.
            if abs(thrust_val - prev_thrust) < 1e-6:
                break
            prev_thrust = thrust_val

        # Final thrust for this seed
        thrust_val = np.sum(np.abs(p @ axis)) / total_mag
        if thrust_val > best_thrust:
            best_thrust = thrust_val
            best_axis = axis

    cos_theta_thrust = abs(best_axis[2])
    return best_thrust, cos_theta_thrust, best_axis

def compute_thrust_with_angle2(px, py, pz, n_iter=30, tol=1e-8):
    """
    Single-seed iterative thrust finder.

    The axis starts along the particle with the largest |p| and is updated
    with n -> normalise(sum_i s_i p_i), s_i = sign(p_i . n), until successive
    axes are parallel (up to an overall sign) within `tol`, or `n_iter`
    updates have been made.

    Parameters
    ----------
    px, py, pz : array-like, equal length
        Momentum components.
    n_iter : int
        Maximum number of axis updates.
    tol : float
        Convergence threshold on 1 - |n_old . n_new|.

    Returns
    -------
    thrust_val : float
        sum|p_i . n| / sum|p_i|; 0.0 when the summed |p| vanishes.
    cos_theta_thrust : float
        |n_z|; 1.0 when the summed |p| vanishes.
    axis : np.ndarray shape (3,)
        Unit thrust axis; (0, 0, 1) when the summed |p| vanishes.
    """
    momenta = np.column_stack((px, py, pz)).astype(float)
    magnitudes = np.linalg.norm(momenta, axis=1)
    total_momentum = magnitudes.sum()
    if total_momentum == 0.0:
        return 0.0, 1.0, np.array([0.0, 0.0, 1.0])

    # Starting direction: the hardest particle (z-axis if it has zero length).
    hardest = momenta[np.argmax(magnitudes)]
    hardest_len = np.linalg.norm(hardest)
    if hardest_len > 0.0:
        axis = hardest / hardest_len
    else:
        axis = np.array([0.0, 0.0, 1.0])

    for _step in range(n_iter):
        # Orientation of each particle relative to the current axis; an exact
        # zero projection is counted as +1 so the update is deterministic.
        raw_signs = np.sign(momenta @ axis)
        orientation = np.where(raw_signs == 0.0, 1.0, raw_signs)

        candidate = np.sum(orientation[:, None] * momenta, axis=0)
        candidate_len = np.linalg.norm(candidate)
        if candidate_len == 0.0:
            break
        candidate = candidate / candidate_len

        # Parallel or anti-parallel to the previous axis counts as converged.
        converged = 1.0 - abs(np.dot(axis, candidate)) < tol
        axis = candidate
        if converged:
            break

    thrust_val = np.sum(np.abs(momenta @ axis)) / total_momentum
    cos_theta_thrust = abs(axis[2])
    return thrust_val, cos_theta_thrust, axis


def has_identified_lepton(pdg_h, E_h, p_h, p_min=2.0):
    """
    Proxy for ALEPH lepton ID.

    Returns True if any particle in the hemisphere is an electron or muon
    (|PDG code| found in LEPTON_PDGS) with momentum strictly greater than
    p_min (GeV), otherwise False.

    pdg_h, E_h and p_h are walked in lockstep, so only as many particles as
    the shortest of the three are inspected; E_h is not used in the decision.
    """
    return any(
        abs(code) in LEPTON_PDGS and momentum > p_min
        for code, _energy, momentum in zip(pdg_h, E_h, p_h)
    )
  
def missingE_with_thrust_and_lepton_veto(
    input_path,
    output_path,
    E_beam=45.6,
    progress_step=100_000
):
    """
    Loop over all events and store hemisphere missing energies
    using the thrust axis (ALEPH style), with thrust cuts and lepton veto.

    Input format: one event per line, whitespace-separated, nine numbers per
    particle in the order PDG code, px, py, pz, E and four further columns
    that are not used here.  Blank lines and lines starting with '#' are
    skipped.

    Selection, in order:
    1. thrust > 0.85 and |cos(theta_thrust)| < 0.7, with the thrust computed
       from visible particles only (PDG code in VISIBLE_PDGS);
    2. no hemisphere contains a lepton accepted by has_identified_lepton.

    For every surviving event two CSV rows are written (hemisphere 0 holds
    the particles with p . axis > 0, hemisphere 1 the rest), each containing
    E_miss = E_beam - (summed energy of visible particles in the hemisphere).

    Parameters
    ----------
    input_path : str or path-like
        Text file with the events.
    output_path : str or path-like
        CSV file to create (overwritten if it exists); columns are
        E_miss, hemisphere, event_id.
    E_beam : float
        Beam energy, in the same units as the energies in the input file.
    progress_step : int
        A progress line is printed every `progress_step` scanned events.

    Returns
    -------
    int
        Number of events kept (the CSV holds twice as many data rows).

    Raises
    ------
    ValueError
        If an event line does not contain a multiple of nine tokens.

    Notes
    -----
    event_id is the zero-based index of the line in the input file, so
    skipped comment/blank lines are counted as well.
    """
    scanned = 0
    kept = 0

    # Number of events that survive each selection step.
    cut_counts = {
        "thrust_cos": 0,
        "lep_veto": 0,
    }

    with open(input_path, "r") as fin, \
         open(output_path, "w", newline="") as fout:

        writer = csv.writer(fout)
        writer.writerow(["E_miss", "hemisphere", "event_id"])

        for ev_id, raw in enumerate(fin):
            line = raw.strip()
            if not line or line.startswith("#"):
                continue

            tk = line.split()
            if len(tk) % 9 != 0:
                raise ValueError(f"Line has {len(tk)} tokens (not multiple of 9).")

            arr = np.fromiter((float(x) for x in tk), dtype=float).reshape(-1, 9)
            pdg = arr[:, 0].astype(int)
            px, py, pz, E = arr[:, 1:5].T
            # Columns 5-8 of each particle record are not needed here.

            scanned += 1
            if scanned % progress_step == 0:
                print(f"Processed {scanned:,} events...")

            # --- Build visible mask (exclude neutrinos etc.) ---
            # Do NOT pass assume_unique=True: an event contains repeated PDG
            # codes, and with that flag NumPy's sort-based path can mark a
            # duplicated non-visible code as visible.
            vis_mask = np.isin(pdg, VISIBLE_PDGS)

            # --- Thrust cuts ---
            thrust, cos_theta, axis = compute_thrust_with_angle2(
                px[vis_mask], py[vis_mask], pz[vis_mask]
            )
            if thrust <= 0.85 or cos_theta >= 0.7:
                continue
            cut_counts["thrust_cos"] += 1

            # Split into hemispheres wrt thrust axis; particles exactly
            # perpendicular to the axis fall into the second hemisphere.
            dot_products = px * axis[0] + py * axis[1] + pz * axis[2]
            mask_forward = dot_products > 0
            hemispheres = (mask_forward, ~mask_forward)

            # --- Lepton veto: check both hemispheres ---
            p_abs = np.sqrt(px**2 + py**2 + pz**2)
            if any(
                has_identified_lepton(pdg[hmask], E[hmask], p_abs[hmask])
                for hmask in hemispheres
            ):
                continue
            cut_counts["lep_veto"] += 1

            # --- Compute missing energy per hemisphere ---
            for h_idx, hmask in enumerate(hemispheres):
                # Reuse the event-level visible mask instead of recomputing it.
                E_vis_h = np.sum(E[hmask & vis_mask])
                E_miss_h = E_beam - E_vis_h

                writer.writerow([E_miss_h, h_idx, ev_id])

            kept += 1

    # `kept` counts events; every kept event contributes two hemisphere rows.
    print(
        f"\nScanned {scanned:,} events, kept {kept:,} events "
        f"({2 * kept:,} hemispheres) stored into {output_path}"
    )
    print("Cutflow:", cut_counts)
    return kept

In [3]:
# Run the thrust + lepton-veto selection on the smeared hadronic sample and
# write the per-hemisphere missing energies to a CSV file.
filename = "selected_hadronic_smeared.txt"
kept = missingE_with_thrust_and_lepton_veto(
    input_path=filename,
    output_path="background_smeared.csv",
    E_beam=45.6,
    progress_step=100_000,
)

Processed 100,000 events...
Processed 200,000 events...
Processed 300,000 events...
Processed 400,000 events...
Processed 500,000 events...
Processed 600,000 events...
Processed 700,000 events...
Processed 800,000 events...
Processed 900,000 events...
Processed 1,000,000 events...
Processed 1,100,000 events...
Processed 1,200,000 events...
Processed 1,500,000 events...
Processed 1,600,000 events...
Processed 1,700,000 events...
Processed 1,800,000 events...
Processed 1,900,000 events...
Processed 2,000,000 events...
Processed 2,100,000 events...
Processed 2,200,000 events...
Processed 2,300,000 events...
Processed 2,400,000 events...
Processed 2,500,000 events...
Processed 2,600,000 events...
Processed 2,700,000 events...
Processed 2,800,000 events...
Processed 2,900,000 events...
Processed 3,000,000 events...
Processed 3,100,000 events...
Processed 3,200,000 events...
Processed 3,300,000 events...
Processed 3,400,000 events...
Processed 3,500,000 events...
Processed 3,600,000 events..