In [None]:
import numpy as np

# Data from the Pythia simulation - All possible decays of Z

# Selection of hadronic events from the data, following ALEPH cuts

# Input event file and destination file for the selected events
filename = "z-decay-products.txt"
outfile = "selected_hadronic.txt"

# A progress line is printed every `progress_step` scanned events
progress_step = 100_000

# Absolute PDG codes of the particles counted as charged tracks
CHARGED_LEPTON_ABS_PDGS = {
    11,  # e±
    13,  # μ±
    15,  # τ±
}
CHARGED_HADRON_ABS_PDGS = {
    211,   # π±
    321,   # K±
    2212,  # p
    3112,  # Σ−
    3222,  # Σ+
    3312,  # Ξ−
    3334,  # Ω−
}

def process_events_no_smear(in_path, out_path,
                            sqrt_s=91.2, Emin_track=1.0, min_good=5, frac_E=0.10,
                            require_hadron=False, limit=None):
    """Select hadronic-like events using the true (unsmeared) energies.

    Every non-empty line of ``in_path`` that does not start with ``#`` is
    read as one event: a flat sequence of 9 whitespace-separated numbers
    per particle.  Only column 0 (PDG code) and column 4 (energy) are used
    by the selection; selected events are copied to ``out_path`` unchanged
    (stripped line plus a newline).

    A "good track" is a particle whose absolute PDG code is in
    ``CHARGED_LEPTON_ABS_PDGS`` or ``CHARGED_HADRON_ABS_PDGS`` and whose
    energy is above ``Emin_track``.  An event is kept when it has at least
    ``min_good`` good tracks, their summed energy is at least
    ``frac_E * sqrt_s`` and, if ``require_hadron`` is true, at least one
    good track is a hadron.

    Two extra counters are maintained for the summary:
    events with >= 2 good leptons and no good hadron ("purely leptonic"),
    and events with no good track at all ("neutrino-like").

    Parameters
    ----------
    in_path, out_path : path of the event file to read / selection to write.
    sqrt_s : centre-of-mass energy (GeV) used for the energy-sum threshold.
    Emin_track : minimum energy (GeV) of a good track.
    min_good : minimum number of good tracks.
    frac_E : fraction of ``sqrt_s`` the good-track energy sum must reach.
    require_hadron : additionally demand at least one good charged hadron.
    limit : stop after this many events have been scanned (checked after
        each event); ``None`` scans the whole file.

    Returns
    -------
    tuple ``(kept_had, leptonic_only, nu_like, total)`` of event counts.

    Raises
    ------
    ValueError
        If a line's token count is not a multiple of 9 (or a token is not
        a number).
    OSError
        If either file cannot be opened.

    Notes
    -----
    Relies on the module-level ``progress_step`` and PDG code sets.
    """
    thr_sum = frac_E * sqrt_s
    total = kept_had = leptonic_only = nu_like = 0

    # Open the input first: if it is missing or unreadable we must not leave
    # behind a freshly truncated output file.
    with open(in_path, "r") as f, open(out_path, "w") as fout:
        for raw in f:
            line = raw.strip()
            if not line or line.startswith("#"):
                continue
            tk = line.split()
            if len(tk) % 9 != 0:
                raise ValueError(f"Line has {len(tk)} tokens (not multiple of 9).")

            # --- parse event: one row of 9 values per particle ---
            arr = np.fromiter((float(x) for x in tk), dtype=float).reshape(-1, 9)
            abs_pdg = np.abs(arr[:, 0].astype(int))
            E_true = arr[:, 4]

            # --- good charged track selection (using true E) ---
            energetic = E_true > Emin_track
            lepton_mask = np.isin(abs_pdg, list(CHARGED_LEPTON_ABS_PDGS)) & energetic
            hadron_mask = np.isin(abs_pdg, list(CHARGED_HADRON_ABS_PDGS)) & energetic
            # Good tracks are exactly the good leptons plus the good hadrons.
            charged_mask = lepton_mask | hadron_mask

            n_good = charged_mask.sum()
            Esum_good = E_true[charged_mask].sum()
            n_leptons = lepton_mask.sum()
            n_hadrons = hadron_mask.sum()

            # purely leptonic counter
            if n_leptons >= 2 and n_hadrons == 0:
                leptonic_only += 1

            # neutrino-like counter
            if n_good == 0:
                nu_like += 1

            # hadronic selection
            if (n_good >= min_good) and (Esum_good >= thr_sum) \
                    and ((not require_hadron) or n_hadrons >= 1):
                # Write out original event (9 columns per particle, unchanged)
                fout.write(line + "\n")
                kept_had += 1

            total += 1
            if total % progress_step == 0:
                print(f"Processed {total:,} events... kept {kept_had:,}")

            if limit is not None and total >= limit:
                break

    def _pct(count):
        # An input without any event must not crash the summary.
        return 100 * count / total if total else 0.0

    # --- summary ---
    print(f"Scanned {total:,} events")
    print(f" Kept hadronic-like: {kept_had:,} ({_pct(kept_had):.2f}%)"
          f"  [≥{min_good} good tracks (E>{Emin_track} GeV), ΣE_good ≥ {thr_sum:.2f} GeV]")
    print(f" Purely leptonic: {leptonic_only:,} ({_pct(leptonic_only):.2f}%)")
    print(f" Neutrino-like: {nu_like:,} ({_pct(nu_like):.2f}%)")

    return kept_had, leptonic_only, nu_like, total


# --- run the unsmeared selection on the configured files ---
kept_had, leps_only, nu_like, total = process_events_no_smear(
    in_path=filename,
    out_path=outfile,
    sqrt_s=91.2,
    Emin_track=1.0,
    min_good=5,
    frac_E=0.10,
    require_hadron=False,
    limit=None,
)


# Expected fractions: 70% hadronic-like, 7-10% purely leptonic (tau decays do
# not always look purely leptonic, since many taus decay to hadrons), 20% neutrino-like



In [3]:
import numpy as np

# Data from the Pythia simulation - All possible decays of Z

# Selection of hadronic events from the data, following ALEPH cuts

# Smearing in energy applied to the data before the selection:

# Nothing is applied to neutrinos (not detected); the electromagnetic
# calorimeter formula is applied to e- and photons; hadrons (and other charged
# particles, including muons) use the measured total energy formula
# (hadron calorimeter plus other systems combined - energy flow)

# Input event file and destination file for the selected, smeared events
filename = "z-decay-products-BSM.txt"
outfile = "selected_hadronic_smearedBSM.txt"

# Random generator used for the energy smearing (created without a seed)
rng = np.random.default_rng()

# A progress line is printed every `progress_step` scanned events
progress_step = 100_000

# Absolute PDG codes of the particles counted as charged tracks
CHARGED_LEPTON_ABS_PDGS = {
    11,  # e±
    13,  # μ±
    15,  # τ±
}
CHARGED_HADRON_ABS_PDGS = {
    211,   # π±
    321,   # K±
    2212,  # p
    3112,  # Σ−
    3222,  # Σ+
    3312,  # Ξ−
    3334,  # Ω−
}

def process_events_smear(in_path, out_path,
                   sqrt_s=91.2, Emin_track=1.0, min_good=5, frac_E=0.10,
                   require_hadron=False, limit=None):
    """Smear particle energies, then select hadronic-like events.

    Every non-empty line of ``in_path`` that does not start with ``#`` is
    read as one event: a flat sequence of 9 whitespace-separated numbers
    per particle, unpacked as
    ``(pdg, px, py, pz, E, x_prod, y_prod, z_prod, r_prod)``.

    Energy smearing (Gaussian around the true energy, negative results
    clipped to 0):

    * |pdg| 11 or 22 (e / photon): sigma = 0.18 * sqrt(E) + 0.009 * E
    * |pdg| 12, 14, 16 (neutrinos): energy left unchanged
    * everything else:             sigma = 0.6 * sqrt(E) + 0.6

    A "good track" is a particle whose absolute PDG code is in
    ``CHARGED_LEPTON_ABS_PDGS`` or ``CHARGED_HADRON_ABS_PDGS`` and whose
    *smeared* energy is above ``Emin_track``.  An event is kept when it has
    at least ``min_good`` good tracks, their summed smeared energy is at
    least ``frac_E * sqrt_s`` and, if ``require_hadron`` is true, at least
    one good track is a hadron.  Kept events are written to ``out_path``
    with the same 9 columns, the energy column replaced by the smeared
    energy (the momentum and position columns are written as read), floats
    formatted with 6 decimals.

    Two extra counters are maintained for the summary:
    events with >= 2 good leptons and no good hadron ("purely leptonic"),
    and events with no good track at all ("neutrino-like").

    Parameters
    ----------
    in_path, out_path : path of the event file to read / selection to write.
    sqrt_s : centre-of-mass energy (GeV) used for the energy-sum threshold.
    Emin_track : minimum smeared energy (GeV) of a good track.
    min_good : minimum number of good tracks.
    frac_E : fraction of ``sqrt_s`` the good-track energy sum must reach.
    require_hadron : additionally demand at least one good charged hadron.
    limit : stop after this many events have been scanned (checked after
        each event); ``None`` scans the whole file.

    Returns
    -------
    tuple ``(kept_had, leptonic_only, nu_like, total)`` of event counts.

    Raises
    ------
    ValueError
        If a line's token count is not a multiple of 9 (or a token is not
        a number).
    OSError
        If either file cannot be opened.

    Notes
    -----
    Relies on the module-level ``rng``, ``progress_step`` and PDG code sets.
    """
    thr_sum = frac_E * sqrt_s
    total = kept_had = leptonic_only = nu_like = 0

    # Open the input first: if it is missing or unreadable we must not leave
    # behind a freshly truncated output file.
    with open(in_path, "r") as f, open(out_path, "w") as fout:
        for raw in f:
            line = raw.strip()
            if not line or line.startswith("#"):
                continue
            tk = line.split()
            if len(tk) % 9 != 0:
                raise ValueError(f"Line has {len(tk)} tokens (not multiple of 9).")

            # --- parse event: one row of 9 values per particle ---
            arr = np.fromiter((float(x) for x in tk), dtype=float).reshape(-1, 9)
            pdg = arr[:, 0].astype(int)
            px, py, pz, E_true = arr[:, 1], arr[:, 2], arr[:, 3], arr[:, 4]
            x_prod, y_prod, z_prod, r_prod = arr[:, 5:9].T

            # --- vectorized smearing ---
            abs_pdg = np.abs(pdg)
            sigma_abs = np.zeros_like(E_true)
            nu_mask = np.isin(abs_pdg, [12, 14, 16])

            # electrons / photons
            # Written as 0.18*sqrt(E) + 0.009*E, which equals
            # (0.18/sqrt(E) + 0.009)*E but stays finite (0) at E == 0
            # instead of producing inf * 0 = NaN.
            em_mask = (abs_pdg == 11) | (abs_pdg == 22)
            E_em = E_true[em_mask]
            sigma_abs[em_mask] = 0.18 * np.sqrt(E_em) + 0.009 * E_em

            # hadrons / muons (everything visible but not e/γ and not ν)
            had_mask = ~em_mask & ~nu_mask
            sigma_abs[had_mask] = 0.6 * np.sqrt(E_true[had_mask]) + 0.6

            # neutrinos: unchanged; everything else is drawn and clipped at 0
            E_smeared = np.where(nu_mask, E_true,
                                 np.clip(rng.normal(E_true, sigma_abs), 0, None))

            # --- good charged track selection (using smeared E) ---
            energetic = E_smeared > Emin_track
            lepton_mask = np.isin(abs_pdg, list(CHARGED_LEPTON_ABS_PDGS)) & energetic
            hadron_mask = np.isin(abs_pdg, list(CHARGED_HADRON_ABS_PDGS)) & energetic
            # Good tracks are exactly the good leptons plus the good hadrons.
            charged_mask = lepton_mask | hadron_mask

            n_good = charged_mask.sum()
            Esum_good = E_smeared[charged_mask].sum()
            n_leptons = lepton_mask.sum()
            n_hadrons = hadron_mask.sum()

            # purely leptonic counter
            if n_leptons >= 2 and n_hadrons == 0:
                leptonic_only += 1

            # neutrino-like counter
            if n_good == 0:
                nu_like += 1

            # hadronic selection
            if (n_good >= min_good) and (Esum_good >= thr_sum) \
                    and ((not require_hadron) or n_hadrons >= 1):
                # Write out *smeared* event: keep original 9 columns, but with
                # smeared energy.  Loop variables are named distinctly from the
                # per-event arrays so nothing is shadowed.
                smeared_line = " ".join(
                    f"{code} {mx:.6f} {my:.6f} {mz:.6f} {en:.6f} "
                    f"{vx:.6f} {vy:.6f} {vz:.6f} {vr:.6f}"
                    for code, mx, my, mz, en, vx, vy, vz, vr in zip(
                        pdg, px, py, pz, E_smeared, x_prod, y_prod, z_prod, r_prod
                    )
                )
                fout.write(smeared_line + "\n")
                kept_had += 1

            total += 1

            if total % progress_step == 0:
                print(f"Processed {total:,} events... kept {kept_had:,}")

            if limit is not None and total >= limit:
                break

    def _pct(count):
        # An input without any event must not crash the summary.
        return 100 * count / total if total else 0.0

    # --- summary ---
    print(f"Scanned {total:,} events")
    print(f" Kept hadronic-like: {kept_had:,} ({_pct(kept_had):.2f}%)"
          f"  [≥{min_good} good tracks (E>{Emin_track} GeV), ΣE_good ≥ {thr_sum:.2f} GeV]")
    print(f" Purely leptonic: {leptonic_only:,} ({_pct(leptonic_only):.2f}%)")
    print(f" Neutrino-like: {nu_like:,} ({_pct(nu_like):.2f}%)")

    return kept_had, leptonic_only, nu_like, total


# --- run the smeared selection on the configured files ---
kept_had, leps_only, nu_like, total = process_events_smear(
    in_path=filename,
    out_path=outfile,
    sqrt_s=91.2,
    Emin_track=1.0,
    min_good=5,
    frac_E=0.10,
    require_hadron=False,
    limit=None,
)


# Expected fractions: 70% hadronic-like, 7-10% purely leptonic (tau decays do
# not always look purely leptonic, since many taus decay to hadrons), 20% neutrino-like


Processed 100,000 events... kept 99,434
Processed 200,000 events... kept 198,857
Processed 300,000 events... kept 298,212
Processed 400,000 events... kept 397,643
Scanned 490,249 events
 Kept hadronic-like: 487,321 (99.40%)  [≥5 good tracks (E>1.0 GeV), ΣE_good ≥ 9.12 GeV]
 Purely leptonic: 2 (0.00%)
 Neutrino-like: 0 (0.00%)
