# Raw Data Filtering

* Step 1: Remove events with zero jetmultiplicity.
* Step 2: Set jet quantities to NaN if the jet's Eta is outside the -2.5 to 2.5 range.
* Step 3: Remove events with no valid jets and no valid isophotons (i.e. events in which all jet and isophoton values are zero or NaN).
* Step 4: Method 1: Keep only those events with **ONLY ONE** isophoton (any) and **JUST TWO** (any) b-tagged jets.\
          $\hspace{1.1cm}$ Method 2: Take only those events with **ONLY FIRST ONE** isophoton and **JUST FIRST TWO** btag jets.

In [1]:
# Define constants for jet features and limits
# Per-object feature names. The functions below address DataFrame columns as
# 'jet{i}_{feature}' and 'isophoton{i}_{feature}'.
JET_FEATURES = ['Eta', 'Phi', 'pT', 'Px', 'Py', 'Pz', 'E']
PHOTON_FEATURES = ['Eta', 'Phi', 'pT', 'Px', 'Py', 'Pz', 'E']
# Feature name of the per-jet b-tag column ('jet{i}_btag').
BTAG_FEATURE = 'btag' 

# Highest jet / isophoton index scanned per event (indices start at 1).
MAX_JETS = 15
MAX_PHOTONS = 1
# Inclusive Eta acceptance window applied to jets.
ETA_MIN = -2.5
ETA_MAX = 2.5

In [3]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import json
import os

def load_data(filepath:str, sep:str = '\t') -> pd.DataFrame:
    """
    Read a delimited text file into a Pandas DataFrame.

    Args:
        filepath (str): Location of the text file to read.
        sep (str): Column delimiter passed to pandas. Defaults to a tab.

    Returns:
        pd.DataFrame: The parsed table, or None when the file is missing,
                      empty, or cannot be read for any other reason (the
                      reason is printed).
    """
    failure_message = None
    frame = None
    try:
        print(f"Loading data from {filepath}...")
        frame = pd.read_csv(filepath, sep=sep)
    except FileNotFoundError:
        failure_message = f"*** Error: File not found at {filepath}"
    except pd.errors.EmptyDataError:
        failure_message = f"*** Error: File at {filepath} is empty."
    except Exception as exc:
        failure_message = f"*** An unexpected error occurred during file loading: {exc}"

    # Every failure mode is reported the same way: print the reason, return None.
    if failure_message is not None:
        print(failure_message)
        return None

    print("--- Data loaded successfully :)")
    return frame

def filter_zero_multiplicity(df:pd.DataFrame) -> pd.DataFrame:
    """
    Drop every event whose 'jetmultiplicity' is not strictly positive.

    Args:
        df (pd.DataFrame): Event table; must contain a 'jetmultiplicity' column.

    Returns:
        pd.DataFrame: An independent copy holding only the rows with
                      jetmultiplicity > 0. None is returned (after printing
                      the reason) when the input is not a DataFrame or the
                      'jetmultiplicity' column is absent.
    """
    # isinstance() is False for None, so a single check covers both cases.
    if not isinstance(df, pd.DataFrame):
        print("*** Error: Invalid input DataFrame :(")
        return None
    if 'jetmultiplicity' not in df.columns:
        print("*** Error: 'jetmultiplicity' column not found in DataFrame :(")
        return None

    n_before = len(df)
    print(f"Initial number of events: {n_before}")

    # Keep only rows with at least one jet.
    has_jets = df['jetmultiplicity'].gt(0)
    kept = df.loc[has_jets].copy()
    n_after = len(kept)

    print(f"--- Removed {n_before - n_after} events with zero jetmultiplicity :)")
    print(f"Number of events after filtering: {n_after}")

    return kept

def filter_jets_by_eta(df:pd.DataFrame, eta_min:float=-2.5, eta_max:float=2.5, max_jets:int=13,
                       jet_features:list=None) -> tuple[pd.DataFrame, list]:
    """
    Sets jet quantities to NaN if the jet's Eta is outside the specified range.

    For each jet index i in 1..max_jets whose 'jet{i}_Eta' column exists, every
    row where that Eta is outside [eta_min, eta_max] (bounds inclusive) or is
    already NaN gets all of that jet's existing feature columns set to NaN.
    The input DataFrame is not modified; a copy is returned.

    Args:
        df (pd.DataFrame): The input DataFrame with event data.
        eta_min (float): The minimum allowed Eta value. Defaults to -2.5.
        eta_max (float): The maximum allowed Eta value. Defaults to 2.5.
        max_jets (int): The maximum number of jets to check per event. Defaults to 13.
        jet_features (list): Feature names whose 'jet{i}_{feature}' columns are
            blanked for rejected jets. Defaults to the module-level JET_FEATURES.

    Returns:
        tuple: (filtered DataFrame, list of the 'jet{i}_Eta' column names found).
               For an invalid input DataFrame the result is (None, []), so that
               callers can always unpack two values.
    """
    if df is None or not isinstance(df, pd.DataFrame):
        print("*** Error: Invalid input DataFrame for Eta filtering :(")
        # Keep the two-element shape: callers unpack the result directly.
        return None, []

    if jet_features is None:
        jet_features = JET_FEATURES

    print(f"Applying Eta filter: Keeping jets with Eta between {eta_min} and {eta_max}.")

    df_modified = df.copy()
    jet_eta_cols_in_df = []
    for i in range(1, max_jets + 1):
        eta_col = f'jet{i}_Eta'
        if eta_col not in df_modified.columns:
            continue
        jet_eta_cols_in_df.append(eta_col)

        # between() is False for NaN, so already-missing jets are in the mask too.
        mask = ~df_modified[eta_col].between(eta_min, eta_max, inclusive='both')
        existing_jet_cols = [
            col for col in (f'jet{i}_{feature}' for feature in jet_features)
            if col in df_modified.columns
        ]
        if not existing_jet_cols:
            continue
        df_modified.loc[mask, existing_jet_cols] = np.nan
    print("--- Eta filtering complete :)")

    return df_modified, jet_eta_cols_in_df


def filter_empty_events(df:pd.DataFrame, jet_eta_cols:list, max_photons:int=3) -> pd.DataFrame:
    """
    Drop events that contain neither a valid jet nor a valid photon.

    - An event has no valid jets when every column listed in jet_eta_cols is
      NaN for that row (or when jet_eta_cols is empty).
    - An event has no valid photons when every existing 'isophoton{i}_E'
      column (i = 1..max_photons) is <= 0 or NaN (or when none exists).

    Args:
        df (pd.DataFrame): Event table, normally the output of the Eta filter.
        jet_eta_cols (list): Names of the jet Eta columns present in df.
        max_photons (int): Highest photon index to look for.

    Returns:
        pd.DataFrame: Copy of df without the empty events, or None if df is
                      not a DataFrame.
    """
    if not isinstance(df, pd.DataFrame):
        print("*** Error: Invalid input DataFrame for empty event filtering :(")
        return None

    # --- Jets: a row is jet-less when all of its jet Eta entries are missing ---
    if jet_eta_cols:
        jetless = df[jet_eta_cols].isna().all(axis=1)
    else:
        print("*** Warning: No jet Eta columns found in DataFrame. Cannot filter based on jets :(")
        # Without any jet column every event counts as having no jets.
        jetless = pd.Series([True] * len(df), index=df.index)

    # --- Photons: a row is photon-less when no energy entry is positive ---
    energy_cols = []
    for idx in range(1, max_photons + 1):
        candidate = f'isophoton{idx}_E'
        if candidate in df.columns:
            energy_cols.append(candidate)

    if energy_cols:
        photonless = df[energy_cols].fillna(0).le(0).all(axis=1)
    else:
        print("*** Warning: No photon Energy columns found in DataFrame. Cannot filter based on photons :(")
        # Without any photon column every event counts as having no photons.
        photonless = pd.Series([True] * len(df), index=df.index)

    # Only rows lacking BOTH object types are discarded.
    drop_mask = jetless & photonless
    survivors = df.loc[~drop_mask].copy()

    n_dropped = len(df) - len(survivors)
    if n_dropped <= 0:
        print("No events found with both empty jets and empty photons.")
    else:
        print(f"Removed {n_dropped} events with no valid jets AND no valid photons :)")
    print(f"Number of events after empty event filtering: {len(survivors)}")

    return survivors


def save_data(df:pd.DataFrame, output_filepath:str) -> bool:
    """
    Write a DataFrame to disk as a tab-separated text file (no index column).

    Args:
        df (pd.DataFrame): Table to write.
        output_filepath (str): Destination path of the text file.

    Returns:
        bool: True when the file was written, False when df is not a
              DataFrame or writing raised an exception (the reason is printed).
    """
    if not isinstance(df, pd.DataFrame):
        print("*** Error: Invalid DataFrame provided for saving :(")
        return False

    try:
        print(f"Saving processed data to {output_filepath}...")
        df.to_csv(output_filepath, sep='\t', index=False)
    except Exception as exc:
        print(f"*** An unexpected error occurred during file saving: {exc}")
        return False

    print("--- Data saved successfully :)")
    return True

# Fallbacks used when the caller leaves the feature arguments at None.
_DEFAULT_OBJECT_FEATURES = ('Eta', 'Phi', 'pT', 'Px', 'Py', 'Pz', 'E')
_DEFAULT_BTAG_FEATURE = 'btag'


def _extract_features(row:pd.Series, prefix:str, features) -> dict:
    """Return {feature: value or None} read from the '<prefix>_<feature>' entries of row."""
    extracted = {}
    for feature in features:
        col_name = f'{prefix}_{feature}'
        if col_name in row and pd.notna(row[col_name]):
            value = row[col_name]
            # Convert numpy scalars to plain Python types for JSON compatibility.
            extracted[feature] = value.item() if hasattr(value, 'item') else value
        else:
            extracted[feature] = None  # Mark missing sub-features
    return extracted


def create_event_dictionary(row:pd.Series, max_jets:int=13, max_photons:int=3,
                            jet_features:list[str]=None, photon_features:list[str]=None,
                            btag_feature:str=None) -> dict:
    """
    Creates a dictionary for a single event (row) with structured jet/photon info.

    A jet i is included when 'jet{i}_{jet_features[0]}' exists and is not NaN.
    A photon i is included when 'isophoton{i}_E' exists, is not NaN and is > 0.
    Missing or NaN sub-features are stored as None.

    Args:
        row (pd.Series): A row from the DataFrame representing one event.
        max_jets (int): Maximum number of jets to check.
        max_photons (int): Maximum number of photons to check.
        jet_features (list): Feature names for jets; the first entry is the
            validity key. None or empty falls back to
            Eta, Phi, pT, Px, Py, Pz, E.
        photon_features (list): Feature names for photons. None falls back to
            Eta, Phi, pT, Px, Py, Pz, E.
        btag_feature (str): Feature name of the b-tag column (e.g. 'btag' for
            'jet{i}_btag'). None falls back to 'btag'.

    Returns:
        dict: {'eventno', 'jets', 'photons', 'num_btag_jets', 'num_isophoton'},
              or None if 'eventno' is missing or NaN.
    """
    if 'eventno' not in row or pd.isna(row['eventno']):
        print("*** Warning: Skipping row due to missing or NaN 'eventno' :(")
        return None

    # The declared defaults are None; resolve them here so that a call relying
    # on the defaults works instead of failing on jet_features[0].
    if not jet_features:
        jet_features = list(_DEFAULT_OBJECT_FEATURES)
    if photon_features is None:
        photon_features = list(_DEFAULT_OBJECT_FEATURES)
    if btag_feature is None:
        btag_feature = _DEFAULT_BTAG_FEATURE

    event_dict = {
        "eventno": int(row['eventno']),  # Ensure event number is an integer
        "jets": [],
        "photons": []
    }
    current_btag_jet_count = 0
    current_photon_count = 0

    # --- Process Jets ---
    for i in range(1, max_jets + 1):
        key_feature_col = f'jet{i}_{jet_features[0]}'  # e.g., jet1_Eta

        # Skip jets whose key feature is absent or NaN.
        if key_feature_col not in row or pd.isna(row[key_feature_col]):
            continue

        jet_data = {'jet_index': i}
        jet_data.update(_extract_features(row, f'jet{i}', jet_features))

        # --- Add b-tag label directly to jet_data ---
        btag_col_name = f'jet{i}_{btag_feature}'
        btag_value = None  # Default to None
        if btag_col_name in row and pd.notna(row[btag_col_name]):
            try:
                # Attempt conversion to int, assuming b-tag is 0 or 1
                btag_value = int(row[btag_col_name])
            except (ValueError, TypeError):
                # Handle cases where conversion fails (e.g., unexpected string)
                print(f"*** Warning: Could not convert b-tag value '{row[btag_col_name]}' to int for jet {i}, event {event_dict['eventno']}. Setting to None :(")
                btag_value = None

        # A value of 1 is counted as a b-tagged jet.
        if btag_value == 1:
            current_btag_jet_count += 1

        jet_data[btag_feature] = btag_value
        event_dict["jets"].append(jet_data)

    # --- Process Isolated Photons ---
    for i in range(1, max_photons + 1):
        energy_col = f'isophoton{i}_E'

        # Check if photon is valid (Energy exists, is not NaN, and is > 0)
        if energy_col in row and pd.notna(row[energy_col]) and row[energy_col] > 0:
            photon_data = {'isophoton_index': i}
            photon_data.update(_extract_features(row, f'isophoton{i}', photon_features))
            event_dict["photons"].append(photon_data)
            current_photon_count += 1

    # --- Add the recalculated counts to the event dictionary ---
    event_dict["num_btag_jets"] = current_btag_jet_count
    event_dict["num_isophoton"] = current_photon_count

    return event_dict


def convert_df_to_event_dicts(df:pd.DataFrame, max_jets:int=13, max_photons:int=3,
                            jet_features:list[str]=None, photon_features:list[str]=None,
                            btag_feature:str=None) -> list[dict]:
    """
    Converts the DataFrame into a list of event dictionaries.

    Each row is turned into a dictionary by create_event_dictionary. Only
    events with exactly one isophoton and exactly two b-tagged jets are kept.

    Args:
        df (pd.DataFrame): The processed DataFrame.
        max_jets (int): Maximum number of jets to consider.
        max_photons (int): Maximum number of photons to consider.
        jet_features (list): List of feature names for jets.
        photon_features (list): List of feature names for photons.
        btag_feature (str): Feature name of the b-tag columns.

    Returns:
        list: A list containing dictionaries, each representing a selected event.
              Returns an empty list if the input DataFrame is invalid or empty.
    """
    if df is None or not isinstance(df, pd.DataFrame):
        print("Error: Invalid DataFrame provided for dictionary conversion.")
        return []
    if df.empty:
        print("Warning: Input DataFrame is empty. Returning empty list.")
        return []

    event_dictionaries = []
    print("Converting DataFrame rows to event dictionaries...")
    for _, row in tqdm(df.iterrows(), total=len(df), desc="Processing Events"):
        event_dict = create_event_dictionary(row, max_jets, max_photons,
                                             jet_features, photon_features, btag_feature)
        if event_dict is None:
            continue
        # Selection: exactly one isophoton and exactly two b-tagged jets.
        if event_dict["num_isophoton"] == 1 and event_dict["num_btag_jets"] == 2:
            event_dictionaries.append(event_dict)

    # The message states the selection actually applied above (== 2, not >= 2).
    print(f"--- Successfully converted {len(event_dictionaries)} events (with only ONE isophoton and EXACTLY TWO btag jets) to dictionaries :)")
    return event_dictionaries

def save_to_json(data:list[dict, ...], filepath:str) -> bool:
    """
    Write a list of event dictionaries to a JSON file (indented by 2 spaces).

    Args:
        data (list): Event dictionaries to serialise.
        filepath (str): Destination path of the JSON file.

    Returns:
        bool: True when the file was written; False when data is not a list,
              contains non-serialisable values, or the file cannot be written.
    """
    if not isinstance(data, list):
        print("Error: Data to be saved must be a list of dictionaries.")
        return False

    print(f"Attempting to save {len(data)} events to JSON file: {filepath}")
    try:
        with open(filepath, 'w') as out_file:
            # indent=2 keeps the file human-readable at the cost of size.
            json.dump(data, out_file, indent=2)
    except TypeError as exc:
        # Raised by json when a value (e.g. an unconverted numpy type) cannot be encoded.
        print(f"Error: Data contains types not serializable to JSON: {exc}")
    except IOError as exc:
        print(f"Error: Could not write to file {filepath}: {exc}")
    except Exception as exc:
        print(f"An unexpected error occurred during JSON saving: {exc}")
    else:
        print("JSON file saved successfully.")
        return True

    # Reached only after one of the handlers above reported a failure.
    return False

# Configuration

In [26]:
def main(INPUT_FILE_PATH:str = None, OUTPUT_JSON_FILE_PATH:str = None,*, save_intermediate_txt_file:bool = False, PROCESSED_TXT_FILE_PATH:str = None) -> None:
    """
    Run the preprocessing pipeline for one input file and write the result as JSON.

    Steps: load the tab-separated file, blank out jets outside the Eta window,
    drop events with neither valid jets nor valid photons, optionally save that
    intermediate table, convert the rows to event dictionaries and dump them
    to JSON. Each failure is reported with a printed message and ends the run.

    Args:
        INPUT_FILE_PATH (str): Path of the tab-separated input file.
        OUTPUT_JSON_FILE_PATH (str): Path of the JSON file to write.
        save_intermediate_txt_file (bool): When True (and PROCESSED_TXT_FILE_PATH
            is given) the filtered table is also saved as a text file.
        PROCESSED_TXT_FILE_PATH (str): Destination of that intermediate file.

    Returns:
        None
    """
    # 1. Load Data
    raw_df = load_data(INPUT_FILE_PATH, sep='\t')
    if raw_df is None:
        print("*** Data loading failed. Aborting processing :(")
        return None

    # 2. Zero-jetmultiplicity filtering is currently bypassed.
    # NOTE(review): filter_zero_multiplicity(raw_df) is not applied here;
    # confirm whether this step should be re-enabled.
    df_filtered_multiplicity = raw_df

    # 3. Filter jets based on Eta range (sets invalid jets to NaN)
    eta_result = filter_jets_by_eta(df_filtered_multiplicity,
                                    eta_min=ETA_MIN,
                                    eta_max=ETA_MAX,
                                    max_jets=MAX_JETS)
    # Guard the unpacking: a failed Eta step must not raise here.
    if eta_result is None:
        print("*** Eta filtering step failed :(")
        return None
    df_eta_filtered, existing_jet_eta_cols = eta_result
    if df_eta_filtered is None:
        print("*** Eta filtering step failed :(")
        return None
    if df_eta_filtered.empty:
        print("*** All events were removed during the Eta filtering step :(")
        return None

    # 4. Filter out events with no valid jets AND no valid photons
    df_final_filtered = filter_empty_events(df_eta_filtered,
                                            jet_eta_cols=existing_jet_eta_cols,
                                            max_photons=MAX_PHOTONS)
    if df_final_filtered is None:
        print("*** Empty event filtering step failed :(")
        return None
    if df_final_filtered.empty:
        print("*** All events were removed during the empty event filtering step :(")
        return None

    if save_intermediate_txt_file and (PROCESSED_TXT_FILE_PATH is not None):
        save_data(df_final_filtered, PROCESSED_TXT_FILE_PATH)

    # 5. Convert the filtered DataFrame to a list of dictionaries, using the
    #    same MAX_JETS / MAX_PHOTONS limits as the filtering steps above.
    event_dict_list = convert_df_to_event_dicts(df_final_filtered,
                                                max_jets=MAX_JETS,
                                                max_photons=MAX_PHOTONS,
                                                jet_features=JET_FEATURES,
                                                photon_features=PHOTON_FEATURES,
                                                btag_feature=BTAG_FEATURE)
    if not event_dict_list:
        print("\n*** Processing complete :), but no event dictionaries were generated after final filtering :(")
        return None

    # 6. Save the list of dictionaries to JSON
    if not save_to_json(event_dict_list, OUTPUT_JSON_FILE_PATH):
        print("\n*** Processing complete :), but failed to save the output JSON file :(")
        return None

    print(f"\n--- Processing complete :). \nFiltered event dictionaries saved to {OUTPUT_JSON_FILE_PATH}")
    print("\n--- Sample Final Event Dictionaries ---")
    for i, event in enumerate(event_dict_list[:2]):
        print(f"Event {i+1} (Original EventNo: {event['eventno']}):")
        print(json.dumps(event, indent=2))
        print("-" * 20)
    return None

In [29]:
BASE_RAW_DATA_DIR = "./raw_txt_data"
INPUT_FILE_PATHS = []
for files in os.listdir("./raw_txt_data"):
    if files.endswith(".txt"):
        INPUT_FILE_PATHS.append(os.path.join(BASE_RAW_DATA_DIR, files))

print(f"Found the following files to preprocess and convert to JSON file: \n{INPUT_FILE_PATHS}")

OUTPUT_JSON_FILE_PATHS = []
BASE_OUTPUT_DIR = "./onlyAny2bj_onlyAny1p"
os.makedirs(BASE_OUTPUT_DIR, exist_ok=True)
for paths in INPUT_FILE_PATHS:
    if paths.endswith(".txt"):
        output_path = paths.split("/")[-1][:-4] + '_onlyAny2j_onlyAny1p_processed.json'
        OUTPUT_JSON_FILE_PATHS.append(os.path.join(BASE_OUTPUT_DIR, output_path))

print(f"\nFiles will be saved to paths: \n{OUTPUT_JSON_FILE_PATHS}")

Found the following files to preprocess and convert to JSON file: 
['./raw_txt_data/ppzaxbba_ax45_200k_minpt10_15jets_etafiltered_corrected.txt', './raw_txt_data/background_ppbba_500k_minpt20_15jets_etafiltered_corrected.txt', './raw_txt_data/ppzaxbba_ax15_200k_minpt10_15jets_etafiltered_corrected.txt', './raw_txt_data/ppzaxbba_ax15_200k_minpt20_15jets_etafiltered_corrected.txt', './raw_txt_data/background_ppbba_500k_minpt10_15jets_etafiltered_corrected.txt', './raw_txt_data/ppzaxbba_ax45_200k_minpt20_15jets_etafiltered_corrected.txt']

Files will be saved to paths: 
['./onlyAny2bj_onlyAny1p/ppzaxbba_ax45_200k_minpt10_15jets_etafiltered_corrected_onlyAny2j_onlyAny1p_processed.json', './onlyAny2bj_onlyAny1p/background_ppbba_500k_minpt20_15jets_etafiltered_corrected_onlyAny2j_onlyAny1p_processed.json', './onlyAny2bj_onlyAny1p/ppzaxbba_ax15_200k_minpt10_15jets_etafiltered_corrected_onlyAny2j_onlyAny1p_processed.json', './onlyAny2bj_onlyAny1p/ppzaxbba_ax15_200k_minpt20_15jets_etafiltered_c

In [30]:
# Run the pipeline once per input file; zip pairs each input path with the
# output path at the same position in OUTPUT_JSON_FILE_PATHS.
for input_file_path, output_file_path in zip(INPUT_FILE_PATHS, OUTPUT_JSON_FILE_PATHS):
    main(input_file_path, output_file_path)
    # Visual separator between the logs of consecutive files.
    print("-"*20)

Loading data from ./raw_txt_data/ppzaxbba_ax45_200k_minpt10_15jets_etafiltered_corrected.txt...
--- Data loaded successfully :)
Applying Eta filter: Keeping jets with Eta between -2.5 and 2.5.
--- Eta filtering complete :)
Removed 23309 events with no valid jets AND no valid photons :)
Number of events after empty event filtering: 176691
Converting DataFrame rows to event dictionaries...


Processing Events: 100%|██████████| 176691/176691 [01:00<00:00, 2899.31it/s]


--- Successfully converted 6476 events (with only ONE isophoton and AT LEAST TWO btag jets) to dictionaries :)
Attempting to save 6476 events to JSON file: ./onlyAny2bj_onlyAny1p/ppzaxbba_ax45_200k_minpt10_15jets_etafiltered_corrected_onlyAny2j_onlyAny1p_processed.json
JSON file saved successfully.

--- Processing complete :). 
Filtered event dictionaries saved to ./onlyAny2bj_onlyAny1p/ppzaxbba_ax45_200k_minpt10_15jets_etafiltered_corrected_onlyAny2j_onlyAny1p_processed.json

--- Sample Final Event Dictionaries ---
Event 1 (Original EventNo: 66):
{
  "eventno": 66,
  "jets": [
    {
      "jet_index": 1,
      "Eta": 1.48071,
      "Phi": -0.116224,
      "pT": 30.1322,
      "Px": 29.9289,
      "Py": -3.49422,
      "Pz": 62.8047,
      "E": 69.6591,
      "btag": 1
    },
    {
      "jet_index": 2,
      "Eta": 0.582781,
      "Phi": 1.70724,
      "pT": 19.004,
      "Px": -2.585,
      "Py": 18.8274,
      "Pz": 11.7128,
      "E": 22.3236,
      "btag": 1
    },
    {
      "je

Processing Events: 100%|██████████| 355591/355591 [01:24<00:00, 4207.63it/s]


--- Successfully converted 1893 events (with only ONE isophoton and AT LEAST TWO btag jets) to dictionaries :)
Attempting to save 1893 events to JSON file: ./onlyAny2bj_onlyAny1p/background_ppbba_500k_minpt20_15jets_etafiltered_corrected_onlyAny2j_onlyAny1p_processed.json
JSON file saved successfully.

--- Processing complete :). 
Filtered event dictionaries saved to ./onlyAny2bj_onlyAny1p/background_ppbba_500k_minpt20_15jets_etafiltered_corrected_onlyAny2j_onlyAny1p_processed.json

--- Sample Final Event Dictionaries ---
Event 1 (Original EventNo: 541):
{
  "eventno": 541,
  "jets": [
    {
      "jet_index": 1,
      "Eta": -0.0075974,
      "Phi": -2.57166,
      "pT": 34.8986,
      "Px": -29.3824,
      "Py": -18.8303,
      "Pz": -0.265141,
      "E": 34.8996,
      "btag": 1
    },
    {
      "jet_index": 2,
      "Eta": 0.306125,
      "Phi": -2.13079,
      "pT": 30.9913,
      "Px": -16.4621,
      "Py": -26.2576,
      "Pz": 9.63607,
      "E": 32.4548,
      "btag": 1
    

Processing Events: 100%|██████████| 175296/175296 [00:56<00:00, 3099.78it/s]


--- Successfully converted 3389 events (with only ONE isophoton and AT LEAST TWO btag jets) to dictionaries :)
Attempting to save 3389 events to JSON file: ./onlyAny2bj_onlyAny1p/ppzaxbba_ax15_200k_minpt10_15jets_etafiltered_corrected_onlyAny2j_onlyAny1p_processed.json
JSON file saved successfully.

--- Processing complete :). 
Filtered event dictionaries saved to ./onlyAny2bj_onlyAny1p/ppzaxbba_ax15_200k_minpt10_15jets_etafiltered_corrected_onlyAny2j_onlyAny1p_processed.json

--- Sample Final Event Dictionaries ---
Event 1 (Original EventNo: 30):
{
  "eventno": 30,
  "jets": [
    {
      "jet_index": 1,
      "Eta": 1.5536,
      "Phi": -0.8157,
      "pT": 21.4545,
      "Px": 14.704,
      "Py": -15.6233,
      "Pz": 48.4545,
      "E": 52.9918,
      "btag": 1
    },
    {
      "jet_index": 2,
      "Eta": 1.1582,
      "Phi": -0.457432,
      "pT": 14.0265,
      "Px": 12.5845,
      "Py": -6.19476,
      "Pz": 20.1292,
      "E": 24.5342,
      "btag": 1
    },
    {
      "jet

Processing Events: 100%|██████████| 149600/149600 [00:41<00:00, 3648.53it/s]


--- Successfully converted 552 events (with only ONE isophoton and AT LEAST TWO btag jets) to dictionaries :)
Attempting to save 552 events to JSON file: ./onlyAny2bj_onlyAny1p/ppzaxbba_ax15_200k_minpt20_15jets_etafiltered_corrected_onlyAny2j_onlyAny1p_processed.json
JSON file saved successfully.

--- Processing complete :). 
Filtered event dictionaries saved to ./onlyAny2bj_onlyAny1p/ppzaxbba_ax15_200k_minpt20_15jets_etafiltered_corrected_onlyAny2j_onlyAny1p_processed.json

--- Sample Final Event Dictionaries ---
Event 1 (Original EventNo: 292):
{
  "eventno": 292,
  "jets": [
    {
      "jet_index": 1,
      "Eta": 1.77274,
      "Phi": 1.0867,
      "pT": 25.8272,
      "Px": 12.0202,
      "Py": 22.8596,
      "Pz": 73.8287,
      "E": 78.2158,
      "btag": 1
    },
    {
      "jet_index": 2,
      "Eta": 1.93631,
      "Phi": 1.62012,
      "pT": 22.335,
      "Px": -1.10121,
      "Py": 22.3078,
      "Pz": 75.8149,
      "E": 79.0364,
      "btag": 1
    }
  ],
  "photons": [

Processing Events: 100%|██████████| 470848/470848 [02:46<00:00, 2824.67it/s]


--- Successfully converted 5645 events (with only ONE isophoton and AT LEAST TWO btag jets) to dictionaries :)
Attempting to save 5645 events to JSON file: ./onlyAny2bj_onlyAny1p/background_ppbba_500k_minpt10_15jets_etafiltered_corrected_onlyAny2j_onlyAny1p_processed.json
JSON file saved successfully.

--- Processing complete :). 
Filtered event dictionaries saved to ./onlyAny2bj_onlyAny1p/background_ppbba_500k_minpt10_15jets_etafiltered_corrected_onlyAny2j_onlyAny1p_processed.json

--- Sample Final Event Dictionaries ---
Event 1 (Original EventNo: 262):
{
  "eventno": 262,
  "jets": [
    {
      "jet_index": 2,
      "Eta": -0.460313,
      "Phi": 0.224215,
      "pT": 38.8455,
      "Px": 37.8732,
      "Py": 8.63695,
      "Pz": -18.5193,
      "E": 43.0342,
      "btag": 1
    },
    {
      "jet_index": 6,
      "Eta": -2.304,
      "Phi": -2.48777,
      "pT": 13.1734,
      "Px": -10.4565,
      "Py": -8.01238,
      "Pz": -65.3025,
      "E": 66.618,
      "btag": 0
    },
   

Processing Events: 100%|██████████| 148523/148523 [01:42<00:00, 1448.03it/s]


--- Successfully converted 1617 events (with only ONE isophoton and AT LEAST TWO btag jets) to dictionaries :)
Attempting to save 1617 events to JSON file: ./onlyAny2bj_onlyAny1p/ppzaxbba_ax45_200k_minpt20_15jets_etafiltered_corrected_onlyAny2j_onlyAny1p_processed.json
JSON file saved successfully.

--- Processing complete :). 
Filtered event dictionaries saved to ./onlyAny2bj_onlyAny1p/ppzaxbba_ax45_200k_minpt20_15jets_etafiltered_corrected_onlyAny2j_onlyAny1p_processed.json

--- Sample Final Event Dictionaries ---
Event 1 (Original EventNo: 228):
{
  "eventno": 228,
  "jets": [
    {
      "jet_index": 1,
      "Eta": -0.0143928,
      "Phi": -0.208869,
      "pT": 28.6807,
      "Px": 28.0573,
      "Py": -5.94704,
      "Pz": -0.412809,
      "E": 28.6837,
      "btag": 1
    },
    {
      "jet_index": 2,
      "Eta": -0.115036,
      "Phi": -1.73382,
      "pT": 23.2269,
      "Px": -3.76972,
      "Py": -22.919,
      "Pz": -2.67784,
      "E": 23.3808,
      "btag": 1
    }
  ]

In [13]:
# sig_75 = pd.read_csv("./raw_txt_data/ppzaxbba_ax75_200k_minpt20_13jets.txt", sep = '\t')
# sig_75.to_csv("ppzaxbba_ax75_200k_minpt20_13jets.csv")