In [1]:
import uproot
import json
import re
import numpy as np
import matplotlib.pyplot as plt

In [2]:
def get_cut_string_from_json(filename, json_file_path):
    """
    Look up the cut string that belongs to a file name in a JSON configuration.

    The lookup key is built from the third and fourth underscore-separated
    tokens of ``filename``; every trailing 'p' is stripped from the fourth token
    (for example 'parse_mc_sbs4_30p_barebones.root' gives the key 'sbs4_30').
    The key is looked up inside the "post_cuts_p2" object of the JSON document.

    :param filename: File name whose tokens select the entry in the JSON data.
    :param json_file_path: Path of the JSON file holding the cut strings.
    :return: The matching cut string with every "&&" replaced by " and ". When
             the key is absent, the placeholder text
             "Default cut string if not found" is returned instead.
    :raises IndexError: If ``filename`` has fewer than four '_'-separated tokens.
    :raises KeyError: If the JSON document has no "post_cuts_p2" entry.
    """
    # Read the whole configuration up front; the handle is closed by the context manager.
    with open(json_file_path, 'r') as handle:
        config = json.load(handle)

    # Tokens 2 and 3 of the file name identify the configuration entry.
    tokens = filename.split('_')
    first_part = tokens[2]
    second_part = tokens[3].rstrip('p')
    lookup_key = f"{first_part}_{second_part}"

    # Fall back to a placeholder sentence when the key is missing.
    raw_cuts = config["post_cuts_p2"].get(lookup_key, "Default cut string if not found")

    # Switch the '&&' connectors to the ' and ' spelling.
    return raw_cuts.replace("&&", " and ")

In [21]:
def remove_specific_cuts(cut_string, cuts_to_remove):
    """
    Removes specific conditions from the cut string, anticipating conversion to ' and '.

    Each entry of ``cuts_to_remove`` is treated as literal text (it is escaped
    before being placed in the regular expression), so conditions containing
    regex metacharacters such as "abs(coin-0.54)<7.0" are removed correctly.
    Matching is case-insensitive and only whole conditions are matched: a cut is
    not removed when it is directly preceded or followed by a word character
    (e.g. "bb_tr_n==1" does not match inside "bb_tr_n==10").

    NOTE: this notebook defines remove_specific_cuts in more than one cell; the
    definition from whichever cell was executed last is the one in effect.

    :param cut_string: The original cut string containing all conditions.
    :param cuts_to_remove: A list of conditions (as strings) to remove from the cut string.
    :return: The modified cut string with specified conditions removed, joined by ' and '.
             May be an empty string if every condition was removed.
    """
    # Convert "&&" to " and " for consistency in removal and further processing
    cut_string = cut_string.replace("&&", " and ")

    for cut in cuts_to_remove:
        # A blank entry would turn the pattern into "match bare 'and'" and
        # delete the connectors between the remaining conditions.
        if not cut.strip():
            continue

        # Escape the cut so characters like '(', ')', '.', '+' are matched literally
        cut_escaped = re.escape(cut)

        # Remove the cut together with one neighbouring 'and' when there is one.
        # Lookarounds are used instead of \b next to the cut so that cuts that
        # start or end with a non-word character (e.g. ')') still match.
        pattern = (
            rf'\band\s+{cut_escaped}(?!\w)'
            rf'|(?<!\w){cut_escaped}\s+and\b'
            rf'|(?<!\w){cut_escaped}(?!\w)'
        )
        cut_string = re.sub(pattern, '', cut_string, flags=re.IGNORECASE)

    # Clean up: Remove any leading or trailing 'and ', and replace multiple occurrences of 'and' with a single 'and'
    cut_string = re.sub(r'^and\s+', '', cut_string).strip()  # Remove 'and ' at the start
    cut_string = re.sub(r'\s+and$', '', cut_string).strip()  # Remove 'and ' at the end
    cut_string = re.sub(r'\band\s+and\b', ' and ', cut_string)  # Replace 'and and' with 'and'
    cut_string = re.sub(r'\s+and\s+', ' and ', cut_string)  # Ensure single space around 'and'

    return cut_string

# Example usage:
#cut_string = "bb_tr_n==1 && hcale>0.04 && abs(bb_tr_vz)<0.075 && abs(dy+0.03)<0.80 && bb_ps_e>0.2 && abs(W2-0.92)<0.72 && bb_gem_track_nhits>3 && abs(bb_etot_over_p-0.993)<0.240 && hcalnblk>0 && abs(coin-0.54)<7.0 && hcalon==1 && mag==100"
#cuts_to_remove = ["mag==100", "hcalon==1"]
#modified_cut_string = remove_specific_cuts(cut_string, cuts_to_remove)
#print(modified_cut_string)

In [3]:
def remove_specific_cuts(cut_string, cuts_to_remove):
    """
    Removes specific conditions from the cut string, anticipating conversion to ' and '.

    Every "&&" is first rewritten as " and ". Each cut is then matched as
    literal text (case-insensitively) and deleted together with one adjacent
    'and' connector, after which stray connectors and repeated whitespace around
    'and' are normalised.

    A cut only matches as a whole condition: it must not be directly preceded or
    followed by a word character, so "bb_tr_n==1" leaves "bb_tr_n==10" intact.
    Blank entries in ``cuts_to_remove`` are ignored.

    :param cut_string: The original cut string containing all conditions.
    :param cuts_to_remove: A list of conditions (as strings) to remove from the cut string.
    :return: The modified cut string with specified conditions removed. May be an
             empty string if every condition was removed.
    """
    # Convert "&&" to " and " for consistency in removal and further processing
    cut_string = cut_string.replace("&&", " and ")

    for cut in cuts_to_remove:
        # Skip blank entries: with an empty cut the pattern would match the
        # bare 'and' connectors and glue the remaining conditions together.
        if not cut.strip():
            continue

        # Escape special characters in the cut to be removed
        cut_escaped = re.escape(cut)

        # Remove the specific cut along with a neighbouring 'and'.
        # (?<!\w) / (?!\w) behave like \b for cuts that begin/end with a word
        # character, but unlike \b they also work when the cut begins or ends
        # with punctuation such as ')'.
        pattern = (
            rf'\band\s+{cut_escaped}(?!\w)'
            rf'|(?<!\w){cut_escaped}\s+and\b'
            rf'|(?<!\w){cut_escaped}(?!\w)'
        )
        cut_string = re.sub(pattern, '', cut_string, flags=re.IGNORECASE)

    # Clean up: Remove any leading or trailing 'and ', and replace multiple occurrences of 'and' with a single 'and'
    cut_string = re.sub(r'^and\s+', '', cut_string).strip()  # Remove 'and ' at the start
    cut_string = re.sub(r'\s+and$', '', cut_string).strip()  # Remove 'and ' at the end
    cut_string = re.sub(r'\band\s+and\b', ' and ', cut_string)  # Replace 'and and' with 'and'
    cut_string = re.sub(r'\s+and\s+', ' and ', cut_string)  # Ensure single space around 'and'

    return cut_string

In [122]:
def remove_specific_cuts(cut_string, cuts_to_remove):
    """
    Removes specific conditions from the cut string.

    The cut string is split into conditions at each '&&' (with or without
    surrounding whitespace) or whitespace-delimited 'and'. A condition is
    dropped only when, after stripping whitespace, it is exactly equal to one of
    ``cuts_to_remove`` (case-sensitive), so a cut never eats part of a longer
    condition (removing "bb_tr_n==1" leaves "bb_tr_n==10" untouched). The
    connectors between the surviving conditions are kept exactly as written, and
    a string from which nothing is removed is returned unchanged.

    NOTE: this notebook defines remove_specific_cuts in more than one cell; the
    definition from whichever cell was executed last is the one in effect.

    :param cut_string: The original cut string containing all conditions.
    :param cuts_to_remove: A list of conditions (as strings) to remove from the cut string.
    :return: The modified cut string with specified conditions removed. May be an
             empty string if every condition was removed.
    """
    # Blank entries are ignored; surrounding whitespace on a cut is not significant.
    unwanted = {cut.strip() for cut in cuts_to_remove if cut.strip()}

    # The capturing group keeps the connectors in the result:
    # parts == [cond0, sep0, cond1, sep1, ..., condN]
    parts = re.split(r'(\s*&&\s*|\s+and\s+)', cut_string)
    conditions = parts[0::2]
    connectors = parts[1::2]

    kept = []
    for index, condition in enumerate(conditions):
        if condition.strip() in unwanted:
            continue
        if kept:
            # Re-use the connector that originally preceded this condition.
            kept.append(connectors[index - 1])
        kept.append(condition)

    return "".join(kept)

In [66]:
def plot_histogram_from_file(filename, cut_string):
    """
    Plot weighted 'dx' histograms from tree "P" of a ROOT file, split by 'nucleon'.

    Entries with nucleon == 0 are drawn as the proton histogram and entries with
    nucleon == 1 as the neutron histogram, both weighted by 'mc_weight_norm'.
    A black step line shows the weighted histogram of all entries, and the
    legend reports the two weighted integrals and their ratio (neutron / proton).

    NOTE: ``cut_string`` is accepted but never used by this function; no
    selection other than the nucleon split is applied to the data.

    :param filename: Name of the ROOT file inside the hard-coded parse directory.
                     Its third and fourth '_'-separated tokens are shown in the title.
    :param cut_string: Unused.
    :return: None. The figure is displayed with plt.show().
    :raises IndexError: If ``filename`` has fewer than four '_'-separated tokens.
    """
    # Extract the section from the filename for the title
    name_tokens = filename.split('_')
    section = name_tokens[2]
    section2 = name_tokens[3]

    # Load the branches as numpy arrays; the context manager closes the ROOT
    # file once the arrays have been read into memory.
    with uproot.open(f"/lustre19/expphy/volatile/halla/sbs/seeds/parse/{filename}") as file:
        tree = file["P"]
        dx_array = tree["dx"].array(library="np")
        nucleon_array = tree["nucleon"].array(library="np")
        weights_array = tree["mc_weight_norm"].array(library="np")

    # Split values and weights on the nucleon flag
    is_proton = nucleon_array == 0  # nucleon==0
    is_neutron = nucleon_array == 1  # nucleon==1
    dx_p_array = dx_array[is_proton]
    dx_n_array = dx_array[is_neutron]
    weights_p_array = weights_array[is_proton]
    weights_n_array = weights_array[is_neutron]

    # Define custom binning within the range of -2 to 1
    bins = np.linspace(-2, 1, 200)
    half_bin_width = (bins[1] - bins[0]) / 2

    # Calculate the integrals (weighted sums)
    integral_p = np.sum(weights_p_array)
    integral_n = np.sum(weights_n_array)

    # Ratio of the weighted neutron integral to the weighted proton integral.
    # There is no guard here for integral_p == 0.
    ratio_n_p = integral_n / integral_p

    # Double the overall size of the plot
    plt.figure(figsize=(20, 12))

    # Weighted histogram of all entries, used for the outlined sum
    counts, edges = np.histogram(dx_array, bins=bins, weights=weights_array)

    # Left bin edges shifted by half a bin, i.e. the bin centres
    bin_centres = edges[:-1] + half_bin_width

    # Create histograms with weights
    plt.hist(dx_p_array, bins=bins, weights=weights_p_array, label=f'dx_p (proton) weighted, N={integral_p:.2f}', alpha=0.75)
    plt.hist(dx_n_array, bins=bins, weights=weights_n_array, label=f'dx_n (neutron) weighted, N={integral_n:.2f}', alpha=0.75)

    # where='mid' centres each step on the x value, so the line follows the bins
    plt.step(bin_centres, counts, where='mid', color='black', linewidth=2, label='dx (sum) weighted')

    # Add a dummy plot for the ratio of N_n to N_p in the legend
    plt.plot([], [], ' ', label=f'Ratio of N_n to N_p: {ratio_n_p:.2f}')

    # Customize the plot
    plt.xlabel('dx', fontsize='xx-large')
    plt.ylabel('Counts', fontsize='xx-large')
    plt.title(f'Histograms of dx, dx_p, and dx_n with Weights and Cuts for {section} {section2}', fontsize='xx-large')
    plt.xlim(-2, 1)  # Set the x-axis limits
    plt.legend(fontsize='xx-large')
    plt.grid(True)  # Add grid lines
    plt.show()

In [4]:
def plot_histogram_from_file(filename, modified_cut_string):
    """
    Plot weighted 'dx' histograms from tree "P" of a ROOT file after applying cuts.

    A fixed list of branches is read into a pandas DataFrame, the rows are
    filtered with ``DataFrame.query(modified_cut_string)``, and the surviving
    rows are split on 'nucleon' (0 is drawn as proton, 1 as neutron). Both
    histograms are weighted by 'mc_weight_norm'; the legend reports the two
    weighted integrals and their ratio (neutron / proton, NaN when the proton
    integral is zero).

    :param filename: Name of the ROOT file inside the hard-coded parse directory.
                     Its third token and its fourth token (trailing 'p' removed)
                     are shown in the plot title.
    :param modified_cut_string: Selection expression in pandas ``query`` syntax.
                     It may only reference the branches loaded below. An empty
                     or whitespace-only string means "no cuts".
    :return: None. The column names are printed and the figure is displayed.
    :raises IndexError: If ``filename`` has fewer than four '_'-separated tokens.
    """
    # Extract the section from the filename for the title
    name_tokens = filename.split('_')
    section = name_tokens[2]
    section2 = name_tokens[3].rstrip('p')  # Removes the trailing 'p'

    # Load the branches into a DataFrame; the context manager closes the ROOT
    # file once the data has been read into memory.
    with uproot.open(f"/lustre19/expphy/volatile/halla/sbs/seeds/parse/{filename}") as file:
        tree = file["P"]
        branches = tree.arrays(["dx", "W2", "nucleon", "mc_weight_norm", "bb_ps_e", "hcale", "dy", "bb_etot_over_p", "hcalnblk", "bb_gem_track_nhits", "fiducial_sig_x", "fiducial_sig_y", "hcalon", "coin", "bb_tr_vz"], library="pd")

    print("Columns in DataFrame:", branches.columns)

    # Define custom binning within the range of -2 to 1
    bins = np.linspace(-2, 1, 200)

    # Apply cuts using query method on DataFrame. DataFrame.query rejects an
    # empty expression, so an empty cut string is treated as "keep everything".
    if modified_cut_string.strip():
        selected = branches.query(modified_cut_string)
    else:
        selected = branches

    # After applying cuts, separate proton and neutron data based on the 'nucleon' condition
    is_proton = selected['nucleon'] == 0
    is_neutron = selected['nucleon'] == 1
    dx_p_array = selected.loc[is_proton, 'dx']
    dx_n_array = selected.loc[is_neutron, 'dx']
    weights_p_array = selected.loc[is_proton, 'mc_weight_norm']
    weights_n_array = selected.loc[is_neutron, 'mc_weight_norm']

    # Calculate the integrals (weighted sums) after cuts
    integral_p = weights_p_array.sum()
    integral_n = weights_n_array.sum()

    # Calculate the ratio of N_n to N_p (NaN when there is no proton weight)
    ratio_n_p = integral_n / integral_p if integral_p else np.nan

    # Plotting
    plt.figure(figsize=(20, 12))

    # Create histograms with weights after cuts
    plt.hist(dx_p_array, bins=bins, weights=weights_p_array, label=f'dx_p (proton) weighted, N={integral_p:.2f}', alpha=0.75)
    plt.hist(dx_n_array, bins=bins, weights=weights_n_array, label=f'dx_n (neutron) weighted, N={integral_n:.2f}', alpha=0.75)

    # Add a dummy plot for the ratio of N_n to N_p in the legend
    plt.plot([], [], ' ', label=f'Ratio of N_n to N_p: {ratio_n_p:.2f}')

    # Customize the plot
    plt.xlabel('dx', fontsize='xx-large')
    plt.ylabel('Counts', fontsize='xx-large')
    plt.title(f'Histograms of dx, dx_p, and dx_n with Weights and Cuts for {section} {section2}', fontsize='xx-large')
    plt.xlim(-2, 1)
    plt.legend(fontsize='xx-large')
    plt.grid(True)
    plt.show()

In [5]:
# Path of the JSON configuration that is passed to get_cut_string_from_json
# in the plotting loop to look up the cut string for each file.
json_file_path = '/w/halla-scshelf2102/sbs/seeds/ana/config/syst.json'

In [6]:
# Specify the cuts to remove. Each entry must match a condition of the cut
# string exactly as it is written in the JSON file; the list is passed to
# remove_specific_cuts in the plotting loop.
# NOTE(review): 'mag', 'tar' and 'bb_tr_n' are not among the branches loaded by
# the DataFrame-based plot_histogram_from_file, which is presumably why those
# cuts are stripped before the query - confirm ('coin' is loaded, though).
cuts_to_remove = ["mag==70", "mag==100", "mag==30", "mag==85", "tar==1", "bb_tr_n==1", "abs(coin-0.54)<7.0"]

In [7]:
# List of files to process. Each name is resolved inside the hard-coded parse
# directory by plot_histogram_from_file, and its third and fourth
# '_'-separated tokens (e.g. 'sbs11' and '100p') select the JSON cut entry.
# NOTE: a later cell assigns `files` again; whichever cell ran last wins.
files = [
    "parse_mc_sbs11_100p_barebones.root",
    "parse_mc_sbs14_70p_barebones.root",
    "parse_mc_sbs4_30p_barebones_alt.root",
    "parse_mc_sbs4_30p_barebones.root",
    "parse_mc_sbs4_50p_barebones.root",
    "parse_mc_sbs7_85p_barebones.root",
    "parse_mc_sbs9_70p_barebones_alt.root",
]

In [1]:
# List of files (reduced selection). This rebinds `files`, replacing the longer
# list defined in the earlier cell with a four-file subset of it.
files = [
    "parse_mc_sbs4_30p_barebones_alt.root",
    "parse_mc_sbs4_30p_barebones.root",
    "parse_mc_sbs4_50p_barebones.root",
    "parse_mc_sbs9_70p_barebones_alt.root",
]

In [2]:
# Loop over the files and plot histograms.
# This cell relies on names defined in earlier cells: the imports,
# get_cut_string_from_json, remove_specific_cuts, plot_histogram_from_file,
# json_file_path and cuts_to_remove. They must all be executed in the current
# kernel first; the NameError recorded below this cell is what happens when
# the cell is run on a fresh kernel without them.
for filename in files:
    # Cut string as stored in the JSON file, with '&&' already rewritten to ' and '
    cut_string = get_cut_string_from_json(filename, json_file_path)
    print(cut_string)
    # Same cut string with the conditions listed in cuts_to_remove taken out
    modified_cut_string = remove_specific_cuts(cut_string, cuts_to_remove)
    print(modified_cut_string)
    plot_histogram_from_file(filename, modified_cut_string)

NameError: name 'get_cut_string_from_json' is not defined