In [21]:
def ranges_overlap(range1, range2):
    """
    Check if two ranges overlap.

    Parameters:
    range1 (tuple): A tuple representing the first range (start1, end1).
    range2 (tuple): A tuple representing the second range (start2, end2).

    Returns:
    bool: True if the ranges overlap, False otherwise.
    """
    start1, end1 = range1
    start2, end2 = range2
        
    # Check for overlap
    return max(start1, start2) < min(end1, end2)

def getMidpoint(a: float, b: float) -> float:
    """
    Calculate the midpoint of two float values.

    :param a: The first float value.
    :param b: The second float value.
    :return: The midpoint of the two values.
    """
    return (a + b) / 2

def checkNewCharacteristic(new_hist, new_hist_err):
    assert len(new_hist) == 2 # we assume new_hist has size of two
    # obtain up and down histogram ranges
    lower_bin_down_up = (
        new_hist[0] - new_hist_err[0],
        new_hist[0] + new_hist_err[0] 
    )
    upper_bin_down_up = (
        new_hist[1] - new_hist_err[1],
        new_hist[1] + new_hist_err[1] 
    )
    # check if either lower bin edge range or upper one has zero difference. Then return False
    if abs(lower_bin_down_up[0]-lower_bin_down_up[1]) ==0:
        return False
    elif abs(upper_bin_down_up[0]-upper_bin_down_up[1]) ==0:
        return False
    
    no_range_overlap = not ranges_overlap(lower_bin_down_up, upper_bin_down_up) # no range overlap means there was a characateristic we missed
    new_characteristic = no_range_overlap

    # print(f"lower_bin_down_up: {lower_bin_down_up}")
    # print(f"upper_bin_down_up: {upper_bin_down_up}")
    # print(f"no_range_overlap: {no_range_overlap}")
    return new_characteristic



In [7]:
import numpy as np
import matplotlib.pyplot as plt
import copy

# Example data
data1 = np.random.normal(loc=5, scale=1, size=100_000)
data2 = np.random.normal(loc=10, scale=1, size=200_000)
data3 = np.random.normal(loc=20, scale=1, size=1000_000)
data = np.concatenate([data1, data2, data3])

In [23]:
xmax = 40
xmin = 0
initial_bins = np.linspace(xmin, xmax, 3)

old_bins = initial_bins
current_bins = copy.deepcopy(old_bins)
# loop over old bins and divide them into two equal bins
print(f"current_bins: {current_bins}")

bin_has_changed = True
while True:
    bin_has_changed = False # make this false until flipped True
    print("new loop start ---------------------------------------------------------------------")
    print(f"current_bins length: {len(current_bins)}")
    print(f"current_bins: {current_bins}")
    
    for bin_idx in range(len(current_bins)-1):
        bin_low_edge = current_bins[bin_idx]
        bin_high_edge = current_bins[bin_idx+1]
        
        bin_mid = getMidpoint(bin_low_edge, bin_high_edge)
        # Make new Binning and plot histogram
        new_binning = np.array([bin_low_edge, bin_mid, bin_high_edge])
        new_hist, edges = np.histogram(data, bins=new_binning)
        new_hist_err = np.sqrt(new_hist)
        new_charaacteristic = checkNewCharacteristic(new_hist, new_hist_err)
        print(f"bin_low_edge: {bin_low_edge}")
        # print(f"bin_high_edge: {bin_high_edge}")
        # print(f"bin_mid: {bin_mid}")
        # print(f"new_hist: {new_hist}")
        # print(f"new_hist_err: {new_hist_err}")
        # print(f"edges: {edges}")
        # if new binning leads to new characateristic, keep new binning
        if new_charaacteristic:
            # add new bin edge and sort
            new_bins = list(current_bins) + [bin_mid]
            new_bins = np.array(sorted(new_bins))
            # print(f"adding edge {bin_mid}")
            # print(f"new_bins: {new_bins}")
            bin_has_changed = True 
        # else:
            # print(f"NOT adding edge {bin_mid}")
    
    # repeat until no new bin edge has been added, then end loop
    if bin_has_changed:
        current_bins = new_bins
    else: 
        print("No new bins were found. Ending Loop!")
        break # end loop of no bin has changed
        

current_bins: [ 0. 20. 40.]
new loop start ---------------------------------------------------------------------
current_bins length: 3
current_bins: [ 0. 20. 40.]
bin_low_edge: 0.0
bin_low_edge: 20.0
NOT adding edge 30.0
new loop start ---------------------------------------------------------------------
current_bins length: 4
current_bins: [ 0. 10. 20. 40.]
bin_low_edge: 0.0
bin_low_edge: 10.0
bin_low_edge: 20.0
NOT adding edge 30.0
new loop start ---------------------------------------------------------------------
current_bins length: 5
current_bins: [ 0. 10. 15. 20. 40.]
bin_low_edge: 0.0
bin_low_edge: 10.0
bin_low_edge: 15.0
bin_low_edge: 20.0
NOT adding edge 30.0
new loop start ---------------------------------------------------------------------
current_bins length: 6
current_bins: [ 0.  10.  15.  17.5 20.  40. ]
bin_low_edge: 0.0
bin_low_edge: 10.0
bin_low_edge: 15.0
bin_low_edge: 17.5
bin_low_edge: 20.0
NOT adding edge 30.0
new loop start -------------------------------------

KeyboardInterrupt: 