In [3]:
import sys
sys.path.append('../python/')

In [9]:
def binned_statistic_qt(x, y, v=None, statistic='density', edges=None):
    '''
    Computed statistics inlike scipy's binned_statistic_2d, but for QT's, and assuming np.nan-friendly stats
    
    Parameters
    ----------
    x, y : the 2 arrays of new data to compute the histogram from
    v : array of values to compute statistic over. Needs to match 
        x and y
    edges : x and y edges of the Quad Tree, expanded as
        xmin,xmax,ymin,ymax = edges
    statistic : string or callable, optional
        The statistic to compute (default value is 'density')
        
        The following statistics are available:
            * 'density' : compute the count of points within each bin, then
                divide by the area of the bin in (x,y) space. This is useful
                for making a histogram-like plot from the quad tree.
            * 'mean' : compute the mean of values for points within each bin.
                Empty bins will be represented by NaN. 
            * 'median' : compute the median of values for points within each
                bin. Empty bins will be represented by NaN. 
            * 'count' : compute the count of points within each bin.  This is
                identical to an unweighted histogram.  `v` array is not
                referenced. 
            * 'sum' : compute the sum of values for points within each bin. 
            * 'std' : compute the standard deviation within each bin.
                Empty bins will be represented by NaN.
            * 'min' : compute the minimum of values for points within each bin.
                Empty bins will be represented by NaN.
            * 'max' : compute the maximum of values for point within each bin.
                Empty bins will be represented by NaN.
            * function : a user-defined function which takes a 1D array of
                values, and outputs a single numerical statistic. This function
                will be called on the values in each bin. 

    Returns
    -------
    statistic : computed statistic within each bin
    '''
    
    #edges must be defined from QT... or we need to run QT to get them
    xmin,xmax,ymin,ymax = edges
    
    # check that statistic is callable
    known_stats = ['mean', 'median', 'count', 'sum', 'std', 'min', 'max', 'density']
    if not callable(statistic) and statistic not in known_stats:
        raise ValueError(f'invalid statistic {statistic!r}')
   
    result = np.zeros_like(xmin, dtype=np.float64)

    if statistic == 'density':
        for k in range(len(xmin)):
            result[k] = np.sum((x >= xmin[k]) & (x < xmax[k]) & (y >= ymin[k]) & (y < ymax[k]))
        result = result / ((ymax - ymin) * (xmax - xmin)) / result.sum()
        
    elif statistic == 'count':
        for k in range(len(xmin)):
            result[k] = np.sum((x >= xmin[k]) & (x < xmax[k]) & (y >= ymin[k]) & (y < ymax[k]))
            
    elif statistic == 'sum':
        for k in range(len(xmin)):
            b = (x >= xmin[k]) & (x < xmax[k]) & (y >= ymin[k]) & (y < ymax[k])
            result[k] = np.nansum(v[b])
            
    elif statistic in {'mean', np.mean, np.nanmean}:
        result.fill(np.nan)
        for k in range(len(xmin)):
            b = (x >= xmin[k]) & (x < xmax[k]) & (y >= ymin[k]) & (y < ymax[k])
            result[k] = np.nanmean(v[b])
            
    elif statistic in {'median', np.median, np.nanmedian}:
        result.fill(np.nan)
        for k in range(len(xmin)):
            b = (x >= xmin[k]) & (x < xmax[k]) & (y >= ymin[k]) & (y < ymax[k])
            result[k] = np.nanmedian(v[b])
    elif statistic in {'min', np.min, np.nanmin}:
        result.fill(np.nan)
        for k in range(len(xmin)):
            b = (x >= xmin[k]) & (x < xmax[k]) & (y >= ymin[k]) & (y < ymax[k])
            result[k] = np.nanmin(v[b])
    elif statistic in {'max', np.max, np.nanmax}:
        result.fill(np.nan)
        for k in range(len(xmin)):
            b = (x >= xmin[k]) & (x < xmax[k]) & (y >= ymin[k]) & (y < ymax[k])
            result[k] = np.nanmax(v[b])
    elif statistic in {'std', np.std, np.nanstd}:
        result.fill(np.nan)
        for k in range(len(xmin)):
            b = (x >= xmin[k]) & (x < xmax[k]) & (y >= ymin[k]) & (y < ymax[k])
            result[k] = np.nanstd(v[b])

    # need to test callable statistic returns something that fits in result
    elif callable(statistic):
        for k in range(len(xmin)):
            b = (x >= xmin[k]) & (x < xmax[k]) & (y >= ymin[k]) & (y < ymax[k])
            result[k] = statistic(v[b])
        
    return result

In [16]:
np.random.seed(42)
x = np.random.normal(loc=25, scale=5, size=1000)
np.random.seed(39)
y = np.random.normal(loc=30, scale=5, size=1000)
# Define sample edge values for testing
edges = [np.min(x), np.max(x), np.min(y), np.max(y)]

# Call the function with the defined edges
result = binned_statistic_qt(x, y, v=None, statistic='count', edges=edges)

TypeError: object of type 'numpy.float64' has no len()

In [14]:
def binned_statistic_qt(x, y, v=None, statistic='density', edges=None):
    xmin, xmax, ymin, ymax = edges
    known_stats = ['mean', 'median', 'count', 'sum', 'std', 'min', 'max', 'density']
    
    if not callable(statistic) and statistic not in known_stats:
        raise ValueError(f'invalid statistic {statistic!r}')
   
    result = np.full(len(xmin), np.nan, dtype=np.float64)  # NaN-filled for unfilled bins

    # Calculate statistics based on type
    if statistic == 'density':
        for k in range(len(xmin)):
            result[k] = np.sum((x >= xmin[k]) & (x < xmax[k]) & (y >= ymin[k]) & (y < ymax[k]))
        result = result / ((ymax - ymin) * (xmax - xmin)) / result.sum()
        
    elif statistic == 'count':
        for k in range(len(xmin)):
            result[k] = np.sum((x >= xmin[k]) & (x < xmax[k]) & (y >= ymin[k]) & (y < ymax[k]))
            
    elif statistic == 'sum':
        for k in range(len(xmin)):
            b = (x >= xmin[k]) & (x < xmax[k]) & (y >= ymin[k]) & (y < ymax[k])
            result[k] = np.nansum(v[b])
            
    elif statistic in {'mean', np.mean, np.nanmean}:
        for k in range(len(xmin)):
            b = (x >= xmin[k]) & (x < xmax[k]) & (y >= ymin[k]) & (y < ymax[k])
            result[k] = np.nanmean(v[b])
            
    elif statistic in {'median', np.median, np.nanmedian}:
        for k in range(len(xmin)):
            b = (x >= xmin[k]) & (x < xmax[k]) & (y >= ymin[k]) & (y < ymax[k])
            result[k] = np.nanmedian(v[b])

    elif statistic in {'min', np.min, np.nanmin}:
        for k in range(len(xmin)):
            b = (x >= xmin[k]) & (x < xmax[k]) & (y >= ymin[k]) & (y < ymax[k])
            result[k] = np.nanmin(v[b])

    elif statistic in {'max', np.max, np.nanmax}:
        for k in range(len(xmin)):
            b = (x >= xmin[k]) & (x < xmax[k]) & (y >= ymin[k]) & (y < ymax[k])
            result[k] = np.nanmax(v[b])

    elif statistic in {'std', np.std, np.nanstd}:
        for k in range(len(xmin)):
            b = (x >= xmin[k]) & (x < xmax[k]) & (y >= ymin[k]) & (y < ymax[k])
            result[k] = np.nanstd(v[b])

    elif callable(statistic):
        for k in range(len(xmin)):
            b = (x >= xmin[k]) & (x < xmax[k]) & (y >= ymin[k]) & (y < ymax[k])
            result[k] = statistic(v[b])

    # Filter to keep only min and max counts
    min_val, max_val = np.nanmin(result), np.nanmax(result)
    filtered_result = np.array([r if (r == min_val or r == max_val) else np.nan for r in result])

    return filtered_result


In [19]:
import numpy as np

def adaptive_qthist(x, y, N=5, min_count=1, max_count=50, density=False):
    '''
    Adaptive QuadTree histogram with dynamic binning control based on minimum and
    maximum count constraints.

    Parameters
    ----------
    x, y : arrays
        Input data arrays.
    N : int, optional, default = 5
        The number of maximum divisions (levels) to compute the QuadTree.
    min_count : int, optional, default = 1
        Minimum threshold for counts within a bin.
    max_count : int, optional, default = 50
        Maximum threshold for counts within a bin.
    density : bool, optional, default = True
        If True, returns counts per area; otherwise, returns raw counts.

    Returns
    -------
    num, xmin, xmax, ymin, ymax : arrays
        Array of counts/densities and bin edges.
    '''

    num = []
    xmin = []
    xmax = []
    ymin = []
    ymax = []

    # Define the initial region with a small buffer.
    x_range = [np.nanmin(x), np.nanmax(x)]
    y_range = [np.nanmin(y), np.nanmax(y)]
    buffer_x = (x_range[1] - x_range[0]) * 0.1
    buffer_y = (y_range[1] - y_range[0]) * 0.1
    x_range = [x_range[0] - buffer_x, x_range[1] + buffer_x]
    y_range = [y_range[0] - buffer_y, y_range[1] + buffer_y]

    def divide_and_count(x_range, y_range, level=1):
        ''' Recursive function to divide regions and count points '''
        if level > N:
            return

        # Create 2x2 bins for the current region
        bins_x = np.linspace(x_range[0], x_range[1], 3)
        bins_y = np.linspace(y_range[0], y_range[1], 3)
        H, xedges, yedges = np.histogram2d(x, y, bins=[bins_x, bins_y])

        for i in range(2):
            for j in range(2):
                count = H[i, j]
                x_min, x_max = xedges[i], xedges[i + 1]
                y_min, y_max = yedges[j], yedges[j + 1]

                # Check if bin count is within the threshold
                if count >= min_count and count <= max_count:
                    # If within the threshold, add to the results
                    num.append(count)
                    xmin.append(x_min)
                    xmax.append(x_max)
                    ymin.append(y_min)
                    ymax.append(y_max)
                elif count > max_count:
                    # If count exceeds max_count, subdivide further
                    divide_and_count([x_min, x_max], [y_min, y_max], level + 1)
                else:
                    # If count is below min_count, keep as zero or small bin
                    num.append(count)
                    xmin.append(x_min)
                    xmax.append(x_max)
                    ymin.append(y_min)
                    ymax.append(y_max)

    # Start the adaptive QuadTree division
    divide_and_count(x_range, y_range)

    # Convert lists to numpy arrays
    num = np.array(num)
    xmin = np.array(xmin)
    xmax = np.array(xmax)
    ymin = np.array(ymin)
    ymax = np.array(ymax)

#     if density:
#         bin_areas = (xmax - xmin) * (ymax - ymin)
#         num = num / bin_areas / num.sum()

    return num, xmin, xmax, ymin, ymax

# Example usage
np.random.seed(42)
x = np.random.uniform(25, 5, 1000)
np.random.seed(39)
y = np.random.uniform(25, 5, 1000)
num, xmin, xmax, ymin, ymax = adaptive_qthist(x, y, N=5, min_count=2, max_count=30, density=True)

# Print example output
print("Counts:", num)
print("X min edges:", xmin)
print("X max edges:", xmax)
print("Y min edges:", ymin)
print("Y max edges:", ymax)


Counts: [ 2.  4.  7. 23. 11. 12. 18. 25. 11. 23.  8.  7.  5. 11.  8. 14. 17.  7.
 11.  5.  9. 28.  4.  4. 29.  7. 10. 10.  8. 25. 14. 19. 20. 21. 20. 11.
 19.  9.  6. 26.  4. 19. 14. 19. 22. 17. 12. 27.  2.  9. 25. 25.  9. 11.
 30. 16. 24. 29. 19.  7. 24.  2. 27. 22. 10.  9. 26.  4.  7.  1.]
X min edges: [ 3.01547523  3.01547523  6.00073218  6.00073218  3.01547523  3.01547523
  6.00073218  6.00073218  8.98598914  8.98598914 11.97124609 11.97124609
 11.97124609 13.46387456 13.46387456  8.98598914  8.98598914 11.97124609
 11.97124609 13.46387456 13.46387456 11.97124609  3.01547523  3.01547523
  6.00073218  6.00073218  6.00073218  7.49336066  7.49336066  3.01547523
  8.98598914  8.98598914 11.97124609 11.97124609  8.98598914  8.98598914
 11.97124609 11.97124609 14.95650304 14.95650304 17.94175999 17.94175999
 14.95650304 14.95650304 17.94175999 17.94175999 20.92701694 20.92701694
 23.91227389 23.91227389 20.92701694 20.92701694 23.91227389 23.91227389
 14.95650304 14.95650304 17.94175999 

In [21]:
import numpy as np
import plotly.graph_objects as go
import plotly.io as pio

# Generate example data for demonstration
np.random.seed(42)
x = np.random.normal(25, 5, 1000)
np.random.seed(39)
y = np.random.normal(25, 5, 1000)

# Define minimum and maximum counts for color coding
min_count = 10
max_count = 30

# Set up bins and compute histogram
bins = 20
H, xedges, yedges = np.histogram2d(x, y, bins=bins)

# Define color mapping based on bin counts
color_map = np.zeros_like(H, dtype='<U6')  # Initialize empty color map array

# Apply color coding conditions
color_map[H == 0] = 'yellow'       # Zero count bins
color_map[(H > 0) & (H < min_count)] = 'blue'   # Counts lower than min_count but greater than zero
color_map[(H >= min_count) & (H <= max_count)] = 'white'  # Counts within the acceptable range
color_map[H > max_count] = 'red'   # Counts higher than max_count

# Set up the Plotly heatmap plot
fig = go.Figure()

# Add the heatmap with the custom color scale
fig.add_trace(go.Heatmap(
    z=H,
    x=xedges,
    y=yedges,
    colorscale=[
        [0, 'yellow'],  # Yellow for zero counts
        [0.25, 'blue'], # Blue for low counts
        [0.5, 'white'], # White for acceptable counts
        [1, 'red']      # Red for high counts
    ],
    showscale=True,  # Show color scale for reference
    colorbar=dict(title="Bin Counts")
))

# Configure layout
fig.update_layout(
    title="Binned Data with Custom Color Coding",
    xaxis_title="X-axis",
    yaxis_title="Y-axis"
)

# Display the plot in an adaptive environment
pio.show(fig)


In [28]:
import numpy as np

def adaptive_qthist(x, y, N=5, min_count=15, max_count=35, density=False):
    '''
    Adaptive QuadTree histogram with dynamic binning control based on minimum and
    maximum count constraints.

    Parameters
    ----------
    x, y : arrays
        Input data arrays.
    N : int, optional, default = 5
        The number of maximum divisions (levels) to compute the QuadTree.
    min_count : int, optional, default = 1
        Minimum threshold for counts within a bin.
    max_count : int, optional, default = 50
        Maximum threshold for counts within a bin.
    density : bool, optional, default = True
        If True, returns counts per area; otherwise, returns raw counts.

    Returns
    -------
    num, xmin, xmax, ymin, ymax : arrays
        Array of counts/densities and bin edges.
    '''

    num = []
    xmin = []
    xmax = []
    ymin = []
    ymax = []

    # Define the initial region with a small buffer.
    x_range = [np.nanmin(x), np.nanmax(x)]
    y_range = [np.nanmin(y), np.nanmax(y)]
    buffer_x = (x_range[1] - x_range[0]) * 0.1
    buffer_y = (y_range[1] - y_range[0]) * 0.1
    x_range = [x_range[0] - buffer_x, x_range[1] + buffer_x]
    y_range = [y_range[0] - buffer_y, y_range[1] + buffer_y]

    def divide_and_count(x_range, y_range, level=1):
        ''' Recursive function to divide regions and count points '''
        if level > N:
            return

        # Create 2x2 bins for the current region
        bins_x = np.linspace(x_range[0], x_range[1], 3)
        bins_y = np.linspace(y_range[0], y_range[1], 3)
        H, xedges, yedges = np.histogram2d(x, y, bins=[bins_x, bins_y])

        for i in range(2):
            for j in range(2):
                count = H[i, j]
                x_min, x_max = xedges[i], xedges[i + 1]
                y_min, y_max = yedges[j], yedges[j + 1]

                if count >= min_count and count <= max_count:
                    # If count is within the threshold, add to results
                    num.append(count)
                    xmin.append(x_min)
                    xmax.append(x_max)
                    ymin.append(y_min)
                    ymax.append(y_max)
                elif count > max_count:
                    # If count exceeds max_count, subdivide further
                    divide_and_count([x_min, x_max], [y_min, y_max], level + 1)
                elif count >= min_count:
                    # If count is above min_count but below max_count, try merging with neighbors
                    merged_count = count
                    merged_xmin = x_min
                    merged_xmax = x_max
                    merged_ymin = y_min
                    merged_ymax = y_max

                    # Attempt to merge with neighboring bins if they also meet criteria
                    if i < 1:  # Merge horizontally with next bin
                        merged_count += H[i + 1, j]
                        merged_xmax = xedges[i + 2]
                    if j < 1:  # Merge vertically with next bin
                        merged_count += H[i, j + 1]
                        merged_ymax = yedges[j + 2]

                    # Add merged bin if it meets the threshold
                    if merged_count <= max_count + min_count:
                        num.append(merged_count)
                        xmin.append(merged_xmin)
                        xmax.append(merged_xmax)
                        ymin.append(merged_ymin)
                        ymax.append(merged_ymax)
                    else:
                        # If unable to merge within thresholds, treat as a separate bin
                        num.append(count)
                        xmin.append(x_min)
                        xmax.append(x_max)
                        ymin.append(y_min)
                        ymax.append(y_max)
                else:
                    # If count is below min_count but not zero, add as is
                    num.append(count)
                    xmin.append(x_min)
                    xmax.append(x_max)
                    ymin.append(y_min)
                    ymax.append(y_max)

    # Start the adaptive QuadTree division
    divide_and_count(x_range, y_range)

    # Convert lists to numpy arrays
    num = np.array(num)
    xmin = np.array(xmin)
    xmax = np.array(xmax)
    ymin = np.array(ymin)
    ymax = np.array(ymax)

#     if density:
#         bin_areas = (xmax - xmin) * (ymax - ymin)
#         num = num / bin_areas  # Convert counts to density if needed

    return num, xmin, xmax, ymin, ymax

# Example usage
np.random.seed(42)
x = np.random.uniform(25, 5, 1000)
np.random.seed(39)
y = np.random.uniform(25, 5, 1000)
num, xmin, xmax, ymin, ymax = adaptive_qthist(x, y, N=5, min_count=20, max_count=30, density=True)

# Print example output
print("Counts:", num)
print("X min edges:", xmin)
print("X max edges:", xmax)
print("Y min edges:", ymin)
print("Y max edges:", ymax)


Counts: [ 2.  4.  7. 23. 11. 12. 18. 25. 11. 23.  8.  7.  5. 11.  8. 14. 17.  7.
 11.  5.  9. 28.  4.  4. 29.  7. 10. 10.  8. 25. 14. 19. 20. 21. 20. 11.
 19.  9.  6. 26.  4. 19. 14. 19. 22. 17. 12. 27.  2.  9. 25. 25.  9. 11.
 30. 16. 24. 29. 19.  7. 24.  2. 27. 22. 10.  9. 26.  4.  7.  1.]
X min edges: [ 3.01547523  3.01547523  6.00073218  6.00073218  3.01547523  3.01547523
  6.00073218  6.00073218  8.98598914  8.98598914 11.97124609 11.97124609
 11.97124609 13.46387456 13.46387456  8.98598914  8.98598914 11.97124609
 11.97124609 13.46387456 13.46387456 11.97124609  3.01547523  3.01547523
  6.00073218  6.00073218  6.00073218  7.49336066  7.49336066  3.01547523
  8.98598914  8.98598914 11.97124609 11.97124609  8.98598914  8.98598914
 11.97124609 11.97124609 14.95650304 14.95650304 17.94175999 17.94175999
 14.95650304 14.95650304 17.94175999 17.94175999 20.92701694 20.92701694
 23.91227389 23.91227389 20.92701694 20.92701694 23.91227389 23.91227389
 14.95650304 14.95650304 17.94175999 

In [30]:
fig = go.Figure()
#xmin, xmax, ymin, ymax
# Add the heatmap with the custom color scale
fig.add_trace(go.Heatmap(
    z=num,
    x=xmax-xmin,
    y=ymax-ymin,
    colorscale=[
        [0, 'yellow'],  # Yellow for zero counts
        [0.25, 'blue'], # Blue for low counts
        [0.5, 'white'], # White for acceptable counts
        [1, 'red']      # Red for high counts
    ],
    showscale=True,  # Show color scale for reference
    colorbar=dict(title="Bin Counts")
))

# Configure layout
fig.update_layout(
    title="Binned Data with Custom Color Coding",
    xaxis_title="X-axis",
    yaxis_title="Y-axis"
)

# Display the plot in an adaptive environment
pio.show(fig)


In [36]:
import numpy as np

def adaptive_qthist(x, y, N=5, min_count=15, max_count=50, density=False):
    '''
    Adaptive QuadTree histogram with dynamic binning control based on minimum and
    maximum count constraints.

    Parameters
    ----------
    x, y : arrays
        Input data arrays.
    N : int, optional, default = 5
        The maximum number of divisions (levels) to compute the QuadTree.
    min_count : int, optional, default = 1
        Minimum threshold for counts within a bin.
    max_count : int, optional, default = 50
        Maximum threshold for counts within a bin.
    density : bool, optional, default = False
        If True, returns counts per area; otherwise, returns raw counts.

    Returns
    -------
    num, xmin, xmax, ymin, ymax : arrays
        Array of counts/densities and bin edges.
    '''

    num = []
    xmin = []
    xmax = []
    ymin = []
    ymax = []

    # Define the initial region with a small buffer.
    x_range = [np.nanmin(x), np.nanmax(x)]
    y_range = [np.nanmin(y), np.nanmax(y)]
    buffer_x = (x_range[1] - x_range[0]) * 0.1
    buffer_y = (y_range[1] - y_range[0]) * 0.1
    x_range = [x_range[0] - buffer_x, x_range[1] + buffer_x]
    y_range = [y_range[0] - buffer_y, y_range[1] + buffer_y]

    def divide_and_count(x_range, y_range, level=1):
        ''' Recursive function to divide regions and count points '''
        if level > N:
            return

        # Create 2x2 bins for the current region
        bins_x = np.linspace(x_range[0], x_range[1], 3)
        bins_y = np.linspace(y_range[0], y_range[1], 3)
        H, xedges, yedges = np.histogram2d(x, y, bins=[bins_x, bins_y])

        for i in range(2):
            for j in range(2):
                count = H[i, j]
                x_min, x_max = xedges[i], xedges[i + 1]
                y_min, y_max = yedges[j], yedges[j + 1]

                # Check if bin count is within the threshold
                if min_count <= count <= max_count:
                    # If within the threshold, add to the results
                    num.append(count)
                    xmin.append(x_min)
                    xmax.append(x_max)
                    ymin.append(y_min)
                    ymax.append(y_max)
                elif count > max_count:
                    # If count exceeds max_count, subdivide further
                    divide_and_count([x_min, x_max], [y_min, y_max], level + 1)
                elif 0 < count < min_count:
                    # Attempt to merge with neighboring bins if below min_count
                    adjacent_count = count
                    combined_x_min, combined_x_max = x_min, x_max
                    combined_y_min, combined_y_max = y_min, y_max

                    # Search horizontally and vertically to try merging
                    for k in range(2):
                        adj_count_x = H[k, j] if k != i else 0
                        adj_count_y = H[i, k] if k != j else 0
                        if adj_count_x > 0:
                            adjacent_count += adj_count_x
                            combined_x_max = xedges[k + 1]
                        if adj_count_y > 0:
                            adjacent_count += adj_count_y
                            combined_y_max = yedges[k + 1]
                        if adjacent_count >= min_count:
                            break

                    # Add the combined bin if the count meets min_count after merging
                    num.append(adjacent_count)
                    xmin.append(combined_x_min)
                    xmax.append(combined_x_max)
                    ymin.append(combined_y_min)
                    ymax.append(combined_y_max)
                else:
                    # Count is zero or doesn't meet criteria - keep as zero bin
                    num.append(count)
                    xmin.append(x_min)
                    xmax.append(x_max)
                    ymin.append(y_min)
                    ymax.append(y_max)

    # Start the adaptive QuadTree division
    divide_and_count(x_range, y_range)

    # Convert lists to numpy arrays
    num = np.array(num)
    xmin = np.array(xmin)
    xmax = np.array(xmax)
    ymin = np.array(ymin)
    ymax = np.array(ymax)

#     # Normalize to density if required
#     if density:
#         bin_areas = (xmax - xmin) * (ymax - ymin)
#         num = num / bin_areas / num.sum()

    return num, xmin, xmax, ymin, ymax

# Example usage
np.random.seed(42)
x = np.random.uniform(25, 5, 10000)
np.random.seed(39)
y = np.random.uniform(35, 5, 10000)
num, xmin, xmax, ymin, ymax = adaptive_qthist(x, y, N=5, min_count=20, max_count=30, density=True)

# Print example output
print("Counts:", num)
print("X min edges:", xmin)
print("X max edges:", xmax)
print("Y min edges:", ymin)
print("Y max edges:", ymax)


Counts: [30.  0.  0. 19. 21. 31. 28. 21. 23. 32. 35.  0. 22. 27. 19. 28.  0. 20.
 31. 23. 34. 50. 21. 29. 46. 48. 32. 34. 40. 35. 29. 26. 39. 40. 28. 27.
 36.  0.  0. 25. 30. 20. 21. 27.  0. 23. 33.  0.  0. 25. 25. 20. 37. 22.
 20. 20. 28. 22. 32. 38. 39. 45. 30. 31. 35. 47. 27. 20. 52. 51. 33. 34.
 53. 41. 27. 28. 37. 38. 26. 23. 42. 48. 28. 20. 52. 44. 26. 33. 44.  0.
  0. 24. 17. 32.  0. 26. 23. 29. 20. 43. 31. 31. 37. 29. 35. 20. 35. 51.
 32. 32. 46. 39. 24. 26. 40.  0. 26. 32. 29. 36.  0. 24. 26. 15. 28. 52.
 23. 29. 56. 41. 27. 24. 48. 20. 36. 37. 21. 22. 29. 35. 21. 48. 31. 33.
 39. 35. 31. 27. 38. 40. 31. 27. 36. 41. 27. 25. 39. 38. 24. 27. 32. 46.
 28. 27. 48. 42. 26. 31. 46. 36. 24. 25. 34. 21. 35. 25. 52. 40. 30. 27.
 35. 43. 28. 30. 40. 37. 21. 28. 40. 44. 30. 28. 45. 33. 23. 26. 34. 50.
 33. 32. 53. 34. 25. 22. 35.  0.  0. 26. 45.  0.  0. 29. 28. 22. 23. 35.
 25. 42. 34. 21. 43. 23. 20. 50. 46. 28. 30. 20. 47. 28. 34. 48. 39. 25.
 28. 37. 34. 25. 20. 20. 20. 31. 39. 43. 42

In [34]:
len(num), len(xmin),len(xmax),len(ymin), len(ymax)

(586, 586, 586, 586, 586)

In [38]:
import numpy as np
import plotly.graph_objects as go

# Define the adaptive histogram function with recursive splitting and merging
def adaptive_qthist(x, y, N=5, min_count=1, max_count=50, density=False):
    num = []
    xmin = []
    xmax = []
    ymin = []
    ymax = []

    x_range = [np.nanmin(x), np.nanmax(x)]
    y_range = [np.nanmin(y), np.nanmax(y)]
    buffer_x = (x_range[1] - x_range[0]) * 0.1
    buffer_y = (y_range[1] - y_range[0]) * 0.1
    x_range = [x_range[0] - buffer_x, x_range[1] + buffer_x]
    y_range = [y_range[0] - buffer_y, y_range[1] + buffer_y]

    def divide_and_count(x_range, y_range, level=1):
        if level > N:
            return
        bins_x = np.linspace(x_range[0], x_range[1], 3)
        bins_y = np.linspace(y_range[0], y_range[1], 3)
        H, xedges, yedges = np.histogram2d(x, y, bins=[bins_x, bins_y])

        for i in range(2):
            for j in range(2):
                count = H[i, j]
                x_min, x_max = xedges[i], xedges[i + 1]
                y_min, y_max = yedges[j], yedges[j + 1]

                if min_count <= count <= max_count:
                    num.append(count)
                    xmin.append(x_min)
                    xmax.append(x_max)
                    ymin.append(y_min)
                    ymax.append(y_max)
                elif count > max_count:
                    divide_and_count([x_min, x_max], [y_min, y_max], level + 1)
                elif 0 < count < min_count:
                    adjacent_count = count
                    combined_x_min, combined_x_max = x_min, x_max
                    combined_y_min, combined_y_max = y_min, y_max

                    for k in range(2):
                        adj_count_x = H[k, j] if k != i else 0
                        adj_count_y = H[i, k] if k != j else 0
                        if adj_count_x > 0:
                            adjacent_count += adj_count_x
                            combined_x_max = xedges[k + 1]
                        if adj_count_y > 0:
                            adjacent_count += adj_count_y
                            combined_y_max = yedges[k + 1]
                        if adjacent_count >= min_count:
                            break

                    num.append(adjacent_count)
                    xmin.append(combined_x_min)
                    xmax.append(combined_x_max)
                    ymin.append(combined_y_min)
                    ymax.append(combined_y_max)
                else:
                    num.append(count)
                    xmin.append(x_min)
                    xmax.append(x_max)
                    ymin.append(y_min)
                    ymax.append(y_max)

    divide_and_count(x_range, y_range)
    num = np.array(num)
    xmin = np.array(xmin)
    xmax = np.array(xmax)
    ymin = np.array(ymin)
    ymax = np.array(ymax)


    return num, xmin, xmax, ymin, ymax

# Example usage
np.random.seed(42)
x = np.random.uniform(25, 5, 1000)
np.random.seed(39)
y = np.random.uniform(25, 5, 1000)
counts, xmin_edges, xmax_edges, ymin_edges, ymax_edges = adaptive_qthist(x, y, N=5, min_count=20, max_count=30, density=True)

# Plot with Plotly
fig = go.Figure()

# Add each bin as a rectangle
for xmin, xmax, ymin, ymax, count in zip(xmin_edges, xmax_edges, ymin_edges, ymax_edges, counts):
    # Color coding based on count thresholds
    if count == 0:
        color = 'yellow'
    elif count < 20:
        color = 'blue'
    elif count > 30:
        color = 'red'
    else:
        color = 'white'
    
    # Add rectangle for each bin
    fig.add_shape(
        type="rect",
        x0=xmin, y0=ymin, x1=xmax, y1=ymax,
        line=dict(color="black"),
        fillcolor=color,
        opacity=0.6
    )

    # Add annotation for each bin count
    fig.add_annotation(
        x=(xmin + xmax) / 2,
        y=(ymin + ymax) / 2,
        text=str(int(count)),
        showarrow=False,
        font=dict(color="black", size=10)
    )

# Set up the layout
fig.update_layout(
    title="Adaptive 2D Histogram with Color Coding and Count Annotations",
    xaxis_title="X-axis",
    yaxis_title="Y-axis",
    xaxis=dict(range=[x.min(), x.max()]),
    yaxis=dict(range=[y.min(), y.max()]),
    showlegend=False,
    hovermode="closest"
)

# Show the plot
fig.show()


In [41]:
counts

array([13., 29., 32., 23., 41., 23., 29., 25., 42., 23., 50., 23., 20.,
       26., 24., 63., 31., 23., 27., 21., 25., 28., 37., 43., 29., 27.,
       25., 25., 28., 25., 53., 33., 20., 21., 20., 31., 39., 39., 36.,
       26., 29., 49., 55., 33., 22., 58., 41., 27., 23., 38., 25., 25.,
       34., 45., 30., 46., 24., 29., 50., 26., 24., 33., 27., 22., 37.,
       41., 26., 30., 33., 12.])

In [42]:
import numpy as np
import plotly.graph_objects as go

# Define the adaptive histogram function with recursive splitting and merging
def adaptive_qthist(x, y, N=5, min_count=1, max_count=50):
    num = []
    xmin = []
    xmax = []
    ymin = []
    ymax = []

    x_range = [np.nanmin(x), np.nanmax(x)]
    y_range = [np.nanmin(y), np.nanmax(y)]
    buffer_x = (x_range[1] - x_range[0]) * 0.1
    buffer_y = (y_range[1] - y_range[0]) * 0.1
    x_range = [x_range[0] - buffer_x, x_range[1] + buffer_x]
    y_range = [y_range[0] - buffer_y, y_range[1] + buffer_y]

    def divide_and_count(x_range, y_range, level=1):
        if level > N:
            return
        bins_x = np.linspace(x_range[0], x_range[1], 3)
        bins_y = np.linspace(y_range[0], y_range[1], 3)
        H, xedges, yedges = np.histogram2d(x, y, bins=[bins_x, bins_y])

        for i in range(2):
            for j in range(2):
                count = H[i, j]
                x_min, x_max = xedges[i], xedges[i + 1]
                y_min, y_max = yedges[j], yedges[j + 1]

                if min_count <= count <= max_count:
                    num.append(count)
                    xmin.append(x_min)
                    xmax.append(x_max)
                    ymin.append(y_min)
                    ymax.append(y_max)
                elif count > max_count:
                    divide_and_count([x_min, x_max], [y_min, y_max], level + 1)
                elif 0 < count < min_count:
                    adjacent_count = count
                    combined_x_min, combined_x_max = x_min, x_max
                    combined_y_min, combined_y_max = y_min, y_max

                    for k in range(2):
                        adj_count_x = H[k, j] if k != i else 0
                        adj_count_y = H[i, k] if k != j else 0
                        if adj_count_x > 0:
                            adjacent_count += adj_count_x
                            combined_x_max = xedges[k + 1]
                        if adj_count_y > 0:
                            adjacent_count += adj_count_y
                            combined_y_max = yedges[k + 1]
                        if adjacent_count >= min_count:
                            break

                    num.append(adjacent_count)
                    xmin.append(combined_x_min)
                    xmax.append(combined_x_max)
                    ymin.append(combined_y_min)
                    ymax.append(combined_y_max)
                else:
                    num.append(count)
                    xmin.append(x_min)
                    xmax.append(x_max)
                    ymin.append(y_min)
                    ymax.append(y_max)

    divide_and_count(x_range, y_range)
    return np.array(num), np.array(xmin), np.array(xmax), np.array(ymin), np.array(ymax)

# Example usage
np.random.seed(42)
x = np.random.uniform(25, 5, 1000)
np.random.seed(39)
y = np.random.uniform(25, 5, 1000)
counts, xmin_edges, xmax_edges, ymin_edges, ymax_edges = adaptive_qthist(x, y, N=5, min_count=15, max_count=35)

# Plot with Plotly
fig = go.Figure()

# Add each bin as a rectangle
for xmin, xmax, ymin, ymax, count in zip(xmin_edges, xmax_edges, ymin_edges, ymax_edges, counts):
    # Color coding based on count thresholds
    if count == 0:
        color = 'yellow'
    elif count < 20:
        color = 'blue'
    elif count > 30:
        color = 'red'
    else:
        color = 'white'
    
    # Add rectangle for each bin
    fig.add_shape(
        type="rect",
        x0=xmin, y0=ymin, x1=xmax, y1=ymax,
        line=dict(color="black"),
        fillcolor=color,
        opacity=0.6
    )

    # Add annotation for each bin count at the center
    fig.add_annotation(
        x=(xmin + xmax) / 2,
        y=(ymin + ymax) / 2,
        text=str(int(count)),
        showarrow=False,
        font=dict(color="black", size=10)
    )

# Set up the layout
fig.update_layout(
    title="Adaptive 2D Histogram with Color Coding and Count Annotations",
    xaxis_title="X-axis",
    yaxis_title="Y-axis",
    xaxis=dict(range=[x.min(), x.max()]),
    yaxis=dict(range=[y.min(), y.max()]),
    showlegend=False,
    hovermode="closest"
)

# Show the plot
fig.show()


In [43]:
import numpy as np
import plotly.graph_objects as go

# Define the adaptive histogram function with recursive splitting and merging
def adaptive_qthist(x, y, N=5, min_count=20, max_count=30):
    # Initialize lists to store counts and edges
    counts = []
    edges = []

    # Define the range with padding
    x_range = [x.min(), x.max()]
    y_range = [y.min(), y.max()]

    # Recursive division of space into bins
    def divide_and_count(x_range, y_range, depth=1):
        if depth > N:
            return
        bins_x = np.linspace(x_range[0], x_range[1], 3)
        bins_y = np.linspace(y_range[0], y_range[1], 3)
        H, xedges, yedges = np.histogram2d(x, y, bins=[bins_x, bins_y])

        for i in range(len(xedges) - 1):
            for j in range(len(yedges) - 1):
                count = H[i, j]
                x_min, x_max = xedges[i], xedges[i + 1]
                y_min, y_max = yedges[j], yedges[j + 1]

                if min_count <= count <= max_count:
                    counts.append(count)
                    edges.append((x_min, x_max, y_min, y_max))
                elif count > max_count:
                    divide_and_count([x_min, x_max], [y_min, y_max], depth + 1)

    # Initial recursive call
    divide_and_count(x_range, y_range)

    return counts, edges

# Example usage
np.random.seed(42)
x = np.random.uniform(25, 5, 1000)
np.random.seed(39)
y = np.random.uniform(25, 5, 1000)
counts, edges = adaptive_qthist(x, y, N=5, min_count=20, max_count=30)

# Plotting with Plotly
fig = go.Figure()

# Add each bin as a rectangle and annotate counts
for (xmin, xmax, ymin, ymax), count in zip(edges, counts):
    color = 'yellow' if count == 0 else 'blue' if count < 20 else 'red' if count > 30 else 'white'

    # Add rectangle for each bin
    fig.add_shape(
        type="rect",
        x0=xmin, y0=ymin, x1=xmax, y1=ymax,
        line=dict(color="black"),
        fillcolor=color,
        opacity=0.6
    )

    # Add annotation for each bin count at the center
    fig.add_annotation(
        x=(xmin + xmax) / 2,
        y=(ymin + ymax) / 2,
        text=str(int(count)),
        showarrow=False,
        font=dict(color="black", size=10)
    )

# Set up the layout
fig.update_layout(
    title="Adaptive 2D Histogram with Color Coding and Count Annotations",
    xaxis_title="X-axis",
    yaxis_title="Y-axis",
    xaxis=dict(range=[x.min(), x.max()]),
    yaxis=dict(range=[y.min(), y.max()]),
    showlegend=False,
    hovermode="closest"
)

# Show the plot
fig.show()


In [46]:
import numpy as np
import plotly.graph_objects as go

# Define the adaptive histogram function with recursive splitting and merging
def adaptive_qthist(x, y, N=5, min_count=20, max_count=30):
    num = []
    xmin = []
    xmax = []
    ymin = []
    ymax = []

    # Define the initial range with padding for clarity in visualization
    x_range = [np.nanmin(x), np.nanmax(x)]
    y_range = [np.nanmin(y), np.nanmax(y)]
    buffer_x = (x_range[1] - x_range[0]) * 0.1
    buffer_y = (y_range[1] - y_range[0]) * 0.1
    x_range = [x_range[0] - buffer_x, x_range[1] + buffer_x]
    y_range = [y_range[0] - buffer_y, y_range[1] + buffer_y]

    # Recursive function to split bins based on count thresholds
    def divide_and_count(x_range, y_range, level=1):
        if level > N:
            return
        # Create initial 2x2 bin grid for the current range
        bins_x = np.linspace(x_range[0], x_range[1], 3)
        bins_y = np.linspace(y_range[0], y_range[1], 3)
        H, xedges, yedges = np.histogram2d(x, y, bins=[bins_x, bins_y])

        for i in range(2):
            for j in range(2):
                count = H[i, j]
                x_min, x_max = xedges[i], xedges[i + 1]
                y_min, y_max = yedges[j], yedges[j + 1]

                # Apply count thresholds to determine bin handling
                if min_count <= count <= max_count:
                    # Save bin details for later plotting
                    num.append(count)
                    xmin.append(x_min)
                    xmax.append(x_max)
                    ymin.append(y_min)
                    ymax.append(y_max)
                elif count > max_count:
                    # Further split this bin
                    divide_and_count([x_min, x_max], [y_min, y_max], level + 1)
                else:
                    # Include low-count bins without further splitting
                    num.append(count)
                    xmin.append(x_min)
                    xmax.append(x_max)
                    ymin.append(y_min)
                    ymax.append(y_max)

    # Initial recursive call
    divide_and_count(x_range, y_range)
    return np.array(num), np.array(xmin), np.array(xmax), np.array(ymin), np.array(ymax)

# Sample data
np.random.seed(42)
x = np.random.uniform(25, 5, 1000)
np.random.seed(39)
y = np.random.uniform(25, 5, 1000)

# Generate adaptive bins and counts
counts, xmin_edges, xmax_edges, ymin_edges, ymax_edges = adaptive_qthist(x, y, N=5, min_count=20, max_count=50)

# Plot with Plotly
fig = go.Figure()

# Add each bin as a rectangle with counts annotated at the center
for xmin, xmax, ymin, ymax, count in zip(xmin_edges, xmax_edges, ymin_edges, ymax_edges, counts):
    # Determine color based on count
    if count == 0:
        color = 'yellow'
    elif count < 20:
        color = 'blue'
    elif count > 30:
        color = 'red'
    else:
        color = 'white'
    
    # Add rectangle for each bin
    fig.add_shape(
        type="rect",
        x0=xmin, y0=ymin, x1=xmax, y1=ymax,
        line=dict(color="black"),
        fillcolor=color,
        opacity=0.6
    )

    # Annotate each bin count at the center of the bin
    fig.add_annotation(
        x=(xmin + xmax) / 2,
        y=(ymin + ymax) / 2,
        text=str(int(count)),
        showarrow=False,
        font=dict(color="black", size=10)
    )

# Set up the layout
fig.update_layout(
    title="Adaptive 2D Histogram with Color Coding and Count Annotations",
    xaxis_title="X-axis",
    yaxis_title="Y-axis",
    xaxis=dict(range=[x.min(), x.max()]),
    yaxis=dict(range=[y.min(), y.max()]),
    showlegend=False,
    hovermode="closest"
)

# Show the plot
fig.show()


In [48]:
counts,len(counts)

(array([ 2.,  4.,  7., 23., 11., 12., 18., 25., 11., 23.,  8.,  7.,  5.,
        11.,  8., 14., 17.,  7., 11.,  5.,  9., 28.,  4.,  4., 29.,  7.,
        10., 10.,  8., 25., 14., 19., 20., 21., 20., 11., 19.,  9.,  6.,
        26.,  4., 19., 14., 19., 22., 17., 12., 27.,  2.,  9., 25., 25.,
         9., 11., 30., 16., 24., 29., 19.,  7., 24.,  2., 27., 22., 10.,
         9., 26.,  4.,  7.,  1.]),
 70)

In [53]:
import numpy as np
import plotly.graph_objects as go

# Define adaptive histogram function with recursive splitting and merging
def adaptive_qthist(x, y, N=5, min_count=1, max_count=50):
    # Storage for bin data
    num = []
    xmin = []
    xmax = []
    ymin = []
    ymax = []

    # Define the initial range for x and y, with a small buffer
    x_range = [np.nanmin(x), np.nanmax(x)]
    y_range = [np.nanmin(y), np.nanmax(y)]
    buffer_x = (x_range[1] - x_range[0]) * 0.1
    buffer_y = (y_range[1] - y_range[0]) * 0.1
    x_range = [x_range[0] - buffer_x, x_range[1] + buffer_x]
    y_range = [y_range[0] - buffer_y, y_range[1] + buffer_y]

    # Recursive function to divide bins and count points
    def divide_and_count(x_range, y_range, level=1):
        if level > N:
            return
        # Divide range into sub-bins
        bins_x = np.linspace(x_range[0], x_range[1], 3)
        bins_y = np.linspace(y_range[0], y_range[1], 3)
        H, xedges, yedges = np.histogram2d(x, y, bins=[bins_x, bins_y])

        for i in range(2):
            for j in range(2):
                count = H[i, j]
                x_min, x_max = xedges[i], xedges[i + 1]
                y_min, y_max = yedges[j], yedges[j + 1]

                # Check if the bin count is within desired range
                if min_count <= count <= max_count:
                    num.append(count)
                    xmin.append(x_min)
                    xmax.append(x_max)
                    ymin.append(y_min)
                    ymax.append(y_max)
                # If count is above max_count, subdivide further
                elif count > max_count:
                    divide_and_count([x_min, x_max], [y_min, y_max], level + 1)
                # If count is below min_count, merge with adjacent bins if possible
                elif 0 < count < min_count:
                    adjacent_count = count
                    combined_x_min, combined_x_max = x_min, x_max
                    combined_y_min, combined_y_max = y_min, y_max

                    for k in range(2):
                        adj_count_x = H[k, j] if k != i else 0
                        adj_count_y = H[i, k] if k != j else 0
                        if adj_count_x > 0:
                            adjacent_count += adj_count_x
                            combined_x_max = xedges[k + 1]
                        if adj_count_y > 0:
                            adjacent_count += adj_count_y
                            combined_y_max = yedges[k + 1]
                        if adjacent_count >= min_count:
                            break

                    num.append(adjacent_count)
                    xmin.append(combined_x_min)
                    xmax.append(combined_x_max)
                    ymin.append(combined_y_min)
                    ymax.append(combined_y_max)
                else:
                    num.append(count)
                    xmin.append(x_min)
                    xmax.append(x_max)
                    ymin.append(y_min)
                    ymax.append(y_max)

    divide_and_count(x_range, y_range)
    return np.array(num), np.array(xmin), np.array(xmax), np.array(ymin), np.array(ymax)

# Sample data
np.random.seed(42)
x = np.random.uniform(25, 30, 10000)
np.random.seed(39)
y = np.random.uniform(25, 20, 10000)

# Generate adaptive bins and counts
counts, xmin_edges, xmax_edges, ymin_edges, ymax_edges = adaptive_qthist(x, y, N=5, min_count=20, max_count=50)

# Plot with Plotly
fig = go.Figure()

# Add each bin as a rectangle
for xmin, xmax, ymin, ymax, count in zip(xmin_edges, xmax_edges, ymin_edges, ymax_edges, counts):
    # Color coding based on count thresholds
    if count == 0:
        color = 'yellow'
    elif 0 < count < 20:
        color = 'blue'
    elif 20 < count > 50:
        color = 'red'
    else:
        color = 'white'
    
    # Add rectangle for each bin
    fig.add_shape(
        type="rect",
        x0=xmin, y0=ymin, x1=xmax, y1=ymax,
        line=dict(color="black"),
        fillcolor=color,
        opacity=0.6
    )

    # Add annotation for each bin count at the center
    fig.add_annotation(
        x=(xmin + xmax) / 2,
        y=(ymin + ymax) / 2,
        text=str(int(count)),
        showarrow=False,
        font=dict(color="black", size=10)
    )

# Set up the layout
fig.update_layout(
    title="Adaptive 2D Histogram with Color Coding and Count Annotations",
    xaxis_title="X-axis",
    yaxis_title="Y-axis",
    xaxis=dict(range=[x.min(), x.max()]),
    yaxis=dict(range=[y.min(), y.max()]),
    showlegend=False,
    hovermode="closest"
)

# Show the plot
fig.show()


In [62]:
import numpy as np
import plotly.graph_objects as go


xedgesmin = xmin_edges
xedgesmax= xmax_edges
yedgesmin = ymin_edges
yedgesmax = ymax_edges
# Plotting with Plotly
fig = go.Figure()

# Iterate over all bins and plot only non-zero counts
for i in range(len(counts)):
    if counts[i] > 0:  # Only plot bins with non-zero counts
        # Define bin edges and center
        xmin, xmax = xedgesmin[i], xedgesmax[i]
        ymin, ymax = yedgesmin[i], yedgesmax[i]
        center_x = (xmin + xmax) / 2
        center_y = (ymin + ymax) / 2

        # Define color based on count threshold
        if 0 < counts[i] < 20:
            color = 'blue'
        elif counts[i] > 50:
            color = 'red'
        elif counts[i] == 0:
            color = 'yellow'
        else:
            color = 'white'

        # Add rectangle for each bin
        fig.add_shape(
            type="rect",
            x0=xmin, y0=ymin, x1=xmax, y1=ymax,
            line=dict(color="black"),
            fillcolor=color,
            opacity=0.6
        )

        # Add count annotation at the center of the bin
        fig.add_annotation(
            x=center_x,
            y=center_y,
            text=str(counts[i]),
            showarrow=False,
            font=dict(color="black", size=10)
        )

# Update layout settings
fig.update_layout(
    title="Rectangular Bins with Counts",
    xaxis_title="X-axis",
    yaxis_title="Y-axis",
    xaxis=dict(range=[min(xedgesmin), max(xedgesmax)]),
    yaxis=dict(range=[min(yedgesmin), max(yedgesmax)]),
    showlegend=False,
    hovermode="closest"
)

# Display the plot
fig.show()


In [57]:
counts

array([24.,  0.,  0., 33., 39.,  0., 40.,  0., 35., 39., 28., 26., 38.,
       20., 23., 33., 51., 46., 32., 27., 50., 41., 23., 30., 44.,  0.,
        0., 31., 37.,  0.,  0., 30., 36., 30., 25., 49., 47., 39., 26.,
       29., 41., 24., 37., 36., 39., 50., 49., 29., 20., 22., 42., 32.,
       24., 20., 39., 27., 23., 20., 25., 22., 40., 52.,  0., 44.,  0.,
       43., 35., 24., 24., 39., 48., 32., 29., 47., 41., 42., 28., 29.,
       36.,  0., 39.,  0., 26., 44., 26., 32., 40., 46., 31., 32., 44.,
       23., 40., 37., 45., 49., 47., 23., 24., 47., 45., 29., 26., 48.,
       43., 32., 29., 39., 42., 27., 24., 51., 45., 29., 28., 50., 34.,
       21., 25., 39., 42., 32., 25., 37., 57., 27., 30., 55., 38., 27.,
       22., 46., 43., 44., 29., 31., 40., 43., 26., 31., 40., 48., 42.,
       28., 30., 41., 20., 36., 31., 46., 42., 29., 29., 44.,  0.,  0.,
       32., 45.,  0.,  0., 36., 35., 49., 36., 32., 41., 20., 31., 32.,
       35., 45., 40., 23., 30., 38., 21., 35., 35., 41., 44., 34

In [63]:
import numpy as np
import plotly.graph_objects as go

# Define adaptive histogram function with recursive splitting and merging
def adaptive_qthist(x, y, N=5, min_count=10, max_count=50):
    # Storage for bin data
    num = []
    xmin = []
    xmax = []
    ymin = []
    ymax = []

    # Define the initial range for x and y, with a small buffer
    x_range = [np.nanmin(x), np.nanmax(x)]
    y_range = [np.nanmin(y), np.nanmax(y)]
    buffer_x = (x_range[1] - x_range[0]) * 0.1
    buffer_y = (y_range[1] - y_range[0]) * 0.1
    x_range = [x_range[0] - buffer_x, x_range[1] + buffer_x]
    y_range = [y_range[0] - buffer_y, y_range[1] + buffer_y]

    # Recursive function to divide bins and count points
    def divide_and_count(x_range, y_range, level=1):
        if level > N:
            return
        
        # Divide range into sub-bins
        bins_x = np.linspace(x_range[0], x_range[1], 3)  # Divides into 2 sub-bins per axis
        bins_y = np.linspace(y_range[0], y_range[1], 3)
        
        # Count points within the new sub-bins
        H, xedges, yedges = np.histogram2d(x, y, bins=[bins_x, bins_y])
        
        for i in range(len(bins_x) - 1):
            for j in range(len(bins_y) - 1):
                count = H[i, j]
                x_min, x_max = xedges[i], xedges[i + 1]
                y_min, y_max = yedges[j], yedges[j + 1]

                # Check if the bin count is within desired range
                if min_count <= count <= max_count:
                    # Store current bin's edges and count
                    num.append(count)
                    xmin.append(x_min)
                    xmax.append(x_max)
                    ymin.append(y_min)
                    ymax.append(y_max)
                # If count is above max_count, subdivide further
                elif count > max_count:
                    divide_and_count([x_min, x_max], [y_min, y_max], level + 1)
                # If count is below min_count, add bin with low count
                else:
                    num.append(count)
                    xmin.append(x_min)
                    xmax.append(x_max)
                    ymin.append(y_min)
                    ymax.append(y_max)

    divide_and_count(x_range, y_range)
    return np.array(num), np.array(xmin), np.array(xmax), np.array(ymin), np.array(ymax)

# Sample data
np.random.seed(42)
x = np.random.uniform(25, 30, 10000)
np.random.seed(39)
y = np.random.uniform(15, 25, 10000)

# Generate adaptive bins and counts
counts, xmin_edges, xmax_edges, ymin_edges, ymax_edges = adaptive_qthist(x, y, N=5, min_count=10, max_count=50)

# Plot with Plotly
fig = go.Figure()

# Add each bin as a rectangle
for xmin, xmax, ymin, ymax, count in zip(xmin_edges, xmax_edges, ymin_edges, ymax_edges, counts):
    # Color coding based on count thresholds
    if count == 0:
        color = 'yellow'
    elif 0 < count < 20:
        color = 'blue'
    elif count > 50:
        color = 'red'
    else:
        color = 'white'
    
    # Add rectangle for each bin
    fig.add_shape(
        type="rect",
        x0=xmin, y0=ymin, x1=xmax, y1=ymax,
        line=dict(color="black"),
        fillcolor=color,
        opacity=0.6
    )

    # Add annotation for each bin count at the center
    fig.add_annotation(
        x=(xmin + xmax) / 2,
        y=(ymin + ymax) / 2,
        text=str(int(count)),
        showarrow=False,
        font=dict(color="black", size=10)
    )

# Set up the layout
fig.update_layout(
    title="Adaptive 2D Histogram with Color Coding and Count Annotations",
    xaxis_title="X-axis",
    yaxis_title="Y-axis",
    xaxis=dict(range=[x.min(), x.max()]),
    yaxis=dict(range=[y.min(), y.max()]),
    showlegend=False,
    hovermode="closest"
)

# Show the plot
fig.show()


In [66]:
counts

array([20.,  0.,  0., 41., 35.,  0., 33.,  0., 40., 16., 17., 14., 14.,
       13., 13., 14., 15., 48., 10., 13., 17., 17.,  0.,  0., 35., 36.,
        0.,  0., 45., 32., 16., 18., 14., 10., 14., 21., 13., 14., 20.,
       19., 13., 15., 15.,  7., 15., 17., 11., 20., 12., 12., 17., 19.,
       11., 13., 10., 13., 11., 17., 45.,  0., 44.,  0., 26., 12.,  8.,
       15., 17., 19., 12., 13., 25., 10., 17., 15., 12., 12., 15., 20.,
       13.,  0., 38.,  0., 32., 11., 11., 21., 17., 10., 16., 19., 24.,
       50., 11., 25., 17., 17., 10., 13., 15., 14., 15., 16., 21., 16.,
       16.,  9., 16., 16.,  9., 15., 22., 17., 46., 20., 10., 13., 15.,
       14., 16., 11., 17.,  6., 13., 18., 15., 21., 12., 15., 10., 17.,
       10., 14., 11., 15., 15., 10., 11., 41., 14., 20., 16., 18., 13.,
       17., 20., 18., 14., 23., 17., 16., 14., 13., 17., 12.,  0.,  0.,
        5.,  6., 19., 25., 30.,  0.,  0., 37., 31., 18., 14., 20., 10.,
       12., 17., 22., 20., 22., 25., 15., 15., 16., 11., 20., 12

In [None]:
fig = go.Figure()

# Add the heatmap with the custom color scale
fig.add_trace(go.Heatmap(
    z=H,
    x=xedges,
    y=yedges,
    colorscale=[
        [0, 'yellow'],  # Yellow for zero counts
        [0.25, 'blue'], # Blue for low counts
        [0.5, 'white'], # White for acceptable counts
        [1, 'red']      # Red for high counts
    ],
    showscale=True,  # Show color scale for reference
    colorbar=dict(title="Bin Counts")
))

# Configure layout
fig.update_layout(
    title="Binned Data with Custom Color Coding",
    xaxis_title="X-axis",
    yaxis_title="Y-axis"
)

# Display the plot in an adaptive environment
pio.show(fig)

In [58]:
xmin_edges,xmax_edges

(array([24.50020515, 24.50020515, 24.50020515, 24.87509492, 24.87509492,
        25.24998468, 25.24998468, 25.62487445, 25.62487445, 25.24998468,
        25.24998468, 25.43742957, 25.43742957, 25.24998468, 25.24998468,
        25.43742957, 25.43742957, 25.62487445, 25.62487445, 25.81231933,
        25.81231933, 25.62487445, 25.62487445, 25.81231933, 25.81231933,
        24.50020515, 24.50020515, 24.87509492, 24.87509492, 24.50020515,
        24.50020515, 24.87509492, 24.87509492, 24.87509492, 25.0625398 ,
        25.0625398 , 25.24998468, 25.24998468, 25.24998468, 25.43742957,
        25.43742957, 25.62487445, 25.62487445, 25.81231933, 25.81231933,
        25.62487445, 25.24998468, 25.24998468, 25.43742957, 25.43742957,
        25.24998468, 25.24998468, 25.43742957, 25.43742957, 25.62487445,
        25.62487445, 25.81231933, 25.81231933, 25.62487445, 25.62487445,
        25.81231933, 25.81231933, 25.99976421, 25.99976421, 26.37465398,
        26.37465398, 25.99976421, 25.99976421, 26.1

In [59]:
ymin_edges, ymax_edges

(array([19.50009105, 20.25006141, 20.62504659, 20.25006141, 20.62504659,
        19.50009105, 19.87507623, 19.50009105, 19.87507623, 20.25006141,
        20.437554  , 20.25006141, 20.437554  , 20.62504659, 20.81253918,
        20.62504659, 20.81253918, 20.25006141, 20.437554  , 20.25006141,
        20.437554  , 20.62504659, 20.81253918, 20.62504659, 20.81253918,
        21.00003177, 21.37501695, 21.00003177, 21.37501695, 21.75000213,
        22.12498731, 21.75000213, 22.12498731, 22.3124799 , 22.12498731,
        22.3124799 , 21.00003177, 21.37501695, 21.56250954, 21.37501695,
        21.56250954, 21.00003177, 21.18752436, 21.00003177, 21.18752436,
        21.37501695, 21.75000213, 21.93749472, 21.75000213, 21.93749472,
        22.12498731, 22.3124799 , 22.12498731, 22.3124799 , 21.75000213,
        21.93749472, 21.75000213, 21.93749472, 22.12498731, 22.3124799 ,
        22.12498731, 22.3124799 , 19.50009105, 19.87507623, 19.50009105,
        19.87507623, 20.25006141, 20.437554  , 20.2

In [55]:
# Plot with Plotly
fig = go.Figure()

# Add each bin as a rectangle
for xmin, xmax, ymin, ymax, count in zip(xmin_edges, xmax_edges, ymin_edges, ymax_edges, counts):
    # Color coding based on count thresholds
    if count == 0:
        color = 'yellow'
    elif 0 < count < 20:
        color = 'blue'
    elif count > 50:
        color = 'red'
    else:
        color = 'white'
    
    # Add rectangle for each bin
    fig.add_shape(
        type="rect",
        x0=xmin, y0=ymin, x1=xmax, y1=ymax,
        line=dict(color="black"),
        fillcolor=color,
        opacity=0.6
    )

    # Add annotation for each bin count at the center
    fig.add_annotation(
        x=(xmin + xmax) / 2,
        y=(ymin + ymax) / 2,
        text=str(int(count)),
        showarrow=False,
        font=dict(color="black", size=10)
    )

# Set up the layout
fig.update_layout(
    title="Adaptive 2D Histogram with Color Coding and Count Annotations",
    xaxis_title="X-axis",
    yaxis_title="Y-axis",
    xaxis=dict(range=[x.min(), x.max()]),
    yaxis=dict(range=[y.min(), y.max()]),
    showlegend=False,
    hovermode="closest"
)

# Show the plot
fig.show()


In [None]:
# Plotting
fig, ax = plt.subplots(figsize=(10, 8))

# Loop through each bin and apply color based on count
for i in range(len(xedges) - 1):
    for j in range(len(yedges) - 1):
        # Coordinates of the bin
        x_min, x_max = xedges[i], xedges[i + 1]
        y_min, y_max = yedges[j], yedges[j + 1]
        count = counts[i, j]
        x_min, x_max = xedges[i], xedges[i + 1]
        y_min, y_max = yedges[j], yedges[j + 1]
        count = counts[i, j]
        # Determine color based on count
        if count == 0:
            color = 'yellow'
        elif count < min_count:
            color = 'blue'
        elif count > max_count:
            color = 'red'
        else:
            color = 'white'
        
        # Draw rectangle with the specified color
        rect = patches.Rectangle((x_min, y_min), x_max - x_min, y_max - y_min,
                                 linewidth=1, edgecolor="black", facecolor=color)
        ax.add_patch(rect)
        
        # Add the count as text in the center of the bin
        ax.text(x_min + (x_max - x_min) / 2, y_min + (y_max - y_min) / 2,
                int(count), color="black", ha='center', va='center', fontsize=8)

# Set limits and labels
ax.set_xlim(x.min(), x.max())
ax.set_ylim(y.min(), y.max())
plt.xlabel('X-axis')
plt.ylabel('Y-axis')
plt.title('2D Histogram with Custom Binning, Color Coding, and Bin Counts')
plt.show()


In [68]:
import numpy as np
import plotly.graph_objects as go

# Define adaptive histogram function with recursive splitting and no merging for bins with counts < min_count
def adaptive_qthist(x, y, N=5, min_count=1, max_count=50):
    # Storage for bin data
    num = []
    xmin = []
    xmax = []
    ymin = []
    ymax = []

    # Define the initial range for x and y, with a small buffer
    x_range = [np.nanmin(x), np.nanmax(x)]
    y_range = [np.nanmin(y), np.nanmax(y)]
    buffer_x = (x_range[1] - x_range[0]) * 0.1
    buffer_y = (y_range[1] - y_range[0]) * 0.1
    x_range = [x_range[0] - buffer_x, x_range[1] + buffer_x]
    y_range = [y_range[0] - buffer_y, y_range[1] + buffer_y]

    # Recursive function to divide bins and count points
    def divide_and_count(x_range, y_range, level=1):
        if level > N:
            return
        # Divide range into sub-bins
        bins_x = np.linspace(x_range[0], x_range[1], 3)
        bins_y = np.linspace(y_range[0], y_range[1], 3)
        H, xedges, yedges = np.histogram2d(x, y, bins=[bins_x, bins_y])

        for i in range(2):
            for j in range(2):
                count = H[i, j]
                x_min, x_max = xedges[i], xedges[i + 1]
                y_min, y_max = yedges[j], yedges[j + 1]

                # Check if the bin count is within desired range
                if min_count <= count <= max_count:
                    num.append(count)
                    xmin.append(x_min)
                    xmax.append(x_max)
                    ymin.append(y_min)
                    ymax.append(y_max)
                # If count is above max_count, subdivide further
                elif count > max_count:
                    divide_and_count([x_min, x_max], [y_min, y_max], level + 1)
                # If count is below min_count, keep the bin without merging
                else:
                    num.append(count)
                    xmin.append(x_min)
                    xmax.append(x_max)
                    ymin.append(y_min)
                    ymax.append(y_max)

    divide_and_count(x_range, y_range)
    return np.array(num), np.array(xmin), np.array(xmax), np.array(ymin), np.array(ymax)

# Sample data
np.random.seed(42)
x = np.random.uniform(25, 10, 1000)
np.random.seed(39)
y = np.random.uniform(25, 5, 1000)

# Generate adaptive bins and counts
counts, xmin_edges, xmax_edges, ymin_edges, ymax_edges = adaptive_qthist(x, y, N=5, min_count=10, max_count=50)

# Plot with Plotly
fig = go.Figure()

# Add each bin as a rectangle
for xmin, xmax, ymin, ymax, count in zip(xmin_edges, xmax_edges, ymin_edges, ymax_edges, counts):
    # Color coding based on count thresholds
    if count == 0:
        color = 'yellow'
    elif 0 < count < 20:
        color = 'blue'
    elif 20 < count > 50:
        color = 'red'
    else:
        color = 'white'
    
    # Add rectangle for each bin
    fig.add_shape(
        type="rect",
        x0=xmin, y0=ymin, x1=xmax, y1=ymax,
        line=dict(color="yellow"),
        fillcolor=color,
        opacity=0.6
    )

    # Add annotation for each bin count at the center
    fig.add_annotation(
        x=(xmin + xmax) / 2,
        y=(ymin + ymax) / 2,
        text=str(int(count)),
        showarrow=False,
        font=dict(color="black", size=10)
    )

# Set up the layout
fig.update_layout(
    title="Adaptive 2D Histogram with Color Coding and Count Annotations",
    xaxis_title="X-axis",
    yaxis_title="Y-axis",
    xaxis=dict(range=[x.min(), x.max()]),
    yaxis=dict(range=[y.min(), y.max()]),
    showlegend=False,
    hovermode="closest"
)

# Show the plot
fig.show()


In [69]:
counts

array([36., 11., 12., 18., 25., 11., 23.,  8., 31., 14., 17., 32., 28.,
        4.,  4., 29., 35., 25., 14., 19., 20., 21., 20., 11., 19.,  9.,
        6., 26.,  4., 19., 14., 19., 22., 17., 50., 25., 25.,  9., 11.,
       30., 16., 24., 29., 19.,  7., 24.,  2., 27., 22., 10.,  9., 38.])