# CPU 1D Pixel Binning - Data Reduction

This notebook demonstrates pixel binning by summing adjacent elements. Binning is commonly applied to detector data to increase the signal-to-noise ratio and to reduce the size of the data.

In [None]:
import matplotlib.pyplot as plt
import numpy as np

In [None]:
def bin_pixels_1d(data, bin_size):
    """Bin 1D pixel data by summing adjacent elements.

    Consecutive, non-overlapping groups of ``bin_size`` pixels are summed.
    When the input length is not a multiple of ``bin_size``, the final bin
    holds the sum of the remaining pixels only.

    Parameters
    ----------
    data : array_like
        One-dimensional pixel values.
    bin_size : int
        Number of adjacent pixels per bin; must be positive.

    Returns
    -------
    numpy.ndarray
        One sum per bin (``ceil(data.size / bin_size)`` entries), with the
        same dtype as the input.

    Raises
    ------
    ValueError
        If ``bin_size`` is not positive or ``data`` is not one-dimensional.
    """
    # validate input
    if bin_size <= 0:
        raise ValueError("bin_size must be positive.")

    pixels = np.asarray(data)
    if pixels.ndim != 1:
        raise ValueError("data must be one-dimensional.")

    # empty input yields zero bins; return early instead of reducing over
    # an empty index list
    if pixels.size == 0:
        return np.zeros(0, dtype=pixels.dtype)

    # index of the first pixel of every bin (round-up division is implicit:
    # a trailing partial bin still gets its own start index)
    bin_starts = np.arange(0, pixels.size, bin_size)

    # reduceat sums pixels[start:next_start] for each start, and from the
    # last start to the end of the array, so the partial final bin needs no
    # explicit boundary check. Accumulating in the input dtype avoids the
    # rounding a floating-point accumulator introduces for large integers.
    return np.add.reduceat(pixels, bin_starts, dtype=pixels.dtype)

In [None]:
def demonstrate_binning(data, bin_size):
    """Walk through the pixel binning algorithm, printing each step.

    Prints the input, the bin layout, the pixels summed for up to the first
    three bins, the binned result, and the total counts before and after
    binning.

    Parameters
    ----------
    data : numpy.ndarray
        One-dimensional pixel values (``data.size`` is read directly).
    bin_size : int
        Number of adjacent pixels per bin; must be positive.

    Returns
    -------
    numpy.ndarray
        The output of ``bin_pixels_1d(data, bin_size)``.

    Raises
    ------
    ValueError
        If ``bin_size`` is not positive.
    """
    if bin_size <= 0:
        raise ValueError("bin_size must be positive.")

    print("\nDemonstrating binning algorithm:")
    print(f"Input: {data}")
    print(f"Input size: {data.size}")
    print(f"Bin size: {bin_size}")

    data_size = data.size
    # round up so a trailing partial bin is counted
    n_bins = (data_size + bin_size - 1) // bin_size
    print(f"Number of output bins: {n_bins}")

    result = bin_pixels_1d(data, bin_size)

    # show the summing done by the first few "threads" (one per output bin)
    for bin_idx in range(min(3, n_bins)):
        first = bin_idx * bin_size
        # the last bin may be partial, so clamp to the final pixel
        last = min(first + bin_size, data_size) - 1
        elements = [f"{value}" for value in data[first:last + 1]]

        print(f"\nThread {bin_idx} (computing bin {bin_idx}):")
        print(f"  Sums pixels {first} to {last}")
        print(f"  Values: {' + '.join(elements)} = {result[bin_idx]}")

    print(f"\nResult: {result}")
    # compare against this function's own input rather than a notebook
    # global, so the check is valid for any array passed in
    print(f"Total counts preserved: "
          f"{np.sum(data)} -> {np.sum(result)}")

    return result

In [None]:
# small example readout: twelve pixel counts stored as float32
pixel_data = np.asarray(
    [10, 12, 8, 15, 20, 18, 5, 7, 9, 11, 13, 14],
    dtype=np.float32,
)

In [None]:
# walk through the binning of the example data, two pixels per bin
binned = demonstrate_binning(data=pixel_data, bin_size=2)

## Simulate Noisy Detector Data
With Poisson noise (photon counting statistics).

We will compute the signal-to-noise ratio (SNR), a common metric for estimating how noisy the data are. The higher the SNR, the 'cleaner' the data are.

### Generate the Data

In [None]:
# simulation parameters
n_pixels = 1000
peak_positions = [200, 500, 800]
peak_width = 50
rng = np.random.default_rng()

# low-level Poisson counts (mean 5) on every pixel
signal = rng.poisson(lam=5, size=n_pixels).astype(np.float32)

# add extra Poisson counts (mean 100) in a window around each peak position,
# clipping the window to the valid pixel range
half_width = peak_width // 2
for center in peak_positions:
    lo = max(0, center - half_width)
    hi = min(n_pixels, center + half_width)
    peak_counts = rng.poisson(lam=100, size=hi - lo).astype(np.float32)
    signal[lo:hi] += peak_counts

### Compare Different Binning Sizes

In [None]:
bin_factors = [1, 2, 5, 10]

# number of leading pixels that contain no peak in this simulation; they are
# used to estimate the background level
quiet_pixels = 100
# bins whose SNR exceeds this value are treated as signal when averaging
snr_threshold = 3

fig, ax = plt.subplots(len(bin_factors), 1, figsize=(12, 10))
# subplots returns a bare Axes (not an array) for a single row; normalise so
# ax[idx] works for any number of bin factors
ax = np.atleast_1d(ax)

for idx, bin_size in enumerate(bin_factors):
    if bin_size == 1:
        binned = signal
        title = "Original Detector Data (no binning)"
    else:
        binned = bin_pixels_1d(signal, bin_size)
        title = f"Binned {bin_size}x ({len(binned)} bins)"

    # centre each bar on the pixels it covers: bin k spans pixels
    # k*bin_size .. k*bin_size + bin_size - 1, whose midpoint is
    # k*bin_size + (bin_size - 1)/2. For bin_size == 1 this is just k, so
    # every panel shares the same pixel axis without a special case.
    x_vals = np.arange(len(binned)) * bin_size + (bin_size - 1) / 2
    width = bin_size * 0.9

    panel = ax[idx]
    panel.bar(x_vals, binned, width=width,
              color="darkblue", alpha=0.7, edgecolor="navy")
    panel.set_title(title)
    panel.set_ylabel("Photon Counts")
    panel.grid(True, alpha=0.3)

    # statistics
    total = np.sum(binned)
    peak = np.max(binned)

    # estimate background from the quiet region (no signal peaks); the pixel
    # count is scaled by the bin size, keeping at least one bin so the mean
    # is defined even when bin_size exceeds the quiet region
    n_quiet_bins = max(1, quiet_pixels // bin_size)
    quiet_bins = binned[:n_quiet_bins]
    background_per_bin = np.mean(quiet_bins)

    # calculate SNR for each bin
    # signal = total_counts - background, clipped at zero
    signal_per_bin = np.maximum(binned - background_per_bin, 0)

    # noise = sqrt(total_counts) because of Poisson statistics
    noise_per_bin = np.sqrt(binned)

    # bins with zero noise keep an SNR of 0 instead of dividing by zero
    snr_per_bin = np.divide(signal_per_bin, noise_per_bin,
                            out=np.zeros_like(signal_per_bin),
                            where=noise_per_bin != 0)

    # highest SNR over all bins
    peak_snr = np.max(snr_per_bin)

    # average SNR over the bins classified as signal; NaN (without a
    # RuntimeWarning from an empty mean) when no bin passes the threshold
    signal_snr = snr_per_bin[snr_per_bin > snr_threshold]
    mean_snr_in_peaks = np.mean(signal_snr) if signal_snr.size else np.nan

    # display the statistics
    stats_text = (f"Total: {total:.0f}\nPeak: {peak:.0f}\n"
                  f"Max peak SNR: {peak_snr:.1f}\n"
                  f"Avg peak SNR: {mean_snr_in_peaks:.1f}"
                  )
    panel.text(0.02, 0.95, stats_text,
               transform=panel.transAxes,
               verticalalignment="top",
               bbox=dict(boxstyle="round,pad=0.3",
                         facecolor="wheat", alpha=0.8))

ax[-1].set_xlabel("Pixel Position")
plt.tight_layout()
plt.show()