In [156]:
import pandas as pd
import numpy as np

In [157]:
def dual_histogram_bin_edges(x_array: pd.Series, y_array: pd.Series, bins='sturges'):
    """Compute bin edges that are suitable for both x_array and y_array.

    The edges are estimated on the smaller of the two samples, extended with
    the overall minimum and maximum of both samples, so the returned bins
    span every non-missing observation of either distribution.

    Args:
        x_array: values of the first distribution (a Series or any 1-D
            array-like that ``pd.Series`` accepts). Missing values are ignored.
        y_array: values of the second distribution (same accepted types as
            x_array). Missing values are ignored.
        bins: binning estimator (or bin count) forwarded to
            ``np.histogram_bin_edges``. Default 'sturges' estimator recommended.
    Returns:
        ndarray: bin edges as returned by ``np.histogram_bin_edges``; for an
        estimator name or a bin count they are equally spaced over the closed
        interval [overall minimum, overall maximum].
    Raises:
        ValueError: if neither input contains a non-missing value.
    """
    # Wrap the inputs so lists and ndarrays work as well as Series.
    x_series = pd.Series(x_array)
    y_series = pd.Series(y_array)

    # NaN must never reach np.histogram_bin_edges: it makes the autodetected
    # range non-finite and numpy raises.
    x_valid = x_series.dropna()
    y_valid = y_series.dropna()

    # Find overall min and max values for both distributions. Only non-empty
    # samples take part, because built-in min()/max() give order-dependent
    # results once one operand is NaN (which is what .min() of an empty
    # Series returns).
    non_empty = [sample for sample in (x_valid, y_valid) if not sample.empty]
    if not non_empty:
        raise ValueError("x_array and y_array contain no non-missing values")
    min_value = min(sample.min() for sample in non_empty)
    max_value = max(sample.max() for sample in non_empty)

    # Select the smaller array (y_array on ties). The comparison uses the raw
    # lengths so that results for NaN-free input stay exactly as before.
    smaller_valid = x_valid if x_series.size < y_series.size else y_valid

    # Extend the smaller array with the overall min and max values. An empty
    # sample is skipped so its dtype cannot leak into the result.
    extremes = pd.Series([min_value, max_value])
    if smaller_valid.empty:
        smaller_array_extended = extremes
    else:
        smaller_array_extended = pd.concat([smaller_valid, extremes], ignore_index=True)

    # Return bin edges computed on the extended smaller array using the desired estimator.
    return np.histogram_bin_edges(smaller_array_extended, bins=bins)

In [158]:
observations = 100_000
SEED = 42  # fixed seed so Restart & Run All reproduces the same samples
rng = np.random.default_rng(SEED)
x_array = pd.Series(rng.normal(50, 15, observations))  # reference sample
y_array = pd.Series(rng.normal(50, 15, observations))  # independent draw from the same distribution as x

In [159]:
# Shared bin edges for the two large samples, using the default estimator.
test_a = dual_histogram_bin_edges(x_array=x_array, y_array=y_array)
test_a

array([-19.41419953, -11.84324999,  -4.27230045,   3.29864909,
        10.86959863,  18.44054817,  26.01149771,  33.58244725,
        41.15339679,  48.72434633,  56.29529587,  63.86624541,
        71.43719495,  79.00814449,  86.57909403,  94.15004357,
       101.72099311, 109.29194265, 116.86289219])

In [160]:
# Number of bin edges (one more than the number of bins).
print(test_a.shape[0])

19


In [161]:
# Degenerate case: every observation in both samples has the same value.
x_array = pd.Series([1] * 5)  # reference sample
y_array = pd.Series([1] * 2)  # same constant value as x, fewer observations

In [162]:
# Shared bin edges for the constant-valued samples.
test_b = dual_histogram_bin_edges(x_array=x_array, y_array=y_array)
test_b


array([0.5, 1.5])