# In [1]:
"""
This script generates a professional, publication-quality histogram and KDE plot
of building sizes, styled similarly to the provided example.

It creates synthetic data, plots two overlapping distributions using seaborn,
customizes the appearance with bold fonts and the 'ggplot' theme, and saves
the final figure as a PDF.
"""

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns


def create_building_size_distribution_plot(output_filename, figsize):
    """
    Generate and save a histogram + KDE plot of synthetic building sizes.

    Two log-normal samples are drawn with a fixed seed and plotted on the
    same axes with seaborn. The sample referred to as "satellite" in this
    function is shown in the legend as "AS2AS"; the "drone" sample is shown
    as "AS2UAV". The figure is styled with the 'ggplot' theme and bold
    fonts, written to disk as a PDF, and then closed.

    The 'ggplot' theme is applied only while this function runs, and the
    random draw uses a private generator, so neither matplotlib's global
    rcParams nor NumPy's global random state is modified for the caller.

    Args:
        output_filename (str): The path to save the output PDF file.
        figsize (tuple): A tuple specifying the (width, height) of the figure
                         in inches.
    """
    # --- Define Data Parameters ---
    TOTAL_SAMPLES = 18432
    RANDOM_SEED = 42

    # Parameters for the log-normal distributions (mu, sigma) are tuned
    # to match the appearance of the source image.
    drone_mu, drone_sigma = 4.0, 0.4
    satellite_mu, satellite_sigma = 4.2, 0.5

    # --- Generate Synthetic Data ---
    # A private RandomState keeps the draw reproducible without reseeding
    # NumPy's global generator. The drone sample is drawn first, then the
    # satellite sample, so the sequence of draws stays fixed.
    rng = np.random.RandomState(RANDOM_SEED)
    drone_sizes = rng.lognormal(
        mean=drone_mu, sigma=drone_sigma, size=TOTAL_SAMPLES // 2
    )
    satellite_sizes = rng.lognormal(
        mean=satellite_mu, sigma=satellite_sigma, size=TOTAL_SAMPLES // 2
    )

    # Font properties for the title and the axis labels.
    TITLE_FONT = {'size': 18, 'weight': 'bold'}
    LABEL_FONT = {'size': 14, 'weight': 'bold'}

    # Apply the 'ggplot' theme only inside this block so the caller's
    # rcParams are restored afterwards. The figure is created, drawn and
    # saved inside the context so the theme applies throughout.
    with plt.style.context('ggplot'):
        # Create the figure and axes with the user-defined size.
        fig, ax = plt.subplots(figsize=figsize)
        try:
            # --- Plotting ---
            # Plot the Satellite data distribution
            sns.histplot(
                satellite_sizes,
                kde=True,
                stat="density",
                bins=25,
                color="#3467A2",
                alpha=0.4,
                label="AS2AS",
                line_kws={'linewidth': 2.5},
                ax=ax  # Specify the axes to draw on
            )

            # Plot the Drone data distribution
            sns.histplot(
                drone_sizes,
                kde=True,
                stat="density",
                bins=25,
                color="#D98A53",
                alpha=0.5,
                label="AS2UAV",
                line_kws={'linewidth': 2.5},
                ax=ax  # Specify the same axes
            )

            # --- Customization ---
            # Set titles and labels using the defined font dictionaries.
            ax.set_title(
                "Histogram + KDE of building sizes", fontdict=TITLE_FONT
            )
            ax.set_xlabel(
                "Average bounding-box size (px)", fontdict=LABEL_FONT
            )
            ax.set_ylabel("Probability density", fontdict=LABEL_FONT)

            # Set axis limits to match the source image.
            ax.set_xlim(0, 550)
            ax.set_ylim(0, 0.021)

            # Create a legend with larger, bold font.
            legend = ax.legend(fontsize=14)
            for text in legend.get_texts():
                text.set_fontweight('bold')

            # Ensure tick labels are larger and bold.
            ax.tick_params(axis='both', which='major', labelsize=14)
            for label in (ax.get_xticklabels() + ax.get_yticklabels()):
                label.set_fontweight('bold')

            # Adjust plot elements to fit into the figure area neatly.
            # Called on `fig` rather than via pyplot so it cannot act on
            # some other "current" figure.
            fig.tight_layout()

            # Save the figure, trimming any final excess whitespace.
            fig.savefig(
                output_filename,
                format='pdf',
                dpi=300,
                bbox_inches='tight'
            )
            print(f"Successfully saved plot to '{output_filename}'")
        finally:
            # Close the figure to free up memory, even if plotting or
            # saving raised.
            plt.close(fig)


if __name__ == '__main__':
    # --- USER-CONFIGURABLE PARAMETERS ---
    # Figure size in inches, given as (width, height).
    figure_dimensions = (10, 6)

    # Destination path of the generated PDF.
    output_file = '64_fig_4_distribution.pdf'

    # Build the plot and write it to disk.
    create_building_size_distribution_plot(output_file, figure_dimensions)

# Output: Successfully saved plot to '64_fig_4_distribution.pdf'
