In [None]:
using Revise
using Pkg; Pkg.activate(".")

In [None]:
using Unitful
using PotentialLearning
using Random: randperm
using JLD2
using InteratomicPotentials
using AtomsBase, AtomsCalculators
using Statistics
using CairoMakie, ColorSchemes

In [None]:
# Read the entry stored under "members" from the JLD2 file; it is passed to
# CommitteePotential below as the committee's members.
ensemble_members = load("ace_cmte1.jld2", "members")

In [None]:
includet("../files/committee_potentials.jl")
includet("../files/committee_qois.jl")

In [None]:
# Wrap the loaded members in a committee potential that reports energies in eV and
# lengths in Å (CommitteePotential comes from ../files/committee_potentials.jl).
my_cmte = CommitteePotential(ensemble_members; energy_units=u"eV", length_units=u"Å")
# Energy quantity of interest built with Statistics.std and unit stripping enabled.
# NOTE(review): presumably the standard deviation across committee members — confirm
# against ../files/committee_qois.jl.
cmte_energy = CmteEnergy(Statistics.std, strip_units=true)

In [None]:
# Load every entry of the JLD2 file and pull out the four datasets used below:
# calibration and test splits for the "pristine" and "frenkel" sets.
datasets = load("datasets_with_descriptors.jld2")
pristine_base_calib_ds = datasets["pristine_base_calib_ds"]
pristine_base_test_ds = datasets["pristine_base_test_ds"]
frenkel_base_calib_ds = datasets["frenkel_base_calib_ds"]
frenkel_base_test_ds = datasets["frenkel_base_test_ds"]



Compute a single calibration quantile (`qhat`) for the energy predictions only.

In [None]:
includet("../files/conformal_prediction_utils.jl")

In [None]:
# Adapted from subsampling_dpp.jl in the PL.jl examples.
"""
    concat_dataset(confs::Vector{DataSet})

Merge several `DataSet`s into a single `DataSet` that holds the entries of every input,
in the order the inputs are given.
"""
function concat_dataset(confs::Vector{DataSet})
    # One plain vector of entries per input dataset, obtained by integer indexing.
    per_dataset = map(ds -> [ds[k] for k = 1:length(ds)], confs)
    return DataSet(reduce(vcat, per_dataset))
end

In [None]:
# Pool the pristine and Frenkel splits so calibration and testing each use one dataset.
# NOTE(review): `[a; b]` is expected to produce a two-element Vector{DataSet} here;
# that only holds if DataSet is not itself array-like — confirm.
combined_calib_ds = concat_dataset([pristine_base_calib_ds; frenkel_base_calib_ds])
combined_test_ds = concat_dataset([pristine_base_test_ds; frenkel_base_test_ds])

In [None]:
# Calibration set: committee energy prediction, reference energy and committee
# uncertainty for every configuration (units stripped where the call returns them).
ecalib_pred = map(combined_calib_ds) do sys
    ustrip(PotentialLearning.potential_energy(sys, my_cmte))
end
ecalib_ref = map(config -> get_values(get_energy(config)), combined_calib_ds)
calib_uq = map(combined_calib_ds) do config
    ustrip(compute(cmte_energy, config, my_cmte))
end

In [None]:
# Calibrate on the calibration set. The last argument (0.1) sits in the position that
# compute_coverage below passes as `alpha`.
qhat = calibrate(ecalib_pred, ecalib_ref, calib_uq, 0.1)

In [None]:
# Test set: committee energy prediction, reference energy and committee uncertainty
# for every configuration, all with units stripped.
etest_pred = map(combined_test_ds) do config
    ustrip(PotentialLearning.potential_energy(config, my_cmte))
end
etest_ref = map(config -> ustrip(get_values(get_energy(config))), combined_test_ds)
test_uq = map(combined_test_ds) do config
    ustrip(compute(cmte_energy, config, my_cmte))
end

# Number of test configurations and their absolute prediction errors.
num_test = length(etest_pred)
test_abs_residuals = @. abs(etest_pred - etest_ref)

In [None]:
# Interval half-width per test configuration: calibrated factor times the committee
# uncertainty.
qhat_scores = qhat .* test_uq
# Fraction of test residuals that fall OUTSIDE their interval (empirical miscoverage).
miscoverage = count(test_abs_residuals .> qhat_scores) / num_test
# Empirical coverage in percent, matching the convention used by compute_coverage.
# (Previously the miscoverage fraction itself was stored under the name `coverage`.)
coverage = 100 * (1 - miscoverage)

In [None]:
# Claude
"""
    original_parity_plot(etest_ref, etest_pred, qhat_scored; title, xlabel, ylabel, figsize)

Return a figure with predicted values plotted against reference values, vertical error
bars of half-length `qhat_scored`, and a dashed red parity line spanning the data range.
The axis limits are fixed to `(-5.0, -4.0)` on both axes.
"""
function original_parity_plot(etest_ref, etest_pred, qhat_scored;
    title="Parity Plot",
    xlabel="Reference Values",
    ylabel="Predicted Values",
    figsize=(600, 600))

    fig = Figure(size=figsize)
    ax = Axis(fig[1, 1];
        title=title,
        xlabel=xlabel,
        ylabel=ylabel,
        limits=(-5.0, -4.0, -5.0, -4.0))

    # End points of the parity line: overall smallest and largest value in the data.
    lo = min(minimum(etest_pred), minimum(etest_ref))
    hi = max(maximum(etest_pred), maximum(etest_ref))
    diagonal = [lo, hi]

    # Draw order: parity line, then error bars, then the markers on top.
    lines!(ax, diagonal, diagonal;
        color=:red, linestyle=:dash, label="Perfect Prediction")
    errorbars!(ax, etest_ref, etest_pred, qhat_scored;
        whiskerwidth=1, color=:cyan3)
    scatter!(ax, etest_ref, etest_pred;
        color=:teal, markersize=10)

    # Equal aspect ratio intentionally left disabled:
    #ax.aspect = DataAspect()

    axislegend(ax)

    return fig
end

In [None]:
"""
    custom_parity_plot(etest_ref, etest_pred, qhat_scored; kwargs...)

Return a styled parity figure: predicted against reference energies with vertical error
bars of half-length `qhat_scored` and a dashed parity line.

# Keywords
- `xlabel`, `ylabel`: axis labels. `title` is accepted but not drawn.
- `width`: figure height in pixels; the figure is `1.75 * width` wide.
- `colormap`, `color_value`: ColorSchemes colormap name and base position in it. Used for
  any of the three colours below that is left as `nothing`.
- `marker_color`, `errorbar_color`, `diagonal_color`: explicit colours. When `nothing`
  they are taken from the colormap at `color_value + 0.15`, `color_value + 0.25` and
  `color_value` respectively.
- `diagonal_alpha`, `diagonal_style`, `line_width`: parity-line styling (the line is drawn
  with width `line_width - 1`).
- `marker_size`, `axis_color`, `text_size`, `label_size`, `grid_visible`, `grid_color`,
  `grid_linewidth`: general styling.
- `x_limits`: `(min, max)` for the x axis and the parity line, or `nothing` to use the
  overall minimum and maximum of the data.
- `y_padding`: the y axis spans `min - y_padding` to `max + y_padding`.
"""
function custom_parity_plot(etest_ref, etest_pred, qhat_scored;
    title="Parity Plot Subset",
    xlabel="Reference Energies (eV)",
    ylabel="Predicted Energies(eV)",
    width=600,
    colormap=:viridis,
    color_value=0.6,
    marker_size=10,
    line_width=3.0,
    axis_color=:black,
    text_size=18,
    label_size=22,
    grid_visible=false,
    grid_color=(:gray, 0.3),
    grid_linewidth=0.5,
    errorbar_color=nothing,
    marker_color=nothing,
    diagonal_color=nothing,
    diagonal_alpha=0.6,
    diagonal_style=:dash,
    x_limits=(-3257.35, -3257.15),
    y_padding=0.5)

    # Fall back to the colormap only for colours the caller did not provide. Each
    # keyword is tested against itself, so an explicit colour is always honoured.
    scheme = ColorSchemes.colorschemes[colormap]
    diagonal_color = isnothing(diagonal_color) ? get(scheme, color_value) : diagonal_color
    marker_color = isnothing(marker_color) ? get(scheme, color_value + 0.15) : marker_color
    errorbar_color =
        isnothing(errorbar_color) ? get(scheme, color_value + 0.25) : errorbar_color

    fig = Figure(resolution=(1.75*width, width), fontsize=text_size, figure_padding=30)

    # Plot window: explicit limits when given, otherwise the range of the data.
    if isnothing(x_limits)
        min_val = min(minimum(etest_pred), minimum(etest_ref))
        max_val = max(maximum(etest_pred), maximum(etest_ref))
    else
        min_val, max_val = x_limits
    end

    ax = Axis(fig[1, 1];
        xlabel=xlabel,
        ylabel=ylabel,
        limits=(min_val, max_val, min_val - y_padding, max_val + y_padding),
        titlesize=label_size,
        xlabelsize=label_size,
        ylabelsize=label_size,
        xticklabelsize=text_size,
        yticklabelsize=text_size,
        spinewidth=1.5,
        xgridvisible=grid_visible,
        ygridvisible=grid_visible,
        xgridcolor=grid_color,
        ygridcolor=grid_color,
        xgridwidth=grid_linewidth,
        ygridwidth=grid_linewidth)

    # Spines, tick labels, axis labels and title all share one colour.
    for attr in (:bottomspinecolor, :leftspinecolor, :rightspinecolor, :topspinecolor,
                 :xticklabelcolor, :yticklabelcolor, :xlabelcolor, :ylabelcolor,
                 :titlecolor)
        setproperty!(ax, attr, axis_color)
    end

    # Parity line across the x window, slightly thinner than `line_width`.
    lines!(ax, [min_val, max_val], [min_val, max_val];
        color=diagonal_color,
        linestyle=diagonal_style,
        linewidth=line_width - 1,
        alpha=diagonal_alpha,
        label="Perfect Prediction")

    # Error bars first so the markers are drawn on top of them.
    errorbars!(ax, etest_ref, etest_pred, qhat_scored;
        whiskerwidth=6,
        color=errorbar_color)

    scatter!(ax, etest_ref, etest_pred;
        color=marker_color,
        markersize=marker_size)

    return fig
end

In [None]:
#parity_fig = custom_parity_plot(etest_ref,etest_pred,qhat_scores, colormap=:lipari, color_value=0.05,text_size=24, label_size=40)
#save("parity_figure.svg", parity_fig)

In [None]:
"""
    custom_histogram(data; kwargs...)

Return a styled square figure containing a histogram of `data`.

# Keywords
- `width`: figure width and height in pixels.
- `bins`: passed to `hist!`.
- `colormap`, `color_value`, `fill_alpha`: bar colour taken from a ColorSchemes colormap,
  used when `bar_color` is `nothing`.
- `bar_color`: explicit bar colour.
- `edge_color`: bar outline colour; defaults to the bar colour.
- `edge_linewidth`: bar outline width.
- `xlabel`, `ylabel`: axis labels. `title` is accepted but not drawn.
- `normalize`: use `:pdf` normalization when `true`, raw counts otherwise.
- `kde`, `kde_color`, `kde_linewidth`: when `kde` is `true`, overlay a kernel density
  outline and a legend. The density is normalized, so it is only on the same scale as
  the bars when `normalize=true`.
- `xlimits`: `(min, max)` for the x axis, or `nothing` for automatic limits.
- `xticks`, `yticks`: tick positions, or `nothing` for automatic ticks.
- `axis_color`, `text_size`, `label_size`, `grid_visible`, `grid_color`,
  `grid_linewidth`: general styling.
"""
function custom_histogram(data;
    width=600,
    bins=500,
    colormap=:viridis,
    color_value=0.6,
    title="Histogram",
    xlabel="Value",
    ylabel="Frequency",
    fill_alpha=0.8,
    edge_linewidth=1.0,
    axis_color=:black,
    text_size=18,
    label_size=22,
    grid_visible=false,
    grid_color=(:gray, 0.3),
    grid_linewidth=0.5,
    bar_color=nothing,
    edge_color=nothing,
    normalize=false,
    kde=false,
    kde_linewidth=3.0,
    kde_color=:black,
    xlimits=(-0.01, 1.0),
    xticks=0:0.2:1.0,
    yticks=0:10:50)

    # Colours: colormap fallback for the bars; outlines follow the bars unless given.
    base_color = get(ColorSchemes.colorschemes[colormap], color_value)
    bar_color = isnothing(bar_color) ? (base_color, fill_alpha) : bar_color
    stroke_color = isnothing(edge_color) ? bar_color : edge_color

    fig = Figure(resolution=(width, width), fontsize=text_size)

    ax = Axis(fig[1, 1];
        xlabel=xlabel,
        ylabel=ylabel,
        xlabelsize=label_size,
        ylabelsize=label_size,
        titlesize=label_size,
        xticklabelsize=text_size,
        yticklabelsize=text_size,
        spinewidth=1.5,
        xgridvisible=grid_visible,
        ygridvisible=grid_visible,
        xgridcolor=grid_color,
        ygridcolor=grid_color,
        xgridwidth=grid_linewidth,
        ygridwidth=grid_linewidth)

    # Spines, tick labels, axis labels and title all share one colour.
    for attr in (:bottomspinecolor, :leftspinecolor, :rightspinecolor, :topspinecolor,
                 :xticklabelcolor, :yticklabelcolor, :xlabelcolor, :ylabelcolor,
                 :titlecolor)
        setproperty!(ax, attr, axis_color)
    end

    # Explicit ticks only when requested; `nothing` keeps the automatic ticks.
    isnothing(xticks) || (ax.xticks = xticks)
    isnothing(yticks) || (ax.yticks = yticks)

    hist!(ax, data;
        bins=bins,
        color=bar_color,
        strokecolor=stroke_color,
        strokewidth=edge_linewidth,
        normalization=normalize ? :pdf : :none)

    if kde
        # `density!` draws the kernel density estimate; a transparent fill leaves only
        # the outline. (The previous `kde!` call is not defined by the imported packages.)
        density!(ax, data;
            color=:transparent,
            strokecolor=kde_color,
            strokewidth=kde_linewidth,
            label="KDE")

        axislegend(ax, position=:rt, framevisible=true,
            framecolor=(:black, 0.2),
            padding=(10, 10, 10, 10),
            labelsize=text_size-2)
    end

    # Fix the x window when requested; the y limits stay automatic.
    if !isnothing(xlimits)
        ax.limits = (xlimits[1], xlimits[2], nothing, nothing)
    end

    return fig
end

In [None]:
#set_size_hist_fig = custom_histogram(qhat_scores; colormap=:lipari, color_value=0.2, text_size=24, label_size=28, xlabel="Interval Size (eV)", ylabel="Count", normalize=false)
#save("basic_set_size_histogram.svg", set_size_hist_fig)

In [None]:
# Quick unstyled look at the distribution of interval half-widths (qhat * test_uq).
hist(qhat_scores, bins=500)

In [None]:
#uncertainty_vs_residuals(test_uq,test_abs_residuals, limits=(0.0,0.05,-0.001,0.5))

In [None]:
# Calibration scores: absolute residual divided by the committee uncertainty.
calib_scores = @. abs(ecalib_pred - ecalib_ref) / calib_uq
# Reference levels 0.01, 0.02, ..., 0.99 and their complements.
alpha_complements = collect(0.01:0.01:0.99)
alpha_refs = 1 .- alpha_complements

# Predicted alphas from the calibration scores and the test-set uncertainties/residuals
# (generate_predicted_alphas is defined in ../files/conformal_prediction_utils.jl).
alpha_pred = generate_predicted_alphas(calib_scores, test_uq, test_abs_residuals)

In [None]:
#function make_custom_calibration_plot(expected_ps, observed_ps;
#    width=600,
#    main_line_color=:blue,
#    main_line_width=3.0,
#    band_color=(:blue, 0.2),
#    axis_color=:black,
#    text_size=18,
#    label_size=22)
## Convert to percentages
#expected_ps = expected_ps .* 100
#observed_ps = observed_ps .* 100
#
#fig = Figure(resolution=(width, width), fontsize=text_size)
#ax = Axis(fig[1, 1],
#aspect=DataAspect(),
#xlabel="Expected conf. level",
#ylabel="Observed conf. level",
#limits=(0, 100, 0, 100),
#xlabelsize=label_size,
#ylabelsize=label_size,
#xticklabelsize=text_size,
#yticklabelsize=text_size,
#spinewidth=1.5,
#xgridvisible=false,
#ygridvisible=false
#)
#
## Set spine and tick colors
#ax.spinecolor = axis_color
#ax.xticklabelcolor = axis_color
#ax.yticklabelcolor = axis_color
#ax.xlabelcolor = axis_color
#ax.ylabelcolor = axis_color
#
## Main line - made bolder
#lines!(ax, 1.0 .- expected_ps, observed_ps, color=main_line_color, linewidth=main_line_width)
#
## Diagonal reference line
#lines!(ax, 1.0 .- expected_ps, 1.0 .- expected_ps, linestyle=:dash, alpha=0.6, linewidth=1.5)
#
## Filled area between curves
#band!(ax, 1.0 .- expected_ps, 1.0 .- expected_ps, observed_ps, color=band_color)
#
#
## Configure ticks - approximately 4 ticks on each axis
#ax.xticks = 0:20:100
#ax.yticks = 0:20:100
#
## Add percentage signs to ticks
#ax.xtickformat = xs -> ["$(Int(x))%" for x in xs]
#ax.ytickformat = xs -> ["$(Int(x))%" for x in xs]
#
#return fig
#end

In [None]:
using ColorSchemes

"""
    make_custom_calibration_plot(expected_ps, observed_ps; kwargs...)

Return a square calibration figure. Both inputs are converted with `(1 - p) * 100` and
plotted against each other as a solid black curve, together with a dashed black diagonal
and a shaded band between the diagonal and the curve.

# Keywords
- `width`: figure width and height in pixels.
- `colormap`, `color_value`, `band_alpha`: colour and opacity of the shaded band.
- `main_line_width`: width of the main curve.
- `axis_color`, `text_size`, `label_size`: axis and text styling.
- `grid_visible`, `grid_color`, `grid_width`: grid styling.
"""
function make_custom_calibration_plot(expected_ps, observed_ps;
                                      width=600,
                                      colormap=:viridis,
                                      color_value=0.6,  # Value between 0-1 in the colormap
                                      main_line_width=3.0,
                                      band_alpha=0.2,
                                      axis_color=:black,
                                      text_size=18,
                                      label_size=22,
                                      grid_visible=true,
                                      grid_color=(:gray, 0.3),
                                      grid_width=0.5)
    # Complement each level and express it in percent.
    to_percent(p) = (1.0 .- p) .* 100
    expected_pct = to_percent(expected_ps)
    observed_pct = to_percent(observed_ps)

    # Band fill: colormap colour paired with the requested opacity.
    fill_color = (get(ColorSchemes.colorschemes[colormap], color_value), band_alpha)

    fig = Figure(resolution=(width, width), fontsize=text_size, figure_padding=30)
    ax = Axis(fig[1, 1];
        aspect=DataAspect(),
        xlabel="Expected Confidence Level",
        ylabel="Observed Confidence Level",
        limits=(0, 100, 0, 100),
        xlabelsize=label_size,
        ylabelsize=label_size,
        xticklabelsize=text_size,
        yticklabelsize=text_size,
        spinewidth=1.5,
        xgridvisible=grid_visible,
        ygridvisible=grid_visible,
        xgridcolor=grid_color,
        ygridcolor=grid_color,
        xgridwidth=grid_width,
        ygridwidth=grid_width)

    # Spines, tick labels and axis labels share one colour.
    for attr in (:bottomspinecolor, :leftspinecolor, :rightspinecolor, :topspinecolor,
                 :xticklabelcolor, :yticklabelcolor, :xlabelcolor, :ylabelcolor)
        setproperty!(ax, attr, axis_color)
    end

    # Draw order: main curve, dashed diagonal, then the band between them.
    lines!(ax, expected_pct, observed_pct; color=:black, linewidth=main_line_width)
    lines!(ax, expected_pct, expected_pct;
        linestyle=:dash, color=:black, alpha=0.6, linewidth=1.5)
    band!(ax, expected_pct, expected_pct, observed_pct; color=fill_color)

    # Ticks every 20 units, labelled with a percent sign.
    percent_labels(vals) = ["$(Int(v))%" for v in vals]
    ax.xticks = 0:20:100
    ax.yticks = 0:20:100
    ax.xtickformat = percent_labels
    ax.ytickformat = percent_labels

    return fig
end

In [None]:
# converted from Medford jupyter notebook via Claude
"""
    make_original_calibration_plot(expected_ps, observed_ps; width=600)

Return a square, unstyled calibration figure. Both inputs are converted with
`(1 - p) * 100`, the same transform `make_custom_calibration_plot` applies, and plotted
against each other with a dashed diagonal and a shaded band between the two.

The complement is taken before scaling to percent. Previously the percentages were
subtracted from `1.0`, which placed the curve at negative values outside the 0-100 axis
limits, while the band used the uncomplemented values.

NOTE(review): this assumes both arguments are levels of the same kind as those passed to
`make_custom_calibration_plot` — confirm before relying on this plot.
"""
function make_original_calibration_plot(expected_ps, observed_ps; width=600)
    # Complement each level and express it in percent.
    expected_pct = (1.0 .- expected_ps) .* 100
    observed_pct = (1.0 .- observed_ps) .* 100

    fig = Figure(resolution=(width, width))
    ax = Axis(fig[1, 1],
        aspect=DataAspect(),
        xlabel="Expected conf. level",
        ylabel="Observed conf. level",
        limits=(0, 100, 0, 100)
    )

    # Main curve
    lines!(ax, expected_pct, observed_pct)

    # Diagonal reference line
    lines!(ax, expected_pct, expected_pct, linestyle=:dash, alpha=0.4)

    # Filled area between the diagonal and the curve, on the same x values as the lines
    band!(ax, expected_pct, expected_pct, observed_pct, color=(:blue, 0.2))

    # Ticks every 10 units on both axes
    ax.xticks = 0:10:100
    ax.yticks = 0:10:100

    # Add percentage signs to ticks
    ax.xtickformat = xs -> ["$(Int(x))%" for x in xs]
    ax.ytickformat = xs -> ["$(Int(x))%" for x in xs]

    return fig
end

In [None]:
# Build the calibration figure from the reference and predicted alphas, then write it
# out as an SVG.
fig = make_custom_calibration_plot(alpha_refs,alpha_pred; main_line_width=2.0, colormap=:lipari, color_value=0.1, text_size=24, label_size=28)
save("just_energies_sample_calibration_plot.svg",fig, pt_per_unit=1)
#save("just_energies_sample_calibration_plot.png",fig, px_per_unit=3, dpi=300)

In [None]:
# Summary number for the calibration curve above; compute_miscalibration_area is
# presumably defined in ../files/conformal_prediction_utils.jl — confirm.
compute_miscalibration_area(alpha_refs,alpha_pred)

Generating coverage samples
1. First, combine all datasets.
2. Inside a function, we want to:
    - generate random indices and split them 50/50
    - compute ecalib_pred, ecalib_ref, calib_uq --> calib_scores
    - compute etest_pred, etest_ref --> test_abs_residuals
    - compute test_scores, coverage

In [None]:
# Pool the calibration and test datasets so they can be re-split at random below.
combined_dataset = concat_dataset([combined_calib_ds; combined_test_ds])

In [None]:
# Evaluate reference energies, committee predictions and committee uncertainties once
# for the pooled dataset, so the repeated random splits below only index into vectors.
all_reference_energies = map(config -> get_values(get_energy(config)), combined_dataset)
all_prediceted_energies = map(combined_dataset) do config
    ustrip(PotentialLearning.potential_energy(config, my_cmte))
end
all_uq = map(combined_dataset) do config
    ustrip(compute(cmte_energy, config, my_cmte))
end

In [None]:
"""
    compute_coverage(ref_energies, pred_energies, uq_vals, num_calib=1500;
                     alpha=0.05, rng=nothing)

Randomly split the data into `num_calib` calibration points and the remaining test
points, calibrate on the former with `calibrate(pred, ref, uq, alpha)`, and return the
percentage of test points whose absolute residual does not exceed `qhat * uq`.

# Arguments
- `ref_energies`, `pred_energies`, `uq_vals`: equally long vectors of reference energies,
  predicted energies and uncertainty values.
- `num_calib`: number of calibration points; must leave at least one test point.

# Keywords
- `alpha`: level forwarded to `calibrate`.
- `rng`: optional random number generator for a reproducible split; `nothing` uses the
  default generator.

# Throws
- `DimensionMismatch` if the three input vectors differ in length.
- `ArgumentError` if `num_calib` is not between 1 and `length(ref_energies) - 1`.
"""
function compute_coverage(ref_energies, pred_energies, uq_vals, num_calib=1500;
                          alpha=0.05, rng=nothing)
    total_num = length(ref_energies)
    (length(pred_energies) == total_num && length(uq_vals) == total_num) ||
        throw(DimensionMismatch(
            "ref_energies, pred_energies and uq_vals must have the same length"))
    # Both halves of the split must be non-empty: an empty test set would give 0/0.
    0 < num_calib < total_num ||
        throw(ArgumentError(
            "num_calib must be between 1 and $(total_num - 1), got $num_calib"))

    random_idxs = isnothing(rng) ? randperm(total_num) : randperm(rng, total_num)
    calib_idxs = random_idxs[1:num_calib]
    test_idxs = random_idxs[num_calib+1:end]

    qhat = calibrate(pred_energies[calib_idxs], ref_energies[calib_idxs],
                     uq_vals[calib_idxs], alpha)

    test_abs_residuals = abs.(pred_energies[test_idxs] .- ref_energies[test_idxs])
    qhat_scores = qhat .* uq_vals[test_idxs]

    # Fraction of test residuals outside the interval, reported as percent covered.
    pred_alpha = count(test_abs_residuals .> qhat_scores) / length(test_idxs)
    return 100 * (1 - pred_alpha)
end

In [None]:
#coverage = compute_coverage(combined_dataset,my_cmte,1500;alpha=0.05)
# One coverage sample (in percent) from a single random split with 1500 calibration points.
coverage = compute_coverage(all_reference_energies,all_prediceted_energies, all_uq, 1500; alpha=0.05)

In [None]:
# 100000 coverage samples, each from a fresh random split with 1500 calibration points.
# A comprehension keeps the loop value local, so the global `coverage` from the previous
# cell is no longer overwritten on every iteration.
coverages1 = Float64[
    compute_coverage(all_reference_energies, all_prediceted_energies, all_uq, 1500; alpha=0.05) for _ in 1:100000
]

In [None]:
# 100000 coverage samples, each from a fresh random split with 750 calibration points.
# A comprehension keeps the loop value local, so the global `coverage` is no longer
# overwritten on every iteration.
coverages2 = Float64[
    compute_coverage(all_reference_energies, all_prediceted_energies, all_uq, 750; alpha=0.05) for _ in 1:100000
]

In [None]:
# Quick unstyled look at the coverage samples obtained with 750 calibration points.
hist(coverages2, bins=50)

In [None]:
"""
    custom_histogram2(data1, data2; kwargs...)

Return a styled square figure with the histograms of `data1` and `data2` overlaid on one
axis. The x axis spans the overall minimum and maximum of the two inputs.

# Keywords
- `width`: figure width and height in pixels.
- `bins`: passed to both `hist!` calls.
- `colormap`, `color_value`, `fill_alpha`: colours are taken from a ColorSchemes colormap
  at `color_value` (first input) and `color_value + 0.3` (second input), used when
  `bar_color` is `nothing`.
- `bar_color`: explicit colour applied to both histograms.
- `edge_linewidth`: bar outline width; outlines use the bar colour.
- `xlabel`, `ylabel`: axis labels.
- `normalize`: use `:pdf` normalization when `true`, raw counts otherwise.
- `xticks`, `yticks`: tick positions, or `nothing` (default) for automatic ticks.
  Previously the ticks were fixed to `0:0.2:1.0` and `0:10:50`, which did not match the
  data-driven x limits.
- `axis_color`, `text_size`, `label_size`, `grid_visible`, `grid_color`,
  `grid_linewidth`: general styling.
- `title`, `edge_color`, `kde`, `kde_linewidth`, `kde_color`: accepted for signature
  compatibility with the single-input histogram helpers; not used here.
"""
function custom_histogram2(data1, data2;
    width=600,
    bins=50,
    colormap=:viridis,
    color_value=0.6,
    title="Histogram",
    xlabel="Value",
    ylabel="Frequency",
    fill_alpha=0.5,
    edge_linewidth=1.0,
    axis_color=:black,
    text_size=18,
    label_size=22,
    grid_visible=false,
    grid_color=(:gray, 0.3),
    grid_linewidth=0.5,
    bar_color=nothing,
    edge_color=nothing,
    normalize=false,
    kde=false,
    kde_linewidth=3.0,
    kde_color=:black,
    xticks=nothing,
    yticks=nothing)

    # One colour per input, both from the same colormap unless overridden.
    scheme = ColorSchemes.colorschemes[colormap]
    bar_color1 = isnothing(bar_color) ? (get(scheme, color_value), fill_alpha) : bar_color
    bar_color2 =
        isnothing(bar_color) ? (get(scheme, color_value + 0.3), fill_alpha) : bar_color

    fig = Figure(resolution=(width, width), fontsize=text_size)

    # The x window covers both inputs.
    x_min = min(minimum(data1), minimum(data2))
    x_max = max(maximum(data1), maximum(data2))

    ax = Axis(fig[1, 1];
        xlabel=xlabel,
        ylabel=ylabel,
        xlabelsize=label_size,
        ylabelsize=label_size,
        titlesize=label_size,
        xticklabelsize=text_size,
        yticklabelsize=text_size,
        spinewidth=1.5,
        xgridvisible=grid_visible,
        ygridvisible=grid_visible,
        xgridcolor=grid_color,
        ygridcolor=grid_color,
        xgridwidth=grid_linewidth,
        ygridwidth=grid_linewidth)

    # Spines, tick labels, axis labels and title all share one colour.
    for attr in (:bottomspinecolor, :leftspinecolor, :rightspinecolor, :topspinecolor,
                 :xticklabelcolor, :yticklabelcolor, :xlabelcolor, :ylabelcolor,
                 :titlecolor)
        setproperty!(ax, attr, axis_color)
    end

    # Explicit ticks only when requested; `nothing` keeps the automatic ticks.
    isnothing(xticks) || (ax.xticks = xticks)
    isnothing(yticks) || (ax.yticks = yticks)

    normalization = normalize ? :pdf : :none
    for (values, bar) in ((data1, bar_color1), (data2, bar_color2))
        hist!(ax, values;
            bins=bins,
            color=bar,
            strokecolor=bar,
            strokewidth=edge_linewidth,
            normalization=normalization)
    end

    # Fix the x window to the data range; the y limits stay automatic.
    ax.limits = (x_min, x_max, nothing, nothing)

    return fig
end

In [None]:
# Overlay the coverage distributions for 1500 and 750 calibration points.
custom_histogram2(coverages1, coverages2)

In [None]:
"""
    custom_histogram_v2(data; kwargs...)

Return a styled square figure containing a histogram of `data`, with x tick labels
followed by a percent sign.

# Keywords
- `width`: figure width and height in pixels.
- `bins`: passed to `hist!`.
- `colormap`, `color_value`, `fill_alpha`: bar colour taken from a ColorSchemes colormap,
  used when `bar_color` is `nothing`.
- `bar_color`: explicit bar colour.
- `edge_color`: bar outline colour; defaults to the bar colour.
- `edge_linewidth`: bar outline width.
- `xlabel`, `ylabel`: axis labels. `title` is accepted but not drawn.
- `normalize`: use `:pdf` normalization when `true`, raw counts otherwise.
- `kde`, `kde_color`, `kde_linewidth`: when `kde` is `true`, overlay a kernel density
  outline and a legend. The density is normalized, so it is only on the same scale as
  the bars when `normalize=true`.
- `xticks`: x tick positions (default `92:1:98`), or `nothing` for automatic ticks.
- `xlimits`: `(min, max)` for the x axis (default `(91.5, 98.5)`), or `nothing` to use
  the data range padded by 0.5 on each side.
- `axis_color`, `text_size`, `label_size`, `grid_visible`, `grid_color`,
  `grid_linewidth`: general styling.
"""
function custom_histogram_v2(data;
    width=600,
    bins=50,
    colormap=:viridis,
    color_value=0.6,
    title="Histogram",
    xlabel="Value",
    ylabel="Frequency",
    fill_alpha=0.8,
    edge_linewidth=1.0,
    axis_color=:black,
    text_size=18,
    label_size=22,
    grid_visible=false,
    grid_color=(:gray, 0.3),
    grid_linewidth=0.5,
    bar_color=nothing,
    edge_color=nothing,
    normalize=false,
    kde=false,
    kde_linewidth=3.0,
    kde_color=:black,
    xticks=92:1:98,
    xlimits=(92 - 0.5, 98 + 0.5))

    # Colours: colormap fallback for the bars; outlines follow the bars unless given.
    base_color = get(ColorSchemes.colorschemes[colormap], color_value)
    bar_color = isnothing(bar_color) ? (base_color, fill_alpha) : bar_color
    stroke_color = isnothing(edge_color) ? bar_color : edge_color

    fig = Figure(resolution=(width, width), fontsize=text_size)

    ax = Axis(fig[1, 1];
        xlabel=xlabel,
        ylabel=ylabel,
        xlabelsize=label_size,
        ylabelsize=label_size,
        titlesize=label_size,
        xticklabelsize=text_size,
        yticklabelsize=text_size,
        spinewidth=1.5,
        xgridvisible=grid_visible,
        ygridvisible=grid_visible,
        xgridcolor=grid_color,
        ygridcolor=grid_color,
        xgridwidth=grid_linewidth,
        ygridwidth=grid_linewidth)

    # Spines, tick labels, axis labels and title all share one colour.
    for attr in (:bottomspinecolor, :leftspinecolor, :rightspinecolor, :topspinecolor,
                 :xticklabelcolor, :yticklabelcolor, :xlabelcolor, :ylabelcolor,
                 :titlecolor)
        setproperty!(ax, attr, axis_color)
    end

    # Explicit x ticks only when requested; `nothing` keeps the automatic ticks.
    isnothing(xticks) || (ax.xticks = xticks)

    hist!(ax, data;
        bins=bins,
        color=bar_color,
        strokecolor=stroke_color,
        strokewidth=edge_linewidth,
        normalization=normalize ? :pdf : :none)

    if kde
        # `density!` draws the kernel density estimate; a transparent fill leaves only
        # the outline. (The previous `kde!` call is not defined by the imported packages.)
        density!(ax, data;
            color=:transparent,
            strokecolor=kde_color,
            strokewidth=kde_linewidth,
            label="KDE")

        axislegend(ax, position=:rt, framevisible=true,
            framecolor=(:black, 0.2),
            padding=(10, 10, 10, 10),
            labelsize=text_size-2)
    end

    # X window: explicit limits, or the padded data range when none are given.
    x_lo, x_hi = isnothing(xlimits) ? (minimum(data) - 0.5, maximum(data) + 0.5) : xlimits
    ax.limits = (x_lo, x_hi, nothing, nothing)

    # Percent-sign tick labels; whole numbers are printed without a decimal point.
    ax.xtickformat = xs -> [isinteger(x) ? "$(Int(x))%" : "$(x)%" for x in xs]

    return fig
end

In [None]:
# Normalized histogram of the coverage samples from the 1500-point calibration splits,
# written out as an SVG.
coverage_freq_fig = custom_histogram_v2(coverages1; colormap=:lipari, color_value=0.2, text_size=24, label_size=28, normalize=true, xlabel="Coverage", ylabel="Normalized Frequency")
save("coverage_frequency_histogram.svg", coverage_freq_fig)