We measure the sample complexity of estimating both $\lambda$ and $m^2$. For a given sample size we generate samples and check whether the estimation error is below our threshold value. If it is not, we increase the number of samples by a factor of $1.1$ and repeat.

In [None]:
using AdvancedHMC, ForwardDiff
using LogDensityProblems, LinearAlgebra, Random, Statistics, Optim, GalacticOptim, Plots

# Phi^4 model structure
# Bundles the parameters read by `calculate_energy_2d` and by the log-density wrapper.
struct Phi4Model2D
    # Side length N of the lattice; `logdensity` reshapes the flat field to N×N with it.
    lattice_size::Int
    # Coefficient of the quartic term, used as 0.25 * lambda * phi^4 in the energy.
    lambda::Float64
    # Coefficient of the quadratic term, used as 0.5 * m2 * phi^2 in the energy.
    m2::Float64
    # Squared nearest-neighbour differences are divided by a^2 in the energy
    # (presumably the lattice spacing — confirm).
    a::Float64
end

# Energy function for 2D lattice
#
# Returns the sum over all sites (i, j) of
#   0.5 * ((phi[i,j] - phi[i+1,j])^2 + (phi[i,j] - phi[i,j+1])^2) / a^2
#   + 0.5 * m2 * phi[i,j]^2 + 0.25 * lambda * phi[i,j]^4
# with indices wrapped periodically via `mod1`.
#
# Arguments:
#   phi   - field configuration; only `size(phi, 1)` is read, so it is treated as square.
#   model - supplies `a`, `m2` and `lambda`.
#
# The element type `T` may be an automatic-differentiation number (the sampler
# differentiates this function through ForwardDiff), so the accumulators are created
# in the promoted type up front instead of starting as Float64 and changing type on
# the first `+=`.
function calculate_energy_2d(phi::Matrix{T}, model::Phi4Model2D) where {T<:Real}
    N = size(phi, 1)
    a = model.a
    # Model parameters are Float64, so every term has type promote_type(T, Float64).
    S = promote_type(T, Float64)
    kinetic = zero(S)
    potential = zero(S)
    for i in 1:N, j in 1:N
        # Forward neighbours only: each bond is counted exactly once.
        i_right = mod1(i + 1, N)
        j_up = mod1(j + 1, N)
        kinetic += 0.5 * ((phi[i,j] - phi[i_right,j])^2 + (phi[i,j] - phi[i,j_up])^2) / a^2
        potential += 0.5 * model.m2 * phi[i,j]^2 + 0.25 * model.lambda * phi[i,j]^4
    end
    return kinetic + potential
end

# Log-density wrapper
# Target type on which the LogDensityProblems methods of this file are defined.
struct Phi4TargetDensity2D
    # Number of field components. The constructor call in this file passes N^2.
    # NOTE(review): the `dimension` method in this file recomputes lattice_size^2 from
    # `model` and does not read this field.
    dim::Int
    # Model whose energy defines the (unnormalised) log-density.
    model::Phi4Model2D
end

# Unnormalised log-density of a flattened field: minus the lattice energy.
# `phi_vec` is viewed as a lattice_size × lattice_size matrix before the energy is evaluated.
function LogDensityProblems.logdensity(p::Phi4TargetDensity2D, phi_vec::Vector{T}) where {T<:Real}
    side = p.model.lattice_size
    energy = calculate_energy_2d(reshape(phi_vec, side, side), p.model)
    return -energy
end

# Number of real degrees of freedom: one per site of the lattice_size × lattice_size lattice.
LogDensityProblems.dimension(p::Phi4TargetDensity2D) = p.model.lattice_size^2
# Only `logdensity` is implemented for this type; there is no `logdensity_and_gradient`
# method. The honest capability is therefore order 0 (value only). The gradient used by the
# sampler comes from the ForwardDiff wrapper requested when the Hamiltonian is built, which
# only needs an order-0 problem.
LogDensityProblems.capabilities(::Type{Phi4TargetDensity2D}) = LogDensityProblems.LogDensityOrder{0}()

# HMC data generation
#
# Draws `n_samples` field configurations for `model` with AdvancedHMC (multinomial
# trajectory sampling with the generalised no-U-turn criterion, diagonal metric,
# Stan-style step-size and mass-matrix adaptation targeting acceptance 0.8).
#
# Arguments:
#   model     - phi^4 model to sample from.
#   n_samples - number of configurations to return.
#   n_adapts  - number of adaptation (warmup) iterations run before the returned draws.
#
# Returns a vector of `n_samples` matrices of size lattice_size × lattice_size.
function generate_phi4_data_hmc_2d(model::Phi4Model2D, n_samples::Int, n_adapts::Int = 2000)
    N = model.lattice_size
    phi4_target = Phi4TargetDensity2D(N^2, model)
    init_phi = randn(N, N)
    metric = DiagEuclideanMetric(N^2)
    hamiltonian = Hamiltonian(metric, phi4_target, ForwardDiff)
    epsilon = find_good_stepsize(hamiltonian, vec(init_phi))
    integrator = Leapfrog(epsilon)
    kernel = HMCKernel(Trajectory{MultinomialTS}(integrator, GeneralisedNoUTurn()))
    adaptor = StanHMCAdaptor(MassMatrixAdaptor(metric), StepSizeAdaptor(0.8, integrator))

    # `sample` treats its iteration count as the TOTAL, adaptation included, and keeps the
    # adaptation draws unless told otherwise. Passing `n_samples` directly therefore
    # returned warmup draws (all of them whenever n_samples < n_adapts), which start from
    # a random field and are produced with a still-changing step size and metric.
    # Run the warmup in addition to the requested draws and discard it.
    n_total = n_samples + n_adapts

    # Both output streams are silenced while sampling.
    samples_vec = redirect_stdout(devnull) do
        redirect_stderr(devnull) do
            first(sample(hamiltonian, kernel, vec(init_phi), n_total, adaptor, n_adapts;
                         drop_warmup=true, progress=false))
        end
    end

    return [reshape(s, N, N) for s in samples_vec]
end


# Score matching objective
#
# For every site of every sample this accumulates 0.5 * s^2 + s', where
#   s  = laplacian - (m2 * x + lambda * x^3)      (derivative of -energy w.r.t. the site)
#   s' = -(m2 + 3 * lambda * x^2 + 4 / a^2)       (derivative of s w.r.t. the same site)
# and `laplacian` is the periodic 5-point stencil divided by a^2. The total is divided
# by the number of samples.
#
# Arguments:
#   lambda, m2, a - model parameters; any `Real` is accepted (integers, Float32,
#                   automatic-differentiation numbers), not only Float64.
#   data          - non-empty vector of square matrices; the side length is read from
#                   the first one.
#
# Returns the objective value in the promoted numeric type (Float64 for Float64 input).
function score_matching_objective_2d(lambda::Real, m2::Real, a::Real,
                                     data::AbstractVector{<:Matrix{<:Real}})
    N = size(data[1], 1)
    a2 = a^2  # loop-invariant
    # Start in the promoted type so the accumulator never changes type inside the loop.
    obj = zero(promote_type(typeof(lambda), typeof(m2), typeof(a), eltype(data[1]), Float64))
    for phi in data
        for i in 1:N, j in 1:N
            x = phi[i,j]
            x_left = phi[mod1(i-1,N), j]
            x_right = phi[mod1(i+1,N), j]
            x_down = phi[i, mod1(j-1,N)]
            x_up = phi[i, mod1(j+1,N)]
            laplacian = -(4x - x_left - x_right - x_down - x_up) / a2
            s = laplacian - (m2 * x + lambda * x^3)
            s_prime = -(m2 + 3 * lambda * x^2 + 4 / a2)
            obj += 0.5 * s^2 + s_prime
        end
    end
    return obj / length(data)
end

# Parameter estimation
#
# Returns the (λ, m²) pair that minimises the score matching objective for `data`.
#
# The per-site score is s = L - m²·x - λ·x³ (L = periodic lattice Laplacian / a²) and the
# objective sums 0.5·s² - m² - 3λ·x² - 4/a² over sites. That is an exact quadratic in
# (λ, m²), so its minimiser is the solution of the 2×2 normal equations
#
#   [Σx⁶  Σx⁴] [λ ]   [Σ L·x³ + 3Σx²]
#   [Σx⁴  Σx²] [m²] = [Σ L·x  + n   ]        (n = total number of sites summed over)
#
# Solving them directly is deterministic and needs a single pass over the data, whereas a
# stochastic global search adds run-to-run noise to the estimates and re-scans the whole
# data set on every objective evaluation.
#
# Arguments:
#   data - non-empty vector of square field configurations (side read from the first).
#   a    - value whose square divides the Laplacian stencil.
#
# Throws `ArgumentError` for empty `data`; the linear solve throws a `SingularException`
# when the data cannot identify both parameters (e.g. every site has the same |x|).
function estimate_parameters_score_matching_2d(data::Vector{Matrix{Float64}}, a::Float64)
    isempty(data) && throw(ArgumentError("data must contain at least one sample"))
    N = size(data[1], 1)
    a2 = a^2

    sum_x2 = 0.0
    sum_x4 = 0.0
    sum_x6 = 0.0
    sum_lx = 0.0
    sum_lx3 = 0.0
    n_sites = 0
    for phi in data
        for i in 1:N, j in 1:N
            x = phi[i, j]
            neighbours = phi[mod1(i - 1, N), j] + phi[mod1(i + 1, N), j] +
                         phi[i, mod1(j - 1, N)] + phi[i, mod1(j + 1, N)]
            laplacian = -(4x - neighbours) / a2
            x2 = x * x
            sum_x2 += x2
            sum_x4 += x2^2
            sum_x6 += x2^3
            sum_lx += laplacian * x
            sum_lx3 += laplacian * x * x2
            n_sites += 1
        end
    end

    gram = [sum_x6 sum_x4; sum_x4 sum_x2]
    rhs = [sum_lx3 + 3 * sum_x2, sum_lx + n_sites]
    x_opt = gram \ rhs
    println("Estimated λ: ", x_opt[1], ", m²: ", x_opt[2])
    return x_opt[1], x_opt[2]
end

Code to illustrate complexity for $\lambda$: For a fixed $m^2$, we sweep over a range of values of $\lambda$ and, for each one, increase the number of samples by a factor of $1.1$ until the standard error of the estimate falls below the threshold of $0.02$. Then we plot our results.

In [None]:
# -----------------------------
# Complexity Study
# -----------------------------
# For each λ, find the smallest sample size (grown geometrically by 1.1 from 500) at which
# the standard error of the mean λ estimate over `reps` independent runs is at most
# `target_error`. Results are collected in `λ_recorded` / `n_required` (NaN = not reached).
println("\n=== Starting Top-Level Complexity Study ===")
λ_values = 0.5:0.05:2.5        # Range of λ values to test
target_error = 0.02           # Acceptable standard error for λ estimate
reps = 50                    # Number of repetitions per λ value
lattice_size = 4
m2_true = -0.5
a = 1.0

n_required = Float64[]        # Float64 so that NaN can mark "target never reached"
λ_recorded = Float64[]

for λ in λ_values
    println("\nRunning λ = $λ")
    # The model depends only on λ, so build it once instead of once per trial.
    model = Phi4Model2D(lattice_size, λ, m2_true, a)
    n_samples = 500.0         # Tracked as a float so repeated ×1.1 growth is not truncated
    max_samples = 1000000.0
    found = false

    while n_samples ≤ max_samples
        # Integer sample count actually handed to the sampler; also what gets reported.
        n_draws = round(Int, n_samples)
        λ_estimates = Float64[]
        for _ in 1:reps
            samples = generate_phi4_data_hmc_2d(model, n_draws, 2000)
            λ_est, _ = estimate_parameters_score_matching_2d(samples, a)
            push!(λ_estimates, λ_est)
        end
        # Standard error of the mean over the repetitions. Deliberately not named
        # `stderr`, which would shadow Base's standard-error stream.
        sem_λ = std(λ_estimates) / sqrt(reps)
        println("  n = $(n_draws) → stderr = $sem_λ")
        if sem_λ ≤ target_error
            println("  ✓ λ = $λ: n = $(n_draws) suffices")
            push!(n_required, n_draws)
            push!(λ_recorded, λ)
            found = true
            break
        else
            n_samples *= 1.1
        end
    end

    if !found
        println("  ✗ λ = $λ: No sample size reached target error")
        push!(n_required, NaN)
        push!(λ_recorded, λ)
    end
end


Code to illustrate complexity for $m^2$: For a fixed $\lambda$, we sweep over a range of values of $m^2$ and, for each one, increase the number of samples by a factor of $1.1$ until the standard error of the estimate falls below the threshold of $0.02$. Then we plot our results.

In [None]:
using StatsBase, Plots, Random, Statistics, Optim

# For each m², find the smallest sample size (grown geometrically by 1.1 from 500) at which
# the standard error of the mean m² estimate over `reps` independent runs is at most
# `target_error`. Results are collected in `m2_recorded` / `n_required` (NaN = not reached).
println("\n=== Starting Top-Level Complexity Study for m² ===")
m2_values = -0.5:0.05:0.5      # Range of m² values to test.
target_error = 0.02         # Acceptable SEM for m² estimate.
# NOTE(review): the SEM below is std / sqrt(reps), so the sample size needed to reach
# `target_error` depends on `reps`; the λ study in this file uses reps = 50.
reps = 10                    # Number of repetitions per m² value.
lattice_size = 4
λ_true = 1.0               # Fixed value for λ.
a = 1.0

n_required = Float64[]      # Float64 so that NaN can mark "target never reached".
m2_recorded  = Float64[]     # The m² values corresponding to `n_required`.

for m2 in m2_values
    println("\nRunning m² = $m2")
    # The model depends only on m², so build it once instead of once per trial.
    model = Phi4Model2D(lattice_size, λ_true, m2, a)
    # Tracked as a float from the start so the variable does not change type on the
    # first ×1.1 growth step and the growth is not truncated.
    n_samples = 500.0
    max_samples = 1_000_000.0 # Maximum allowed number of samples.
    found = false
    println("Starting sample count: ", round(Int, n_samples))

    while n_samples ≤ max_samples
        # Integer sample count actually handed to the sampler; also what gets reported.
        n_draws = round(Int, n_samples)
        m2_estimates = Float64[]
        for trial in 1:reps
            println("  Trial $trial with n_samples = $n_draws")

            samples = generate_phi4_data_hmc_2d(model, n_draws, 2000)
            # Diagnostic only: hidden unless debug logging is enabled.
            @debug "Generated samples" count = length(samples) type = typeof(samples)

            # Estimate parameters using score matching; only m² is of interest here.
            _, m2_est = estimate_parameters_score_matching_2d(samples, a)
            push!(m2_estimates, m2_est)
            println("    Estimated m² = $(m2_est)")
        end

        # Standard error of the mean (SEM) of the m² estimates over the repetitions.
        sem_m2 = std(m2_estimates) / sqrt(reps)
        println("  n = $(n_draws) → SEM(m²) = $sem_m2")

        if sem_m2 ≤ target_error
            println("  ✓ m² = $m2: n = $(n_draws) suffices")
            push!(n_required, n_draws)
            push!(m2_recorded, m2)
            found = true
            break
        else
            n_samples *= 1.1
        end
    end

    if !found
        println("  ✗ m² = $m2: No sample size reached target error")
        push!(n_required, NaN)
        push!(m2_recorded, m2)
    end
end

println("\nFinal Results:")
println("m² values: ", m2_recorded)
println("Required sample sizes: ", n_required)

