diff --git a/docs/src/lowlevel.md b/docs/src/lowlevel.md index fe2bbd0..3d8d0f3 100644 --- a/docs/src/lowlevel.md +++ b/docs/src/lowlevel.md @@ -121,6 +121,29 @@ ACCEPTANCE_QUANTILES explore_local_acceptance_ratios ``` +## Reporting information during runs + +Samplers take an [`AbstractReport`](@ref) argument, which is then used for reporting. The interface is as follows. + +```@docs +DynamicHMC.AbstractReport +DynamicHMC.report! +DynamicHMC.start_progress! +DynamicHMC.end_progress! +``` + +The default is +```@docs +ReportIO +``` + +Reporting information can be suppressed with +```@docs +ReportSilent +``` + +Other interfaces should define similar types. + ## Utilities and miscellanea ```@docs diff --git a/src/DynamicHMC.jl b/src/DynamicHMC.jl index 9c1874b..cbd8aee 100644 --- a/src/DynamicHMC.jl +++ b/src/DynamicHMC.jl @@ -5,1191 +5,19 @@ import Base: rand, length, show import Base.LinAlg.checksquare using ArgCheck: @argcheck -import Compat # for DomainError(val, msg) in v0.6 -using DataStructures +using Compat: DomainError # for DomainError(val, msg) in v0.6 +using DataStructures: counter using DiffResults: value, gradient using DocStringExtensions: SIGNATURES, FIELDS using Parameters: @unpack import StatsFuns: logsumexp -export - # Hamiltonian - KineticEnergy, EuclideanKE, GaussianKE, - # transition - NUTS_Transition, get_position, get_neg_energy, get_depth, get_termination, - get_acceptance_rate, get_steps, NUTS, mcmc, mcmc_adapting_ϵ, - # tuning and diagnostics - NUTS_init_tune_mcmc, sample_cov, EBFMI, NUTS_statistics, get_position_matrix - - -# Hamiltonian and leapfrog - -""" -Kinetic energy specifications. - -For all subtypes, it is assumed that kinetic energy is symmetric in -the momentum `p`, ie. - -```julia -neg_energy(::KineticEnergy, p, q) == neg_energy(::KineticEnergy, -p, q) -``` - -When the above is violated, various implicit assumptions will not hold. -""" -abstract type KineticEnergy end - -"Euclidean kinetic energies (position independent)." -abstract type EuclideanKE <: KineticEnergy end - -""" -Gaussian kinetic energy. - -```math -p \\mid q ∼ \\text{Normal}(0, M) \\qquad (\\text{importantly, independent of \$q\$}) -``` - -The inverse covariance `M⁻¹` is stored. -""" -struct GaussianKE{T <: AbstractMatrix, S <: AbstractMatrix} <: EuclideanKE - "M⁻¹" - Minv::T - "W such that W*W'=M. Used for generating random draws." - W::S - function GaussianKE{T, S}(Minv, W) where {T, S} - @argcheck checksquare(Minv) == checksquare(W) - new(Minv, W) - end -end - -GaussianKE(M::T, W::S) where {T,S} = GaussianKE{T,S}(M, W) - -""" - GaussianKE(M⁻¹::AbstractMatrix) - -Gaussian kinetic energy with the given inverse covariance matrix `M⁻¹`. -""" -GaussianKE(Minv::AbstractMatrix) = GaussianKE(Minv, inv(chol(Minv))) - -""" - GaussianKE(N::Int, [m⁻¹ = 1.0]) - -Gaussian kinetic energy with a diagonal inverse covariance matrix `M⁻¹=m⁻¹*I`. -""" -GaussianKE(N::Int, m⁻¹ = 1.0) = GaussianKE(Diagonal(fill(m⁻¹, N))) - -show(io::IO, κ::GaussianKE) = - print(io::IO, "Gaussian kinetic energy, √diag(M⁻¹): $(.√(diag(κ.Minv)))") - -""" - neg_energy(κ, p, [q]) - -Return the log density of kinetic energy `κ`, at momentum `p`. Some kinetic -energies (eg Riemannian geometry) will need `q`, too. -""" -neg_energy(κ::GaussianKE, p, q = nothing) = -dot(p, κ.Minv * p) / 2 - -""" - get_p♯(κ, p, [q]) - -Return ``p\sharp``, used for turn diagnostics. 
-""" -get_p♯(κ::GaussianKE, p, q = nothing) = κ.Minv * p - -loggradient(κ::GaussianKE, p, q = nothing) = -get_p♯(κ, p) - -rand(rng, κ::GaussianKE, q = nothing) = κ.W * randn(rng, size(κ.W, 1)) - -""" - Hamiltonian(ℓ, κ) - -Construct a Hamiltonian from the log density `ℓ`, and the kinetic energy -specification `κ`. Calls of `ℓ` with a vector are expected to return a value -that supports `DiffResults.value` and `DiffResults.gradient`. -""" -struct Hamiltonian{Tℓ, Tκ} - "The (log) density we are sampling from." - ℓ::Tℓ - "The kinetic energy." - κ::Tκ -end - -show(io::IO, H::Hamiltonian) = print(io, "Hamiltonian with $(H.κ)") - -""" - is_valid_ℓq(ℓq) - -Test that a value returned by ℓ is *valid*, in the following sense: - -1. supports `DiffResults.value` and `DiffResults.gradient` (when not, a -`MethodError` is thrown), - -2. the value is a float, either `-Inf` or finite, - -3. the gradient is finite when the value is; otherwise the gradient is ignored. -""" -function is_valid_ℓq(ℓq) - v = value(ℓq) - v isa AbstractFloat || return false - (v == -Inf) || (isfinite(v) && all(isfinite, gradient(ℓq))) -end - -""" -A point in phase space, consists of a position and a momentum. - -Log densities and gradients are saved for speed gains, so that the gradient of ℓ -at q is not calculated twice for every leapfrog step (both as start- and -endpoints). - -Because of caching, a `PhasePoint` should only be used with a specific -Hamiltonian. -""" -struct PhasePoint{T,S} - "Position." - q::T - "Momentum." - p::T - "ℓ(q). Cached for reuse in sampling." - ℓq::S - function PhasePoint(q::T, p::T, ℓq::S) where {T,S} - @argcheck is_valid_ℓq(ℓq) DomainError("Invalid value of ℓ.") - @argcheck length(p) == length(q) - new{T,S}(q, p, ℓq) - end -end - -""" - get_ℓq(z) - -The value returned by `ℓ` when evaluated at position `q`. -""" -get_ℓq(z::PhasePoint) = z.ℓq - -""" - phasepoint_in(H::Hamiltonian, q, p) - -The recommended interface for creating a phase point in a Hamiltonian. Computes -cached values. -""" -phasepoint_in(H::Hamiltonian, q, p) = PhasePoint(q, p, H.ℓ(q)) - -""" - rand_phasepoint(rng, H, q) - -Extend a position `q` to a phasepoint with a random momentum according to the -kinetic energy of `H`. -""" -rand_phasepoint(rng, H, q) = phasepoint_in(H, q, rand(rng, H.κ)) - -""" - $SIGNATURES - -Log density for Hamiltonian `H` at point `z`. - -If `ℓ(q) == -Inf` (rejected), ignores the kinetic energy. -""" -function neg_energy(H::Hamiltonian, z::PhasePoint) - v = value(get_ℓq(z)) - v == -Inf ? v : (v + neg_energy(H.κ, z.p, z.q)) -end - -get_p♯(H::Hamiltonian, z::PhasePoint) = get_p♯(H.κ, z.p, z.q) - -""" - leapfrog(H, z, ϵ) - -Take a leapfrog step of length `ϵ` from `z` along the Hamiltonian `H`. - -Return the new position. - -The leapfrog algorithm uses the gradient of the next position to evolve the -momentum. If this is not finite, the momentum won't be either. Since the -constructor `PhasePoint` validates its arguments, this can only happen for -divergent points anyway, and should not cause a problem. -""" -function leapfrog{Tℓ, Tκ <: EuclideanKE}(H::Hamiltonian{Tℓ,Tκ}, z::PhasePoint, ϵ) - @unpack ℓ, κ = H - @unpack p, q, ℓq = z - pₘ = p + ϵ/2 * gradient(ℓq) - q′ = q - ϵ * loggradient(κ, pₘ) - ℓq′ = ℓ(q′) - p′ = pₘ + ϵ/2 * gradient(ℓq′) - PhasePoint(q′, p′, ℓq′) -end - - -# stepsize heuristics and adaptation - -""" -Parameters for the search algorithm for the initial stepsize. 
- -The algorithm finds an initial stepsize ``ϵ`` so that the local acceptance ratio -``A(ϵ)`` satisfies - -```math -a_\\text{min} ≤ A(ϵ) ≤ a_\\text{max} -``` - -This is achieved by an initial bracketing, then bisection. - -$FIELDS - -!!! note - - Cf. Hoffman and Gelman (2014), which does not ensure bounds for the - acceptance ratio, just that it has crossed a threshold. This version seems - to work better for some tricky posteriors with high curvature. -""" -struct InitialStepsizeSearch - "Lowest local acceptance rate." - a_min::Float64 - "Highest local acceptance rate." - a_max::Float64 - "Initial stepsize." - ϵ₀::Float64 - "Scale factor for initial bracketing, > 1. *Default*: `2.0`." - C::Float64 - "Maximum number of iterations for initial bracketing." - maxiter_crossing::Int - "Maximum number of iterations for bisection." - maxiter_bisect::Int - function InitialStepsizeSearch(; a_min = 0.25, a_max = 0.75, ϵ₀ = 1.0, - C = 2.0, - maxiter_crossing = 400, maxiter_bisect = 400) - @argcheck 0 < a_min < a_max < 1 - @argcheck 0 < ϵ₀ - @argcheck 1 < C - @argcheck maxiter_crossing ≥ 50 - @argcheck maxiter_bisect ≥ 50 - new(a_min, a_max, ϵ₀, C, maxiter_crossing, maxiter_bisect) - end -end - -""" -Find the stepsize for which the local acceptance rate `A(ϵ)` crosses `a`. - - $SIGNATURES - -Return `ϵ₀, A(ϵ₀), ϵ₁`, A(ϵ₁)`, where `ϵ₀` and `ϵ₁` are stepsizes before and -after crossing `a` with `A(ϵ)`, respectively. - -Assumes that ``A(ϵ₀) ∉ (a_\\text{min}, a_\\text{max})``, where the latter are -defined in `parameters`. - -- `parameters`: parameters for the iteration. - -- `A`: local acceptance ratio (uncapped), a function of stepsize `ϵ` - -- `ϵ₀`, `Aϵ₀`: initial value of `ϵ`, and `A(ϵ₀)` -""" -function find_crossing_stepsize(parameters, A, ϵ₀, Aϵ₀ = A(ϵ₀)) - @unpack a_min, a_max, C, maxiter_crossing = parameters - s, a = Aϵ₀ > a_max ? (1.0, a_max) : (-1.0, a_min) - if s < 0 # when A(ϵ) < a, - C = 1/C # decrease ϵ - end - for _ in 1:maxiter_crossing - ϵ = ϵ₀ * C - Aϵ = A(ϵ) - if s*(Aϵ - a) ≤ 0 - return ϵ₀, Aϵ₀, ϵ, Aϵ - else - ϵ₀ = ϵ - Aϵ₀ = Aϵ - end - end - # should never each this, miscoded log density? - dir = s > 0 ? "below" : "above" - error("Reached maximum number of iterations searching for ϵ from $(dir).") -end - -""" -Return the desired stepsize `ϵ` by bisection. - - $SIGNATURES - -- `parameters`: algorithm parameters, see [`InitialStepsizeSearch`](@ref) - -- `A`: local acceptance ratio (uncapped), a function of stepsize `ϵ` - -- `ϵ₀`, `ϵ₁`, `Aϵ₀`, `Aϵ₁`: stepsizes and acceptance rates (latter optional). - -This function assumes that ``ϵ₀ < ϵ₁``, the stepsize is not yet acceptable, and -the cached `A` values have the correct ordering. -""" -function bisect_stepsize(parameters, A, ϵ₀, ϵ₁, Aϵ₀ = A(ϵ₀), Aϵ₁ = A(ϵ₁)) - @unpack a_min, a_max, maxiter_bisect = parameters - @argcheck ϵ₀ < ϵ₁ - @argcheck Aϵ₀ > a_max && Aϵ₁ < a_min - for _ in 1:maxiter_bisect - ϵₘ = middle(ϵ₀, ϵ₁) - Aϵₘ = A(ϵₘ) - if a_min ≤ Aϵₘ ≤ a_max # in - return ϵₘ - elseif Aϵₘ < a_min # above - ϵ₁ = ϵₘ - Aϵ₁ = Aϵₘ - else # below - ϵ₀ = ϵₘ - Aϵ₀ = Aϵₘ - end - end - # should never each this, miscoded log density? - error("Reached maximum number of iterations while bisecting interval for ϵ.") -end - -""" - $SIGNATURES - -Find an initial stepsize that matches the conditions of `parameters` (see -[`InitialStepsizeSearch`](@ref)). - -`A` is the local acceptance ratio (uncapped). When given a Hamiltonian `H` and a -phasepoint `z`, it will be calculated using [`local_acceptance_ratio`](@ref). 
-""" -function find_initial_stepsize(parameters::InitialStepsizeSearch, A) - @unpack a_min, a_max, ϵ₀ = parameters - Aϵ₀ = A(ϵ₀) - if a_min ≤ Aϵ₀ ≤ a_max - ϵ₀ - else - ϵ₀, Aϵ₀, ϵ₁, Aϵ₁ = find_crossing_stepsize(parameters, A, ϵ₀, Aϵ₀) - if a_min ≤ Aϵ₁ ≤ a_max # in interval - ϵ₁ - elseif ϵ₀ < ϵ₁ # order as necessary - bisect_stepsize(parameters, A, ϵ₀, ϵ₁, Aϵ₀, Aϵ₁) - else - bisect_stepsize(parameters, A, ϵ₁, ϵ₀, Aϵ₁, Aϵ₀) - end - end -end - -find_initial_stepsize(parameters::InitialStepsizeSearch, H, z) = - find_initial_stepsize(parameters, local_acceptance_ratio(H, z)) - -""" - $(SIGNATURES) - -Return a function of the stepsize (``ϵ``) that calculates the local acceptance -ratio for a single leapfrog step around `z` along the Hamiltonian `H`. Formally, -let - -```math -A(ϵ) = \\exp(\\text{neg_energy}(H, \\text{leapfrog}(H, z, ϵ)) - \\text{neg_energy}(H, z)) -``` - -Note that the ratio is not capped by `1`, so it is not a valid probability -*per se*. -""" -function local_acceptance_ratio(H, z) - target = neg_energy(H, z) - isfinite(target) || - throw(DomainError(z.p, "Starting point has non-finite density.")) - ϵ -> exp(neg_energy(H, leapfrog(H, z, ϵ)) - target) -end - -""" - $(SIGNATURES) - -Return a matrix of [`local_acceptance_ratio`](@ref) values for stepsizes `ϵs` -and the given momentums `ps`. The latter is calculated from random values when -an integer is given. - -To facilitate plotting, ``-∞`` values are replaced by `NaN`. -""" -function explore_local_acceptance_ratios(H, q, ϵs, ps) - R = hcat([local_acceptance_ratio(H, q, p).(ϵs) for p in ps]...) - R[isinfinite.(R)] .= NaN - R -end - -explore_local_acceptance_ratios(H, q, ϵs, N::Int) = - explore_local_acceptance_ratios(H, q, ϵs, [rand(H.κ) for _ in 1:N]) - -""" -Parameters for the dual averaging algorithm of Gelman and Hoffman (2014, -Algorithm 6). - -To get reasonable defaults, initialize with -`DualAveragingParameters(logϵ₀)`. See [`adapting_ϵ`](@ref) for a joint -constructor. -""" -struct DualAveragingParameters{T} - μ::T - "target acceptance rate" - δ::T - "regularization scale" - γ::T - "relaxation exponent" - κ::T - "offset" - t₀::Int - function DualAveragingParameters{T}(μ, δ, γ, κ, t₀) where {T} - @argcheck 0 < δ < 1 - @argcheck γ > 0 - @argcheck 0.5 < κ ≤ 1 - @argcheck t₀ ≥ 0 - new(μ, δ, γ, κ, t₀) - end -end - -DualAveragingParameters(μ::T, δ::T, γ::T, κ::T, t₀::Int) where T = - DualAveragingParameters{T}(μ, δ, γ, κ, t₀) - -DualAveragingParameters(logϵ₀; δ = 0.8, γ = 0.05, κ = 0.75, t₀ = 10) = - DualAveragingParameters(promote(log(10) + logϵ₀, δ, γ, κ)..., t₀) - -"Current state of adaptation for `ϵ`. Use `DualAverageingAdaptation(logϵ₀)` to -get an initial value. See [`adapting_ϵ`](@ref) for a joint constructor." -struct DualAveragingAdaptation{T <: AbstractFloat} - m::Int - H̄::T - logϵ::T - logϵ̄::T -end - -""" - get_ϵ(A, tuning = true) - -When `tuning`, return the stepsize `ϵ` for the next HMC step. Otherwise return -the tuned `ϵ`. -""" -get_ϵ(A::DualAveragingAdaptation, tuning = true) = exp(tuning ? A.logϵ : A.logϵ̄) - -DualAveragingAdaptation(logϵ₀) = - DualAveragingAdaptation(0, zero(logϵ₀), logϵ₀, zero(logϵ₀)) - -""" - DA_params, A = adapting_ϵ(ϵ; args...) - -Constructor for both the adaptation parameters and the initial state. -""" -function adapting_ϵ(ϵ; args...) 
- logϵ = log(ϵ) - DualAveragingParameters(logϵ; args...), DualAveragingAdaptation(logϵ) -end - -""" - A′ = adapt_stepsize(parameters, A, a) - -Update the adaptation `A` of log stepsize `logϵ` with average Metropolis -acceptance rate `a` over the whole visited trajectory, using the dual averaging -algorithm of Gelman and Hoffman (2014, Algorithm 6). Return the new adaptation. -""" -function adapt_stepsize(parameters::DualAveragingParameters, - A::DualAveragingAdaptation, a) - @argcheck 0 ≤ a ≤ 1 - @unpack μ, δ, γ, κ, t₀ = parameters - @unpack m, H̄, logϵ, logϵ̄ = A - m += 1 - H̄ += (δ - a - H̄) / (m+t₀) - logϵ = μ - √m/γ*H̄ - logϵ̄ += m^(-κ)*(logϵ - logϵ̄) - DualAveragingAdaptation(m, H̄, logϵ, logϵ̄) -end - - # random booleans - -""" - rand_bool(rng, prob) - -Random boolean which is `true` with the given probability `prob`. - -All random numbers in this library are obtained from this function. -""" -rand_bool{T <: AbstractFloat}(rng, prob::T) = rand(rng, T) ≤ prob - - -# abstract trajectory interface - -""" - ζ, τ, d, z = adjacent_tree(rng, trajectory, z, depth, fwd) - -Traverse the tree of given `depth` adjacent to point `z` in `trajectory`. - -`fwd` specifies the direction, `rng` is used for random numbers. - -Return: - -- `ζ`: the proposal from the tree. Only valid when `!isdivergent(d) && - !isturning(τ)`, otherwise the value should not be used. - -- `τ`: turn statistics. Only valid when `!isdivergent(d)`. - -- `d`: divergence statistics, always valid. - -- `z`: the point at the end of the tree. - -`trajectory` should support the following interface: - -- Starting from leaves: `ζ, τ, d = leaf(trajectory, z, isinitial)` - -- Moving along the trajectory: `z = move(trajectory, z, fwd)` - -- Testing for turning and divergence: `isturning(τ)`, `isdivergent(d)` - -- Combination of return values: `combine_proposals(ζ₁, ζ₂, bias)`, - `combine_turnstats(τ₁, τ₂)`, and `combine_divstats(d₁, d₂)` -""" -function adjacent_tree(rng, trajectory, z, depth, fwd) - if depth == 0 - z = move(trajectory, z, fwd) - ζ, τ, d = leaf(trajectory, z, false) - ζ, τ, d, z - else - ζ₋, τ₋, d₋, z = adjacent_tree(rng, trajectory, z, depth-1, fwd) - (isdivergent(d₋) || (depth > 1 && isturning(τ₋))) && return ζ₋, τ₋, d₋, z - ζ₊, τ₊, d₊, z = adjacent_tree(rng, trajectory, z, depth-1, fwd) - d = combine_divstats(d₋, d₊) - (isdivergent(d) || (depth > 1 && isturning(τ₊))) && return ζ₊, τ₊, d, z - τ = fwd ? combine_turnstats(τ₋, τ₊) : combine_turnstats(τ₊, τ₋) - ζ = isturning(τ) ? nothing : combine_proposals(rng, ζ₋, ζ₊, false) - ζ, τ, d, z - end -end - -"Reason for terminating a trajectory." -@enum Termination MaxDepth AdjacentDivergent AdjacentTurn DoubledTurn - -""" - ζ, d, termination, depth = sample_trajectory(rng, trajectory, z, max_depth) - -Sample a `trajectory` starting at `z`. - -Return: - -- `ζ`: proposal from the tree - -- `d`: divergence statistics - -- `termination`: reason for termination (see [`Termination`](@ref)) - -- `depth`: the depth of the tree that as sampled from. Doubling steps that lead - to an invalid tree do not contribute to `depth`. - -See [`adjacent_tree`](@ref) for the interface that needs to be supported by -`trajectory`. -""" -function sample_trajectory(rng, trajectory, z, max_depth) - ζ, τ, d = leaf(trajectory, z, true) - z₋ = z₊ = z - depth = 0 - termination = MaxDepth - while depth < max_depth - fwd = rand_bool(rng, 0.5) - ζ′, τ′, d′, z = adjacent_tree(rng, trajectory, fwd ? 
z₊ : z₋, depth, fwd) - d = combine_divstats(d, d′) - isdivergent(d) && (termination = AdjacentDivergent; break) - (depth > 0 && isturning(τ′)) && (termination = AdjacentTurn; break) - ζ = combine_proposals(rng, ζ, ζ′, true) - τ = fwd ? combine_turnstats(τ, τ′) : combine_turnstats(τ′, τ) - fwd ? z₊ = z : z₋ = z - depth += 1 - isturning(τ) && (termination = DoubledTurn; break) - end - ζ, d, termination, depth -end - - -# proposals - -""" -Proposal that is propagated through by sampling recursively when building the -trees. -""" -struct Proposal{Tz,Tf} - "Proposed point." - z::Tz - "Log weight (log(∑ exp(Δ)) of trajectory/subtree)." - ω::Tf -end - -""" - logprob, ω = combined_logprob_logweight(ω₁, ω₂, bias) - -Given (relative) log probabilities `ω₁` and `ω₂`, return the log probabiliy of -drawing a sampel from the second (`logprob`) and the combined (relative) log -probability (`ω`). - -When `bias`, biases towards the second argument, introducing anti-correlations. -""" -function combined_logprob_logweight(ω₁, ω₂, bias) - ω = logsumexp(ω₁, ω₂) - ω₂ - (bias ? ω₁ : ω), ω -end - -""" - combine_proposals(rng, ζ₁, ζ₂, bias) - -Combine proposals from two trajectories, using their weights. - -When `bias`, biases towards the second proposal, introducing anti-correlations. -""" -function combine_proposals(rng, ζ₁::Proposal, ζ₂::Proposal, bias) - logprob, ω = combined_logprob_logweight(ζ₁.ω, ζ₂.ω, bias) - z = (logprob ≥ 0 || rand_bool(rng, exp(logprob))) ? ζ₂.z : ζ₁.z - Proposal(z, ω) -end - - -# divergence statistics - -""" -Divergence and acceptance statistics. - -Calculated over all visited phase points (not just the tree that is sampled -from). -""" -struct DivergenceStatistic{Tf} - "`true` iff the sampler was terminated because of divergence." - divergent::Bool - "Sum of metropolis acceptances probabilities over the whole trajectory - (including invalid parts)." - ∑a::Tf - "Total number of leapfrog steps." - steps::Int -end - -""" - divergence_statistic() - -Empty divergence statistic (for initial node). -""" -divergence_statistic() = DivergenceStatistic(false, 0.0, 0) - -""" - divergence_statistic(isdivergent, Δ) - -Divergence statistic for leaves. `Δ` is the log density relative to the initial -point. -""" -divergence_statistic(isdivergent, Δ) = - DivergenceStatistic(isdivergent, Δ ≥ 0 ? one(Δ) : exp(Δ), 1) - -""" - isdivergent(x) - -Test if divergence statistic `x` indicates divergence. -""" -isdivergent(x::DivergenceStatistic) = x.divergent - -""" - combine_divstats(x, y) - -Combine divergence statistics from (subtrees) `x` and `y`. A divergent subtree -make a subtree divergent. -""" -function combine_divstats(x::DivergenceStatistic, y::DivergenceStatistic) - DivergenceStatistic(x.divergent || y.divergent, - x.∑a + y.∑a, x.steps + y.steps) -end - -""" - get_acceptance_rate(x) - -Return average Metropolis acceptance rate. -""" -get_acceptance_rate(x::DivergenceStatistic) = x.∑a / x.steps - - -# turn analysis - -""" -Statistics for the identification of turning points. See Betancourt (2017, -appendix). -""" -struct TurnStatistic{T} - p♯₋::T - p♯₊::T - ρ::T -end - -""" - combine_turnstats(x, y) - -Combine turn statistics of two trajectories `x` and `y`, which are assume to be -adjacent and in that order. -""" -combine_turnstats(x::TurnStatistic, y::TurnStatistic) = - TurnStatistic(x.p♯₋, y.p♯₊, x.ρ + y.ρ) - -""" - isturning(τ) - -Test termination based on turn statistics. Uses the generalized NUTS criterion -from Betancourt (2017). 
- -Note that this function should not be called with turn statistics returned by -[`leaf`](@ref), ie `depth > 0` is required. -""" -function isturning(τ::TurnStatistic) - @unpack p♯₋, p♯₊, ρ = τ - dot(p♯₋, ρ) < 0 || dot(p♯₊, ρ) < 0 -end - - -# sampling - -""" -Representation of a trajectory, ie a Hamiltonian with a discrete integrator that -also checks for divergence. -""" -struct Trajectory{TH,Tf} - "Hamiltonian." - H::TH - "Log density of z (negative log energy) at initial point." - π₀::Tf - "Stepsize for leapfrog." - ϵ::Tf - "Smallest decrease allowed in the log density." - min_Δ::Tf -end - -""" - Trajectory(H, π₀, ϵ; min_Δ = -1000.0) - -Convenience constructor for trajectory. -""" -Trajectory(H, π₀, ϵ; min_Δ = -1000.0) = Trajectory(H, π₀, ϵ, min_Δ) - -""" - ζ, τ, d = leaf(trajectory, z, isinitial) - -Construct a proposal, turn statistic, and divergence statistic for a single -point `z` in `trajectory`. When `isinitial`, `z` is the initial point in the -trajectory. - -Return - -- `ζ`: the proposal, which should only be used when `!isdivergent(d)` - -- `τ`: the turn statistic, which should only be used when `!isdivergent(d)` - -- `d`: divergence statistic -""" -function leaf(trajectory::Trajectory, z, isinitial) - @unpack H, π₀, min_Δ = trajectory - Δ = isinitial ? zero(π₀) : neg_energy(H, z) - π₀ - isdiv = min_Δ > Δ - d = isinitial ? divergence_statistic() : divergence_statistic(isdiv, Δ) - ζ = isdiv ? nothing : Proposal(z, Δ) - τ = isdiv ? nothing : (p♯ = get_p♯(trajectory.H, z); TurnStatistic(p♯, p♯, z.p)) - ζ, τ, d -end - -""" - move(trajectory, z, fwd) - -Return next phase point adjacent to `z` along `trajectory` in the direction -specified by `fwd`. -""" -function move(trajectory::Trajectory, z, fwd) - @unpack H, ϵ = trajectory - leapfrog(H, z, fwd ? ϵ : -ϵ) -end - -""" -Single transition by the No-U-turn sampler. Contains new position and -diagnostic information. -""" -struct NUTS_Transition{Tv,Tf} - "New position." - q::Tv - "Log density (negative energy)." - π::Tf - "Depth of the tree." - depth::Int - "Reason for termination." - termination::Termination - "Average acceptance probability." - a::Tf - "Number of leapfrog steps evaluated." - steps::Int -end - -"Position after transition." -get_position(x::NUTS_Transition) = x.q - -"Negative energy of the Hamiltonian at the position." -get_neg_energy(x::NUTS_Transition) = x.π - -"Tree depth." -get_depth(x::NUTS_Transition) = x.depth - -"Reason for termination, see [`Termination`](@ref)." -get_termination(x::NUTS_Transition) = x.termination - -"Average acceptance rate over trajectory." -get_acceptance_rate(x::NUTS_Transition) = x.a - -"Number of integrator steps." -get_steps(x::NUTS_Transition) = x.steps - -""" - NUTS_transition(rng, H, q, ϵ, max_depth; args...) - -No-U-turn Hamiltonian Monte Carlo transition, using Hamiltonian `H`, starting at -position `q`, using stepsize `ϵ`. Builds a doubling dynamic tree of maximum -depth `max_depth`. `args` are passed to the `Trajectory` constructor. `rng` is -the random number generator used. -""" -function NUTS_transition(rng, H, q, ϵ, max_depth; args...) - z = rand_phasepoint(rng, H, q) - trajectory = Trajectory(H, neg_energy(H, z), ϵ; args...) 
- ζ, d, termination, depth = sample_trajectory(rng, trajectory, z, max_depth) - NUTS_Transition(ζ.z.q, neg_energy(H, ζ.z), depth, termination, - get_acceptance_rate(d), d.steps) -end - - -# high-level interface: sampler - -""" -Specification for the No-U-turn algorithm, including the random number -generator, Hamiltonian, the initial position, and various parameters. -""" -struct NUTS{Tv, Tf, TR, TH} - "Random number generator." - rng::TR - "Hamiltonian" - H::TH - "position" - q::Tv - "stepsize" - ϵ::Tf - "maximum depth of the tree" - max_depth::Int -end - -function show(io::IO, nuts::NUTS) - @unpack q, ϵ, max_depth = nuts - println(io, "NUTS sampler in $(length(q)) dimensions") - println(io, " stepsize (ϵ) ≈ $(signif(ϵ, 3))") - println(io, " maximum depth = $(max_depth)") - println(io, " $(nuts.H.κ)") -end - -""" - mcmc(sampler, N) - -Run the MCMC `sampler` for `N` iterations, returning the results as a vector, -which has elements that conform to the sampler. -""" -function mcmc(sampler::NUTS{Tv,Tf}, N::Int) where {Tv,Tf} - @unpack rng, H, q, ϵ, max_depth = sampler - sample = Vector{NUTS_Transition{Tv,Tf}}(N) - for i in 1:N - trans = NUTS_transition(rng, H, q, ϵ, max_depth) - q = trans.q - sample[i] .= trans - end - sample -end - -""" - sample, A = mcmc_adapting_ϵ(rng, sampler, N, [A_params, A]) - -Same as [`mcmc`](@ref), but [`tune`](@ref) stepsize ϵ according to the -parameters `A_params` and initial state `A`. Return the updated `A` as the -second value. - -When the last two parameters are not specified, initialize using `adapting_ϵ`. -""" -function mcmc_adapting_ϵ(sampler::NUTS{Tv,Tf}, N::Int, A_params, A) where {Tv,Tf} - @unpack rng, H, q, max_depth = sampler - sample = Vector{NUTS_Transition{Tv,Tf}}(N) - for i in 1:N - trans = NUTS_transition(rng, H, q, get_ϵ(A), max_depth) - A = adapt_stepsize(A_params, A, trans.a) - q = trans.q - sample[i] .= trans - end - sample, A -end - -mcmc_adapting_ϵ(sampler::NUTS, N) = - mcmc_adapting_ϵ(sampler, N, adapting_ϵ(sampler.ϵ)...) - -""" - variable_matrix(posterior) - -Return the samples of the parameter vector as rows of a matrix. -""" -get_position_matrix(sample) = vcat(get_position.(sample)'...) - - -# tuning and diagnostics - - -""" - sample_cov(sample) - -Covariance matrix of the sample. -""" -sample_cov(sample) = cov(get_position_matrix(sample), 1) - -""" - EBFMI(sample) - -Energy Bayesian fraction of missing information. Useful for diagnosing poorly -chosen kinetic energies. - -Low values (`≤ 0.3`) are considered problematic. See Betancourt (2016). -""" -EBFMI(sample) = (πs = get_neg_energy.(sample); mean(abs2, diff(πs)) / var(πs)) - -""" - NUTS_init(rng, ℓ, q; κ = GaussianKE(length(q)), p, max_depth, ϵ) - -Initialize a NUTS sampler for log density `ℓ` using local information. - -# Arguments - -- `rng`: the random number generator - -- `ℓ`: the likelihood function, should return a type that supports - `DiffResults.value` and `DiffResults.gradient` - -- `q`: initial position. - -- `κ`: kinetic energy specification. *Default*: Gaussian with identity matrix. - -- `p`: initial momentum. *Default*: random from standard multivariate normal. - -- `max_depth`: maximum tree depth. *Default*: `5`. - -- `ϵ`: initial stepsize, or parameters for finding it (passed on to - [`find_initial_stepsize`](@ref). 
-""" -function NUTS_init(rng, ℓ, q; - κ = GaussianKE(length(q)), - p = rand(rng, κ), - max_depth = 5, - ϵ = InitialStepsizeSearch()) - H = Hamiltonian(ℓ, κ) - z = phasepoint_in(H, q, p) - if !(ϵ isa Float64) - ϵ = find_initial_stepsize(ϵ, H, z) - end - NUTS(rng, H, q, ϵ, max_depth) -end - -""" - NUTS_init(rng, ℓ, dim::Integer; args...) - -Random initialization with position `randn(dim)`, all other arguments are passed -on the the other method of this function. -""" -NUTS_init(rng, ℓ, dim::Integer; args...) = NUTS_init(rng, ℓ, randn(dim); args...) - - -# tuning: abstract interface - -""" -A tuner that adapts the sampler. - -All subtypes support `length` which returns the number of steps (*note*: if not -in field `N`, define `length` accordingly), other parameters vary. -""" -abstract type AbstractTuner end - -length(tuner::AbstractTuner) = tuner.N - -""" - sampler′ = tune(sampler, tune) - -Given a `sampler` (or similar a parametrization) and a `tuner`, return the -updated sampler state after tuning. -""" -function tune end - - -# tuning: tuner building blocks - -"Adapt the integrator stepsize for `N` samples." -struct StepsizeTuner <: AbstractTuner - N::Int -end - -show(io::IO, tuner::StepsizeTuner) = - print(io, "Stepsize tuner, $(tuner.N) samples") - -function tune(sampler::NUTS, tuner::StepsizeTuner) - @unpack rng, H, max_depth = sampler - sample, A = mcmc_adapting_ϵ(sampler, tuner.N) - NUTS(rng, H, sample[end].q, get_ϵ(A, false), max_depth) -end - -""" -Tune the integrator stepsize and covariance. Covariance tuning is from scratch -(no prior information is used), regularized towards the identity matrix. -""" -struct StepsizeCovTuner{Tf} <: AbstractTuner - "Number of samples." - N::Int - """ - Regularization factor for normalizing variance. An estimated covariance - matrix `Σ` is rescaled by `regularize/sample size`` towards `σ²I`, where - `σ²` is the median of the diagonal. - """ - regularize::Tf -end - -function show(io::IO, tuner::StepsizeCovTuner) - @unpack N, regularize = tuner - print(io, "Stepsize and covariance tuner, $(N) samples, regularization $(regularize)") -end - -function tune(sampler::NUTS, tuner::StepsizeCovTuner) - @unpack regularize, N = tuner - @unpack rng, H, max_depth = sampler - sample, A = mcmc_adapting_ϵ(sampler, N) - Σ = sample_cov(sample) - Σ .+= (UniformScaling(median(diag(Σ)))-Σ) * regularize/N - κ = GaussianKE(Σ) - NUTS(rng, Hamiltonian(H.ℓ, κ), sample[end].q, get_ϵ(A), max_depth) -end - -"Sequence of tuners, applied in the given order." -struct TunerSequence{T} <: AbstractTuner - tuners::T -end - -function show(io::IO, tuner::TunerSequence) - @unpack tuners = tuner - print(io, "Sequence of $(length(tuners)) tuners, $(length(tuner)) total samples") - for t in tuners - print(io, "\n ") - show(io, t) - end -end - -length(seq::TunerSequence) = sum(length, seq.tuners) - -""" - bracketed_doubling_tuner(; [init], [mid], [M], [term], [regularize]) - -A sequence of tuners: - -1. tuning stepsize with `init` steps - -2. tuning stepsize and covariance: first with `mid` steps, then repeat with - twice the steps `M` times - -3. tuning stepsize with `term` steps - -`regularize` is used for covariance regularization. -""" -function bracketed_doubling_tuner(; init = 75, mid = 25, M = 5, term = 50, - regularize = 5.0, _...) 
- tuners = Union{StepsizeTuner, StepsizeCovTuner}[StepsizeTuner(init)] - for _ in 1:M - tuners = push!(tuners, StepsizeCovTuner(mid, regularize)) - mid *= 2 - end - push!(tuners, StepsizeTuner(term)) - TunerSequence((tuners...)) -end - -function tune(sampler, seq::TunerSequence) - for tuner in seq.tuners - sampler = tune(sampler, tuner) - end - sampler -end - -""" - $SIGNATURES - -Init, tune, and then draw `N` samples from `ℓ` using the NUTS algorithm. - -Return the *sample* (a vector of [`NUTS_transition`](@ref)s) and the *tuned -sampler*. - -`rng` is the random number generator. - -`q_or_dim` is a starting position or the dimension (for random initialization). - -`args` are passed on to various methods, see [`NUTS_init`](@ref) and -[`bracketed_doubling_tuner`](@ref). - -For parameters `q`, `ℓ(q)` should return an object that support the following -methods: `DiffResults.value`, `DiffResults.gradient`. - -Most users would use this function, unless they are doing something that -requires manual tuning. -""" -function NUTS_init_tune_mcmc(rng, ℓ, q_or_dim, N::Int; args...) - sampler_init = NUTS_init(rng, ℓ, q_or_dim; args...) - sampler_tuned = tune(sampler_init, bracketed_doubling_tuner(; args...)) - mcmc(sampler_tuned, N), sampler_tuned -end - -""" - $SIGNATURES - -Same as the other method, but with random number generator -`Base.Random.GLOBAL_RNG`. -""" -NUTS_init_tune_mcmc(ℓ, q_or_dim, N::Int; args...) = - NUTS_init_tune_mcmc(Base.Random.GLOBAL_RNG, ℓ, q_or_dim, N; args...) - - -# statistics and diagnostics - -"Acceptance quantiles for [`NUTS_Statistics`](@ref) diagnostic summary." -const ACCEPTANCE_QUANTILES = linspace(0, 1, 5) - -""" -Storing the output of [`NUTS_statistics`](@ref) in a structured way, for pretty -printing. Currently for internal use. -""" -struct NUTS_Statistics{T <: Real, - DT <: Associative{Termination,Int}, - DD <: Associative{Int,Int}} - "Sample length." - N::Int - "average_acceptance" - a_mean::T - "acceptance quantiles" - a_quantiles::Vector{T} - "termination counts" - termination_counts::DT - "depth counts" - depth_counts::DD -end - -""" - NUTS_statistics(sample) - -Return statistics about the sample (ie not the variables). Mostly useful for -NUTS diagnostics. 
-""" -function NUTS_statistics(sample) - as = get_acceptance_rate.(sample) - NUTS_Statistics(length(sample), - mean(as), quantile(as, ACCEPTANCE_QUANTILES), - counter(get_termination.(sample)), counter(get_depth.(sample))) -end - -function show(io::IO, stats::NUTS_Statistics) - @unpack N, a_mean, a_quantiles, termination_counts, depth_counts = stats - println(io, "Hamiltonian Monte Carlo sample of length $(N)") - print(io, " acceptance rate mean: $(round(a_mean,2)), min/25%/median/75%/max:") - for aq in a_quantiles - print(io, " ", round(aq, 2)) - end - println(io) - function print_dict(dict) - for (key, value) in sort(collect(dict), by = first) - print(io, " $(key) => $(round(Int, 100*value/N))%") - end - end - print(io, " termination:") - print_dict(termination_counts) - println(io) - print(io, " depth:") - print_dict(depth_counts) - println(io) -end +include("utilities.jl") +include("hamiltonian.jl") +include("stepsize.jl") +include("buildingblocks.jl") +include("reporting.jl") +include("sampler.jl") +include("diagnostics.jl") end # module diff --git a/src/buildingblocks.jl b/src/buildingblocks.jl new file mode 100644 index 0000000..77a8500 --- /dev/null +++ b/src/buildingblocks.jl @@ -0,0 +1,345 @@ +export + NUTS_Transition, get_position, get_neg_energy, get_depth, get_termination, + get_acceptance_rate, get_steps + + +# abstract trajectory interface + +""" + ζ, τ, d, z = adjacent_tree(rng, trajectory, z, depth, fwd) + +Traverse the tree of given `depth` adjacent to point `z` in `trajectory`. + +`fwd` specifies the direction, `rng` is used for random numbers. + +Return: + +- `ζ`: the proposal from the tree. Only valid when `!isdivergent(d) && + !isturning(τ)`, otherwise the value should not be used. + +- `τ`: turn statistics. Only valid when `!isdivergent(d)`. + +- `d`: divergence statistics, always valid. + +- `z`: the point at the end of the tree. + +`trajectory` should support the following interface: + +- Starting from leaves: `ζ, τ, d = leaf(trajectory, z, isinitial)` + +- Moving along the trajectory: `z = move(trajectory, z, fwd)` + +- Testing for turning and divergence: `isturning(τ)`, `isdivergent(d)` + +- Combination of return values: `combine_proposals(ζ₁, ζ₂, bias)`, + `combine_turnstats(τ₁, τ₂)`, and `combine_divstats(d₁, d₂)` +""" +function adjacent_tree(rng, trajectory, z, depth, fwd) + if depth == 0 + z = move(trajectory, z, fwd) + ζ, τ, d = leaf(trajectory, z, false) + ζ, τ, d, z + else + ζ₋, τ₋, d₋, z = adjacent_tree(rng, trajectory, z, depth-1, fwd) + (isdivergent(d₋) || (depth > 1 && isturning(τ₋))) && return ζ₋, τ₋, d₋, z + ζ₊, τ₊, d₊, z = adjacent_tree(rng, trajectory, z, depth-1, fwd) + d = combine_divstats(d₋, d₊) + (isdivergent(d) || (depth > 1 && isturning(τ₊))) && return ζ₊, τ₊, d, z + τ = fwd ? combine_turnstats(τ₋, τ₊) : combine_turnstats(τ₊, τ₋) + ζ = isturning(τ) ? nothing : combine_proposals(rng, ζ₋, ζ₊, false) + ζ, τ, d, z + end +end + +"Reason for terminating a trajectory." +@enum Termination MaxDepth AdjacentDivergent AdjacentTurn DoubledTurn + +""" + ζ, d, termination, depth = sample_trajectory(rng, trajectory, z, max_depth) + +Sample a `trajectory` starting at `z`. + +Return: + +- `ζ`: proposal from the tree + +- `d`: divergence statistics + +- `termination`: reason for termination (see [`Termination`](@ref)) + +- `depth`: the depth of the tree that as sampled from. Doubling steps that lead + to an invalid tree do not contribute to `depth`. + +See [`adjacent_tree`](@ref) for the interface that needs to be supported by +`trajectory`. 
+""" +function sample_trajectory(rng, trajectory, z, max_depth) + ζ, τ, d = leaf(trajectory, z, true) + z₋ = z₊ = z + depth = 0 + termination = MaxDepth + while depth < max_depth + fwd = rand_bool(rng, 0.5) + ζ′, τ′, d′, z = adjacent_tree(rng, trajectory, fwd ? z₊ : z₋, depth, fwd) + d = combine_divstats(d, d′) + isdivergent(d) && (termination = AdjacentDivergent; break) + (depth > 0 && isturning(τ′)) && (termination = AdjacentTurn; break) + ζ = combine_proposals(rng, ζ, ζ′, true) + τ = fwd ? combine_turnstats(τ, τ′) : combine_turnstats(τ′, τ) + fwd ? z₊ = z : z₋ = z + depth += 1 + isturning(τ) && (termination = DoubledTurn; break) + end + ζ, d, termination, depth +end + + +# proposals + +""" +Proposal that is propagated through by sampling recursively when building the +trees. +""" +struct Proposal{Tz,Tf} + "Proposed point." + z::Tz + "Log weight (log(∑ exp(Δ)) of trajectory/subtree)." + ω::Tf +end + +""" + logprob, ω = combined_logprob_logweight(ω₁, ω₂, bias) + +Given (relative) log probabilities `ω₁` and `ω₂`, return the log probabiliy of +drawing a sampel from the second (`logprob`) and the combined (relative) log +probability (`ω`). + +When `bias`, biases towards the second argument, introducing anti-correlations. +""" +function combined_logprob_logweight(ω₁, ω₂, bias) + ω = logsumexp(ω₁, ω₂) + ω₂ - (bias ? ω₁ : ω), ω +end + +""" + combine_proposals(rng, ζ₁, ζ₂, bias) + +Combine proposals from two trajectories, using their weights. + +When `bias`, biases towards the second proposal, introducing anti-correlations. +""" +function combine_proposals(rng, ζ₁::Proposal, ζ₂::Proposal, bias) + logprob, ω = combined_logprob_logweight(ζ₁.ω, ζ₂.ω, bias) + z = (logprob ≥ 0 || rand_bool(rng, exp(logprob))) ? ζ₂.z : ζ₁.z + Proposal(z, ω) +end + + +# divergence statistics + +""" +Divergence and acceptance statistics. + +Calculated over all visited phase points (not just the tree that is sampled +from). +""" +struct DivergenceStatistic{Tf} + "`true` iff the sampler was terminated because of divergence." + divergent::Bool + "Sum of metropolis acceptances probabilities over the whole trajectory + (including invalid parts)." + ∑a::Tf + "Total number of leapfrog steps." + steps::Int +end + +""" + divergence_statistic() + +Empty divergence statistic (for initial node). +""" +divergence_statistic() = DivergenceStatistic(false, 0.0, 0) + +""" + divergence_statistic(isdivergent, Δ) + +Divergence statistic for leaves. `Δ` is the log density relative to the initial +point. +""" +divergence_statistic(isdivergent, Δ) = + DivergenceStatistic(isdivergent, Δ ≥ 0 ? one(Δ) : exp(Δ), 1) + +""" + isdivergent(x) + +Test if divergence statistic `x` indicates divergence. +""" +isdivergent(x::DivergenceStatistic) = x.divergent + +""" + combine_divstats(x, y) + +Combine divergence statistics from (subtrees) `x` and `y`. A divergent subtree +make a subtree divergent. +""" +function combine_divstats(x::DivergenceStatistic, y::DivergenceStatistic) + DivergenceStatistic(x.divergent || y.divergent, + x.∑a + y.∑a, x.steps + y.steps) +end + +""" + get_acceptance_rate(x) + +Return average Metropolis acceptance rate. +""" +get_acceptance_rate(x::DivergenceStatistic) = x.∑a / x.steps + + +# turn analysis + +""" +Statistics for the identification of turning points. See Betancourt (2017, +appendix). +""" +struct TurnStatistic{T} + p♯₋::T + p♯₊::T + ρ::T +end + +""" + combine_turnstats(x, y) + +Combine turn statistics of two trajectories `x` and `y`, which are assume to be +adjacent and in that order. 
+""" +combine_turnstats(x::TurnStatistic, y::TurnStatistic) = + TurnStatistic(x.p♯₋, y.p♯₊, x.ρ + y.ρ) + +""" + isturning(τ) + +Test termination based on turn statistics. Uses the generalized NUTS criterion +from Betancourt (2017). + +Note that this function should not be called with turn statistics returned by +[`leaf`](@ref), ie `depth > 0` is required. +""" +function isturning(τ::TurnStatistic) + @unpack p♯₋, p♯₊, ρ = τ + dot(p♯₋, ρ) < 0 || dot(p♯₊, ρ) < 0 +end + + +# sampling + +""" +Representation of a trajectory, ie a Hamiltonian with a discrete integrator that +also checks for divergence. +""" +struct Trajectory{TH,Tf} + "Hamiltonian." + H::TH + "Log density of z (negative log energy) at initial point." + π₀::Tf + "Stepsize for leapfrog." + ϵ::Tf + "Smallest decrease allowed in the log density." + min_Δ::Tf +end + +""" + Trajectory(H, π₀, ϵ; min_Δ = -1000.0) + +Convenience constructor for trajectory. +""" +Trajectory(H, π₀, ϵ; min_Δ = -1000.0) = Trajectory(H, π₀, ϵ, min_Δ) + +""" + ζ, τ, d = leaf(trajectory, z, isinitial) + +Construct a proposal, turn statistic, and divergence statistic for a single +point `z` in `trajectory`. When `isinitial`, `z` is the initial point in the +trajectory. + +Return + +- `ζ`: the proposal, which should only be used when `!isdivergent(d)` + +- `τ`: the turn statistic, which should only be used when `!isdivergent(d)` + +- `d`: divergence statistic +""" +function leaf(trajectory::Trajectory, z, isinitial) + @unpack H, π₀, min_Δ = trajectory + Δ = isinitial ? zero(π₀) : neg_energy(H, z) - π₀ + isdiv = min_Δ > Δ + d = isinitial ? divergence_statistic() : divergence_statistic(isdiv, Δ) + ζ = isdiv ? nothing : Proposal(z, Δ) + τ = isdiv ? nothing : (p♯ = get_p♯(trajectory.H, z); TurnStatistic(p♯, p♯, z.p)) + ζ, τ, d +end + +""" + move(trajectory, z, fwd) + +Return next phase point adjacent to `z` along `trajectory` in the direction +specified by `fwd`. +""" +function move(trajectory::Trajectory, z, fwd) + @unpack H, ϵ = trajectory + leapfrog(H, z, fwd ? ϵ : -ϵ) +end + +""" +Single transition by the No-U-turn sampler. Contains new position and +diagnostic information. +""" +struct NUTS_Transition{Tv,Tf} + "New position." + q::Tv + "Log density (negative energy)." + π::Tf + "Depth of the tree." + depth::Int + "Reason for termination." + termination::Termination + "Average acceptance probability." + a::Tf + "Number of leapfrog steps evaluated." + steps::Int +end + +"Position after transition." +get_position(x::NUTS_Transition) = x.q + +"Negative energy of the Hamiltonian at the position." +get_neg_energy(x::NUTS_Transition) = x.π + +"Tree depth." +get_depth(x::NUTS_Transition) = x.depth + +"Reason for termination, see [`Termination`](@ref)." +get_termination(x::NUTS_Transition) = x.termination + +"Average acceptance rate over trajectory." +get_acceptance_rate(x::NUTS_Transition) = x.a + +"Number of integrator steps." +get_steps(x::NUTS_Transition) = x.steps + +""" + NUTS_transition(rng, H, q, ϵ, max_depth; args...) + +No-U-turn Hamiltonian Monte Carlo transition, using Hamiltonian `H`, starting at +position `q`, using stepsize `ϵ`. Builds a doubling dynamic tree of maximum +depth `max_depth`. `args` are passed to the `Trajectory` constructor. `rng` is +the random number generator used. +""" +function NUTS_transition(rng, H, q, ϵ, max_depth; args...) + z = rand_phasepoint(rng, H, q) + trajectory = Trajectory(H, neg_energy(H, z), ϵ; args...) 
+ ζ, d, termination, depth = sample_trajectory(rng, trajectory, z, max_depth) + NUTS_Transition(ζ.z.q, neg_energy(H, ζ.z), depth, termination, + get_acceptance_rate(d), d.steps) +end diff --git a/src/diagnostics.jl b/src/diagnostics.jl new file mode 100644 index 0000000..ede767f --- /dev/null +++ b/src/diagnostics.jl @@ -0,0 +1,70 @@ +# statistics and diagnostics + +export EBFMI, NUTS_statistics + +""" + EBFMI(sample) + +Energy Bayesian fraction of missing information. Useful for diagnosing poorly +chosen kinetic energies. + +Low values (`≤ 0.3`) are considered problematic. See Betancourt (2016). +""" +EBFMI(sample) = (πs = get_neg_energy.(sample); mean(abs2, diff(πs)) / var(πs)) + + +"Acceptance quantiles for [`NUTS_Statistics`](@ref) diagnostic summary." +const ACCEPTANCE_QUANTILES = linspace(0, 1, 5) + +""" +Storing the output of [`NUTS_statistics`](@ref) in a structured way, for pretty +printing. Currently for internal use. +""" +struct NUTS_Statistics{T <: Real, + DT <: Associative{Termination,Int}, + DD <: Associative{Int,Int}} + "Sample length." + N::Int + "average_acceptance" + a_mean::T + "acceptance quantiles" + a_quantiles::Vector{T} + "termination counts" + termination_counts::DT + "depth counts" + depth_counts::DD +end + +""" + NUTS_statistics(sample) + +Return statistics about the sample (ie not the variables). Mostly useful for +NUTS diagnostics. +""" +function NUTS_statistics(sample) + as = get_acceptance_rate.(sample) + NUTS_Statistics(length(sample), + mean(as), quantile(as, ACCEPTANCE_QUANTILES), + counter(get_termination.(sample)), counter(get_depth.(sample))) +end + +function show(io::IO, stats::NUTS_Statistics) + @unpack N, a_mean, a_quantiles, termination_counts, depth_counts = stats + println(io, "Hamiltonian Monte Carlo sample of length $(N)") + print(io, " acceptance rate mean: $(round(a_mean,2)), min/25%/median/75%/max:") + for aq in a_quantiles + print(io, " ", round(aq, 2)) + end + println(io) + function print_dict(dict) + for (key, value) in sort(collect(dict), by = first) + print(io, " $(key) => $(round(Int, 100*value/N))%") + end + end + print(io, " termination:") + print_dict(termination_counts) + println(io) + print(io, " depth:") + print_dict(depth_counts) + println(io) +end diff --git a/src/hamiltonian.jl b/src/hamiltonian.jl new file mode 100644 index 0000000..7f381e3 --- /dev/null +++ b/src/hamiltonian.jl @@ -0,0 +1,198 @@ +# This file contains building blocks for traversing a Hamiltonian +# deterministically, using the leapfrog integrator. + +export KineticEnergy, EuclideanKE, GaussianKE + +""" +Kinetic energy specifications. + +For all subtypes, it is implicitly assumed that kinetic energy is symmetric in +the momentum `p`, ie. + +```julia +neg_energy(::KineticEnergy, p, q) == neg_energy(::KineticEnergy, -p, q) +``` + +When the above is violated, the consequences are undefined. +""" +abstract type KineticEnergy end + +"Euclidean kinetic energies (position independent)." +abstract type EuclideanKE <: KineticEnergy end + +""" +Gaussian kinetic energy. + +```math +p ∣ q ∼ N(0, M) +``` + +**independently** of ``q``. + +The inverse covariance ``M⁻¹`` is stored. +""" +struct GaussianKE{T <: AbstractMatrix, S <: AbstractMatrix} <: EuclideanKE + "M⁻¹" + Minv::T + "W such that W*W'=M. Used for generating random draws." 
+    W::S
+    function GaussianKE{T, S}(Minv, W) where {T, S}
+        @argcheck checksquare(Minv) == checksquare(W)
+        new(Minv, W)
+    end
+end
+
+GaussianKE(M::T, W::S) where {T,S} = GaussianKE{T,S}(M, W)
+
+"""
+    GaussianKE(M⁻¹::AbstractMatrix)
+
+Gaussian kinetic energy with the given inverse covariance matrix `M⁻¹`.
+"""
+GaussianKE(Minv::AbstractMatrix) = GaussianKE(Minv, inv(chol(Minv)))
+
+"""
+    GaussianKE(N::Int, [m⁻¹ = 1.0])
+
+Gaussian kinetic energy with a diagonal inverse covariance matrix `M⁻¹=m⁻¹*I`.
+"""
+GaussianKE(N::Int, m⁻¹ = 1.0) = GaussianKE(Diagonal(fill(m⁻¹, N)))
+
+show(io::IO, κ::GaussianKE) =
+    print(io, "Gaussian kinetic energy, √diag(M⁻¹): $(.√(diag(κ.Minv)))")
+
+"""
+    neg_energy(κ, p, [q])
+
+Return the log density of kinetic energy `κ`, at momentum `p`. Some kinetic
+energies (eg Riemannian geometry) will need `q`, too.
+"""
+neg_energy(κ::GaussianKE, p, q = nothing) = -dot(p, κ.Minv * p) / 2
+
+"""
+    get_p♯(κ, p, [q])
+
+Return ``p\\sharp``, used for turn diagnostics.
+"""
+get_p♯(κ::GaussianKE, p, q = nothing) = κ.Minv * p
+
+loggradient(κ::GaussianKE, p, q = nothing) = -get_p♯(κ, p)
+
+rand(rng, κ::GaussianKE, q = nothing) = κ.W * randn(rng, size(κ.W, 1))
+
+"""
+    Hamiltonian(ℓ, κ)
+
+Construct a Hamiltonian from the log density `ℓ`, and the kinetic energy
+specification `κ`. Calls of `ℓ` with a vector are expected to return a value
+that supports `DiffResults.value` and `DiffResults.gradient`.
+"""
+struct Hamiltonian{Tℓ, Tκ}
+    "The (log) density we are sampling from."
+    ℓ::Tℓ
+    "The kinetic energy."
+    κ::Tκ
+end
+
+show(io::IO, H::Hamiltonian) = print(io, "Hamiltonian with $(H.κ)")
+
+"""
+    is_valid_ℓq(ℓq)
+
+Test that a value returned by ℓ is *valid*, in the following sense:
+
+1. supports `DiffResults.value` and `DiffResults.gradient` (when not, a
+`MethodError` is thrown),
+
+2. the value is a float, either `-Inf` or finite,
+
+3. the gradient is finite when the value is; otherwise the gradient is ignored.
+"""
+function is_valid_ℓq(ℓq)
+    v = value(ℓq)
+    v isa AbstractFloat || return false
+    (v == -Inf) || (isfinite(v) && all(isfinite, gradient(ℓq)))
+end
+
+"""
+A point in phase space, consisting of a position and a momentum.
+
+Log densities and gradients are saved for speed gains, so that the gradient of ℓ
+at q is not calculated twice for every leapfrog step (both as start- and
+endpoints).
+
+Because of caching, a `PhasePoint` should only be used with a specific
+Hamiltonian.
+"""
+struct PhasePoint{T,S}
+    "Position."
+    q::T
+    "Momentum."
+    p::T
+    "ℓ(q). Cached for reuse in sampling."
+    ℓq::S
+    function PhasePoint(q::T, p::T, ℓq::S) where {T,S}
+        @argcheck is_valid_ℓq(ℓq) DomainError("Invalid value of ℓ.")
+        @argcheck length(p) == length(q)
+        new{T,S}(q, p, ℓq)
+    end
+end
+
+"""
+    get_ℓq(z)
+
+The value returned by `ℓ` when evaluated at position `q`.
+"""
+get_ℓq(z::PhasePoint) = z.ℓq
+
+"""
+    phasepoint_in(H::Hamiltonian, q, p)
+
+The recommended interface for creating a phase point in a Hamiltonian. Computes
+cached values.
+"""
+phasepoint_in(H::Hamiltonian, q, p) = PhasePoint(q, p, H.ℓ(q))
+
+"""
+    rand_phasepoint(rng, H, q)
+
+Extend a position `q` to a phasepoint with a random momentum according to the
+kinetic energy of `H`.
+"""
+rand_phasepoint(rng, H, q) = phasepoint_in(H, q, rand(rng, H.κ))
+
+"""
+    $SIGNATURES
+
+Log density for Hamiltonian `H` at point `z`.
+
+If `ℓ(q) == -Inf` (rejected), ignores the kinetic energy.
+"""
+function neg_energy(H::Hamiltonian, z::PhasePoint)
+    v = value(get_ℓq(z))
+    v == -Inf ?
v : (v + neg_energy(H.κ, z.p, z.q)) +end + +get_p♯(H::Hamiltonian, z::PhasePoint) = get_p♯(H.κ, z.p, z.q) + +""" + leapfrog(H, z, ϵ) + +Take a leapfrog step of length `ϵ` from `z` along the Hamiltonian `H`. + +Return the new position. + +The leapfrog algorithm uses the gradient of the next position to evolve the +momentum. If this is not finite, the momentum won't be either. Since the +constructor `PhasePoint` validates its arguments, this can only happen for +divergent points anyway, and should not cause a problem. +""" +function leapfrog{Tℓ, Tκ <: EuclideanKE}(H::Hamiltonian{Tℓ,Tκ}, z::PhasePoint, ϵ) + @unpack ℓ, κ = H + @unpack p, q, ℓq = z + pₘ = p + ϵ/2 * gradient(ℓq) + q′ = q - ϵ * loggradient(κ, pₘ) + ℓq′ = ℓ(q′) + p′ = pₘ + ϵ/2 * gradient(ℓq′) + PhasePoint(q′, p′, ℓq′) +end diff --git a/src/reporting.jl b/src/reporting.jl new file mode 100644 index 0000000..98fe9b9 --- /dev/null +++ b/src/reporting.jl @@ -0,0 +1,97 @@ +export ReportSilent, ReportIO + +""" +Subtypes implement [`report!`](@ref), [`start_progress!`](@ref), and +[`end_progress!`](@ref). +""" +abstract type AbstractReport end + +""" +A placeholder type for not reporting any information. +""" +struct ReportSilent <: AbstractReport end + +report!(::ReportSilent, objects...) = nothing + +start_progress!(::ReportSilent, ::Union{Int, Void}, ::Any) = nothing + +end_progress!(::ReportSilent) = nothing + +mutable struct ReportIO{TIO <: IO} <: AbstractReport + io::TIO + color::Union{Symbol, Int} + step_count::Int + total::Union{Int, Void} + last_count::Union{Int, Void} + last_time::UInt +end + +""" + $SIGNATURES + +Report to the given stream `io` (defaults to `STDERR`). + +For progress bars, emit new information every after `step_count` steps. + +`color` is used with `print_with_color`. +""" +ReportIO(; io = STDERR, color = :blue, step_count = 100) = + ReportIO(io, color, step_count, nothing, nothing, zero(UInt)) + +""" + $SIGNATURES + +Start a progress meter for an iteration. The second argument is either + +- `nothing`, if the total number of steps is unknown, + +- an integer, for the total number of steps. + +After calling this function, [`report!`](@ref) should be used at every step with +an integer. +""" +function start_progress!(report::ReportIO, total, msg) + if total isa Integer + msg *= " ($(total) steps)" + end + print_with_color(report.color, report.io, msg, '\n'; bold = true) + report.total = total + report.last_count = 0 + report.last_time = time_ns() + nothing +end + +""" + $SIGNATURES + +Terminate a progress meter. +""" +function end_progress!(report::ReportIO) + print_with_color(report.color, report.io, " ...done\n"; bold = true) + report.last_count = nothing +end + +""" + $SIGNATURES + +Display `objects` via the appropriate mechanism. + +When a single `Int` is given, it is treated as the index of the current step. +""" +function report!(report::ReportIO, count::Int) + @unpack io, step_count, color, total = report + @argcheck report.last_count isa Int "start_progress! was not called." 
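+    # throttle output: only print a line every step_count steps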
+    if count % step_count == 0
+        msg = "step $(count)"
+        if total isa Int
+            msg *= "/$(total)"
+        end
+        t = time_ns()
+        # time_ns() is in nanoseconds, so 1e9 converts the average to seconds
+        s_per_iteration = (t - report.last_time) / step_count / 1e9
+        msg *= ", $(signif(s_per_iteration, 2)) s/step"
+        print_with_color(color, io, msg, '\n')
+        report.last_time = t
+        report.last_count = count
+    end
+    nothing
+end
diff --git a/src/sampler.jl b/src/sampler.jl
new file mode 100644
index 0000000..7378d55
--- /dev/null
+++ b/src/sampler.jl
@@ -0,0 +1,294 @@
+export
+    NUTS, mcmc, mcmc_adapting_ϵ, NUTS_init_tune_mcmc, sample_cov,
+    get_position_matrix
+
+# high-level interface: sampler
+
+"""
+Specification for the No-U-turn algorithm, including the random number
+generator, Hamiltonian, the initial position, and various parameters.
+"""
+struct NUTS{Tv, Tf, TR, TH, Trep <: AbstractReport}
+    "Random number generator."
+    rng::TR
+    "Hamiltonian"
+    H::TH
+    "position"
+    q::Tv
+    "stepsize"
+    ϵ::Tf
+    "maximum depth of the tree"
+    max_depth::Int
+    "reporting"
+    report::Trep
+end
+
+function show(io::IO, nuts::NUTS)
+    @unpack q, ϵ, max_depth = nuts
+    println(io, "NUTS sampler in $(length(q)) dimensions")
+    println(io, "  stepsize (ϵ) ≈ $(signif(ϵ, 3))")
+    println(io, "  maximum depth = $(max_depth)")
+    println(io, "  $(nuts.H.κ)")
+end
+
+"""
+    mcmc(sampler, N)
+
+Run the MCMC `sampler` for `N` iterations, returning the results as a vector;
+for [`NUTS`](@ref), the elements are [`NUTS_Transition`](@ref)s.
+"""
+function mcmc(sampler::NUTS{Tv,Tf}, N::Int) where {Tv,Tf}
+    @unpack rng, H, q, ϵ, max_depth, report = sampler
+    sample = Vector{NUTS_Transition{Tv,Tf}}(N)
+    start_progress!(report, N, "MCMC")
+    for i in 1:N
+        trans = NUTS_transition(rng, H, q, ϵ, max_depth)
+        q = trans.q
+        sample[i] = trans
+        report!(report, i)
+    end
+    end_progress!(report)
+    sample
+end
+
+"""
+    sample, A = mcmc_adapting_ϵ(sampler, N, [A_params, A])
+
+Same as [`mcmc`](@ref), but also adapt the stepsize ϵ according to the
+parameters `A_params` and initial state `A`. Return the updated `A` as the
+second value.
+
+When the last two parameters are not specified, initialize using `adapting_ϵ`.
+"""
+function mcmc_adapting_ϵ(sampler::NUTS{Tv,Tf}, N::Int, A_params, A) where {Tv,Tf}
+    @unpack rng, H, q, max_depth, report = sampler
+    sample = Vector{NUTS_Transition{Tv,Tf}}(N)
+    start_progress!(report, N, "MCMC, adapting ϵ")
+    for i in 1:N
+        trans = NUTS_transition(rng, H, q, get_ϵ(A), max_depth)
+        A = adapt_stepsize(A_params, A, trans.a)
+        q = trans.q
+        sample[i] = trans
+        report!(report, i)
+    end
+    end_progress!(report)
+    sample, A
+end
+
+mcmc_adapting_ϵ(sampler::NUTS, N) =
+    mcmc_adapting_ϵ(sampler, N, adapting_ϵ(sampler.ϵ)...)
+
+"""
+    get_position_matrix(sample)
+
+Return the samples of the parameter vector as rows of a matrix.
+"""
+get_position_matrix(sample) = vcat(get_position.(sample)'...)
+
+
+# tuning and diagnostics
+
+
+"""
+    sample_cov(sample)
+
+Covariance matrix of the sample.
+"""
+sample_cov(sample) = cov(get_position_matrix(sample), 1)
+
+"""
+    NUTS_init(rng, ℓ, q; κ = GaussianKE(length(q)), p, max_depth, ϵ, report)
+
+Initialize a NUTS sampler for log density `ℓ` using local information.
+
+# Arguments
+
+- `rng`: the random number generator
+
+- `ℓ`: the log density function, should return a type that supports
+  `DiffResults.value` and `DiffResults.gradient`
+
+- `q`: initial position.
+
+- `κ`: kinetic energy specification. *Default*: Gaussian with identity matrix.
+
+- `p`: initial momentum. *Default*: random from standard multivariate normal.
+
+- `max_depth`: maximum tree depth. *Default*: `5`.
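+
+- `report`: reporting mechanism for progress; see [`ReportIO`](@ref) and
+  [`ReportSilent`](@ref). *Default*: `ReportIO()`.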
+
+- `ϵ`: initial stepsize, or parameters for finding it (passed on to
+  [`find_initial_stepsize`](@ref)).
+"""
+function NUTS_init(rng, ℓ, q;
+                   κ = GaussianKE(length(q)),
+                   p = rand(rng, κ),
+                   max_depth = 5,
+                   ϵ = InitialStepsizeSearch(),
+                   report = ReportIO())
+    H = Hamiltonian(ℓ, κ)
+    z = phasepoint_in(H, q, p)
+    if !(ϵ isa Float64)
+        ϵ = find_initial_stepsize(ϵ, H, z)
+    end
+    NUTS(rng, H, q, ϵ, max_depth, report)
+end
+
+"""
+    NUTS_init(rng, ℓ, dim::Integer; args...)
+
+Random initialization with position `randn(dim)`, all other arguments are passed
+on to the other method of this function.
+"""
+NUTS_init(rng, ℓ, dim::Integer; args...) = NUTS_init(rng, ℓ, randn(dim); args...)
+
+
+# tuning: abstract interface
+
+"""
+A tuner that adapts the sampler.
+
+All subtypes support `length` which returns the number of steps (*note*: if not
+in field `N`, define `length` accordingly), other parameters vary.
+"""
+abstract type AbstractTuner end
+
+length(tuner::AbstractTuner) = tuner.N
+
+"""
+    sampler′ = tune(sampler, tuner)
+
+Given a `sampler` (or a similar parametrization) and a `tuner`, return the
+updated sampler state after tuning.
+"""
+function tune end
+
+
+# tuning: tuner building blocks
+
+"Adapt the integrator stepsize for `N` samples."
+struct StepsizeTuner <: AbstractTuner
+    N::Int
+end
+
+show(io::IO, tuner::StepsizeTuner) =
+    print(io, "Stepsize tuner, $(tuner.N) samples")
+
+function tune(sampler::NUTS, tuner::StepsizeTuner)
+    @unpack rng, H, max_depth, report = sampler
+    sample, A = mcmc_adapting_ϵ(sampler, tuner.N)
+    NUTS(rng, H, sample[end].q, get_ϵ(A, false), max_depth, report)
+end
+
+"""
+Tune the integrator stepsize and covariance. Covariance tuning is from scratch
+(no prior information is used), regularized towards the identity matrix.
+"""
+struct StepsizeCovTuner{Tf} <: AbstractTuner
+    "Number of samples."
+    N::Int
+    """
+    Regularization factor for normalizing variance. An estimated covariance
+    matrix `Σ` is rescaled by `regularize/sample size` towards `σ²I`, where
+    `σ²` is the median of the diagonal.
+    """
+    regularize::Tf
+end
+
+function show(io::IO, tuner::StepsizeCovTuner)
+    @unpack N, regularize = tuner
+    print(io, "Stepsize and covariance tuner, $(N) samples, regularization $(regularize)")
+end
+
+function tune(sampler::NUTS, tuner::StepsizeCovTuner)
+    @unpack regularize, N = tuner
+    @unpack rng, H, max_depth, report = sampler
+    sample, A = mcmc_adapting_ϵ(sampler, N)
+    Σ = sample_cov(sample)
+    Σ .+= (UniformScaling(median(diag(Σ)))-Σ) * regularize/N
+    κ = GaussianKE(Σ)
+    NUTS(rng, Hamiltonian(H.ℓ, κ), sample[end].q, get_ϵ(A), max_depth, report)
+end
+
+"Sequence of tuners, applied in the given order."
+struct TunerSequence{T} <: AbstractTuner
+    tuners::T
+end
+
+function show(io::IO, tuner::TunerSequence)
+    @unpack tuners = tuner
+    print(io, "Sequence of $(length(tuners)) tuners, $(length(tuner)) total samples")
+    for t in tuners
+        print(io, "\n  ")
+        show(io, t)
+    end
+end
+
+length(seq::TunerSequence) = sum(length, seq.tuners)
+
+"""
+    bracketed_doubling_tuner(; [init], [mid], [M], [term], [regularize])
+
+A sequence of tuners:
+
+1. tuning stepsize with `init` steps
+
+2. tuning stepsize and covariance: first with `mid` steps, then repeat with
+   twice the steps `M` times
+
+3. tuning stepsize with `term` steps
+
+`regularize` is used for covariance regularization.
+"""
+function bracketed_doubling_tuner(; init = 75, mid = 25, M = 5, term = 50,
+                                  regularize = 5.0, _...)
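+    # a stepsize-only warmup, then M blocks of stepsize+covariance tuning with
+    # doubling lengths, then a final stepsize-only phase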
+"""
+    bracketed_doubling_tuner(; [init], [mid], [M], [term], [regularize])
+
+A sequence of tuners:
+
+1. tuning stepsize with `init` steps
+
+2. tuning stepsize and covariance: first with `mid` steps, then repeat with
+   twice the steps `M` times
+
+3. tuning stepsize with `term` steps
+
+`regularize` is used for covariance regularization.
+"""
+function bracketed_doubling_tuner(; init = 75, mid = 25, M = 5, term = 50,
+                                  regularize = 5.0, _...)
+    tuners = Union{StepsizeTuner, StepsizeCovTuner}[StepsizeTuner(init)]
+    for _ in 1:M
+        push!(tuners, StepsizeCovTuner(mid, regularize))
+        mid *= 2
+    end
+    push!(tuners, StepsizeTuner(term))
+    TunerSequence((tuners...))
+end
+
+function tune(sampler, seq::TunerSequence)
+    for tuner in seq.tuners
+        sampler = tune(sampler, tuner)
+    end
+    sampler
+end
+
+"""
+    $SIGNATURES
+
+Init, tune, and then draw `N` samples from `ℓ` using the NUTS algorithm.
+
+Return the *sample* (a vector of [`NUTS_Transition`](@ref)s) and the *tuned
+sampler*.
+
+`rng` is the random number generator.
+
+`q_or_dim` is a starting position or the dimension (for random initialization).
+
+`args` are passed on to various methods, see [`NUTS_init`](@ref) and
+[`bracketed_doubling_tuner`](@ref).
+
+For parameters `q`, `ℓ(q)` should return an object that supports the following
+methods: `DiffResults.value`, `DiffResults.gradient`.
+
+Most users would use this function, unless they are doing something that
+requires manual tuning.
+"""
+function NUTS_init_tune_mcmc(rng, ℓ, q_or_dim, N::Int; args...)
+    sampler_init = NUTS_init(rng, ℓ, q_or_dim; args...)
+    sampler_tuned = tune(sampler_init, bracketed_doubling_tuner(; args...))
+    mcmc(sampler_tuned, N), sampler_tuned
+end
+
+"""
+    $SIGNATURES
+
+Same as the other method, but with random number generator
+`Base.Random.GLOBAL_RNG`.
+"""
+NUTS_init_tune_mcmc(ℓ, q_or_dim, N::Int; args...) =
+    NUTS_init_tune_mcmc(Base.Random.GLOBAL_RNG, ℓ, q_or_dim, N; args...)
diff --git a/src/stepsize.jl b/src/stepsize.jl
new file mode 100644
index 0000000..1f620a6
--- /dev/null
+++ b/src/stepsize.jl
@@ -0,0 +1,269 @@
+# stepsize heuristics and adaptation
+
+"""
+Parameters for the search algorithm for the initial stepsize.
+
+The algorithm finds an initial stepsize ``ϵ`` so that the local acceptance ratio
+``A(ϵ)`` satisfies
+
+```math
+a_\\text{min} ≤ A(ϵ) ≤ a_\\text{max}
+```
+
+This is achieved by an initial bracketing, then bisection.
+
+$FIELDS
+
+!!! note
+
+    Cf. Hoffman and Gelman (2014), which does not ensure bounds for the
+    acceptance ratio, just that it has crossed a threshold. This version seems
+    to work better for some tricky posteriors with high curvature.
+"""
+struct InitialStepsizeSearch
+    "Lowest local acceptance rate."
+    a_min::Float64
+    "Highest local acceptance rate."
+    a_max::Float64
+    "Initial stepsize."
+    ϵ₀::Float64
+    "Scale factor for initial bracketing, > 1. *Default*: `2.0`."
+    C::Float64
+    "Maximum number of iterations for initial bracketing."
+    maxiter_crossing::Int
+    "Maximum number of iterations for bisection."
+    maxiter_bisect::Int
+    function InitialStepsizeSearch(; a_min = 0.25, a_max = 0.75, ϵ₀ = 1.0,
+                                   C = 2.0,
+                                   maxiter_crossing = 400, maxiter_bisect = 400)
+        @argcheck 0 < a_min < a_max < 1
+        @argcheck 0 < ϵ₀
+        @argcheck 1 < C
+        @argcheck maxiter_crossing ≥ 50
+        @argcheck maxiter_bisect ≥ 50
+        new(a_min, a_max, ϵ₀, C, maxiter_crossing, maxiter_bisect)
+    end
+end
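Since the search only needs a callable returning the local acceptance ratio, it can be illustrated with a synthetic `A` (a toy function, not part of the package), using `find_initial_stepsize` defined below:

```julia
p = InitialStepsizeSearch()      # defaults: a_min = 0.25, a_max = 0.75, ϵ₀ = 1.0
A(ϵ) = exp(-5*ϵ)                 # synthetic ratio, monotone decreasing in ϵ
ϵ = find_initial_stepsize(p, A)  # brackets downwards from ϵ₀; returns 0.25 here
@assert p.a_min ≤ A(ϵ) ≤ p.a_max
```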
+"""
+Find the stepsize for which the local acceptance rate `A(ϵ)` crosses `a`.
+
+    $SIGNATURES
+
+Return `ϵ₀, A(ϵ₀), ϵ₁, A(ϵ₁)`, where `ϵ₀` and `ϵ₁` are stepsizes before and
+after crossing `a` with `A(ϵ)`, respectively.
+
+Assumes that ``A(ϵ₀) ∉ (a_\\text{min}, a_\\text{max})``, where the latter are
+defined in `parameters`.
+
+- `parameters`: parameters for the iteration.
+
+- `A`: local acceptance ratio (uncapped), a function of stepsize `ϵ`
+
+- `ϵ₀`, `Aϵ₀`: initial value of `ϵ`, and `A(ϵ₀)`
+"""
+function find_crossing_stepsize(parameters, A, ϵ₀, Aϵ₀ = A(ϵ₀))
+    @unpack a_min, a_max, C, maxiter_crossing = parameters
+    s, a = Aϵ₀ > a_max ? (1.0, a_max) : (-1.0, a_min)
+    if s < 0                   # when A(ϵ) < a,
+        C = 1/C                # decrease ϵ
+    end
+    for _ in 1:maxiter_crossing
+        ϵ = ϵ₀ * C
+        Aϵ = A(ϵ)
+        if s*(Aϵ - a) ≤ 0
+            return ϵ₀, Aϵ₀, ϵ, Aϵ
+        else
+            ϵ₀ = ϵ
+            Aϵ₀ = Aϵ
+        end
+    end
+    # should never reach this, miscoded log density?
+    dir = s > 0 ? "below" : "above"
+    error("Reached maximum number of iterations searching for ϵ from $(dir).")
+end
+
+"""
+Return the desired stepsize `ϵ` by bisection.
+
+    $SIGNATURES
+
+- `parameters`: algorithm parameters, see [`InitialStepsizeSearch`](@ref)
+
+- `A`: local acceptance ratio (uncapped), a function of stepsize `ϵ`
+
+- `ϵ₀`, `ϵ₁`, `Aϵ₀`, `Aϵ₁`: stepsizes and acceptance rates (latter optional).
+
+This function assumes that ``ϵ₀ < ϵ₁``, the stepsize is not yet acceptable, and
+the cached `A` values have the correct ordering.
+"""
+function bisect_stepsize(parameters, A, ϵ₀, ϵ₁, Aϵ₀ = A(ϵ₀), Aϵ₁ = A(ϵ₁))
+    @unpack a_min, a_max, maxiter_bisect = parameters
+    @argcheck ϵ₀ < ϵ₁
+    @argcheck Aϵ₀ > a_max && Aϵ₁ < a_min
+    for _ in 1:maxiter_bisect
+        ϵₘ = middle(ϵ₀, ϵ₁)
+        Aϵₘ = A(ϵₘ)
+        if a_min ≤ Aϵₘ ≤ a_max      # in the target interval
+            return ϵₘ
+        elseif Aϵₘ < a_min          # acceptance too low, ϵₘ is too large
+            ϵ₁ = ϵₘ
+            Aϵ₁ = Aϵₘ
+        else                        # acceptance too high, ϵₘ is too small
+            ϵ₀ = ϵₘ
+            Aϵ₀ = Aϵₘ
+        end
+    end
+    # should never reach this, miscoded log density?
+    error("Reached maximum number of iterations while bisecting interval for ϵ.")
+end
+
+"""
+    $SIGNATURES
+
+Find an initial stepsize that matches the conditions of `parameters` (see
+[`InitialStepsizeSearch`](@ref)).
+
+`A` is the local acceptance ratio (uncapped). When given a Hamiltonian `H` and a
+phasepoint `z`, it will be calculated using [`local_acceptance_ratio`](@ref).
+"""
+function find_initial_stepsize(parameters::InitialStepsizeSearch, A)
+    @unpack a_min, a_max, ϵ₀ = parameters
+    Aϵ₀ = A(ϵ₀)
+    if a_min ≤ Aϵ₀ ≤ a_max
+        ϵ₀
+    else
+        ϵ₀, Aϵ₀, ϵ₁, Aϵ₁ = find_crossing_stepsize(parameters, A, ϵ₀, Aϵ₀)
+        if a_min ≤ Aϵ₁ ≤ a_max      # in interval
+            ϵ₁
+        elseif ϵ₀ < ϵ₁              # order as necessary
+            bisect_stepsize(parameters, A, ϵ₀, ϵ₁, Aϵ₀, Aϵ₁)
+        else
+            bisect_stepsize(parameters, A, ϵ₁, ϵ₀, Aϵ₁, Aϵ₀)
+        end
+    end
+end
+
+find_initial_stepsize(parameters::InitialStepsizeSearch, H, z) =
+    find_initial_stepsize(parameters, local_acceptance_ratio(H, z))
+
+"""
+    $(SIGNATURES)
+
+Return a function of the stepsize (``ϵ``) that calculates the local acceptance
+ratio for a single leapfrog step around `z` along the Hamiltonian `H`. Formally,
+let
+
+```math
+A(ϵ) = \\exp(\\text{neg_energy}(H, \\text{leapfrog}(H, z, ϵ)) - \\text{neg_energy}(H, z))
+```
+
+Note that the ratio is not capped by `1`, so it is not a valid probability
+*per se*.
+"""
+function local_acceptance_ratio(H, z)
+    target = neg_energy(H, z)
+    isfinite(target) ||
+        throw(DomainError(z.p, "Starting point has non-finite density."))
+    ϵ -> exp(neg_energy(H, leapfrog(H, z, ϵ)) - target)
+end
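A sketch of probing `A(ϵ)` directly, assuming the internal names `Hamiltonian`, `GaussianKE`, `phasepoint_in`, and `local_acceptance_ratio` are in scope (e.g. via `import DynamicHMC: ...`); the toy `ℓ` is a 2-dimensional standard normal packed into a `DiffResults` result:

```julia
using DiffResults: MutableDiffResult

ℓ(q) = MutableDiffResult(-sum(abs2, q) / 2, (-q,))
H = Hamiltonian(ℓ, GaussianKE(2))
z = phasepoint_in(H, zeros(2), randn(2))
A = local_acceptance_ratio(H, z)
A.([0.01, 0.1, 1.0])   # ≈ 1 for tiny ϵ, deteriorating as ϵ grows
```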
+"""
+    $(SIGNATURES)
+
+Return a matrix of [`local_acceptance_ratio`](@ref) values for stepsizes `ϵs`
+and the given momenta `ps`. The latter are generated randomly when an integer
+is given instead.
+
+To facilitate plotting, infinite values are replaced by `NaN`.
+"""
+function explore_local_acceptance_ratios(H, q, ϵs, ps)
+    R = hcat([local_acceptance_ratio(H, phasepoint_in(H, q, p)).(ϵs) for p in ps]...)
+    R[isinf.(R)] .= NaN
+    R
+end
+
+explore_local_acceptance_ratios(H, q, ϵs, N::Int) =
+    explore_local_acceptance_ratios(H, q, ϵs,
+                                    [rand(Base.Random.GLOBAL_RNG, H.κ) for _ in 1:N])
+
+"""
+Parameters for the dual averaging algorithm of Hoffman and Gelman (2014,
+Algorithm 6).
+
+To get reasonable defaults, initialize with
+`DualAveragingParameters(logϵ₀)`. See [`adapting_ϵ`](@ref) for a joint
+constructor.
+"""
+struct DualAveragingParameters{T}
+    "log stepsize towards which the iterates shrink (`log(10) + logϵ₀` by default)"
+    μ::T
+    "target acceptance rate"
+    δ::T
+    "regularization scale"
+    γ::T
+    "relaxation exponent"
+    κ::T
+    "offset"
+    t₀::Int
+    function DualAveragingParameters{T}(μ, δ, γ, κ, t₀) where {T}
+        @argcheck 0 < δ < 1
+        @argcheck γ > 0
+        @argcheck 0.5 < κ ≤ 1
+        @argcheck t₀ ≥ 0
+        new(μ, δ, γ, κ, t₀)
+    end
+end
+
+DualAveragingParameters(μ::T, δ::T, γ::T, κ::T, t₀::Int) where T =
+    DualAveragingParameters{T}(μ, δ, γ, κ, t₀)
+
+DualAveragingParameters(logϵ₀; δ = 0.8, γ = 0.05, κ = 0.75, t₀ = 10) =
+    DualAveragingParameters(promote(log(10) + logϵ₀, δ, γ, κ)..., t₀)
+
+"Current state of adaptation for `ϵ`. Use `DualAveragingAdaptation(logϵ₀)` to
+get an initial value. See [`adapting_ϵ`](@ref) for a joint constructor."
+struct DualAveragingAdaptation{T <: AbstractFloat}
+    m::Int
+    H̄::T
+    logϵ::T
+    logϵ̄::T
+end
+
+"""
+    get_ϵ(A, tuning = true)
+
+When `tuning`, return the stepsize `ϵ` for the next HMC step. Otherwise return
+the tuned `ϵ`.
+"""
+get_ϵ(A::DualAveragingAdaptation, tuning = true) = exp(tuning ? A.logϵ : A.logϵ̄)
+
+DualAveragingAdaptation(logϵ₀) =
+    DualAveragingAdaptation(0, zero(logϵ₀), logϵ₀, zero(logϵ₀))
+
+"""
+    DA_params, A = adapting_ϵ(ϵ; args...)
+
+Constructor for both the adaptation parameters and the initial state.
+"""
+function adapting_ϵ(ϵ; args...)
+    logϵ = log(ϵ)
+    DualAveragingParameters(logϵ; args...), DualAveragingAdaptation(logϵ)
+end
+
+"""
+    A′ = adapt_stepsize(parameters, A, a)
+
+Update the adaptation `A` of log stepsize `logϵ` with average Metropolis
+acceptance rate `a` over the whole visited trajectory, using the dual averaging
+algorithm of Hoffman and Gelman (2014, Algorithm 6). Return the new adaptation.
+"""
+function adapt_stepsize(parameters::DualAveragingParameters,
+                        A::DualAveragingAdaptation, a)
+    @argcheck 0 ≤ a ≤ 1
+    @unpack μ, δ, γ, κ, t₀ = parameters
+    @unpack m, H̄, logϵ, logϵ̄ = A
+    m += 1
+    H̄ += (δ - a - H̄) / (m+t₀)
+    logϵ = μ - √m/γ*H̄
+    logϵ̄ += m^(-κ)*(logϵ - logϵ̄)
+    DualAveragingAdaptation(m, H̄, logϵ, logϵ̄)
+end
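The adaptation loop can also be exercised in isolation; a hedged sketch with a synthetic acceptance-rate curve (not package code), where `a(ϵ) = 1/(1 + ϵ)` equals the default target `δ = 0.8` at `ϵ = 0.25`:

```julia
DA_params, A = adapting_ϵ(1.0)       # start from ϵ = 1
for _ in 1:1000
    a = 1 / (1 + get_ϵ(A))           # synthetic: acceptance falls as ϵ grows
    A = adapt_stepsize(DA_params, A, a)
end
get_ϵ(A, false)                      # tuned stepsize from logϵ̄, ≈ 0.25
```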
+""" +rand_bool{T <: AbstractFloat}(rng::AbstractRNG, prob::T) = rand(rng, T) ≤ prob diff --git a/test/REQUIRE b/test/REQUIRE index b8aeab9..23186c2 100644 --- a/test/REQUIRE +++ b/test/REQUIRE @@ -1,3 +1,6 @@ +Compat Distributions +Documenter ForwardDiff MCMCDiagnostics +Suppressor diff --git a/test/runtests.jl b/test/runtests.jl index 6b30540..5e45b9b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -9,3 +9,6 @@ include("test-tuners.jl") include("test-sample-normal.jl") include("test-normal-mcmc.jl") include("test-statistics.jl") +include("test-reporting.jl") + +include("../docs/make.jl") diff --git a/test/setup-and-utilities.jl b/test/setup-and-utilities.jl index f6251ca..2de8c3f 100644 --- a/test/setup-and-utilities.jl +++ b/test/setup-and-utilities.jl @@ -5,7 +5,7 @@ import DynamicHMC: using Base.Test -using ArgCheck +using ArgCheck: @argcheck using DataStructures using DiffResults using Distributions @@ -13,6 +13,7 @@ import ForwardDiff: gradient using MCMCDiagnostics using Parameters using StatsBase +using Suppressor "RNG for consistent test environment" const RNG = srand(UInt32[0x23ef614d, 0x8332e05c, 0x3c574111, 0x121aa2f4]) diff --git a/test/test-normal-mcmc.jl b/test/test-normal-mcmc.jl index e3011f2..6a0d34d 100644 --- a/test/test-normal-mcmc.jl +++ b/test/test-normal-mcmc.jl @@ -104,7 +104,8 @@ end 5.57947 -0.0540131 1.78163 1.73862 -2.99741 3.6118 10.215 9.60671; 7.28634 1.79718 -0.0821483 2.55874 -1.95031 5.22626 9.60671 11.5554]) for ℓ in [ℓ0, ℓ1, ℓ2, ℓ3] - sample, nuts = NUTS_init_tune_mcmc(RNG, ℓ, length(ℓ), 1000) + sample, nuts = NUTS_init_tune_mcmc(RNG, ℓ, length(ℓ), 1000; + report = ReportSilent()) @test EBFMI(sample) ≥ 0.3 @test maximum(R̂(nuts, 1000, 3)) ≤ 1.05 zs = zvalue.([sample], mean_cov_ztests(ℓ)) @@ -117,7 +118,8 @@ end for _ in 1:100 K = rand(2:10) ℓ = MvNormal(randn(K), full(rand_Σ(K))) - sample, nuts = NUTS_init_tune_mcmc(RNG, ℓ, K, 1000) + sample, nuts = NUTS_init_tune_mcmc(RNG, ℓ, K, 1000; + report = ReportSilent()) @test EBFMI(sample) ≥ 0.3 @test maximum(R̂(nuts, 1000, 3)) ≤ 1.05 zs = zvalue.([sample], mean_cov_ztests(ℓ)) diff --git a/test/test-reporting.jl b/test/test-reporting.jl new file mode 100644 index 0000000..dc6aafa --- /dev/null +++ b/test/test-reporting.jl @@ -0,0 +1,19 @@ +@testset "reporting" begin + ℓ = MvNormal(zeros(3), ones(3)) + @color_output false begin + output = @capture_err begin + sample, nuts = NUTS_init_tune_mcmc(RNG, ℓ, length(ℓ), 1000; + report = ReportIO()) + end + end + function expectedA(msg, n) + r = "$msg \\($(n) steps\\)\\n" + for i in 100:100:n + r *= "step $(i)/$(n), \\d+\\.\\d+ s/step\\n" + end + r *= " \\.\\.\\.done\\n" + end + raw_regex = join(expectedA.(vcat(fill("MCMC, adapting ϵ", 7), ["MCMC"]), + [75, 25, 50, 100, 200, 400, 50, 1000]), "") + @test ismatch(Regex(raw_regex), output) +end