Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/workflows/agent-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,9 @@ jobs:
- name: R CMD check
uses: r-lib/actions/check-r-package@v2
with:
args: '"--no-build-vignettes"'
build_args: '"--no-build-vignettes"'
args: 'c("--no-build-vignettes", "--no-manual", "--ignore-vignettes")'
build_args: 'c("--no-build-vignettes", "--no-manual")'
error-on: '"error"'

- name: Run filtered tests
if: inputs.test_filter != ''
Expand Down
15 changes: 13 additions & 2 deletions R/MaximizeParsimony.R
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,11 @@
wagnerBias = 1L, wagnerBiasTemp = 0.3,
nniFirst = TRUE, sprFirst = FALSE,
outerCycles = 1L,
consensusStableReps = 2L
consensusStableReps = 2L,
# PCSA: 3 SA+TBR cycles as escape mechanism (T-207).
# Benchmarked at 125–205 tips EW: reduces variance 6× (SD 62→10)
# and improves mean score by 40–100 steps vs cold-only TBR.
saCycles = 3L, saTstart = 20.0, saNphases = 5L
)
)

Expand Down Expand Up @@ -853,7 +857,14 @@ MaximizeParsimony <- function(
else ctrl$outerCycles),
adaptiveStart = as.logical(
if (is.null(ctrl$adaptiveStart)) FALSE
else ctrl$adaptiveStart)
else ctrl$adaptiveStart),
# SA params packed as numeric vector: [cycles, t_start, t_end, n_phases, moves_per_phase]
saParams = c(
as.double(if (is.null(ctrl$saCycles)) 0 else ctrl$saCycles),
as.double(if (is.null(ctrl$saTstart)) 20.0 else ctrl$saTstart),
as.double(if (is.null(ctrl$saTend)) 0.0 else ctrl$saTend),
as.double(if (is.null(ctrl$saNphases)) 5 else ctrl$saNphases),
as.double(if (is.null(ctrl$saMovesPerPhase)) 0 else ctrl$saMovesPerPhase))
)
result <- do.call(ts_driven_search, c(searchArgs, consArgs, profileArgs,
hsjArgs, xformArgs))
Expand Down
12 changes: 10 additions & 2 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,8 @@ ts_xss_search <- function(edge, contrast, tip_data, weight, levels, nPartitions
.Call(`_TreeSearch_ts_xss_search`, edge, contrast, tip_data, weight, levels, nPartitions, xssRounds, acceptEqual, ratchetCycles, maxHits, min_steps, concavity)
}

ts_driven_search <- function(contrast, tip_data, weight, levels, maxReplicates = 100L, targetHits = 10L, tbrMaxHits = 1L, ratchetCycles = 10L, ratchetPerturbProb = 0.04, ratchetPerturbMode = 0L, ratchetPerturbMaxMoves = 0L, ratchetAdaptive = FALSE, driftCycles = 6L, driftAfdLimit = 3L, driftRfdLimit = 0.1, xssRounds = 3L, xssPartitions = 4L, rssRounds = 1L, cssRounds = 1L, cssPartitions = 4L, sectorMinSize = 6L, sectorMaxSize = 50L, fuseInterval = 3L, fuseAcceptEqual = FALSE, poolMaxSize = 100L, poolSuboptimal = 0.0, maxSeconds = 0.0, verbosity = 0L, min_steps = integer(), concavity = -1.0, consSplitMatrix = NULL, consContrast = NULL, consTipData = NULL, consWeight = NULL, consLevels = NULL, consExpectedScore = 0L, infoAmounts = NULL, tabuSize = 100L, wagnerStarts = 1L, progressCallback = NULL, nThreads = 1L, startEdge = NULL, sprFirst = FALSE, nniFirst = TRUE, hierarchyBlocks = NULL, hsjTipLabels = NULL, hsjAlpha = 1.0, hsjAbsentState = 0L, xformChars = NULL, xpiwe = FALSE, xpiwe_r = 0.5, xpiwe_max_f = 5.0, obs_count = integer(), consensusStableReps = 0L, adaptiveLevel = FALSE, consensusConstrain = FALSE, nniPerturbCycles = 0L, nniPerturbFraction = 0.5, wagnerBias = 0L, wagnerBiasTemp = 0.3, outerCycles = 1L, adaptiveStart = FALSE) {
.Call(`_TreeSearch_ts_driven_search`, contrast, tip_data, weight, levels, maxReplicates, targetHits, tbrMaxHits, ratchetCycles, ratchetPerturbProb, ratchetPerturbMode, ratchetPerturbMaxMoves, ratchetAdaptive, driftCycles, driftAfdLimit, driftRfdLimit, xssRounds, xssPartitions, rssRounds, cssRounds, cssPartitions, sectorMinSize, sectorMaxSize, fuseInterval, fuseAcceptEqual, poolMaxSize, poolSuboptimal, maxSeconds, verbosity, min_steps, concavity, consSplitMatrix, consContrast, consTipData, consWeight, consLevels, consExpectedScore, infoAmounts, tabuSize, wagnerStarts, progressCallback, nThreads, startEdge, sprFirst, nniFirst, hierarchyBlocks, hsjTipLabels, hsjAlpha, hsjAbsentState, xformChars, xpiwe, xpiwe_r, xpiwe_max_f, obs_count, consensusStableReps, adaptiveLevel, consensusConstrain, nniPerturbCycles, nniPerturbFraction, wagnerBias, wagnerBiasTemp, outerCycles, adaptiveStart)
# Auto-generated Rcpp wrapper (RcppExports.R): forwards every argument
# unchanged to the compiled entry point `_TreeSearch_ts_driven_search`.
# New in this revision: `saParams`, the simulated-annealing settings packed
# as a 5-element numeric vector [cycles, t_start, t_end, n_phases,
# moves_per_phase]; the default c(0, 20, 0, 5, 0) disables SA (cycles = 0).
# NOTE(review): generated file — edits here are overwritten by
# Rcpp::compileAttributes(); the source of truth is the C++ attribute.
ts_driven_search <- function(contrast, tip_data, weight, levels, maxReplicates = 100L, targetHits = 10L, tbrMaxHits = 1L, ratchetCycles = 10L, ratchetPerturbProb = 0.04, ratchetPerturbMode = 0L, ratchetPerturbMaxMoves = 0L, ratchetAdaptive = FALSE, driftCycles = 6L, driftAfdLimit = 3L, driftRfdLimit = 0.1, xssRounds = 3L, xssPartitions = 4L, rssRounds = 1L, cssRounds = 1L, cssPartitions = 4L, sectorMinSize = 6L, sectorMaxSize = 50L, fuseInterval = 3L, fuseAcceptEqual = FALSE, poolMaxSize = 100L, poolSuboptimal = 0.0, maxSeconds = 0.0, verbosity = 0L, min_steps = integer(), concavity = -1.0, consSplitMatrix = NULL, consContrast = NULL, consTipData = NULL, consWeight = NULL, consLevels = NULL, consExpectedScore = 0L, infoAmounts = NULL, tabuSize = 100L, wagnerStarts = 1L, progressCallback = NULL, nThreads = 1L, startEdge = NULL, sprFirst = FALSE, nniFirst = TRUE, hierarchyBlocks = NULL, hsjTipLabels = NULL, hsjAlpha = 1.0, hsjAbsentState = 0L, xformChars = NULL, xpiwe = FALSE, xpiwe_r = 0.5, xpiwe_max_f = 5.0, obs_count = integer(), consensusStableReps = 0L, adaptiveLevel = FALSE, consensusConstrain = FALSE, nniPerturbCycles = 0L, nniPerturbFraction = 0.5, wagnerBias = 0L, wagnerBiasTemp = 0.3, outerCycles = 1L, adaptiveStart = FALSE, saParams = as.numeric( c(0, 20.0, 0.0, 5, 0))) {
.Call(`_TreeSearch_ts_driven_search`, contrast, tip_data, weight, levels, maxReplicates, targetHits, tbrMaxHits, ratchetCycles, ratchetPerturbProb, ratchetPerturbMode, ratchetPerturbMaxMoves, ratchetAdaptive, driftCycles, driftAfdLimit, driftRfdLimit, xssRounds, xssPartitions, rssRounds, cssRounds, cssPartitions, sectorMinSize, sectorMaxSize, fuseInterval, fuseAcceptEqual, poolMaxSize, poolSuboptimal, maxSeconds, verbosity, min_steps, concavity, consSplitMatrix, consContrast, consTipData, consWeight, consLevels, consExpectedScore, infoAmounts, tabuSize, wagnerStarts, progressCallback, nThreads, startEdge, sprFirst, nniFirst, hierarchyBlocks, hsjTipLabels, hsjAlpha, hsjAbsentState, xformChars, xpiwe, xpiwe_r, xpiwe_max_f, obs_count, consensusStableReps, adaptiveLevel, consensusConstrain, nniPerturbCycles, nniPerturbFraction, wagnerBias, wagnerBiasTemp, outerCycles, adaptiveStart, saParams)
}

ts_resample_search <- function(contrast, tip_data, weight, levels, bootstrap = FALSE, jackProportion = 2.0 / 3.0, maxReplicates = 5L, targetHits = 2L, tbrMaxHits = 1L, ratchetCycles = 3L, ratchetPerturbProb = 0.04, driftCycles = 0L, min_steps = integer(), concavity = -1.0, consSplitMatrix = NULL, consContrast = NULL, consTipData = NULL, consWeight = NULL, consLevels = NULL, consExpectedScore = 0L, infoAmounts = NULL, xpiwe = FALSE, xpiwe_r = 0.5, xpiwe_max_f = 5.0, obs_count = integer()) {
Expand Down Expand Up @@ -184,6 +184,14 @@ ts_wagner_bias_bench <- function(contrast, tip_data, weight, levels, min_steps,
.Call(`_TreeSearch_ts_wagner_bias_bench`, contrast, tip_data, weight, levels, min_steps, concavity, bias, temperature, n_reps, run_tbr)
}

# Auto-generated Rcpp wrapper for the simulated-annealing diagnostic entry
# point `_TreeSearch_ts_anneal_diag`.  Temperature schedule runs from
# `t_start` (default 20) down to `t_end` (default 0) over `n_phases` steps,
# repeated `sa_cycles` times; `moves_per_phase = 0L` and `startEdge = NULL`
# defer to C++-side defaults.  NOTE(review): generated file — regenerated by
# Rcpp::compileAttributes(); do not edit by hand.
ts_anneal_diag <- function(contrast, tip_data, weight, levels, min_steps = integer(), concavity = -1.0, t_start = 20.0, t_end = 0.0, n_phases = 10L, moves_per_phase = 0L, tbr_polish = TRUE, tbr_first = FALSE, sa_cycles = 1L, startEdge = NULL) {
.Call(`_TreeSearch_ts_anneal_diag`, contrast, tip_data, weight, levels, min_steps, concavity, t_start, t_end, n_phases, moves_per_phase, tbr_polish, tbr_first, sa_cycles, startEdge)
}

# Auto-generated Rcpp wrapper for the parallel-tempering diagnostic entry
# point `_TreeSearch_ts_parallel_temper_diag`.  Runs `n_chains` chains on the
# supplied `temperatures` ladder for `rounds` swap rounds;
# `moves_per_round = 0L` defers to the C++-side default (per the dev
# profiling script, presumably one move per tip — confirm in C++ source).
# NOTE(review): generated file — regenerated by Rcpp::compileAttributes().
ts_parallel_temper_diag <- function(contrast, tip_data, weight, levels, min_steps = integer(), concavity = -1.0, n_chains = 4L, temperatures = numeric(), rounds = 5L, moves_per_round = 0L, score_transfer = FALSE) {
.Call(`_TreeSearch_ts_parallel_temper_diag`, contrast, tip_data, weight, levels, min_steps, concavity, n_chains, temperatures, rounds, moves_per_round, score_transfer)
}

# Auto-generated Rcpp wrapper for the strategy-tracker test harness
# `_TreeSearch_ts_test_strategy_tracker`: draws `n_draws` strategy choices
# with the given `seed` (semantics live in the C++ implementation).
# NOTE(review): generated file — regenerated by Rcpp::compileAttributes().
ts_test_strategy_tracker <- function(seed, n_draws) {
.Call(`_TreeSearch_ts_test_strategy_tracker`, seed, n_draws)
}
Expand Down
34 changes: 32 additions & 2 deletions R/SearchControl.R
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,20 @@
#' starting strategy from a pool of options (random Wagner, biased Wagner,
#' random tree, pool ratchet, pool NNI-perturb), adapting to which
#' strategies yield the best scores. Default `FALSE`.
#' @param saCycles Integer; number of simulated annealing perturbation
#' cycles (PCSA) per replicate. Each cycle perturbs the current best tree
#' via scheduled SA cooling, then reconverges with TBR. If the result
#' improves on the best, it becomes the new starting point. Effective at
#' escaping deep basins under equal-weights parsimony at ≥100 tips.
#' 0 (default) disables SA perturbation.
#' @param saTstart Numeric; initial Boltzmann temperature for SA cooling
#' schedule (default 20).
#' @param saTend Numeric; final Boltzmann temperature (default 0, strict
#' descent at end of each cycle).
#' @param saNphases Integer; number of temperature steps in the linear
#' cooling schedule per SA cycle (default 5).
#' @param saMovesPerPhase Integer; stochastic TBR moves per temperature
#' phase. 0 (default) = one move per tip (n_tip).
#'
#' @return A named list of class `"SearchControl"`.
#'
Expand Down Expand Up @@ -160,7 +174,16 @@ SearchControl <- function(
# When TRUE, each replicate draws its starting strategy via Thompson
# sampling from {Wagner-random, Wagner-Goloboff, Wagner-entropy,
# random-tree, pool-ratchet, pool-NNI-perturb}. Overrides wagnerBias.
adaptiveStart = FALSE
adaptiveStart = FALSE,
# Simulated annealing perturbation (PCSA, T-207)
# Multi-cycle SA with best-tree restart after drift. Each cycle:
# SA cooling → TBR reconverge → keep if improved.
# 0L = disabled. Typical: 3–5 cycles for large EW trees.
saCycles = 0L,
saTstart = 20.0,
saTend = 0.0,
saNphases = 5L,
saMovesPerPhase = 0L
) {
structure(
list(
Expand Down Expand Up @@ -196,7 +219,12 @@ SearchControl <- function(
consensusStableReps = as.integer(consensusStableReps),
adaptiveLevel = as.logical(adaptiveLevel),
consensusConstrain = as.logical(consensusConstrain),
adaptiveStart = as.logical(adaptiveStart)
adaptiveStart = as.logical(adaptiveStart),
saCycles = as.integer(saCycles),
saTstart = as.double(saTstart),
saTend = as.double(saTend),
saNphases = as.integer(saNphases),
saMovesPerPhase = as.integer(saMovesPerPhase)
),
class = "SearchControl"
)
Expand All @@ -216,6 +244,8 @@ print.SearchControl <- function(x, ...) {
"sectorMinSize", "sectorMaxSize"),
"Fuse/Pool" = c("fuseInterval", "fuseAcceptEqual",
"poolMaxSize", "poolSuboptimal"),
"SA Perturbation" = c("saCycles", "saTstart", "saTend",
"saNphases", "saMovesPerPhase"),
"Stopping" = c("consensusStableReps", "adaptiveLevel",
"consensusConstrain", "adaptiveStart")
)
Expand Down
149 changes: 149 additions & 0 deletions dev/pt_diagnostic_profiling.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
# T-199: PT diagnostic profiling across size range
# Run ts_parallel_temper_diag() on 8 representative matrices with default
# temperature ladder {0, 3, 9, 27} to identify where swaps become useful.

.libPaths(c("../.builds/TreeSearch-C", .libPaths()))
library(TreeSearch.C)
library(TreeTools)

# Fetch the diagnostic entry point straight from the package namespace —
# it is presumably not exported from TreeSearch.C, so library() alone would
# not put it on the search path.
ts_parallel_temper_diag <- get("ts_parallel_temper_diag",
                               envir = asNamespace("TreeSearch.C"))

# Benchmark matrices spanning the size range of interest (name -> NEXUS
# file, resolved relative to ../matrixes in the loop below).  Tip/character
# counts in the trailing comments are informational only.
matrices <- c(
  small_1 = "project3419.nex", # 40 tips, 368 chars
  small_2 = "project2604.nex", # 43 tips, 307 chars
  medium_1 = "project4531.nex", # 71 tips, 256 chars
  medium_2 = "project3741.nex", # 86 tips, 110 chars
  large_1 = "project3253.nex", # 125 tips, 394 chars
  large_2 = "project1221.nex", # 150 tips, 252 chars
  xlarge_1 = "project3763.nex", # 205 tips, 105 chars
  xlarge_2 = "project2722.nex" # 385 tips, 520 chars
)

# Flatten a phyDat-style list into the argument set the C-level diagnostic
# entry points expect: the contrast/weight/levels attributes are passed
# through, and the per-tip state vectors are stacked row-wise (one row per
# tip) into a tip_data matrix.  min_steps is always empty here.
prepare_for_diag <- function(dataset) {
  meta <- attributes(dataset)
  states <- unlist(dataset, use.names = FALSE)
  list(
    contrast = meta$contrast,
    tip_data = matrix(states, nrow = length(dataset), byrow = TRUE),
    weight = meta$weight,
    levels = meta$levels,
    min_steps = integer(0)
  )
}

# PT configuration: 4 chains on the fixed temperature ladder {0, 3, 9, 27}
# for 10 swap rounds.  moves_per_round = 0 defers to the C++ default
# (one move per tip, per the inline note).
n_chains <- 4L
temps <- c(0, 3, 9, 27)
rounds <- 10L
moves_per_round <- 0L # default = n_tip

# One entry per successfully processed matrix; consumed by the summary
# table at the end of the script.
results <- list()

for (nm in names(matrices)) {
  cat("\n=== ", nm, " (", matrices[nm], ") ===\n", sep = "")
  path <- file.path("../matrixes", matrices[nm])
  if (!file.exists(path)) {
    cat(" SKIP: file not found\n")
    next
  }

  # The whole per-matrix run is wrapped so one bad matrix cannot kill the
  # sweep.  NOTE(review): `res` is assigned but never read; the success
  # branch deliberately ends in NULL and `results` is updated by the
  # `results[[nm]] <- ...` assignment, which takes effect in the calling
  # (global) frame because tryCatch evaluates its expression there.
  res <- tryCatch({
    dataset <- ReadAsPhyDat(path)
    ntip <- length(dataset)
    nchar_total <- sum(attr(dataset, "weight"))
    cat(" Tips:", ntip, " Chars:", nchar_total, "\n")

    pd <- prepare_for_diag(dataset)

    # Fixed seed for reproducibility across matrices; wall time measured
    # around the C++ call only.
    set.seed(8127)
    t0 <- proc.time()
    diag <- ts_parallel_temper_diag(
      contrast = pd$contrast,
      tip_data = pd$tip_data,
      weight = pd$weight,
      levels = pd$levels,
      min_steps = pd$min_steps,
      concavity = -1,
      n_chains = n_chains,
      temperatures = temps,
      rounds = rounds,
      moves_per_round = moves_per_round
    )
    elapsed <- (proc.time() - t0)[["elapsed"]]

    # NOTE(review): `diag` shadows base::diag in the global env for the
    # rest of the script — harmless here but worth renaming eventually.
    cl <- diag$chain_log
    sl <- diag$swap_log

    cat(" Best score:", diag$best_score, "\n")
    cat(" Cold final:", diag$cold_final, "\n")
    cat(" Cold improvements from swaps:", diag$cold_improvements, "\n")
    cat(" Swap acceptance (overall):",
        diag$swaps_accepted, "/", diag$swaps_attempted, "\n")

    # Per-pair acceptance rates, keyed "lo-hi" on adjacent chain indices.
    if (nrow(sl) > 0) {
      pair_rates <- tapply(sl$accepted, paste(sl$pair_lo, sl$pair_hi, sep = "-"),
                           mean)
      cat(" Pair acceptance rates:\n")
      for (p in names(pair_rates)) {
        cat(" Pair", p, ":", sprintf("%.1f%%", pair_rates[p] * 100), "\n")
      }
    }

    cat(" Timing: cold_tbr=", round(diag$cold_tbr_ms, 1), "ms",
        " hot=", round(diag$hot_stochastic_ms, 1), "ms",
        " total=", round(diag$total_pt_ms, 1), "ms",
        " (wall:", round(elapsed, 1), "s)\n")
    # max(..., 0.01) guards against division by a zero total.
    cat(" Hot overhead:", sprintf("%.1f%%",
        diag$hot_stochastic_ms / max(diag$total_pt_ms, 0.01) * 100), "\n")

    # Per-chain score trajectories
    # NOTE(review): `temps[ch + 1]` assumes chain ids in the log are
    # 0-based (C++ convention) — confirm against the C++ side.
    cat(" Per-chain final scores:\n")
    for (ch in sort(unique(cl$chain))) {
      ch_rows <- cl[cl$chain == ch, ]
      cat(" Chain", ch, "(T=", temps[ch + 1], "):",
          "start=", ch_rows$score_before[1],
          "end=", tail(ch_rows$score_after, 1),
          "accept_rate=", sprintf("%.1f%%",
          sum(ch_rows$n_accepted) / max(sum(ch_rows$n_attempted), 1) * 100),
          "\n")
    }

    results[[nm]] <- list(
      diag = diag, ntip = ntip, nchar = nchar_total,
      chain_log = cl, swap_log = sl
    )
    NULL # success
  }, error = function(e) {
    cat(" ERROR:", conditionMessage(e), "\n")
  })
}

# Summary table ----
cat("\n\n=== SUMMARY TABLE ===\n")
rows <- lapply(names(results), function(nm) {
  entry <- results[[nm]]
  d <- entry$diag
  swaps <- entry$swap_log

  # Swap acceptance across the cold-warm boundary (chain pair 0-1);
  # stays NA when no such attempts appear in the log.
  pair01 <- NA
  if (nrow(swaps) > 0) {
    boundary <- swaps[swaps$pair_lo == 0 & swaps$pair_hi == 1, ]
    if (nrow(boundary) > 0) {
      pair01 <- mean(boundary$accepted)
    }
  }

  data.frame(
    matrix = nm, ntip = entry$ntip, nchar = entry$nchar,
    best = d$best_score,
    swap_rate = if (d$swaps_attempted > 0) d$swaps_accepted / d$swaps_attempted else 0,
    pair01_rate = pair01,
    cold_improve = d$cold_improvements,
    cold_ms = d$cold_tbr_ms,
    hot_ms = d$hot_stochastic_ms,
    total_ms = d$total_pt_ms,
    stringsAsFactors = FALSE
  )
})
summary_df <- do.call(rbind, rows)
print(summary_df, digits = 3)
Loading