ms609 · ms609 · Apr 1, 2026 · Mar 19, 2026 · Mar 19, 2026 · Mar 20, 2026
diff --git a/.github/workflows/ASan.yml b/.github/workflows/ASan.yml
@@ -55,8 +55,9 @@ jobs:
         run: |
           export LD_PRELOAD=$(gcc -print-file-name=libasan.so)
 
-          echo "PKG_CFLAGS = -g -O0 -fsanitize=address -fno-omit-frame-pointer" > src/Makevars
-          echo "PKG_CXXFLAGS = -g -O0 -fsanitize=address -fno-omit-frame-pointer" >> src/Makevars
+          # Replace PKG_CXXFLAGS in-place; preserve PKG_CPPFLAGS and PKG_LIBS
+          sed -i 's/^PKG_CXXFLAGS.*/PKG_CXXFLAGS = -g -O0 -fsanitize=address -fno-omit-frame-pointer/' src/Makevars
+          echo "PKG_CFLAGS = -g -O0 -fsanitize=address -fno-omit-frame-pointer" >> src/Makevars
 
           mkdir ~/.R
           echo "LDFLAGS = -g -O0 -fsanitize=address -fno-omit-frame-pointer" >> ~/.R/Makevars

diff --git a/.positai/settings.json b/.positai/settings.json
@@ -28,6 +28,17 @@
       "rm -f src/*.o src/*.dll": "allow",
       "tail *": "allow",
       "wc *": "allow"
+    },
+    "external_directory": {
+      "C:/Users/pjjg18/GitHub/TreeSearch-a/*": "allow",
+      "C:/Users/pjjg18/.positai/skills/r-package-profiling/references/*": "allow",
+      "../Quartet/*": "allow",
+      "../TreeTools/*": "allow"
+    },
+    "webfetch": {
+      "https://github.com/*": "allow",
+      "https://api.github.com/*": "allow",
+      "https://raw.githubusercontent.com/*": "allow"
     }
   },
   "model": {

diff --git a/AGENTS.md b/AGENTS.md
@@ -257,6 +257,20 @@ source("benchmark/bench-tree-distances.R")
 C++ compilation flags are controlled by `src/Makevars.win` (Windows) / `src/Makevars`.
 The package requires **C++17** (`CXX_STD = CXX17`).
 
+### Documentation and spelling checks
+
+After any work that adds or modifies roxygen comments, Rd files, NEWS.md, or
+vignettes, run:
+
+```r
+devtools::check_man()                # regenerates Rd files and checks for issues
+spelling::spell_check_package()      # flags potential misspellings
+```
+
+Legitimate technical terms, proper nouns, and code identifiers flagged by the
+spell checker should be added to `inst/WORDLIST` (one word per line,
+alphabetically sorted).  Only fix actual typos in the source.
+
 ### Code coverage
 
 Check that existing tests cover all new code.  The GHA test suite uses codecov.
@@ -271,6 +285,26 @@ covr::file_coverage(cov, "src/pairwise_distances.cpp")  # per-file summary
 Aim for full line coverage of new C++ and R code.  If a new code path is not
 exercised by the existing test suite, add targeted tests in `tests/testthat/`.
 
+### ⚠ Exploratory / risky R code — use a subprocess
+
+When running experimental R code that may be slow, allocate large objects,
+or involve complex loops (e.g., hill-climbing over tree space, brute-force
+evaluation of many candidate trees), **run it in a subprocess** rather than
+the interactive RStudio session.  Long-running or memory-heavy computations
+in the main session can freeze or crash RStudio.
+
+```r
+# Write the experiment to a temp script, then run via Rscript:
+writeLines(code, tmp <- tempfile(fileext = ".R"))
+system2("Rscript", tmp, stdout = TRUE, stderr = TRUE)
+
+# Or use callr for structured subprocess execution:
+callr::r(function() { ... }, timeout = 120)
+```
+
+This applies especially to prototype algorithm exploration (e.g., CID
+hill-climbing over split space) where per-iteration cost is uncertain.
+
 ---
 
 ## Completed Optimizations (this dev cycle)
@@ -1143,6 +1177,101 @@ for the cross-pairs case.
 
 ---
 
+## LinkingTo Header Exposure (`expose-lapjv` branch)
+
+Extracted LAP and MCI C++ APIs into `inst/include/TreeDist/` headers so that
+downstream packages (e.g., TreeSearch) can use `LinkingTo: TreeDist`:
+
+| Header | Content |
+|--------|---------|
+| `types.h` | `cost`, `lap_dim`, `lap_row`, `lap_col`, constants |
+| `cost_matrix.h` | `CostMatrix` class (Rcpp-free) |
+| `lap_scratch.h` | `LapScratch` struct |
+| `lap.h` | `lap()` declarations |
+| `lap_impl.h` | `lap()` implementation (include in exactly one TU) |
+| `mutual_clustering.h` | MCI declarations |
+| `mutual_clustering_impl.h` | MCI implementation (include in exactly one TU) |
+
+`src/lap.h` is now a thin wrapper that includes `<TreeDist/lap.h>` and
+re-exports types to global scope.
+
+### LAPJV codegen regression (diagnosed, characterised, accepted)
+
+Including `lap_impl.h` in `lap.cpp` changed the TU context enough for GCC 14's
+register allocator to produce ~8% more instructions in the Dijkstra hot loop,
+causing a consistent 20–25% regression on standalone LAPJV (n ≥ 400).
+
+**Root cause:** the installed-header version of `CostMatrix` (in
+`inst/include/TreeDist/cost_matrix.h`) has a different method set than main's
+monolithic `src/lap.h` (extra methods like `setWithTranspose()`, `dim8()`;
+missing test variants like `findRowSubminNaive`).  This changes GCC's
+optimization heuristics for the entire TU, even though `lap()` never calls
+the differing methods.
+
+**Fix:** define `lap()` directly in `lap.cpp` (not via `#include <TreeDist/lap_impl.h>`)
+with `__attribute__((optimize("align-functions=64", "align-loops=16")))`.
+The `lapjv()` wrapper fills the transposed buffer first (matching R's
+column-major storage) then untransposes — restoring the cache-friendly pattern.
+
+**VTune profiling (vtune-lap/, 2026-04-01):**  Profiled with `-O2 -g
+-fno-omit-frame-pointer` on the current branch (LAPJV 1999×1999 ×30s +
+400×400 ×30s + CID/MSD random 200-tip ×30s each).
+
+| Function | CPU Time | % | Notes |
+|---|---|---|---|
+| `TreeDist::CostMatrix::findRowSubmin` | 23.8s | 19.8% | Inlined into lap() at -O2 (separate symbol with -g) |
+| `TreeDist::lap` | 23.4s | 19.5% | Dijkstra phase dominates |
+| `msd_score` | 9.4s | 7.8% | Contains inlined lap() |
+| `mutual_clustering_score` | 9.3s | 7.7% | Contains inlined lap() |
+| `lapjv` (R wrapper) | 6.0s | 5.0% | Matrix conversion overhead |
+
+Source-line breakdown of `lap()`:
+
+| Phase | Lines | CPU Time (s) | % of lap() |
+|---|---|---|---|
+| Dijkstra inner update loop | 308–325 | ~16.5 | 72% |
+| Dijkstra min-scan | 274–287 | ~3.5 | 15% |
+| Matrix fill (lapjv wrapper) | 61–65 | ~2.7 | 12% |
+| findRowSubmin (reduction) | 173–184 | ~2.0 | 9% |
+
+**Optimisation attempts:**
+
+1. **`__restrict__` on Dijkstra pointers** (DONE, kept): Added restrict-qualified
+   local pointers (`d_ptr`, `pred_ptr`, `cl_ptr`) for the augment-solution phase
+   to eliminate potential alias reloads. Applied to both `lap.cpp` and `lap_impl.h`.
+   Benchmark: no measurable change in regression magnitude, but correct optimisation.
+
+2. **`#pragma GCC optimize("O3")`** (attempted, reverted): Forced O3 for the entire
+   `lap.cpp` TU. Benchmark: no measurable improvement; same alignment pattern.
+
+3. **Per-object Makevars flags** (attempted, failed): R's build system does not support
+   per-target variable overrides in `Makevars.win`.
+
+**Residual regression (confirmed, alignment-dependent):**
+
+| LAPJV size | dev/ref ratio | Direction |
+|---|---|---|
+| 400×400 | 0.81 | **Dev 19% faster** (alignment win) |
+| 1999×1999 | 1.08–1.13 | **Dev 8–13% slower** (alignment loss) |
+
+The regression is alignment-sensitive and manifests differently at different problem
+sizes. The same codegen change that makes 400×400 faster makes 1999×1999 slower.
+This is the classic instruction-alignment lottery: the Dijkstra inner loop's branch
+prediction and instruction fetch are affected by code placement that varies with
+TU context.
+
+**Conclusion:** The regression cannot be reliably eliminated without matching main's
+exact TU context (adding dead code back to the installed header, which is
+unacceptable for CRAN). **Tree distance metrics are completely unaffected** —
+`lap()` is called from `pairwise_distances.cpp` (different TU context), and
+benchmarks confirm neutral performance across all metrics and tree sizes.
+
+**Maintenance note:** if the `lap()` algorithm changes, update BOTH `src/lap.cpp`
+and `inst/include/TreeDist/lap_impl.h`.  The duplication is intentional — it
+preserves the TU context that was profiled and tuned.
+
+---
+
 ## Remaining Optimization Opportunities
 
 - Sort+merge pre-scan for `rf_info_score`: **DONE** — replaced O(n²) loop with

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: TreeDist
 Title: Calculate and Map Distances Between Phylogenetic Trees
-Version: 2.13.0.9001
+Version: 2.13.0.9002
 Authors@R: c(person("Martin R.", "Smith",
                     email = "martin.smith@durham.ac.uk", 
                     role = c("aut", "cre", "cph", "prg"), 

diff --git a/NAMESPACE b/NAMESPACE
@@ -141,6 +141,10 @@ export(StrainCol)
 export(SumOfRanges)
 export(SumOfVariances)
 export(SumOfVars)
+export(TransferConsensus)
+export(TransferDist)
+export(TransferDistSplits)
+export(TransferDistance)
 export(TreeDistPlot)
 export(TreeDistance)
 export(TreesConsistentWithTwoSplits)
@@ -152,6 +156,7 @@ export(entropy_int)
 export(is.HPart)
 importFrom(Rdpack,reprompt)
 importFrom(TreeTools,AllAncestors)
+importFrom(TreeTools,Consensus)
 importFrom(TreeTools,DropTip)
 importFrom(TreeTools,FirstMatchingSplit)
 importFrom(TreeTools,KeepTip)
@@ -179,6 +184,7 @@ importFrom(TreeTools,RootTree)
 importFrom(TreeTools,SplitFrequency)
 importFrom(TreeTools,SplitInformation)
 importFrom(TreeTools,SplitsInBinaryTree)
+importFrom(TreeTools,StarTree)
 importFrom(TreeTools,TipLabels)
 importFrom(TreeTools,TipsInSplits)
 importFrom(TreeTools,TreeIsRooted)

diff --git a/NEWS.md b/NEWS.md
@@ -1,4 +1,21 @@
-# TreeDist 2.13.0.9001
+# TreeDist 2.13.0.9002
+
+## New features
+
+- `TransferConsensus()` constructs a consensus tree that minimizes the sum
+  of transfer distances to a set of input trees, using a greedy
+  add-and-prune heuristic.  Unlike majority-rule consensus, which can be
+  highly unresolved when phylogenetic signal is diffuse, the transfer
+  consensus uses the finer-grained transfer distance to produce more
+  resolved trees.
+
+- `TransferDist()` computes the transfer dissimilarity between phylogenetic
+  trees, with scaled and unscaled variants.  Supports all-pairs, cross-pairs,
+  and single-pair computations.
+
+- LAP (Jonker–Volgenant linear assignment) and MCI (Mutual Clustering
+  Information) C++ implementations are now exposed via `inst/include/TreeDist/`
+  headers, allowing downstream packages to use `LinkingTo: TreeDist`.
 
 ## Performance
 
@@ -78,7 +95,7 @@ Typical speedups over v2.12.0 for tree sets where most splits are shared
 - Cross-pairs computations (`tree1` vs `tree2` where both are lists) now
   use the same optimized batch path as all-pairs computations.
 
-### KendallColijn distance
+### Kendall & Colijn distance
 
 - `KCVector()` reimplemented in C++, giving ~220× speedup per tree.
 

diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -204,6 +204,77 @@ spr_table_7 <- function(sp1, sp2) {
     .Call(`_TreeDist_spr_table_7`, sp1, sp2)
 }
 
+#' Transfer consensus (C++ implementation)
+#'
+#' @param splits_list List of raw matrices (one per tree), each from as.Splits().
+#' @param n_tip Number of tips.
+#' @param scale Logical: use scaled transfer distance?
+#' @param greedy_best_flag Logical: TRUE for "best", FALSE for "first".
+#' @param init_majority Logical: TRUE to start from majority-rule splits.
+#'
+#' @return A `LogicalVector` of length n_splits indicating which pooled splits
+#'   are included in the consensus, plus attributes "raw_splits" (a raw matrix
+#'   of all unique splits) and "light_side" (integer vector).
+#' @keywords internal
+cpp_transfer_consensus <- function(splits_list, n_tip, scale, greedy_best_flag, init_majority, n_threads = 1L) {
+    .Call(`_TreeDist_cpp_transfer_consensus`, splits_list, n_tip, scale, greedy_best_flag, init_majority, n_threads)
+}
+
+#' @keywords internal
+cpp_tc_profile <- function(splits_list, n_tip, scale, greedy_best_flag, init_majority, n_iter, n_threads = 1L) {
+    .Call(`_TreeDist_cpp_tc_profile`, splits_list, n_tip, scale, greedy_best_flag, init_majority, n_iter, n_threads)
+}
+
+#' Per-pair transfer dissimilarity
+#'
+#' @param x,y Raw matrices representing splits (from as.Splits()).
+#' @param nTip Integer: number of tips.
+#'
+#' @return A list with components:
+#'   - score_scaled: scaled transfer dissimilarity (double)
+#'   - score_unscaled: unscaled transfer dissimilarity (double)
+#'   - `matching_xy`: integer vector, best match in y for each split in x (1-based, NA if sentinel)
+#'   - `matching_yx`: integer vector, best match in x for each split in y (1-based, NA if sentinel)
+#' @keywords internal
+cpp_transfer_dist <- function(x, y, nTip) {
+    .Call(`_TreeDist_cpp_transfer_dist`, x, y, nTip)
+}
+
+#' @keywords internal
+cpp_transfer_dist_scored <- function(x, y, nTip, scale) {
+    .Call(`_TreeDist_cpp_transfer_dist_scored`, x, y, nTip, scale)
+}
+
+#' All-pairs transfer dissimilarity (OpenMP)
+#'
+#' @param splits_list List of raw matrices (one per tree).
+#' @param n_tip Number of tips.
+#' @param scale Logical: use scaled transfer dissimilarity?
+#' @param n_threads Number of OpenMP threads.
+#'
+#' @return Numeric vector of length choose(N,2) in dist order.
+#' @keywords internal
+cpp_transfer_dist_all_pairs <- function(splits_list, n_tip, scale, n_threads) {
+    .Call(`_TreeDist_cpp_transfer_dist_all_pairs`, splits_list, n_tip, scale, n_threads)
+}
+
+#' Cross-pairs transfer dissimilarity (OpenMP)
+#'
+#' @param splits_a,splits_b Lists of raw matrices.
+#' @param n_tip Number of tips.
+#' @param scale Logical: use scaled transfer dissimilarity?
+#' @param n_threads Number of OpenMP threads.
+#'
+#' @return Numeric matrix of dimension `nA` x `nB`.
+#' @keywords internal
+cpp_transfer_dist_cross_pairs <- function(splits_a, splits_b, n_tip, scale, n_threads) {
+    .Call(`_TreeDist_cpp_transfer_dist_cross_pairs`, splits_a, splits_b, n_tip, scale, n_threads)
+}
+
+cpp_mci_impl_score <- function(x, y, n_tips) {
+    .Call(`_TreeDist_cpp_mci_impl_score`, x, y, n_tips)
+}
+
 cpp_robinson_foulds_distance <- function(x, y, nTip) {
     .Call(`_TreeDist_cpp_robinson_foulds_distance`, x, y, nTip)
 }

diff --git a/R/hierarchical_mutual_information.R b/R/hierarchical_mutual_information.R
@@ -66,6 +66,10 @@ HierarchicalMutualInfo <- function(tree1, tree2 = NULL, normalize = FALSE) {
       1
     }
   } else {
+    if (inherits(tree1, "phylo") && inherits(tree2, "phylo") &&
+        NTip(tree1) != NTip(tree2)) {
+      stop("Trees must have the same number of leaves")
+    }
     hp2 <- as.HPart(tree2, tree1)
     hmi <- HMI_xptr(hp1, hp2)
     if (isFALSE(normalize)) {
@@ -132,6 +136,8 @@ HH <- SelfHMI
 #' @rdname HierarchicalMutualInfo
 #' @export
 EHMI <- function(tree1, tree2, precision = 0.01, minResample = 36) {
+  if (minResample < 2L) stop("Must perform at least one resampling")
+  if (precision < 1e-8) stop("Tolerance too low")
   EHMI_xptr(as.HPart(tree1), as.HPart(tree2), as.numeric(precision),
                 as.integer(minResample)) / log(2)
 }
@@ -162,6 +168,8 @@ EHMI <- function(tree1, tree2, precision = 0.01, minResample = 36) {
 #' @rdname HierarchicalMutualInfo
 #' @export
 AHMI <- function(tree1, tree2, Mean = max, precision = 0.01, minResample = 36) {
+  if (minResample < 2L) stop("Must perform at least one resampling")
+  if (precision < 1e-8) stop("Tolerance too low")
   hp1 <- as.HPart(tree1)
   hp2 <- as.HPart(tree2, hp1)