Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: TreeTools
Title: Create, Modify and Analyse Phylogenetic Trees
Version: 2.1.0.9003
Version: 2.1.0.9004
Authors@R: c(
person("Martin R.", 'Smith', role = c("aut", "cre", "cph"),
email = "martin.smith@durham.ac.uk",
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# TreeTools 2.1.0.9004 (2026-03-12) #

- Rewrite popcount calculation for more efficient `TipsInSplits()`.

# TreeTools 2.1.0.9003 (2026-03-09) #

- `inst/include/TreeTools/tree_number.h` added to support downstream packages
Expand Down
6 changes: 6 additions & 0 deletions benchmark/_compare_results.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,18 @@ for (pr_file in pr_files) {
main_iqr)

threshold_percent <- 6 # Changes of ~5% are frequent
# Sub-millisecond benchmarks are dominated by system jitter on CI runners;
# require an absolute difference floor before flagging.
min_meaningful_diff <- 2e-4 # 0.2 ms (times are in seconds)
abs_diff <- abs(median_pr - median_main)

is_faster <- matched &&
abs_diff > min_meaningful_diff &&
median_pr < median_main - 2 * mad_main &&
median_pr < noise_range[[1]]

is_slower <- matched &&
abs_diff > min_meaningful_diff &&
median_pr > median_main + 2 * mad_main &&
median_pr > noise_range[[2]]

Expand Down
7 changes: 4 additions & 3 deletions benchmark/_init.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
library("TreeTools")

Benchmark <- function(..., min_iterations = NULL) {
result <- bench::mark(..., min_iterations = min_iterations %||% 3,
time_unit = "us")
Benchmark <- function(..., min_iterations = NULL, min_time = NULL) {
args <- list(..., min_iterations = min_iterations %||% 3, time_unit = "us")
if (!is.null(min_time)) args[["min_time"]] <- min_time
result <- do.call(bench::mark, args)
if (interactive()) {
print(result)
} else {
Expand Down
7 changes: 4 additions & 3 deletions benchmark/bench-PathLengths.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@ source("benchmark/_init.R") # sets seed

tr80 <- rtree(80)

Benchmark(TreeTools:::path_lengths(tr80$edge, tr80$edge.length, FALSE))
Benchmark(PathLengths(tr80, full = TRUE))
Benchmark(TreeTools:::path_lengths(tr80$edge, tr80$edge.length, FALSE),
min_time = 2)
Benchmark(PathLengths(tr80, full = TRUE), min_time = 2)

tr80Unif <- tr80
tr80Unif[["edge.length"]] <- NULL
Benchmark(PathLengths(tr80Unif, full = TRUE))
Benchmark(PathLengths(tr80Unif, full = TRUE), min_time = 2)

tr2000 <- rtree(2000)
Benchmark(PathLengths(tr2000, full = TRUE))
29 changes: 13 additions & 16 deletions inst/include/TreeTools/SplitList.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,26 +38,25 @@ namespace TreeTools {
return T(1) << bit_pos;
}

#if __cplusplus >= 202002L
#include <bit> // C++20 header for std::popcount
// Hardware POPCNT: available on all x86-64 since 2008 (Nehalem / Barcelona).
// Inline asm emits the instruction directly, without requiring -mpopcnt.
#if (defined(__GNUC__) || defined(__clang__)) && defined(__x86_64__)
inline int32 count_bits(splitbit x) {
return static_cast<int32>(std::popcount(x));
uint64_t result;
__asm__ ("popcnt %1, %0" : "=r" (result) : "r" (x));
return static_cast<int32>(result);
}
// Option 2: Fallback for C++17 and older
#else
#if defined(__GNUC__) || defined(__clang__)
// GCC and Clang support __builtin_popcountll for long long
inline int32 count_bits(splitbit x) {
return static_cast<int32>(__builtin_popcountll(x));
}
#elif defined(_MSC_VER)
#elif defined(_MSC_VER) && defined(_M_X64)
#include <intrin.h>
inline int32 count_bits(splitbit x) {
return static_cast<int32>(__popcnt64(x));
}
#elif defined(__GNUC__) || defined(__clang__)
// Non-x86 (ARM, etc.): builtin maps to efficient native instruction
inline int32 count_bits(splitbit x) {
return static_cast<int32>(__builtin_popcountll(x));
}
#else
// A slower, but safe and highly portable fallback for all other compilers
// This is a last resort if no built-in is available.
inline int32_t count_bits(splitbit x) {
int32_t count = 0;
while (x != 0) {
Expand All @@ -66,9 +65,7 @@ namespace TreeTools {
}
return count;
}
#endif // Compiler check for builtins

#endif // C++20 check
#endif

class SplitList {
public:
Expand Down
4 changes: 4 additions & 0 deletions src/tips_in_splits.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ Rcpp::IntegerVector tips_in_splits(Rcpp::RawMatrix splits) {
const unsigned char* splits_i = splits.begin() + i;
for (int32 bin = 0; bin < n_bin; ++bin) {
const unsigned char* in_bin = splits_i + bin * n_split;
// __builtin_popcount on 8-bit values; the inline asm POPCNT in
// count_bits (SplitList.h) handles 64-bit splitbit values.
// For 8-bit values, __builtin_popcount is fine — it's a tiny
// fraction of runtime and not worth inline asm.
ret[i] += static_cast<int>(__builtin_popcount(*in_bin));
}
}
Expand Down
Loading