-
Notifications
You must be signed in to change notification settings - Fork 1
/
10-timing.Rmd
136 lines (114 loc) · 4.49 KB
/
10-timing.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
---
title: "3.4: Timing benchmarks"
output:
  rmarkdown::html_vignette:
    default: yes
bibliography: ../inst/REFERENCES.bib
csl: https://raw.githubusercontent.com/citation-style-language/styles/master/apa-old-doi-prefix.csl
vignette: >
  %\VignetteIndexEntry{3.4: Timing benchmarks}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---
```{r pc-specs, echo = FALSE}
# Describe the machine that renders this vignette.  `benchmarkme` is optional:
# when it is not installed, placeholder values stand in for the CPU and RAM.
# NOTE(review): the inline text later in this document formats `ram` with
# `benchmarkme:::print.ram()`, which cannot be resolved when benchmarkme is
# absent, so the placeholder path presumably still fails to render -- confirm.
if (!requireNamespace('benchmarkme', quietly = TRUE)) {
  cpu <- list(model_name = "UNKNOWN", no_of_cores = "UNKNOWN")
  ram <- "UNKNOWN"
} else {
  cpu <- benchmarkme::get_cpu()
  ram <- benchmarkme::get_ram()
}
```
```{r setup, echo=FALSE, message=FALSE}
# Snapshot the graphical parameters so that they can be reinstated once the
# document has been rendered.  `no.readonly = TRUE` leaves out the read-only
# parameters, which cannot be set again and only produce warnings on restore.
origPar <- par(no.readonly = TRUE)

# Note: For maximum speed, check compiler options in ~/.R/Makevars when building
# the package. Suggest setting PKG_CXXFLAGS = -Ofast -march=native -mtune=native
library('TreeDist')

# Local aliases for functions from other packages, so that the benchmark
# expressions can call them by a short name.
path.dist <- phangorn::path.dist
SPR.dist <- phangorn::SPR.dist
TBRDist <- TBRDist::TBRDist
RF <- TreeDist::RobinsonFoulds

nTrees <- 45L # For c. 1000 comparisons
nRepeats <- 10L # Number of times each benchmark expression is evaluated
# Number of unordered pairs of distinct trees: 45 * 44 / 2 = 990
nPairs <- nTrees * (nTrees - 1) / 2
# Generate the trees to be compared: a chain of `nTrees` trees in which each
# tree is obtained by applying TreeSearch::SPR() to its predecessor, starting
# from TreeTools::PectinateTree(nTip).
#
# nTip: passed to TreeTools::PectinateTree() to build the starting tree.
# Returns a list of class 'multiPhylo', each tree having been passed through
# TreeTools::Preorder() and then TreeTools::Cladewise().
#
# The global random seed is reset to 0 on every call, so that the same chain
# is produced each time.
SPRWalk <- function (nTip) {
  set.seed(0L)
  walk <- vector('list', nTrees)
  current <- TreeTools::PectinateTree(nTip)
  walk[[1]] <- current
  for (step in seq_len(nTrees)[-1]) {
    current <- TreeSearch::SPR(current)
    walk[[step]] <- current
  }
  structure(
    lapply(walk, function (tree) {
      TreeTools::Cladewise(TreeTools::Preorder(tree))
    }),
    class = 'multiPhylo'
  )
}
# Benchmark every tree comparison method on a single set of trees.
#
# trees: the trees to compare; in this document, the output of SPRWalk().
# Returns (invisibly) the object produced by microbenchmark::microbenchmark(),
# in which each named expression has been evaluated `nRepeats` times.
#
# The expression names (pid, msid, ...) are used elsewhere in this document to
# look up plot colours and table labels, so they must be kept as they are.
Times <- function (trees) {
  # Convert to splits once, up front, so that the conversion is not counted
  # in the timings of the methods that operate on splits.
  treeSplits <- TreeTools::as.Splits(trees)
  measured <- microbenchmark::microbenchmark(
    pid = SharedPhylogeneticInfo(treeSplits, normalize = FALSE),
    msid = MatchingSplitInfo(treeSplits, normalize = FALSE),
    cid = MutualClusteringInfo(treeSplits, normalize = FALSE),
    nye = NyeSimilarity(treeSplits, normalize = FALSE),
    jnc2 = JaccardRobinsonFoulds(treeSplits, k = 2L, similarity = TRUE,
                                 allowConflict = FALSE, normalize = FALSE),
    jnc4 = JaccardRobinsonFoulds(treeSplits, k = 4L, similarity = TRUE,
                                 allowConflict = FALSE, normalize = FALSE),
    jco2 = JaccardRobinsonFoulds(treeSplits, k = 2L, similarity = TRUE,
                                 allowConflict = TRUE, normalize = FALSE),
    jco4 = JaccardRobinsonFoulds(treeSplits, k = 4L, similarity = TRUE,
                                 allowConflict = TRUE, normalize = FALSE),
    ms = MatchingSplitDistance(treeSplits),
    qd = Quartet::ManyToManyQuartetAgreement(trees),
    mast = TreeTools::PairwiseDistances(trees, MASTSize, rooted = FALSE),
    nni = TreeTools::PairwiseDistances(trees, NNIDist, 7L),
    spr = SPR.dist(trees),
    tbr = TBRDist(trees, exact = FALSE),
    rf = RF(trees),
    icrf = InfoRobinsonFoulds(treeSplits),
    path = path.dist(trees),
    kc = KendallColijn(trees),
    es = KendallColijn(trees, Vector = SplitVector),
    times = nRepeats
  )
  invisible(measured)
}
# Draw a box plot of the benchmark timings for one set of trees.
#
# timings: the benchmark results returned by Times().
# nTip:    number of leaves in the benchmarked trees; shown in the plot title.
# Returns whatever the boxplot() method returns.
Plot <- function (timings, nTip) {
  # Shrink the text for this plot only; the previous setting is reinstated
  # when the function exits, so later plots are unaffected.
  oldPar <- par(cex = 0.8)
  on.exit(par(oldPar), add = TRUE)

  methodKeys <- as.character(summary(timings)[, 'expr'])
  # State the unit explicitly: left to its default, the microbenchmark boxplot
  # method picks a unit to suit the data, which need not be the milliseconds
  # that the axis label promises.
  boxplot(timings, unit = 'ms',
          border = TreeDistData::TreeDistCol(methodKeys),
          xlab = 'Method', ylab = 'Total time elapsed / ms',
          main = paste(nTip, 'leaves'))
}
```
The time that each method took to compare all `r nPairs` pairs of trees
was measured, repeating each measurement `r nRepeats` times.
The results below come from a benchmarking exercise conducted on an
`r cpu$model_name` machine with
<!--`r cpu$no_of_cores` cores and-->
`r as.character(benchmarkme:::print.ram(ram))` of RAM,
comparing `r nTrees` trees obtained by performing successive subtree pruning and
regrafting rearrangements on a pectinate starting tree.
The values reported in Smith [-@Smith2020] were calculated on a modest desktop
computer, and will differ from those reported here, which are generated on
whatever hardware is used to render this documentation.
## Figure
```{r timings-20, echo = FALSE, output = 'asis', fig.width = 8, fig.height = 4}
# Benchmark every method on trees with 20 leaves, then plot the timings
times20 <- Times(trees = SPRWalk(nTip = 20))
Plot(timings = times20, nTip = 20)
```
```{r timings-50, echo = FALSE, output = 'asis', fig.width = 8, fig.height = 4}
# Benchmark every method on trees with 50 leaves, then plot the timings
times50 <- Times(trees = SPRWalk(nTip = 50))
Plot(timings = times50, nTip = 50)
```
## Tabulation
```{r timings-table, echo = FALSE}
# Mean time per expression, in microseconds, for the 20-leaf trees; rows are
# labelled by looking up each expression name in TreeDistData::tdMdAbbrevs.
summary20 <- summary(times20, unit = 'us')
rownames(summary20) <-
  TreeDistData::tdMdAbbrevs[as.character(summary20[, 'expr'])]
mean20 <- summary20[, 'mean', drop = FALSE]
mean50 <- summary(times50, unit = 'us')[, 'mean', drop = FALSE]

# One column per tree size
results <- cbind(mean20, mean50)
colnames(results) <- paste(c(20, 50), 'leaves')

# Order the rows by the 20-leaf timings, divide the total time by the number of
# tree pairs, and report the result to two significant figures.
fastestFirst <- order(results[, 1])
perComparison <- results[fastestFirst, ] / nPairs
twoSF <- formatC(as.matrix(signif(perComparison, 2)),
                 format = 'fg', digits = 2L)
TreeDistData::.TDDTable(DT::datatable, twoSF,
                        caption = "Mean time per comparison / µs")
```
```{r echo=FALSE}
# Reinstate the graphical parameters saved in `origPar` by the setup chunk.
# suppressWarnings() is presumably here to silence the warnings that par()
# gives when asked to set read-only parameters.
suppressWarnings(par(origPar))
```
## Reference