From df69c2e350b84783d2213788026255838874e820 Mon Sep 17 00:00:00 2001 From: John Huddleston Date: Wed, 16 Aug 2023 16:10:01 -0700 Subject: [PATCH 1/2] Export clock rate covariance matrix and std dev Exports the clock rate covariance matrix and std dev from the phylogenetic regression performed by TreeTime in augur refine. This information was already available in a TreeTime data structure and it is useful to users who do not know the std dev of their pathogen's clock rate already. Adds functional tests for behavior when no clock rate is provided by the user and when one is. In the latter case, we shouldn't have a covariance matrix. --- augur/refine.py | 6 ++++ tests/functional/refine/cram/timetree.t | 20 +++++++++++++ .../cram/timetree_with_fixed_clock_rate.t | 29 +++++++++++++++++++ 3 files changed, 55 insertions(+) create mode 100644 tests/functional/refine/cram/timetree_with_fixed_clock_rate.t diff --git a/augur/refine.py b/augur/refine.py index f5fcc2a17..ea0cf2eb3 100644 --- a/augur/refine.py +++ b/augur/refine.py @@ -259,6 +259,12 @@ def run(args): node_data['clock'] = {'rate': tt.date2dist.clock_rate, 'intercept': tt.date2dist.intercept, 'rtt_Tmrca': -tt.date2dist.intercept/tt.date2dist.clock_rate} + # Include the standard deviation of the clock rate, if the covariance + # matrix is available. + if hasattr(tt.date2dist, "cov") and tt.date2dist.cov is not None: + node_data["clock"]["cov"] = tt.date2dist.cov + node_data["clock"]["rate_std"] = np.sqrt(tt.date2dist.cov[0, 0]) + if args.coalescent=='skyline': try: skyline, conf = tt.merger_model.skyline_inferred(gen=args.gen_per_year, confidence=2) diff --git a/tests/functional/refine/cram/timetree.t b/tests/functional/refine/cram/timetree.t index 52fb8afbc..f59c35313 100644 --- a/tests/functional/refine/cram/timetree.t +++ b/tests/functional/refine/cram/timetree.t @@ -21,3 +21,23 @@ Confirm that TreeTime trees match expected topology and branch lengths. $ python3 "$TESTDIR/../../../../scripts/diff_trees.py" "$TESTDIR/../data/tree.nwk" tree.nwk --significant-digits 2 {} + +Confirm that JSON output includes details about the clock rate. + + $ grep -A 15 '\"clock\"' branch_lengths.json + "clock": { + "cov": [ + [ + .*, (re) + .* (re) + ], + [ + .*, (re) + .* (re) + ] + ], + "intercept": .*, (re) + "rate": .*, (re) + "rate_std": .*, (re) + "rtt_Tmrca": .* (re) + }, diff --git a/tests/functional/refine/cram/timetree_with_fixed_clock_rate.t b/tests/functional/refine/cram/timetree_with_fixed_clock_rate.t new file mode 100644 index 000000000..90f7461a0 --- /dev/null +++ b/tests/functional/refine/cram/timetree_with_fixed_clock_rate.t @@ -0,0 +1,29 @@ +Setup + + $ source "$TESTDIR"/_setup.sh + +Try building a time tree with a fixed clock rate and clock std dev. + + $ ${AUGUR} refine \ + > --tree "$TESTDIR/../data/tree_raw.nwk" \ + > --alignment "$TESTDIR/../data/aligned.fasta" \ + > --metadata "$TESTDIR/../data/metadata.tsv" \ + > --output-tree tree.nwk \ + > --output-node-data branch_lengths.json \ + > --timetree \ + > --coalescent opt \ + > --date-confidence \ + > --date-inference marginal \ + > --clock-rate 0.0012 \ + > --clock-std-dev 0.0002 \ + > --clock-filter-iqd 4 \ + > --seed 314159 &> /dev/null + +Confirm that JSON output does not include information about the clock rate std dev, since it was provided by the user. + + $ grep -A 4 '\"clock\"' branch_lengths.json + "clock": { + "intercept": .*, (re) + "rate": .*, (re) + "rtt_Tmrca": .* (re) + }, From c28535be2eb29ac7212272b0de2af2dc7bf14b3a Mon Sep 17 00:00:00 2001 From: John Huddleston Date: Thu, 24 Aug 2023 13:20:07 -0700 Subject: [PATCH 2/2] Update changelog --- CHANGES.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 3bc599cfa..075e8486b 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,11 +2,16 @@ ## __NEXT__ +### Features + +* refine: Export covariance matrix and standard deviation for clock rate regression in the node data JSON output when these values are calculated by TreeTime. These new values appear in the `clock` data structure of the JSON output as `cov` and `rate_std` keys, respectively. [#1284][] (@huddlej) + ### Bug fixes * distance: Improve documentation by describing how gaps get treated as indels and how users can ignore specific characters in distance calculations. [#1285][] (@huddlej) * Fix help output compatibility with non-Unicode streams. [#1290][] (@victorlin) +[#1284]: https://github.com/nextstrain/augur/pull/1284 [#1285]: https://github.com/nextstrain/augur/pull/1285 [#1290]: https://github.com/nextstrain/augur/pull/1290