Skip to content

Commit

Permalink
NNI Engine and Grafted DAG (#377)
Browse files Browse the repository at this point in the history
* Grafted DAG: This allows modifying the SubsplitDAG with the quick
adding and removing of proposed NNIs to DAG.

* NNI Engine: This is an engine for using Nearest Neighbor Interchanges to perform a systematic search on a DAG.
  * Finds all NNIs adjacent to DAG.
  * Adds all NNIs to DAG via Graft.
  * Evaluates likelihood of NNI. (Unfinished - left to Issue #405 and #404)
  * Filter NNIs to accept or reject. (Unfinished - left to Issue #405)
  * Add accepted NNIs to DAG.
  * Remove adjacent NNIs from Graft and Add back NNIs adjacent to new Accepted NNIs. Return to Step c).

    Closes #372
  • Loading branch information
davidrich27 committed Mar 3, 2022
1 parent 6c5c4ac commit 58579dc
Show file tree
Hide file tree
Showing 23 changed files with 1,525 additions and 245 deletions.
7 changes: 6 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ _modules
_sources
.RData
.Rhistory
.vscode

# Python packaging
dist/
Expand All @@ -59,3 +58,9 @@ src/CMakeLists.txt
# OS Metadata
.DS_Store
._.DS_Store

# Developer Tools
.vscode
*.code-workspace
.gdbinit

4 changes: 3 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,12 @@ add_library(bito-core SHARED
src/gp_engine.cpp
src/gp_instance.cpp
src/gp_operation.cpp
src/graft_dag.cpp
src/mersenne_twister.cpp
src/node.cpp
src/numerical_utils.cpp
src/nni_engine.cpp
src/nni_operation.cpp
src/parser.cpp
src/phylo_model.cpp
src/psp_indexer.cpp
Expand All @@ -91,7 +94,6 @@ add_library(bito-core SHARED
src/site_pattern.cpp
src/stick_breaking_transform.cpp
src/subsplit_dag.cpp
src/subsplit_dag_nni.cpp
src/substitution_model.cpp
src/taxon_name_munging.cpp
src/tidy_subsplit_dag.cpp
Expand Down
8 changes: 8 additions & 0 deletions data/four_taxon.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
>x0
AAAT
>x1
AAAT
>x2
CATT
>x3
CATT
2 changes: 2 additions & 0 deletions data/four_taxon_simple_after_nni.nwk
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
(x0,(x1,(x2,x3)));
(x0,((x1,x2),x3));
1 change: 1 addition & 0 deletions data/four_taxon_simple_before_nni_1.nwk
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
(x0,(x1,(x2,x3)));
1 change: 1 addition & 0 deletions data/four_taxon_simple_before_nni_2.nwk
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
(x0,(x3,(x2,x1)));
1 change: 1 addition & 0 deletions data/four_taxon_simple_before_nni_2b.nwk
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
(x0,((x1,x2),x3));
6 changes: 3 additions & 3 deletions src/bitset.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,8 @@ class Bitset {
//
// Make a "leaf subsplit" (pairs given nonempty_clade with an empty_clade).
static Bitset LeafSubsplitOfNonemptyClade(const Bitset &nonempty_clade);
// Make a "leaf subsplit" of a given parent subsplit. The leftside clade of
// the parent subsplit must be non-empty and the rightside clade must be a singleton.
// Make a "leaf subsplit" of a given parent subsplit. The left clade of
// the parent subsplit must be non-empty and the right clade must be a singleton.
static Bitset LeafSubsplitOfParentSubsplit(const Bitset &parent_subsplit);
// Make the UCA (universal common ancestor) subsplit of the DAG root node with the
// given taxon count. Since subsplit bitsets are always big-small, the DAG root node
Expand Down Expand Up @@ -424,7 +424,7 @@ TEST_CASE("Bitset: Clades, Subsplits, PCSPs") {
CHECK_EQ(Bitset("101010").SubsplitIsLeftChildOf(Bitset("111000")), true);
// #350 commented out code
// CHECK_EQ(Bitset::SubsplitIsChildOfWhichParentClade(Bitset("111000"),
// Bitset("101010")),true);
// Bitset("101010")), true);
CHECK_EQ(Bitset("00100001").SubsplitIsRightChildOf(Bitset("11000011")), true);
// CHECK_EQ(Bitset::SubsplitIsChildOfWhichParentClade(Bitset("000111"),
// Bitset("101010")), false);
Expand Down
177 changes: 146 additions & 31 deletions src/gp_doctest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,22 @@
#include "rooted_sbn_instance.hpp"
#include "stopwatch.hpp"
#include "tidy_subsplit_dag.hpp"
#include "nni_engine.hpp"
#include "plv_handler.hpp"

using namespace GPOperations; // NOLINT

// #350 consider use of GPCSP.
// GPCSP stands for generalized PCSP-- see text.
// #350 remove all uses of GPCSP.

// Let the "venus" node be the common ancestor of mars and saturn.
enum HelloGPCSP { jupiter, mars, saturn, venus, rootsplit, root };

// *** GPInstances used for testing ***

GPInstance GPInstanceOfFiles(const std::string& fasta_path,
const std::string& newick_path) {
GPInstance inst("_ignore/mmapped_plv.data");
GPInstance GPInstanceOfFiles(
const std::string& fasta_path, const std::string& newick_path,
const std::string mmap_filepath = std::string("_ignore/mmapped_plv.data")) {
GPInstance inst(mmap_filepath);
inst.ReadFastaFile(fasta_path);
inst.ReadNewickFile(newick_path);
inst.MakeEngine();
Expand Down Expand Up @@ -829,28 +830,33 @@ TEST_CASE("GPInstance: Only add child node tests") {
CHECK_EQ(dag.GetEdgeRange(dag.GetDAGNode(11).GetBitset(), false).second, 5);
}

TEST_CASE("GPInstance: SubsplitDAG NNI (Nearest Neighbor Interchange)") {
// This test builds a DAG, tests if engine generates the same set of adjacent NNIs and
// manually created set. Then adds a node pair to DAG, and tests if engine updates
// correctly.
TEST_CASE("NNI Engine: Adjacent NNI Maintenance") {
// Simple DAG that contains a shared edge, internal leafward fork, and an internal
// rootward fork.
const std::string fasta_path = "data/six_taxon.fasta";
auto inst = GPInstanceOfFiles(fasta_path, "data/six_taxon_rooted_simple.nwk");
SubsplitDAG& dag = inst.GetDAG();
GPDAG& dag = inst.GetDAG();
GPEngine& gp_engine = *inst.GetEngine();

NNISet correct_adjacent_nnis;

auto set_of_nnis = SetOfNNIs();
auto set_of_nnis_2 = SetOfNNIs();
auto correct_set_of_nnis = SetOfNNIs();
auto nni_engine = NNIEngine(dag, gp_engine);
auto nni_engine_2 = NNIEngine(dag, gp_engine);

// Build NNI Set from current DAG state.
SyncSetOfNNIsWithDAG(set_of_nnis, dag);
// Build adjacent NNIs from current DAG state.
nni_engine.SyncAdjacentNNIsWithDAG();

// Functions for quick manual insertion/removal for Correct NNI Set.
auto InsertNNI = [&correct_set_of_nnis](Bitset parent, Bitset child) {
auto InsertNNI = [&correct_adjacent_nnis](Bitset parent, Bitset child) {
auto nni = NNIOperation(parent, child);
correct_set_of_nnis.Insert(nni);
correct_adjacent_nnis.insert(nni);
};
auto RemoveNNI = [&correct_set_of_nnis](Bitset parent, Bitset child) {
auto RemoveNNI = [&correct_adjacent_nnis](Bitset parent, Bitset child) {
auto nni = NNIOperation(parent, child);
correct_set_of_nnis.Erase(nni);
correct_adjacent_nnis.erase(nni);
};

// For images and notes describing this part of the test case, see
Expand Down Expand Up @@ -884,19 +890,19 @@ TEST_CASE("GPInstance: SubsplitDAG NNI (Nearest Neighbor Interchange)") {
Bitset::Subsplit("000100", "000001")); // (4|35)-(3|5)
InsertNNI(Bitset::Subsplit("000100", "000011"),
Bitset::Subsplit("000010", "000001")); // (3|45)-(4|5)
// Check that `BuildSetOfNNIs()` added correct set of nnis.
CHECK_EQ(set_of_nnis, correct_set_of_nnis);

// Now we add a node pair to DAG so we can check UpdateSetOfNNIsAfterAddNodePair.
// Check that `BuildNNISet()` added correct set of nnis.
auto adjacent_nnis = nni_engine.GetAdjacentNNIs();
CHECK_EQ(adjacent_nnis, correct_adjacent_nnis);

// Now we add a node pair to DAG so we can check UpdateAdjacentNNIsAfterAddNodePair.
// see https://github.com/phylovi/bito/pull/366#issuecomment-922781415
auto nni_to_add =
std::make_pair(Bitset::Subsplit("000110", "001001"),
Bitset::Subsplit("001000", "000001")); // (34|25)-(2|5)
dag.AddNodePair(nni_to_add.first, nni_to_add.second);
NNIOperation nni_to_add(Bitset::Subsplit("000110", "001001"),
Bitset::Subsplit("001000", "000001")); // (34|25)-(2|5)
dag.AddNodePair(nni_to_add);

// Update NNI.
UpdateSetOfNNIsAfterDAGAddNodePair(set_of_nnis, dag, nni_to_add.first,
nni_to_add.second);
nni_engine.UpdateAdjacentNNIsAfterDAGAddNodePair(nni_to_add);
// Add parents of parent (edge 8) to NNI Set.
InsertNNI(Bitset::Subsplit("001001", "110110"),
Bitset::Subsplit("110000", "000110")); // (25|0134)-(01|34)
Expand All @@ -909,13 +915,122 @@ TEST_CASE("GPInstance: SubsplitDAG NNI (Nearest Neighbor Interchange)") {
Bitset::Subsplit("001001", "000100")); // (4|235)-(25|3)
// No parents of child (edge 20) to add to NNI Set (see notes).
// These should not be equal, as it has not yet removed the pair just added to DAG.
CHECK_NE(set_of_nnis, correct_set_of_nnis);
CHECK_NE(nni_engine.GetAdjacentNNIs(), correct_adjacent_nnis);
// Remove NNI added to DAG from NNI Set.
RemoveNNI(nni_to_add.first, nni_to_add.second);
// Check that `UpdateSetOfNNIsAfterAddNodePair()` updated correctly.
CHECK_EQ(set_of_nnis, correct_set_of_nnis);
RemoveNNI(nni_to_add.parent_, nni_to_add.child_);
// Check that `UpdateAdjacentNNIsAfterAddNodePair()` updated correctly.
CHECK_EQ(nni_engine.GetAdjacentNNIs(), correct_adjacent_nnis);

// Build NNI Set from current DAG state from scratch.
SyncSetOfNNIsWithDAG(set_of_nnis_2, dag);
CHECK_EQ(set_of_nnis_2, correct_set_of_nnis);
nni_engine_2.SyncAdjacentNNIsWithDAG();
CHECK_EQ(nni_engine_2.GetAdjacentNNIs(), correct_adjacent_nnis);
}

// Tests DAG equality after adding different NNIs and built from different taxon
// orderings. Test described at:
// https://github.com/phylovi/bito/pull/377#issuecomment-1035410447
TEST_CASE("NNI Engine: Add NNI Test") {
// Fasta contains simple sequences for four taxa: x0,x1,x2,x3.
const std::string fasta_path = "data/four_taxon.fasta";
// dag_A_1 is a DAG that contains pair_1.
auto inst_A_1 =
GPInstanceOfFiles(fasta_path, "data/four_taxon_simple_before_nni_1.nwk",
"_ignore/mmapped_plv_A_1.data");
GPDAG& dag_A_1 = inst_A_1.GetDAG();
// dag_A_2 is a DAG that contains pair_2.
auto inst_A_2 =
GPInstanceOfFiles(fasta_path, "data/four_taxon_simple_before_nni_2.nwk",
"_ignore/mmapped_plv_A_2.data");
GPDAG& dag_A_2 = inst_A_2.GetDAG();
// dag_A_2b is a DAG that contains pair_2 with a different taxon mapping.
auto inst_A_2b =
GPInstanceOfFiles(fasta_path, "data/four_taxon_simple_before_nni_2b.nwk",
"_ignore/mmapped_plv_A_2.data");
GPDAG& dag_A_2b = inst_A_2b.GetDAG();
// dag_B is a DAG containing dag_A_1 after adding node pair_2.
auto inst_B = GPInstanceOfFiles(fasta_path, "data/four_taxon_simple_after_nni.nwk",
"_ignore/mmapped_plv_B.data");
GPDAG& dag_B = inst_B.GetDAG();
// pair_1: NNI pair missing from dag_A_1.
NNIOperation pair_1(Bitset::Subsplit("0110", "0001"), // 12|3
Bitset::Subsplit("0100", "0010")); // 1|2
// pair_2: NNI pair missing from dag_A_2.
NNIOperation pair_2(Bitset::Subsplit("0110", "0001"), // 12|3
Bitset::Subsplit("0100", "0010")); // 1|2
// pair_2b: NNI pair missing from dag_A_2b.
NNIOperation pair_2b(Bitset::Subsplit("0100", "0011"), // 1|23
Bitset::Subsplit("0010", "0001")); // 2|3
// Before adding missing NNIs, dag_A_2 variants are equal, but dag_A_1 and dag_A_2 are
// different.
CHECK_EQ(dag_A_1, dag_A_1);
CHECK_EQ(dag_A_2, dag_A_2b);
CHECK_NE(dag_A_1, dag_A_2);
// Add missing NNIs.
dag_A_1.AddNodePair(pair_1);
dag_A_2.AddNodePair(pair_2);
dag_A_2b.AddNodePair(pair_2b);
// After adding missing NNIs, all DAGs are equal to dag_B.
CHECK_EQ(dag_A_1, dag_B);
CHECK_EQ(dag_A_2, dag_B);
CHECK_EQ(dag_A_2b, dag_B);
}

// This compares DAGs after adding NNIs via AddNodePair vs GraftNodePair.
TEST_CASE("NNI Engine: AddNodePair vs GraftNodePair") {
// Access ith NNI from NNI set.
auto GetWhichNNIFromSet = [](const NNISet& nni_set, const size_t which_nni) {
auto nni_set_ptr = nni_set.begin();
for (size_t i = 0; i < which_nni; i++) {
nni_set_ptr++;
}
return *nni_set_ptr;
};
// Simple DAG that contains a shared edge, internal leafward fork, and an internal
// rootward fork.
const std::string fasta_path = "data/six_taxon.fasta";
const std::string newick_path = "data/six_taxon_rooted_simple.nwk";
// Instance that will be unaltered.
auto pre_inst = GPInstanceOfFiles(fasta_path, newick_path);
GPDAG& pre_dag = pre_inst.GetDAG();
GPEngine& pre_gp_engine = *pre_inst.GetEngine();
// Instance that is used by grafted DAG.
auto inst_to_graft = GPInstanceOfFiles(fasta_path, newick_path);
GPDAG& dag_to_graft = inst_to_graft.GetDAG();
GraftDAG graft_dag = GraftDAG(dag_to_graft);
// Find all viable NNIs for DAG.
auto nni_engine = NNIEngine(pre_dag, pre_gp_engine);
nni_engine.SyncAdjacentNNIsWithDAG();
size_t nni_count = nni_engine.GetAdjacentNNICount();
// Check DAG and Grafted DAG equal before adding any NNIs.
CHECK_MESSAGE(GraftDAG::CompareToDAG(graft_dag, pre_dag) == 0,
"GraftDAG not equal to DAG before AddNodePair.");
// Test DAGs equivalent after adding NNI individually.
for (size_t i = 0; i < nni_count; i++) {
auto nni_to_add = GetWhichNNIFromSet(nni_engine.GetAdjacentNNIs(), i);
// New temp DAG.
auto inst = GPInstanceOfFiles(fasta_path, newick_path);
GPDAG& dag = inst.GetDAG();
// Add NNI to DAG and GraftDAG and compare results.
dag.AddNodePair(nni_to_add.parent_, nni_to_add.child_);
graft_dag.AddGraftNodePair(nni_to_add.parent_, nni_to_add.child_);
CHECK_MESSAGE(GraftDAG::CompareToDAG(graft_dag, dag) == 0,
"GraftDAG not equal to DAG after adding NNIs.");
// Clear NNIs from GraftDAG and compare to initial DAG.
graft_dag.RemoveAllGrafts();
CHECK_MESSAGE(GraftDAG::CompareToDAG(graft_dag, pre_dag) == 0,
"GraftDAG not equal to initial DAG after removing all NNIs.");
}
// Test DAGs not equivalent when adding different NNIs.
{
auto nni_1 = GetWhichNNIFromSet(nni_engine.GetAdjacentNNIs(), 0);
auto nni_2 = GetWhichNNIFromSet(nni_engine.GetAdjacentNNIs(), 1);
// New temp DAG.
auto inst = GPInstanceOfFiles(fasta_path, newick_path);
GPDAG& dag = inst.GetDAG();
// Add NNI to DAG and GraftDAG and compare results.
dag.AddNodePair(nni_1.parent_, nni_1.child_);
graft_dag.AddGraftNodePair(nni_2.parent_, nni_2.child_);
CHECK_MESSAGE(GraftDAG::CompareToDAG(graft_dag, dag) != 0,
"GraftDAG is equal to DAG after adding different NNIs.");
}
}
11 changes: 11 additions & 0 deletions src/gp_instance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -392,3 +392,14 @@ void GPInstance::SubsplitDAGToDot(const std::string &out_path, bool show_index_l
}
out_stream.close();
}

void GPInstance::MakeNNIEngine() {
nni_engine_ = std::make_unique<NNIEngine>(dag_, *engine_);
}

NNIEngine &GPInstance::GetNNIEngine() {
Assert(nni_engine_, "GPInstance::GetNNIEngine() when nni_engine has not been made.");
return *nni_engine_;
}

StringVector GPInstance::GetTaxonNames() { return tree_collection_.TaxonNames(); }
24 changes: 17 additions & 7 deletions src/gp_instance.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "gp_engine.hpp"
#include "rooted_tree_collection.hpp"
#include "site_pattern.hpp"
#include "nni_engine.hpp"

class GPInstance {
public:
Expand Down Expand Up @@ -82,14 +83,14 @@ class GPInstance {
// Export the subsplit DAG as a DOT file.
void SubsplitDAGToDot(const std::string &out_path, bool show_index_labels = true);

private:
std::string mmap_file_path_;
Alignment alignment_;
std::unique_ptr<GPEngine> engine_;
RootedTreeCollection tree_collection_;
GPDAG dag_;
static constexpr size_t plv_count_per_node_ = 6;
// Initialize NNI Evaluation Engine.
void MakeNNIEngine();
// Get NNI Evaluation Engine.
NNIEngine &GetNNIEngine();
// Get taxon names.
StringVector GetTaxonNames();

private:
void ClearTreeCollectionAssociatedState();
void CheckSequencesLoaded() const;
void CheckTreesLoaded() const;
Expand All @@ -99,4 +100,13 @@ class GPInstance {
RootedTreeCollection TreesWithGPBranchLengthsOfTopologies(
Node::NodePtrVec &&topologies) const;
StringDoubleVector PrettyIndexedVector(EigenConstVectorXdRef v);

std::string mmap_file_path_;
Alignment alignment_;
std::unique_ptr<GPEngine> engine_;
RootedTreeCollection tree_collection_;
GPDAG dag_;
static constexpr size_t plv_count_per_node_ = 6;

std::unique_ptr<NNIEngine> nni_engine_;
};
Loading

0 comments on commit 58579dc

Please sign in to comment.