From c4b223ebdccfae9c063905ee5495b0badbfd44fb Mon Sep 17 00:00:00 2001 From: Momo Langenstein Date: Thu, 8 Jul 2021 13:36:43 +0000 Subject: [PATCH 1/7] (ml5717) Initial draft implementation of tskit tree integration --- Cargo.lock | 184 ++++++++++++++++++- Cargo.toml | 2 + necsim/plugins/tskit/Cargo.toml | 22 +++ necsim/plugins/tskit/src/lib.rs | 6 + necsim/plugins/tskit/src/provenance.rs | 121 ++++++++++++ necsim/plugins/tskit/src/reporter.rs | 245 +++++++++++++++++++++++++ 6 files changed, 574 insertions(+), 6 deletions(-) create mode 100644 necsim/plugins/tskit/Cargo.toml create mode 100644 necsim/plugins/tskit/src/lib.rs create mode 100644 necsim/plugins/tskit/src/provenance.rs create mode 100644 necsim/plugins/tskit/src/reporter.rs diff --git a/Cargo.lock b/Cargo.lock index 30be93579..2adc27bbd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -91,7 +91,7 @@ dependencies = [ "cfg-if 0.1.10", "clang-sys", "clap", - "env_logger", + "env_logger 0.7.1", "lazy_static", "lazycell", "log", @@ -100,7 +100,30 @@ dependencies = [ "quote", "regex", "rustc-hash", - "shlex", + "shlex 0.1.1", + "which", +] + +[[package]] +name = "bindgen" +version = "0.58.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f8523b410d7187a43085e7e064416ea32ded16bd0a4e6fc025e21616d01258f" +dependencies = [ + "bitflags", + "cexpr", + "clang-sys", + "clap", + "env_logger 0.8.4", + "lazy_static", + "lazycell", + "log", + "peeking_take_while", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex 1.0.0", "which", ] @@ -138,6 +161,9 @@ name = "cc" version = "1.0.71" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "79c2681d6594606957bbb8631c4b90a7fcaaa72cdb714743a437b156d6a7eedd" +dependencies = [ + "jobserver", +] [[package]] name = "cexpr" @@ -160,6 +186,19 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chrono" +version = "0.4.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" +dependencies = [ + "libc", + "num-integer", + "num-traits", + "time", + "winapi", +] + [[package]] name = "clang-sys" version = "1.3.0" @@ -414,7 +453,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "44533bbbb3bb3c1fa17d9f2e4e38bbbaf8396ba82193c4cb1b6445d711445d36" dependencies = [ "atty", - "humantime", + "humantime 1.3.0", + "log", + "regex", + "termcolor", +] + +[[package]] +name = "env_logger" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3" +dependencies = [ + "atty", + "humantime 2.1.0", "log", "regex", "termcolor", @@ -446,6 +498,16 @@ name = "final" version = "0.1.0" source = "git+https://github.com/MomoLangenstein/final?rev=a2dbfd6#a2dbfd623863cc6b5963021ec11373d35da42945" +[[package]] +name = "findshlibs" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37affc18a9c7cf90b6cf6a7700dab60439a8f138ac5ebc5f12b98281d8f687c9" +dependencies = [ + "lazy_static", + "libc", +] + [[package]] name = "flate2" version = "1.0.22" @@ -526,12 +588,33 @@ dependencies = [ "quick-error", ] +[[package]] +name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + [[package]] name = "ident_case" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" +[[package]] +name = "itoa" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" + +[[package]] +name = "jobserver" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af25a77299a7f711a01975c35a6a424eb6862092cc2d6c72c4ed6cbc56dfc1fa" +dependencies = [ + "libc", +] + [[package]] name = "jpeg-decoder" version = "0.1.22" @@ -645,7 +728,7 @@ name = "mpi-sys" version = "0.2.0" source = "git+https://github.com/rsmpi/rsmpi?rev=e9b1844#e9b18441bb967862f46f853b4d5ffda8f5c55b33" dependencies = [ - "bindgen", + "bindgen 0.55.1", "build-probe-mpi 0.1.1 (git+https://github.com/rsmpi/rsmpi?rev=e9b1844)", "cc", ] @@ -831,6 +914,20 @@ dependencies = [ "serde", ] +[[package]] +name = "necsim-plugins-tskit" +version = "0.1.0" +dependencies = [ + "findshlibs", + "necsim-core", + "necsim-core-bond", + "necsim-plugins-core", + "serde", + "serde_json", + "tskit", + "uname", +] + [[package]] name = "nom" version = "5.1.2" @@ -850,6 +947,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "num-integer" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db" +dependencies = [ + "autocfg", + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.14" @@ -1298,6 +1405,12 @@ dependencies = [ "tiff", ] +[[package]] +name = "ryu" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" + [[package]] name = "scopeguard" version = "1.1.0" @@ -1347,6 +1460,17 @@ dependencies = [ "syn", ] +[[package]] +name = "serde_json" +version = "1.0.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f690853975602e1bfe1ccbf50504d67174e3bcf340f23b5ea9992e0587a52d8" +dependencies = [ + "itoa", + "ryu", + "serde", +] + [[package]] name = "serde_path_to_error" version = "0.1.5" @@ -1371,6 +1495,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fdf1b9db47230893d76faad238fd6097fd6d6a9245cd7a4d90dbd639536bbd2" +[[package]] +name = "shlex" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42a568c8f2cd051a4d283bd6eb0343ac214c1b0f1ac19f93e1175b2dee38c73d" + [[package]] name = "slab" version = "0.4.5" @@ -1383,6 +1513,12 @@ version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1ecab6c735a6bb4139c0caafd0cc3635748bbb3acf4550e8138122099251f309" +[[package]] +name = "streaming-iterator" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "303235c177994a476226b80d076bd333b7b560fb05bd242a10609d11b07f81f5" + [[package]] name = "strsim" version = "0.8.0" @@ -1494,6 +1630,17 @@ dependencies = [ "weezl", ] +[[package]] +name = "time" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255" +dependencies = [ + "libc", + "wasi", + "winapi", +] + [[package]] name = "tiny-keccak" version = "2.0.2" @@ -1512,6 +1659,31 @@ dependencies = [ "serde", ] +[[package]] +name = "tskit" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e651630d438d19b70a51e4be845f085825c4d17194714a13ce1795732b75c02d" +dependencies = [ + "bindgen 0.58.1", + "bitflags", + "cc", + "chrono", + "libc", + "pkg-config", + "streaming-iterator", + "thiserror", +] + +[[package]] +name = "uname" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b72f89f0ca32e4db1c04e2a72f5345d59796d4866a1ee0609084569f73683dc8" +dependencies = [ + "libc", +] + [[package]] name = "unicode-segmentation" version = "1.8.0" @@ -1550,9 +1722,9 @@ checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" [[package]] name = "wasi" -version = "0.10.2+wasi-snapshot-preview1" +version = "0.10.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" +checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" [[package]] name = "weezl" diff --git a/Cargo.toml b/Cargo.toml index 2d0efe821..8c9080cb7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ members = [ "necsim/plugins/metacommunity", "necsim/plugins/statistics", "necsim/plugins/species", + "necsim/plugins/tskit", "necsim/partitioning/core", "necsim/partitioning/monolithic", @@ -39,6 +40,7 @@ default-members = [ "necsim/plugins/metacommunity", "necsim/plugins/statistics", "necsim/plugins/species", + "necsim/plugins/tskit", ] [profile.release] diff --git a/necsim/plugins/tskit/Cargo.toml b/necsim/plugins/tskit/Cargo.toml new file mode 100644 index 000000000..7b4b11afa --- /dev/null +++ b/necsim/plugins/tskit/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "necsim-plugins-tskit" +version = "0.1.0" +authors = ["Momo Langenstein "] +license = "MIT OR Apache-2.0" +edition = "2018" + +[lib] +crate-type = ["cdylib"] + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +necsim-core = { path = "../../core" } +necsim-core-bond = { path = "../../core/bond" } +necsim-plugins-core = { path = "../core", features = ["export"] } + +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0.64" +tskit = { version = "0.3.0", features = ["provenance"] } +uname = "0.1.1" +findshlibs = "0.8.0" diff --git a/necsim/plugins/tskit/src/lib.rs b/necsim/plugins/tskit/src/lib.rs new file mode 100644 index 000000000..2c043f06b --- /dev/null +++ b/necsim/plugins/tskit/src/lib.rs @@ -0,0 +1,6 @@ +#![deny(clippy::pedantic)] + +mod provenance; +mod reporter; + +necsim_plugins_core::export_plugin!(Tree => reporter::TskitTreeReporter); diff --git a/necsim/plugins/tskit/src/provenance.rs b/necsim/plugins/tskit/src/provenance.rs new file mode 100644 index 000000000..7671e5e26 --- /dev/null +++ b/necsim/plugins/tskit/src/provenance.rs @@ -0,0 +1,121 @@ +use std::{collections::HashMap, io}; + +use findshlibs::{SharedLibrary, TargetSharedLibrary}; +use serde::Serialize; + +#[allow(clippy::module_name_repetitions)] +#[derive(Serialize)] +pub struct TskitProvenance { + schema_version: String, + software: TskitProvenanceSoftware, + parameters: TskitProvenanceParameters, + environment: TskitProvenanceEnvironment, +} + +impl TskitProvenance { + pub fn try_new() -> io::Result { + Ok(Self { + schema_version: "1.0.0".to_owned(), + software: TskitProvenanceSoftware::try_new()?, + parameters: TskitProvenanceParameters::new(), + environment: TskitProvenanceEnvironment::try_new()?, + }) + } +} + +#[derive(Serialize)] +struct TskitProvenanceSoftware { + name: String, + version: String, +} + +impl TskitProvenanceSoftware { + pub fn try_new() -> io::Result { + let executable = std::env::current_exe()?.canonicalize()?; + + let output = std::process::Command::new(&executable).arg("-V").output()?; + + let version_str = String::from_utf8_lossy(&output.stdout); + let mut version = version_str.split_whitespace(); + + Ok(Self { + name: version + .next() + .map_or_else(|| executable.to_string_lossy().into_owned(), str::to_owned), + version: version + .next() + .map_or_else(|| "???".to_owned(), str::to_owned), + }) + } +} + +#[derive(Serialize)] +struct TskitProvenanceParameters { + args: Vec, +} + +impl TskitProvenanceParameters { + pub fn new() -> Self { + Self { + args: std::env::args().collect(), + } + } +} + +#[derive(Serialize)] +struct TskitProvenanceEnvironment { + os: TskitProvenanceEnvironmentOs, + #[allow(clippy::zero_sized_map_values)] + libraries: HashMap, +} + +impl TskitProvenanceEnvironment { + pub fn try_new() -> io::Result { + #[allow(clippy::zero_sized_map_values)] + let mut libraries = HashMap::new(); + + TargetSharedLibrary::each(|lib| { + if let Ok(library) = TskitProvenanceEnvironmentLibrary::try_new(lib.name()) { + libraries.insert(lib.name().to_string_lossy().into_owned(), library); + } + }); + + Ok(Self { + os: TskitProvenanceEnvironmentOs::try_new()?, + libraries, + }) + } +} + +#[derive(Serialize)] +struct TskitProvenanceEnvironmentOs { + system: String, + node: String, + release: String, + version: String, + machine: String, +} + +impl TskitProvenanceEnvironmentOs { + pub fn try_new() -> io::Result { + let uname = uname::uname()?; + + Ok(Self { + system: uname.sysname, + node: uname.nodename, + release: uname.release, + version: uname.version, + machine: uname.machine, + }) + } +} + +#[derive(Serialize)] +struct TskitProvenanceEnvironmentLibrary {} + +impl TskitProvenanceEnvironmentLibrary { + #[allow(clippy::unnecessary_wraps)] + pub fn try_new(_library: &std::ffi::OsStr) -> io::Result { + Ok(Self {}) + } +} diff --git a/necsim/plugins/tskit/src/reporter.rs b/necsim/plugins/tskit/src/reporter.rs new file mode 100644 index 000000000..9d1b838a7 --- /dev/null +++ b/necsim/plugins/tskit/src/reporter.rs @@ -0,0 +1,245 @@ +use std::{ + collections::{hash_map::Entry, HashMap, VecDeque}, + convert::TryFrom, + fmt, + fs::OpenOptions, + io, +}; + +use serde::{Deserialize, Serialize, Serializer}; +use tskit::{ + provenance::Provenance, TableCollection, TableOutputOptions, TableSortOptions, + TreeSequenceFlags, +}; + +use necsim_core::{ + event::{DispersalEvent, SpeciationEvent}, + impl_finalise, impl_report, + landscape::IndexedLocation, + lineage::GlobalLineageReference, + reporter::Reporter, +}; +use necsim_core_bond::NonNegativeF64; + +const TSK_SEQUENCE_MIN: f64 = 0.0_f64; +const TSK_SEQUENCE_MAX: f64 = 1.0_f64; + +#[allow(clippy::module_name_repetitions)] +#[derive(Deserialize)] +#[serde(try_from = "TskitTreeReporterArgs")] +pub struct TskitTreeReporter { + last_parent_prior_time: Option<(GlobalLineageReference, NonNegativeF64)>, + last_speciation_event: Option, + last_dispersal_event: Option, + + origins: HashMap, + children: HashMap>, + + table: TableCollection, + + output: String, +} + +impl Serialize for TskitTreeReporter { + fn serialize(&self, serializer: S) -> Result { + TskitTreeReporterArgs { + output: self.output.clone(), + } + .serialize(serializer) + } +} + +impl fmt::Debug for TskitTreeReporter { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("TskitTreeReporter") + .field("output", &self.output) + .finish() + } +} + +#[derive(Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +#[serde(rename = "TskitTreeReporter")] +struct TskitTreeReporterArgs { + output: String, +} + +impl TryFrom for TskitTreeReporter { + type Error = io::Error; + + fn try_from(args: TskitTreeReporterArgs) -> Result { + // Preliminary argument parsing check if the output is a writable file + let file = OpenOptions::new() + .create(true) + .append(true) + .open(&args.output)?; + std::mem::drop(file); + + let table = TableCollection::new(TSK_SEQUENCE_MAX) + .map_err(|err| io::Error::new(io::ErrorKind::Other, err.to_string()))?; + + Ok(Self { + last_parent_prior_time: None, + last_speciation_event: None, + last_dispersal_event: None, + + origins: HashMap::new(), + children: HashMap::new(), + + table, + + output: args.output, + }) + } +} + +impl Reporter for TskitTreeReporter { + impl_report!(speciation(&mut self, speciation: Used) { + if speciation.prior_time == 0.0_f64 { + self.store_individual_origin(&speciation.global_lineage_reference, &speciation.origin); + } + + if Some(speciation) == self.last_speciation_event.as_ref() { + if let Some((parent, prior_time)) = &self.last_parent_prior_time { + if prior_time != &speciation.prior_time { + let parent = parent.clone(); + self.store_individual_coalescence(&speciation.global_lineage_reference, parent, speciation.prior_time.get()); + } + } + } else { + self.store_individual_speciation(&speciation.global_lineage_reference, speciation.event_time.get()); + } + + self.last_speciation_event = Some(speciation.clone()); + self.last_parent_prior_time = Some( + (speciation.global_lineage_reference.clone(), speciation.prior_time) + ); + }); + + impl_report!(dispersal(&mut self, dispersal: Used) { + if dispersal.prior_time == 0.0_f64 { + self.store_individual_origin(&dispersal.global_lineage_reference, &dispersal.origin); + } + + if Some(dispersal) == self.last_dispersal_event.as_ref() { + if let Some((parent, prior_time)) = &self.last_parent_prior_time { + if prior_time != &dispersal.prior_time { + let parent = parent.clone(); + self.store_individual_coalescence(&dispersal.global_lineage_reference, parent, dispersal.prior_time.get()); + } + } + } else if let Some(parent) = dispersal.interaction.parent() { + self.store_individual_coalescence(&dispersal.global_lineage_reference, parent, dispersal.event_time.get()); + } + + self.last_dispersal_event = Some(dispersal.clone()); + self.last_parent_prior_time = Some( + (dispersal.global_lineage_reference.clone(), dispersal.prior_time) + ); + }); + + impl_report!(progress(&mut self, _progress: Ignored) {}); + + impl_finalise!((mut self) { + self.table.full_sort(TableSortOptions::NONE).unwrap(); + + self.table.tree_sequence(TreeSequenceFlags::BUILD_INDEXES).unwrap().dump(&self.output, TableOutputOptions::NONE).unwrap(); + }); + + fn initialise(&mut self) -> Result<(), String> { + let provenance = + crate::provenance::TskitProvenance::try_new().map_err(|err| err.to_string())?; + let provenance_json = serde_json::to_string(&provenance).map_err(|err| err.to_string())?; + + self.table + .add_provenance(&provenance_json) + .map_err(|err| err.to_string()) + .map(|_| ()) + } +} + +impl crate::reporter::TskitTreeReporter { + fn store_individual_origin( + &mut self, + reference: &GlobalLineageReference, + location: &IndexedLocation, + ) { + self.origins.insert(reference.clone(), location.clone()); + } + + fn store_individual_speciation(&mut self, reference: &GlobalLineageReference, time: f64) { + let parent_id = if let Some(origin) = self.origins.remove(reference) { + self.table + .add_individual( + 0_u32, + &[ + f64::from(origin.location().x()), + f64::from(origin.location().y()), + f64::from(origin.index()), + ], + &[], + ) + .unwrap() + } else { + return; + }; + + let parent_node_id = self + .table + .add_node(tskit::TSK_NODE_IS_SAMPLE, time, tskit::TSK_NULL, parent_id) + .unwrap(); + + let mut stack = VecDeque::from(vec![(reference.clone(), parent_id, parent_node_id)]); + + while let Some((parent, parent_id, parent_node_id)) = stack.pop_front() { + if let Some(children) = self.children.remove(&parent) { + for (child, time) in children { + if let Some(origin) = self.origins.remove(&child) { + let child_id = self + .table + .add_individual( + 0_u32, + &[ + f64::from(origin.location().x()), + f64::from(origin.location().y()), + f64::from(origin.index()), + ], + &[parent_id], + ) + .unwrap(); + + let child_node_id = self + .table + .add_node(tskit::TSK_NODE_IS_SAMPLE, time, tskit::TSK_NULL, child_id) + .unwrap(); + + self.table + .add_edge( + TSK_SEQUENCE_MIN, + TSK_SEQUENCE_MAX, + parent_node_id, + child_node_id, + ) + .unwrap(); + + stack.push_back((child, child_id, child_node_id)); + } + } + } + } + } + + fn store_individual_coalescence( + &mut self, + child: &GlobalLineageReference, + parent: GlobalLineageReference, + time: f64, + ) { + match self.children.entry(parent) { + Entry::Occupied(mut entry) => entry.get_mut().push((child.clone(), time)), + Entry::Vacant(entry) => { + entry.insert(vec![(child.clone(), time)]); + }, + } + } +} From d9f220cadaf74ecff532a2a0f11e01d144e68595 Mon Sep 17 00:00:00 2001 From: Momo Langenstein Date: Thu, 8 Jul 2021 14:21:28 +0000 Subject: [PATCH 2/7] (ml5717) Added some documenting comments --- necsim/plugins/tskit/src/lib.rs | 1 + necsim/plugins/tskit/src/provenance.rs | 5 +++++ necsim/plugins/tskit/src/reporter.rs | 11 +++++++++++ 3 files changed, 17 insertions(+) diff --git a/necsim/plugins/tskit/src/lib.rs b/necsim/plugins/tskit/src/lib.rs index 2c043f06b..2790feec2 100644 --- a/necsim/plugins/tskit/src/lib.rs +++ b/necsim/plugins/tskit/src/lib.rs @@ -3,4 +3,5 @@ mod provenance; mod reporter; +// Register the reporter plugin necsim_plugins_core::export_plugin!(Tree => reporter::TskitTreeReporter); diff --git a/necsim/plugins/tskit/src/provenance.rs b/necsim/plugins/tskit/src/provenance.rs index 7671e5e26..1fc1b02ff 100644 --- a/necsim/plugins/tskit/src/provenance.rs +++ b/necsim/plugins/tskit/src/provenance.rs @@ -3,6 +3,7 @@ use std::{collections::HashMap, io}; use findshlibs::{SharedLibrary, TargetSharedLibrary}; use serde::Serialize; +/// tskit's provenance JSON schema format root for version 1.0.0 #[allow(clippy::module_name_repetitions)] #[derive(Serialize)] pub struct TskitProvenance { @@ -38,6 +39,7 @@ impl TskitProvenanceSoftware { let version_str = String::from_utf8_lossy(&output.stdout); let mut version = version_str.split_whitespace(); + // Split a version string such as 'man 2.9.1' into 'man' and '2.9.1' Ok(Self { name: version .next() @@ -74,6 +76,7 @@ impl TskitProvenanceEnvironment { #[allow(clippy::zero_sized_map_values)] let mut libraries = HashMap::new(); + // Create a map of all dynamically loaded libraries TargetSharedLibrary::each(|lib| { if let Ok(library) = TskitProvenanceEnvironmentLibrary::try_new(lib.name()) { libraries.insert(lib.name().to_string_lossy().into_owned(), library); @@ -116,6 +119,8 @@ struct TskitProvenanceEnvironmentLibrary {} impl TskitProvenanceEnvironmentLibrary { #[allow(clippy::unnecessary_wraps)] pub fn try_new(_library: &std::ffi::OsStr) -> io::Result { + // TODO: Future work might deduce version information etc. + Ok(Self {}) } } diff --git a/necsim/plugins/tskit/src/reporter.rs b/necsim/plugins/tskit/src/reporter.rs index 9d1b838a7..474e2d881 100644 --- a/necsim/plugins/tskit/src/reporter.rs +++ b/necsim/plugins/tskit/src/reporter.rs @@ -21,6 +21,7 @@ use necsim_core::{ }; use necsim_core_bond::NonNegativeF64; +// An arbitrary genome sequence interval const TSK_SEQUENCE_MIN: f64 = 0.0_f64; const TSK_SEQUENCE_MAX: f64 = 1.0_f64; @@ -32,7 +33,9 @@ pub struct TskitTreeReporter { last_speciation_event: Option, last_dispersal_event: Option, + // Original (present-time) locations of all lineages origins: HashMap, + // Children lineages of all parents, used to create tskit individuals in order children: HashMap>, table: TableCollection, @@ -143,10 +146,12 @@ impl Reporter for TskitTreeReporter { impl_finalise!((mut self) { self.table.full_sort(TableSortOptions::NONE).unwrap(); + // Output the tree sequence to the specified `output` file self.table.tree_sequence(TreeSequenceFlags::BUILD_INDEXES).unwrap().dump(&self.output, TableOutputOptions::NONE).unwrap(); }); fn initialise(&mut self) -> Result<(), String> { + // Capture and record the provenance information inside the table let provenance = crate::provenance::TskitProvenance::try_new().map_err(|err| err.to_string())?; let provenance_json = serde_json::to_string(&provenance).map_err(|err| err.to_string())?; @@ -168,6 +173,7 @@ impl crate::reporter::TskitTreeReporter { } fn store_individual_speciation(&mut self, reference: &GlobalLineageReference, time: f64) { + // Insert the speciating parent lineage as an individual let parent_id = if let Some(origin) = self.origins.remove(reference) { self.table .add_individual( @@ -184,6 +190,7 @@ impl crate::reporter::TskitTreeReporter { return; }; + // Create the speciation node let parent_node_id = self .table .add_node(tskit::TSK_NODE_IS_SAMPLE, time, tskit::TSK_NULL, parent_id) @@ -191,10 +198,12 @@ impl crate::reporter::TskitTreeReporter { let mut stack = VecDeque::from(vec![(reference.clone(), parent_id, parent_node_id)]); + // Iteratively insert the parent's successors in breadth first order while let Some((parent, parent_id, parent_node_id)) = stack.pop_front() { if let Some(children) = self.children.remove(&parent) { for (child, time) in children { if let Some(origin) = self.origins.remove(&child) { + // Insert the coalesced child lineage as an individual let child_id = self .table .add_individual( @@ -208,11 +217,13 @@ impl crate::reporter::TskitTreeReporter { ) .unwrap(); + // Create the coalescence node let child_node_id = self .table .add_node(tskit::TSK_NODE_IS_SAMPLE, time, tskit::TSK_NULL, child_id) .unwrap(); + // Add the parent-child relation between the nodes self.table .add_edge( TSK_SEQUENCE_MIN, From c4dcea534224ea05d19937a54817e02360ef9e01 Mon Sep 17 00:00:00 2001 From: Momo Langenstein Date: Thu, 8 Jul 2021 17:29:32 +0000 Subject: [PATCH 3/7] (ml5717) Some code refactoring + added lineage reference metadata --- necsim/core/src/lineage.rs | 14 ++ necsim/plugins/tskit/src/lib.rs | 4 +- necsim/plugins/tskit/src/reporter.rs | 256 ---------------------- necsim/plugins/tskit/src/tree/metadata.rs | 54 +++++ necsim/plugins/tskit/src/tree/mod.rs | 90 ++++++++ necsim/plugins/tskit/src/tree/reporter.rs | 59 +++++ necsim/plugins/tskit/src/tree/table.rs | 142 ++++++++++++ 7 files changed, 361 insertions(+), 258 deletions(-) delete mode 100644 necsim/plugins/tskit/src/reporter.rs create mode 100644 necsim/plugins/tskit/src/tree/metadata.rs create mode 100644 necsim/plugins/tskit/src/tree/mod.rs create mode 100644 necsim/plugins/tskit/src/tree/reporter.rs create mode 100644 necsim/plugins/tskit/src/tree/table.rs diff --git a/necsim/core/src/lineage.rs b/necsim/core/src/lineage.rs index 06b250484..e7c164030 100644 --- a/necsim/core/src/lineage.rs +++ b/necsim/core/src/lineage.rs @@ -19,6 +19,20 @@ use crate::{ #[repr(transparent)] pub struct GlobalLineageReference(NonZeroOneU64); +impl GlobalLineageReference { + #[doc(hidden)] + #[must_use] + pub unsafe fn into_inner(self) -> NonZeroOneU64 { + self.0 + } + + #[doc(hidden)] + #[must_use] + pub unsafe fn from_inner(inner: NonZeroOneU64) -> Self { + Self(inner) + } +} + impl fmt::Display for GlobalLineageReference { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{}", self.0.get() - 2) diff --git a/necsim/plugins/tskit/src/lib.rs b/necsim/plugins/tskit/src/lib.rs index 2790feec2..c34165e3b 100644 --- a/necsim/plugins/tskit/src/lib.rs +++ b/necsim/plugins/tskit/src/lib.rs @@ -1,7 +1,7 @@ #![deny(clippy::pedantic)] mod provenance; -mod reporter; +mod tree; // Register the reporter plugin -necsim_plugins_core::export_plugin!(Tree => reporter::TskitTreeReporter); +necsim_plugins_core::export_plugin!(Tree => tree::TskitTreeReporter); diff --git a/necsim/plugins/tskit/src/reporter.rs b/necsim/plugins/tskit/src/reporter.rs deleted file mode 100644 index 474e2d881..000000000 --- a/necsim/plugins/tskit/src/reporter.rs +++ /dev/null @@ -1,256 +0,0 @@ -use std::{ - collections::{hash_map::Entry, HashMap, VecDeque}, - convert::TryFrom, - fmt, - fs::OpenOptions, - io, -}; - -use serde::{Deserialize, Serialize, Serializer}; -use tskit::{ - provenance::Provenance, TableCollection, TableOutputOptions, TableSortOptions, - TreeSequenceFlags, -}; - -use necsim_core::{ - event::{DispersalEvent, SpeciationEvent}, - impl_finalise, impl_report, - landscape::IndexedLocation, - lineage::GlobalLineageReference, - reporter::Reporter, -}; -use necsim_core_bond::NonNegativeF64; - -// An arbitrary genome sequence interval -const TSK_SEQUENCE_MIN: f64 = 0.0_f64; -const TSK_SEQUENCE_MAX: f64 = 1.0_f64; - -#[allow(clippy::module_name_repetitions)] -#[derive(Deserialize)] -#[serde(try_from = "TskitTreeReporterArgs")] -pub struct TskitTreeReporter { - last_parent_prior_time: Option<(GlobalLineageReference, NonNegativeF64)>, - last_speciation_event: Option, - last_dispersal_event: Option, - - // Original (present-time) locations of all lineages - origins: HashMap, - // Children lineages of all parents, used to create tskit individuals in order - children: HashMap>, - - table: TableCollection, - - output: String, -} - -impl Serialize for TskitTreeReporter { - fn serialize(&self, serializer: S) -> Result { - TskitTreeReporterArgs { - output: self.output.clone(), - } - .serialize(serializer) - } -} - -impl fmt::Debug for TskitTreeReporter { - fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { - fmt.debug_struct("TskitTreeReporter") - .field("output", &self.output) - .finish() - } -} - -#[derive(Serialize, Deserialize)] -#[serde(deny_unknown_fields)] -#[serde(rename = "TskitTreeReporter")] -struct TskitTreeReporterArgs { - output: String, -} - -impl TryFrom for TskitTreeReporter { - type Error = io::Error; - - fn try_from(args: TskitTreeReporterArgs) -> Result { - // Preliminary argument parsing check if the output is a writable file - let file = OpenOptions::new() - .create(true) - .append(true) - .open(&args.output)?; - std::mem::drop(file); - - let table = TableCollection::new(TSK_SEQUENCE_MAX) - .map_err(|err| io::Error::new(io::ErrorKind::Other, err.to_string()))?; - - Ok(Self { - last_parent_prior_time: None, - last_speciation_event: None, - last_dispersal_event: None, - - origins: HashMap::new(), - children: HashMap::new(), - - table, - - output: args.output, - }) - } -} - -impl Reporter for TskitTreeReporter { - impl_report!(speciation(&mut self, speciation: Used) { - if speciation.prior_time == 0.0_f64 { - self.store_individual_origin(&speciation.global_lineage_reference, &speciation.origin); - } - - if Some(speciation) == self.last_speciation_event.as_ref() { - if let Some((parent, prior_time)) = &self.last_parent_prior_time { - if prior_time != &speciation.prior_time { - let parent = parent.clone(); - self.store_individual_coalescence(&speciation.global_lineage_reference, parent, speciation.prior_time.get()); - } - } - } else { - self.store_individual_speciation(&speciation.global_lineage_reference, speciation.event_time.get()); - } - - self.last_speciation_event = Some(speciation.clone()); - self.last_parent_prior_time = Some( - (speciation.global_lineage_reference.clone(), speciation.prior_time) - ); - }); - - impl_report!(dispersal(&mut self, dispersal: Used) { - if dispersal.prior_time == 0.0_f64 { - self.store_individual_origin(&dispersal.global_lineage_reference, &dispersal.origin); - } - - if Some(dispersal) == self.last_dispersal_event.as_ref() { - if let Some((parent, prior_time)) = &self.last_parent_prior_time { - if prior_time != &dispersal.prior_time { - let parent = parent.clone(); - self.store_individual_coalescence(&dispersal.global_lineage_reference, parent, dispersal.prior_time.get()); - } - } - } else if let Some(parent) = dispersal.interaction.parent() { - self.store_individual_coalescence(&dispersal.global_lineage_reference, parent, dispersal.event_time.get()); - } - - self.last_dispersal_event = Some(dispersal.clone()); - self.last_parent_prior_time = Some( - (dispersal.global_lineage_reference.clone(), dispersal.prior_time) - ); - }); - - impl_report!(progress(&mut self, _progress: Ignored) {}); - - impl_finalise!((mut self) { - self.table.full_sort(TableSortOptions::NONE).unwrap(); - - // Output the tree sequence to the specified `output` file - self.table.tree_sequence(TreeSequenceFlags::BUILD_INDEXES).unwrap().dump(&self.output, TableOutputOptions::NONE).unwrap(); - }); - - fn initialise(&mut self) -> Result<(), String> { - // Capture and record the provenance information inside the table - let provenance = - crate::provenance::TskitProvenance::try_new().map_err(|err| err.to_string())?; - let provenance_json = serde_json::to_string(&provenance).map_err(|err| err.to_string())?; - - self.table - .add_provenance(&provenance_json) - .map_err(|err| err.to_string()) - .map(|_| ()) - } -} - -impl crate::reporter::TskitTreeReporter { - fn store_individual_origin( - &mut self, - reference: &GlobalLineageReference, - location: &IndexedLocation, - ) { - self.origins.insert(reference.clone(), location.clone()); - } - - fn store_individual_speciation(&mut self, reference: &GlobalLineageReference, time: f64) { - // Insert the speciating parent lineage as an individual - let parent_id = if let Some(origin) = self.origins.remove(reference) { - self.table - .add_individual( - 0_u32, - &[ - f64::from(origin.location().x()), - f64::from(origin.location().y()), - f64::from(origin.index()), - ], - &[], - ) - .unwrap() - } else { - return; - }; - - // Create the speciation node - let parent_node_id = self - .table - .add_node(tskit::TSK_NODE_IS_SAMPLE, time, tskit::TSK_NULL, parent_id) - .unwrap(); - - let mut stack = VecDeque::from(vec![(reference.clone(), parent_id, parent_node_id)]); - - // Iteratively insert the parent's successors in breadth first order - while let Some((parent, parent_id, parent_node_id)) = stack.pop_front() { - if let Some(children) = self.children.remove(&parent) { - for (child, time) in children { - if let Some(origin) = self.origins.remove(&child) { - // Insert the coalesced child lineage as an individual - let child_id = self - .table - .add_individual( - 0_u32, - &[ - f64::from(origin.location().x()), - f64::from(origin.location().y()), - f64::from(origin.index()), - ], - &[parent_id], - ) - .unwrap(); - - // Create the coalescence node - let child_node_id = self - .table - .add_node(tskit::TSK_NODE_IS_SAMPLE, time, tskit::TSK_NULL, child_id) - .unwrap(); - - // Add the parent-child relation between the nodes - self.table - .add_edge( - TSK_SEQUENCE_MIN, - TSK_SEQUENCE_MAX, - parent_node_id, - child_node_id, - ) - .unwrap(); - - stack.push_back((child, child_id, child_node_id)); - } - } - } - } - } - - fn store_individual_coalescence( - &mut self, - child: &GlobalLineageReference, - parent: GlobalLineageReference, - time: f64, - ) { - match self.children.entry(parent) { - Entry::Occupied(mut entry) => entry.get_mut().push((child.clone(), time)), - Entry::Vacant(entry) => { - entry.insert(vec![(child.clone(), time)]); - }, - } - } -} diff --git a/necsim/plugins/tskit/src/tree/metadata.rs b/necsim/plugins/tskit/src/tree/metadata.rs new file mode 100644 index 000000000..2ca8b78d5 --- /dev/null +++ b/necsim/plugins/tskit/src/tree/metadata.rs @@ -0,0 +1,54 @@ +use std::{ + array::TryFromSliceError, + convert::{TryFrom, TryInto}, + io, +}; + +use necsim_core_bond::NonZeroOneU64; +use tskit::metadata::{MetadataError, MetadataRoundtrip}; + +use necsim_core::lineage::GlobalLineageReference; + +#[allow(clippy::module_name_repetitions)] +#[repr(transparent)] +pub struct GlobalLineageMetadata(GlobalLineageReference); + +impl MetadataRoundtrip for GlobalLineageMetadata { + fn encode(&self) -> Result, MetadataError> { + // Store the internal u64 without the +2 offset + Ok((unsafe { self.0.clone().into_inner() }.get() - 2) + .to_le_bytes() + .to_vec()) + } + + fn decode(metadata: &[u8]) -> Result + where + Self: Sized, + { + // Ensure that `metadata` contains exactly eight bytes + let value_bytes: [u8; 8] = metadata.try_into().map_err(|err: TryFromSliceError| { + MetadataError::RoundtripError { + value: Box::new(io::Error::new(io::ErrorKind::InvalidData, err.to_string())), + } + })?; + + // Convert the bytes into an u64 with the needed +2 offset + let value = u64::from_le_bytes(value_bytes) + 2; + + // Create the internal `NonZeroOneU64` representation of the reference + let value_inner = + NonZeroOneU64::try_from(value).map_err(|err| MetadataError::RoundtripError { + value: Box::new(io::Error::new(io::ErrorKind::InvalidData, err.to_string())), + })?; + + Ok(Self(unsafe { + GlobalLineageReference::from_inner(value_inner) + })) + } +} + +impl GlobalLineageMetadata { + pub fn new(reference: &GlobalLineageReference) -> &Self { + unsafe { &*(reference as *const GlobalLineageReference).cast() } + } +} diff --git a/necsim/plugins/tskit/src/tree/mod.rs b/necsim/plugins/tskit/src/tree/mod.rs new file mode 100644 index 000000000..57fafc88b --- /dev/null +++ b/necsim/plugins/tskit/src/tree/mod.rs @@ -0,0 +1,90 @@ +use std::{collections::HashMap, convert::TryFrom, fmt, fs::OpenOptions, io}; + +use serde::{Deserialize, Serialize, Serializer}; +use tskit::TableCollection; + +use necsim_core::{ + event::{DispersalEvent, SpeciationEvent}, + landscape::IndexedLocation, + lineage::GlobalLineageReference, +}; +use necsim_core_bond::NonNegativeF64; + +mod metadata; +mod reporter; +mod table; + +// An arbitrary genome sequence interval +const TSK_SEQUENCE_MIN: f64 = 0.0_f64; +const TSK_SEQUENCE_MAX: f64 = 1.0_f64; + +#[allow(clippy::module_name_repetitions)] +#[derive(Deserialize)] +#[serde(try_from = "TskitTreeReporterArgs")] +pub struct TskitTreeReporter { + last_parent_prior_time: Option<(GlobalLineageReference, NonNegativeF64)>, + last_speciation_event: Option, + last_dispersal_event: Option, + + // Original (present-time) locations of all lineages + origins: HashMap, + // Children lineages of all parents, used to create tskit individuals in order + children: HashMap>, + + table: TableCollection, + + output: String, +} + +impl Serialize for TskitTreeReporter { + fn serialize(&self, serializer: S) -> Result { + TskitTreeReporterArgs { + output: self.output.clone(), + } + .serialize(serializer) + } +} + +impl fmt::Debug for TskitTreeReporter { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + fmt.debug_struct("TskitTreeReporter") + .field("output", &self.output) + .finish() + } +} + +#[derive(Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +#[serde(rename = "TskitTreeReporter")] +struct TskitTreeReporterArgs { + output: String, +} + +impl TryFrom for TskitTreeReporter { + type Error = io::Error; + + fn try_from(args: TskitTreeReporterArgs) -> Result { + // Preliminary argument parsing check if the output is a writable file + let file = OpenOptions::new() + .create(true) + .append(true) + .open(&args.output)?; + std::mem::drop(file); + + let table = TableCollection::new(TSK_SEQUENCE_MAX) + .map_err(|err| io::Error::new(io::ErrorKind::Other, err.to_string()))?; + + Ok(Self { + last_parent_prior_time: None, + last_speciation_event: None, + last_dispersal_event: None, + + origins: HashMap::new(), + children: HashMap::new(), + + table, + + output: args.output, + }) + } +} diff --git a/necsim/plugins/tskit/src/tree/reporter.rs b/necsim/plugins/tskit/src/tree/reporter.rs new file mode 100644 index 000000000..3f222a716 --- /dev/null +++ b/necsim/plugins/tskit/src/tree/reporter.rs @@ -0,0 +1,59 @@ +use necsim_core::{impl_finalise, impl_report, reporter::Reporter}; + +use super::TskitTreeReporter; + +impl Reporter for TskitTreeReporter { + impl_report!(speciation(&mut self, speciation: Used) { + if speciation.prior_time == 0.0_f64 { + self.store_individual_origin(&speciation.global_lineage_reference, &speciation.origin); + } + + if Some(speciation) == self.last_speciation_event.as_ref() { + if let Some((parent, prior_time)) = &self.last_parent_prior_time { + if prior_time != &speciation.prior_time { + let parent = parent.clone(); + self.store_individual_coalescence(&speciation.global_lineage_reference, parent, speciation.prior_time.get()); + } + } + } else { + self.store_individual_speciation(&speciation.global_lineage_reference, speciation.event_time.get()); + } + + self.last_speciation_event = Some(speciation.clone()); + self.last_parent_prior_time = Some( + (speciation.global_lineage_reference.clone(), speciation.prior_time) + ); + }); + + impl_report!(dispersal(&mut self, dispersal: Used) { + if dispersal.prior_time == 0.0_f64 { + self.store_individual_origin(&dispersal.global_lineage_reference, &dispersal.origin); + } + + if Some(dispersal) == self.last_dispersal_event.as_ref() { + if let Some((parent, prior_time)) = &self.last_parent_prior_time { + if prior_time != &dispersal.prior_time { + let parent = parent.clone(); + self.store_individual_coalescence(&dispersal.global_lineage_reference, parent, dispersal.prior_time.get()); + } + } + } else if let Some(parent) = &dispersal.interaction.parent() { + self.store_individual_coalescence(&dispersal.global_lineage_reference, parent.clone(), dispersal.event_time.get()); + } + + self.last_dispersal_event = Some(dispersal.clone()); + self.last_parent_prior_time = Some( + (dispersal.global_lineage_reference.clone(), dispersal.prior_time) + ); + }); + + impl_report!(progress(&mut self, _progress: Ignored) {}); + + impl_finalise!((mut self) { + self.output_tree_sequence(); + }); + + fn initialise(&mut self) -> Result<(), String> { + self.store_provenance() + } +} diff --git a/necsim/plugins/tskit/src/tree/table.rs b/necsim/plugins/tskit/src/tree/table.rs new file mode 100644 index 000000000..dfeb9a220 --- /dev/null +++ b/necsim/plugins/tskit/src/tree/table.rs @@ -0,0 +1,142 @@ +use std::collections::{hash_map::Entry, VecDeque}; + +use tskit::{provenance::Provenance, TableOutputOptions, TableSortOptions, TreeSequenceFlags}; + +use necsim_core::{landscape::IndexedLocation, lineage::GlobalLineageReference}; + +use super::{ + metadata::GlobalLineageMetadata, TskitTreeReporter, TSK_SEQUENCE_MAX, TSK_SEQUENCE_MIN, +}; + +impl TskitTreeReporter { + pub(super) fn store_individual_origin( + &mut self, + reference: &GlobalLineageReference, + location: &IndexedLocation, + ) { + self.origins.insert(reference.clone(), location.clone()); + } + + pub(super) fn store_individual_speciation( + &mut self, + parent: &GlobalLineageReference, + time: f64, + ) { + // Insert the speciating parent lineage as an individual + let parent_id = if let Some(origin) = self.origins.remove(parent) { + self.table + .add_individual_with_metadata( + 0_u32, + &[ + f64::from(origin.location().x()), + f64::from(origin.location().y()), + f64::from(origin.index()), + ], + &[], + Some(GlobalLineageMetadata::new(parent)), + ) + .unwrap() + } else { + return; + }; + + // Create the speciation node + let parent_node_id = self + .table + .add_node_with_metadata( + tskit::TSK_NODE_IS_SAMPLE, + time, + tskit::TSK_NULL, + parent_id, + Some(GlobalLineageMetadata::new(parent)), + ) + .unwrap(); + + let mut stack = VecDeque::from(vec![(parent.clone(), parent_id, parent_node_id)]); + + // Iteratively insert the parent's successors in breadth first order + while let Some((parent, parent_id, parent_node_id)) = stack.pop_front() { + if let Some(children) = self.children.remove(&parent) { + for (child, time) in children { + if let Some(origin) = self.origins.remove(&child) { + // Insert the coalesced child lineage as an individual + let child_id = self + .table + .add_individual_with_metadata( + 0_u32, + &[ + f64::from(origin.location().x()), + f64::from(origin.location().y()), + f64::from(origin.index()), + ], + &[parent_id], + Some(GlobalLineageMetadata::new(&child)), + ) + .unwrap(); + + // Create the coalescence node + let child_node_id = self + .table + .add_node_with_metadata( + tskit::TSK_NODE_IS_SAMPLE, + time, + tskit::TSK_NULL, + child_id, + Some(GlobalLineageMetadata::new(&child)), + ) + .unwrap(); + + // Add the parent-child relation between the nodes + self.table + .add_edge( + TSK_SEQUENCE_MIN, + TSK_SEQUENCE_MAX, + parent_node_id, + child_node_id, + ) + .unwrap(); + + stack.push_back((child, child_id, child_node_id)); + } + } + } + } + } + + pub(super) fn store_individual_coalescence( + &mut self, + child: &GlobalLineageReference, + parent: GlobalLineageReference, + time: f64, + ) { + match self.children.entry(parent) { + Entry::Occupied(mut entry) => entry.get_mut().push((child.clone(), time)), + Entry::Vacant(entry) => { + entry.insert(vec![(child.clone(), time)]); + }, + } + } + + pub(super) fn store_provenance(&mut self) -> Result<(), String> { + // Capture and record the provenance information inside the table + let provenance = + crate::provenance::TskitProvenance::try_new().map_err(|err| err.to_string())?; + let provenance_json = serde_json::to_string(&provenance).map_err(|err| err.to_string())?; + + self.table + .add_provenance(&provenance_json) + .map_err(|err| err.to_string()) + .map(|_| ()) + } + + pub(super) fn output_tree_sequence(mut self) { + self.table.full_sort(TableSortOptions::NONE).unwrap(); + + // Output the tree sequence to the specified `output` file + self.table + .tree_sequence(TreeSequenceFlags::BUILD_INDEXES) + .unwrap() + .dump(&self.output, TableOutputOptions::NONE) + .unwrap(); + } +} From 3b7e7bd1e1b67b4d4e76c0ee7a8aa22344f2369b Mon Sep 17 00:00:00 2001 From: Momo Langenstein Date: Sat, 10 Jul 2021 18:22:52 +0000 Subject: [PATCH 4/7] (ml5717) Fixed tskit reporter for independent lineages --- Cargo.lock | 34 ++++ necsim/plugins/tskit/Cargo.toml | 8 + necsim/plugins/tskit/build.rs | 87 +++++++++ necsim/plugins/tskit/src/provenance.rs | 52 +++++ necsim/plugins/tskit/src/tree/mod.rs | 15 +- necsim/plugins/tskit/src/tree/reporter.rs | 8 +- necsim/plugins/tskit/src/tree/table.rs | 223 +++++++++++++--------- 7 files changed, 333 insertions(+), 94 deletions(-) create mode 100644 necsim/plugins/tskit/build.rs diff --git a/Cargo.lock b/Cargo.lock index 2adc27bbd..e789367f4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -537,6 +537,28 @@ dependencies = [ "wasi", ] +[[package]] +name = "git-version" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6b0decc02f4636b9ccad390dcbe77b722a77efedfa393caf8379a51d5c61899" +dependencies = [ + "git-version-macro", + "proc-macro-hack", +] + +[[package]] +name = "git-version-macro" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe69f1cbdb6e28af2bac214e943b99ce8a0a06b447d15d3e61161b0423139f3f" +dependencies = [ + "proc-macro-hack", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "glob" version = "0.3.0" @@ -919,9 +941,12 @@ name = "necsim-plugins-tskit" version = "0.1.0" dependencies = [ "findshlibs", + "git-version", "necsim-core", "necsim-core-bond", "necsim-plugins-core", + "rustc_version", + "semver", "serde", "serde_json", "tskit", @@ -1037,6 +1062,12 @@ dependencies = [ "version_check", ] +[[package]] +name = "proc-macro-hack" +version = "0.5.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" + [[package]] name = "proc-macro2" version = "1.0.32" @@ -1428,6 +1459,9 @@ name = "semver" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "568a8e6258aa33c13358f81fd834adb854c6f7c9468520910a9b1e8fac068012" +dependencies = [ + "serde", +] [[package]] name = "serde" diff --git a/necsim/plugins/tskit/Cargo.toml b/necsim/plugins/tskit/Cargo.toml index 7b4b11afa..dda55d961 100644 --- a/necsim/plugins/tskit/Cargo.toml +++ b/necsim/plugins/tskit/Cargo.toml @@ -20,3 +20,11 @@ serde_json = "1.0.64" tskit = { version = "0.3.0", features = ["provenance"] } uname = "0.1.1" findshlibs = "0.8.0" + +rustc_version = "0.4.0" +semver = { version = "1.0.3", features = ["serde"] } +git-version = "0.3.4" + +[build-dependencies] +rustc_version = "0.4.0" +semver = "1.0.3" diff --git a/necsim/plugins/tskit/build.rs b/necsim/plugins/tskit/build.rs new file mode 100644 index 000000000..2dce7a3df --- /dev/null +++ b/necsim/plugins/tskit/build.rs @@ -0,0 +1,87 @@ +use std::{env, fs::File, io::Write, path}; + +use rustc_version::Channel; + +/// Based on Sebastian Waisbrot's MIT licensed `rustc-version-runtime` crate: +/// https://github.com/seppo0010/rustc-version-runtime-rs +fn main() { + let mut path = path::PathBuf::from(env::var_os("OUT_DIR").unwrap()); + path.push("rustc_version.rs"); + let mut file = File::create(&path).unwrap(); + + writeln!( + file, + "use rustc_version::{{Channel, LlvmVersion, Version, VersionMeta}}; +use semver::{{BuildMetadata, Prerelease}};\n" + ) + .unwrap(); + let version = rustc_version::version_meta().expect("Failed to read the rustc version."); + + writeln!( + file, + "#[allow(dead_code)] +/// Returns the `rustc` `SemVer` version. +pub fn version() -> Version {{ + version_meta().semver +}} + +#[allow(dead_code)] +/// Returns the `rustc` `SemVer` version and additional metadata +/// like the git short hash and build date. +pub fn version_meta() -> VersionMeta {{ + VersionMeta {{ + semver: semver::Version {{ + major: {major}, + minor: {minor}, + patch: {patch}, + pre: Prerelease::new(\"{pre}\").unwrap(), + build: BuildMetadata::new(\"{build}\").unwrap(), + }}, + commit_hash: {commit_hash}, + commit_date: {commit_date}, + build_date: {build_date}, + channel: Channel::{channel}, + host: \"{host}\".to_owned(), + short_version_string: \"{short_version_string}\".to_owned(), + llvm_version: {llvm_version}, + }} +}}", + major = version.semver.major, + minor = version.semver.minor, + patch = version.semver.patch, + pre = version.semver.pre, + build = version.semver.build, + commit_hash = version + .commit_hash + .map(|h| format!("Some(\"{}\".to_owned())", h)) + .unwrap_or_else(|| "None".to_owned()), + commit_date = version + .commit_date + .map(|h| format!("Some(\"{}\".to_owned())", h)) + .unwrap_or_else(|| "None".to_owned()), + build_date = version + .build_date + .map(|h| format!("Some(\"{}\".to_owned())", h)) + .unwrap_or_else(|| "None".to_owned()), + channel = match version.channel { + Channel::Dev => "Dev", + Channel::Nightly => "Nightly", + Channel::Beta => "Beta", + Channel::Stable => "Stable", + }, + host = version.host, + short_version_string = version.short_version_string, + llvm_version = version + .llvm_version + .map(|h| format!( + "Some(LlvmVersion {{ + major: {major}, + minor: {minor}, + }})", + major = h.major, + minor = h.minor + )) + .unwrap_or_else(|| "None".to_owned()), + ) + .unwrap(); +} diff --git a/necsim/plugins/tskit/src/provenance.rs b/necsim/plugins/tskit/src/provenance.rs index 1fc1b02ff..bd71aafcb 100644 --- a/necsim/plugins/tskit/src/provenance.rs +++ b/necsim/plugins/tskit/src/provenance.rs @@ -28,6 +28,8 @@ impl TskitProvenance { struct TskitProvenanceSoftware { name: String, version: String, + #[serde(skip_serializing_if = "Option::is_none")] + commit: Option, } impl TskitProvenanceSoftware { @@ -47,6 +49,10 @@ impl TskitProvenanceSoftware { version: version .next() .map_or_else(|| "???".to_owned(), str::to_owned), + commit: match git_version::git_version!(fallback = "unknown") { + "unknown" => None, + version => Some(version.to_owned()), + }, }) } } @@ -69,6 +75,8 @@ struct TskitProvenanceEnvironment { os: TskitProvenanceEnvironmentOs, #[allow(clippy::zero_sized_map_values)] libraries: HashMap, + #[serde(with = "self::rustc_version::VersionMetaDef")] + rustc: ::rustc_version::VersionMeta, } impl TskitProvenanceEnvironment { @@ -86,6 +94,7 @@ impl TskitProvenanceEnvironment { Ok(Self { os: TskitProvenanceEnvironmentOs::try_new()?, libraries, + rustc: rustc_version::version_meta(), }) } } @@ -124,3 +133,46 @@ impl TskitProvenanceEnvironmentLibrary { Ok(Self {}) } } + +mod rustc_version { + include!(concat!(env!("OUT_DIR"), "/rustc_version.rs")); + + #[derive(serde::Serialize)] + #[serde(remote = "rustc_version::VersionMeta")] + pub(super) struct VersionMetaDef { + pub semver: Version, + #[serde(skip_serializing_if = "Option::is_none")] + pub commit_hash: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub commit_date: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub build_date: Option, + #[serde(serialize_with = "serialize_channel")] + pub channel: Channel, + pub host: String, + pub short_version_string: String, + #[serde(skip_serializing_if = "Option::is_none")] + #[serde(serialize_with = "serialize_llvm_version")] + pub llvm_version: Option, + } + + #[allow(clippy::trivially_copy_pass_by_ref)] + fn serialize_channel( + channel: &Channel, + serializer: S, + ) -> Result { + serializer.collect_str(&format!("{:?}", channel)) + } + + fn serialize_llvm_version( + llvm_version: &Option, + serializer: S, + ) -> Result { + #[allow(clippy::option_if_let_else)] + if let Some(llvm_version) = llvm_version { + serializer.collect_str(llvm_version) + } else { + serializer.serialize_none() + } + } +} diff --git a/necsim/plugins/tskit/src/tree/mod.rs b/necsim/plugins/tskit/src/tree/mod.rs index 57fafc88b..16eb5fc7d 100644 --- a/necsim/plugins/tskit/src/tree/mod.rs +++ b/necsim/plugins/tskit/src/tree/mod.rs @@ -18,6 +18,11 @@ mod table; const TSK_SEQUENCE_MIN: f64 = 0.0_f64; const TSK_SEQUENCE_MAX: f64 = 1.0_f64; +#[derive(Copy, Clone)] +struct TskitIndividualID(tskit::tsk_id_t); +#[derive(Copy, Clone)] +struct TskitNodeID(tskit::tsk_id_t); + #[allow(clippy::module_name_repetitions)] #[derive(Deserialize)] #[serde(try_from = "TskitTreeReporterArgs")] @@ -28,8 +33,12 @@ pub struct TskitTreeReporter { // Original (present-time) locations of all lineages origins: HashMap, - // Children lineages of all parents, used to create tskit individuals in order - children: HashMap>, + // Children lineages of a parent, used if parent is unknown at coalescence + children: HashMap>, + // Child -> Parent lineage mapping + parents: HashMap, + // Lineage to tskit mapping, used if parent is known before coalescence + tskit_ids: HashMap, table: TableCollection, @@ -81,6 +90,8 @@ impl TryFrom for TskitTreeReporter { origins: HashMap::new(), children: HashMap::new(), + parents: HashMap::new(), + tskit_ids: HashMap::new(), table, diff --git a/necsim/plugins/tskit/src/tree/reporter.rs b/necsim/plugins/tskit/src/tree/reporter.rs index 3f222a716..e5bf80a82 100644 --- a/necsim/plugins/tskit/src/tree/reporter.rs +++ b/necsim/plugins/tskit/src/tree/reporter.rs @@ -12,11 +12,11 @@ impl Reporter for TskitTreeReporter { if let Some((parent, prior_time)) = &self.last_parent_prior_time { if prior_time != &speciation.prior_time { let parent = parent.clone(); - self.store_individual_coalescence(&speciation.global_lineage_reference, parent, speciation.prior_time.get()); + self.store_individual_coalescence(&speciation.global_lineage_reference, &parent, speciation.prior_time); } } } else { - self.store_individual_speciation(&speciation.global_lineage_reference, speciation.event_time.get()); + self.store_individual_speciation(&speciation.global_lineage_reference, speciation.event_time.into()); } self.last_speciation_event = Some(speciation.clone()); @@ -34,11 +34,11 @@ impl Reporter for TskitTreeReporter { if let Some((parent, prior_time)) = &self.last_parent_prior_time { if prior_time != &dispersal.prior_time { let parent = parent.clone(); - self.store_individual_coalescence(&dispersal.global_lineage_reference, parent, dispersal.prior_time.get()); + self.store_individual_coalescence(&dispersal.global_lineage_reference, &parent, dispersal.prior_time); } } } else if let Some(parent) = &dispersal.interaction.parent() { - self.store_individual_coalescence(&dispersal.global_lineage_reference, parent.clone(), dispersal.event_time.get()); + self.store_individual_coalescence(&dispersal.global_lineage_reference, parent, dispersal.event_time.into()); } self.last_dispersal_event = Some(dispersal.clone()); diff --git a/necsim/plugins/tskit/src/tree/table.rs b/necsim/plugins/tskit/src/tree/table.rs index dfeb9a220..2b0612c7b 100644 --- a/necsim/plugins/tskit/src/tree/table.rs +++ b/necsim/plugins/tskit/src/tree/table.rs @@ -1,13 +1,20 @@ -use std::collections::{hash_map::Entry, VecDeque}; +use std::collections::VecDeque; -use tskit::{provenance::Provenance, TableOutputOptions, TableSortOptions, TreeSequenceFlags}; +use necsim_core_bond::NonNegativeF64; +use tskit::{ + metadata::MetadataRoundtrip, provenance::Provenance, TableOutputOptions, TableSortOptions, + TreeSequenceFlags, +}; use necsim_core::{landscape::IndexedLocation, lineage::GlobalLineageReference}; use super::{ - metadata::GlobalLineageMetadata, TskitTreeReporter, TSK_SEQUENCE_MAX, TSK_SEQUENCE_MIN, + metadata::GlobalLineageMetadata, TskitIndividualID, TskitNodeID, TskitTreeReporter, + TSK_SEQUENCE_MAX, TSK_SEQUENCE_MIN, }; +const TSK_FLAGS_EMPTY: tskit::tsk_flags_t = 0_u32; + impl TskitTreeReporter { pub(super) fn store_individual_origin( &mut self, @@ -20,100 +27,55 @@ impl TskitTreeReporter { pub(super) fn store_individual_speciation( &mut self, parent: &GlobalLineageReference, - time: f64, + time: NonNegativeF64, ) { - // Insert the speciating parent lineage as an individual - let parent_id = if let Some(origin) = self.origins.remove(parent) { - self.table - .add_individual_with_metadata( - 0_u32, - &[ - f64::from(origin.location().x()), - f64::from(origin.location().y()), - f64::from(origin.index()), - ], - &[], - Some(GlobalLineageMetadata::new(parent)), - ) - .unwrap() - } else { - return; - }; - - // Create the speciation node - let parent_node_id = self - .table - .add_node_with_metadata( - tskit::TSK_NODE_IS_SAMPLE, - time, - tskit::TSK_NULL, - parent_id, - Some(GlobalLineageMetadata::new(parent)), - ) - .unwrap(); - - let mut stack = VecDeque::from(vec![(parent.clone(), parent_id, parent_node_id)]); + // Resolve the actual parent, irrespective of duplicate individuals + let mut parent = parent; + while let Some(parent_parent) = self.parents.get(parent) { + parent = parent_parent; + } + let parent = parent.clone(); - // Iteratively insert the parent's successors in breadth first order - while let Some((parent, parent_id, parent_node_id)) = stack.pop_front() { - if let Some(children) = self.children.remove(&parent) { - for (child, time) in children { - if let Some(origin) = self.origins.remove(&child) { - // Insert the coalesced child lineage as an individual - let child_id = self - .table - .add_individual_with_metadata( - 0_u32, - &[ - f64::from(origin.location().x()), - f64::from(origin.location().y()), - f64::from(origin.index()), - ], - &[parent_id], - Some(GlobalLineageMetadata::new(&child)), - ) - .unwrap(); - - // Create the coalescence node - let child_node_id = self - .table - .add_node_with_metadata( - tskit::TSK_NODE_IS_SAMPLE, - time, - tskit::TSK_NULL, - child_id, - Some(GlobalLineageMetadata::new(&child)), - ) - .unwrap(); - - // Add the parent-child relation between the nodes - self.table - .add_edge( - TSK_SEQUENCE_MIN, - TSK_SEQUENCE_MAX, - parent_node_id, - child_node_id, - ) - .unwrap(); - - stack.push_back((child, child_id, child_node_id)); - } - } - } + // Insert the speciating parent lineage, then store its successors, too + if let Some((parent_individual, parent_node)) = self.store_lineage(&parent, time, None) { + self.store_children_of_parent(&parent, parent_individual, parent_node); } } pub(super) fn store_individual_coalescence( &mut self, child: &GlobalLineageReference, - parent: GlobalLineageReference, - time: f64, + parent: &GlobalLineageReference, + time: NonNegativeF64, ) { - match self.children.entry(parent) { - Entry::Occupied(mut entry) => entry.get_mut().push((child.clone(), time)), - Entry::Vacant(entry) => { - entry.insert(vec![(child.clone(), time)]); - }, + // Resolve the actual child, irrespective of duplicate individuals + let mut child = child; + while let Some(child_parent) = self.parents.get(child) { + child = child_parent; + } + let child = child.clone(); + + // Resolve the actual parent, irrespective of duplicate individuals + let mut parent = parent; + while let Some(parent_parent) = self.parents.get(parent) { + parent = parent_parent; + } + let parent = parent.clone(); + + self.parents.insert(child.clone(), parent.clone()); + + if let Some((parent_individual, parent_node)) = self.tskit_ids.get(&parent).copied() { + // The parent has already been inserted + // -> immediately store child and its successors + if let Some((child_individual, child_node)) = + self.store_lineage(&child, time, Some((parent_individual, parent_node))) + { + self.store_children_of_parent(&child, child_individual, child_node); + } + } else { + // The parent has not been inserted yet + // -> postpone insertion and remember the child + self.children.entry(parent).or_default().push((child, time)); } } @@ -140,3 +102,88 @@ impl TskitTreeReporter { .unwrap(); } } + +impl TskitTreeReporter { + /// Store a lineage as a `tskit` individual and birth node, optionally with + /// a parent relationship + fn store_lineage( + &mut self, + reference: &GlobalLineageReference, + time: NonNegativeF64, + parent: Option<(TskitIndividualID, TskitNodeID)>, + ) -> Option<(TskitIndividualID, TskitNodeID)> { + let origin = self.origins.remove(reference)?; + let location = [ + f64::from(origin.location().x()), + f64::from(origin.location().y()), + f64::from(origin.index()), + ]; + let metadata: Option<&dyn MetadataRoundtrip> = Some(GlobalLineageMetadata::new(reference)); + + // Insert the lineage as an individual + let individual_id = if let Some((parent_individual, _parent_node)) = parent { + self.table.add_individual_with_metadata( + TSK_FLAGS_EMPTY, + &location, + &[parent_individual.0], + metadata, + ) + } else { + self.table + .add_individual_with_metadata(TSK_FLAGS_EMPTY, &location, &[], metadata) + } + .map(TskitIndividualID) + .unwrap(); + + // Create corresponding node + let node_id = self + .table + .add_node_with_metadata( + tskit::TSK_NODE_IS_SAMPLE, + time.get(), + tskit::TSK_NULL, + individual_id.0, + metadata, + ) + .map(TskitNodeID) + .unwrap(); + + if let Some((_parent_individual, parent_node)) = parent { + // Add the parent-child relation between the nodes + self.table + .add_edge(TSK_SEQUENCE_MIN, TSK_SEQUENCE_MAX, parent_node.0, node_id.0) + .unwrap(); + } + + // Store the individual and node for potential late coalescences + self.tskit_ids + .insert(reference.clone(), (individual_id, node_id)); + + Some((individual_id, node_id)) + } + + /// Store all the children lineages of the parent lineage + /// as `tskit` individuals with birth nodes + fn store_children_of_parent( + &mut self, + parent: &GlobalLineageReference, + parent_individual: TskitIndividualID, + parent_node: TskitNodeID, + ) { + let mut stack = VecDeque::from(vec![(parent.clone(), parent_individual, parent_node)]); + + // Iteratively insert the parent's successors in breadth first order + while let Some((parent, parent_individual, parent_node)) = stack.pop_front() { + if let Some(children) = self.children.remove(&parent) { + for (child, time) in children { + // Insert the coalesced child lineage + if let Some((child_individual, child_node)) = + self.store_lineage(&child, time, Some((parent_individual, parent_node))) + { + stack.push_back((child, child_individual, child_node)); + } + } + } + } + } +} From df9b608de26dfb475acd5a3aa3c149f2e6288e93 Mon Sep 17 00:00:00 2001 From: Momo Langenstein Date: Mon, 2 Aug 2021 06:33:02 +0000 Subject: [PATCH 5/7] (ml5717) Experiment with WIP tskit newtype IDs --- Cargo.lock | 3 +- necsim/plugins/tskit/Cargo.toml | 2 +- necsim/plugins/tskit/src/tree/mod.rs | 9 ++---- necsim/plugins/tskit/src/tree/table.rs | 44 +++++++++++--------------- 4 files changed, 23 insertions(+), 35 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e789367f4..0f44ff2e6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1696,8 +1696,7 @@ dependencies = [ [[package]] name = "tskit" version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e651630d438d19b70a51e4be845f085825c4d17194714a13ce1795732b75c02d" +source = "git+https://github.com/tskit-dev/tskit-rust?rev=0366800#036680050bdca732f3be4db439dfcfafd144d79c" dependencies = [ "bindgen 0.58.1", "bitflags", diff --git a/necsim/plugins/tskit/Cargo.toml b/necsim/plugins/tskit/Cargo.toml index dda55d961..98ff48968 100644 --- a/necsim/plugins/tskit/Cargo.toml +++ b/necsim/plugins/tskit/Cargo.toml @@ -17,7 +17,7 @@ necsim-plugins-core = { path = "../core", features = ["export"] } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0.64" -tskit = { version = "0.3.0", features = ["provenance"] } +tskit = { git = "https://github.com/tskit-dev/tskit-rust", rev = "0366800", features = ["provenance"] } uname = "0.1.1" findshlibs = "0.8.0" diff --git a/necsim/plugins/tskit/src/tree/mod.rs b/necsim/plugins/tskit/src/tree/mod.rs index 16eb5fc7d..32d1fae6e 100644 --- a/necsim/plugins/tskit/src/tree/mod.rs +++ b/necsim/plugins/tskit/src/tree/mod.rs @@ -1,7 +1,7 @@ use std::{collections::HashMap, convert::TryFrom, fmt, fs::OpenOptions, io}; use serde::{Deserialize, Serialize, Serializer}; -use tskit::TableCollection; +use tskit::{IndividualId, NodeId, TableCollection}; use necsim_core::{ event::{DispersalEvent, SpeciationEvent}, @@ -18,11 +18,6 @@ mod table; const TSK_SEQUENCE_MIN: f64 = 0.0_f64; const TSK_SEQUENCE_MAX: f64 = 1.0_f64; -#[derive(Copy, Clone)] -struct TskitIndividualID(tskit::tsk_id_t); -#[derive(Copy, Clone)] -struct TskitNodeID(tskit::tsk_id_t); - #[allow(clippy::module_name_repetitions)] #[derive(Deserialize)] #[serde(try_from = "TskitTreeReporterArgs")] @@ -38,7 +33,7 @@ pub struct TskitTreeReporter { // Child -> Parent lineage mapping parents: HashMap, // Lineage to tskit mapping, used if parent is known before coalescence - tskit_ids: HashMap, + tskit_ids: HashMap, table: TableCollection, diff --git a/necsim/plugins/tskit/src/tree/table.rs b/necsim/plugins/tskit/src/tree/table.rs index 2b0612c7b..adbae4ccf 100644 --- a/necsim/plugins/tskit/src/tree/table.rs +++ b/necsim/plugins/tskit/src/tree/table.rs @@ -2,15 +2,14 @@ use std::collections::VecDeque; use necsim_core_bond::NonNegativeF64; use tskit::{ - metadata::MetadataRoundtrip, provenance::Provenance, TableOutputOptions, TableSortOptions, + provenance::Provenance, IndividualId, NodeId, TableOutputOptions, TableSortOptions, TreeSequenceFlags, }; use necsim_core::{landscape::IndexedLocation, lineage::GlobalLineageReference}; use super::{ - metadata::GlobalLineageMetadata, TskitIndividualID, TskitNodeID, TskitTreeReporter, - TSK_SEQUENCE_MAX, TSK_SEQUENCE_MIN, + metadata::GlobalLineageMetadata, TskitTreeReporter, TSK_SEQUENCE_MAX, TSK_SEQUENCE_MIN, }; const TSK_FLAGS_EMPTY: tskit::tsk_flags_t = 0_u32; @@ -110,48 +109,43 @@ impl TskitTreeReporter { &mut self, reference: &GlobalLineageReference, time: NonNegativeF64, - parent: Option<(TskitIndividualID, TskitNodeID)>, - ) -> Option<(TskitIndividualID, TskitNodeID)> { + parent: Option<(IndividualId, NodeId)>, + ) -> Option<(IndividualId, NodeId)> { let origin = self.origins.remove(reference)?; let location = [ f64::from(origin.location().x()), f64::from(origin.location().y()), f64::from(origin.index()), ]; - let metadata: Option<&dyn MetadataRoundtrip> = Some(GlobalLineageMetadata::new(reference)); + let metadata = GlobalLineageMetadata::new(reference); + let parents = if let Some((parent_individual, _parent_node)) = &parent { + std::slice::from_ref(parent_individual) + } else { + &[] + }; // Insert the lineage as an individual - let individual_id = if let Some((parent_individual, _parent_node)) = parent { - self.table.add_individual_with_metadata( - TSK_FLAGS_EMPTY, - &location, - &[parent_individual.0], - metadata, - ) - } else { - self.table - .add_individual_with_metadata(TSK_FLAGS_EMPTY, &location, &[], metadata) - } - .map(TskitIndividualID) - .unwrap(); + let individual_id = self + .table + .add_individual_with_some_metadata(TSK_FLAGS_EMPTY, &location, parents, metadata) + .unwrap(); // Create corresponding node let node_id = self .table - .add_node_with_metadata( + .add_node_with_some_metadata( tskit::TSK_NODE_IS_SAMPLE, time.get(), tskit::TSK_NULL, - individual_id.0, + individual_id, metadata, ) - .map(TskitNodeID) .unwrap(); if let Some((_parent_individual, parent_node)) = parent { // Add the parent-child relation between the nodes self.table - .add_edge(TSK_SEQUENCE_MIN, TSK_SEQUENCE_MAX, parent_node.0, node_id.0) + .add_edge(TSK_SEQUENCE_MIN, TSK_SEQUENCE_MAX, parent_node, node_id) .unwrap(); } @@ -167,8 +161,8 @@ impl TskitTreeReporter { fn store_children_of_parent( &mut self, parent: &GlobalLineageReference, - parent_individual: TskitIndividualID, - parent_node: TskitNodeID, + parent_individual: IndividualId, + parent_node: NodeId, ) { let mut stack = VecDeque::from(vec![(parent.clone(), parent_individual, parent_node)]); From f1c638492301a50333404dba907ad8f4b796b4a4 Mon Sep 17 00:00:00 2001 From: Momo Langenstein Date: Sun, 24 Oct 2021 15:22:12 +0000 Subject: [PATCH 6/7] (ml5717) Upgraded to tskit 0.5 --- Cargo.lock | 86 +++++++++++++++++++---- necsim/plugins/tskit/Cargo.toml | 18 ++--- necsim/plugins/tskit/src/tree/metadata.rs | 5 +- necsim/plugins/tskit/src/tree/reporter.rs | 4 +- necsim/plugins/tskit/src/tree/table.rs | 6 +- 5 files changed, 91 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0f44ff2e6..fb96964a9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -87,7 +87,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75b13ce559e6433d360c26305643803cb52cfbabbc2b9c47ce04a58493dfb443" dependencies = [ "bitflags", - "cexpr", + "cexpr 0.4.0", "cfg-if 0.1.10", "clang-sys", "clap", @@ -106,12 +106,12 @@ dependencies = [ [[package]] name = "bindgen" -version = "0.58.1" +version = "0.59.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f8523b410d7187a43085e7e064416ea32ded16bd0a4e6fc025e21616d01258f" +checksum = "453c49e5950bb0eb63bb3df640e31618846c89d5b7faa54040d76e98e0134375" dependencies = [ "bitflags", - "cexpr", + "cexpr 0.5.0", "clang-sys", "clap", "env_logger 0.8.4", @@ -123,7 +123,7 @@ dependencies = [ "quote", "regex", "rustc-hash", - "shlex 1.0.0", + "shlex 1.1.0", "which", ] @@ -133,6 +133,18 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bitvec" +version = "0.19.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8942c8d352ae1838c9dda0b0ca2ab657696ef2232a20147cf1b30ae1a9cb4321" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + [[package]] name = "build-probe-mpi" version = "0.1.1" @@ -171,7 +183,16 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f4aedb84272dbe89af497cf81375129abda4fc0a9e7c5d317498c15cc30c0d27" dependencies = [ - "nom", + "nom 5.1.2", +] + +[[package]] +name = "cexpr" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db507a7679252d2276ed0dd8113c6875ec56d3089f9225b2b42c30cc1f8e5c89" +dependencies = [ + "nom 6.1.2", ] [[package]] @@ -500,12 +521,14 @@ source = "git+https://github.com/MomoLangenstein/final?rev=a2dbfd6#a2dbfd623863c [[package]] name = "findshlibs" -version = "0.8.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37affc18a9c7cf90b6cf6a7700dab60439a8f138ac5ebc5f12b98281d8f687c9" +checksum = "d691fdb3f817632d259d09220d4cf0991dbb2c9e59e044a02a59194bf6e14484" dependencies = [ + "cc", "lazy_static", "libc", + "winapi", ] [[package]] @@ -526,6 +549,12 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "funty" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fed34cd105917e91daa4da6b3728c47b068749d6a62c59811f06ed2ac71d9da7" + [[package]] name = "getrandom" version = "0.2.3" @@ -963,6 +992,18 @@ dependencies = [ "version_check", ] +[[package]] +name = "nom" +version = "6.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7413f999671bd4745a7b624bd370a569fb6bc574b23c83a3c5ed2e453f3d5e2" +dependencies = [ + "bitvec", + "funty", + "memchr", + "version_check", +] + [[package]] name = "ntapi" version = "0.3.6" @@ -1117,6 +1158,12 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "radium" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "941ba9d78d8e2f7ce474c015eea4d9c6d25b6a3327f9832ee29a4de27f91bbb8" + [[package]] name = "rand" version = "0.8.4" @@ -1531,9 +1578,9 @@ checksum = "7fdf1b9db47230893d76faad238fd6097fd6d6a9245cd7a4d90dbd639536bbd2" [[package]] name = "shlex" -version = "1.0.0" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42a568c8f2cd051a4d283bd6eb0343ac214c1b0f1ac19f93e1175b2dee38c73d" +checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3" [[package]] name = "slab" @@ -1615,6 +1662,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + [[package]] name = "termcolor" version = "1.1.2" @@ -1695,10 +1748,11 @@ dependencies = [ [[package]] name = "tskit" -version = "0.3.0" -source = "git+https://github.com/tskit-dev/tskit-rust?rev=0366800#036680050bdca732f3be4db439dfcfafd144d79c" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90eec8af23c494ac60c90589cf3afbb9fca13e0894cc832fba79c6554d9fbf82" dependencies = [ - "bindgen 0.58.1", + "bindgen 0.59.1", "bitflags", "cc", "chrono", @@ -1804,3 +1858,9 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "wyz" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85e60b0d1b5f99db2556934e21937020776a5d31520bf169e851ac44e6420214" diff --git a/necsim/plugins/tskit/Cargo.toml b/necsim/plugins/tskit/Cargo.toml index 98ff48968..560d3eadb 100644 --- a/necsim/plugins/tskit/Cargo.toml +++ b/necsim/plugins/tskit/Cargo.toml @@ -16,15 +16,15 @@ necsim-core-bond = { path = "../../core/bond" } necsim-plugins-core = { path = "../core", features = ["export"] } serde = { version = "1.0", features = ["derive"] } -serde_json = "1.0.64" -tskit = { git = "https://github.com/tskit-dev/tskit-rust", rev = "0366800", features = ["provenance"] } -uname = "0.1.1" -findshlibs = "0.8.0" +serde_json = "1.0" +tskit = { version = "0.5", features = ["provenance"] } +uname = "0.1" +findshlibs = "0.10" -rustc_version = "0.4.0" -semver = { version = "1.0.3", features = ["serde"] } -git-version = "0.3.4" +rustc_version = "0.4" +semver = { version = "1.0", features = ["serde"] } +git-version = "0.3" [build-dependencies] -rustc_version = "0.4.0" -semver = "1.0.3" +rustc_version = "0.4" +semver = "1.0" diff --git a/necsim/plugins/tskit/src/tree/metadata.rs b/necsim/plugins/tskit/src/tree/metadata.rs index 2ca8b78d5..ffb883659 100644 --- a/necsim/plugins/tskit/src/tree/metadata.rs +++ b/necsim/plugins/tskit/src/tree/metadata.rs @@ -5,7 +5,7 @@ use std::{ }; use necsim_core_bond::NonZeroOneU64; -use tskit::metadata::{MetadataError, MetadataRoundtrip}; +use tskit::metadata::{IndividualMetadata, MetadataError, MetadataRoundtrip, NodeMetadata}; use necsim_core::lineage::GlobalLineageReference; @@ -47,6 +47,9 @@ impl MetadataRoundtrip for GlobalLineageMetadata { } } +impl IndividualMetadata for GlobalLineageMetadata {} +impl NodeMetadata for GlobalLineageMetadata {} + impl GlobalLineageMetadata { pub fn new(reference: &GlobalLineageReference) -> &Self { unsafe { &*(reference as *const GlobalLineageReference).cast() } diff --git a/necsim/plugins/tskit/src/tree/reporter.rs b/necsim/plugins/tskit/src/tree/reporter.rs index e5bf80a82..6baed4f6f 100644 --- a/necsim/plugins/tskit/src/tree/reporter.rs +++ b/necsim/plugins/tskit/src/tree/reporter.rs @@ -37,8 +37,8 @@ impl Reporter for TskitTreeReporter { self.store_individual_coalescence(&dispersal.global_lineage_reference, &parent, dispersal.prior_time); } } - } else if let Some(parent) = &dispersal.interaction.parent() { - self.store_individual_coalescence(&dispersal.global_lineage_reference, parent, dispersal.event_time.into()); + } else if let Some(parent) = dispersal.interaction.parent() { + self.store_individual_coalescence(&dispersal.global_lineage_reference, &parent, dispersal.event_time.into()); } self.last_dispersal_event = Some(dispersal.clone()); diff --git a/necsim/plugins/tskit/src/tree/table.rs b/necsim/plugins/tskit/src/tree/table.rs index adbae4ccf..13712c0f4 100644 --- a/necsim/plugins/tskit/src/tree/table.rs +++ b/necsim/plugins/tskit/src/tree/table.rs @@ -127,16 +127,16 @@ impl TskitTreeReporter { // Insert the lineage as an individual let individual_id = self .table - .add_individual_with_some_metadata(TSK_FLAGS_EMPTY, &location, parents, metadata) + .add_individual_with_metadata(TSK_FLAGS_EMPTY, &location, parents, metadata) .unwrap(); // Create corresponding node let node_id = self .table - .add_node_with_some_metadata( + .add_node_with_metadata( tskit::TSK_NODE_IS_SAMPLE, time.get(), - tskit::TSK_NULL, + tskit::PopulationId::NULL, individual_id, metadata, ) From ef8a6bcd7b9475e534ba689cb0218d684aa8529d Mon Sep 17 00:00:00 2001 From: Momo Langenstein Date: Tue, 9 Nov 2021 07:57:44 +0000 Subject: [PATCH 7/7] (ml5717) Switched HashMap to FnvHasher --- Cargo.lock | 4 ++++ necsim/impls/no-std/Cargo.toml | 1 + .../alias/dynamic/indexed/mod.rs | 19 ++++++++++------- .../alias/dynamic/indexed/tests.rs | 8 +++---- .../coherent/globally/almost_infinite/mod.rs | 6 ++++-- .../coherent/globally/gillespie/mod.rs | 5 +++-- .../coherent/locally/classical/mod.rs | 6 ++++-- necsim/plugins/metacommunity/Cargo.toml | 1 + necsim/plugins/metacommunity/src/lib.rs | 4 +++- necsim/plugins/species/Cargo.toml | 1 + necsim/plugins/species/src/species/mod.rs | 9 ++++---- necsim/plugins/tskit/Cargo.toml | 1 + necsim/plugins/tskit/src/tree/mod.rs | 21 ++++++++++++------- 13 files changed, 56 insertions(+), 30 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fb96964a9..11d5c23ba 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -828,6 +828,7 @@ dependencies = [ "contracts", "displaydoc", "final", + "fnv", "hashbrown", "libm", "log", @@ -937,6 +938,7 @@ dependencies = [ name = "necsim-plugins-metacommunity" version = "0.1.0" dependencies = [ + "fnv", "log", "necsim-core", "necsim-plugins-core", @@ -948,6 +950,7 @@ dependencies = [ name = "necsim-plugins-species" version = "0.1.0" dependencies = [ + "fnv", "log", "necsim-core", "necsim-core-bond", @@ -970,6 +973,7 @@ name = "necsim-plugins-tskit" version = "0.1.0" dependencies = [ "findshlibs", + "fnv", "git-version", "necsim-core", "necsim-core-bond", diff --git a/necsim/impls/no-std/Cargo.toml b/necsim/impls/no-std/Cargo.toml index 7ad3fca57..7e70c1d5d 100644 --- a/necsim/impls/no-std/Cargo.toml +++ b/necsim/impls/no-std/Cargo.toml @@ -26,6 +26,7 @@ serde = { version = "1.0", default-features = false, features = ["alloc", "deriv log = "0.4" displaydoc = { version = "0.2", default-features = false, features = [] } final = { git = "https://github.com/MomoLangenstein/final", rev = "a2dbfd6" } +fnv = { version = "1.0", default-features = false, features = [] } [target.'cfg(target_os = "cuda")'.dependencies] rust-cuda = { git = "https://github.com/MomoLangenstein/rust-cuda", branch = "main", features = ["derive"], optional = true } diff --git a/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/dynamic/indexed/mod.rs b/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/dynamic/indexed/mod.rs index e6d8c9f9a..c978f42a6 100644 --- a/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/dynamic/indexed/mod.rs +++ b/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/dynamic/indexed/mod.rs @@ -5,6 +5,7 @@ use core::{ hash::Hash, num::{NonZeroU128, NonZeroUsize}, }; +use fnv::FnvBuildHasher; use hashbrown::HashMap; @@ -26,7 +27,7 @@ struct RejectionSamplingGroup { pub struct DynamicAliasMethodIndexedSampler { exponents: Vec, groups: Vec>, - lookup: HashMap, + lookup: HashMap, min_exponent: i16, total_weight: u128, } @@ -47,7 +48,7 @@ impl RejectionSamplingGroup { unsafe fn sample_pop_inplace>( &mut self, - lookup: &mut HashMap, + lookup: &mut HashMap, rng: &mut G, ) -> (Option<&mut Self>, E) { if let [event] = &self.events[..] { @@ -86,7 +87,7 @@ impl RejectionSamplingGroup { #[cfg(test)] fn sample_pop>( mut self, - lookup: &mut HashMap, + lookup: &mut HashMap, rng: &mut G, ) -> (Option, E) { match unsafe { self.sample_pop_inplace(lookup, rng) } { @@ -98,7 +99,7 @@ impl RejectionSamplingGroup { unsafe fn remove_inplace( &mut self, index: usize, - lookup: &mut HashMap, + lookup: &mut HashMap, ) -> Option<&mut Self> { self.events.swap_remove(index); let weight = self.weights.swap_remove(index); @@ -119,7 +120,11 @@ impl RejectionSamplingGroup { } #[cfg(test)] - fn remove(mut self, index: usize, lookup: &mut HashMap) -> Option { + fn remove( + mut self, + index: usize, + lookup: &mut HashMap, + ) -> Option { if unsafe { self.remove_inplace(index, lookup) }.is_some() { Some(self) } else { @@ -158,7 +163,7 @@ impl DynamicAliasMethodIndexedSampler { Self { exponents: Vec::new(), groups: Vec::new(), - lookup: HashMap::new(), + lookup: HashMap::default(), min_exponent: 0_i16, total_weight: 0_u128, } @@ -172,7 +177,7 @@ impl DynamicAliasMethodIndexedSampler { Self { exponents: Vec::with_capacity(capacity_log2_approx), groups: Vec::with_capacity(capacity_log2_approx), - lookup: HashMap::with_capacity(capacity), + lookup: HashMap::with_capacity_and_hasher(capacity, FnvBuildHasher::default()), min_exponent: 0_i16, total_weight: 0_u128, } diff --git a/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/dynamic/indexed/tests.rs b/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/dynamic/indexed/tests.rs index 0134a51cd..a2cb41a86 100644 --- a/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/dynamic/indexed/tests.rs +++ b/necsim/impls/no-std/src/cogs/active_lineage_sampler/alias/dynamic/indexed/tests.rs @@ -22,7 +22,7 @@ fn singular_event_group() { assert_eq!(&group.weights, &[1_u64]); assert_eq!(group.total_weight, 1_u128); - assert!(group.remove(0, &mut HashMap::new()).is_none()); + assert!(group.remove(0, &mut HashMap::default()).is_none()); } #[test] @@ -30,7 +30,7 @@ fn singular_event_group() { fn add_remove_event_group() { let mut group = RejectionSamplingGroup::new(0_u8, 1_u64); - let mut lookup = HashMap::new(); + let mut lookup = HashMap::default(); lookup.insert( 0_u8, EventLocation { @@ -137,7 +137,7 @@ fn add_remove_event_group() { ); assert_eq!( - group.sample_pop(&mut HashMap::new(), &mut DummyRng::new(vec![0.0, 0.0])), + group.sample_pop(&mut HashMap::default(), &mut DummyRng::new(vec![0.0, 0.0])), (None, 2_u8) ); } @@ -151,7 +151,7 @@ fn sample_single_group() { decompose_weight(PositiveF64::new(6.0 / 12.0).unwrap()).mantissa, ); - let mut lookup = HashMap::new(); + let mut lookup = HashMap::default(); for i in 1..6 { assert_eq!( diff --git a/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/almost_infinite/mod.rs b/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/almost_infinite/mod.rs index c58a8e6e9..81d862851 100644 --- a/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/almost_infinite/mod.rs +++ b/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/almost_infinite/mod.rs @@ -1,5 +1,6 @@ use core::{marker::PhantomData, ops::Index}; +use fnv::FnvBuildHasher; use hashbrown::hash_map::HashMap; use slab::Slab; @@ -20,7 +21,7 @@ mod store; #[derive(Debug)] pub struct AlmostInfiniteLineageStore { lineages_store: Slab, - location_to_lineage_reference: HashMap, + location_to_lineage_reference: HashMap, _marker: PhantomData, } @@ -46,7 +47,8 @@ impl AlmostInfiniteLineageStore { let lineages_amount_hint = origin_sampler.full_upper_bound_size_hint() as usize; let mut lineages_store = Slab::with_capacity(lineages_amount_hint); - let mut location_to_lineage_references = HashMap::with_capacity(lineages_amount_hint); + let mut location_to_lineage_references = + HashMap::with_capacity_and_hasher(lineages_amount_hint, FnvBuildHasher::default()); for lineage in origin_sampler { location_to_lineage_references.insert( diff --git a/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/gillespie/mod.rs b/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/gillespie/mod.rs index 6e34ef70f..59901efe1 100644 --- a/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/gillespie/mod.rs +++ b/necsim/impls/no-std/src/cogs/lineage_store/coherent/globally/gillespie/mod.rs @@ -2,6 +2,7 @@ use core::{marker::PhantomData, ops::Index}; use alloc::vec::Vec; +use fnv::FnvBuildHasher; use hashbrown::hash_map::HashMap; use slab::Slab; @@ -21,7 +22,7 @@ pub struct GillespieLineageStore> { lineages_store: Slab, location_to_lineage_references: Array2D>, indexed_location_to_lineage_reference: - HashMap, + HashMap, _marker: PhantomData<(M, H)>, } @@ -55,7 +56,7 @@ impl<'h, M: MathsCore, H: 'h + Habitat> GillespieLineageStore { ); let mut indexed_location_to_lineage_reference = - HashMap::with_capacity(lineages_amount_hint); + HashMap::with_capacity_and_hasher(lineages_amount_hint, FnvBuildHasher::default()); let x_from = landscape_extent.x(); let y_from = landscape_extent.y(); diff --git a/necsim/impls/no-std/src/cogs/lineage_store/coherent/locally/classical/mod.rs b/necsim/impls/no-std/src/cogs/lineage_store/coherent/locally/classical/mod.rs index aa0c4abae..c0aca557a 100644 --- a/necsim/impls/no-std/src/cogs/lineage_store/coherent/locally/classical/mod.rs +++ b/necsim/impls/no-std/src/cogs/lineage_store/coherent/locally/classical/mod.rs @@ -1,5 +1,6 @@ use core::{marker::PhantomData, ops::Index}; +use fnv::FnvBuildHasher; use hashbrown::hash_map::HashMap; use slab::Slab; @@ -17,7 +18,8 @@ mod store; #[derive(Debug)] pub struct ClassicalLineageStore> { lineages_store: Slab, - indexed_location_to_lineage_reference: HashMap, + indexed_location_to_lineage_reference: + HashMap, _marker: PhantomData<(M, H)>, } @@ -43,7 +45,7 @@ impl<'h, M: MathsCore, H: 'h + Habitat> ClassicalLineageStore { let mut lineages_store = Slab::with_capacity(lineages_amount_hint); let mut indexed_location_to_lineage_reference = - HashMap::with_capacity(lineages_amount_hint); + HashMap::with_capacity_and_hasher(lineages_amount_hint, FnvBuildHasher::default()); for lineage in origin_sampler { let indexed_location = lineage.indexed_location.clone(); diff --git a/necsim/plugins/metacommunity/Cargo.toml b/necsim/plugins/metacommunity/Cargo.toml index 5974df96d..d98afcd8c 100644 --- a/necsim/plugins/metacommunity/Cargo.toml +++ b/necsim/plugins/metacommunity/Cargo.toml @@ -17,3 +17,4 @@ necsim-plugins-core = { path = "../core", features = ["export"] } serde = { version = "1.0", features = ["derive"] } log = { version = "0.4" } rand = "0.8" +fnv = "1.0" diff --git a/necsim/plugins/metacommunity/src/lib.rs b/necsim/plugins/metacommunity/src/lib.rs index 5fcddbb66..da85ce27c 100644 --- a/necsim/plugins/metacommunity/src/lib.rs +++ b/necsim/plugins/metacommunity/src/lib.rs @@ -5,6 +5,7 @@ extern crate log; use std::{collections::HashSet, fmt, num::NonZeroU64}; +use fnv::FnvBuildHasher; use rand::{rngs::StdRng, Rng, SeedableRng}; use serde::{Deserialize, Serialize}; @@ -102,7 +103,8 @@ impl Reporter for MetacommunityMigrationReporter { let mut rng = StdRng::seed_from_u64(self.seed); - let mut unique_migration_targets = HashSet::new(); + let mut unique_migration_targets = + HashSet::with_capacity_and_hasher(self.migrations, FnvBuildHasher::default()); for _ in 0..self.migrations { unique_migration_targets.insert(rng.gen_range(0..metacommunity_size.get())); diff --git a/necsim/plugins/species/Cargo.toml b/necsim/plugins/species/Cargo.toml index a156f0a06..f6ecbf70c 100644 --- a/necsim/plugins/species/Cargo.toml +++ b/necsim/plugins/species/Cargo.toml @@ -18,3 +18,4 @@ necsim-plugins-core = { path = "../core", features = ["export"] } serde = { version = "1.0", features = ["derive"] } log = { version = "0.4" } rusqlite = "0.26" +fnv = "1.0" diff --git a/necsim/plugins/species/src/species/mod.rs b/necsim/plugins/species/src/species/mod.rs index 318e2146c..bff17c82c 100644 --- a/necsim/plugins/species/src/species/mod.rs +++ b/necsim/plugins/species/src/species/mod.rs @@ -1,5 +1,6 @@ use std::{collections::HashMap, convert::TryFrom, fmt, path::PathBuf}; +use fnv::FnvBuildHasher; use rusqlite::Connection; use serde::{Deserialize, Serialize}; @@ -26,9 +27,9 @@ pub struct SpeciesLocationsReporter { // Original (present-time) locations of all lineages origins: Vec<(GlobalLineageReference, IndexedLocation)>, // Child -> Parent lineage mapping - parents: HashMap, + parents: HashMap, // Species originator -> Species identities mapping - species: HashMap, + species: HashMap, output: PathBuf, table: String, @@ -79,8 +80,8 @@ impl TryFrom for SpeciesLocationsReporter { last_dispersal_event: None, origins: Vec::new(), - parents: HashMap::new(), - species: HashMap::new(), + parents: HashMap::default(), + species: HashMap::default(), output: args.output, table: args.table, diff --git a/necsim/plugins/tskit/Cargo.toml b/necsim/plugins/tskit/Cargo.toml index 560d3eadb..af2258041 100644 --- a/necsim/plugins/tskit/Cargo.toml +++ b/necsim/plugins/tskit/Cargo.toml @@ -20,6 +20,7 @@ serde_json = "1.0" tskit = { version = "0.5", features = ["provenance"] } uname = "0.1" findshlibs = "0.10" +fnv = "1.0" rustc_version = "0.4" semver = { version = "1.0", features = ["serde"] } diff --git a/necsim/plugins/tskit/src/tree/mod.rs b/necsim/plugins/tskit/src/tree/mod.rs index 32d1fae6e..c4d9b1acc 100644 --- a/necsim/plugins/tskit/src/tree/mod.rs +++ b/necsim/plugins/tskit/src/tree/mod.rs @@ -1,5 +1,6 @@ use std::{collections::HashMap, convert::TryFrom, fmt, fs::OpenOptions, io}; +use fnv::FnvBuildHasher; use serde::{Deserialize, Serialize, Serializer}; use tskit::{IndividualId, NodeId, TableCollection}; @@ -27,13 +28,17 @@ pub struct TskitTreeReporter { last_dispersal_event: Option, // Original (present-time) locations of all lineages - origins: HashMap, + origins: HashMap, // Children lineages of a parent, used if parent is unknown at coalescence - children: HashMap>, + children: HashMap< + GlobalLineageReference, + Vec<(GlobalLineageReference, NonNegativeF64)>, + FnvBuildHasher, + >, // Child -> Parent lineage mapping - parents: HashMap, + parents: HashMap, // Lineage to tskit mapping, used if parent is known before coalescence - tskit_ids: HashMap, + tskit_ids: HashMap, table: TableCollection, @@ -83,10 +88,10 @@ impl TryFrom for TskitTreeReporter { last_speciation_event: None, last_dispersal_event: None, - origins: HashMap::new(), - children: HashMap::new(), - parents: HashMap::new(), - tskit_ids: HashMap::new(), + origins: HashMap::default(), + children: HashMap::default(), + parents: HashMap::default(), + tskit_ids: HashMap::default(), table,