diff --git a/.gitattributes b/.gitattributes index 3c507eb27..7500e2c01 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,3 @@ -*.graphml text eol=lf \ No newline at end of file +*.graphml text eol=lf +third-party-licenses.html linguist-generated +/graphannis/tests/data/ linguist-generated=true diff --git a/.github/workflows/verify.yml b/.github/workflows/verify.yml index f3410a9ac..195ef6073 100644 --- a/.github/workflows/verify.yml +++ b/.github/workflows/verify.yml @@ -1,9 +1,6 @@ name: Verify on: - push: - branches: - - main pull_request: jobs: @@ -66,4 +63,3 @@ jobs: ``` ${{env.COVERAGE_INFO}} ``` - \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index c4068c3fa..c722a843d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - New optional `file` option for the `[logging]` section in the webservice configuration. Can be used to additionally output all log messages to the given file. +- Add number of root nodes to graph storage statistics. This changes the way +most of the graph storages store their statistics. You can use old imported data +files, but to make use of the new information in your queries, you have to +**reimport** your corpora. - `Graph:ensure_loaded_parallel` returns the actually loaded components that did exist. @@ -23,6 +27,8 @@ exist. - Less frequent corpus cache status updates in log. Before, every corpus access could trigger an entry into the log which is not desired under heavy load. +- Improve query execution planning by assuming all annotations can be matched in +regular expressions without a prefix. 
## [3.7.1] - 2025-04-14 diff --git a/cli/src/bin/annis.rs b/cli/src/bin/annis.rs index a055918e2..9205f43e2 100644 --- a/cli/src/bin/annis.rs +++ b/cli/src/bin/annis.rs @@ -321,7 +321,7 @@ impl AnnisRunner { format = ExportFormat::GraphMLZip; } else if file_ext.to_string_lossy() == ".graphml" && self.current_corpus.len() != 1 { bail!( - r##"You need to select a *single* corpus first with the \"corpus\" command when exporting to a GraphML file. + r##"You need to select a *single* corpus first with the \"corpus\" command when exporting to a GraphML file. To export multiple corpora, select a directory as output or a ZIP file (ending with .zip)"## ); } @@ -442,7 +442,7 @@ impl AnnisRunner { "unsorted" => ResultOrder::NotSorted, _ => { return Err(anyhow!( - "Non-existing order with name {}. + "Non-existing order with name {}. Must be one of \"normal\", \"inverted\", \"random\", \"unsorted\"", args )); diff --git a/cli/tests/cli.rs b/cli/tests/cli.rs index 4830293c2..bf94d1680 100644 --- a/cli/tests/cli.rs +++ b/cli/tests/cli.rs @@ -26,7 +26,7 @@ fn show_corpus_info() -> Result<(), Box> { cmd.arg("../graphannis/tests/data/") .arg("-c") - .arg("corpus sample-disk-based-3.3") + .arg("corpus sample-disk-based-3.8") .arg("-c") .arg("preload") .arg("-c") diff --git a/cli/tests/snapshots/cli__list_corpora_fully_loaded.snap b/cli/tests/snapshots/cli__list_corpora_fully_loaded.snap index fac1ea16c..b7a158610 100644 --- a/cli/tests/snapshots/cli__list_corpora_fully_loaded.snap +++ b/cli/tests/snapshots/cli__list_corpora_fully_loaded.snap @@ -20,6 +20,7 @@ exit_code: 0 sample-disk-based-1.5 (not loaded) sample-disk-based-3.2 (not loaded) sample-disk-based-3.3 (fully loaded) +sample-disk-based-3.8 (not loaded) sample-memory-based-1.5 (not loaded) sample-memory-based-3.2 (not loaded) sample-memory-based-3.3 (not loaded) diff --git a/cli/tests/snapshots/cli__list_corpora_not_loaded.snap b/cli/tests/snapshots/cli__list_corpora_not_loaded.snap index 091f4f73c..7fd24b45b 100644 --- 
a/cli/tests/snapshots/cli__list_corpora_not_loaded.snap +++ b/cli/tests/snapshots/cli__list_corpora_not_loaded.snap @@ -13,10 +13,10 @@ exit_code: 0 sample-disk-based-1.5 (not loaded) sample-disk-based-3.2 (not loaded) sample-disk-based-3.3 (not loaded) +sample-disk-based-3.8 (not loaded) sample-memory-based-1.5 (not loaded) sample-memory-based-3.2 (not loaded) sample-memory-based-3.3 (not loaded) graphANNIS says good-bye! ----- stderr ----- - diff --git a/cli/tests/snapshots/cli__list_corpora_partially_loaded.snap b/cli/tests/snapshots/cli__list_corpora_partially_loaded.snap index 58256a6c3..f8b2994b4 100644 --- a/cli/tests/snapshots/cli__list_corpora_partially_loaded.snap +++ b/cli/tests/snapshots/cli__list_corpora_partially_loaded.snap @@ -21,6 +21,7 @@ result: 44 matches in 4 documents sample-disk-based-1.5 (not loaded) sample-disk-based-3.2 (not loaded) sample-disk-based-3.3 (partially loaded) +sample-disk-based-3.8 (not loaded) sample-memory-based-1.5 (not loaded) sample-memory-based-3.2 (not loaded) sample-memory-based-3.3 (not loaded) diff --git a/cli/tests/snapshots/cli__show_corpus_info.snap b/cli/tests/snapshots/cli__show_corpus_info.snap index e99870165..876b412df 100644 --- a/cli/tests/snapshots/cli__show_corpus_info.snap +++ b/cli/tests/snapshots/cli__show_corpus_info.snap @@ -5,7 +5,7 @@ info: args: - "../graphannis/tests/data/" - "-c" - - corpus sample-disk-based-3.3 + - corpus sample-disk-based-3.8 - "-c" - preload - "-c" @@ -14,59 +14,59 @@ info: success: true exit_code: 0 ----- stdout ----- -12:00:00[INFO] Loaded corpus sample-disk-based-3.3 -12:00:00[INFO] Corpus cache after preloading sample-disk-based-3.3: 100MB / 300MB - loaded corpora [sample-disk-based-3.3] +12:00:00[INFO] Loaded corpus sample-disk-based-3.8 +12:00:00[INFO] Corpus cache after preloading sample-disk-based-3.8: 100MB / 300MB - loaded corpora [sample-disk-based-3.8] 12:00:00[INFO] Preloaded corpus in 10 ms Status: "fully loaded" Token search shortcut possible: true 
------------ Component Coverage//: 0 annnotations -Stats: nodes=92, avg_fan_out=2.17, max_fan_out=11, max_depth=1 +Stats: nodes=92, root nodes=48, avg_fan_out=2.17, max_fan_out=11, fan_out_99%=11, inv_fan_out_99%=9, max_depth=1 Implementation: DiskAdjacencyListV1 Status: "fully loaded" ------------ Component Coverage/annis/: 0 annnotations -Stats: nodes=0, avg_fan_out=0.00, max_fan_out=0, max_depth=1, tree +Stats: nodes=0, root nodes=0, avg_fan_out=0.00, max_fan_out=0, fan_out_99%=0, inv_fan_out_99%=0, max_depth=1, tree Implementation: DiskAdjacencyListV1 Status: "fully loaded" ------------ Component Coverage/default_ns/: 0 annnotations -Stats: nodes=56, avg_fan_out=0.93, max_fan_out=10, max_depth=1 +Stats: nodes=56, root nodes=12, avg_fan_out=0.93, max_fan_out=10, fan_out_99%=10, inv_fan_out_99%=2, max_depth=1 Implementation: DiskAdjacencyListV1 Status: "fully loaded" ------------ Component Coverage/annis/inherited-coverage: 0 annnotations -Stats: nodes=0, avg_fan_out=0.00, max_fan_out=0, max_depth=1, tree +Stats: nodes=0, root nodes=0, avg_fan_out=0.00, max_fan_out=0, fan_out_99%=0, inv_fan_out_99%=0, max_depth=1, tree Implementation: DiskAdjacencyListV1 Status: "fully loaded" ------------ Component Dominance/syntax/: 0 annnotations -Stats: nodes=92, avg_fan_out=0.96, max_fan_out=3, max_depth=9, tree +Stats: nodes=92, root nodes=4, avg_fan_out=0.96, max_fan_out=3, fan_out_99%=3, inv_fan_out_99%=1, max_depth=9, tree Implementation: PrePostOrderO16L8V1 Status: "fully loaded" ------------ Component Pointing/default_ns/anaphoric: 0 annnotations -Stats: nodes=8, avg_fan_out=0.50, max_fan_out=1, max_depth=1, tree +Stats: nodes=8, root nodes=4, avg_fan_out=0.50, max_fan_out=1, fan_out_99%=1, inv_fan_out_99%=1, max_depth=1, tree Implementation: DiskPathV1_D15 Status: "fully loaded" ------------ Component Ordering/annis/: 0 annnotations -Stats: nodes=44, avg_fan_out=0.91, max_fan_out=1, max_depth=10, tree +Stats: nodes=44, root nodes=4, avg_fan_out=0.91, max_fan_out=1, 
fan_out_99%=1, inv_fan_out_99%=1, max_depth=10, tree Implementation: DiskPathV1_D15 Status: "fully loaded" ------------ Component LeftToken/annis/: 0 annnotations -Stats: nodes=92, avg_fan_out=0.65, max_fan_out=1, max_depth=1 +Stats: nodes=92, root nodes=60, avg_fan_out=0.65, max_fan_out=1, fan_out_99%=1, inv_fan_out_99%=3, max_depth=1 Implementation: DiskPathV1_D15 Status: "fully loaded" ------------ Component RightToken/annis/: 0 annnotations -Stats: nodes=84, avg_fan_out=0.71, max_fan_out=1, max_depth=1 +Stats: nodes=84, root nodes=60, avg_fan_out=0.71, max_fan_out=1, fan_out_99%=1, inv_fan_out_99%=8, max_depth=1 Implementation: DiskPathV1_D15 Status: "fully loaded" ------------ Component PartOf/annis/: 0 annnotations -Stats: nodes=115, avg_fan_out=0.99, max_fan_out=1, max_depth=4 +Stats: nodes=115, root nodes=104, avg_fan_out=0.99, max_fan_out=1, fan_out_99%=1, inv_fan_out_99%=26, max_depth=4 Implementation: DiskPathV1_D15 Status: "fully loaded" ------------ diff --git a/core/src/annostorage/inmemory.rs b/core/src/annostorage/inmemory.rs index 6d881061f..7b8446a38 100644 --- a/core/src/annostorage/inmemory.rs +++ b/core/src/annostorage/inmemory.rs @@ -720,7 +720,6 @@ where if let Some(anno_key) = self.anno_keys.get_symbol(&anno_key) { if let Some(histo) = self.histogram_bounds.get(&anno_key) { // find the range in which the value is contained - // we need to make sure the histogram is not empty -> should have at least two bounds if histo.len() >= 2 { sum_histogram_buckets += histo.len() - 1; @@ -752,6 +751,10 @@ where fn guess_max_count_regex(&self, ns: Option<&str>, name: &str, pattern: &str) -> Result { let full_match_pattern = util::regex_full_match(pattern); + // Get the total number of annotations with the namespace/name. 
We + // can't get larger than this number + let total = self.number_of_annotations_by_name(ns, name)?; + // Try to parse the regular expression let parsed = regex_syntax::Parser::new().parse(&full_match_pattern); if let Ok(parsed) = parsed { @@ -770,11 +773,13 @@ where guessed_count += self.guess_max_count(ns, name, lower_val, &upper_val)?; } } + } else { + // For regular expressions without a prefix the worst case would be `.*[X].*` where `[X]` are the most common characters. + // Assume that a generic percentage (here 5%) of all nodes match the regex. + // TODO: find better ways of estimating this constant + guessed_count = (0.05 * (total as f64)) as usize; } - // Get the total number of annotations with the namespace/name. We - // can't get larger than this number - let total = self.number_of_annotations_by_name(ns, name)?; Ok(guessed_count.min(total)) } else { Ok(0) diff --git a/core/src/annostorage/ondisk.rs b/core/src/annostorage/ondisk.rs index f947dce6d..069588830 100644 --- a/core/src/annostorage/ondisk.rs +++ b/core/src/annostorage/ondisk.rs @@ -895,6 +895,10 @@ where fn guess_max_count_regex(&self, ns: Option<&str>, name: &str, pattern: &str) -> Result { let full_match_pattern = util::regex_full_match(pattern); + // Get the total number of annotations with the namespace/name. We + // can't get larger than this number + let total = self.number_of_annotations_by_name(ns, name)?; + // Try to parse the regular expression let parsed = Parser::new().parse(&full_match_pattern); if let Ok(parsed) = parsed { @@ -913,11 +917,13 @@ where guessed_count += self.guess_max_count(ns, name, lower_val, &upper_val)?; } } + } else { + // For regular expressions without a prefix the worst case would be `.*[X].*` where `[X]` are the most common characters. + // Assume that a generic percentage (here 5%) of all nodes match the regex. 
+ // TODO: find better ways of estimating this constant + guessed_count = (0.05 * (total as f64)) as usize; } - // Get the total number of annotations with the namespace/name. We - // can't get larger than this number - let total = self.number_of_annotations_by_name(ns, name)?; Ok(guessed_count.min(total)) } else { Ok(0) diff --git a/core/src/graph/storage/adjacencylist.rs b/core/src/graph/storage/adjacencylist.rs index 9a105acc9..c70f19f2f 100644 --- a/core/src/graph/storage/adjacencylist.rs +++ b/core/src/graph/storage/adjacencylist.rs @@ -7,7 +7,12 @@ use crate::{ types::{AnnoKey, Annotation, Edge, NodeID}, }; -use super::{EdgeContainer, GraphStatistic, GraphStorage, WriteableGraphStorage}; +use super::{ + deserialize_gs_field, + legacy::{self, AdjacencyListStorageV1}, + load_statistics_from_location, save_statistics_to_toml, serialize_gs_field, EdgeContainer, + GraphStatistic, GraphStorage, WriteableGraphStorage, +}; use itertools::Itertools; use rustc_hash::FxHashSet; use serde::Deserialize; @@ -123,13 +128,34 @@ impl GraphStorage for AdjacencyListStorage { where for<'de> Self: std::marker::Sized + Deserialize<'de>, { - let mut result: Self = super::default_deserialize_gs(location)?; + let legacy_path = location.join("component.bin"); + let mut result: Self = if legacy_path.is_file() { + let component: AdjacencyListStorageV1 = deserialize_gs_field(location, "component")?; + Self { + stats: component.stats.map(GraphStatistic::from), + edges: component.edges, + inverse_edges: component.inverse_edges, + annos: component.annos, + } + } else { + let stats = load_statistics_from_location(location)?; + Self { + edges: deserialize_gs_field(location, "edges")?, + inverse_edges: deserialize_gs_field(location, "inverse_edges")?, + annos: deserialize_gs_field(location, "annos")?, + stats, + } + }; + result.annos.after_deserialization(); Ok(result) } fn save_to(&self, location: &Path) -> Result<()> { - super::default_serialize_gs(self, location)?; + 
serialize_gs_field(&self.edges, "edges", location)?; + serialize_gs_field(&self.inverse_edges, "inverse_edges", location)?; + serialize_gs_field(&self.annos, "annos", location)?; + save_statistics_to_toml(location, self.stats.as_ref())?; Ok(()) } @@ -334,6 +360,7 @@ impl WriteableGraphStorage for AdjacencyListStorage { cyclic: false, rooted_tree: true, nodes: 0, + root_nodes: 0, dfs_visit_ratio: 0.0, }; @@ -370,6 +397,7 @@ impl WriteableGraphStorage for AdjacencyListStorage { } } } + stats.root_nodes = roots.len(); let fan_outs = get_fan_outs(&self.edges); let sum_fan_out: usize = fan_outs.iter().sum(); @@ -446,5 +474,16 @@ impl WriteableGraphStorage for AdjacencyListStorage { } } +impl From for AdjacencyListStorage { + fn from(value: legacy::AdjacencyListStorageV1) -> Self { + Self { + edges: value.edges, + inverse_edges: value.inverse_edges, + annos: value.annos, + stats: value.stats.map(GraphStatistic::from), + } + } +} + #[cfg(test)] mod tests; diff --git a/core/src/graph/storage/dense_adjacency.rs b/core/src/graph/storage/dense_adjacency.rs index 1f5d6355e..3d86425a6 100644 --- a/core/src/graph/storage/dense_adjacency.rs +++ b/core/src/graph/storage/dense_adjacency.rs @@ -1,4 +1,7 @@ -use super::{EdgeContainer, GraphStatistic, GraphStorage}; +use super::{ + deserialize_gs_field, legacy::DenseAdjacencyListStorageV1, load_statistics_from_location, + save_statistics_to_toml, serialize_gs_field, EdgeContainer, GraphStatistic, GraphStorage, +}; use crate::{ annostorage::{ inmemory::AnnoStorageImpl, AnnotationStorage, EdgeAnnotationStorage, NodeAnnotationStorage, @@ -219,13 +222,35 @@ impl GraphStorage for DenseAdjacencyListStorage { where for<'de> Self: std::marker::Sized + Deserialize<'de>, { - let mut result: Self = super::default_deserialize_gs(location)?; + let legacy_path = location.join("component.bin"); + let mut result: Self = if legacy_path.is_file() { + let component: DenseAdjacencyListStorageV1 = + deserialize_gs_field(location, "component")?; + Self { + 
edges: component.edges, + inverse_edges: component.inverse_edges, + annos: component.annos, + stats: component.stats.map(GraphStatistic::from), + } + } else { + let stats = load_statistics_from_location(location)?; + Self { + edges: deserialize_gs_field(location, "edges")?, + inverse_edges: deserialize_gs_field(location, "inverse_edges")?, + annos: deserialize_gs_field(location, "annos")?, + stats, + } + }; + result.annos.after_deserialization(); Ok(result) } fn save_to(&self, location: &Path) -> Result<()> { - super::default_serialize_gs(self, location)?; + serialize_gs_field(&self.edges, "edges", location)?; + serialize_gs_field(&self.inverse_edges, "inverse_edges", location)?; + serialize_gs_field(&self.annos, "annos", location)?; + save_statistics_to_toml(location, self.stats.as_ref())?; Ok(()) } } diff --git a/core/src/graph/storage/disk_adjacency.rs b/core/src/graph/storage/disk_adjacency.rs index d86fcee22..f38b9bdfb 100644 --- a/core/src/graph/storage/disk_adjacency.rs +++ b/core/src/graph/storage/disk_adjacency.rs @@ -143,12 +143,7 @@ impl GraphStorage for DiskAdjacencyListStorage { where Self: std::marker::Sized, { - // Read stats - let stats_path = location.join("edge_stats.bin"); - let f_stats = std::fs::File::open(stats_path)?; - let input = std::io::BufReader::new(f_stats); - let stats = bincode::deserialize_from(input)?; - + let stats = load_statistics_from_location(location)?; let result = DiskAdjacencyListStorage { edges: DiskMap::new( Some(&location.join("edges.bin")), @@ -179,12 +174,7 @@ impl GraphStorage for DiskAdjacencyListStorage { self.inverse_edges .write_to(&location.join("inverse_edges.bin"))?; self.annos.save_annotations_to(location)?; - // Write stats with bincode - let stats_path = location.join("edge_stats.bin"); - let f_stats = std::fs::File::create(stats_path)?; - let mut writer = std::io::BufWriter::new(f_stats); - bincode::serialize_into(&mut writer, &self.stats)?; - + save_statistics_to_toml(location, self.stats.as_ref())?; 
Ok(()) } @@ -368,6 +358,7 @@ impl WriteableGraphStorage for DiskAdjacencyListStorage { cyclic: false, rooted_tree: true, nodes: 0, + root_nodes: 0, dfs_visit_ratio: 0.0, }; @@ -404,6 +395,7 @@ impl WriteableGraphStorage for DiskAdjacencyListStorage { roots.remove(&e.target); } } + stats.root_nodes = roots.len(); let fan_outs = get_fan_outs(&self.edges)?; let sum_fan_out: usize = fan_outs.iter().sum(); diff --git a/core/src/graph/storage/disk_path.rs b/core/src/graph/storage/disk_path.rs index 04292c522..9264bd616 100644 --- a/core/src/graph/storage/disk_path.rs +++ b/core/src/graph/storage/disk_path.rs @@ -16,7 +16,10 @@ use crate::{ util::disk_collections::{DiskMap, EvictionStrategy, DEFAULT_BLOCK_CACHE_CAPACITY}, }; -use super::{EdgeContainer, GraphStatistic, GraphStorage}; +use super::{ + load_statistics_from_location, save_statistics_to_toml, EdgeContainer, GraphStatistic, + GraphStorage, +}; use binary_layout::prelude::*; pub(crate) const MAX_DEPTH: usize = 15; @@ -371,11 +374,7 @@ impl GraphStorage for DiskPathStorage { location.join(crate::annostorage::ondisk::SUBFOLDER_NAME), ))?; - // Read stats - let stats_path = location.join("edge_stats.bin"); - let f_stats = std::fs::File::open(stats_path)?; - let input = std::io::BufReader::new(f_stats); - let stats = bincode::deserialize_from(input)?; + let stats = load_statistics_from_location(location)?; Ok(Self { paths, @@ -413,11 +412,7 @@ impl GraphStorage for DiskPathStorage { // Save edge annotations self.annos.save_annotations_to(location)?; - // Write stats with bincode - let stats_path = location.join("edge_stats.bin"); - let f_stats = std::fs::File::create(stats_path)?; - let mut writer = std::io::BufWriter::new(f_stats); - bincode::serialize_into(&mut writer, &self.stats)?; + save_statistics_to_toml(location, self.stats.as_ref())?; Ok(()) } diff --git a/core/src/graph/storage/legacy.rs b/core/src/graph/storage/legacy.rs new file mode 100644 index 000000000..ad011bd48 --- /dev/null +++ 
b/core/src/graph/storage/legacy.rs @@ -0,0 +1,104 @@ +//! Legacy structures of graph storages. Old versions of graph storages need to +//! be kept for compatibility reasons, but are not further developed. If +//! possible, only the legacy data structure is kept, the graph storage is +//! converted into a newer version and there is no specific implementation for +//! the old data structure. + +use std::collections::HashMap; + +use rustc_hash::FxHashMap; + +use crate::{ + annostorage::inmemory::AnnoStorageImpl, + types::{Edge, NodeID, NumValue}, +}; + +use super::{ + linear::RelativePosition, + prepost::{OrderVecEntry, PrePost}, + GraphStatistic, +}; + +/// Some general statistical numbers specific to a graph component +#[derive(Serialize, Deserialize, Clone)] +pub(crate) struct GraphStatisticV1 { + /// True if the component contains any cycle. + pub cyclic: bool, + + /// True if the component consists of [rooted trees](https://en.wikipedia.org/wiki/Tree_(graph_theory)). + pub rooted_tree: bool, + + /// Number of nodes in this graph storage (both source and target nodes). + pub nodes: usize, + + /// Average fan out. + pub avg_fan_out: f64, + /// Max fan-out of 99% of the data. + pub fan_out_99_percentile: usize, + + /// Max inverse fan-out of 99% of the data. + pub inverse_fan_out_99_percentile: usize, + + /// Maximal number of children of a node. + pub max_fan_out: usize, + /// Maximum length from a root node to a terminal node. + pub max_depth: usize, + + /// Only valid for acyclic graphs: the average number of times a DFS will visit each node. 
+ pub dfs_visit_ratio: f64, +} + +impl From for GraphStatistic { + fn from(value: GraphStatisticV1) -> Self { + let root_nodes = if value.nodes > 0 { 1 } else { 0 }; + Self { + cyclic: value.cyclic, + rooted_tree: value.rooted_tree, + nodes: value.nodes, + root_nodes, + avg_fan_out: value.avg_fan_out, + fan_out_99_percentile: value.fan_out_99_percentile, + inverse_fan_out_99_percentile: value.inverse_fan_out_99_percentile, + max_fan_out: value.max_fan_out, + max_depth: value.max_depth, + dfs_visit_ratio: value.dfs_visit_ratio, + } + } +} + +/// An adjacency list based storage that uses the [`GraphStatisticV1`] +#[derive(Serialize, Deserialize, Clone)] +pub(crate) struct AdjacencyListStorageV1 { + pub(crate) edges: HashMap>, + pub(crate) inverse_edges: HashMap>, + pub(crate) annos: AnnoStorageImpl, + pub(crate) stats: Option, +} + +/// An adjacency list based storage that uses the [`GraphStatisticV1`] and is +/// optimized for graphs where almost all nodes have an outgoing edge. +#[derive(Serialize, Deserialize, Clone)] +pub(crate) struct DenseAdjacencyListStorageV1 { + pub(crate) edges: Vec>, + pub(crate) inverse_edges: HashMap>, + pub(crate) annos: AnnoStorageImpl, + pub(crate) stats: Option, +} + +/// A graph storage for linear graphs that uses the [`GraphStatisticV1`]. +#[derive(Serialize, Deserialize, Clone)] +pub(crate) struct LinearGraphStorageV1 { + pub(crate) node_to_pos: HashMap>, + pub(crate) node_chains: HashMap>, + pub(crate) annos: AnnoStorageImpl, + pub(crate) stats: Option, +} + +/// A graph storage for trees that uses the [`GraphStatisticV1`] and indexes graphs using the pre/post order. 
+#[derive(Serialize, Deserialize, Clone)] +pub(crate) struct PrePostOrderStorageV1 { + pub(crate) node_to_order: FxHashMap>>, + pub(crate) order_to_node: Vec>, + pub(crate) annos: AnnoStorageImpl, + pub(crate) stats: Option, +} diff --git a/core/src/graph/storage/linear.rs b/core/src/graph/storage/linear.rs index f3a9b155d..d3f037002 100644 --- a/core/src/graph/storage/linear.rs +++ b/core/src/graph/storage/linear.rs @@ -1,4 +1,7 @@ -use super::{EdgeContainer, GraphStatistic, GraphStorage}; +use super::{ + deserialize_gs_field, legacy::LinearGraphStorageV1, load_statistics_from_location, + save_statistics_to_toml, serialize_gs_field, EdgeContainer, GraphStatistic, GraphStorage, +}; use crate::{ annostorage::{ inmemory::AnnoStorageImpl, AnnotationStorage, EdgeAnnotationStorage, NodeAnnotationStorage, @@ -12,7 +15,7 @@ use serde::{Deserialize, Serialize}; use std::{clone::Clone, collections::HashMap, path::Path}; #[derive(Serialize, Deserialize, Clone)] -struct RelativePosition { +pub(crate) struct RelativePosition { pub root: NodeID, pub pos: PosT, } @@ -165,13 +168,35 @@ where where for<'de> Self: std::marker::Sized + Deserialize<'de>, { - let mut result: Self = super::default_deserialize_gs(location)?; + let legacy_path = location.join("component.bin"); + let mut result: Self = if legacy_path.is_file() { + let component: LinearGraphStorageV1 = + deserialize_gs_field(location, "component")?; + Self { + node_to_pos: component.node_to_pos, + node_chains: component.node_chains, + annos: component.annos, + stats: component.stats.map(GraphStatistic::from), + } + } else { + let stats = load_statistics_from_location(location)?; + Self { + node_to_pos: deserialize_gs_field(location, "node_to_pos")?, + node_chains: deserialize_gs_field(location, "node_chains")?, + annos: deserialize_gs_field(location, "annos")?, + stats, + } + }; + result.annos.after_deserialization(); Ok(result) } fn save_to(&self, location: &Path) -> Result<()> { - super::default_serialize_gs(self, 
location)?; + serialize_gs_field(&self.node_to_pos, "node_to_pos", location)?; + serialize_gs_field(&self.node_chains, "node_chains", location)?; + serialize_gs_field(&self.annos, "annos", location)?; + save_statistics_to_toml(location, self.stats.as_ref())?; Ok(()) } diff --git a/core/src/graph/storage/mod.rs b/core/src/graph/storage/mod.rs index f9a426e33..52f0498fc 100644 --- a/core/src/graph/storage/mod.rs +++ b/core/src/graph/storage/mod.rs @@ -7,6 +7,8 @@ pub mod prepost; pub mod registry; pub mod union; +pub(crate) mod legacy; + use crate::annostorage::{EdgeAnnotationStorage, NodeAnnotationStorage}; use crate::{ annostorage::AnnotationStorage, @@ -29,6 +31,9 @@ pub struct GraphStatistic { /// Number of nodes in this graph storage (both source and target nodes). pub nodes: usize, + /// Number of root nodes in this graph storage. + pub root_nodes: usize, + /// Average fan out. pub avg_fan_out: f64, /// Max fan-out of 99% of the data. @@ -50,8 +55,8 @@ impl std::fmt::Display for GraphStatistic { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!( f, - "nodes={}, avg_fan_out={:.2}, max_fan_out={}, max_depth={}", - self.nodes, self.avg_fan_out, self.max_fan_out, self.max_depth + "nodes={}, root nodes={}, avg_fan_out={:.2}, max_fan_out={}, fan_out_99%={}, inv_fan_out_99%={}, max_depth={}", + self.nodes, self.root_nodes, self.avg_fan_out, self.max_fan_out, self.fan_out_99_percentile, self.inverse_fan_out_99_percentile, self.max_depth )?; if self.cyclic { write!(f, ", cyclic")?; @@ -190,30 +195,64 @@ pub trait GraphStorage: EdgeContainer { fn save_to(&self, location: &Path) -> Result<()>; } -pub fn default_serialize_gs(gs: &GS, location: &Path) -> Result<()> +pub fn serialize_gs_field(field: &T, field_name: &str, location: &Path) -> Result<()> where - GS: Serialize, + T: Serialize, { - let data_path = location.join("component.bin"); + let data_path = location.join(format!("{field_name}.bin")); let f_data = std::fs::File::create(data_path)?; let 
mut writer = std::io::BufWriter::new(f_data); - bincode::serialize_into(&mut writer, gs)?; + bincode::serialize_into(&mut writer, field)?; Ok(()) } -pub fn default_deserialize_gs(location: &Path) -> Result +pub fn deserialize_gs_field(location: &Path, field_name: &str) -> Result where - for<'de> GS: std::marker::Sized + Deserialize<'de>, + for<'de> T: std::marker::Sized + Deserialize<'de>, { - let data_path = location.join("component.bin"); + let data_path = location.join(format!("{field_name}.bin")); let f_data = std::fs::File::open(data_path)?; let input = std::io::BufReader::new(f_data); let result = bincode::deserialize_from(input)?; - Ok(result) } +const STATISTICS_FILE_NAME: &str = "stats.toml"; + +pub fn load_statistics_from_location(location: &Path) -> Result> { + let stats_path_toml = location.join(STATISTICS_FILE_NAME); + let legacy_stats_path_bin = location.join("edge_stats.bin"); + + let stats = if stats_path_toml.is_file() { + let file_content = std::fs::read_to_string(stats_path_toml)?; + let stats: GraphStatistic = toml::from_str(&file_content)?; + Some(stats) + } else if legacy_stats_path_bin.is_file() { + let f_stats = std::fs::File::open(legacy_stats_path_bin)?; + let input = std::io::BufReader::new(f_stats); + // This is a legacy file which needs an older version of the struct + let legacy_stats: Option = bincode::deserialize_from(input)?; + legacy_stats.map(|s| s.into()) + } else { + None + }; + Ok(stats) +} + +pub fn save_statistics_to_toml(location: &Path, stats: Option<&GraphStatistic>) -> Result<()> { + let file_path = location.join(STATISTICS_FILE_NAME); + if file_path.is_file() { + std::fs::remove_file(&file_path)?; + } + + if let Some(stats) = stats { + let file_content = toml::to_string(stats)?; + std::fs::write(file_path, file_content)?; + } + Ok(()) +} + /// Trait for accessing graph storages which can be written to. pub trait WriteableGraphStorage: GraphStorage { /// Add an edge to this graph storage. 
diff --git a/core/src/graph/storage/prepost.rs b/core/src/graph/storage/prepost.rs index 3dd22cb81..6f3ee559a 100644 --- a/core/src/graph/storage/prepost.rs +++ b/core/src/graph/storage/prepost.rs @@ -1,4 +1,7 @@ -use super::{EdgeContainer, GraphStatistic, GraphStorage}; +use super::{ + deserialize_gs_field, legacy::PrePostOrderStorageV1, load_statistics_from_location, + save_statistics_to_toml, serialize_gs_field, EdgeContainer, GraphStatistic, GraphStorage, +}; use crate::{ annostorage::{ inmemory::AnnoStorageImpl, AnnotationStorage, EdgeAnnotationStorage, NodeAnnotationStorage, @@ -20,7 +23,7 @@ pub struct PrePost { } #[derive(Serialize, Deserialize, Clone, Debug)] -enum OrderVecEntry { +pub(crate) enum OrderVecEntry { None, Pre { post: OrderT, @@ -197,13 +200,36 @@ where where for<'de> Self: std::marker::Sized + Deserialize<'de>, { - let mut result: Self = super::default_deserialize_gs(location)?; + let legacy_path = location.join("component.bin"); + let mut result: Self = if legacy_path.is_file() { + let component: PrePostOrderStorageV1 = + deserialize_gs_field(location, "component")?; + Self { + node_to_order: component.node_to_order, + order_to_node: component.order_to_node, + annos: component.annos, + stats: component.stats.map(GraphStatistic::from), + } + } else { + let stats = load_statistics_from_location(location)?; + Self { + node_to_order: deserialize_gs_field(location, "node_to_order")?, + order_to_node: deserialize_gs_field(location, "order_to_node")?, + annos: deserialize_gs_field(location, "annos")?, + stats, + } + }; + result.annos.after_deserialization(); + Ok(result) } fn save_to(&self, location: &Path) -> Result<()> { - super::default_serialize_gs(self, location)?; + serialize_gs_field(&self.node_to_order, "node_to_order", location)?; + serialize_gs_field(&self.order_to_node, "order_to_node", location)?; + serialize_gs_field(&self.annos, "annos", location)?; + save_statistics_to_toml(location, self.stats.as_ref())?; Ok(()) } diff --git 
a/graphannis/src/annis/db/aql/conjunction.rs b/graphannis/src/annis/db/aql/conjunction.rs index d7adc63e7..5551f63c7 100644 --- a/graphannis/src/annis/db/aql/conjunction.rs +++ b/graphannis/src/annis/db/aql/conjunction.rs @@ -119,6 +119,7 @@ fn get_cost_estimates<'a>( } } +/// Returns true if it is estimated to switch the operands in a join. fn should_switch_operand_order( op_spec: &BinaryOperatorSpecEntry, node2cost: &BTreeMap, diff --git a/graphannis/src/annis/db/aql/operators/edge_op.rs b/graphannis/src/annis/db/aql/operators/edge_op.rs index 3c56d0a60..c22f204ac 100644 --- a/graphannis/src/annis/db/aql/operators/edge_op.rs +++ b/graphannis/src/annis/db/aql/operators/edge_op.rs @@ -41,17 +41,42 @@ impl BaseEdgeOp { let gs_for_component = db.get_graphstorage(c).ok_or_else(|| { GraphAnnisError::ImpossibleSearch(format!("Component {} does not exist", &c)) })?; + gs.push(gs_for_component); } - Ok(BaseEdgeOp { - gs, - spec, - max_nodes_estimate: db.get_node_annos().guess_max_count( + + let all_part_of_components = spec + .components + .iter() + .all(|c| c.get_type() == AnnotationComponentType::PartOf); + + let max_nodes_estimate = if all_part_of_components && gs.len() == 1 { + // PartOf components have a very skewed distribution of root nodes + // vs. the actual possible targets, thus do not use all nodes as + // population but only the non-roots. + if let Some(stats) = gs[0].get_statistics() { + stats.nodes - stats.root_nodes + } else { + // Fallback to guessing by using the node type + db.get_node_annos().guess_max_count( + Some(&NODE_TYPE_KEY.ns), + &NODE_TYPE_KEY.name, + "corpus", + "datasource", + )? + } + } else { + db.get_node_annos().guess_max_count( Some(&NODE_TYPE_KEY.ns), &NODE_TYPE_KEY.name, "node", "node", - )?, + )? 
+ }; + Ok(BaseEdgeOp { + gs, + spec, + max_nodes_estimate, inverse: false, }) } diff --git a/graphannis/src/annis/db/exec/mod.rs b/graphannis/src/annis/db/exec/mod.rs index eb02fb017..fe27ea482 100644 --- a/graphannis/src/annis/db/exec/mod.rs +++ b/graphannis/src/annis/db/exec/mod.rs @@ -12,8 +12,11 @@ use std::sync::Arc; #[derive(Debug, Clone)] pub struct CostEstimate { + /// The estimated number of tuples produced by this execution step. pub output: usize, + /// Sum of all processed tuples including the ones of the sub-steps. pub intermediate_sum: usize, + /// Simplistic estimate of the number of tuples that are processed in a join. pub processed_in_step: usize, } @@ -54,13 +57,13 @@ impl ExecutionNodeDesc { pub fn empty_with_fragment( node_nr: usize, query_fragment: String, - est_size: Option, + estimated_output: usize, ) -> ExecutionNodeDesc { let mut node_pos = BTreeMap::new(); node_pos.insert(node_nr, 0); - let cost = est_size.map(|output| CostEstimate { - output, + let cost = Some(CostEstimate { + output: estimated_output, intermediate_sum: 0, processed_in_step: 0, }); diff --git a/graphannis/src/annis/db/exec/nodesearch.rs b/graphannis/src/annis/db/exec/nodesearch.rs index 1048c0538..206d3d7dc 100644 --- a/graphannis/src/annis/db/exec/nodesearch.rs +++ b/graphannis/src/annis/db/exec/nodesearch.rs @@ -569,7 +569,7 @@ impl<'a> NodeSearch<'a> { desc: Some(ExecutionNodeDesc::empty_with_fragment( common_args.node_nr, common_args.query_fragment, - Some(est_output), + est_output, )), node_search_desc: Arc::new(NodeSearchDesc { qname: ( @@ -676,7 +676,7 @@ impl<'a> NodeSearch<'a> { desc: Some(ExecutionNodeDesc::empty_with_fragment( common_args.node_nr, common_args.query_fragment.clone(), - Some(est_output), + est_output, )), node_search_desc: Arc::new(NodeSearchDesc { qname: (qname.0, Some(qname.1)), @@ -772,7 +772,7 @@ impl<'a> NodeSearch<'a> { desc: Some(ExecutionNodeDesc::empty_with_fragment( common_args.node_nr, common_args.query_fragment, - Some(est_output), + 
est_output, )), node_search_desc: Arc::new(NodeSearchDesc { qname: (qname.0, Some(qname.1)), @@ -920,7 +920,7 @@ impl<'a> NodeSearch<'a> { desc: Some(ExecutionNodeDesc::empty_with_fragment( common_args.node_nr, common_args.query_fragment.clone(), - Some(est_output), + est_output, )), node_search_desc: Arc::new(NodeSearchDesc { qname: ( @@ -978,7 +978,7 @@ impl<'a> NodeSearch<'a> { desc: Some(ExecutionNodeDesc::empty_with_fragment( common_args.node_nr, common_args.query_fragment.clone(), - Some(est_output), + est_output, )), node_search_desc: Arc::new(NodeSearchDesc { qname: ( diff --git a/graphannis/tests/data/sample-disk-based-3.8/corpus-config.toml b/graphannis/tests/data/sample-disk-based-3.8/corpus-config.toml new file mode 100644 index 000000000..97cc8d4f2 --- /dev/null +++ b/graphannis/tests/data/sample-disk-based-3.8/corpus-config.toml @@ -0,0 +1,78 @@ +[context] +default = 5 +sizes = [0, 1, 2, 5, 10, 20, 25, 50] + +[view] +page_size = 10 + +[[visualizers]] +vis_type = "kwic" +display_name = "kwic" +visibility = "permanent" + +[[visualizers]] +element = "node" +layer = "default_ns" +vis_type = "grid" +display_name = "grid (default_ns)" +visibility = "hidden" + +[[visualizers]] +element = "node" +layer = "syntax" +vis_type = "tree" +display_name = "tree (syntax)" +visibility = "hidden" + +[visualizers.mappings] +edge_type = "null" +node_anno_ns = "default_ns" +node_key = "const" + +[[visualizers]] +element = "edge" +layer = "default_ns" +vis_type = "arch_dependency" +display_name = "anaphoric (default_ns)" +visibility = "hidden" + +[[visualizers]] +element = "node" +layer = "tiger" +vis_type = "tree" +display_name = "tree" +visibility = "hidden" + +[[visualizers]] +element = "node" +layer = "exmaralda" +vis_type = "grid" +display_name = "exmaralda" +visibility = "hidden" + +[[visualizers]] +element = "node" +layer = "mmax" +vis_type = "grid" +display_name = "mmax" +visibility = "hidden" + +[[visualizers]] +element = "edge" +layer = "mmax" +vis_type = 
"discourse" +display_name = "coref" +visibility = "hidden" + +[[visualizers]] +element = "node" +layer = "urml" +vis_type = "grid" +display_name = "urml" +visibility = "hidden" + +[corpus_size] +quantity = 44 + +[corpus_size.unit] +name = "tokens" diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/global_statistics.toml b/graphannis/tests/data/sample-disk-based-3.8/current/global_statistics.toml new file mode 100644 index 000000000..9054d2c28 --- /dev/null +++ b/graphannis/tests/data/sample-disk-based-3.8/current/global_statistics.toml @@ -0,0 +1,6 @@ +all_token_in_order_component = true + +[corpus_size.Token] +base_token_count = 44 + +[corpus_size.Token.segmentation_count] diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/edges.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/edges.bin new file mode 100644 index 000000000..b02d64e9e Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/edges.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/impl.cfg b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/impl.cfg new file mode 100644 index 000000000..449496de3 --- /dev/null +++ b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/impl.cfg @@ -0,0 +1 @@ +DiskAdjacencyListV1 \ No newline at end of file diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/inherited-coverage/edges.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/inherited-coverage/edges.bin new file mode 100644 index 000000000..b02d64e9e Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/inherited-coverage/edges.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/inherited-coverage/impl.cfg 
b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/inherited-coverage/impl.cfg new file mode 100644 index 000000000..449496de3 --- /dev/null +++ b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/inherited-coverage/impl.cfg @@ -0,0 +1 @@ +DiskAdjacencyListV1 \ No newline at end of file diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/inherited-coverage/inverse_edges.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/inherited-coverage/inverse_edges.bin new file mode 100644 index 000000000..b02d64e9e Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/inherited-coverage/inverse_edges.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/inherited-coverage/nodes_diskmap_v1/by_anno_qname.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/inherited-coverage/nodes_diskmap_v1/by_anno_qname.bin new file mode 100644 index 000000000..b02d64e9e Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/inherited-coverage/nodes_diskmap_v1/by_anno_qname.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/inherited-coverage/nodes_diskmap_v1/by_container.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/inherited-coverage/nodes_diskmap_v1/by_container.bin new file mode 100644 index 000000000..b02d64e9e Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/inherited-coverage/nodes_diskmap_v1/by_container.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/inherited-coverage/nodes_diskmap_v1/custom.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/inherited-coverage/nodes_diskmap_v1/custom.bin new file mode 100644 index 000000000..ed24586ec Binary files 
/dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/inherited-coverage/nodes_diskmap_v1/custom.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/inherited-coverage/stats.toml b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/inherited-coverage/stats.toml new file mode 100644 index 000000000..5dc1a4696 --- /dev/null +++ b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/inherited-coverage/stats.toml @@ -0,0 +1,10 @@ +cyclic = false +rooted_tree = true +nodes = 0 +root_nodes = 0 +avg_fan_out = 0.0 +fan_out_99_percentile = 0 +inverse_fan_out_99_percentile = 0 +max_fan_out = 0 +max_depth = 1 +dfs_visit_ratio = 0.0 diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/inverse_edges.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/inverse_edges.bin new file mode 100644 index 000000000..b02d64e9e Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/inverse_edges.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/nodes_diskmap_v1/by_anno_qname.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/nodes_diskmap_v1/by_anno_qname.bin new file mode 100644 index 000000000..b02d64e9e Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/nodes_diskmap_v1/by_anno_qname.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/nodes_diskmap_v1/by_container.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/nodes_diskmap_v1/by_container.bin new file mode 100644 index 000000000..b02d64e9e Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/nodes_diskmap_v1/by_container.bin differ diff --git 
a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/nodes_diskmap_v1/custom.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/nodes_diskmap_v1/custom.bin new file mode 100644 index 000000000..ed24586ec Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/nodes_diskmap_v1/custom.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/stats.toml b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/stats.toml new file mode 100644 index 000000000..5dc1a4696 --- /dev/null +++ b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/annis/stats.toml @@ -0,0 +1,10 @@ +cyclic = false +rooted_tree = true +nodes = 0 +root_nodes = 0 +avg_fan_out = 0.0 +fan_out_99_percentile = 0 +inverse_fan_out_99_percentile = 0 +max_fan_out = 0 +max_depth = 1 +dfs_visit_ratio = 0.0 diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_layer/edges.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_layer/edges.bin new file mode 100644 index 000000000..cd98d32e7 Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_layer/edges.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_layer/impl.cfg b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_layer/impl.cfg new file mode 100644 index 000000000..449496de3 --- /dev/null +++ b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_layer/impl.cfg @@ -0,0 +1 @@ +DiskAdjacencyListV1 \ No newline at end of file diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_layer/inverse_edges.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_layer/inverse_edges.bin new file mode 100644 index 000000000..24c24b63f Binary files /dev/null and 
b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_layer/inverse_edges.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_layer/nodes_diskmap_v1/by_anno_qname.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_layer/nodes_diskmap_v1/by_anno_qname.bin new file mode 100644 index 000000000..b02d64e9e Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_layer/nodes_diskmap_v1/by_anno_qname.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_layer/nodes_diskmap_v1/by_container.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_layer/nodes_diskmap_v1/by_container.bin new file mode 100644 index 000000000..b02d64e9e Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_layer/nodes_diskmap_v1/by_container.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_layer/nodes_diskmap_v1/custom.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_layer/nodes_diskmap_v1/custom.bin new file mode 100644 index 000000000..ed24586ec Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_layer/nodes_diskmap_v1/custom.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_layer/stats.toml b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_layer/stats.toml new file mode 100644 index 000000000..59af7ef1f --- /dev/null +++ b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_layer/stats.toml @@ -0,0 +1,10 @@ +cyclic = false +rooted_tree = false +nodes = 92 +root_nodes = 48 +avg_fan_out = 2.1739130434782608 +fan_out_99_percentile = 11 +inverse_fan_out_99_percentile = 9 +max_fan_out = 11 +max_depth = 1 +dfs_visit_ratio = 2.6956521739130435 
diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_ns/edges.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_ns/edges.bin new file mode 100644 index 000000000..6bd7597dc Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_ns/edges.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_ns/impl.cfg b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_ns/impl.cfg new file mode 100644 index 000000000..449496de3 --- /dev/null +++ b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_ns/impl.cfg @@ -0,0 +1 @@ +DiskAdjacencyListV1 \ No newline at end of file diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_ns/inverse_edges.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_ns/inverse_edges.bin new file mode 100644 index 000000000..986cbd5ef Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_ns/inverse_edges.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_ns/nodes_diskmap_v1/by_anno_qname.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_ns/nodes_diskmap_v1/by_anno_qname.bin new file mode 100644 index 000000000..b02d64e9e Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_ns/nodes_diskmap_v1/by_anno_qname.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_ns/nodes_diskmap_v1/by_container.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_ns/nodes_diskmap_v1/by_container.bin new file mode 100644 index 000000000..b02d64e9e Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_ns/nodes_diskmap_v1/by_container.bin differ diff --git 
a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_ns/nodes_diskmap_v1/custom.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_ns/nodes_diskmap_v1/custom.bin new file mode 100644 index 000000000..ed24586ec Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_ns/nodes_diskmap_v1/custom.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_ns/stats.toml b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_ns/stats.toml new file mode 100644 index 000000000..5dc6ca90e --- /dev/null +++ b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Coverage/default_ns/stats.toml @@ -0,0 +1,10 @@ +cyclic = false +rooted_tree = false +nodes = 56 +root_nodes = 12 +avg_fan_out = 0.9285714285714286 +fan_out_99_percentile = 10 +inverse_fan_out_99_percentile = 2 +max_fan_out = 10 +max_depth = 1 +dfs_visit_ratio = 1.1428571428571428 diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Dominance/syntax/annos.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Dominance/syntax/annos.bin new file mode 100644 index 000000000..a2ab2f0b8 Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Dominance/syntax/annos.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Dominance/syntax/impl.cfg b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Dominance/syntax/impl.cfg new file mode 100644 index 000000000..e96ea914e --- /dev/null +++ b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Dominance/syntax/impl.cfg @@ -0,0 +1 @@ +PrePostOrderO16L8V1 \ No newline at end of file diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Dominance/syntax/node_to_order.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Dominance/syntax/node_to_order.bin new file mode 100644 index 000000000..952be524a Binary files /dev/null and 
b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Dominance/syntax/node_to_order.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Dominance/syntax/order_to_node.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Dominance/syntax/order_to_node.bin new file mode 100644 index 000000000..8a4a431cb Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Dominance/syntax/order_to_node.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Dominance/syntax/stats.toml b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Dominance/syntax/stats.toml new file mode 100644 index 000000000..170f24e0b --- /dev/null +++ b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Dominance/syntax/stats.toml @@ -0,0 +1,10 @@ +cyclic = false +rooted_tree = true +nodes = 92 +root_nodes = 4 +avg_fan_out = 0.9565217391304348 +fan_out_99_percentile = 3 +inverse_fan_out_99_percentile = 1 +max_fan_out = 3 +max_depth = 9 +dfs_visit_ratio = 1.0 diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/LeftToken/annis/impl.cfg b/graphannis/tests/data/sample-disk-based-3.8/current/gs/LeftToken/annis/impl.cfg new file mode 100644 index 000000000..bd39cc239 --- /dev/null +++ b/graphannis/tests/data/sample-disk-based-3.8/current/gs/LeftToken/annis/impl.cfg @@ -0,0 +1 @@ +DiskPathV1_D15 \ No newline at end of file diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/LeftToken/annis/inverse_edges.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/LeftToken/annis/inverse_edges.bin new file mode 100644 index 000000000..bfdae90fe Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/LeftToken/annis/inverse_edges.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/LeftToken/annis/nodes_diskmap_v1/by_anno_qname.bin 
b/graphannis/tests/data/sample-disk-based-3.8/current/gs/LeftToken/annis/nodes_diskmap_v1/by_anno_qname.bin new file mode 100644 index 000000000..b02d64e9e Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/LeftToken/annis/nodes_diskmap_v1/by_anno_qname.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/LeftToken/annis/nodes_diskmap_v1/by_container.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/LeftToken/annis/nodes_diskmap_v1/by_container.bin new file mode 100644 index 000000000..b02d64e9e Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/LeftToken/annis/nodes_diskmap_v1/by_container.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/LeftToken/annis/nodes_diskmap_v1/custom.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/LeftToken/annis/nodes_diskmap_v1/custom.bin new file mode 100644 index 000000000..ed24586ec Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/LeftToken/annis/nodes_diskmap_v1/custom.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/LeftToken/annis/paths.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/LeftToken/annis/paths.bin new file mode 100644 index 000000000..0c6f66210 Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/LeftToken/annis/paths.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/LeftToken/annis/stats.toml b/graphannis/tests/data/sample-disk-based-3.8/current/gs/LeftToken/annis/stats.toml new file mode 100644 index 000000000..29d71f537 --- /dev/null +++ b/graphannis/tests/data/sample-disk-based-3.8/current/gs/LeftToken/annis/stats.toml @@ -0,0 +1,10 @@ +cyclic = false +rooted_tree = false +nodes = 92 +root_nodes = 60 +avg_fan_out = 0.6521739130434783 +fan_out_99_percentile = 1 +inverse_fan_out_99_percentile = 3 +max_fan_out = 1 +max_depth = 1 
+dfs_visit_ratio = 1.3043478260869565 diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Ordering/annis/impl.cfg b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Ordering/annis/impl.cfg new file mode 100644 index 000000000..bd39cc239 --- /dev/null +++ b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Ordering/annis/impl.cfg @@ -0,0 +1 @@ +DiskPathV1_D15 \ No newline at end of file diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Ordering/annis/inverse_edges.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Ordering/annis/inverse_edges.bin new file mode 100644 index 000000000..87efaf3e8 Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Ordering/annis/inverse_edges.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Ordering/annis/nodes_diskmap_v1/by_anno_qname.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Ordering/annis/nodes_diskmap_v1/by_anno_qname.bin new file mode 100644 index 000000000..b02d64e9e Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Ordering/annis/nodes_diskmap_v1/by_anno_qname.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Ordering/annis/nodes_diskmap_v1/by_container.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Ordering/annis/nodes_diskmap_v1/by_container.bin new file mode 100644 index 000000000..b02d64e9e Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Ordering/annis/nodes_diskmap_v1/by_container.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Ordering/annis/nodes_diskmap_v1/custom.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Ordering/annis/nodes_diskmap_v1/custom.bin new file mode 100644 index 000000000..ed24586ec Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Ordering/annis/nodes_diskmap_v1/custom.bin differ 
diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Ordering/annis/paths.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Ordering/annis/paths.bin new file mode 100644 index 000000000..549871527 Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Ordering/annis/paths.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Ordering/annis/stats.toml b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Ordering/annis/stats.toml new file mode 100644 index 000000000..43895c29a --- /dev/null +++ b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Ordering/annis/stats.toml @@ -0,0 +1,10 @@ +cyclic = false +rooted_tree = true +nodes = 44 +root_nodes = 4 +avg_fan_out = 0.9090909090909091 +fan_out_99_percentile = 1 +inverse_fan_out_99_percentile = 1 +max_fan_out = 1 +max_depth = 10 +dfs_visit_ratio = 1.0 diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/PartOf/annis/impl.cfg b/graphannis/tests/data/sample-disk-based-3.8/current/gs/PartOf/annis/impl.cfg new file mode 100644 index 000000000..bd39cc239 --- /dev/null +++ b/graphannis/tests/data/sample-disk-based-3.8/current/gs/PartOf/annis/impl.cfg @@ -0,0 +1 @@ +DiskPathV1_D15 \ No newline at end of file diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/PartOf/annis/inverse_edges.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/PartOf/annis/inverse_edges.bin new file mode 100644 index 000000000..9f564de37 Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/PartOf/annis/inverse_edges.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/PartOf/annis/nodes_diskmap_v1/by_anno_qname.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/PartOf/annis/nodes_diskmap_v1/by_anno_qname.bin new file mode 100644 index 000000000..b02d64e9e Binary files /dev/null and 
b/graphannis/tests/data/sample-disk-based-3.8/current/gs/PartOf/annis/nodes_diskmap_v1/by_anno_qname.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/PartOf/annis/nodes_diskmap_v1/by_container.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/PartOf/annis/nodes_diskmap_v1/by_container.bin new file mode 100644 index 000000000..b02d64e9e Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/PartOf/annis/nodes_diskmap_v1/by_container.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/PartOf/annis/nodes_diskmap_v1/custom.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/PartOf/annis/nodes_diskmap_v1/custom.bin new file mode 100644 index 000000000..ed24586ec Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/PartOf/annis/nodes_diskmap_v1/custom.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/PartOf/annis/paths.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/PartOf/annis/paths.bin new file mode 100644 index 000000000..c0a9c2b99 Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/PartOf/annis/paths.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/PartOf/annis/stats.toml b/graphannis/tests/data/sample-disk-based-3.8/current/gs/PartOf/annis/stats.toml new file mode 100644 index 000000000..ef1a9204c --- /dev/null +++ b/graphannis/tests/data/sample-disk-based-3.8/current/gs/PartOf/annis/stats.toml @@ -0,0 +1,10 @@ +cyclic = false +rooted_tree = false +nodes = 115 +root_nodes = 104 +avg_fan_out = 0.991304347826087 +fan_out_99_percentile = 1 +inverse_fan_out_99_percentile = 26 +max_fan_out = 1 +max_depth = 4 +dfs_visit_ratio = 4.521739130434782 diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Pointing/default_ns/anaphoric/impl.cfg 
b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Pointing/default_ns/anaphoric/impl.cfg new file mode 100644 index 000000000..bd39cc239 --- /dev/null +++ b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Pointing/default_ns/anaphoric/impl.cfg @@ -0,0 +1 @@ +DiskPathV1_D15 \ No newline at end of file diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Pointing/default_ns/anaphoric/inverse_edges.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Pointing/default_ns/anaphoric/inverse_edges.bin new file mode 100644 index 000000000..d104934b4 Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Pointing/default_ns/anaphoric/inverse_edges.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Pointing/default_ns/anaphoric/nodes_diskmap_v1/by_anno_qname.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Pointing/default_ns/anaphoric/nodes_diskmap_v1/by_anno_qname.bin new file mode 100644 index 000000000..b02d64e9e Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Pointing/default_ns/anaphoric/nodes_diskmap_v1/by_anno_qname.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Pointing/default_ns/anaphoric/nodes_diskmap_v1/by_container.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Pointing/default_ns/anaphoric/nodes_diskmap_v1/by_container.bin new file mode 100644 index 000000000..b02d64e9e Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Pointing/default_ns/anaphoric/nodes_diskmap_v1/by_container.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Pointing/default_ns/anaphoric/nodes_diskmap_v1/custom.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Pointing/default_ns/anaphoric/nodes_diskmap_v1/custom.bin new file mode 100644 index 000000000..ed24586ec Binary files /dev/null and 
b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Pointing/default_ns/anaphoric/nodes_diskmap_v1/custom.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Pointing/default_ns/anaphoric/paths.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Pointing/default_ns/anaphoric/paths.bin new file mode 100644 index 000000000..350cf3b3d Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Pointing/default_ns/anaphoric/paths.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/Pointing/default_ns/anaphoric/stats.toml b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Pointing/default_ns/anaphoric/stats.toml new file mode 100644 index 000000000..599f469e7 --- /dev/null +++ b/graphannis/tests/data/sample-disk-based-3.8/current/gs/Pointing/default_ns/anaphoric/stats.toml @@ -0,0 +1,10 @@ +cyclic = false +rooted_tree = true +nodes = 8 +root_nodes = 4 +avg_fan_out = 0.5 +fan_out_99_percentile = 1 +inverse_fan_out_99_percentile = 1 +max_fan_out = 1 +max_depth = 1 +dfs_visit_ratio = 1.0 diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/RightToken/annis/impl.cfg b/graphannis/tests/data/sample-disk-based-3.8/current/gs/RightToken/annis/impl.cfg new file mode 100644 index 000000000..bd39cc239 --- /dev/null +++ b/graphannis/tests/data/sample-disk-based-3.8/current/gs/RightToken/annis/impl.cfg @@ -0,0 +1 @@ +DiskPathV1_D15 \ No newline at end of file diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/RightToken/annis/inverse_edges.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/RightToken/annis/inverse_edges.bin new file mode 100644 index 000000000..eb64a463c Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/RightToken/annis/inverse_edges.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/RightToken/annis/nodes_diskmap_v1/by_anno_qname.bin 
b/graphannis/tests/data/sample-disk-based-3.8/current/gs/RightToken/annis/nodes_diskmap_v1/by_anno_qname.bin new file mode 100644 index 000000000..b02d64e9e Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/RightToken/annis/nodes_diskmap_v1/by_anno_qname.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/RightToken/annis/nodes_diskmap_v1/by_container.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/RightToken/annis/nodes_diskmap_v1/by_container.bin new file mode 100644 index 000000000..b02d64e9e Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/RightToken/annis/nodes_diskmap_v1/by_container.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/RightToken/annis/nodes_diskmap_v1/custom.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/RightToken/annis/nodes_diskmap_v1/custom.bin new file mode 100644 index 000000000..ed24586ec Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/RightToken/annis/nodes_diskmap_v1/custom.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/RightToken/annis/paths.bin b/graphannis/tests/data/sample-disk-based-3.8/current/gs/RightToken/annis/paths.bin new file mode 100644 index 000000000..292e96c47 Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/gs/RightToken/annis/paths.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/gs/RightToken/annis/stats.toml b/graphannis/tests/data/sample-disk-based-3.8/current/gs/RightToken/annis/stats.toml new file mode 100644 index 000000000..10adbd93f --- /dev/null +++ b/graphannis/tests/data/sample-disk-based-3.8/current/gs/RightToken/annis/stats.toml @@ -0,0 +1,10 @@ +cyclic = false +rooted_tree = false +nodes = 84 +root_nodes = 60 +avg_fan_out = 0.7142857142857143 +fan_out_99_percentile = 1 +inverse_fan_out_99_percentile = 8 +max_fan_out = 1 +max_depth 
= 1 +dfs_visit_ratio = 1.4285714285714286 diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/nodes_diskmap_v1/by_anno_qname.bin b/graphannis/tests/data/sample-disk-based-3.8/current/nodes_diskmap_v1/by_anno_qname.bin new file mode 100644 index 000000000..993a85532 Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/nodes_diskmap_v1/by_anno_qname.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/nodes_diskmap_v1/by_container.bin b/graphannis/tests/data/sample-disk-based-3.8/current/nodes_diskmap_v1/by_container.bin new file mode 100644 index 000000000..2d8ced0af Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/nodes_diskmap_v1/by_container.bin differ diff --git a/graphannis/tests/data/sample-disk-based-3.8/current/nodes_diskmap_v1/custom.bin b/graphannis/tests/data/sample-disk-based-3.8/current/nodes_diskmap_v1/custom.bin new file mode 100644 index 000000000..a5cb3e4c9 Binary files /dev/null and b/graphannis/tests/data/sample-disk-based-3.8/current/nodes_diskmap_v1/custom.bin differ