diff --git a/Cargo.toml b/Cargo.toml index ccca81713..0b90b681b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,13 +1,13 @@ [workspace] members = [ - "core", - "graphannis", - "cli", - "capi", - "webservice", - "examples/tutorial", + "core", + "graphannis", + "cli", + "capi", + "webservice", + "examples/tutorial", ] -resolver = "2" +resolver = "3" # Config for 'cargo dist' [workspace.metadata.dist] @@ -21,10 +21,10 @@ installers = [] pr-run-mode = "plan" # Target platforms to build apps for (Rust target-triple syntax) targets = [ - "aarch64-apple-darwin", - "x86_64-apple-darwin", - "x86_64-unknown-linux-gnu", - "x86_64-pc-windows-msvc", + "aarch64-apple-darwin", + "x86_64-apple-darwin", + "x86_64-unknown-linux-gnu", + "x86_64-pc-windows-msvc", ] [workspace.metadata.dist.github-custom-runners] global = "ubuntu-22.04" diff --git a/capi/Cargo.toml b/capi/Cargo.toml index 24831caa4..bdbd9b84b 100644 --- a/capi/Cargo.toml +++ b/capi/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Thomas Krause "] description = "This is the C-API to the ANNIS linguistic search and visualization system." -edition = "2018" +edition = "2024" license = "Apache-2.0" name = "graphannis-capi" readme = "crate-info.md" diff --git a/capi/src/cerror.rs b/capi/src/cerror.rs index 11bc5b11b..7bd941226 100644 --- a/capi/src/cerror.rs +++ b/capi/src/cerror.rs @@ -103,13 +103,13 @@ pub fn new(err: Box) -> *mut ErrorList { } /// Returns the number of errors in the list. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_error_size(ptr: *const ErrorList) -> size_t { vec_size(ptr) } /// Get the message for the error at position `i` in the list. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_error_get_msg(ptr: *const ErrorList, i: size_t) -> *const c_char { let item = vec_get(ptr, i); if item.is_null() { @@ -120,7 +120,7 @@ pub extern "C" fn annis_error_get_msg(ptr: *const ErrorList, i: size_t) -> *cons } /// Get the kind or type for the error at position `i` in the list. 
-#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_error_get_kind(ptr: *const ErrorList, i: size_t) -> *const c_char { let item = vec_get(ptr, i); if item.is_null() { diff --git a/capi/src/corpusstorage.rs b/capi/src/corpusstorage.rs index ab8eb3024..debe38c4b 100644 --- a/capi/src/corpusstorage.rs +++ b/capi/src/corpusstorage.rs @@ -1,15 +1,15 @@ -use super::cerror::ErrorList; use super::Matrix; +use super::cerror::ErrorList; use super::{cast_const, cast_mut, cstr, map_cerr}; use graphannis::corpusstorage::ExportFormat; use graphannis::{ + AnnotationGraph, CorpusStorage, corpusstorage::{ CacheStrategy, CountExtra, FrequencyDefEntry, FrequencyTable, FrequencyTableRow, ImportFormat, QueryAttributeDescription, QueryLanguage, ResultOrder, SearchQuery, }, model::{AnnotationComponent, AnnotationComponentType}, update::GraphUpdate, - AnnotationGraph, CorpusStorage, }; use std::ffi::CString; use std::path::PathBuf; @@ -22,7 +22,7 @@ use std::path::PathBuf; /// - `db_dir` - The path on the filesystem where the corpus storage content is located. Must be an existing directory. /// - `use_parallel_joins` - If `true` parallel joins are used by the system, using all available cores. /// - `err` - Pointer to a list of errors. If any error occured, this list will be non-empty. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_cs_with_auto_cache_size( db_dir: *const libc::c_char, use_parallel_joins: bool, @@ -45,7 +45,7 @@ pub extern "C" fn annis_cs_with_auto_cache_size( /// - `max_cache_size` - Fixed maximum size of the cache in bytes. /// - `use_parallel_joins` - If `true` parallel joins are used by the system, using all available cores. /// - `err` - Pointer to a list of errors. If any error occured, this list will be non-empty. 
-#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_cs_with_max_cache_size( db_dir: *const libc::c_char, max_cache_size: usize, @@ -73,13 +73,13 @@ pub extern "C" fn annis_cs_with_max_cache_size( /// # Safety /// /// This functions dereferences the pointer given as argument and is therefore unsafe. -#[no_mangle] +#[unsafe(no_mangle)] pub unsafe extern "C" fn annis_cs_free(ptr: *mut CorpusStorage) { if ptr.is_null() { return; } // take ownership and destroy the pointer - let ptr = Box::from_raw(ptr); + let ptr = unsafe { Box::from_raw(ptr) }; std::mem::drop(ptr); } @@ -91,7 +91,7 @@ pub unsafe extern "C" fn annis_cs_free(ptr: *mut CorpusStorage) { /// - `err` - Pointer to a list of errors. If any error occurred, this list will be non-empty. /// /// Returns the count as number. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_cs_count( ptr: *const CorpusStorage, corpus_names: *const Vec, @@ -124,7 +124,7 @@ pub extern "C" fn annis_cs_count( /// - `query` - The query as string. /// - `query_language` The query language of the query (e.g. AQL). /// - `err` - Pointer to a list of errors. If any error occured, this list will be non-empty. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_cs_count_extra( ptr: *const CorpusStorage, corpus_names: *const Vec, @@ -168,7 +168,7 @@ pub extern "C" fn annis_cs_count_extra( /// # Safety /// /// This functions dereferences the `err` pointer and is therefore unsafe. 
-#[no_mangle] +#[unsafe(no_mangle)] pub unsafe extern "C" fn annis_cs_find( ptr: *const CorpusStorage, corpus_names: *const Vec, @@ -194,7 +194,7 @@ pub unsafe extern "C" fn annis_cs_find( timeout: None, }; - let limit = if limit.is_null() { None } else { Some(*limit) }; + let limit = unsafe { if limit.is_null() { None } else { Some(*limit) } }; map_cerr(cs.find(search_query, offset, limit, order), err) .map(|result| { @@ -221,7 +221,7 @@ pub unsafe extern "C" fn annis_cs_find( /// # Safety /// /// This functions dereferences the `err` pointer and is therefore unsafe. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_cs_subgraph( ptr: *const CorpusStorage, corpus_name: *const libc::c_char, @@ -262,7 +262,7 @@ pub extern "C" fn annis_cs_subgraph( /// # Safety /// /// This functions dereferences the `err` pointer and is therefore unsafe. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_cs_subcorpus_graph( ptr: *const CorpusStorage, corpus_name: *const libc::c_char, @@ -285,7 +285,7 @@ pub extern "C" fn annis_cs_subcorpus_graph( /// /// - `ptr` - The corpus storage object. /// - `err` - Pointer to a list of errors. If any error occured, this list will be non-empty. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_cs_corpus_graph( ptr: *const CorpusStorage, corpus_name: *const libc::c_char, @@ -306,7 +306,7 @@ pub extern "C" fn annis_cs_corpus_graph( /// - `query` - The query which defines included nodes. /// - `query_language` - The query language of the query (e.g. AQL). /// - `err` - Pointer to a list of errors. If any error occured, this list will be non-empty. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_cs_subgraph_for_query( ptr: *const CorpusStorage, corpus_name: *const libc::c_char, @@ -334,7 +334,7 @@ pub extern "C" fn annis_cs_subgraph_for_query( /// - `query_language` - The query language of the query (e.g. AQL). /// - `component_type_filter` - Only include edges of that belong to a component of the given type. 
/// - `err` - Pointer to a list of errors. If any error occured, this list will be non-empty. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_cs_subgraph_for_query_with_ctype( ptr: *const CorpusStorage, corpus_name: *const libc::c_char, @@ -365,7 +365,7 @@ pub extern "C" fn annis_cs_subgraph_for_query_with_ctype( /// - `err` - Pointer to a list of errors. If any error occured, this list will be non-empty. /// /// Returns a frequency table of strings. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_cs_frequency( ptr: *const CorpusStorage, corpus_names: *const Vec, @@ -424,7 +424,7 @@ pub extern "C" fn annis_cs_frequency( /// /// - `ptr` - The corpus storage object. /// - `err` - Pointer to a list of errors. If any error occured, this list will be non-empty. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_cs_list( ptr: *const CorpusStorage, err: *mut *mut ErrorList, @@ -451,7 +451,7 @@ pub extern "C" fn annis_cs_list( /// - `list_values` - If true include the possible values in the result. /// - `only_most_frequent_values` - If both this argument and `list_values` are true, only return the most frequent value for each annotation name. /// - `err` - Pointer to a list of errors. If any error occured, this list will be non-empty. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_cs_list_node_annotations( ptr: *const CorpusStorage, corpus_name: *const libc::c_char, @@ -491,7 +491,7 @@ pub extern "C" fn annis_cs_list_node_annotations( /// - `component_layer` - The layer of the edge component. /// - `only_most_frequent_values` - If both this argument and `list_values` are true, only return the most frequent value for each annotation name. /// - `err` - Pointer to a list of errors. If any error occured, this list will be non-empty. 
-#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_cs_list_edge_annotations( ptr: *const CorpusStorage, corpus_name: *const libc::c_char, @@ -539,7 +539,7 @@ pub extern "C" fn annis_cs_list_edge_annotations( /// - `err` - Pointer to a list of errors. If any error occured, this list will be non-empty. /// /// Returns `true` if valid and an error with the parser message if invalid. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_cs_validate_query( ptr: *const CorpusStorage, corpus_names: *const Vec, @@ -568,7 +568,7 @@ pub extern "C" fn annis_cs_validate_query( /// - `query` - The query to be analyzed. /// - `query_language` - The query language of the query (e.g. AQL). /// - `err` - Pointer to a list of errors. If any error occured, this list will be non-empty. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_cs_node_descriptions( ptr: *const CorpusStorage, query: *const libc::c_char, @@ -595,7 +595,7 @@ pub extern "C" fn annis_cs_node_descriptions( /// /// Returns the name of the imported corpus. /// The returned string must be deallocated by the caller using annis_str_free()! -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_cs_import_from_fs( ptr: *mut CorpusStorage, path: *const libc::c_char, @@ -639,7 +639,7 @@ pub extern "C" fn annis_cs_import_from_fs( /// - `path` - The location on the file system where the corpus data should be written to. /// - `format` - The format in which this corpus data will be stored stored. /// - `err` - Pointer to a list of errors. If any error occured, this list will be non-empty. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_cs_export_to_fs( ptr: *mut CorpusStorage, corpus_names: *const Vec, @@ -664,7 +664,7 @@ pub extern "C" fn annis_cs_export_to_fs( /// - `ptr` - The corpus storage object. /// - `ctype` -Filter by the component type. /// - `err` - Pointer to a list of errors. If any error occured, this list will be non-empty. 
-#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_cs_list_components_by_type( ptr: *mut CorpusStorage, corpus_name: *const libc::c_char, @@ -684,7 +684,7 @@ pub extern "C" fn annis_cs_list_components_by_type( /// /// - `ptr` - The corpus storage object. /// - `err` - Pointer to a list of errors. If any error occured, this list will be non-empty. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_cs_delete( ptr: *mut CorpusStorage, corpus: *const libc::c_char, @@ -700,7 +700,7 @@ pub extern "C" fn annis_cs_delete( /// /// - `corpus` The name of the corpus to unload. /// - `err` - Pointer to a list of errors. If any error occured, this list will be non-empty. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_cs_unload( ptr: *mut CorpusStorage, corpus: *const libc::c_char, @@ -718,7 +718,7 @@ pub extern "C" fn annis_cs_unload( /// - `err` - Pointer to a list of errors. If any error occured, this list will be non-empty. /// /// It is ensured that the update process is atomic and that the changes are persisted to disk if the error list is empty. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_cs_apply_update( ptr: *mut CorpusStorage, corpus_name: *const libc::c_char, diff --git a/capi/src/data.rs b/capi/src/data.rs index 21866cad7..66a022165 100644 --- a/capi/src/data.rs +++ b/capi/src/data.rs @@ -17,7 +17,7 @@ use std::ffi::CString; /// # Safety /// /// This functions dereferences the `ptr` pointer and is therefore unsafe. -#[no_mangle] +#[unsafe(no_mangle)] #[allow(clippy::from_raw_with_void_ptr)] pub unsafe extern "C" fn annis_free(ptr: *mut c_void) { if ptr.is_null() { @@ -25,7 +25,7 @@ pub unsafe extern "C" fn annis_free(ptr: *mut c_void) { } // take ownership and destroy the pointer // TODO: this is problematic (https://rust-lang.github.io/rust-clippy/master/index.html#/from_raw_with_void_ptr). Introduce type aware _free functions. 
- let ptr = Box::from_raw(ptr); + let ptr = unsafe { Box::from_raw(ptr) }; std::mem::drop(ptr); } @@ -34,23 +34,24 @@ pub unsafe extern "C" fn annis_free(ptr: *mut c_void) { /// # Safety /// /// This functions dereferences the `s` pointer and is therefore unsafe. -#[no_mangle] +#[unsafe(no_mangle)] pub unsafe extern "C" fn annis_str_free(s: *mut c_char) { if s.is_null() { return; } // take ownership and destruct - drop(CString::from_raw(s)); + let s = unsafe { CString::from_raw(s) }; + drop(s); } pub type IterPtr = Box>>; fn iter_next(ptr: *mut Box>>, err: *mut *mut ErrorList) -> *mut T { let it: &mut Box>> = cast_mut(ptr); - if let Some(v) = it.next() { - if let Some(v) = map_cerr(v, err) { - return Box::into_raw(Box::new(v)); - } + if let Some(v) = it.next() + && let Some(v) = map_cerr(v, err) + { + return Box::into_raw(Box::new(v)); } std::ptr::null_mut() } @@ -59,7 +60,7 @@ fn iter_next(ptr: *mut Box>>, err: *mut *mut Er /// or `NULL` if iterator is empty. /// /// - `err` - Pointer to a list of errors. If any error occured, this list will be non-empty. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_iter_nodeid_next( ptr: *mut IterPtr, err: *mut *mut ErrorList, @@ -81,13 +82,13 @@ pub fn vec_get(ptr: *const Vec, i: size_t) -> *const T { } /// Returns the number of elements of the string vector. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_vec_str_size(ptr: *const Vec) -> size_t { vec_size(ptr) } /// Get a read-only reference to the string at position `i` of the vector. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_vec_str_get(ptr: *const Vec, i: size_t) -> *const c_char { // custom implementation for string vectors, don't return a referance to CString but a char pointer let strvec: &Vec = cast_const(ptr); @@ -99,14 +100,14 @@ pub extern "C" fn annis_vec_str_get(ptr: *const Vec, i: size_t) -> *con } /// Create a new string vector. 
-#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_vec_str_new() -> *mut Vec { let result: Vec = Vec::new(); Box::into_raw(Box::new(result)) } /// Add an element to the string vector. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_vec_str_push(ptr: *mut Vec, v: *const c_char) { let strvec: &mut Vec = cast_mut(ptr); let v: &str = &cstr(v); @@ -116,7 +117,7 @@ pub extern "C" fn annis_vec_str_push(ptr: *mut Vec, v: *const c_char) { } /// Get the namespace of the given annotation object. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_annotation_ns(ptr: *const Annotation) -> *mut c_char { let anno: &Annotation = cast_const(ptr); CString::new(anno.key.ns.as_str()) @@ -125,7 +126,7 @@ pub extern "C" fn annis_annotation_ns(ptr: *const Annotation) -> *mut c_char { } /// Get the name of the given annotation object. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_annotation_name(ptr: *const Annotation) -> *mut c_char { let anno: &Annotation = cast_const(ptr); CString::new(anno.key.name.as_str()) @@ -134,7 +135,7 @@ pub extern "C" fn annis_annotation_name(ptr: *const Annotation) -> *mut c_char { } /// Get the value of the given annotation object. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_annotation_val(ptr: *const Annotation) -> *mut c_char { let anno: &Annotation = cast_const(ptr); CString::new(anno.val.as_str()) @@ -143,13 +144,13 @@ pub extern "C" fn annis_annotation_val(ptr: *const Annotation) -> *mut c_char { } /// Returns the number of elements of the annotation vector. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_vec_annotation_size(ptr: *const Vec) -> size_t { vec_size(ptr) } /// Get a read-only reference to the annotation at position `i` of the vector. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_vec_annotation_get( ptr: *const Vec, i: size_t, @@ -158,25 +159,25 @@ pub extern "C" fn annis_vec_annotation_get( } /// Returns the number of elements of the edge vector. 
-#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_vec_edge_size(ptr: *const Vec) -> size_t { vec_size(ptr) } /// Get a read-only reference to the edge at position `i` of the vector. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_vec_edge_get(ptr: *const Vec, i: size_t) -> *const Edge { vec_get(ptr, i) } /// Returns the number of elements of the component vector. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_vec_component_size(ptr: *const Vec) -> size_t { vec_size(ptr) } /// Get a read-only reference to the component at position `i` of the vector. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_vec_component_get( ptr: *const Vec, i: size_t, @@ -185,13 +186,13 @@ pub extern "C" fn annis_vec_component_get( } /// Returns the number of elements of the query attribute description vector. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_vec_qattdesc_size(ptr: *const Vec) -> size_t { vec_size(ptr) } /// Get a read-only reference to the query attribute description at position `i` of the vector. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_vec_qattdesc_get_component_nr( ptr: *const Vec, i: size_t, @@ -204,7 +205,7 @@ pub extern "C" fn annis_vec_qattdesc_get_component_nr( /// Create a string representing the AQL fragment part of the query attribute description. /// /// The resulting char* must be freeed with annis_str_free! -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_vec_qattdesc_get_aql_fragment( ptr: *const Vec, i: size_t, @@ -218,7 +219,7 @@ pub extern "C" fn annis_vec_qattdesc_get_aql_fragment( /// Create a string representing the variable part of the query attribute description. /// /// The resulting char* must be freeed with annis_str_free! 
-#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_vec_qattdesc_get_variable( ptr: *const Vec, i: size_t, @@ -232,14 +233,14 @@ pub extern "C" fn annis_vec_qattdesc_get_variable( /// Create a string representing the annotation name part of the query attribute description. /// /// The resulting char* must be freeed with annis_str_free! -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_vec_qattdesc_get_anno_name( ptr: *const Vec, i: size_t, ) -> *mut c_char { let desc_ptr: *const QueryAttributeDescription = vec_get(ptr, i); let desc: &QueryAttributeDescription = cast_const(desc_ptr); - if let Some(ref anno_name) = desc.anno_name { + if let Some(anno_name) = &desc.anno_name { let cstr: CString = CString::new(anno_name.as_str()).unwrap_or_default(); cstr.into_raw() } else { @@ -248,13 +249,13 @@ pub extern "C" fn annis_vec_qattdesc_get_anno_name( } /// Returns the number of rows of the string matrix. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_matrix_str_nrows(ptr: *const Matrix) -> size_t { vec_size(ptr) } /// Returns the number of columns of the string matrix. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_matrix_str_ncols(ptr: *const Matrix) -> size_t { let v: &Vec> = cast_const(ptr); if !v.is_empty() { @@ -264,7 +265,7 @@ pub extern "C" fn annis_matrix_str_ncols(ptr: *const Matrix) -> size_t } /// Get a read-only reference to the string at the at position (`row`, `col`) of the matrix. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_matrix_str_get( ptr: *const Matrix, row: size_t, @@ -279,13 +280,13 @@ pub extern "C" fn annis_matrix_str_get( } /// Returns the number of rows of the frequency table. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_freqtable_str_nrows(ptr: *const FrequencyTable) -> size_t { vec_size(ptr) } /// Returns the number of columns of the frequency table. 
-#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_freqtable_str_ncols(ptr: *const FrequencyTable) -> size_t { let v: &FrequencyTable = cast_const(ptr); if !v.is_empty() { @@ -295,7 +296,7 @@ pub extern "C" fn annis_freqtable_str_ncols(ptr: *const FrequencyTable) } /// Get a read-only reference to the string at the at position (`row`, `col`) of the frequency table. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_freqtable_str_get( ptr: *const FrequencyTable, row: size_t, @@ -310,7 +311,7 @@ pub extern "C" fn annis_freqtable_str_get( } /// Get the count of the `row` of the frequency table. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_freqtable_str_count( ptr: *const FrequencyTable, row: size_t, diff --git a/capi/src/graph.rs b/capi/src/graph.rs index 8790ebee8..b8010a0b6 100644 --- a/capi/src/graph.rs +++ b/capi/src/graph.rs @@ -1,16 +1,16 @@ use super::{cast_const, cstr}; use crate::{cerror::ErrorList, data::IterPtr, map_cerr}; use graphannis::{ + AnnotationGraph, errors::GraphAnnisError, graph::{Annotation, Edge, Match, NodeID}, model::{AnnotationComponent, AnnotationComponentType}, - AnnotationGraph, }; use itertools::Itertools; use std::ffi::CString; /// Get the type of the given component. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_component_type(c: *const AnnotationComponent) -> AnnotationComponentType { let c: &AnnotationComponent = cast_const(c); c.get_type() @@ -19,7 +19,7 @@ pub extern "C" fn annis_component_type(c: *const AnnotationComponent) -> Annotat /// Get the layer of the given component. /// /// The returned string must be deallocated by the caller using annis_str_free()! 
-#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_component_layer(c: *const AnnotationComponent) -> *mut libc::c_char { let c: &AnnotationComponent = cast_const(c); let as_string: &str = &c.layer; @@ -29,7 +29,7 @@ pub extern "C" fn annis_component_layer(c: *const AnnotationComponent) -> *mut l /// Get the name of the given component. /// /// The returned string must be deallocated by the caller using annis_str_free()! -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_component_name(c: *const AnnotationComponent) -> *mut libc::c_char { let c: &AnnotationComponent = cast_const(c); let as_string: &str = &c.name; @@ -37,7 +37,7 @@ pub extern "C" fn annis_component_name(c: *const AnnotationComponent) -> *mut li } /// Return an iterator over all nodes of the graph `g` and the given `node_type` (e.g. "node" or "corpus"). -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_graph_nodes_by_type( g: *const AnnotationGraph, node_type: *const libc::c_char, @@ -55,7 +55,7 @@ pub extern "C" fn annis_graph_nodes_by_type( /// Return a vector of all annotations for the given `node` in the graph `g`. /// /// - `err` - Pointer to a list of errors. If any error occured, this list will be non-empty. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_graph_annotations_for_node( g: *const AnnotationGraph, node: NodeID, @@ -72,7 +72,7 @@ pub extern "C" fn annis_graph_annotations_for_node( } /// Return a vector of all components for the graph `g`. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_graph_all_components( g: *const AnnotationGraph, ) -> *mut Vec { @@ -82,7 +82,7 @@ pub extern "C" fn annis_graph_all_components( } /// Return a vector of all components for the graph `g` and the given component type. 
-#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_graph_all_components_by_type( g: *const AnnotationGraph, ctype: AnnotationComponentType, @@ -95,7 +95,7 @@ pub extern "C" fn annis_graph_all_components_by_type( /// Return a vector of all outgoing edges for the graph `g`, the `source` node and the given `component`. /// /// - `err` - Pointer to a list of errors. If any error occured, this list will be non-empty. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_graph_outgoing_edges( g: *const AnnotationGraph, source: NodeID, @@ -121,7 +121,7 @@ pub extern "C" fn annis_graph_outgoing_edges( /// Return a vector of annnotations for the given `edge` in the `component` of graph `g. /// /// - `err` - Pointer to a list of errors. If any error occured, this list will be non-empty. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_graph_annotations_for_edge( g: *const AnnotationGraph, edge: Edge, diff --git a/capi/src/logging.rs b/capi/src/logging.rs index a2c7bc96f..c741fef3d 100644 --- a/capi/src/logging.rs +++ b/capi/src/logging.rs @@ -36,7 +36,7 @@ impl From for simplelog::LevelFilter { /// # Safety /// /// This functions dereferences the `err` pointer and is therefore unsafe. -#[no_mangle] +#[unsafe(no_mangle)] pub unsafe extern "C" fn annis_init_logging( logfile: *const libc::c_char, level: LogLevel, @@ -45,20 +45,24 @@ pub unsafe extern "C" fn annis_init_logging( if !logfile.is_null() { let logfile: &str = &cstr(logfile); - match File::create(logfile) { - Ok(f) => { - if let Err(e) = WriteLogger::init(LevelFilter::from(level), Config::default(), f) { - // File was created, but logger was not. + unsafe { + match File::create(logfile) { + Ok(f) => { + if let Err(e) = + WriteLogger::init(LevelFilter::from(level), Config::default(), f) + { + // File was created, but logger was not. 
+ if !err.is_null() { + *err = Box::into_raw(Box::new(vec![Error::from(e)])); + } + } + } + Err(e) => { if !err.is_null() { *err = Box::into_raw(Box::new(vec![Error::from(e)])); } } - } - Err(e) => { - if !err.is_null() { - *err = Box::into_raw(Box::new(vec![Error::from(e)])); - } - } - }; + }; + } } } diff --git a/capi/src/update.rs b/capi/src/update.rs index 49039d3be..ff8955e97 100644 --- a/capi/src/update.rs +++ b/capi/src/update.rs @@ -3,7 +3,7 @@ use super::{cast_mut, cstr, map_cerr}; use graphannis::update::{GraphUpdate, UpdateEvent}; /// Create a new graph (empty) update instance -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_graphupdate_new() -> *mut GraphUpdate { let gu = GraphUpdate::new(); Box::into_raw(Box::new(gu)) @@ -15,7 +15,7 @@ pub extern "C" fn annis_graphupdate_new() -> *mut GraphUpdate { /// - `node_name` - Name of the new node. /// - `node_type` - Type of the new node, e.g. "node" or "corpus". /// - `err` - Pointer to a list of errors. If any error occurred, this list will be non-empty. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_graphupdate_add_node( ptr: *mut GraphUpdate, node_name: *const libc::c_char, @@ -37,7 +37,7 @@ pub extern "C" fn annis_graphupdate_add_node( /// - `ptr` - The graph update object. /// - `node_name` - Name of node to delete. /// - `err` - Pointer to a list of errors. If any error occurred, this list will be non-empty. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_graphupdate_delete_node( ptr: *mut GraphUpdate, node_name: *const libc::c_char, @@ -60,7 +60,7 @@ pub extern "C" fn annis_graphupdate_delete_node( /// - `annos_name` - Name of the new annotation. /// - `annos_value` - Value of the new annotation. /// - `err` - Pointer to a list of errors. If any error occurred, this list will be non-empty. 
-#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_graphupdate_add_node_label( ptr: *mut GraphUpdate, node_name: *const libc::c_char, @@ -88,7 +88,7 @@ pub extern "C" fn annis_graphupdate_add_node_label( /// - `annos_ns` - Namespace of deleted new annotation. /// - `annos_name` - Name of the deleted annotation. /// - `err` - Pointer to a list of errors. If any error occurred, this list will be non-empty. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_graphupdate_delete_node_label( ptr: *mut GraphUpdate, node_name: *const libc::c_char, @@ -116,7 +116,7 @@ pub extern "C" fn annis_graphupdate_delete_node_label( /// - `component_type` - Type of the component of the new edge. /// - `component_name` - Name of the component of the new edge. /// - `err` - Pointer to a list of errors. If any error occurred, this list will be non-empty. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_graphupdate_add_edge( ptr: *mut GraphUpdate, source_node: *const libc::c_char, @@ -149,7 +149,7 @@ pub extern "C" fn annis_graphupdate_add_edge( /// - `component_type` - Type of the component of the edge to delete. /// - `component_name` - Name of the component of the edge to delete. /// - `err` - Pointer to a list of errors. If any error occurred, this list will be non-empty. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_graphupdate_delete_edge( ptr: *mut GraphUpdate, source_node: *const libc::c_char, @@ -184,7 +184,7 @@ pub extern "C" fn annis_graphupdate_delete_edge( /// - `annos_name` - Name of the new annotation. /// - `annos_value` - Value of the new annotation. /// - `err` - Pointer to a list of errors. If any error occurred, this list will be non-empty. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_graphupdate_add_edge_label( ptr: *mut GraphUpdate, source_node: *const libc::c_char, @@ -225,7 +225,7 @@ pub extern "C" fn annis_graphupdate_add_edge_label( /// - `annos_ns` - Namespace of the annotation to delete. 
/// - `annos_name` - Name of the annotation to delete. /// - `err` - Pointer to a list of errors. If any error occurred, this list will be non-empty. -#[no_mangle] +#[unsafe(no_mangle)] pub extern "C" fn annis_graphupdate_delete_edge_label( ptr: *mut GraphUpdate, source_node: *const libc::c_char, diff --git a/cli/Cargo.toml b/cli/Cargo.toml index 8383884de..39c7e37fd 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -2,7 +2,7 @@ authors = ["Thomas Krause "] autobins = true description = "This is a command-line interface to the new backend implementation of the ANNIS linguistic search and visualization system." -edition = "2021" +edition = "2024" license = "Apache-2.0" name = "graphannis-cli" readme = "crate-info.md" diff --git a/cli/src/bin/annis.rs b/cli/src/bin/annis.rs index 9205f43e2..d51beff0d 100644 --- a/cli/src/bin/annis.rs +++ b/cli/src/bin/annis.rs @@ -3,20 +3,20 @@ extern crate anyhow; use clap::{App, Arg}; use compound_duration::format_dhms; +use graphannis::CorpusStorage; use graphannis::corpusstorage::FrequencyDefEntry; use graphannis::corpusstorage::LoadStatus; use graphannis::corpusstorage::QueryLanguage; use graphannis::corpusstorage::ResultOrder; use graphannis::corpusstorage::{CorpusInfo, SearchQuery}; use graphannis::corpusstorage::{ExportFormat, ImportFormat}; -use graphannis::CorpusStorage; use log::info; use prettytable::Cell; use prettytable::Row; use prettytable::Table; +use rustyline::Editor; use rustyline::completion::{Completer, FilenameCompleter}; use rustyline::error::ReadlineError; -use rustyline::Editor; use rustyline_derive::{Helper, Highlighter, Hinter, Validator}; use simplelog::{LevelFilter, SimpleLogger, TermLogger}; use std::path::{Path, PathBuf}; @@ -162,7 +162,7 @@ impl AnnisRunner { println!("No previous history."); } - if let Some(ref storage) = self.storage { + if let Some(storage) = &self.storage { rl.set_helper(Some(ConsoleHelper::new(storage.list().unwrap_or_default()))); } @@ -406,7 +406,10 @@ impl AnnisRunner { if 
corpora.contains(s) { self.current_corpus.push(s.to_string()); } else { - println!("Corpus {} does not exist. Uses the \"list\" command to get all available corpora", s); + println!( + "Corpus {} does not exist. Uses the \"list\" command to get all available corpora", + s + ); } } } @@ -593,7 +596,9 @@ impl AnnisRunner { defs.filter_map(|d| -> Option { d.parse().ok() }) .collect() } else { - println!("You have to give the frequency definition as first argument and the AQL as second argument"); + println!( + "You have to give the frequency definition as first argument and the AQL as second argument" + ); return Ok(()); }; @@ -753,7 +758,10 @@ fn main() { simplelog::TerminalMode::Mixed, simplelog::ColorChoice::Auto, ) { - println!("Error, can't initialize the terminal log output: {}.\nWill degrade to a more simple logger", e); + println!( + "Error, can't initialize the terminal log output: {}.\nWill degrade to a more simple logger", + e + ); if let Err(e_simple) = SimpleLogger::init(log_filter, log_config) { println!("Simple logging failed too: {}", e_simple); } diff --git a/cli/src/bin/annis_bench_queries.rs b/cli/src/bin/annis_bench_queries.rs index 49141d9be..d717402c2 100644 --- a/cli/src/bin/annis_bench_queries.rs +++ b/cli/src/bin/annis_bench_queries.rs @@ -5,16 +5,16 @@ extern crate graphannis; use clap::*; use criterion::BenchmarkGroup; -use criterion::{measurement::Measurement, Criterion}; +use criterion::{Criterion, measurement::Measurement}; use std::collections::BTreeSet; use std::path::{Path, PathBuf}; use std::time::Duration; use std::sync::Arc; +use graphannis::CorpusStorage; use graphannis::corpusstorage::{QueryLanguage, SearchQuery}; use graphannis::util::{self, SearchDef}; -use graphannis::CorpusStorage; pub fn create_query_input( data_dir: &Path, diff --git a/core/Cargo.toml b/core/Cargo.toml index d14d51cc3..bde057458 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Thomas Krause "] description = "This crate 
supports graph representation and generic query-functionality." -edition = "2021" +edition = "2024" license = "Apache-2.0" name = "graphannis-core" readme = "crate-info.md" @@ -22,16 +22,16 @@ normpath = "1.1.1" num-traits = "0.2" percent-encoding = "2.1" quick-xml = "0.28" -rand = {version = "0.8", features = ["small_rng"]} -rayon = {version = "1.3", default-features = false} +rand = { version = "0.9", features = ["small_rng"] } +rayon = { version = "1.3", default-features = false } regex = "1" regex-syntax = "0.8" rustc-hash = "1.0" -serde = {version = "1.0", features = ["rc"]} +serde = { version = "1.0", features = ["rc"] } serde_bytes = "0.11" serde_derive = "1.0" smallvec = "1.6" -smartstring = {version = "1", features = ["serde"]} +smartstring = { version = "1", features = ["serde"] } sstable = "0.11" strum = "0.21" strum_macros = "0.21" @@ -41,11 +41,11 @@ toml = "0.8" transient-btree-index = "0.5" [target.'cfg(windows)'.dependencies] -winapi = {version = "0.3", features = ["heapapi"]} +winapi = { version = "0.3", features = ["heapapi"] } [dev-dependencies] env_logger = "0.9" fake = "2.2" -insta = {version = "1.38.0", features = ["json"]} +insta = { version = "1.38.0", features = ["json"] } pretty_assertions = "1.3" serde_json = "1.0" diff --git a/core/src/annostorage/inmemory.rs b/core/src/annostorage/inmemory.rs index d59b70e78..6631c1dbe 100644 --- a/core/src/annostorage/inmemory.rs +++ b/core/src/annostorage/inmemory.rs @@ -8,7 +8,6 @@ use crate::{annostorage::symboltable::SymbolTable, errors::GraphAnnisCoreError}; use core::ops::Bound::*; use itertools::Itertools; use rand::seq::IteratorRandom; -use rand::thread_rng; use rustc_hash::FxHashSet; use smartstring::alias::String; use smartstring::{LazyCompact, SmartString}; @@ -225,7 +224,7 @@ where } }; - if let Some(ref existing_anno) = existing_anno { + if let Some(existing_anno) = &existing_anno { // remove the relation from the original annotation to this item 
self.remove_element_from_by_anno(existing_anno, &item); } @@ -267,47 +266,47 @@ where let mut result = None; let orig_key = key; - if let Some(key) = self.anno_keys.get_symbol(key) { - if let Some(mut all_annos) = self.by_container.remove(item) { - // find the specific annotation key from the sorted vector of all annotations of this item - let anno_idx = all_annos.binary_search_by_key(&key, |a| a.key); + if let Some(key) = self.anno_keys.get_symbol(key) + && let Some(mut all_annos) = self.by_container.remove(item) + { + // find the specific annotation key from the sorted vector of all annotations of this item + let anno_idx = all_annos.binary_search_by_key(&key, |a| a.key); - if let Ok(anno_idx) = anno_idx { - // since value was found, also remove the item from the other containers - self.remove_element_from_by_anno(&all_annos[anno_idx], item); + if let Ok(anno_idx) = anno_idx { + // since value was found, also remove the item from the other containers + self.remove_element_from_by_anno(&all_annos[anno_idx], item); - let old_value = all_annos[anno_idx].val; + let old_value = all_annos[anno_idx].val; - // remove the specific annotation key from the entry - all_annos.remove(anno_idx); + // remove the specific annotation key from the entry + all_annos.remove(anno_idx); - // decrease the annotation count for this key - let new_key_count: usize = - if let Some(num_of_keys) = self.anno_key_sizes.get_mut(orig_key) { - *num_of_keys -= 1; - *num_of_keys - } else { - 0 - }; - // if annotation count dropped to zero remove the key - if new_key_count == 0 { - self.by_anno.remove(&key); - self.anno_key_sizes.remove(orig_key); - self.anno_keys.remove(key); - } + // decrease the annotation count for this key + let new_key_count: usize = + if let Some(num_of_keys) = self.anno_key_sizes.get_mut(orig_key) { + *num_of_keys -= 1; + *num_of_keys + } else { + 0 + }; + // if annotation count dropped to zero remove the key + if new_key_count == 0 { + self.by_anno.remove(&key); + 
self.anno_key_sizes.remove(orig_key); + self.anno_keys.remove(key); + } - result = self - .anno_values - .get_value_ref(old_value) - .map(|v| Cow::Owned(v.clone().into())); + result = self + .anno_values + .get_value_ref(old_value) + .map(|v| Cow::Owned(v.clone().into())); - self.check_and_remove_value_symbol(old_value); - self.total_number_of_annos -= 1; - } - // if there are more annotations for this item, re-insert them - if !all_annos.is_empty() { - self.by_container.insert(item.clone(), all_annos); - } + self.check_and_remove_value_symbol(old_value); + self.total_number_of_annos -= 1; + } + // if there are more annotations for this item, re-insert them + if !all_annos.is_empty() { + self.by_container.insert(item.clone(), all_annos); } } @@ -398,25 +397,23 @@ where (self.anno_keys.get_symbol(key), self.by_container.get(item)) { let idx = all_annos.binary_search_by_key(&key_symbol, |a| a.key); - if let Ok(idx) = idx { - if let Some(val) = self.anno_values.get_value_ref(all_annos[idx].val) { - return Ok(Some(Cow::Borrowed(val))); - } + if let Ok(idx) = idx + && let Some(val) = self.anno_values.get_value_ref(all_annos[idx].val) + { + return Ok(Some(Cow::Borrowed(val))); } } Ok(None) } fn has_value_for_item(&self, item: &T, key: &AnnoKey) -> Result { - if let Some(key_symbol) = self.anno_keys.get_symbol(key) { - if let Some(all_annos) = self.by_container.get(item) { - if all_annos - .binary_search_by_key(&key_symbol, |a| a.key) - .is_ok() - { - return Ok(true); - } - } + if let Some(key_symbol) = self.anno_keys.get_symbol(key) + && let Some(all_annos) = self.by_container.get(item) + && all_annos + .binary_search_by_key(&key_symbol, |a| a.key) + .is_ok() + { + return Ok(true); } Ok(false) } @@ -442,13 +439,12 @@ where if let Some(key_symbol) = self.anno_keys.get_symbol(&key) { for item in it { let item = item?; - if let Some(all_annos) = self.by_container.get(&item) { - if all_annos + if let Some(all_annos) = self.by_container.get(&item) + && all_annos 
.binary_search_by_key(&key_symbol, |a| a.key) .is_ok() - { - matches.push((item, key.clone()).into()); - } + { + matches.push((item, key.clone()).into()); } } } @@ -468,13 +464,12 @@ where for item in it { let item = item?; for (key_symbol, key) in matching_key_symbols.iter() { - if let Some(all_annos) = self.by_container.get(&item) { - if all_annos + if let Some(all_annos) = self.by_container.get(&item) + && all_annos .binary_search_by_key(&key_symbol, |a| &a.key) .is_ok() - { - matches.push((item.clone(), key.clone()).into()); - } + { + matches.push((item.clone(), key.clone()).into()); } } } @@ -591,10 +586,10 @@ where Ok((item, anno_key, value)) }) .filter_map_ok(move |(item, anno_key, item_value)| { - if let Some(item_value) = item_value { - if item_value != value { - return Some((item, anno_key).into()); - } + if let Some(item_value) = item_value + && item_value != value + { + return Some((item, anno_key).into()); } None }); @@ -657,15 +652,13 @@ where ns: ns.into(), name: name.into(), }; - if let Some(key_symbol) = self.anno_keys.get_symbol(&key) { - if let Some(all_annos) = self.by_container.get(item) { - if all_annos - .binary_search_by_key(&key_symbol, |a| a.key) - .is_ok() - { - return Ok(vec![Arc::from(key)]); - } - } + if let Some(key_symbol) = self.anno_keys.get_symbol(&key) + && let Some(all_annos) = self.by_container.get(item) + && all_annos + .binary_search_by_key(&key_symbol, |a| a.key) + .is_ok() + { + return Ok(vec![Arc::from(key)]); } Ok(vec![]) @@ -723,22 +716,22 @@ where if let Some(anno_size) = self.anno_key_sizes.get(&anno_key) { universe_size += *anno_size; - if let Some(anno_key) = self.anno_keys.get_symbol(&anno_key) { - if let Some(histo) = self.histogram_bounds.get(&anno_key) { - // find the range in which the value is contained - // we need to make sure the histogram is not empty -> should have at least two bounds - if histo.len() >= 2 { - sum_histogram_buckets += histo.len() - 1; - - for i in 0..histo.len() - 1 { - let bucket_begin = 
&histo[i]; - let bucket_end = &histo[i + 1]; - // check if the range overlaps with the search range - if bucket_begin.as_str() <= upper_val - && lower_val <= bucket_end.as_str() - { - count_matches += 1; - } + if let Some(anno_key) = self.anno_keys.get_symbol(&anno_key) + && let Some(histo) = self.histogram_bounds.get(&anno_key) + { + // find the range in which the value is contained + // we need to make sure the histogram is not empty -> should have at least two bounds + if histo.len() >= 2 { + sum_histogram_buckets += histo.len() - 1; + + for i in 0..histo.len() - 1 { + let bucket_begin = &histo[i]; + let bucket_end = &histo[i + 1]; + // check if the range overlaps with the search range + if bucket_begin.as_str() <= upper_val + && lower_val <= bucket_end.as_str() + { + count_matches += 1; } } } @@ -783,7 +776,7 @@ where // For regular expressions without a prefix the worst case would be `.*[X].*` where `[X]` are the most common characters. // Sample values from the histogram to get a better estimation of how many percent of the actual values could match. 
if let Ok(pattern) = regex::Regex::new(&full_match_pattern) { - let mut rng = thread_rng(); + let mut rng = rand::rng(); let qualified_keys: Vec<_> = match ns { Some(ns) => vec![AnnoKey { name: name.into(), @@ -801,26 +794,25 @@ where .copied() .unwrap_or_default(); - if let Some(histo) = self.histogram_bounds.get(&anno_key_symbol) { - if !histo.is_empty() { - let sampled_values = histo.iter().choose_multiple(&mut rng, 20); - let matches = sampled_values - .iter() - .filter(|v| pattern.is_match(v)) - .count(); - if sampled_values.len() == matches { - // Assume all values match - guessed_count += anno_size; - } else if matches == 0 { - // No match found, but use the bucket size as pessimistic guess - guessed_count += - (anno_size as f64 / sampled_values.len() as f64) as usize; - } else { - // Use the percent of matched values to guess the overall number - let match_ratio = - (matches as f64) / (sampled_values.len() as f64); - guessed_count += ((anno_size as f64) * match_ratio) as usize; - } + if let Some(histo) = self.histogram_bounds.get(&anno_key_symbol) + && !histo.is_empty() + { + let sampled_values = histo.iter().choose_multiple(&mut rng, 20); + let matches = sampled_values + .iter() + .filter(|v| pattern.is_match(v)) + .count(); + if sampled_values.len() == matches { + // Assume all values match + guessed_count += anno_size; + } else if matches == 0 { + // No match found, but use the bucket size as pessimistic guess + guessed_count += + (anno_size as f64 / sampled_values.len() as f64) as usize; + } else { + // Use the percent of matched values to guess the overall number + let match_ratio = (matches as f64) / (sampled_values.len() as f64); + guessed_count += ((anno_size as f64) * match_ratio) as usize; } } } @@ -851,12 +843,12 @@ where // guess for each fully qualified annotation key for anno_key in qualified_keys { - if let Some(anno_key) = self.anno_keys.get_symbol(&anno_key) { - if let Some(histo) = self.histogram_bounds.get(&anno_key) { - for v in 
histo.iter() { - let count: &mut usize = sampled_values.entry(v).or_insert(0); - *count += 1; - } + if let Some(anno_key) = self.anno_keys.get_symbol(&anno_key) + && let Some(histo) = self.histogram_bounds.get(&anno_key) + { + for v in histo.iter() { + let count: &mut usize = sampled_values.entry(v).or_insert(0); + *count += 1; } } } @@ -881,28 +873,28 @@ where key: &AnnoKey, most_frequent_first: bool, ) -> Result>> { - if let Some(key) = self.anno_keys.get_symbol(key) { - if let Some(values_for_key) = self.by_anno.get(&key) { - if most_frequent_first { - let result = values_for_key - .iter() - .filter_map(|(val, items)| { - let val = self.anno_values.get_value_ref(*val)?; - Some((items.len(), val)) - }) - .sorted() - .rev() - .map(|(_, val)| Cow::Borrowed(&val[..])) - .collect(); - return Ok(result); - } else { - let result = values_for_key - .iter() - .filter_map(|(val, _items)| self.anno_values.get_value_ref(*val)) - .map(|val| Cow::Borrowed(&val[..])) - .collect(); - return Ok(result); - } + if let Some(key) = self.anno_keys.get_symbol(key) + && let Some(values_for_key) = self.by_anno.get(&key) + { + if most_frequent_first { + let result = values_for_key + .iter() + .filter_map(|(val, items)| { + let val = self.anno_values.get_value_ref(*val)?; + Some((items.len(), val)) + }) + .sorted() + .rev() + .map(|(_, val)| Cow::Borrowed(&val[..])) + .collect(); + return Ok(result); + } else { + let result = values_for_key + .iter() + .filter_map(|(val, _items)| self.anno_values.get_value_ref(*val)) + .map(|val| Cow::Borrowed(&val[..])) + .collect(); + return Ok(result); } } Ok(vec![]) @@ -926,7 +918,7 @@ where for anno_key in self.anno_key_sizes.keys() { if let Some(anno_key) = self.anno_keys.get_symbol(anno_key) { // sample a maximal number of annotation values - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); if let Some(values_for_key) = self.by_anno.get(&anno_key) { let sampled_anno_values: Vec = values_for_key .iter() @@ -1023,12 +1015,10 @@ impl 
NodeAnnotationStorage for AnnoStorageImpl { self.anno_keys.get_symbol(&NODE_NAME_KEY), self.anno_values .get_symbol(&SmartString::::from(node_name)), - ) { - if let Some(items_with_anno) = self.by_anno.get(&anno_name_symbol) { - if let Some(items) = items_with_anno.get(&value_symbol) { - return Ok(items.iter().copied().next()); - } - } + ) && let Some(items_with_anno) = self.by_anno.get(&anno_name_symbol) + && let Some(items) = items_with_anno.get(&value_symbol) + { + return Ok(items.iter().copied().next()); } Ok(None) @@ -1039,12 +1029,10 @@ impl NodeAnnotationStorage for AnnoStorageImpl { self.anno_keys.get_symbol(&NODE_NAME_KEY), self.anno_values .get_symbol(&SmartString::::from(node_name)), - ) { - if let Some(items_with_anno) = self.by_anno.get(&anno_name_symbol) { - if let Some(items) = items_with_anno.get(&value_symbol) { - return Ok(!items.is_empty()); - } - } + ) && let Some(items_with_anno) = self.by_anno.get(&anno_name_symbol) + && let Some(items) = items_with_anno.get(&value_symbol) + { + return Ok(!items.is_empty()); } Ok(false) diff --git a/core/src/annostorage/mod.rs b/core/src/annostorage/mod.rs index 911ada4e5..ac6ed0ec9 100644 --- a/core/src/annostorage/mod.rs +++ b/core/src/annostorage/mod.rs @@ -159,10 +159,10 @@ impl ValueSearch { #[inline] pub fn as_ref(&self) -> ValueSearch<&T> { - match *self { + match self { ValueSearch::Any => ValueSearch::Any, - ValueSearch::Some(ref v) => ValueSearch::Some(v), - ValueSearch::NotSome(ref v) => ValueSearch::NotSome(v), + ValueSearch::Some(v) => ValueSearch::Some(v), + ValueSearch::NotSome(v) => ValueSearch::NotSome(v), } } } @@ -300,7 +300,7 @@ where /// Return a list of all existing values for a given annotation `key`. /// If the `most_frequent_first` parameter is true, the results are sorted by their frequency. 
fn get_all_values(&self, key: &AnnoKey, most_frequent_first: bool) - -> Result>>; + -> Result>>; /// Get all the annotation keys which are part of this annotation storage fn annotation_keys(&self) -> Result>; diff --git a/core/src/annostorage/ondisk.rs b/core/src/annostorage/ondisk.rs index 4725c9079..b00d6025b 100644 --- a/core/src/annostorage/ondisk.rs +++ b/core/src/annostorage/ondisk.rs @@ -1,5 +1,5 @@ -use crate::annostorage::symboltable::SymbolTable; use crate::annostorage::AnnotationStorage; +use crate::annostorage::symboltable::SymbolTable; use crate::annostorage::{Match, ValueSearch}; use crate::errors::Result; use crate::graph::NODE_NAME_KEY; @@ -10,9 +10,8 @@ use crate::{try_as_boxed_iter, util}; use core::ops::Bound::*; use itertools::Itertools; use rand::seq::IteratorRandom; -use rand::thread_rng; -use regex_syntax::hir::literal::Seq; use regex_syntax::Parser; +use regex_syntax::hir::literal::Seq; use serde_bytes::ByteBuf; use std::borrow::Cow; use std::collections::{BTreeMap, HashMap, HashSet}; @@ -707,10 +706,10 @@ where Err(e) => Err(e), }) .filter_map_ok(move |(item, anno_key, item_value)| { - if let Some(item_value) = item_value { - if item_value != value { - return Some((item, anno_key).into()); - } + if let Some(item_value) = item_value + && item_value != value + { + return Some((item, anno_key).into()); } None }); @@ -927,7 +926,7 @@ where // For regular expressions without a prefix the worst case would be `.*[X].*` where `[X]` are the most common characters. // Sample values from the histogram to get a better estimation of how many percent of the actual values could match. 
if let Ok(pattern) = regex::Regex::new(&full_match_pattern) { - let mut rng = thread_rng(); + let mut rng = rand::rng(); let qualified_keys = match ns { Some(ns) => vec![AnnoKey { name: name.into(), @@ -941,27 +940,26 @@ where .get(&anno_key) .copied() .unwrap_or_default(); - if let Some(histo) = self.histogram_bounds.get(&anno_key) { - if !histo.is_empty() { - let sampled_values = histo.iter().choose_multiple(&mut rng, 20); - - let matches = sampled_values - .iter() - .filter(|v| pattern.is_match(v)) - .count(); - if sampled_values.len() == matches { - // Assume all values match - guessed_count += anno_size; - } else if matches == 0 { - // No match found, but use the bucket size as pessimistic guess - guessed_count += - (anno_size as f64 / sampled_values.len() as f64) as usize; - } else { - // Use the percent of matched values to guess the overall number - let match_ratio = - (matches as f64) / (sampled_values.len() as f64); - guessed_count += ((anno_size as f64) * match_ratio) as usize; - } + if let Some(histo) = self.histogram_bounds.get(&anno_key) + && !histo.is_empty() + { + let sampled_values = histo.iter().choose_multiple(&mut rng, 20); + + let matches = sampled_values + .iter() + .filter(|v| pattern.is_match(v)) + .count(); + if sampled_values.len() == matches { + // Assume all values match + guessed_count += anno_size; + } else if matches == 0 { + // No match found, but use the bucket size as pessimistic guess + guessed_count += + (anno_size as f64 / sampled_values.len() as f64) as usize; + } else { + // Use the percent of matched values to guess the overall number + let match_ratio = (matches as f64) / (sampled_values.len() as f64); + guessed_count += ((anno_size as f64) * match_ratio) as usize; } } } @@ -1069,7 +1067,7 @@ where // collect statistics for each annotation key separately for anno_key in self.anno_key_sizes.keys() { // sample a maximal number of annotation values - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); let 
all_values_for_key = self.get_by_anno_qname_range(anno_key); diff --git a/core/src/annostorage/symboltable.rs b/core/src/annostorage/symboltable.rs index 38d9b2862..0d778e6bf 100644 --- a/core/src/annostorage/symboltable.rs +++ b/core/src/annostorage/symboltable.rs @@ -83,19 +83,19 @@ where } pub fn get_value(&self, id: usize) -> Option> { - if id < self.by_id.len() { - if let Some(ref val) = self.by_id[id] { - return Some(val.clone()); - } + if id < self.by_id.len() + && let Some(ref val) = self.by_id[id] + { + return Some(val.clone()); } None } pub fn get_value_ref(&self, id: usize) -> Option<&T> { - if id < self.by_id.len() { - if let Some(ref val) = self.by_id[id] { - return Some(val.as_ref()); - } + if id < self.by_id.len() + && let Some(ref val) = self.by_id[id] + { + return Some(val.as_ref()); } None } diff --git a/core/src/dfs.rs b/core/src/dfs.rs index b849a60b2..211720b15 100644 --- a/core/src/dfs.rs +++ b/core/src/dfs.rs @@ -113,8 +113,7 @@ impl<'a> CycleSafeDFS<'a> { } trace!( "enter_node finished with result {} for node {}", - found, - node + found, node ); Ok(found) } diff --git a/core/src/graph/mod.rs b/core/src/graph/mod.rs index 80d167fd8..f38a57b33 100644 --- a/core/src/graph/mod.rs +++ b/core/src/graph/mod.rs @@ -5,7 +5,7 @@ pub mod update; use crate::{ annostorage::{AnnotationStorage, NodeAnnotationStorage, ValueSearch}, errors::Result, - graph::storage::{registry, GraphStorage, WriteableGraphStorage}, + graph::storage::{GraphStorage, WriteableGraphStorage, registry}, }; use crate::{ errors::GraphAnnisCoreError, @@ -97,7 +97,7 @@ fn component_path( c: &Component, ) -> Option { match location { - Some(ref loc) => { + Some(loc) => { let mut p = PathBuf::from(loc); // Check if we need to load the component from the backup folder let backup = loc.join("backup"); @@ -520,12 +520,12 @@ impl Graph { &mut node_id_cache, )?; // only add edge if both nodes already exist - if let (Some(source), Some(target)) = (source, target) { - if let Ok(ctype) = 
CT::from_str(component_type) { - let c = Component::new(ctype, layer.into(), component_name.into()); - let gs = self.get_or_create_writable(&c)?; - gs.add_edge(Edge { source, target })?; - } + if let (Some(source), Some(target)) = (source, target) + && let Ok(ctype) = CT::from_str(component_type) + { + let c = Component::new(ctype, layer.into(), component_name.into()); + let gs = self.get_or_create_writable(&c)?; + gs.add_edge(Edge { source, target })?; } } UpdateEvent::DeleteEdge { @@ -543,13 +543,13 @@ impl Graph { Cow::Borrowed(target_node), &mut node_id_cache, )?; - if let (Some(source), Some(target)) = (source, target) { - if let Ok(ctype) = CT::from_str(component_type) { - let c = Component::new(ctype, layer.into(), component_name.into()); + if let (Some(source), Some(target)) = (source, target) + && let Ok(ctype) = CT::from_str(component_type) + { + let c = Component::new(ctype, layer.into(), component_name.into()); - let gs = self.get_or_create_writable(&c)?; - gs.delete_edge(&Edge { source, target })?; - } + let gs = self.get_or_create_writable(&c)?; + gs.delete_edge(&Edge { source, target })?; } } UpdateEvent::AddEdgeLabel { @@ -570,22 +570,22 @@ impl Graph { Cow::Borrowed(target_node), &mut node_id_cache, )?; - if let (Some(source), Some(target)) = (source, target) { - if let Ok(ctype) = CT::from_str(component_type) { - let c = Component::new(ctype, layer.into(), component_name.into()); - let gs = self.get_or_create_writable(&c)?; - // only add label if the edge already exists - let e = Edge { source, target }; - if gs.is_connected(source, target, 1, Included(1))? 
{ - let anno = Annotation { - key: AnnoKey { - ns: anno_ns.into(), - name: anno_name.into(), - }, - val: anno_value.into(), - }; - gs.add_edge_annotation(e, anno)?; - } + if let (Some(source), Some(target)) = (source, target) + && let Ok(ctype) = CT::from_str(component_type) + { + let c = Component::new(ctype, layer.into(), component_name.into()); + let gs = self.get_or_create_writable(&c)?; + // only add label if the edge already exists + let e = Edge { source, target }; + if gs.is_connected(source, target, 1, Included(1))? { + let anno = Annotation { + key: AnnoKey { + ns: anno_ns.into(), + name: anno_name.into(), + }, + val: anno_value.into(), + }; + gs.add_edge_annotation(e, anno)?; } } } @@ -606,19 +606,19 @@ impl Graph { Cow::Borrowed(target_node), &mut node_id_cache, )?; - if let (Some(source), Some(target)) = (source, target) { - if let Ok(ctype) = CT::from_str(component_type) { - let c = Component::new(ctype, layer.into(), component_name.into()); - let gs = self.get_or_create_writable(&c)?; - // only add label if the edge already exists - let e = Edge { source, target }; - if gs.is_connected(source, target, 1, Included(1))? { - let key = AnnoKey { - ns: anno_ns.into(), - name: anno_name.into(), - }; - gs.delete_edge_annotation(&e, &key)?; - } + if let (Some(source), Some(target)) = (source, target) + && let Ok(ctype) = CT::from_str(component_type) + { + let c = Component::new(ctype, layer.into(), component_name.into()); + let gs = self.get_or_create_writable(&c)?; + // only add label if the edge already exists + let e = Edge { source, target }; + if gs.is_connected(source, target, 1, Included(1))? { + let key = AnnoKey { + ns: anno_ns.into(), + name: anno_name.into(), + }; + gs.delete_edge_annotation(&e, &key)?; } } } @@ -889,10 +889,10 @@ impl Graph { /// Returns `true` if the graph storage for this specific component is loaded and ready to use. 
pub fn is_loaded(&self, c: &Component) -> bool { let entry: Option<&Option>> = self.components.get(c); - if let Some(gs_opt) = entry { - if gs_opt.is_some() { - return true; - } + if let Some(gs_opt) = entry + && gs_opt.is_some() + { + return true; } false } @@ -1022,31 +1022,31 @@ impl Graph { } pub fn optimize_gs_impl(&mut self, c: &Component) -> Result<()> { - if let Some(gs) = self.get_graphstorage(c) { - if let Some(stats) = gs.get_statistics() { - let opt_info = registry::get_optimal_impl_heuristic(self, stats); - - // convert if necessary - if opt_info.id != gs.serialization_id() { - let mut new_gs = registry::create_from_info(&opt_info)?; - let converted = if let Some(new_gs_mut) = Arc::get_mut(&mut new_gs) { - info!( - "converting component {} to implementation {}", - c, opt_info.id, - ); - new_gs_mut.copy(self.get_node_annos(), gs.as_ref())?; - true - } else { - false - }; - if converted { - // insert into components map - info!( - "finished conversion of component {} to implementation {}", - c, opt_info.id, - ); - self.components.insert(c.clone(), Some(new_gs.clone())); - } + if let Some(gs) = self.get_graphstorage(c) + && let Some(stats) = gs.get_statistics() + { + let opt_info = registry::get_optimal_impl_heuristic(self, stats); + + // convert if necessary + if opt_info.id != gs.serialization_id() { + let mut new_gs = registry::create_from_info(&opt_info)?; + let converted = if let Some(new_gs_mut) = Arc::get_mut(&mut new_gs) { + info!( + "converting component {} to implementation {}", + c, opt_info.id, + ); + new_gs_mut.copy(self.get_node_annos(), gs.as_ref())?; + true + } else { + false + }; + if converted { + // insert into components map + info!( + "finished conversion of component {} to implementation {}", + c, opt_info.id, + ); + self.components.insert(c.clone(), Some(new_gs.clone())); } } } @@ -1119,15 +1119,15 @@ impl Graph { .components .keys() .filter(move |c| { - if let Some(ctype) = ctype.clone() { - if ctype != c.get_type() { - return 
false; - } + if let Some(ctype) = ctype.clone() + && ctype != c.get_type() + { + return false; } - if let Some(name) = name { - if name != c.name { - return false; - } + if let Some(name) = name + && name != c.name + { + return false; } true }) diff --git a/core/src/graph/serialization/graphml.rs b/core/src/graph/serialization/graphml.rs index 420f268cf..e225dadea 100644 --- a/core/src/graph/serialization/graphml.rs +++ b/core/src/graph/serialization/graphml.rs @@ -2,18 +2,18 @@ use crate::{ annostorage::{Match, ValueSearch}, errors::{GraphAnnisCoreError, Result}, graph::{ + ANNIS_NS, Graph, NODE_NAME, NODE_NAME_KEY, NODE_TYPE, NODE_TYPE_KEY, update::{GraphUpdate, UpdateEvent}, - Graph, ANNIS_NS, NODE_NAME, NODE_NAME_KEY, NODE_TYPE, NODE_TYPE_KEY, }, types::{AnnoKey, Annotation, Component, ComponentType, Edge}, util::{join_qname, split_qname}, }; use itertools::Itertools; use quick_xml::{ + Reader, Writer, events::{ - attributes::Attributes, BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event, + BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event, attributes::Attributes, }, - Reader, Writer, }; use std::{ cmp::Ordering, @@ -78,26 +78,26 @@ fn write_annotation_keys( all_components.sort_unstable(); } for c in all_components { - if !autogenerated_components.contains(&c) { - if let Some(gs) = graph.get_graphstorage(&c) { - for key in gs.get_anno_storage().annotation_keys()? { - #[allow(clippy::map_entry)] - if !key_id_mapping.contains_key(&key) { - let new_id = format!("k{}", id_counter); - id_counter += 1; + if !autogenerated_components.contains(&c) + && let Some(gs) = graph.get_graphstorage(&c) + { + for key in gs.get_anno_storage().annotation_keys()? 
{ + #[allow(clippy::map_entry)] + if !key_id_mapping.contains_key(&key) { + let new_id = format!("k{}", id_counter); + id_counter += 1; - let qname = join_qname(&key.ns, &key.name); + let qname = join_qname(&key.ns, &key.name); - let mut key_start = BytesStart::new("key"); - key_start.push_attribute(("id", new_id.as_str())); - key_start.push_attribute(("for", "node")); - key_start.push_attribute(("attr.name", qname.as_str())); - key_start.push_attribute(("attr.type", "string")); + let mut key_start = BytesStart::new("key"); + key_start.push_attribute(("id", new_id.as_str())); + key_start.push_attribute(("for", "node")); + key_start.push_attribute(("attr.name", qname.as_str())); + key_start.push_attribute(("attr.type", "string")); - writer.write_event(Event::Empty(key_start))?; + writer.write_event(Event::Empty(key_start))?; - key_id_mapping.insert(key, new_id); - } + key_id_mapping.insert(key, new_id); } } } @@ -211,63 +211,63 @@ fn write_edges( for c in all_components { // Create edge annotation keys for all components, but skip auto-generated ones - if !autogenerated_components.contains(&c) { - if let Some(gs) = graph.get_graphstorage(&c) { - let source_nodes_iterator = if sorted { - Box::new(gs.source_nodes().sorted_unstable_by(compare_results)) - } else { - gs.source_nodes() - }; - for source in source_nodes_iterator { - let source = source?; - if let Some(source_id) = graph - .get_node_annos() - .get_value_for_item(&source, &NODE_NAME_KEY)? 
- { - let target_nodes_iterator = if sorted { - Box::new( - gs.get_outgoing_edges(source) - .sorted_unstable_by(compare_results), - ) - } else { + if !autogenerated_components.contains(&c) + && let Some(gs) = graph.get_graphstorage(&c) + { + let source_nodes_iterator = if sorted { + Box::new(gs.source_nodes().sorted_unstable_by(compare_results)) + } else { + gs.source_nodes() + }; + for source in source_nodes_iterator { + let source = source?; + if let Some(source_id) = graph + .get_node_annos() + .get_value_for_item(&source, &NODE_NAME_KEY)? + { + let target_nodes_iterator = if sorted { + Box::new( gs.get_outgoing_edges(source) - }; - for target in target_nodes_iterator { - let target = target?; - if let Some(target_id) = graph - .get_node_annos() - .get_value_for_item(&target, &NODE_NAME_KEY)? - { - let edge = Edge { source, target }; - - let mut edge_id = edge_counter.to_string(); - edge_counter += 1; - edge_id.insert(0, 'e'); - - let mut edge_start = BytesStart::new("edge"); - edge_start.push_attribute(("id", edge_id.as_str())); - edge_start.push_attribute(("source", source_id.as_ref())); - edge_start.push_attribute(("target", target_id.as_ref())); - // Use the "label" attribute as component type. This is consistent with how Neo4j interprets this non-standard attribute - edge_start.push_attribute(("label", c.to_string().as_ref())); - - writer.write_event(Event::Start(edge_start))?; - - // Write all annotations of the node as "data" element, but sort - // them using the internal annotation key (k0, k1, k2, etc.) 
- let mut edge_annotations = - gs.get_anno_storage().get_annotations_for_item(&edge)?; - edge_annotations.sort_unstable_by_key(|anno| { - key_id_mapping - .get(&anno.key) - .map(|internal_key| internal_key.as_str()) - .unwrap_or("") - }); - for anno in edge_annotations { - write_data(anno, writer, key_id_mapping)?; - } - writer.write_event(Event::End(BytesEnd::new("edge")))?; + .sorted_unstable_by(compare_results), + ) + } else { + gs.get_outgoing_edges(source) + }; + for target in target_nodes_iterator { + let target = target?; + if let Some(target_id) = graph + .get_node_annos() + .get_value_for_item(&target, &NODE_NAME_KEY)? + { + let edge = Edge { source, target }; + + let mut edge_id = edge_counter.to_string(); + edge_counter += 1; + edge_id.insert(0, 'e'); + + let mut edge_start = BytesStart::new("edge"); + edge_start.push_attribute(("id", edge_id.as_str())); + edge_start.push_attribute(("source", source_id.as_ref())); + edge_start.push_attribute(("target", target_id.as_ref())); + // Use the "label" attribute as component type. This is consistent with how Neo4j interprets this non-standard attribute + edge_start.push_attribute(("label", c.to_string().as_ref())); + + writer.write_event(Event::Start(edge_start))?; + + // Write all annotations of the node as "data" element, but sort + // them using the internal annotation key (k0, k1, k2, etc.) 
+ let mut edge_annotations = + gs.get_anno_storage().get_annotations_for_item(&edge)?; + edge_annotations.sort_unstable_by_key(|anno| { + key_id_mapping + .get(&anno.key) + .map(|internal_key| internal_key.as_str()) + .unwrap_or("") + }); + for anno in edge_annotations { + write_data(anno, writer, key_id_mapping)?; } + writer.write_event(Event::End(BytesEnd::new("edge")))?; } } } @@ -595,11 +595,13 @@ fn read_graphml( } } Event::CData(t) => { - if let Some(current_data_key) = &current_data_key { - if in_graph && level == 3 && current_data_key == "k0" { - // This is the configuration content - config = Some(String::from_utf8_lossy(&t).to_string()); - } + if let Some(current_data_key) = &current_data_key + && in_graph + && level == 3 + && current_data_key == "k0" + { + // This is the configuration content + config = Some(String::from_utf8_lossy(&t).to_string()); } } Event::End(ref e) => { @@ -638,17 +640,17 @@ fn read_graphml( } } b"data" => { - if let Some(current_data_key) = current_data_key { - if let Some(anno_key) = keys.get(&current_data_key) { - // Copy all data attributes into our own map - if let Some(v) = current_data_value.take() { - data.insert(anno_key.clone(), v); - } else { - // If there is an end tag without any text - // data event, the value exists but is - // empty. - data.insert(anno_key.clone(), String::default()); - } + if let Some(current_data_key) = current_data_key + && let Some(anno_key) = keys.get(&current_data_key) + { + // Copy all data attributes into our own map + if let Some(v) = current_data_value.take() { + data.insert(anno_key.clone(), v); + } else { + // If there is an end tag without any text + // data event, the value exists but is + // empty.
+ data.insert(anno_key.clone(), String::default()); } } @@ -720,7 +722,7 @@ where mod tests { use super::*; use crate::{ - graph::{GraphUpdate, DEFAULT_NS}, + graph::{DEFAULT_NS, GraphUpdate}, types::DefaultComponentType, }; use pretty_assertions::assert_eq; diff --git a/core/src/graph/storage/adjacencylist.rs b/core/src/graph/storage/adjacencylist.rs index c70f19f2f..6b23a5529 100644 --- a/core/src/graph/storage/adjacencylist.rs +++ b/core/src/graph/storage/adjacencylist.rs @@ -1,6 +1,6 @@ use crate::{ annostorage::{ - inmemory::AnnoStorageImpl, AnnotationStorage, EdgeAnnotationStorage, NodeAnnotationStorage, + AnnotationStorage, EdgeAnnotationStorage, NodeAnnotationStorage, inmemory::AnnoStorageImpl, }, dfs::CycleSafeDFS, errors::Result, @@ -8,10 +8,9 @@ use crate::{ }; use super::{ - deserialize_gs_field, + EdgeContainer, GraphStatistic, GraphStorage, WriteableGraphStorage, deserialize_gs_field, legacy::{self, AdjacencyListStorageV1}, - load_statistics_from_location, save_statistics_to_toml, serialize_gs_field, EdgeContainer, - GraphStatistic, GraphStorage, WriteableGraphStorage, + load_statistics_from_location, save_statistics_to_toml, serialize_gs_field, }; use itertools::Itertools; use rustc_hash::FxHashSet; @@ -294,25 +293,25 @@ impl WriteableGraphStorage for AdjacencyListStorage { } fn add_edge_annotation(&mut self, edge: Edge, anno: Annotation) -> Result<()> { - if let Some(outgoing) = self.edges.get(&edge.source) { - if outgoing.contains(&edge.target) { - self.annos.insert(edge, anno)?; - } + if let Some(outgoing) = self.edges.get(&edge.source) + && outgoing.contains(&edge.target) + { + self.annos.insert(edge, anno)?; } Ok(()) } fn delete_edge(&mut self, edge: &Edge) -> Result<()> { - if let Some(outgoing) = self.edges.get_mut(&edge.source) { - if let Ok(idx) = outgoing.binary_search(&edge.target) { - outgoing.remove(idx); - } + if let Some(outgoing) = self.edges.get_mut(&edge.source) + && let Ok(idx) = outgoing.binary_search(&edge.target) + { + 
outgoing.remove(idx); } - if let Some(ingoing) = self.inverse_edges.get_mut(&edge.target) { - if let Ok(idx) = ingoing.binary_search(&edge.source) { - ingoing.remove(idx); - } + if let Some(ingoing) = self.inverse_edges.get_mut(&edge.target) + && let Ok(idx) = ingoing.binary_search(&edge.source) + { + ingoing.remove(idx); } self.annos.remove_item(edge)?; diff --git a/core/src/graph/storage/dense_adjacency.rs b/core/src/graph/storage/dense_adjacency.rs index 3d86425a6..66c6413b5 100644 --- a/core/src/graph/storage/dense_adjacency.rs +++ b/core/src/graph/storage/dense_adjacency.rs @@ -1,10 +1,11 @@ use super::{ - deserialize_gs_field, legacy::DenseAdjacencyListStorageV1, load_statistics_from_location, - save_statistics_to_toml, serialize_gs_field, EdgeContainer, GraphStatistic, GraphStorage, + EdgeContainer, GraphStatistic, GraphStorage, deserialize_gs_field, + legacy::DenseAdjacencyListStorageV1, load_statistics_from_location, save_statistics_to_toml, + serialize_gs_field, }; use crate::{ annostorage::{ - inmemory::AnnoStorageImpl, AnnotationStorage, EdgeAnnotationStorage, NodeAnnotationStorage, + AnnotationStorage, EdgeAnnotationStorage, NodeAnnotationStorage, inmemory::AnnoStorageImpl, }, dfs::CycleSafeDFS, errors::Result, @@ -47,12 +48,11 @@ impl EdgeContainer for DenseAdjacencyListStorage { &'a self, node: NodeID, ) -> Box> + 'a> { - if let Some(node) = node.to_usize() { - if node < self.edges.len() { - if let Some(outgoing) = self.edges[node] { - return Box::new(std::iter::once(Ok(outgoing))); - } - } + if let Some(node) = node.to_usize() + && node < self.edges.len() + && let Some(outgoing) = self.edges[node] + { + return Box::new(std::iter::once(Ok(outgoing))); } Box::new(std::iter::empty()) } @@ -179,23 +179,23 @@ impl GraphStorage for DenseAdjacencyListStorage { for source in orig.source_nodes() { let source = source?; - if let Some(idx) = source.to_usize() { - if let Some(target) = orig.get_outgoing_edges(source).next() { - let target = target?; - // insert 
edge - self.edges[idx] = Some(target); + if let Some(idx) = source.to_usize() + && let Some(target) = orig.get_outgoing_edges(source).next() + { + let target = target?; + // insert edge + self.edges[idx] = Some(target); - // insert inverse edge - let e = Edge { source, target }; - let inverse_entry = self.inverse_edges.entry(e.target).or_default(); - // no need to insert it: edge already exists - if let Err(insertion_idx) = inverse_entry.binary_search(&e.source) { - inverse_entry.insert(insertion_idx, e.source); - } - // insert annotation - for a in orig.get_anno_storage().get_annotations_for_item(&e)? { - self.annos.insert(e.clone(), a)?; - } + // insert inverse edge + let e = Edge { source, target }; + let inverse_entry = self.inverse_edges.entry(e.target).or_default(); + // no need to insert it: edge already exists + if let Err(insertion_idx) = inverse_entry.binary_search(&e.source) { + inverse_entry.insert(insertion_idx, e.source); + } + // insert annotation + for a in orig.get_anno_storage().get_annotations_for_item(&e)? 
{ + self.annos.insert(e.clone(), a)?; } } } diff --git a/core/src/graph/storage/disk_adjacency.rs b/core/src/graph/storage/disk_adjacency.rs index f38b9bdfb..d3e992409 100644 --- a/core/src/graph/storage/disk_adjacency.rs +++ b/core/src/graph/storage/disk_adjacency.rs @@ -1,10 +1,10 @@ use super::*; use crate::{ - annostorage::{ondisk::AnnoStorageImpl, NodeAnnotationStorage}, + annostorage::{NodeAnnotationStorage, ondisk::AnnoStorageImpl}, dfs::CycleSafeDFS, errors::Result, - util::disk_collections::{DiskMap, EvictionStrategy, DEFAULT_BLOCK_CACHE_CAPACITY}, + util::disk_collections::{DEFAULT_BLOCK_CACHE_CAPACITY, DiskMap, EvictionStrategy}, }; use itertools::Itertools; use rustc_hash::FxHashSet; diff --git a/core/src/graph/storage/disk_path.rs b/core/src/graph/storage/disk_path.rs index 9264bd616..a5a668c96 100644 --- a/core/src/graph/storage/disk_path.rs +++ b/core/src/graph/storage/disk_path.rs @@ -8,17 +8,17 @@ use tempfile::tempfile; use transient_btree_index::BtreeConfig; use crate::{ - annostorage::{ondisk::AnnoStorageImpl, AnnotationStorage}, + annostorage::{AnnotationStorage, ondisk::AnnoStorageImpl}, dfs::CycleSafeDFS, errors::Result, try_as_boxed_iter, types::{Edge, NodeID}, - util::disk_collections::{DiskMap, EvictionStrategy, DEFAULT_BLOCK_CACHE_CAPACITY}, + util::disk_collections::{DEFAULT_BLOCK_CACHE_CAPACITY, DiskMap, EvictionStrategy}, }; use super::{ - load_statistics_from_location, save_statistics_to_toml, EdgeContainer, GraphStatistic, - GraphStorage, + EdgeContainer, GraphStatistic, GraphStorage, load_statistics_from_location, + save_statistics_to_toml, }; use binary_layout::prelude::*; diff --git a/core/src/graph/storage/disk_path/tests.rs b/core/src/graph/storage/disk_path/tests.rs index e2418c3a3..e78717d67 100644 --- a/core/src/graph/storage/disk_path/tests.rs +++ b/core/src/graph/storage/disk_path/tests.rs @@ -1,6 +1,6 @@ use super::*; use crate::{ - graph::storage::{adjacencylist::AdjacencyListStorage, WriteableGraphStorage}, + 
graph::storage::{WriteableGraphStorage, adjacencylist::AdjacencyListStorage}, types::{AnnoKey, Annotation}, }; use pretty_assertions::assert_eq; diff --git a/core/src/graph/storage/legacy.rs b/core/src/graph/storage/legacy.rs index ad011bd48..2d39eaaf3 100644 --- a/core/src/graph/storage/legacy.rs +++ b/core/src/graph/storage/legacy.rs @@ -14,9 +14,9 @@ use crate::{ }; use super::{ + GraphStatistic, linear::RelativePosition, prepost::{OrderVecEntry, PrePost}, - GraphStatistic, }; /// Some general statistical numbers specific to a graph component diff --git a/core/src/graph/storage/linear.rs b/core/src/graph/storage/linear.rs index d3f037002..9fcd67363 100644 --- a/core/src/graph/storage/linear.rs +++ b/core/src/graph/storage/linear.rs @@ -1,10 +1,11 @@ use super::{ - deserialize_gs_field, legacy::LinearGraphStorageV1, load_statistics_from_location, - save_statistics_to_toml, serialize_gs_field, EdgeContainer, GraphStatistic, GraphStorage, + EdgeContainer, GraphStatistic, GraphStorage, deserialize_gs_field, + legacy::LinearGraphStorageV1, load_statistics_from_location, save_statistics_to_toml, + serialize_gs_field, }; use crate::{ annostorage::{ - inmemory::AnnoStorageImpl, AnnotationStorage, EdgeAnnotationStorage, NodeAnnotationStorage, + AnnotationStorage, EdgeAnnotationStorage, NodeAnnotationStorage, inmemory::AnnoStorageImpl, }, dfs::CycleSafeDFS, errors::Result, @@ -93,10 +94,10 @@ where // find the next node in the chain if let Some(chain) = self.node_chains.get(&pos.root) { let next_pos = pos.pos.clone() + PosT::one(); - if let Some(next_pos) = next_pos.to_usize() { - if next_pos < chain.len() { - return Box::from(std::iter::once(Ok(chain[next_pos]))); - } + if let Some(next_pos) = next_pos.to_usize() + && next_pos < chain.len() + { + return Box::from(std::iter::once(Ok(chain[next_pos]))); } } } @@ -109,12 +110,11 @@ where ) -> Box> + 'a> { if let Some(pos) = self.node_to_pos.get(&node) { // find the previous node in the chain - if let Some(chain) = 
self.node_chains.get(&pos.root) { - if let Some(pos) = pos.pos.to_usize() { - if let Some(previous_pos) = pos.checked_sub(1) { - return Box::from(std::iter::once(Ok(chain[previous_pos]))); - } - } + if let Some(chain) = self.node_chains.get(&pos.root) + && let Some(pos) = pos.pos.to_usize() + && let Some(previous_pos) = pos.checked_sub(1) + { + return Box::from(std::iter::once(Ok(chain[previous_pos]))); } } Box::from(std::iter::empty()) @@ -206,30 +206,23 @@ where min_distance: usize, max_distance: std::ops::Bound, ) -> Box> + 'a> { - if let Some(start_pos) = self.node_to_pos.get(&source) { - if let Some(chain) = self.node_chains.get(&start_pos.root) { - if let Some(offset) = start_pos.pos.to_usize() { - if let Some(min_distance) = offset.checked_add(min_distance) { - if min_distance < chain.len() { - let max_distance = match max_distance { - std::ops::Bound::Unbounded => { - return Box::new(chain[min_distance..].iter().map(|n| Ok(*n))); - } - std::ops::Bound::Included(max_distance) => { - offset + max_distance + 1 - } - std::ops::Bound::Excluded(max_distance) => offset + max_distance, - }; - // clip to chain length - let max_distance = std::cmp::min(chain.len(), max_distance); - if min_distance < max_distance { - return Box::new( - chain[min_distance..max_distance].iter().map(|n| Ok(*n)), - ); - } - } - } + if let Some(start_pos) = self.node_to_pos.get(&source) + && let Some(chain) = self.node_chains.get(&start_pos.root) + && let Some(offset) = start_pos.pos.to_usize() + && let Some(min_distance) = offset.checked_add(min_distance) + && min_distance < chain.len() + { + let max_distance = match max_distance { + std::ops::Bound::Unbounded => { + return Box::new(chain[min_distance..].iter().map(|n| Ok(*n))); } + std::ops::Bound::Included(max_distance) => offset + max_distance + 1, + std::ops::Bound::Excluded(max_distance) => offset + max_distance, + }; + // clip to chain length + let max_distance = std::cmp::min(chain.len(), max_distance); + if min_distance < 
max_distance { + return Box::new(chain[min_distance..max_distance].iter().map(|n| Ok(*n))); } } Box::new(std::iter::empty()) @@ -241,32 +234,23 @@ where min_distance: usize, max_distance: std::ops::Bound, ) -> Box> + 'a> { - if let Some(start_pos) = self.node_to_pos.get(&source) { - if let Some(chain) = self.node_chains.get(&start_pos.root) { - if let Some(offset) = start_pos.pos.to_usize() { - let max_distance = match max_distance { - std::ops::Bound::Unbounded => 0, - std::ops::Bound::Included(max_distance) => { - offset.saturating_sub(max_distance) - } - std::ops::Bound::Excluded(max_distance) => { - offset.saturating_sub(max_distance + 1) - } - }; + if let Some(start_pos) = self.node_to_pos.get(&source) + && let Some(chain) = self.node_chains.get(&start_pos.root) + && let Some(offset) = start_pos.pos.to_usize() + { + let max_distance = match max_distance { + std::ops::Bound::Unbounded => 0, + std::ops::Bound::Included(max_distance) => offset.saturating_sub(max_distance), + std::ops::Bound::Excluded(max_distance) => offset.saturating_sub(max_distance + 1), + }; - if let Some(min_distance) = offset.checked_sub(min_distance) { - if min_distance < chain.len() && max_distance <= min_distance { - // return all entries in the chain between min_distance..max_distance (inclusive) - return Box::new( - chain[max_distance..=min_distance].iter().map(|n| Ok(*n)), - ); - } else if max_distance < chain.len() { - // return all entries in the chain between min_distance..max_distance - return Box::new( - chain[max_distance..chain.len()].iter().map(|n| Ok(*n)), - ); - } - } + if let Some(min_distance) = offset.checked_sub(min_distance) { + if min_distance < chain.len() && max_distance <= min_distance { + // return all entries in the chain between min_distance..max_distance (inclusive) + return Box::new(chain[max_distance..=min_distance].iter().map(|n| Ok(*n))); + } else if max_distance < chain.len() { + // return all entries in the chain between min_distance..max_distance + return 
Box::new(chain[max_distance..chain.len()].iter().map(|n| Ok(*n))); } } } @@ -280,12 +264,12 @@ where if let (Some(source_pos), Some(target_pos)) = (self.node_to_pos.get(&source), self.node_to_pos.get(&target)) + && source_pos.root == target_pos.root + && source_pos.pos <= target_pos.pos { - if source_pos.root == target_pos.root && source_pos.pos <= target_pos.pos { - let diff = target_pos.pos.clone() - source_pos.pos.clone(); - if let Some(diff) = diff.to_usize() { - return Ok(Some(diff)); - } + let diff = target_pos.pos.clone() - source_pos.pos.clone(); + if let Some(diff) = diff.to_usize() { + return Ok(Some(diff)); } } Ok(None) @@ -300,20 +284,20 @@ where ) -> Result { if let (Some(source_pos), Some(target_pos)) = (self.node_to_pos.get(&source), self.node_to_pos.get(&target)) + && source_pos.root == target_pos.root + && source_pos.pos <= target_pos.pos { - if source_pos.root == target_pos.root && source_pos.pos <= target_pos.pos { - let diff = target_pos.pos.clone() - source_pos.pos.clone(); - if let Some(diff) = diff.to_usize() { - match max_distance { - std::ops::Bound::Unbounded => { - return Ok(diff >= min_distance); - } - std::ops::Bound::Included(max_distance) => { - return Ok(diff >= min_distance && diff <= max_distance); - } - std::ops::Bound::Excluded(max_distance) => { - return Ok(diff >= min_distance && diff < max_distance); - } + let diff = target_pos.pos.clone() - source_pos.pos.clone(); + if let Some(diff) = diff.to_usize() { + match max_distance { + std::ops::Bound::Unbounded => { + return Ok(diff >= min_distance); + } + std::ops::Bound::Included(max_distance) => { + return Ok(diff >= min_distance && diff <= max_distance); + } + std::ops::Bound::Excluded(max_distance) => { + return Ok(diff >= min_distance && diff < max_distance); } } } diff --git a/core/src/graph/storage/mod.rs b/core/src/graph/storage/mod.rs index 52f0498fc..6d3e4f286 100644 --- a/core/src/graph/storage/mod.rs +++ b/core/src/graph/storage/mod.rs @@ -56,7 +56,13 @@ impl 
std::fmt::Display for GraphStatistic { write!( f, "nodes={}, root nodes={}, avg_fan_out={:.2}, max_fan_out={}, fan_out_99%={}, inv_fan_out_99%={}, max_depth={}", - self.nodes, self.root_nodes, self.avg_fan_out, self.max_fan_out, self.fan_out_99_percentile, self.inverse_fan_out_99_percentile, self.max_depth + self.nodes, + self.root_nodes, + self.avg_fan_out, + self.max_fan_out, + self.fan_out_99_percentile, + self.inverse_fan_out_99_percentile, + self.max_depth )?; if self.cyclic { write!(f, ", cyclic")?; diff --git a/core/src/graph/storage/prepost.rs b/core/src/graph/storage/prepost.rs index 6f3ee559a..275ed982c 100644 --- a/core/src/graph/storage/prepost.rs +++ b/core/src/graph/storage/prepost.rs @@ -1,10 +1,11 @@ use super::{ - deserialize_gs_field, legacy::PrePostOrderStorageV1, load_statistics_from_location, - save_statistics_to_toml, serialize_gs_field, EdgeContainer, GraphStatistic, GraphStorage, + EdgeContainer, GraphStatistic, GraphStorage, deserialize_gs_field, + legacy::PrePostOrderStorageV1, load_statistics_from_location, save_statistics_to_toml, + serialize_gs_field, }; use crate::{ annostorage::{ - inmemory::AnnoStorageImpl, AnnotationStorage, EdgeAnnotationStorage, NodeAnnotationStorage, + AnnotationStorage, EdgeAnnotationStorage, NodeAnnotationStorage, inmemory::AnnoStorageImpl, }, dfs::CycleSafeDFS, errors::Result, @@ -262,11 +263,7 @@ where .map(move |order| (root_order.clone(), order)) }) .filter_map(move |(root, order)| match order { - OrderVecEntry::Pre { - ref post, - ref level, - ref node, - } => { + OrderVecEntry::Pre { post, level, node } => { if let (Some(current_level), Some(root_level)) = (level.to_usize(), root.level.to_usize()) { @@ -336,20 +333,16 @@ where .filter_map(move |(use_post, root, idx, order)| { let (current_pre, current_post, current_level, current_node) = if use_post { match order { - OrderVecEntry::Post { - ref pre, - ref level, - ref node, - } => (pre.to_usize(), Some(idx), level.to_usize(), Some(node)), + 
OrderVecEntry::Post { pre, level, node } => { + (pre.to_usize(), Some(idx), level.to_usize(), Some(node)) + } _ => (None, None, None, None), } } else { match order { - OrderVecEntry::Pre { - ref post, - ref level, - ref node, - } => (Some(idx), post.to_usize(), level.to_usize(), Some(node)), + OrderVecEntry::Pre { post, level, node } => { + (Some(idx), post.to_usize(), level.to_usize(), Some(node)) + } _ => (None, None, None, None), } }; @@ -416,11 +409,10 @@ where // check the level if let (Some(source_level), Some(target_level)) = (order_source.level.to_usize(), order_target.level.to_usize()) + && source_level <= target_level { - if source_level <= target_level { - was_found = true; - min_level = std::cmp::min(target_level - source_level, min_level); - } + was_found = true; + min_level = std::cmp::min(target_level - source_level, min_level); } } } @@ -458,11 +450,10 @@ where // check the level if let (Some(source_level), Some(target_level)) = (order_source.level.to_usize(), order_target.level.to_usize()) + && source_level <= target_level { - if source_level <= target_level { - let diff_level = target_level - source_level; - return Ok(min_distance <= diff_level && diff_level <= max_distance); - } + let diff_level = target_level - source_level; + return Ok(min_distance <= diff_level && diff_level <= max_distance); } } } diff --git a/core/src/graph/storage/registry.rs b/core/src/graph/storage/registry.rs index 67091eafc..d2f7ae477 100644 --- a/core/src/graph/storage/registry.rs +++ b/core/src/graph/storage/registry.rs @@ -4,8 +4,8 @@ use super::disk_adjacency::DiskAdjacencyListStorage; use super::disk_path::DiskPathStorage; use super::linear::LinearGraphStorage; +use super::{GraphStatistic, GraphStorage, prepost::PrePostOrderStorage}; use super::{disk_adjacency, disk_path}; -use super::{prepost::PrePostOrderStorage, GraphStatistic, GraphStorage}; use crate::{ errors::{GraphAnnisCoreError, Result}, graph::Graph, @@ -113,10 +113,11 @@ fn get_adjacencylist_impl(db: 
&Graph, stats: &GraphStatis create_info_diskadjacency() } else { // check if a large percentage of nodes are part of the graph storage - if let Ok(Some(largest_node_id)) = db.node_annos.get_largest_item() { - if stats.max_fan_out <= 1 && (stats.nodes as f64 / largest_node_id as f64) >= 0.75 { - return create_info::(); - } + if let Ok(Some(largest_node_id)) = db.node_annos.get_largest_item() + && stats.max_fan_out <= 1 + && (stats.nodes as f64 / largest_node_id as f64) >= 0.75 + { + return create_info::(); } create_info::() diff --git a/core/src/types.rs b/core/src/types.rs index 6b223a87a..fab0a4e93 100644 --- a/core/src/types.rs +++ b/core/src/types.rs @@ -1,6 +1,6 @@ use num_traits::{Bounded, FromPrimitive, Num, ToPrimitive}; -use serde::de::DeserializeOwned; use serde::Serialize; +use serde::de::DeserializeOwned; use smartstring::alias::String; use std::error::Error; use std::fmt; @@ -14,7 +14,7 @@ use super::serializer::{FixedSizeKeySerializer, KeySerializer}; use crate::serializer::KeyVec; use crate::{ errors::{ComponentTypeError, GraphAnnisCoreError}, - graph::{update::UpdateEvent, Graph}, + graph::{Graph, update::UpdateEvent}, }; use fmt::Debug; use std::result::Result as StdResult; diff --git a/core/src/util/disk_collections.rs b/core/src/util/disk_collections.rs index 5bcb43fdc..28cfea741 100644 --- a/core/src/util/disk_collections.rs +++ b/core/src/util/disk_collections.rs @@ -73,13 +73,12 @@ where ) -> Result> { let mut disk_table = None; - if let Some(persisted_file) = persisted_file { - if persisted_file.is_file() { - // Use existing file as read-only table which contains the whole map - let table = - Table::new_from_file(custom_options(block_cache_capacity), persisted_file)?; - disk_table = Some(table); - } + if let Some(persisted_file) = persisted_file + && persisted_file.is_file() + { + // Use existing file as read-only table which contains the whole map + let table = Table::new_from_file(custom_options(block_cache_capacity), persisted_file)?; + 
disk_table = Some(table); } Ok(DiskMap { @@ -129,15 +128,15 @@ where } } // Check C1 (BTree disk index) - if let Some(c1) = &self.c1 { - if let Some(entry) = c1.get(key)? { - if let Some(value) = entry { - return Ok(Some(Cow::Owned(value))); - } else { - // Value was explicitly deleted with a tombstone entry. - // Do not query C1 and C2. - return Ok(None); - } + if let Some(c1) = &self.c1 + && let Some(entry) = c1.get(key)? + { + if let Some(value) = entry { + return Ok(Some(Cow::Owned(value))); + } else { + // Value was explicitly deleted with a tombstone entry. + // Do not query C1 and C2. + return Ok(None); } } @@ -170,10 +169,10 @@ where } // Check C1 (BTree disk index) - if let Some(c1) = &self.c1 { - if c1.contains_key(key)? { - return Ok(true); - } + if let Some(c1) = &self.c1 + && c1.contains_key(key)? + { + return Ok(true); } // Use a iterator on the single disk to check if there is an entry with this, without getting the value. @@ -183,10 +182,10 @@ where let mut table_it = c2.iter(); let key = K::create_key(key); table_it.seek(&key); - if let Some(it_key) = table_it.current_key() { - if it_key == key.as_ref() { - return Ok(true); - } + if let Some(it_key) = table_it.current_key() + && it_key == key.as_ref() + { + return Ok(true); } } Ok(false) @@ -720,12 +719,12 @@ where fn range_contains(&self, item: &[u8]) -> bool { (match &self.range_start { - Bound::Included(ref start) => start.as_slice() <= item, - Bound::Excluded(ref start) => start.as_slice() < item, + Bound::Included(start) => start.as_slice() <= item, + Bound::Excluded(start) => start.as_slice() < item, Bound::Unbounded => true, }) && (match &self.range_end { - Bound::Included(ref end) => item <= end.as_ref(), - Bound::Excluded(ref end) => item < end.as_ref(), + Bound::Included(end) => item <= end.as_ref(), + Bound::Excluded(end) => item < end.as_ref(), Bound::Unbounded => true, }) } diff --git a/core/src/util/disk_collections/tests.rs b/core/src/util/disk_collections/tests.rs index 
a5939cf62..e5e74552b 100644 --- a/core/src/util/disk_collections/tests.rs +++ b/core/src/util/disk_collections/tests.rs @@ -1,8 +1,8 @@ use super::*; +use fake::Fake; use fake::faker::name::raw::*; use fake::locales::*; -use fake::Fake; use tempfile::NamedTempFile; #[test] diff --git a/core/src/util/example_graphs.rs b/core/src/util/example_graphs.rs index 1f082c29c..02a19d959 100644 --- a/core/src/util/example_graphs.rs +++ b/core/src/util/example_graphs.rs @@ -1,6 +1,6 @@ use crate::errors::Result; -use crate::graph::storage::adjacencylist::AdjacencyListStorage; use crate::graph::storage::WriteableGraphStorage; +use crate::graph::storage::adjacencylist::AdjacencyListStorage; use crate::types::{AnnoKey, Annotation, Edge}; /// Creates an example graph storage with the folllowing structure: diff --git a/core/src/util/mod.rs b/core/src/util/mod.rs index 479b1b4a3..12b479e63 100644 --- a/core/src/util/mod.rs +++ b/core/src/util/mod.rs @@ -1,5 +1,5 @@ use crate::errors::{GraphAnnisCoreError, Result}; -use percent_encoding::{utf8_percent_encode, AsciiSet, CONTROLS}; +use percent_encoding::{AsciiSet, CONTROLS, utf8_percent_encode}; use std::borrow::Cow; pub mod disk_collections; diff --git a/examples/tutorial/Cargo.toml b/examples/tutorial/Cargo.toml index c7cd6b6c7..1b4d6c380 100644 --- a/examples/tutorial/Cargo.toml +++ b/examples/tutorial/Cargo.toml @@ -1,9 +1,9 @@ [package] authors = ["Thomas Krause "] -edition = "2021" +edition = "2024" name = "graphannis-tutorial" publish = false version = "2.0.0" [dependencies] -graphannis = {path = "../../graphannis"} +graphannis = { path = "../../graphannis" } diff --git a/examples/tutorial/src/bin/apply_update.rs b/examples/tutorial/src/bin/apply_update.rs index 9dfd42166..c494687e5 100644 --- a/examples/tutorial/src/bin/apply_update.rs +++ b/examples/tutorial/src/bin/apply_update.rs @@ -1,5 +1,5 @@ -use graphannis::update::{GraphUpdate, UpdateEvent}; use graphannis::CorpusStorage; +use graphannis::update::{GraphUpdate, 
UpdateEvent}; use std::path::PathBuf; fn main() -> Result<(), Box> { diff --git a/examples/tutorial/src/bin/find_subgraph.rs b/examples/tutorial/src/bin/find_subgraph.rs index baf7e4907..c6e62f06d 100644 --- a/examples/tutorial/src/bin/find_subgraph.rs +++ b/examples/tutorial/src/bin/find_subgraph.rs @@ -1,6 +1,6 @@ +use graphannis::CorpusStorage; use graphannis::corpusstorage::{QueryLanguage, ResultOrder, SearchQuery}; use graphannis::util; -use graphannis::CorpusStorage; use std::path::PathBuf; fn main() { diff --git a/examples/tutorial/src/bin/query.rs b/examples/tutorial/src/bin/query.rs index d4fbab2a8..a738f1273 100644 --- a/examples/tutorial/src/bin/query.rs +++ b/examples/tutorial/src/bin/query.rs @@ -1,5 +1,5 @@ -use graphannis::corpusstorage::{QueryLanguage, ResultOrder, SearchQuery}; use graphannis::CorpusStorage; +use graphannis::corpusstorage::{QueryLanguage, ResultOrder, SearchQuery}; use std::path::PathBuf; fn main() { diff --git a/examples/tutorial/src/bin/subcorpus_graph.rs b/examples/tutorial/src/bin/subcorpus_graph.rs index 07c332477..d7a75b7d7 100644 --- a/examples/tutorial/src/bin/subcorpus_graph.rs +++ b/examples/tutorial/src/bin/subcorpus_graph.rs @@ -1,5 +1,5 @@ -use graphannis::update::{GraphUpdate, UpdateEvent}; use graphannis::CorpusStorage; +use graphannis::update::{GraphUpdate, UpdateEvent}; use std::path::PathBuf; fn main() -> Result<(), Box> { diff --git a/graphannis/Cargo.toml b/graphannis/Cargo.toml index 17c5431b6..e5541c0e6 100644 --- a/graphannis/Cargo.toml +++ b/graphannis/Cargo.toml @@ -2,7 +2,7 @@ authors = ["Thomas Krause "] build = "build.rs" description = "This is a new backend implementation of the ANNIS linguistic search and visualization system." 
-edition = "2021" +edition = "2024" license = "Apache-2.0" name = "graphannis" readme = "../README.md" @@ -36,7 +36,7 @@ lru = "0.7" memory-stats = "1.1.0" page_size = "0.4" percent-encoding = "2.1" -rand = { version = "0.8", features = ["small_rng"] } +rand = { version = "0.9", features = ["small_rng"] } rayon = { version = "1.3", default-features = false } regex = "1" regex-syntax = "0.8" diff --git a/graphannis/benches/graphannis.rs b/graphannis/benches/graphannis.rs index 1e474bf24..6fbbe0692 100644 --- a/graphannis/benches/graphannis.rs +++ b/graphannis/benches/graphannis.rs @@ -7,13 +7,13 @@ extern crate rand; extern crate rustc_hash; use criterion::Criterion; +use fake::Fake; use fake::faker::name::raw::*; use fake::locales::*; -use fake::Fake; +use graphannis::CorpusStorage; use graphannis::corpusstorage::ResultOrder; use graphannis::corpusstorage::{QueryLanguage, SearchQuery}; use graphannis::update::{GraphUpdate, UpdateEvent}; -use graphannis::CorpusStorage; use std::collections::HashSet; use std::path::Path; use std::path::PathBuf; diff --git a/graphannis/src/annis/db/aql/conjunction.rs b/graphannis/src/annis/db/aql/conjunction.rs index ed5a918a1..9268699c5 100644 --- a/graphannis/src/annis/db/aql/conjunction.rs +++ b/graphannis/src/annis/db/aql/conjunction.rs @@ -1,8 +1,9 @@ #[cfg(test)] mod tests; -use super::disjunction::Disjunction; use super::Config; +use super::disjunction::Disjunction; +use crate::AnnotationGraph; use crate::annis::db::aql::model::AnnotationComponentType; use crate::annis::db::exec::filter::Filter; use crate::annis::db::exec::indexjoin::IndexJoin; @@ -16,17 +17,14 @@ use crate::annis::operator::{ UnaryOperatorSpec, }; use crate::annis::util::TimeoutCheck; -use crate::AnnotationGraph; use crate::{ annis::types::{LineColumnRange, QueryAttributeDescription}, errors::Result, }; use graphannis_core::annostorage::EdgeAnnotationStorage; use graphannis_core::{annostorage::MatchGroup, graph::storage::GraphStatistic, types::Component}; -use 
rand::distributions::Distribution; -use rand::distributions::Uniform; -use rand::rngs::SmallRng; -use rand::SeedableRng; +use rand::distr::Uniform; +use rand::prelude::*; use std::collections::{BTreeMap, HashMap, HashSet}; use std::sync::Arc; @@ -133,11 +131,11 @@ fn should_switch_operand_order( op_spec: &BinaryOperatorSpecEntry, node2cost: &BTreeMap, ) -> bool { - if let Some((cost_lhs, cost_rhs)) = get_cost_estimates(op_spec, node2cost) { - if cost_rhs.output < cost_lhs.output { - // switch operands - return true; - } + if let Some((cost_lhs, cost_rhs)) = get_cost_estimates(op_spec, node2cost) + && cost_rhs.output < cost_lhs.output + { + // switch operands + return true; } false @@ -186,19 +184,19 @@ fn create_join<'b>( idx_left: usize, idx_right: usize, ) -> Result> + 'b>> { - if exec_right.as_nodesearch().is_some() { - if let BinaryOperator::Index(op) = op_entry.op { - // we can use directly use an index join - return create_index_join( - db, - config, - op, - &op_entry.args, - exec_left, - exec_right, - idx_left, - ); - } + if exec_right.as_nodesearch().is_some() + && let BinaryOperator::Index(op) = op_entry.op + { + // we can use directly use an index join + return create_index_join( + db, + config, + op, + &op_entry.args, + exec_left, + exec_right, + idx_left, + ); } if exec_left.as_nodesearch().is_some() { @@ -411,10 +409,10 @@ impl Conjunction { location: Option, ) -> Result { let idx = self.resolve_variable_pos(variable, location.clone())?; - if let Some(pos) = idx.checked_sub(self.var_idx_offset) { - if pos < self.nodes.len() { - return Ok(self.nodes[pos].clone()); - } + if let Some(pos) = idx.checked_sub(self.var_idx_offset) + && pos < self.nodes.len() + { + return Ok(self.nodes[pos].clone()); } Err(GraphAnnisError::AQLSemanticError(AQLError { @@ -460,7 +458,7 @@ impl Conjunction { // use a constant seed to make the result deterministic let mut rng = SmallRng::from_seed(*b"Graphs are great and need a seed"); - let dist = 
Uniform::from(0..self.binary_operators.len()); + let dist = Uniform::new(0, self.binary_operators.len())?; let mut best_operator_order: Vec<_> = (0..self.binary_operators.len()).collect(); @@ -707,13 +705,13 @@ impl Conjunction { let mut spec_idx_right = op_spec_entry.args.right; let inverse_op = op.get_inverse_operator(g)?; - if let Some(inverse_op) = inverse_op { - if should_switch_operand_order(op_spec_entry, &helper.node2cost) { - spec_idx_left = op_spec_entry.args.right; - spec_idx_right = op_spec_entry.args.left; + if let Some(inverse_op) = inverse_op + && should_switch_operand_order(op_spec_entry, &helper.node2cost) + { + spec_idx_left = op_spec_entry.args.right; + spec_idx_right = op_spec_entry.args.left; - op = inverse_op; - } + op = inverse_op; } // substract the offset from the specificated numbers to get the internal node number for this conjunction @@ -908,20 +906,20 @@ impl Conjunction { for (node_nr, cid) in &node2component { if first_component_id.is_none() { first_component_id = Some(*cid); - } else if let Some(first) = first_component_id { - if first != *cid { - // add location and description which node is not connected - let n_var = &self.nodes[*node_nr].var; - let location = self.location_in_query.get(n_var); - - return Err(GraphAnnisError::AQLSemanticError(AQLError { - desc: format!( - "Variable \"#{}\" not bound (use linguistic operators)", - n_var - ), - location: location.cloned(), - })); - } + } else if let Some(first) = first_component_id + && first != *cid + { + // add location and description which node is not connected + let n_var = &self.nodes[*node_nr].var; + let location = self.location_in_query.get(n_var); + + return Err(GraphAnnisError::AQLSemanticError(AQLError { + desc: format!( + "Variable \"#{}\" not bound (use linguistic operators)", + n_var + ), + location: location.cloned(), + })); } } diff --git a/graphannis/src/annis/db/aql/conjunction/tests.rs b/graphannis/src/annis/db/aql/conjunction/tests.rs index 72136606f..63a81b2ae 
100644 --- a/graphannis/src/annis/db/aql/conjunction/tests.rs +++ b/graphannis/src/annis/db/aql/conjunction/tests.rs @@ -1,19 +1,18 @@ use core::panic; use crate::{ + AnnotationGraph, annis::{ db::{ aql::{ - self, + self, Config, operators::{DominanceSpec, NegatedOpSpec, PrecedenceSpec, RangeSpec}, - Config, }, plan::ExecutionPlan, }, errors::GraphAnnisError::AQLSemanticError, util::TimeoutCheck, }, - AnnotationGraph, }; #[test] diff --git a/graphannis/src/annis/db/aql/disjunction.rs b/graphannis/src/annis/db/aql/disjunction.rs index ce67c0267..b9613bfe3 100644 --- a/graphannis/src/annis/db/aql/disjunction.rs +++ b/graphannis/src/annis/db/aql/disjunction.rs @@ -1,5 +1,5 @@ use super::conjunction::Conjunction; -use crate::{annis::db::aql::model::AnnotationComponentType, AnnotationGraph}; +use crate::{AnnotationGraph, annis::db::aql::model::AnnotationComponentType}; use graphannis_core::types::Component; use std::collections::HashSet; diff --git a/graphannis/src/annis/db/aql/mod.rs b/graphannis/src/annis/db/aql/mod.rs index ebc92be85..d99c40cad 100644 --- a/graphannis/src/annis/db/aql/mod.rs +++ b/graphannis/src/annis/db/aql/mod.rs @@ -13,6 +13,7 @@ lalrpop_mod!( "/annis/db/aql/parser.rs" ); +use crate::AnnotationGraph; use crate::annis::db::aql::conjunction::Conjunction; use crate::annis::db::aql::disjunction::Disjunction; use crate::annis::db::aql::operators::{ @@ -25,7 +26,6 @@ use crate::annis::errors::*; use crate::annis::operator::{BinaryOperatorSpec, UnaryOperatorSpec}; use crate::annis::types::{LineColumn, LineColumnRange}; use crate::annis::util::TimeoutCheck; -use crate::AnnotationGraph; use lalrpop_util::ParseError; use std::collections::BTreeMap; use std::collections::HashMap; @@ -202,11 +202,12 @@ fn map_conjunction( } else if node_left.optional && node_right.optional { // Not supported yet return Err(GraphAnnisError::AQLSemanticError(AQLError { - desc: format!( - "Negated binary operator needs a non-optional left or right operand, but both operands 
(#{}, #{}) are optional, as indicated by their \"?\" suffix.", - var_left, var_right), - location: op_pos, - })); + desc: format!( + "Negated binary operator needs a non-optional left or right operand, but both operands (#{}, #{}) are optional, as indicated by their \"?\" suffix.", + var_left, var_right + ), + location: op_pos, + })); } else { let target_left = node_left.optional; let filtered_var = if target_left { @@ -523,12 +524,12 @@ pub fn parse(query_as_aql: &str, quirks_mode: bool) -> Result { } .to_string(); let location = extract_location(&e, query_as_aql); - if let ParseError::UnrecognizedToken { expected, .. } = e { - if !expected.is_empty() { - //TODO: map token regular expressions and IDs (like IDENT_NODE) to human readable descriptions - desc.push_str(" Expected one of: "); - desc.push_str(&expected.join(",")); - } + if let ParseError::UnrecognizedToken { expected, .. } = e + && !expected.is_empty() + { + //TODO: map token regular expressions and IDs (like IDENT_NODE) to human readable descriptions + desc.push_str(" Expected one of: "); + desc.push_str(&expected.join(",")); } Err(GraphAnnisError::AQLSyntaxError(AQLError { desc, location })) } diff --git a/graphannis/src/annis/db/aql/model.rs b/graphannis/src/annis/db/aql/model.rs index 987e03ba9..a3b95b88e 100644 --- a/graphannis/src/annis/db/aql/model.rs +++ b/graphannis/src/annis/db/aql/model.rs @@ -7,9 +7,9 @@ use graphannis_core::{ annostorage::ValueSearch, dfs::CycleSafeDFS, errors::ComponentTypeError, - graph::{storage::union::UnionEdgeContainer, ANNIS_NS, NODE_TYPE_KEY}, + graph::{ANNIS_NS, NODE_TYPE_KEY, storage::union::UnionEdgeContainer}, types::ComponentType, - util::disk_collections::{DiskMap, EvictionStrategy, DEFAULT_BLOCK_CACHE_CAPACITY}, + util::disk_collections::{DEFAULT_BLOCK_CACHE_CAPACITY, DiskMap, EvictionStrategy}, }; use std::{ collections::{BTreeMap, HashMap}, @@ -22,13 +22,13 @@ use std::{str::FromStr, sync::Arc}; use strum::IntoEnumIterator; use strum_macros::{EnumIter, 
EnumString}; -use crate::{update::UpdateEvent, AnnotationGraph}; +use crate::{AnnotationGraph, update::UpdateEvent}; use rustc_hash::FxHashSet; use crate::{ + Graph, graph::{AnnoKey, Component}, model::AnnotationComponent, - Graph, }; pub const TOK: &str = "tok"; @@ -341,11 +341,11 @@ impl AQLUpdateGraphIndex { } // if the node already has a left/right token, just return this value - if let Some(alignment_gs) = graph.get_graphstorage_as_ref(&alignment_component) { - if let Some(existing) = alignment_gs.get_outgoing_edges(n).next() { - let existing = existing?; - return Ok(Some(existing)); - } + if let Some(alignment_gs) = graph.get_graphstorage_as_ref(&alignment_component) + && let Some(existing) = alignment_gs.get_outgoing_edges(n).next() + { + let existing = existing?; + return Ok(Some(existing)); } // order the candidate token by their position in the order chain @@ -473,24 +473,24 @@ impl ComponentType for AnnotationComponentType { component_type, .. } => { - if !index.graph_without_nodes { - if let Ok(ctype) = AnnotationComponentType::from_str(component_type) { - if ctype == AnnotationComponentType::Coverage - || ctype == AnnotationComponentType::Dominance - || ctype == AnnotationComponentType::Ordering - || ctype == AnnotationComponentType::LeftToken - || ctype == AnnotationComponentType::RightToken - { - let source = index - .get_cached_node_id_from_name(Cow::Borrowed(source_node), graph)?; - index.calculate_invalidated_nodes_by_coverage(graph, source)?; - } - - if ctype == AnnotationComponentType::Ordering { - let target = index - .get_cached_node_id_from_name(Cow::Borrowed(target_node), graph)?; - index.calculate_invalidated_nodes_by_coverage(graph, target)?; - } + if !index.graph_without_nodes + && let Ok(ctype) = AnnotationComponentType::from_str(component_type) + { + if ctype == AnnotationComponentType::Coverage + || ctype == AnnotationComponentType::Dominance + || ctype == AnnotationComponentType::Ordering + || ctype == 
AnnotationComponentType::LeftToken + || ctype == AnnotationComponentType::RightToken + { + let source = index + .get_cached_node_id_from_name(Cow::Borrowed(source_node), graph)?; + index.calculate_invalidated_nodes_by_coverage(graph, source)?; + } + + if ctype == AnnotationComponentType::Ordering { + let target = index + .get_cached_node_id_from_name(Cow::Borrowed(target_node), graph)?; + index.calculate_invalidated_nodes_by_coverage(graph, target)?; } } } @@ -513,39 +513,35 @@ impl ComponentType for AnnotationComponentType { target_node, .. } = update + && let Ok(ctype) = AnnotationComponentType::from_str(&component_type) { - if let Ok(ctype) = AnnotationComponentType::from_str(&component_type) { - if (ctype == AnnotationComponentType::Dominance - || ctype == AnnotationComponentType::Coverage) - && component_name.is_empty() - { - // might be a new text coverage component - let c = AnnotationComponent::new( - ctype.clone(), - layer.into(), - component_name.into(), - ); - index.text_coverage_components.insert(c); - } + if (ctype == AnnotationComponentType::Dominance + || ctype == AnnotationComponentType::Coverage) + && component_name.is_empty() + { + // might be a new text coverage component + let c = + AnnotationComponent::new(ctype.clone(), layer.into(), component_name.into()); + index.text_coverage_components.insert(c); + } - if !index.graph_without_nodes { - if ctype == AnnotationComponentType::Coverage - || ctype == AnnotationComponentType::Dominance - || ctype == AnnotationComponentType::Ordering - || ctype == AnnotationComponentType::LeftToken - || ctype == AnnotationComponentType::RightToken - { - let source = - index.get_cached_node_id_from_name(Cow::Owned(source_node), graph)?; + if !index.graph_without_nodes { + if ctype == AnnotationComponentType::Coverage + || ctype == AnnotationComponentType::Dominance + || ctype == AnnotationComponentType::Ordering + || ctype == AnnotationComponentType::LeftToken + || ctype == AnnotationComponentType::RightToken + { + 
let source = + index.get_cached_node_id_from_name(Cow::Owned(source_node), graph)?; - index.calculate_invalidated_nodes_by_coverage(graph, source)?; - } + index.calculate_invalidated_nodes_by_coverage(graph, source)?; + } - if ctype == AnnotationComponentType::Ordering { - let target = - index.get_cached_node_id_from_name(Cow::Owned(target_node), graph)?; - index.calculate_invalidated_nodes_by_coverage(graph, target)?; - } + if ctype == AnnotationComponentType::Ordering { + let target = + index.get_cached_node_id_from_name(Cow::Owned(target_node), graph)?; + index.calculate_invalidated_nodes_by_coverage(graph, target)?; } } } @@ -655,14 +651,12 @@ impl ComponentType for AnnotationComponentType { for ordering_component in graph.get_all_components(Some(AnnotationComponentType::Ordering), None) { - if !ordering_component.name.is_empty() { - if let Some(gs_stats) = graph + if !ordering_component.name.is_empty() + && let Some(gs_stats) = graph .get_graphstorage_as_ref(&ordering_component) .and_then(|gs| gs.get_statistics()) - { - token_count_by_ordering_component - .insert(ordering_component, gs_stats.nodes as u64); - } + { + token_count_by_ordering_component.insert(ordering_component, gs_stats.nodes as u64); } } diff --git a/graphannis/src/annis/db/aql/model/tests.rs b/graphannis/src/annis/db/aql/model/tests.rs index 28feda0ac..5373757f8 100644 --- a/graphannis/src/annis/db/aql/model/tests.rs +++ b/graphannis/src/annis/db/aql/model/tests.rs @@ -1,16 +1,16 @@ use std::{fs::File, path::PathBuf}; use crate::{ + AnnotationGraph, annis::db::{aql::model::CorpusSize, example_generator}, model::AnnotationComponent, - AnnotationGraph, }; use assert_matches::assert_matches; use graphannis_core::graph::{ + NODE_NAME_KEY, serialization::graphml, storage::GraphStorage, update::{GraphUpdate, UpdateEvent}, - NODE_NAME_KEY, }; use insta::assert_snapshot; use itertools::Itertools; diff --git a/graphannis/src/annis/db/aql/operators/arity.rs 
b/graphannis/src/annis/db/aql/operators/arity.rs index 7f15e224f..f935712ae 100644 --- a/graphannis/src/annis/db/aql/operators/arity.rs +++ b/graphannis/src/annis/db/aql/operators/arity.rs @@ -1,13 +1,13 @@ use super::RangeSpec; use crate::annis::operator::EstimationType; use crate::{ + AnnotationGraph, annis::{ db::aql::model::AnnotationComponentType, operator::{UnaryOperator, UnaryOperatorSpec}, }, errors::Result, graph::{GraphStorage, Match}, - AnnotationGraph, }; use graphannis_core::types::{Component, NodeID}; use std::collections::HashSet; diff --git a/graphannis/src/annis/db/aql/operators/edge_op.rs b/graphannis/src/annis/db/aql/operators/edge_op.rs index c22f204ac..3e67418ae 100644 --- a/graphannis/src/annis/db/aql/operators/edge_op.rs +++ b/graphannis/src/annis/db/aql/operators/edge_op.rs @@ -7,7 +7,7 @@ use crate::annis::operator::{ }; use crate::errors::Result; use crate::graph::{GraphStatistic, GraphStorage, Match}; -use crate::{try_as_boxed_iter, AnnotationGraph}; +use crate::{AnnotationGraph, try_as_boxed_iter}; use graphannis_core::{ graph::{ANNIS_NS, DEFAULT_ANNO_KEY, NODE_TYPE_KEY}, types::{Component, Edge, NodeID}, @@ -129,15 +129,15 @@ fn check_edge_annotation( if name != &a.key.name { continue; } - if let Some(template_ns) = ns { - if template_ns != &a.key.ns { - continue; - } + if let Some(template_ns) = ns + && template_ns != &a.key.ns + { + continue; } - if let Some(template_val) = val { - if template_val != &*a.val { - continue; - } + if let Some(template_val) = val + && template_val != &*a.val + { + continue; } // all checks passed, this edge has the correct annotation return Ok(true); @@ -152,10 +152,10 @@ fn check_edge_annotation( if name != &a.key.name { continue; } - if let Some(template_ns) = ns { - if template_ns != &a.key.ns { - continue; - } + if let Some(template_ns) = ns + && template_ns != &a.key.ns + { + continue; } if val.as_str() == a.val.as_str() { continue; @@ -177,10 +177,10 @@ fn check_edge_annotation( if name != 
&a.key.name { continue; } - if let Some(template_ns) = ns { - if template_ns != &a.key.ns { - continue; - } + if let Some(template_ns) = ns + && template_ns != &a.key.ns + { + continue; } if !re.is_match(&a.val) { @@ -204,10 +204,10 @@ fn check_edge_annotation( if name != &a.key.name { continue; } - if let Some(template_ns) = ns { - if template_ns != &a.key.ns { - continue; - } + if let Some(template_ns) = ns + && template_ns != &a.key.ns + { + continue; } if re.is_match(&a.val) { diff --git a/graphannis/src/annis/db/aql/operators/equal_value.rs b/graphannis/src/annis/db/aql/operators/equal_value.rs index 3d3c6eee4..77e592061 100644 --- a/graphannis/src/annis/db/aql/operators/equal_value.rs +++ b/graphannis/src/annis/db/aql/operators/equal_value.rs @@ -1,5 +1,6 @@ -use crate::annis::db::exec::nodesearch::NodeSearchSpec; use crate::annis::db::exec::CostEstimate; +use crate::annis::db::exec::nodesearch::NodeSearchSpec; +use crate::{AnnotationGraph, try_as_boxed_iter}; use crate::{ annis::{ db::aql::model::{AnnotationComponentType, TOK, TOKEN_KEY}, @@ -8,7 +9,6 @@ use crate::{ errors::Result, graph::Match, }; -use crate::{try_as_boxed_iter, AnnotationGraph}; use graphannis_core::annostorage::NodeAnnotationStorage; use graphannis_core::{annostorage::ValueSearch, graph::ANNIS_NS, types::Component}; use std::borrow::Cow; @@ -137,26 +137,24 @@ impl BinaryOperatorBase for EqualValue<'_> { } fn estimation_type(&self) -> Result { - if let Some((ns, name)) = EqualValue::anno_def_for_spec(&self.spec_left) { - if let Some(most_frequent_value_left) = + if let Some((ns, name)) = EqualValue::anno_def_for_spec(&self.spec_left) + && let Some(most_frequent_value_left) = self.node_annos.guess_most_frequent_value(ns, name)? 
- { - if let Some((ns, name)) = EqualValue::anno_def_for_spec(&self.spec_right) { - let guessed_count_right = self.node_annos.guess_max_count( - ns, - name, - &most_frequent_value_left, - &most_frequent_value_left, - )?; - - let total_annos = self.node_annos.number_of_annotations_by_name(ns, name)?; - let sel = guessed_count_right as f64 / total_annos as f64; - if self.negated { - return Ok(EstimationType::Selectivity(1.0 - sel)); - } else { - return Ok(EstimationType::Selectivity(sel)); - } - } + && let Some((ns, name)) = EqualValue::anno_def_for_spec(&self.spec_right) + { + let guessed_count_right = self.node_annos.guess_max_count( + ns, + name, + &most_frequent_value_left, + &most_frequent_value_left, + )?; + + let total_annos = self.node_annos.number_of_annotations_by_name(ns, name)?; + let sel = guessed_count_right as f64 / total_annos as f64; + if self.negated { + return Ok(EstimationType::Selectivity(1.0 - sel)); + } else { + return Ok(EstimationType::Selectivity(sel)); } } // fallback to default diff --git a/graphannis/src/annis/db/aql/operators/identical_cov.rs b/graphannis/src/annis/db/aql/operators/identical_cov.rs index 7e49c1c9e..08e456b29 100644 --- a/graphannis/src/annis/db/aql/operators/identical_cov.rs +++ b/graphannis/src/annis/db/aql/operators/identical_cov.rs @@ -5,11 +5,11 @@ use crate::annis::errors::GraphAnnisError; use crate::annis::operator::{BinaryOperator, BinaryOperatorIndex, EstimationType}; use crate::try_as_boxed_iter; use crate::{ + AnnotationGraph, annis::operator::{BinaryOperatorBase, BinaryOperatorSpec}, errors::Result, graph::{GraphStorage, Match}, model::AnnotationComponentType, - AnnotationGraph, }; use graphannis_core::{ graph::{ANNIS_NS, DEFAULT_ANNO_KEY}, @@ -178,13 +178,13 @@ impl BinaryOperatorIndex for IdenticalCoverage<'_> { // check if also right-aligned match self.tok_helper.right_token_for(c) { Ok(c_right) => { - if let Some(c_right) = c_right { - if n_right == c_right { - result.push(Ok(Match { - node: c, - anno_key: 
DEFAULT_ANNO_KEY.clone(), - })); - } + if let Some(c_right) = c_right + && n_right == c_right + { + result.push(Ok(Match { + node: c, + anno_key: DEFAULT_ANNO_KEY.clone(), + })); } } Err(e) => result.push(Err(e)), diff --git a/graphannis/src/annis/db/aql/operators/identical_node.rs b/graphannis/src/annis/db/aql/operators/identical_node.rs index 1d15237e6..7121c8fe2 100644 --- a/graphannis/src/annis/db/aql/operators/identical_node.rs +++ b/graphannis/src/annis/db/aql/operators/identical_node.rs @@ -1,5 +1,5 @@ -use crate::annis::db::exec::CostEstimate; use crate::AnnotationGraph; +use crate::annis::db::exec::CostEstimate; use crate::{ annis::{db::aql::model::AnnotationComponentType, operator::*}, errors::Result, diff --git a/graphannis/src/annis/db/aql/operators/inclusion.rs b/graphannis/src/annis/db/aql/operators/inclusion.rs index 31fd13d1f..fb6a2cd85 100644 --- a/graphannis/src/annis/db/aql/operators/inclusion.rs +++ b/graphannis/src/annis/db/aql/operators/inclusion.rs @@ -3,13 +3,13 @@ use crate::annis::db::token_helper; use crate::annis::db::token_helper::TokenHelper; use crate::annis::errors::GraphAnnisError; use crate::annis::operator::{BinaryOperator, BinaryOperatorIndex, EstimationType}; +use crate::{AnnotationGraph, try_as_boxed_iter}; use crate::{ annis::operator::{BinaryOperatorBase, BinaryOperatorSpec}, errors::Result, graph::{GraphStorage, Match}, model::AnnotationComponentType, }; -use crate::{try_as_boxed_iter, AnnotationGraph}; use graphannis_core::types::NodeID; use graphannis_core::{ graph::{ANNIS_NS, DEFAULT_ANNO_KEY}, diff --git a/graphannis/src/annis/db/aql/operators/leftalignment.rs b/graphannis/src/annis/db/aql/operators/leftalignment.rs index 92b2f832d..bd81303d5 100644 --- a/graphannis/src/annis/db/aql/operators/leftalignment.rs +++ b/graphannis/src/annis/db/aql/operators/leftalignment.rs @@ -4,8 +4,8 @@ use crate::annis::db::{aql::model::AnnotationComponentType, token_helper::TokenH use crate::annis::errors::GraphAnnisError; use 
crate::annis::operator::{BinaryOperator, BinaryOperatorSpec}; use crate::annis::operator::{BinaryOperatorBase, BinaryOperatorIndex}; +use crate::{AnnotationGraph, try_as_boxed_iter}; use crate::{annis::operator::EstimationType, errors::Result, graph::Match}; -use crate::{try_as_boxed_iter, AnnotationGraph}; use graphannis_core::{graph::DEFAULT_ANNO_KEY, types::Component}; use itertools::Itertools; use std::collections::HashSet; diff --git a/graphannis/src/annis/db/aql/operators/near.rs b/graphannis/src/annis/db/aql/operators/near.rs index eab7f0661..c8a58e148 100644 --- a/graphannis/src/annis/db/aql/operators/near.rs +++ b/graphannis/src/annis/db/aql/operators/near.rs @@ -4,12 +4,12 @@ use crate::annis::db::token_helper; use crate::annis::db::token_helper::TokenHelper; use crate::annis::errors::GraphAnnisError; use crate::annis::operator::{BinaryOperator, BinaryOperatorIndex, EstimationType}; +use crate::{AnnotationGraph, try_as_boxed_iter}; use crate::{ annis::operator::{BinaryOperatorBase, BinaryOperatorSpec}, errors::Result, graph::{GraphStorage, Match}, }; -use crate::{try_as_boxed_iter, AnnotationGraph}; use graphannis_core::graph::DEFAULT_NS; use graphannis_core::types::NodeID; use graphannis_core::{ diff --git a/graphannis/src/annis/db/aql/operators/negated_op.rs b/graphannis/src/annis/db/aql/operators/negated_op.rs index 23a091c41..db88c2f34 100644 --- a/graphannis/src/annis/db/aql/operators/negated_op.rs +++ b/graphannis/src/annis/db/aql/operators/negated_op.rs @@ -1,12 +1,12 @@ use std::{fmt::Display, sync::Arc}; use crate::{ + AnnotationGraph, annis::{ db::exec::CostEstimate, operator::{BinaryOperator, BinaryOperatorBase, BinaryOperatorSpec, EstimationType}, }, errors::Result, - AnnotationGraph, }; use graphannis_core::annostorage::Match; diff --git a/graphannis/src/annis/db/aql/operators/non_existing.rs b/graphannis/src/annis/db/aql/operators/non_existing.rs index 42a31dbff..5dff03a30 100644 --- a/graphannis/src/annis/db/aql/operators/non_existing.rs 
+++ b/graphannis/src/annis/db/aql/operators/non_existing.rs @@ -11,6 +11,7 @@ use graphannis_core::{ }; use crate::{ + AnnotationGraph, annis::{ db::{ aql::conjunction::Conjunction, @@ -23,7 +24,6 @@ use crate::{ }, util::TimeoutCheck, }, - AnnotationGraph, }; #[derive(Debug, Clone)] diff --git a/graphannis/src/annis/db/aql/operators/overlap.rs b/graphannis/src/annis/db/aql/operators/overlap.rs index 3a6064d5a..5bdf5ecd1 100644 --- a/graphannis/src/annis/db/aql/operators/overlap.rs +++ b/graphannis/src/annis/db/aql/operators/overlap.rs @@ -3,13 +3,13 @@ use crate::annis::db::token_helper; use crate::annis::db::token_helper::TokenHelper; use crate::annis::errors::GraphAnnisError; use crate::annis::operator::{BinaryOperator, BinaryOperatorIndex, EstimationType}; +use crate::{AnnotationGraph, try_as_boxed_iter}; use crate::{ annis::operator::{BinaryOperatorBase, BinaryOperatorSpec}, errors::Result, graph::{GraphStorage, Match}, model::{AnnotationComponent, AnnotationComponentType}, }; -use crate::{try_as_boxed_iter, AnnotationGraph}; use graphannis_core::{ graph::{ANNIS_NS, DEFAULT_ANNO_KEY}, types::NodeID, diff --git a/graphannis/src/annis/db/aql/operators/precedence.rs b/graphannis/src/annis/db/aql/operators/precedence.rs index b01e5384f..fb8df05de 100644 --- a/graphannis/src/annis/db/aql/operators/precedence.rs +++ b/graphannis/src/annis/db/aql/operators/precedence.rs @@ -4,13 +4,13 @@ use crate::annis::db::token_helper; use crate::annis::db::token_helper::TokenHelper; use crate::annis::errors::GraphAnnisError; use crate::annis::operator::{BinaryOperator, BinaryOperatorIndex, EstimationType}; +use crate::{AnnotationGraph, try_as_boxed_iter}; use crate::{ annis::operator::{BinaryOperatorBase, BinaryOperatorSpec}, errors::Result, graph::{GraphStorage, Match}, model::{AnnotationComponent, AnnotationComponentType}, }; -use crate::{try_as_boxed_iter, AnnotationGraph}; use graphannis_core::graph::{ANNIS_NS, DEFAULT_ANNO_KEY, DEFAULT_NS}; use itertools::Itertools; diff 
--git a/graphannis/src/annis/db/aql/operators/rightalignment.rs b/graphannis/src/annis/db/aql/operators/rightalignment.rs index 0425a668d..f32129b9e 100644 --- a/graphannis/src/annis/db/aql/operators/rightalignment.rs +++ b/graphannis/src/annis/db/aql/operators/rightalignment.rs @@ -1,3 +1,4 @@ +use crate::AnnotationGraph; use crate::annis::db::exec::CostEstimate; use crate::annis::db::token_helper; use crate::annis::db::token_helper::TokenHelper; @@ -7,7 +8,6 @@ use crate::annis::operator::BinaryOperatorBase; use crate::annis::operator::BinaryOperatorIndex; use crate::annis::operator::BinaryOperatorSpec; use crate::try_as_boxed_iter; -use crate::AnnotationGraph; use crate::{ annis::operator::EstimationType, errors::Result, graph::Match, model::AnnotationComponent, }; diff --git a/graphannis/src/annis/db/corpusstorage.rs b/graphannis/src/annis/db/corpusstorage.rs index 5f779b435..565b20cd2 100644 --- a/graphannis/src/annis/db/corpusstorage.rs +++ b/graphannis/src/annis/db/corpusstorage.rs @@ -15,28 +15,28 @@ use crate::annis::types::{ QueryAttributeDescription, }; use crate::annis::types::{CorpusSizeInfo, CountExtra}; -use crate::annis::util::quicksort; use crate::annis::util::TimeoutCheck; -use crate::{graph::Match, AnnotationGraph}; +use crate::annis::util::quicksort; +use crate::{AnnotationGraph, graph::Match}; use fmt::Display; use fs2::FileExt; use graphannis_core::annostorage::symboltable::SymbolTable; use graphannis_core::annostorage::{ - match_group_resolve_symbol_ids, match_group_with_symbol_ids, NodeAnnotationStorage, + NodeAnnotationStorage, match_group_resolve_symbol_ids, match_group_with_symbol_ids, }; use graphannis_core::errors::Result as CoreResult; use graphannis_core::{ annostorage::{MatchGroup, ValueSearch}, graph::{ - storage::GraphStatistic, update::GraphUpdate, ANNIS_NS, NODE_NAME, NODE_NAME_KEY, NODE_TYPE, + ANNIS_NS, NODE_NAME, NODE_NAME_KEY, NODE_TYPE, storage::GraphStatistic, update::GraphUpdate, }, types::{AnnoKey, Annotation, Component, 
NodeID}, }; use itertools::Itertools; use linked_hash_map::LinkedHashMap; use memory_stats::memory_stats; -use percent_encoding::{percent_decode_str, utf8_percent_encode, AsciiSet, CONTROLS}; -use rand::Rng; +use percent_encoding::{AsciiSet, CONTROLS, percent_decode_str, utf8_percent_encode}; +use rand::prelude::*; use smartstring::alias::String as SmartString; use std::collections::HashSet; use std::fmt; @@ -53,7 +53,6 @@ use transient_btree_index::{BtreeConfig, BtreeIndex}; use rustc_hash::{FxHashMap, FxHashSet}; -use rand::seq::SliceRandom; use std::{ ffi::CString, io::{BufReader, Write}, @@ -550,7 +549,7 @@ impl CorpusStorage { .unwrap_or_default(); let corpus_info: CorpusInfo = match &*lock { - CacheEntry::Loaded(ref db) => { + CacheEntry::Loaded(db) => { // check if all components are loaded let mut load_status = LoadStatus::FullyLoaded; @@ -806,10 +805,10 @@ impl CorpusStorage { if let Some(relannis_root) = output_path.parent() { relannis_files.push(relannis_root.to_owned()) } - } else if let Some(ext) = output_path.extension() { - if ext.to_string_lossy().to_ascii_lowercase() == "graphml" { - graphannis_files.push(output_path.clone()); - } + } else if let Some(ext) = output_path.extension() + && ext.to_string_lossy().to_ascii_lowercase() == "graphml" + { + graphannis_files.push(output_path.clone()); } } @@ -1001,10 +1000,10 @@ impl CorpusStorage { .map(|db_entry| db_entry.write()) .transpose()?; - if db_path.is_dir() { - if let Err(e) = std::fs::remove_dir_all(&db_path) { - error!("Error when removing existing files {}", e); - } + if db_path.is_dir() + && let Err(e) = std::fs::remove_dir_all(&db_path) + { + error!("Error when removing existing files {}", e); } } else if cache.contains_key(&corpus_name) || db_path.is_dir() { return Err(GraphAnnisError::CorpusExists(corpus_name.to_string())); @@ -1084,30 +1083,30 @@ impl CorpusStorage { // Get the linked file for this node if let Some(original_path) = node_annos.get_value_for_item(&node, &linked_file_key)? 
{ let original_path = old_base_path.join(PathBuf::from(original_path.as_ref())); - if original_path.is_file() { - if let Some(node_name) = node_annos.get_value_for_item(&node, &NODE_NAME_KEY)? { - // Create a new file name based on the node name and copy the file - let new_path = new_base_path.join(node_name.as_ref()); - debug!( - "Copying file from {} to {}", - original_path.as_path().to_string_lossy(), - new_path.to_string_lossy() - ); - if let Some(parent) = new_path.parent() { - std::fs::create_dir_all(parent)?; - } - std::fs::copy(&original_path, &new_path)?; - // Update the annotation to link to the new file with a relative path. - // Use the corpus directory as base path for this relative path. - let relative_path = new_path.strip_prefix(new_base_path)?; - node_annos.insert( - node, - Annotation { - key: linked_file_key.clone(), - val: relative_path.to_string_lossy().into(), - }, - )?; + if original_path.is_file() + && let Some(node_name) = node_annos.get_value_for_item(&node, &NODE_NAME_KEY)? + { + // Create a new file name based on the node name and copy the file + let new_path = new_base_path.join(node_name.as_ref()); + debug!( + "Copying file from {} to {}", + original_path.as_path().to_string_lossy(), + new_path.to_string_lossy() + ); + if let Some(parent) = new_path.parent() { + std::fs::create_dir_all(parent)?; } + std::fs::copy(&original_path, &new_path)?; + // Update the annotation to link to the new file with a relative path. + // Use the corpus directory as base path for this relative path. + let relative_path = new_path.strip_prefix(new_base_path)?; + node_annos.insert( + node, + Annotation { + key: linked_file_key.clone(), + val: relative_path.to_string_lossy().into(), + }, + )?; } } } @@ -1719,10 +1718,9 @@ impl CorpusStorage { if let Some(v) = db .get_node_annos() .get_value_for_item(&m.node, &m.anno_key)? 
+ && v == "3.3" { - if v == "3.3" { - relannis_version_33 = true; - } + relannis_version_33 = true; } } } @@ -1747,16 +1745,16 @@ impl CorpusStorage { if find_arguments.order == ResultOrder::Randomized { // Use a unique random index for each match to force a random order - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); for mgroup in plan { let mgroup = mgroup?; - let mut idx: usize = rng.gen(); - while tmp_results.contains_key(&idx)? { - idx = rng.gen(); + let mut idx: u64 = rng.random(); + while tmp_results.contains_key(&(idx as usize))? { + idx = rng.random(); } let m = match_group_with_symbol_ids(&mgroup, &mut anno_key_symbols)?; - tmp_results.insert(idx, m)?; + tmp_results.insert(idx as usize, m)?; } } else { // Insert results in the order as they are given by the iterator @@ -2022,7 +2020,7 @@ impl CorpusStorage { if order == ResultOrder::Randomized { // This is still oddly ordered, because results from one corpus will always be grouped together. // But it still better than just output the same corpus first. - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); corpus_names.shuffle(&mut rng); } else if order == ResultOrder::Inverted { corpus_names.sort(); @@ -2052,11 +2050,11 @@ impl CorpusStorage { result.extend(single_result.into_iter()); } - if let Some(limit) = limit { - if result.len() == limit { - // Searching in the first corpora already yielded enough results - break; - } + if let Some(limit) = limit + && result.len() == limit + { + // Searching in the first corpora already yielded enough results + break; } if skipped < offset { find_arguments.offset -= skipped; @@ -2492,36 +2490,36 @@ impl CorpusStorage { self.get_loaded_entry_with_components(corpus_name, vec![component.clone()]) { let lock = db_entry.read()?; - if let Ok(db) = get_read_or_error(&lock) { - if let Some(gs) = db.get_graphstorage(component) { - let edge_annos = gs.get_anno_storage(); - for key in edge_annos.annotation_keys()? 
{ - if list_values { - if only_most_frequent_values { - // get the first value - if let Some(val) = - edge_annos.get_all_values(&key, true)?.into_iter().next() - { - result.push(Annotation { - key: key.clone(), - val: val.into(), - }); - } - } else { - // get all values - for val in edge_annos.get_all_values(&key, false)? { - result.push(Annotation { - key: key.clone(), - val: val.into(), - }); - } + if let Ok(db) = get_read_or_error(&lock) + && let Some(gs) = db.get_graphstorage(component) + { + let edge_annos = gs.get_anno_storage(); + for key in edge_annos.annotation_keys()? { + if list_values { + if only_most_frequent_values { + // get the first value + if let Some(val) = + edge_annos.get_all_values(&key, true)?.into_iter().next() + { + result.push(Annotation { + key: key.clone(), + val: val.into(), + }); } } else { - result.push(Annotation { - key: key.clone(), - val: SmartString::new(), - }); + // get all values + for val in edge_annos.get_all_values(&key, false)? { + result.push(Annotation { + key: key.clone(), + val: val.into(), + }); + } } + } else { + result.push(Annotation { + key: key.clone(), + val: SmartString::new(), + }); } } } @@ -2585,17 +2583,18 @@ impl Drop for CorpusStorage { // administration account (see // https://github.com/korpling/graphANNIS/issues/230). 
let lock_file_path = self.db_dir.join(DB_LOCK_FILE_NAME); - if lock_file_path.exists() && lock_file_path.is_file() { - if let Err(e) = std::fs::remove_file(lock_file_path) { - warn!("Could not remove CorpusStorage lock file: {:?}", e); - } + if lock_file_path.exists() + && lock_file_path.is_file() + && let Err(e) = std::fs::remove_file(lock_file_path) + { + warn!("Could not remove CorpusStorage lock file: {:?}", e); } } } } fn get_read_or_error<'a>(lock: &'a RwLockReadGuard<CacheEntry>) -> Result<&'a AnnotationGraph> { - if let CacheEntry::Loaded(ref db) = &**lock { + if let CacheEntry::Loaded(db) = &**lock { Ok(db) } else { Err(GraphAnnisError::LoadingGraphFailed { @@ -2607,7 +2606,7 @@ fn get_read_or_error<'a>(lock: &'a RwLockReadGuard<CacheEntry>) -> Result<&'a An fn get_write_or_error<'a>( lock: &'a mut RwLockWriteGuard<CacheEntry>, ) -> Result<&'a mut AnnotationGraph> { - if let CacheEntry::Loaded(ref mut db) = &mut **lock { + if let CacheEntry::Loaded(db) = &mut **lock { Ok(db) } else { Err(CorpusStorageError::CorpusCacheEntryNotLoaded.into()) @@ -2622,8 +2621,8 @@ fn get_max_cache_size(cache_strategy: &CacheStrategy, used_cache_size: usize) -> if let Ok(mem) = sys_info::mem_info() { // the free memory let free_system_mem: usize = mem.avail as usize * 1024; // mem.free is in KiB - // A part of the system memory is already used by the cache. - // We want x percent of the overall available memory (thus not used by us), so add the cache size + // A part of the system memory is already used by the cache.
+ // We want x percent of the overall available memory (thus not used by us), so add the cache size let available_memory: usize = free_system_mem + used_cache_size; ((available_memory as f64) * (max_percent / 100.0)) as usize } else { diff --git a/graphannis/src/annis/db/corpusstorage/subgraph.rs b/graphannis/src/annis/db/corpusstorage/subgraph.rs index 105a51830..5698c4652 100644 --- a/graphannis/src/annis/db/corpusstorage/subgraph.rs +++ b/graphannis/src/annis/db/corpusstorage/subgraph.rs @@ -17,7 +17,7 @@ use crate::annis::db::token_helper::TokenHelper; use crate::annis::errors::GraphAnnisError; use crate::annis::util::quicksort; use crate::try_as_option; -use crate::{annis::errors::Result, model::AnnotationComponentType, AnnotationGraph}; +use crate::{AnnotationGraph, annis::errors::Result, model::AnnotationComponentType}; struct TokenIterator<'a> { end_token: NodeID, @@ -498,27 +498,23 @@ where match_result.insert(m.clone()); trace!("subgraph query extracted node {:?}", m.node); - if let Some(token_helper) = &token_helper { - if token_helper.is_token(m.node)? { - if let (Some(gs_ordering), Some(previous_node)) = - (&gs_orig_ordering, previous_token) - { - if let Some(distance) = - gs_ordering.distance(previous_node, m.node)? - { - if distance > 1 { - let gs_result_ds_ordering_ = result - .get_or_create_writable(&ds_ordering_component)?; - - gs_result_ds_ordering_.add_edge(Edge { - source: previous_node, - target: m.node, - })?; - } - } - } - previous_token = Some(m.node); + if let Some(token_helper) = &token_helper + && token_helper.is_token(m.node)? + { + if let (Some(gs_ordering), Some(previous_node)) = + (&gs_orig_ordering, previous_token) + && let Some(distance) = gs_ordering.distance(previous_node, m.node)? 
+ && distance > 1 + { + let gs_result_ds_ordering_ = + result.get_or_create_writable(&ds_ordering_component)?; + + gs_result_ds_ordering_.add_edge(Edge { + source: previous_node, + target: m.node, + })?; } + previous_token = Some(m.node); } create_subgraph_node(m.node, &mut result, orig_graph)?; @@ -562,30 +558,29 @@ fn create_subgraph_edge( && !c.name.is_empty()) || ctype == AnnotationComponentType::RightToken || ctype == AnnotationComponentType::LeftToken) + && let Some(orig_gs) = orig_db.get_graphstorage(c) { - if let Some(orig_gs) = orig_db.get_graphstorage(c) { - for target in orig_gs.get_outgoing_edges(source_id) { - let target = target?; - if !db - .get_node_annos() - .get_all_keys_for_item(&target, None, None)? - .is_empty() - { - let e = Edge { - source: source_id, - target, - }; - if let Ok(new_gs) = db.get_or_create_writable(c) { - new_gs.add_edge(e.clone())?; - } + for target in orig_gs.get_outgoing_edges(source_id) { + let target = target?; + if !db + .get_node_annos() + .get_all_keys_for_item(&target, None, None)? + .is_empty() + { + let e = Edge { + source: source_id, + target, + }; + if let Ok(new_gs) = db.get_or_create_writable(c) { + new_gs.add_edge(e.clone())?; + } - for a in orig_gs.get_anno_storage().get_annotations_for_item(&Edge { - source: source_id, - target, - })? { - if let Ok(new_gs) = db.get_or_create_writable(c) { - new_gs.add_edge_annotation(e.clone(), a)?; - } + for a in orig_gs.get_anno_storage().get_annotations_for_item(&Edge { + source: source_id, + target, + })? 
{ + if let Ok(new_gs) = db.get_or_create_writable(c) { + new_gs.add_edge_annotation(e.clone(), a)?; } } } diff --git a/graphannis/src/annis/db/corpusstorage/tests.rs b/graphannis/src/annis/db/corpusstorage/tests.rs index 36493dd53..c70223c3e 100644 --- a/graphannis/src/annis/db/corpusstorage/tests.rs +++ b/graphannis/src/annis/db/corpusstorage/tests.rs @@ -7,7 +7,7 @@ use serial_test::serial; use std::path::{Path, PathBuf}; use std::vec; -use crate::annis::db::corpusstorage::{get_read_or_error, CacheEntry}; +use crate::annis::db::corpusstorage::{CacheEntry, get_read_or_error}; use crate::annis::db::example_generator::create_token_node; use crate::annis::db::{aql::model::AnnotationComponentType, example_generator}; use crate::annis::errors::GraphAnnisError; @@ -891,62 +891,72 @@ fn subgraph_with_segmentation_and_gap() { let g = cs.subgraph(&corpus_name, m.clone(), 1, 2, None).unwrap(); // Check that all token and the page are included, including the token // that is not covered by a segmentation node. 
- assert!(g - .get_node_annos() - .get_node_id_from_name("SegmentationWithGaps/doc01#tok_11") - .unwrap() - .is_some()); - assert!(g - .get_node_annos() - .get_node_id_from_name("SegmentationWithGaps/doc01#tok_12") - .unwrap() - .is_some()); - assert!(g - .get_node_annos() - .get_node_id_from_name("SegmentationWithGaps/doc01#tok_13") - .unwrap() - .is_some()); - assert!(g - .get_node_annos() - .get_node_id_from_name("SegmentationWithGaps/doc01#tok_14") - .unwrap() - .is_some()); - assert!(g - .get_node_annos() - .get_node_id_from_name("SegmentationWithGaps/doc01#page2") - .unwrap() - .is_some()); + assert!( + g.get_node_annos() + .get_node_id_from_name("SegmentationWithGaps/doc01#tok_11") + .unwrap() + .is_some() + ); + assert!( + g.get_node_annos() + .get_node_id_from_name("SegmentationWithGaps/doc01#tok_12") + .unwrap() + .is_some() + ); + assert!( + g.get_node_annos() + .get_node_id_from_name("SegmentationWithGaps/doc01#tok_13") + .unwrap() + .is_some() + ); + assert!( + g.get_node_annos() + .get_node_id_from_name("SegmentationWithGaps/doc01#tok_14") + .unwrap() + .is_some() + ); + assert!( + g.get_node_annos() + .get_node_id_from_name("SegmentationWithGaps/doc01#page2") + .unwrap() + .is_some() + ); // Get the context for the norm node using the norm segmentation let g = cs .subgraph(&corpus_name, m, 1, 1, Some("norm".to_string())) .unwrap(); // Check that all token and the page are included - assert!(g - .get_node_annos() - .get_node_id_from_name("SegmentationWithGaps/doc01#tok_11") - .unwrap() - .is_some()); - assert!(g - .get_node_annos() - .get_node_id_from_name("SegmentationWithGaps/doc01#tok_12") - .unwrap() - .is_some()); - assert!(g - .get_node_annos() - .get_node_id_from_name("SegmentationWithGaps/doc01#tok_13") - .unwrap() - .is_some()); - assert!(g - .get_node_annos() - .get_node_id_from_name("SegmentationWithGaps/doc01#tok_14") - .unwrap() - .is_some()); - assert!(g - .get_node_annos() - .get_node_id_from_name("SegmentationWithGaps/doc01#page2") - 
.unwrap() - .is_some()); + assert!( + g.get_node_annos() + .get_node_id_from_name("SegmentationWithGaps/doc01#tok_11") + .unwrap() + .is_some() + ); + assert!( + g.get_node_annos() + .get_node_id_from_name("SegmentationWithGaps/doc01#tok_12") + .unwrap() + .is_some() + ); + assert!( + g.get_node_annos() + .get_node_id_from_name("SegmentationWithGaps/doc01#tok_13") + .unwrap() + .is_some() + ); + assert!( + g.get_node_annos() + .get_node_id_from_name("SegmentationWithGaps/doc01#tok_14") + .unwrap() + .is_some() + ); + assert!( + g.get_node_annos() + .get_node_id_from_name("SegmentationWithGaps/doc01#page2") + .unwrap() + .is_some() + ); // Get the context for the token using the norm segmentation let g = cs @@ -959,31 +969,36 @@ fn subgraph_with_segmentation_and_gap() { ) .unwrap(); // Check that all token and the page are included - assert!(g - .get_node_annos() - .get_node_id_from_name("SegmentationWithGaps/doc01#tok_11") - .unwrap() - .is_some()); - assert!(g - .get_node_annos() - .get_node_id_from_name("SegmentationWithGaps/doc01#tok_12") - .unwrap() - .is_some()); - assert!(g - .get_node_annos() - .get_node_id_from_name("SegmentationWithGaps/doc01#tok_13") - .unwrap() - .is_some()); - assert!(g - .get_node_annos() - .get_node_id_from_name("SegmentationWithGaps/doc01#tok_14") - .unwrap() - .is_some()); - assert!(g - .get_node_annos() - .get_node_id_from_name("SegmentationWithGaps/doc01#page2") - .unwrap() - .is_some()); + assert!( + g.get_node_annos() + .get_node_id_from_name("SegmentationWithGaps/doc01#tok_11") + .unwrap() + .is_some() + ); + assert!( + g.get_node_annos() + .get_node_id_from_name("SegmentationWithGaps/doc01#tok_12") + .unwrap() + .is_some() + ); + assert!( + g.get_node_annos() + .get_node_id_from_name("SegmentationWithGaps/doc01#tok_13") + .unwrap() + .is_some() + ); + assert!( + g.get_node_annos() + .get_node_id_from_name("SegmentationWithGaps/doc01#tok_14") + .unwrap() + .is_some() + ); + assert!( + g.get_node_annos() + 
.get_node_id_from_name("SegmentationWithGaps/doc01#page2") + .unwrap() + .is_some() + ); } #[test] @@ -1321,12 +1336,14 @@ fn import_relative_corpus_with_linked_file() { let mut files = files.unwrap(); let first_file = files.next().unwrap().unwrap(); assert_eq!("linked_file.txt", first_file.0); - assert!(is_same_file( - tmp.path() - .join("CorpusWithLinkedFile/files/linked_file.txt"), - &first_file.1 - ) - .unwrap()); + assert!( + is_same_file( + tmp.path() + .join("CorpusWithLinkedFile/files/linked_file.txt"), + &first_file.1 + ) + .unwrap() + ); let file_content = std::fs::read_to_string(first_file.1).unwrap(); assert_eq!("The content of this file is not important.", file_content); } diff --git a/graphannis/src/annis/db/example_generator.rs b/graphannis/src/annis/db/example_generator.rs index 7c7ce3e9b..7d106644a 100644 --- a/graphannis/src/annis/db/example_generator.rs +++ b/graphannis/src/annis/db/example_generator.rs @@ -1,6 +1,6 @@ use graphannis_core::graph::{ - update::{GraphUpdate, UpdateEvent}, ANNIS_NS, DEFAULT_NS, + update::{GraphUpdate, UpdateEvent}, }; use crate::model::AnnotationComponentType; diff --git a/graphannis/src/annis/db/exec/filter.rs b/graphannis/src/annis/db/exec/filter.rs index f828eb329..84bd55566 100644 --- a/graphannis/src/annis/db/exec/filter.rs +++ b/graphannis/src/annis/db/exec/filter.rs @@ -76,11 +76,7 @@ impl<'a> Filter<'a> { .map(move |tuple| { let tuple = tuple?; let include = op_entry.op.filter_match(&tuple[lhs_idx], &tuple[rhs_idx])?; - if include { - Ok(Some(tuple)) - } else { - Ok(None) - } + if include { Ok(Some(tuple)) } else { Ok(None) } }) .filter_map_ok(|t| t); Ok(Filter { diff --git a/graphannis/src/annis/db/exec/nestedloop.rs b/graphannis/src/annis/db/exec/nestedloop.rs index 3d4d360ca..d4cdcffd0 100644 --- a/graphannis/src/annis/db/exec/nestedloop.rs +++ b/graphannis/src/annis/db/exec/nestedloop.rs @@ -31,12 +31,11 @@ impl<'a> NestedLoop<'a> { rhs_idx: usize, ) -> Result> { let mut left_is_outer = true; - if let 
(Some(desc_lhs), Some(desc_rhs)) = (lhs.get_desc(), rhs.get_desc()) { - if let (Some(cost_lhs), Some(cost_rhs)) = (&desc_lhs.cost, &desc_rhs.cost) { - if cost_lhs.output > cost_rhs.output { - left_is_outer = false; - } - } + if let (Some(desc_lhs), Some(desc_rhs)) = (lhs.get_desc(), rhs.get_desc()) + && let (Some(cost_lhs), Some(cost_rhs)) = (&desc_lhs.cost, &desc_rhs.cost) + && cost_lhs.output > cost_rhs.output + { + left_is_outer = false; } let processed_func = |_, out_lhs: usize, out_rhs: usize| { diff --git a/graphannis/src/annis/db/exec/nodesearch.rs b/graphannis/src/annis/db/exec/nodesearch.rs index 67a85c3cd..6a0ccc08d 100644 --- a/graphannis/src/annis/db/exec/nodesearch.rs +++ b/graphannis/src/annis/db/exec/nodesearch.rs @@ -1,5 +1,6 @@ use super::MatchValueFilterFunc; use super::{ExecutionNode, ExecutionNodeDesc, NodeSearchDesc}; +use crate::AnnotationGraph; use crate::annis::db::aql::model::AnnotationComponentType; use crate::annis::db::exec::tokensearch; use crate::annis::db::exec::tokensearch::AnyTokenSearch; @@ -7,7 +8,6 @@ use crate::annis::errors::*; use crate::annis::operator::EdgeAnnoSearchSpec; use crate::annis::types::LineColumnRange; use crate::annis::util::TimeoutCheck; -use crate::AnnotationGraph; use crate::{ annis::{db::aql::model::TOKEN_KEY, util}, graph::Match, @@ -17,7 +17,7 @@ use graphannis_core::errors::GraphAnnisCoreError; use graphannis_core::graph::{ANNIS_NS, NODE_NAME}; use graphannis_core::{ annostorage::{MatchGroup, ValueSearch}, - graph::{storage::GraphStorage, NODE_TYPE_KEY}, + graph::{NODE_TYPE_KEY, storage::GraphStorage}, types::{Component, NodeID}, }; use itertools::Itertools; @@ -299,12 +299,7 @@ impl NodeSearchSpec { impl fmt::Display for NodeSearchSpec { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - NodeSearchSpec::ExactValue { - ref ns, - ref name, - ref val, - .. - } => { + NodeSearchSpec::ExactValue { ns, name, val, .. 
} => { if let (Some(ns), Some(val)) = (ns, val) { write!(f, "{}:{}=\"{}\"", ns, name, val) } else if let Some(ns) = ns { @@ -315,64 +310,43 @@ impl fmt::Display for NodeSearchSpec { write!(f, "{}", name) } } - NodeSearchSpec::NotExactValue { - ref ns, - ref name, - ref val, - .. - } => { - if let Some(ref ns) = ns { + NodeSearchSpec::NotExactValue { ns, name, val, .. } => { + if let Some(ns) = ns { write!(f, "{}:{}!=\"{}\"", ns, name, &val) } else { write!(f, "{}!=\"{}\"", name, &val) } } - NodeSearchSpec::RegexValue { - ref ns, - ref name, - ref val, - .. - } => { + NodeSearchSpec::RegexValue { ns, name, val, .. } => { if let Some(ns) = ns { write!(f, "{}:{}=/{}/", ns, name, &val) } else { write!(f, "{}=/{}/", name, &val) } } - NodeSearchSpec::NotRegexValue { - ref ns, - ref name, - ref val, - .. - } => { - if let Some(ref ns) = ns { + NodeSearchSpec::NotRegexValue { ns, name, val, .. } => { + if let Some(ns) = ns { write!(f, "{}:{}!=/{}/", ns, name, &val) } else { write!(f, "{}!=/{}/", name, &val) } } - NodeSearchSpec::ExactTokenValue { - ref val, - ref leafs_only, - } => { + NodeSearchSpec::ExactTokenValue { val, leafs_only } => { if *leafs_only { write!(f, "tok=\"{}\"", val) } else { write!(f, "\"{}\"", val) } } - NodeSearchSpec::NotExactTokenValue { ref val } => write!(f, "tok!=\"{}\"", val), - NodeSearchSpec::RegexTokenValue { - ref val, - ref leafs_only, - } => { + NodeSearchSpec::NotExactTokenValue { val } => write!(f, "tok!=\"{}\"", val), + NodeSearchSpec::RegexTokenValue { val, leafs_only } => { if *leafs_only { write!(f, "tok=/{}/", val) } else { write!(f, "/{}/", val) } } - NodeSearchSpec::NotRegexTokenValue { ref val } => write!(f, "tok!=/{}/", val), + NodeSearchSpec::NotRegexTokenValue { val } => write!(f, "tok!=/{}/", val), NodeSearchSpec::AnyToken => write!(f, "tok"), NodeSearchSpec::AnyNode => write!(f, "node"), } @@ -651,7 +625,7 @@ impl<'a> NodeSearch<'a> { *cached } else { match val { - ValueSearch::Some(ref val) => { + ValueSearch::Some(val) => 
{ if qname.0.as_deref() == Some(ANNIS_NS) && qname.1 == NODE_NAME { // Our data model assumes that annis::node_name annotations are unique 1 @@ -659,12 +633,12 @@ impl<'a> NodeSearch<'a> { db.get_node_annos().guess_max_count( qname.0.as_deref(), &qname.1, - val, - val, + &val, + &val, )? } } - ValueSearch::NotSome(ref val) => { + ValueSearch::NotSome(val) => { let total = db .get_node_annos() .number_of_annotations_by_name(qname.0.as_deref(), &qname.1)?; @@ -672,8 +646,8 @@ impl<'a> NodeSearch<'a> { - db.get_node_annos().guess_max_count( qname.0.as_deref(), &qname.1, - val, - val, + &val, + &val, )? } ValueSearch::Any => db diff --git a/graphannis/src/annis/db/exec/parallel/indexjoin.rs b/graphannis/src/annis/db/exec/parallel/indexjoin.rs index 23a7d1aff..afc28c3ef 100644 --- a/graphannis/src/annis/db/exec/parallel/indexjoin.rs +++ b/graphannis/src/annis/db/exec/parallel/indexjoin.rs @@ -7,8 +7,8 @@ use graphannis_core::{annostorage::MatchGroup, types::NodeID}; use rayon::prelude::*; use std::error::Error; use std::iter::Peekable; -use std::sync::mpsc::{channel, Receiver, Sender}; use std::sync::Arc; +use std::sync::mpsc::{Receiver, Sender, channel}; const MAX_BUFFER_SIZE: usize = 512; diff --git a/graphannis/src/annis/db/exec/parallel/nestedloop.rs b/graphannis/src/annis/db/exec/parallel/nestedloop.rs index 05706ad40..98d436285 100644 --- a/graphannis/src/annis/db/exec/parallel/nestedloop.rs +++ b/graphannis/src/annis/db/exec/parallel/nestedloop.rs @@ -4,8 +4,8 @@ use crate::annis::operator::BinaryOperatorBase; use crate::errors::Result; use graphannis_core::annostorage::MatchGroup; use rayon::prelude::*; -use std::sync::mpsc::{channel, Receiver, Sender}; use std::sync::Arc; +use std::sync::mpsc::{Receiver, Sender, channel}; const MAX_BUFFER_SIZE: usize = 1024; @@ -39,12 +39,11 @@ impl<'a> NestedLoop<'a> { rhs_idx: usize, ) -> Result> { let mut left_is_outer = true; - if let (Some(desc_lhs), Some(desc_rhs)) = (lhs.get_desc(), rhs.get_desc()) { - if let 
(Some(cost_lhs), Some(cost_rhs)) = (&desc_lhs.cost, &desc_rhs.cost) { - if cost_lhs.output > cost_rhs.output { - left_is_outer = false; - } - } + if let (Some(desc_lhs), Some(desc_rhs)) = (lhs.get_desc(), rhs.get_desc()) + && let (Some(cost_lhs), Some(cost_rhs)) = (&desc_lhs.cost, &desc_rhs.cost) + && cost_lhs.output > cost_rhs.output + { + left_is_outer = false; } let processed_func = |_, out_lhs: usize, out_rhs: usize| { diff --git a/graphannis/src/annis/db/exec/tokensearch.rs b/graphannis/src/annis/db/exec/tokensearch.rs index f0e2d3e67..0c93acb8c 100644 --- a/graphannis/src/annis/db/exec/tokensearch.rs +++ b/graphannis/src/annis/db/exec/tokensearch.rs @@ -5,13 +5,13 @@ use crate::annis::db::sort_matches::SortCache; use crate::annis::db::token_helper; use crate::annis::util::quicksort; use crate::{ - annis::db::aql::model::AnnotationComponentType, annis::db::token_helper::TokenHelper, - errors::Result, graph::Match, AnnotationGraph, + AnnotationGraph, annis::db::aql::model::AnnotationComponentType, + annis::db::token_helper::TokenHelper, errors::Result, graph::Match, }; use graphannis_core::errors::GraphAnnisCoreError; use graphannis_core::{ annostorage::MatchGroup, - graph::{storage::GraphStorage, ANNIS_NS, NODE_TYPE_KEY}, + graph::{ANNIS_NS, NODE_TYPE_KEY, storage::GraphStorage}, types::{AnnoKey, Component, NodeID}, }; use itertools::Itertools; @@ -66,10 +66,10 @@ impl<'a> AnyTokenSearch<'a> { fn is_root_tok(&self, n: NodeID) -> Result { // Return early if node has ingoing edges and is not a root token - if let Some(order_gs) = self.order_gs { - if order_gs.has_ingoing_edges(n)? { - return Ok(false); - } + if let Some(order_gs) = self.order_gs + && order_gs.has_ingoing_edges(n)? 
+ { + return Ok(false); } // Token also should also have no outgoing coverage edges diff --git a/graphannis/src/annis/db/plan.rs b/graphannis/src/annis/db/plan.rs index 7fbf6f6cd..00d58c979 100644 --- a/graphannis/src/annis/db/plan.rs +++ b/graphannis/src/annis/db/plan.rs @@ -1,9 +1,9 @@ -use crate::annis::db::aql::disjunction::Disjunction; +use crate::AnnotationGraph; use crate::annis::db::aql::Config; +use crate::annis::db::aql::disjunction::Disjunction; use crate::annis::db::exec::{EmptyResultSet, ExecutionNode, ExecutionNodeDesc}; use crate::annis::errors::*; use crate::annis::util::TimeoutCheck; -use crate::AnnotationGraph; use graphannis_core::annostorage::match_group_with_symbol_ids; use graphannis_core::annostorage::symboltable::SymbolTable; use graphannis_core::{ @@ -66,10 +66,10 @@ impl<'a> ExecutionPlan<'a> { } plans.push(p); - } else if let Err(e) = p { - if let GraphAnnisError::AQLSemanticError(_) = &e { - return Err(e); - } + } else if let Err(e) = p + && let GraphAnnisError::AQLSemanticError(_) = &e + { + return Err(e); } } @@ -96,7 +96,7 @@ impl<'a> ExecutionPlan<'a> { /// they are still included in the vector but you can not use the node ID at /// this position. 
fn reorder_match(&self, tmp: MatchGroup) -> MatchGroup { - if let Some(ref inverse_node_pos) = self.inverse_node_pos[self.current_plan] { + if let Some(inverse_node_pos) = &self.inverse_node_pos[self.current_plan] { // re-order the matched nodes by the original node position of the query let mut result = MatchGroup::new(); // We cannot assume that every node has a mapping, so use the maximum index @@ -120,7 +120,7 @@ impl<'a> ExecutionPlan<'a> { pub fn estimated_output_size(&self) -> usize { let mut estimation = 0; for desc in self.descriptions.iter().flatten() { - if let Some(ref cost) = desc.cost { + if let Some(cost) = &desc.cost { estimation += cost.output; } } @@ -153,7 +153,7 @@ impl std::fmt::Display for ExecutionPlan<'_> { if i > 0 { writeln!(f, "---[OR]---")?; } - if let Some(ref d) = d { + if let Some(d) = d { write!(f, "{}", d.debug_string(""))?; } else { write!(f, "")?; diff --git a/graphannis/src/annis/db/relannis.rs b/graphannis/src/annis/db/relannis.rs index 60f8d57eb..6287f66bf 100644 --- a/graphannis/src/annis/db/relannis.rs +++ b/graphannis/src/annis/db/relannis.rs @@ -5,6 +5,7 @@ use crate::annis::types::TimelineStrategy; use crate::annis::util::create_str_vec_key; use crate::update::{GraphUpdate, UpdateEvent}; use crate::{ + AnnotationGraph, annis::{ db::aql::model::TOK, types::{ @@ -13,7 +14,6 @@ use crate::{ }, }, corpusstorage::QueryLanguage, - AnnotationGraph, }; use graphannis_core::serializer::KeyVec; use graphannis_core::{ @@ -719,12 +719,12 @@ where let key = splitted[0]; let value = splitted[1]; - if let "context-steps" = key { - if let Ok(value) = value.parse::() { - config.context.sizes = (value..=config.context.max.unwrap_or(value)) - .step_by(value) - .collect(); - } + if let "context-steps" = key + && let Ok(value) = value.parse::() + { + config.context.sizes = (value..=config.context.max.unwrap_or(value)) + .step_by(value) + .collect(); } } } @@ -1031,31 +1031,30 @@ where for token in token_by_index.iter()? 
{ let (current_textprop, current_token) = token?; // if the last token/text value is valid and we are still in the same text - if let (Some(last_token), Some(last_textprop)) = (last_token, last_textprop) { - if last_textprop.corpus_id == current_textprop.corpus_id - && last_textprop.text_id == current_textprop.text_id - && last_textprop.segmentation == current_textprop.segmentation - { - // we are still in the same text, add ordering between token - let ordering_layer = if current_textprop.segmentation.is_empty() { - ANNIS_NS.to_owned() - } else { - DEFAULT_NS.to_owned() - }; - updates.add_event(UpdateEvent::AddEdge { - source_node: id_to_node_name - .get(&last_token)? - .ok_or(RelAnnisError::NodeNotFound(last_token))? - .to_string(), - target_node: id_to_node_name - .get(&current_token)? - .ok_or(RelAnnisError::NodeNotFound(current_token))? - .to_string(), - layer: ordering_layer, - component_type: AnnotationComponentType::Ordering.to_string(), - component_name: current_textprop.segmentation.clone().into(), - })?; - } + if let (Some(last_token), Some(last_textprop)) = (last_token, last_textprop) + && last_textprop.corpus_id == current_textprop.corpus_id + && last_textprop.text_id == current_textprop.text_id + && last_textprop.segmentation == current_textprop.segmentation + { + // we are still in the same text, add ordering between token + let ordering_layer = if current_textprop.segmentation.is_empty() { + ANNIS_NS.to_owned() + } else { + DEFAULT_NS.to_owned() + }; + updates.add_event(UpdateEvent::AddEdge { + source_node: id_to_node_name + .get(&last_token)? + .ok_or(RelAnnisError::NodeNotFound(last_token))? + .to_string(), + target_node: id_to_node_name + .get(&current_token)? + .ok_or(RelAnnisError::NodeNotFound(current_token))?
+ .to_string(), + layer: ordering_layer, + component_type: AnnotationComponentType::Ordering.to_string(), + component_name: current_textprop.segmentation.clone().into(), + })?; } // end if same text // update the iterator and other variables @@ -1220,19 +1219,18 @@ where .textpos_table .token_to_index .contains_key(&n)? - { - if let Err(e) = add_automatic_cov_edge_for_node( + && let Err(e) = add_automatic_cov_edge_for_node( updates, n, load_node_and_corpus_result, load_rank_result, - ) { - // output a warning but do not fail - warn!( - "Adding coverage edges (connects spans with tokens) failed: {}", - e - ) - } + ) + { + // output a warning but do not fail + warn!( + "Adding coverage edges (connects spans with tokens) failed: {}", + e + ) } // end if not a token } @@ -1324,12 +1322,11 @@ where // Get the token borders of the next token to determine where the whitespace after this token is // The whitespace end position is non-inclusive. let mut whitespace_end_pos = None; - if let Some(Ok((_, next_token_id))) = token_iterator.peek() { - if let Some(next_token_left_pos) = + if let Some(Ok((_, next_token_id))) = token_iterator.peek() + && let Some(next_token_left_pos) = textpos_table.node_to_left_char.get(next_token_id)? 
- { - whitespace_end_pos = Some(next_token_left_pos.val as usize); - } + { + whitespace_end_pos = Some(next_token_left_pos.val as usize); } // Get the covered text which either goes until the next token or until the end of the text if there is none @@ -1469,15 +1466,15 @@ where })?; id_to_node_name.insert(node_nr, node_path.clone().into())?; - if let Some(layer) = layer { - if !layer.is_empty() { - updates.add_event(UpdateEvent::AddNodeLabel { - node_name: node_path.clone(), - anno_ns: ANNIS_NS.to_owned(), - anno_name: "layer".to_owned(), - anno_value: layer.to_string(), - })?; - } + if let Some(layer) = layer + && !layer.is_empty() + { + updates.add_event(UpdateEvent::AddNodeLabel { + node_name: node_path.clone(), + anno_ns: ANNIS_NS.to_owned(), + anno_name: "layer".to_owned(), + anno_value: layer.to_string(), + })?; } // Add the raw character offsets so it is possible to extract the text later on @@ -1893,12 +1890,12 @@ where load_rank_result.edges_by_pre.insert(pre, e)?; } } - } else if let Some(c) = component_by_id.get(&component_ref) { - if c.get_type() == AnnotationComponentType::Coverage { - load_rank_result - .component_for_parentless_target_node - .insert(target, c.clone())?; - } + } else if let Some(c) = component_by_id.get(&component_ref) + && c.get_type() == AnnotationComponentType::Coverage + { + load_rank_result + .component_for_parentless_target_node + .insert(target, c.clone())?; } } @@ -1945,31 +1942,31 @@ where let line = result?; let pre = get_field_not_null(&line, 0, "pre", &edge_anno_tab_path)?.parse::()?; - if let Some(c) = rank_result.components_by_pre.get(&pre)? { - if let Some(e) = rank_result.edges_by_pre.get(&pre)? 
{ - let ns = get_field(&line, 1, "namespace", &edge_anno_tab_path)?.unwrap_or_default(); - let name = get_field_not_null(&line, 2, "name", &edge_anno_tab_path)?; - // If 'NULL', use an "invalid" string so it can't be found by its value, but only by its annotation name - let val = get_field(&line, 3, "value", &edge_anno_tab_path)? - .unwrap_or_else(|| INVALID_STRING.clone()); - - updates.add_event(UpdateEvent::AddEdgeLabel { - source_node: id_to_node_name - .get(&e.source)? - .ok_or(RelAnnisError::NodeNotFound(e.source))? - .to_string(), - target_node: id_to_node_name - .get(&e.target)? - .ok_or(RelAnnisError::NodeNotFound(e.target))? - .to_string(), - layer: c.layer.clone().into(), - component_type: c.get_type().to_string(), - component_name: c.name.to_string(), - anno_ns: ns.to_string(), - anno_name: name.to_string(), - anno_value: val.to_string(), - })?; - } + if let Some(c) = rank_result.components_by_pre.get(&pre)? + && let Some(e) = rank_result.edges_by_pre.get(&pre)? + { + let ns = get_field(&line, 1, "namespace", &edge_anno_tab_path)?.unwrap_or_default(); + let name = get_field_not_null(&line, 2, "name", &edge_anno_tab_path)?; + // If 'NULL', use an "invalid" string so it can't be found by its value, but only by its annotation name + let val = get_field(&line, 3, "value", &edge_anno_tab_path)? + .unwrap_or_else(|| INVALID_STRING.clone()); + + updates.add_event(UpdateEvent::AddEdgeLabel { + source_node: id_to_node_name + .get(&e.source)? + .ok_or(RelAnnisError::NodeNotFound(e.source))? + .to_string(), + target_node: id_to_node_name + .get(&e.target)? + .ok_or(RelAnnisError::NodeNotFound(e.target))? 
+ .to_string(), + layer: c.layer.clone().into(), + component_type: c.get_type().to_string(), + component_name: c.name.to_string(), + anno_ns: ns.to_string(), + anno_name: name.to_string(), + anno_value: val.to_string(), + })?; } } diff --git a/graphannis/src/annis/db/sort_matches.rs b/graphannis/src/annis/db/sort_matches.rs index d6fbf91a9..fe196d1f9 100644 --- a/graphannis/src/annis/db/sort_matches.rs +++ b/graphannis/src/annis/db/sort_matches.rs @@ -2,7 +2,7 @@ use crate::annis::db::token_helper::TokenHelper; use crate::{errors::Result, graph::Match}; use graphannis_core::annostorage::NodeAnnotationStorage; use graphannis_core::{ - graph::{storage::GraphStorage, ANNIS_NS, NODE_NAME}, + graph::{ANNIS_NS, NODE_NAME, storage::GraphStorage}, types::{AnnoKey, NodeID}, }; use lru::LruCache; @@ -254,8 +254,8 @@ mod tests { fn tiger_doc_name_sort_strcoll() { use std::time::Duration; - std::env::set_var("LANG", "en_US.utf8"); unsafe { + std::env::set_var("LANG", "en_US.utf8"); let locale = CString::new("").unwrap_or_default(); libc::setlocale(libc::LC_COLLATE, locale.as_ptr()); std::thread::sleep(Duration::from_millis(500)); diff --git a/graphannis/src/annis/db/token_helper.rs b/graphannis/src/annis/db/token_helper.rs index 1bfd4954d..d896042b5 100644 --- a/graphannis/src/annis/db/token_helper.rs +++ b/graphannis/src/annis/db/token_helper.rs @@ -1,8 +1,8 @@ use crate::{ + AnnotationGraph, annis::db::aql::model::{AnnotationComponentType, TOKEN_KEY}, errors::Result, graph::GraphStorage, - AnnotationGraph, }; use graphannis_core::{ annostorage::NodeAnnotationStorage, diff --git a/graphannis/src/annis/errors.rs b/graphannis/src/annis/errors.rs index 873852630..09bc8071b 100644 --- a/graphannis/src/annis/errors.rs +++ b/graphannis/src/annis/errors.rs @@ -81,6 +81,8 @@ pub enum GraphAnnisError { IndexOutOfBounds(usize), #[error("The annotation graph that shall be queried is not fully loaded.")] QueriedGraphNotFullyLoaded, + #[error(transparent)] + 
InvalidUniformDistribution(#[from] rand::distr::uniform::Error), } impl From> for GraphAnnisError { diff --git a/graphannis/src/annis/operator.rs b/graphannis/src/annis/operator.rs index d3955055c..2ee776cd1 100644 --- a/graphannis/src/annis/operator.rs +++ b/graphannis/src/annis/operator.rs @@ -1,5 +1,5 @@ use super::db::{aql::model::AnnotationComponentType, exec::CostEstimate}; -use crate::{errors::Result, graph::Match, AnnotationGraph}; +use crate::{AnnotationGraph, errors::Result, graph::Match}; use graphannis_core::{annostorage::EdgeAnnotationStorage, types::Component}; use std::{collections::HashSet, fmt::Display}; @@ -31,29 +31,21 @@ pub enum EdgeAnnoSearchSpec { impl std::fmt::Display for EdgeAnnoSearchSpec { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { match self { - EdgeAnnoSearchSpec::ExactValue { - ref ns, - ref name, - ref val, - } => { - let qname = if let Some(ref ns) = ns { + EdgeAnnoSearchSpec::ExactValue { ns, name, val } => { + let qname = if let Some(ns) = ns { format!("{}:{}", ns, name) } else { name.clone() }; - if let Some(ref val) = val { + if let Some(val) = val { write!(f, "{}=\"{}\"", qname, val) } else { write!(f, "{}", qname) } } - EdgeAnnoSearchSpec::NotExactValue { - ref ns, - ref name, - ref val, - } => { - let qname = if let Some(ref ns) = ns { + EdgeAnnoSearchSpec::NotExactValue { ns, name, val } => { + let qname = if let Some(ns) = ns { format!("{}:{}", ns, name) } else { name.clone() @@ -61,12 +53,8 @@ impl std::fmt::Display for EdgeAnnoSearchSpec { write!(f, "{}!=\"{}\"", qname, val) } - EdgeAnnoSearchSpec::RegexValue { - ref ns, - ref name, - ref val, - } => { - let qname = if let Some(ref ns) = ns { + EdgeAnnoSearchSpec::RegexValue { ns, name, val } => { + let qname = if let Some(ns) = ns { format!("{}:{}", ns, name) } else { name.clone() @@ -74,12 +62,8 @@ impl std::fmt::Display for EdgeAnnoSearchSpec { write!(f, "{}=/{}/", qname, val) } - EdgeAnnoSearchSpec::NotRegexValue { - ref ns, - ref name, - ref 
val, - } => { - let qname = if let Some(ref ns) = ns { + EdgeAnnoSearchSpec::NotRegexValue { ns, name, val } => { + let qname = if let Some(ns) = ns { format!("{}:{}", ns, name) } else { name.clone() @@ -94,11 +78,7 @@ impl std::fmt::Display for EdgeAnnoSearchSpec { impl EdgeAnnoSearchSpec { pub fn guess_max_count(&self, anno_storage: &dyn EdgeAnnotationStorage) -> Result { match self { - EdgeAnnoSearchSpec::ExactValue { - ref ns, - ref name, - ref val, - } => { + EdgeAnnoSearchSpec::ExactValue { ns, name, val } => { let result = if let Some(val) = val { anno_storage.guess_max_count(ns.as_ref().map(String::as_str), name, val, val)? } else { @@ -107,11 +87,7 @@ impl EdgeAnnoSearchSpec { }; Ok(result) } - EdgeAnnoSearchSpec::NotExactValue { - ref ns, - ref name, - ref val, - } => { + EdgeAnnoSearchSpec::NotExactValue { ns, name, val } => { let total = anno_storage .number_of_annotations_by_name(ns.as_ref().map(String::as_str), name)?; let result = total @@ -123,20 +99,10 @@ impl EdgeAnnoSearchSpec { )?; Ok(result) } - EdgeAnnoSearchSpec::RegexValue { - ref ns, - ref name, - ref val, - } => Ok(anno_storage.guess_max_count_regex( - ns.as_ref().map(String::as_str), - name, - val, - )?), - EdgeAnnoSearchSpec::NotRegexValue { - ref ns, - ref name, - ref val, - } => { + EdgeAnnoSearchSpec::RegexValue { ns, name, val } => Ok( + anno_storage.guess_max_count_regex(ns.as_ref().map(String::as_str), name, val)? 
+ ), + EdgeAnnoSearchSpec::NotRegexValue { ns, name, val } => { let total = anno_storage .number_of_annotations_by_name(ns.as_ref().map(String::as_str), name)?; let result = total diff --git a/graphannis/src/annis/util/quicksort.rs b/graphannis/src/annis/util/quicksort.rs index eb54a736e..5e36e30da 100644 --- a/graphannis/src/annis/util/quicksort.rs +++ b/graphannis/src/annis/util/quicksort.rs @@ -109,11 +109,11 @@ where if (item_range.end - item_range.start) == 1 { Ok(item_range.start) } else { - let mut rng = rand::thread_rng(); + let mut rng = rand::rng(); // Use the median of 3 random positions as pivot - let i1 = rng.gen_range(item_range.clone()); - let i2 = rng.gen_range(item_range.clone()); - let i3 = rng.gen_range(item_range.clone()); + let i1 = rng.random_range(item_range.clone()); + let i2 = rng.random_range(item_range.clone()); + let i3 = rng.random_range(item_range.clone()); let v1 = (i1, items.try_get(i1)?.into_owned()); let v2 = (i2, items.try_get(i2)?.into_owned()); @@ -177,9 +177,8 @@ where mod test { use rand; - use rand::distributions::Distribution; - use rand::Rng; - use serde::{de::DeserializeOwned, Serialize}; + use rand::prelude::*; + use serde::{Serialize, de::DeserializeOwned}; use transient_btree_index::{BtreeConfig, BtreeIndex}; fn index_from_vec(items: Vec) -> BtreeIndex @@ -216,7 +215,9 @@ mod test { let num_items = items.len(); super::sort_first_n_items(&mut items, num_items, |x, y| Ok(x.cmp(y))).unwrap(); assert_eq!( - vec![1, 1, 3, 4, 4, 5, 5, 5, 10, 10, 10, 20, 23, 32, 42, 42, 56, 99, 100, 101, 202], + vec![ + 1, 1, 3, 4, 4, 5, 5, 5, 10, 10, 10, 20, 23, 32, 42, 42, 56, 99, 100, 101, 202 + ], items ); @@ -250,12 +251,12 @@ mod test { #[test] fn random_sort_vec() { // compare 100 random arrays against the standard library sort - let mut rng = rand::thread_rng(); - let random_item_gen = rand::distributions::Uniform::from(1..100); + let mut rng = rand::rng(); + let random_item_gen = rand::distr::Uniform::new(1, 100).unwrap(); for _i in 
0..100 { // the arrays should have a size from 40 to 50 - let items_size = rng.gen_range(40..51); + let items_size = rng.random_range(40..51); let mut items = Vec::with_capacity(items_size); for _j in 0..items_size { items.push(random_item_gen.sample(&mut rng)); @@ -276,7 +277,9 @@ mod test { let num_items = items.len(); super::sort_first_n_items(&mut items, num_items, |x, y| Ok(x.cmp(y))).unwrap(); assert_eq!( - vec![1, 1, 3, 4, 4, 5, 5, 5, 10, 10, 10, 20, 23, 32, 42, 42, 56, 99, 100, 101, 202], + vec![ + 1, 1, 3, 4, 4, 5, 5, 5, 10, 10, 10, 20, 23, 32, 42, 42, 56, 99, 100, 101, 202 + ], index_to_vec(items) ); @@ -312,12 +315,12 @@ mod test { #[test] fn random_sort_btree() { // compare 100 random arrays against the standard library sort - let mut rng = rand::thread_rng(); - let random_item_gen = rand::distributions::Uniform::from(1..100); + let mut rng = rand::rng(); + let random_item_gen = rand::distr::Uniform::new(1, 100).unwrap(); for _i in 0..100 { // the arrays should have a size from 40 to 50 - let items_size = rng.gen_range(40..51); + let items_size = rng.random_range(40..51); let mut items = BtreeIndex::with_capacity(BtreeConfig::default(), items_size).unwrap(); let mut items_vec = Vec::new(); for j in 0..items_size { diff --git a/graphannis/src/annis/util/sortablecontainer.rs b/graphannis/src/annis/util/sortablecontainer.rs index 2b77ad7c2..a180283a4 100644 --- a/graphannis/src/annis/util/sortablecontainer.rs +++ b/graphannis/src/annis/util/sortablecontainer.rs @@ -1,7 +1,7 @@ use std::borrow::Cow; use crate::annis::errors::{GraphAnnisError, Result}; -use serde::{de::DeserializeOwned, Serialize}; +use serde::{Serialize, de::DeserializeOwned}; pub trait SortableContainer: Send { /// Swaps two elements in the container. diff --git a/graphannis/src/lib.rs b/graphannis/src/lib.rs index b10706d5b..fee18146b 100644 --- a/graphannis/src/lib.rs +++ b/graphannis/src/lib.rs @@ -77,7 +77,7 @@ pub mod errors { /// Utility functions. 
pub mod util { + pub use crate::annis::util::SearchDef; pub use crate::annis::util::get_queries_from_csv; pub use crate::annis::util::node_names_from_match; - pub use crate::annis::util::SearchDef; } diff --git a/graphannis/tests/searchtest.rs b/graphannis/tests/searchtest.rs index 5157e254a..4c44966ee 100644 --- a/graphannis/tests/searchtest.rs +++ b/graphannis/tests/searchtest.rs @@ -2,8 +2,8 @@ extern crate graphannis; #[macro_use] extern crate lazy_static; -use graphannis::corpusstorage::{QueryLanguage, SearchQuery}; use graphannis::CorpusStorage; +use graphannis::corpusstorage::{QueryLanguage, SearchQuery}; use std::path::PathBuf; @@ -288,7 +288,12 @@ fn exclude_optional_node_in_between() { .unwrap(); // Only node #1 and #3 should be part of the output - assert_eq!(vec!["ref::entity::GUM/GUM_interview_ants#referent_291 ref::infstat::GUM/GUM_interview_ants#referent_321"], result); + assert_eq!( + vec![ + "ref::entity::GUM/GUM_interview_ants#referent_291 ref::infstat::GUM/GUM_interview_ants#referent_321" + ], + result + ); } #[ignore] @@ -312,7 +317,12 @@ fn exclude_optional_node_at_end() { .unwrap(); // Only node #1 and #2 should be part of the output - assert_eq!(vec!["ref::entity::GUM/GUM_interview_ants#referent_291 ref::infstat::GUM/GUM_interview_ants#referent_321"], result); + assert_eq!( + vec![ + "ref::entity::GUM/GUM_interview_ants#referent_291 ref::infstat::GUM/GUM_interview_ants#referent_321" + ], + result + ); } #[ignore] diff --git a/webservice/Cargo.toml b/webservice/Cargo.toml index 70dff4258..4d8db25d9 100644 --- a/webservice/Cargo.toml +++ b/webservice/Cargo.toml @@ -1,7 +1,7 @@ [package] authors = ["Thomas Krause "] description = "This is a web service to the new backend implementation of the ANNIS linguistic search and visualization system." 
-edition = "2018" +edition = "2024" license = "Apache-2.0" name = "graphannis-webservice" readme = "crate-info.md" diff --git a/webservice/src/api/administration.rs b/webservice/src/api/administration.rs index ed30a5331..8d6b5e82b 100644 --- a/webservice/src/api/administration.rs +++ b/webservice/src/api/administration.rs @@ -1,9 +1,9 @@ use super::check_is_admin; use crate::{ - actions, errors::ServiceError, extractors::ClaimsFromAuth, settings::Settings, DbPool, + DbPool, actions, errors::ServiceError, extractors::ClaimsFromAuth, settings::Settings, }; use actix_files::NamedFile; -use actix_web::{web, HttpRequest, HttpResponse}; +use actix_web::{HttpRequest, HttpResponse, web}; use futures::prelude::*; use graphannis::CorpusStorage; use std::io::Seek; @@ -144,29 +144,29 @@ pub async fn import_corpus( |status| { info!("Job {} update: {}", &id_as_string, status); // Add status report to background job messages - if let Ok(mut jobs) = background_jobs.jobs.lock() { - if let Some(j) = jobs.get_mut(&id) { - j.messages.push(status.to_string()); - } + if let Ok(mut jobs) = background_jobs.jobs.lock() + && let Some(j) = jobs.get_mut(&id) + { + j.messages.push(status.to_string()); } }, ); match import_result { Ok(corpora) => { - if let Ok(mut jobs) = background_jobs.jobs.lock() { - if let Some(j) = jobs.get_mut(&id) { - j.messages.push(format!("imported corpora {:?}", corpora)); - j.status = JobStatus::Finished(None); - } + if let Ok(mut jobs) = background_jobs.jobs.lock() + && let Some(j) = jobs.get_mut(&id) + { + j.messages.push(format!("imported corpora {:?}", corpora)); + j.status = JobStatus::Finished(None); } } Err(err) => { - if let Ok(mut jobs) = background_jobs.jobs.lock() { - if let Some(j) = jobs.get_mut(&id) { - j.messages - .push(format!("importing corpora failed: {:?}", err)); - j.status = JobStatus::Failed; - } + if let Ok(mut jobs) = background_jobs.jobs.lock() + && let Some(j) = jobs.get_mut(&id) + { + j.messages + .push(format!("importing corpora failed: 
{:?}", err)); + j.status = JobStatus::Failed; } } } @@ -203,10 +203,10 @@ fn export_corpus_background_taks( cs.export_to_zip(corpus_name, use_corpus_subdirectory, &mut zip, |status| { info!("Job {} update: {}", &id_as_string, status); // Add status report to background job messages - if let Ok(mut jobs) = background_jobs.jobs.lock() { - if let Some(j) = jobs.get_mut(&id) { - j.messages.push(status.to_string()); - } + if let Ok(mut jobs) = background_jobs.jobs.lock() + && let Some(j) = jobs.get_mut(&id) + { + j.messages.push(status.to_string()); } })?; } @@ -240,20 +240,20 @@ pub async fn export_corpus( std::thread::spawn(move || { match export_corpus_background_taks(&params.corpora, &cs, id, background_jobs.clone()) { Ok(tmp_file) => { - if let Ok(mut jobs) = background_jobs.jobs.lock() { - if let Some(j) = jobs.get_mut(&id) { - let created_file_name = params.corpora.join("_") + ".zip"; - j.status = JobStatus::Finished(Some((tmp_file, created_file_name))); - } + if let Ok(mut jobs) = background_jobs.jobs.lock() + && let Some(j) = jobs.get_mut(&id) + { + let created_file_name = params.corpora.join("_") + ".zip"; + j.status = JobStatus::Finished(Some((tmp_file, created_file_name))); } } Err(err) => { - if let Ok(mut jobs) = background_jobs.jobs.lock() { - if let Some(j) = jobs.get_mut(&id) { - j.messages - .push(format!("exporting corpora failed: {:?}", err)); - j.status = JobStatus::Failed; - } + if let Ok(mut jobs) = background_jobs.jobs.lock() + && let Some(j) = jobs.get_mut(&id) + { + j.messages + .push(format!("exporting corpora failed: {:?}", err)); + j.status = JobStatus::Failed; } } } @@ -275,11 +275,11 @@ pub async fn jobs( let uuid = uuid::Uuid::parse_str(&uuid)?; let mut jobs = background_jobs.jobs.lock()?; - if let Some(j) = jobs.get(&uuid) { - if let JobStatus::Running = j.status { - // Job still running, do not remove it from the job list - return Ok(HttpResponse::Accepted().json(j)); - } + if let Some(j) = jobs.get(&uuid) + && let JobStatus::Running =
j.status + { + // Job still running, do not remove it from the job list + return Ok(HttpResponse::Accepted().json(j)); } // Job is finished/errored: remove it from the list and process it if let Some(j) = jobs.remove(&uuid) { diff --git a/webservice/src/api/corpora.rs b/webservice/src/api/corpora.rs index 1637ee223..604aa1367 100644 --- a/webservice/src/api/corpora.rs +++ b/webservice/src/api/corpora.rs @@ -1,13 +1,13 @@ use super::{check_corpora_authorized_read, check_is_admin}; use crate::{ - actions, errors::ServiceError, extractors::ClaimsFromAuth, settings::Settings, DbPool, + DbPool, actions, errors::ServiceError, extractors::ClaimsFromAuth, settings::Settings, }; use actix_files::NamedFile; -use actix_web::{web, HttpResponse}; +use actix_web::{HttpResponse, web}; use graphannis::{ - corpusstorage::QueryLanguage, graph, model::AnnotationComponentType, CorpusStorage, + CorpusStorage, corpusstorage::QueryLanguage, graph, model::AnnotationComponentType, }; -use percent_encoding::{utf8_percent_encode, AsciiSet, CONTROLS}; +use percent_encoding::{AsciiSet, CONTROLS, utf8_percent_encode}; use std::{borrow::Cow, path::PathBuf}; pub const PATH_SEGMENT_ENCODE_SET: &AsciiSet = &CONTROLS diff --git a/webservice/src/api/mod.rs b/webservice/src/api/mod.rs index 7ed1c768b..02f793e52 100644 --- a/webservice/src/api/mod.rs +++ b/webservice/src/api/mod.rs @@ -1,4 +1,4 @@ -use crate::{actions, auth::Claims, errors::ServiceError, settings::Settings, DbPool}; +use crate::{DbPool, actions, auth::Claims, errors::ServiceError, settings::Settings}; use actix_web::web; pub mod administration; diff --git a/webservice/src/api/search.rs b/webservice/src/api/search.rs index 999e0093b..c283cbe91 100644 --- a/webservice/src/api/search.rs +++ b/webservice/src/api/search.rs @@ -1,15 +1,15 @@ use std::time::Duration; use super::check_corpora_authorized_read; -use crate::{errors::ServiceError, extractors::ClaimsFromAuth, settings::Settings, DbPool}; +use crate::{DbPool, errors::ServiceError, 
extractors::ClaimsFromAuth, settings::Settings}; use actix_web::{ - web::{self, Bytes}, HttpResponse, + web::{self, Bytes}, }; use futures::stream::iter; use graphannis::{ - corpusstorage::{FrequencyDefEntry, QueryLanguage, ResultOrder, SearchQuery}, CorpusStorage, + corpusstorage::{FrequencyDefEntry, QueryLanguage, ResultOrder, SearchQuery}, }; use serde::Deserialize; diff --git a/webservice/src/errors.rs b/webservice/src/errors.rs index 26b034ca1..17b5326b1 100644 --- a/webservice/src/errors.rs +++ b/webservice/src/errors.rs @@ -1,8 +1,8 @@ use std::sync::PoisonError; use actix_web::{ - error::{BlockingError, ResponseError}, HttpResponse, + error::{BlockingError, ResponseError}, }; use graphannis::errors::{AQLError, GraphAnnisError}; use graphannis_core::errors::GraphAnnisCoreError; @@ -50,9 +50,7 @@ enum BadRequestError { impl ResponseError for ServiceError { fn error_response(&self) -> HttpResponse { match self { - ServiceError::InvalidJWTToken(ref message) => { - HttpResponse::Unauthorized().json(message) - } + ServiceError::InvalidJWTToken(message) => HttpResponse::Unauthorized().json(message), ServiceError::NonAuthorizedCorpus(corpora) => HttpResponse::Forbidden().json(format!( "Not authorized to access corpus/corpora {}", corpora.join(", ") diff --git a/webservice/src/extractors.rs b/webservice/src/extractors.rs index ad14165e6..c73a73eb5 100644 --- a/webservice/src/extractors.rs +++ b/webservice/src/extractors.rs @@ -1,6 +1,6 @@ use crate::{auth::Claims, errors::ServiceError, settings::Settings}; -use actix_web::{web, FromRequest}; -use futures::future::{err, ok, ready, Ready}; +use actix_web::{FromRequest, web}; +use futures::future::{Ready, err, ok, ready}; #[derive(Debug, Clone, Serialize)] pub struct ClaimsFromAuth(pub Claims); @@ -26,21 +26,21 @@ impl FromRequest for ClaimsFromAuth { req: &actix_web::HttpRequest, _payload: &mut actix_web::dev::Payload, ) -> Self::Future { - if let Some(settings) = req.app_data::>() { - if let Some(authen_header) = 
req.headers().get("Authorization") { - // Parse header - if let Ok(authen_str) = authen_header.to_str() { - if authen_str.starts_with("bearer") || authen_str.starts_with("Bearer") { - // Parse and verify token - let token = authen_str[6..authen_str.len()].trim(); - return match verify_token(token, settings) { - // Use the verified claim - Ok(claim) => ok(ClaimsFromAuth(claim)), - // If a token was given but invalid, report an error - Err(e) => err(e), - }; - } - } + if let Some(settings) = req.app_data::>() + && let Some(authen_header) = req.headers().get("Authorization") + { + // Parse header + if let Ok(authen_str) = authen_header.to_str() + && (authen_str.starts_with("bearer") || authen_str.starts_with("Bearer")) + { + // Parse and verify token + let token = authen_str[6..authen_str.len()].trim(); + return match verify_token(token, settings) { + // Use the verified claim + Ok(claim) => ok(ClaimsFromAuth(claim)), + // If a token was given but invalid, report an error + Err(e) => err(e), + }; } } diff --git a/webservice/src/main.rs b/webservice/src/main.rs index da64ea35a..e6e7cde5f 100644 --- a/webservice/src/main.rs +++ b/webservice/src/main.rs @@ -17,14 +17,14 @@ use ::r2d2::Pool; use actix_cors::Cors; use actix_web::body::MessageBody; use actix_web::dev::{ServiceFactory, ServiceRequest, ServiceResponse}; -use actix_web::{http, middleware::Logger, web, App, HttpRequest, HttpResponse, HttpServer}; +use actix_web::{App, HttpRequest, HttpResponse, HttpServer, http, middleware::Logger, web}; use administration::BackgroundJobs; use anyhow::bail; use api::administration; use clap::Arg; use diesel::prelude::*; use diesel::r2d2::{self, ConnectionManager}; -use diesel_migrations::{embed_migrations, EmbeddedMigrations, MigrationHarness}; +use diesel_migrations::{EmbeddedMigrations, MigrationHarness, embed_migrations}; use graphannis::CorpusStorage; use log::{set_boxed_logger, set_max_level}; use settings::Settings; @@ -81,7 +81,9 @@ fn init_app_state() -> 
anyhow::Result<(graphannis::CorpusStorage, settings::Sett let (logger, fallback_logger) = create_logger(&settings)?; let log_level = logger.level(); if let Err(e) = set_boxed_logger(logger) { - println!("Error, can't initialize the terminal log output: {e}.\nWill degrade to a more simple logger"); + println!( + "Error, can't initialize the terminal log output: {e}.\nWill degrade to a more simple logger" + ); if let Err(e_simple) = set_boxed_logger(fallback_logger) { println!("Simple logging failed too: {e_simple}"); } diff --git a/webservice/src/tests.rs b/webservice/src/tests.rs index c4c615bcf..41f3d4ff2 100644 --- a/webservice/src/tests.rs +++ b/webservice/src/tests.rs @@ -4,16 +4,16 @@ use std::{ }; use actix_web::{ + App, body::MessageBody, dev::{ServiceFactory, ServiceRequest, ServiceResponse}, http::StatusCode, test, web::{self, Bytes}, - App, }; -use diesel::{r2d2::ConnectionManager, SqliteConnection}; +use diesel::{SqliteConnection, r2d2::ConnectionManager}; use diesel_migrations::MigrationHarness; -use graphannis::{corpusstorage::ImportFormat, CorpusStorage}; +use graphannis::{CorpusStorage, corpusstorage::ImportFormat}; use insta::assert_snapshot; use jsonwebtoken::EncodingKey; use log::{Level, Log, RecordBuilder};