From 17c783b4db36179de8544241aa92df27f5cef22e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Massot?= Date: Sun, 11 Jun 2023 11:49:27 +0200 Subject: [PATCH 1/3] Align numerical type priority order on the search side. --- src/core/json_utils.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/core/json_utils.rs b/src/core/json_utils.rs index 0dec432d89..a02d86293d 100644 --- a/src/core/json_utils.rs +++ b/src/core/json_utils.rs @@ -212,12 +212,12 @@ pub fn convert_to_fast_value_and_get_term( DateTime::from_utc(dt_utc), )); } - if let Ok(u64_val) = str::parse::<u64>(phrase) { - return Some(set_fastvalue_and_get_term(json_term_writer, u64_val)); - } if let Ok(i64_val) = str::parse::<i64>(phrase) { return Some(set_fastvalue_and_get_term(json_term_writer, i64_val)); } + if let Ok(u64_val) = str::parse::<u64>(phrase) { + return Some(set_fastvalue_and_get_term(json_term_writer, u64_val)); + } if let Ok(f64_val) = str::parse::<f64>(phrase) { return Some(set_fastvalue_and_get_term(json_term_writer, f64_val)); } From 0cb53207ec561d40bc850365ffd6310412ee82aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Massot?= Date: Sun, 11 Jun 2023 12:13:35 +0200 Subject: [PATCH 2/3] Fix tests. 
--- src/query/query_parser/query_parser.rs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/query/query_parser/query_parser.rs b/src/query/query_parser/query_parser.rs index 72a735848b..4a8b864699 100644 --- a/src/query/query_parser/query_parser.rs +++ b/src/query/query_parser/query_parser.rs @@ -1203,7 +1203,7 @@ mod test { fn test_json_field_possibly_a_number() { test_parse_query_to_logical_ast_helper( "json.titi:5", - r#"(Term(field=14, type=Json, path=titi, type=U64, 5) Term(field=14, type=Json, path=titi, type=Str, "5"))"#, + r#"(Term(field=14, type=Json, path=titi, type=I64, 5) Term(field=14, type=Json, path=titi, type=Str, "5"))"#, true, ); test_parse_query_to_logical_ast_helper( @@ -1211,6 +1211,11 @@ mod test { r#"(Term(field=14, type=Json, path=titi, type=I64, -5) Term(field=14, type=Json, path=titi, type=Str, "5"))"#, //< Yes this is a bit weird after going through the tokenizer we lose the "-". true, ); + test_parse_query_to_logical_ast_helper( + "json.titi:10000000000000000000", + r#"(Term(field=14, type=Json, path=titi, type=U64, 10000000000000000000) Term(field=14, type=Json, path=titi, type=Str, "10000000000000000000"))"#, + true, + ); test_parse_query_to_logical_ast_helper( "json.titi:-5.2", r#"(Term(field=14, type=Json, path=titi, type=F64, -5.2) "[(0, Term(field=14, type=Json, path=titi, type=Str, "5")), (1, Term(field=14, type=Json, path=titi, type=Str, "2"))]")"#, @@ -1260,7 +1265,7 @@ mod test { fn test_json_default() { test_query_to_logical_ast_with_default_json( "titi:4", - "(Term(field=14, type=Json, path=titi, type=U64, 4) Term(field=14, type=Json, \ + "(Term(field=14, type=Json, path=titi, type=I64, 4) Term(field=14, type=Json, \ path=titi, type=Str, \"4\"))", false, ); @@ -1282,7 +1287,7 @@ mod test { for conjunction in [false, true] { test_query_to_logical_ast_with_default_json( "json:4", - r#"(Term(field=14, type=Json, path=, type=U64, 4) Term(field=14, type=Json, path=, type=Str, "4"))"#, + 
r#"(Term(field=14, type=Json, path=, type=I64, 4) Term(field=14, type=Json, path=, type=Str, "4"))"#, conjunction, ); } From 07023948aa619d1526a7e473104bebfcc642f97a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Massot?= Date: Sun, 11 Jun 2023 15:05:28 +0200 Subject: [PATCH 3/3] Add test that indexes and searches a JSON field. --- src/lib.rs | 90 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index bba4846cc7..ce4f2a31b8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -302,6 +302,7 @@ pub struct DocAddress { #[cfg(test)] pub mod tests { use common::{BinarySerializable, FixedSize}; + use query_grammar::{UserInputAst, UserInputLeaf, UserInputLiteral}; use rand::distributions::{Bernoulli, Uniform}; use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; @@ -857,6 +858,95 @@ pub mod tests { Ok(()) } + #[test] + fn test_searcher_on_json_field_with_type_inference() { + // When indexing and searching a json value, we infer its type. + // This test aims to check the type inference is consistent between indexing and search. + // Inference order is date, i64, u64, f64, bool. 
+ let mut schema_builder = Schema::builder(); + let json_field = schema_builder.add_json_field("json", STORED | TEXT); + let schema = schema_builder.build(); + let json_val: serde_json::Map<String, serde_json::Value> = serde_json::from_str( + r#"{ + "signed": 2, + "float": 2.0, + "unsigned": 10000000000000, + "date": "1985-04-12T23:20:50.52Z", + "bool": true + }"#, + ) + .unwrap(); + let doc = doc!(json_field=>json_val.clone()); + let index = Index::create_in_ram(schema.clone()); + let mut writer = index.writer_for_tests().unwrap(); + writer.add_document(doc).unwrap(); + writer.commit().unwrap(); + let reader = index.reader().unwrap(); + let searcher = reader.searcher(); + let get_doc_ids = |user_input_literal: UserInputLiteral| { + let query_parser = crate::query::QueryParser::for_index(&index, Vec::new()); + let query = query_parser + .build_query_from_user_input_ast(UserInputAst::from(UserInputLeaf::Literal( + user_input_literal, + ))) + .unwrap(); + searcher + .search(&query, &TEST_COLLECTOR_WITH_SCORE) + .map(|topdocs| topdocs.docs().to_vec()) + .unwrap() + }; + { + let user_input_literal = UserInputLiteral { + field_name: Some("json.signed".to_string()), + phrase: "2".to_string(), + delimiter: crate::query_grammar::Delimiter::None, + slop: 0, + prefix: false, + }; + assert_eq!(get_doc_ids(user_input_literal), vec![DocAddress::new(0, 0)]); + } + { + let user_input_literal = UserInputLiteral { + field_name: Some("json.float".to_string()), + phrase: "2.0".to_string(), + delimiter: crate::query_grammar::Delimiter::None, + slop: 0, + prefix: false, + }; + assert_eq!(get_doc_ids(user_input_literal), vec![DocAddress::new(0, 0)]); + } + { + let user_input_literal = UserInputLiteral { + field_name: Some("json.date".to_string()), + phrase: "1985-04-12T23:20:50.52Z".to_string(), + delimiter: crate::query_grammar::Delimiter::None, + slop: 0, + prefix: false, + }; + assert_eq!(get_doc_ids(user_input_literal), vec![DocAddress::new(0, 0)]); + } + { + let user_input_literal = UserInputLiteral { + 
field_name: Some("json.unsigned".to_string()), + phrase: "10000000000000".to_string(), + delimiter: crate::query_grammar::Delimiter::None, + slop: 0, + prefix: false, + }; + assert_eq!(get_doc_ids(user_input_literal), vec![DocAddress::new(0, 0)]); + } + { + let user_input_literal = UserInputLiteral { + field_name: Some("json.bool".to_string()), + phrase: "true".to_string(), + delimiter: crate::query_grammar::Delimiter::None, + slop: 0, + prefix: false, + }; + assert_eq!(get_doc_ids(user_input_literal), vec![DocAddress::new(0, 0)]); + } + } + #[test] fn test_doc_macro() { let mut schema_builder = Schema::builder();