From 3487666ee4405f814a4f79e4339d9654efdc010c Mon Sep 17 00:00:00 2001 From: Anthony Miyaguchi Date: Mon, 8 Jul 2019 15:16:13 -0700 Subject: [PATCH 01/19] Reorder functions and remove excess clones --- src/ast.rs | 168 ++++++++++++++++++++++++++--------------------------- 1 file changed, 84 insertions(+), 84 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index 638b5f5..83d5a72 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -286,17 +286,6 @@ impl Tag { } } - pub fn fully_qualified_name(&self) -> String { - let name = match &self.name { - Some(name) => name.clone(), - None => "__unknown__".to_string(), - }; - match &self.namespace { - Some(ns) => format!("{}.{}", ns, name), - None => name, - } - } - pub fn is_null(&self) -> bool { match self.data_type { Type::Null => true, @@ -332,14 +321,83 @@ impl Tag { } } - fn fill_names(&mut self, name: String, namespace: String) { - self.name = Some(name); + pub fn fully_qualified_name(&self) -> String { + let name = match &self.name { + Some(name) => name.clone(), + None => "__unknown__".to_string(), + }; + match &self.namespace { + Some(ns) => format!("{}.{}", ns, name), + None => name, + } + } + + /// Set a fully qualified name to a tag with references to the name and the + /// namespace. + fn set_name(&mut self, name: &str, namespace: &str) { + self.name = Some(name.to_string()); if !namespace.is_empty() { - self.namespace = Some(namespace); + self.namespace = Some(namespace.to_string()); + } + } + + /// Renames a column name so it contains only letters, numbers, and + /// underscores while starting with a letter or underscore. This requirement + /// is enforced by BigQuery during table creation. 
+ fn rename_string_bigquery(string: &str) -> Option { + let re = Regex::new(r"^[a-zA-Z_][a-zA-Z0-9_]*$").unwrap(); + let mut renamed = string.replace(".", "_").replace("-", "_"); + if renamed.chars().next().unwrap().is_numeric() { + renamed = format!("_{}", renamed); + }; + if re.is_match(&renamed) { + Some(renamed) + } else { + None + } + } + + /// Fix properties of an object to adhere the BigQuery column name + /// specification. + /// + /// This modifies field names as well as required fields. + /// See: https://cloud.google.com/bigquery/docs/schemas + pub fn fix_properties(&mut self) { + if let Type::Object(ref mut object) = self.data_type { + let fields = &mut object.fields; + let keys: Vec = fields.keys().cloned().collect(); + for key in keys { + if let Some(renamed) = Tag::rename_string_bigquery(&key) { + if renamed.as_str() != key.as_str() { + warn!("{} replaced with {}", key, renamed); + fields.insert(renamed.clone(), fields[&key].clone()); + fields.remove(&key.clone()); + } + } else { + warn!( + "{} is not a valid property name and will not be included", + key + ); + fields.remove(&key.clone()); + } + } + object.required = match &object.required { + Some(required) => { + let renamed: HashSet = required + .iter() + .map(String::as_str) + .map(Tag::rename_string_bigquery) + .filter(Option::is_some) + .map(Option::unwrap) + .collect(); + Some(renamed) + } + None => None, + }; } } - fn infer_name_helper(&mut self, namespace: String) { + fn recurse_infer_name(&mut self, namespace: String) { // We remove invalid field names from the schema when we infer the names // for the schema (e.g. `$schema`). 
We also apply rules to make the // names consistent with BigQuery's naming scheme, like avoiding columns @@ -349,28 +407,26 @@ impl Tag { match &mut self.data_type { Type::Object(object) => { for (key, value) in object.fields.iter_mut() { - value.fill_names(key.to_string(), namespace.clone()); - value.infer_name_helper(format!("{}.{}", namespace, key)); + value.set_name(key, &namespace); + value.recurse_infer_name(format!("{}.{}", namespace, key)); } } Type::Map(map) => { - map.key.fill_names("key".into(), namespace.clone()); - map.value.fill_names("value".into(), namespace.clone()); - map.key - .infer_name_helper(format!("{}.key", namespace.clone())); - map.value - .infer_name_helper(format!("{}.value", namespace.clone())); + map.key.set_name("key", &namespace); + map.value.set_name("value", &namespace); + map.key.recurse_infer_name(format!("{}.key", &namespace)); + map.value.recurse_infer_name(format!("{}.value", &namespace)); } Type::Array(array) => { - array.items.fill_names("items".into(), namespace.clone()); + array.items.set_name("items", &namespace); array .items - .infer_name_helper(format!("{}.items", namespace.clone())); + .recurse_infer_name(format!("{}.items", &namespace)); } Type::Union(union) => { for item in union.items.iter_mut() { - item.fill_names("__union__".into(), namespace.clone()); - item.infer_name_helper(format!("{}.__union__", namespace.clone())); + item.set_name("__union__", &namespace); + item.recurse_infer_name(format!("{}.__union__", &namespace)); } } _ => (), @@ -383,7 +439,7 @@ impl Tag { Some(name) => name.clone(), None => "".into(), }; - self.infer_name_helper(namespace); + self.recurse_infer_name(namespace); } /// These rules are primarily focused on BigQuery, although they should @@ -446,62 +502,6 @@ impl Tag { _ => (), } } - - /// Renames a column name so it contains only letters, numbers, and - /// underscores while starting with a letter or underscore. This requirement - /// is enforced by BigQuery during table creation. 
- fn rename_string_bigquery(string: &str) -> Option { - let re = Regex::new(r"^[a-zA-Z_][a-zA-Z0-9_]*$").unwrap(); - let mut renamed = string.replace(".", "_").replace("-", "_"); - if renamed.chars().next().unwrap().is_numeric() { - renamed = format!("_{}", renamed); - }; - if re.is_match(&renamed) { - Some(renamed) - } else { - None - } - } - - /// Fix properties of an object to adhere the BigQuery column name - /// specification. - /// - /// This modifies field names as well as required fields. - /// See: https://cloud.google.com/bigquery/docs/schemas - pub fn fix_properties(&mut self) { - if let Type::Object(ref mut object) = self.data_type { - let fields = &mut object.fields; - let keys: Vec = fields.keys().cloned().collect(); - for key in keys { - if let Some(renamed) = Tag::rename_string_bigquery(&key) { - if renamed.as_str() != key.as_str() { - warn!("{} replaced with {}", key, renamed); - fields.insert(renamed.clone(), fields[&key].clone()); - fields.remove(&key.clone()); - } - } else { - warn!( - "{} is not a valid property name and will not be included", - key - ); - fields.remove(&key.clone()); - } - } - object.required = match &object.required { - Some(required) => { - let renamed: HashSet = required - .iter() - .map(String::as_str) - .map(Tag::rename_string_bigquery) - .filter(Option::is_some) - .map(Option::unwrap) - .collect(); - Some(renamed) - } - None => None, - }; - } - } } impl TranslateFrom for Tag { From 447d5612a231d8f24970724a10ed3987d7d1ba92 Mon Sep 17 00:00:00 2001 From: Anthony Miyaguchi Date: Mon, 8 Jul 2019 16:20:48 -0700 Subject: [PATCH 02/19] Rename normalize_properties and refactor recurse_infer_name --- src/ast.rs | 94 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 61 insertions(+), 33 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index 83d5a72..02ff11a 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -321,6 +321,27 @@ impl Tag { } } + /// Get the path the the current tag in the context of the larger schema. 
+ /// + /// Each tag in the schema can be unambiguously referenced by concatenating + /// the name of tag with the tag's namespace. For example, a document may + /// contain a `timestamp` field nested under different sub-documents. + /// + /// ```json + /// { + /// "environment": { "timestamp": 64 }, + /// "payload": { + /// "measurement": 10, + /// "timestamp": 64 + /// } + /// } + /// ``` + /// + /// The fully qualified names are as follows: + /// + /// * `root.attributes.timestamp` + /// * `root.payload.measurement` + /// * `root.payload.timestamp` pub fn fully_qualified_name(&self) -> String { let name = match &self.name { Some(name) => name.clone(), @@ -332,8 +353,7 @@ impl Tag { } } - /// Set a fully qualified name to a tag with references to the name and the - /// namespace. + /// Sets a tag with references to the name and the namespace. fn set_name(&mut self, name: &str, namespace: &str) { self.name = Some(name.to_string()); if !namespace.is_empty() { @@ -344,7 +364,7 @@ impl Tag { /// Renames a column name so it contains only letters, numbers, and /// underscores while starting with a letter or underscore. This requirement /// is enforced by BigQuery during table creation. - fn rename_string_bigquery(string: &str) -> Option { + fn normalize_name_bigquery(string: &str) -> Option { let re = Regex::new(r"^[a-zA-Z_][a-zA-Z0-9_]*$").unwrap(); let mut renamed = string.replace(".", "_").replace("-", "_"); if renamed.chars().next().unwrap().is_numeric() { @@ -360,33 +380,39 @@ impl Tag { /// Fix properties of an object to adhere the BigQuery column name /// specification. /// + /// This removes invalid field names from the schema when inferring the + /// names for the schema (e.g. `$schema`). It also applies rules to be + /// consistent with BigQuery's naming scheme, like avoiding columns that + /// start with a number. + /// /// This modifies field names as well as required fields. 
/// See: https://cloud.google.com/bigquery/docs/schemas - pub fn fix_properties(&mut self) { + pub fn normalize_properties(&mut self) { if let Type::Object(ref mut object) = self.data_type { let fields = &mut object.fields; let keys: Vec = fields.keys().cloned().collect(); + for key in keys { - if let Some(renamed) = Tag::rename_string_bigquery(&key) { + // Replace property names with the normalized property name + if let Some(renamed) = Tag::normalize_name_bigquery(&key) { if renamed.as_str() != key.as_str() { warn!("{} replaced with {}", key, renamed); - fields.insert(renamed.clone(), fields[&key].clone()); - fields.remove(&key.clone()); + fields.insert(renamed, fields[&key].clone()); + fields.remove(&key); } } else { - warn!( - "{} is not a valid property name and will not be included", - key - ); - fields.remove(&key.clone()); + warn!("Omitting {} - not a valid property name", key); + fields.remove(&key); } } + + // Replace the corresponding names in the required field object.required = match &object.required { Some(required) => { let renamed: HashSet = required .iter() .map(String::as_str) - .map(Tag::rename_string_bigquery) + .map(Tag::normalize_name_bigquery) .filter(Option::is_some) .map(Option::unwrap) .collect(); @@ -397,36 +423,35 @@ impl Tag { } } + /// A helper function for calculating the names and namespaces within the + /// schema. + /// + /// The namespaces are built from the top-down and follows the depth-first + /// traversal of the schema. fn recurse_infer_name(&mut self, namespace: String) { - // We remove invalid field names from the schema when we infer the names - // for the schema (e.g. `$schema`). We also apply rules to make the - // names consistent with BigQuery's naming scheme, like avoiding columns - // that start with a number. 
- self.fix_properties(); + self.normalize_properties(); + + let set_and_recurse = |tag: &mut Tag, name: &str| { + tag.set_name(name, &namespace); + tag.recurse_infer_name(format!("{}.{}", &namespace, name)) + }; match &mut self.data_type { Type::Object(object) => { for (key, value) in object.fields.iter_mut() { - value.set_name(key, &namespace); - value.recurse_infer_name(format!("{}.{}", namespace, key)); + set_and_recurse(value, key) } } Type::Map(map) => { - map.key.set_name("key", &namespace); - map.value.set_name("value", &namespace); - map.key.recurse_infer_name(format!("{}.key", &namespace)); - map.value.recurse_infer_name(format!("{}.value", &namespace)); + set_and_recurse(&mut map.key, "key"); + set_and_recurse(&mut map.value, "value"); } Type::Array(array) => { - array.items.set_name("items", &namespace); - array - .items - .recurse_infer_name(format!("{}.items", &namespace)); + set_and_recurse(&mut array.items, "items"); } Type::Union(union) => { for item in union.items.iter_mut() { - item.set_name("__union__", &namespace); - item.recurse_infer_name(format!("{}.__union__", &namespace)); + set_and_recurse(item, "__union__"); } } _ => (), @@ -442,6 +467,9 @@ impl Tag { self.recurse_infer_name(namespace); } + /// Infer whether the current tag in the schema allows for the value to be + /// null. + /// /// These rules are primarily focused on BigQuery, although they should /// translate into other schemas. This should be run after unions have been /// eliminated from the tree since the behavior is currently order @@ -479,7 +507,7 @@ impl Tag { } } - // An interface to collapse the schema of all unions + /// Factor out the shared parts of the union between two schemas. 
pub fn collapse(&mut self) { match &mut self.data_type { Type::Object(object) => { @@ -1136,7 +1164,7 @@ mod tests { } #[test] - fn test_tag_fix_properties() { + fn test_tag_normalize_properties() { let data = json!({ "type": { "object": { @@ -1153,7 +1181,7 @@ mod tests { "64bit", ]}}}); let mut tag: Tag = serde_json::from_value(data).unwrap(); - tag.fix_properties(); + tag.normalize_properties(); if let Type::Object(object) = &tag.data_type { let expected: HashSet = ["valid_name", "renamed_value_0", "_64bit"] .iter() From c577171dfedd0d83d4748b5f6b767cbfd5eafeb8 Mon Sep 17 00:00:00 2001 From: Anthony Miyaguchi Date: Mon, 8 Jul 2019 17:03:10 -0700 Subject: [PATCH 03/19] Add `--normalize-case` and implement Default for Context --- build.rs | 16 ++- src/avro.rs | 2 +- src/bigquery.rs | 4 +- src/jsonschema.rs | 4 +- src/lib.rs | 9 +- src/main.rs | 7 ++ tests/resolve_method.rs | 6 + tests/transpile_avro.rs | 58 +++++----- tests/transpile_bigquery.rs | 213 ++++++++++++++++++++++++++---------- 9 files changed, 222 insertions(+), 97 deletions(-) diff --git a/build.rs b/build.rs index d5a2cf5..efecc1b 100644 --- a/build.rs +++ b/build.rs @@ -82,7 +82,7 @@ fn avro_{name}() {{ {expected} "#; let mut context = Context {{ - resolve_method: ResolveMethod::Cast, + ..Default::default() }}; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); @@ -109,7 +109,7 @@ fn write_bigquery_tests(mut outfile: &File, suite: &TestSuite) { for case in &suite.tests { let formatted = format!( r##" -#[test] +#[test]{should_panic} fn bigquery_{name}() {{ let input_data = r#" {input_data} @@ -117,15 +117,23 @@ fn bigquery_{name}() {{ let expected_data = r#" {expected} "#; - let context = Context {{ - resolve_method: ResolveMethod::Cast, + let mut context = Context {{ + ..Default::default() }}; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); 
assert_eq!(expected, convert_bigquery(&input, context)); + + context.resolve_method = ResolveMethod::Panic; + convert_bigquery(&input, context); }} "##, name = case.name, + should_panic = if case.compatible { + "" + } else { + "\n#[should_panic]" + }, input_data = format_json(case.test.json.clone()), expected = format_json(case.test.bigquery.clone()), ); diff --git a/src/avro.rs b/src/avro.rs index 52232af..3618efd 100644 --- a/src/avro.rs +++ b/src/avro.rs @@ -232,7 +232,7 @@ mod tests { fn assert_from_ast_eq(ast: Value, avro: Value) { let context = Context { - resolve_method: ResolveMethod::Cast, + ..Default::default() }; let tag: ast::Tag = serde_json::from_value(ast).unwrap(); let from_tag = Type::translate_from(tag, context).unwrap(); diff --git a/src/bigquery.rs b/src/bigquery.rs index d961eee..d468146 100644 --- a/src/bigquery.rs +++ b/src/bigquery.rs @@ -230,7 +230,7 @@ mod tests { fn transform_tag(data: Value) -> Value { let context = Context { - resolve_method: ResolveMethod::Cast, + ..Default::default() }; let ast_tag: ast::Tag = serde_json::from_value(data).unwrap(); let bq_tag: Tag = ast_tag.translate_into(context).unwrap(); @@ -239,7 +239,7 @@ mod tests { fn transform_schema(data: Value) -> Value { let context = Context { - resolve_method: ResolveMethod::Cast, + ..Default::default() }; let ast_tag: ast::Tag = serde_json::from_value(data).unwrap(); let bq_tag: Schema = ast_tag.translate_into(context).unwrap(); diff --git a/src/jsonschema.rs b/src/jsonschema.rs index 62c10fe..64ab1b3 100644 --- a/src/jsonschema.rs +++ b/src/jsonschema.rs @@ -226,13 +226,13 @@ impl Tag { #[cfg(test)] mod tests { use super::super::traits::TranslateInto; - use super::super::{Context, ResolveMethod}; + use super::super::Context; use super::*; use pretty_assertions::assert_eq; fn translate(data: Value) -> Value { let context = Context { - resolve_method: ResolveMethod::Cast, + ..Default::default() }; let schema: Tag = serde_json::from_value(data).unwrap(); let ast: ast::Tag 
= schema.translate_into(context).unwrap(); diff --git a/src/lib.rs b/src/lib.rs index 7886428..1c15b46 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -41,6 +41,12 @@ pub enum ResolveMethod { Panic, } +impl Default for ResolveMethod { + fn default() -> Self { + ResolveMethod::Cast + } +} + /// Options for modifying the behavior of translating between two schema /// formats. /// @@ -49,9 +55,10 @@ pub enum ResolveMethod { /// particular, the context is useful for resolving edge-cases in ambiguous /// situations. This can includes situations like casting or dropping an empty /// object. -#[derive(Copy, Clone)] +#[derive(Copy, Clone, Default)] pub struct Context { pub resolve_method: ResolveMethod, + pub normalize_case: bool, } fn into_ast(input: &Value, context: Context) -> ast::Tag { diff --git a/src/main.rs b/src/main.rs index 396104e..180138d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -37,6 +37,12 @@ fn main() { .possible_values(&["cast", "panic", "drop"]) .default_value("cast"), ) + .arg( + Arg::with_name("normalize-case") + .help("snake_case column-names for consistent behavior between SQL engines") + .short("c") + .long("normalize-case"), + ) .get_matches(); let reader: Box = match matches.value_of("file") { @@ -55,6 +61,7 @@ fn main() { "drop" => ResolveMethod::Drop, _ => panic!("Unknown resolution method!"), }, + ..Default::default() }; let output = match matches.value_of("type").unwrap() { diff --git a/tests/resolve_method.rs b/tests/resolve_method.rs index a2627a7..b86eb4c 100644 --- a/tests/resolve_method.rs +++ b/tests/resolve_method.rs @@ -21,6 +21,7 @@ fn test_data() -> Value { fn test_bigquery_resolve_error_cast() { let context = Context { resolve_method: ResolveMethod::Cast, + ..Default::default() }; let expected: Value = serde_json::from_str( r#" @@ -47,6 +48,7 @@ fn test_bigquery_resolve_error_cast() { fn test_bigquery_resolve_error_drop() { let context = Context { resolve_method: ResolveMethod::Drop, + ..Default::default() }; let expected: Value = 
serde_json::from_str( r#" @@ -68,6 +70,7 @@ fn test_bigquery_resolve_error_drop() { fn test_bigquery_resolve_error_panic() { let context = Context { resolve_method: ResolveMethod::Panic, + ..Default::default() }; convert_bigquery(&test_data(), context); } @@ -76,6 +79,7 @@ fn test_bigquery_resolve_error_panic() { fn test_avro_resolve_error_cast() { let context = Context { resolve_method: ResolveMethod::Cast, + ..Default::default() }; let expected: Value = serde_json::from_str( r#" @@ -112,6 +116,7 @@ fn test_avro_resolve_error_cast() { fn test_avro_resolve_error_drop() { let context = Context { resolve_method: ResolveMethod::Drop, + ..Default::default() }; let expected: Value = serde_json::from_str( r#" @@ -140,6 +145,7 @@ fn test_avro_resolve_error_drop() { fn test_avro_resolve_error_panic() { let context = Context { resolve_method: ResolveMethod::Panic, + ..Default::default() }; convert_avro(&test_data(), context); } diff --git a/tests/transpile_avro.rs b/tests/transpile_avro.rs index c86f47b..5615374 100644 --- a/tests/transpile_avro.rs +++ b/tests/transpile_avro.rs @@ -22,7 +22,7 @@ fn avro_test_array_with_atomics() { } "#; let mut context = Context { - resolve_method: ResolveMethod::Cast, + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); @@ -87,7 +87,7 @@ fn avro_test_array_with_complex() { } "#; let mut context = Context { - resolve_method: ResolveMethod::Cast, + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); @@ -110,7 +110,7 @@ fn avro_test_atomic() { } "#; let mut context = Context { - resolve_method: ResolveMethod::Cast, + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); @@ -141,7 +141,7 @@ fn avro_test_atomic_with_null() { ] "#; let mut 
context = Context { - resolve_method: ResolveMethod::Cast, + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); @@ -168,7 +168,7 @@ fn avro_test_incompatible_atomic_multitype() { } "#; let mut context = Context { - resolve_method: ResolveMethod::Cast, + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); @@ -201,7 +201,7 @@ fn avro_test_incompatible_atomic_multitype_with_null() { ] "#; let mut context = Context { - resolve_method: ResolveMethod::Cast, + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); @@ -225,7 +225,7 @@ fn avro_test_datetime() { } "#; let mut context = Context { - resolve_method: ResolveMethod::Cast, + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); @@ -254,7 +254,7 @@ fn avro_test_map_with_atomics() { } "#; let mut context = Context { - resolve_method: ResolveMethod::Cast, + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); @@ -319,7 +319,7 @@ fn avro_test_map_with_complex() { } "#; let mut context = Context { - resolve_method: ResolveMethod::Cast, + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); @@ -351,7 +351,7 @@ fn avro_test_map_with_pattern_properties() { } "#; let mut context = Context { - resolve_method: ResolveMethod::Cast, + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); @@ -385,7 +385,7 @@ fn 
avro_test_map_with_pattern_and_additional_properties() { } "#; let mut context = Context { - resolve_method: ResolveMethod::Cast, + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); @@ -421,7 +421,7 @@ fn avro_test_incompatible_map_with_pattern_properties() { } "#; let mut context = Context { - resolve_method: ResolveMethod::Cast, + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); @@ -456,7 +456,7 @@ fn avro_test_incompatible_map_with_pattern_and_additional_properties() { } "#; let mut context = Context { - resolve_method: ResolveMethod::Cast, + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); @@ -544,7 +544,7 @@ fn avro_test_object_with_atomics_is_sorted() { } "#; let mut context = Context { - resolve_method: ResolveMethod::Cast, + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); @@ -609,7 +609,7 @@ fn avro_test_object_with_atomics_required() { } "#; let mut context = Context { - resolve_method: ResolveMethod::Cast, + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); @@ -683,7 +683,7 @@ fn avro_test_object_with_atomics_required_with_null() { } "#; let mut context = Context { - resolve_method: ResolveMethod::Cast, + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); @@ -762,7 +762,7 @@ fn avro_test_object_with_complex() { } "#; let mut context = Context { - resolve_method: ResolveMethod::Cast, + ..Default::default() }; let input: Value = 
serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); @@ -787,7 +787,7 @@ fn avro_test_object_empty_record() { } "#; let mut context = Context { - resolve_method: ResolveMethod::Cast, + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); @@ -817,7 +817,7 @@ fn avro_test_oneof_atomic() { } "#; let mut context = Context { - resolve_method: ResolveMethod::Cast, + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); @@ -852,7 +852,7 @@ fn avro_test_oneof_atomic_with_null() { ] "#; let mut context = Context { - resolve_method: ResolveMethod::Cast, + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); @@ -883,7 +883,7 @@ fn avro_test_incompatible_oneof_atomic() { } "#; let mut context = Context { - resolve_method: ResolveMethod::Cast, + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); @@ -922,7 +922,7 @@ fn avro_test_incompatible_oneof_atomic_with_null() { ] "#; let mut context = Context { - resolve_method: ResolveMethod::Cast, + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); @@ -995,7 +995,7 @@ fn avro_test_oneof_object_with_atomics() { } "#; let mut context = Context { - resolve_method: ResolveMethod::Cast, + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); @@ -1080,7 +1080,7 @@ fn avro_test_oneof_object_merge() { } "#; let mut context = Context { - resolve_method: ResolveMethod::Cast, + 
..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); @@ -1226,7 +1226,7 @@ fn avro_test_oneof_object_merge_with_complex() { } "#; let mut context = Context { - resolve_method: ResolveMethod::Cast, + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); @@ -1262,7 +1262,7 @@ fn avro_test_incompatible_oneof_atomic_and_object() { } "#; let mut context = Context { - resolve_method: ResolveMethod::Cast, + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); @@ -1303,7 +1303,7 @@ fn avro_test_incompatible_oneof_object() { } "#; let mut context = Context { - resolve_method: ResolveMethod::Cast, + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); @@ -1360,7 +1360,7 @@ fn avro_test_incompatible_oneof_object_with_complex() { } "#; let mut context = Context { - resolve_method: ResolveMethod::Cast, + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); @@ -1445,7 +1445,7 @@ fn avro_test_oneof_object_merge_nullability() { } "#; let mut context = Context { - resolve_method: ResolveMethod::Cast, + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); diff --git a/tests/transpile_bigquery.rs b/tests/transpile_bigquery.rs index 70e9ebc..7146648 100644 --- a/tests/transpile_bigquery.rs +++ b/tests/transpile_bigquery.rs @@ -22,12 +22,15 @@ fn bigquery_test_array_with_atomics() { } ] "#; - let context = Context { - resolve_method: ResolveMethod::Cast, + let mut context = Context { + 
..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); assert_eq!(expected, convert_bigquery(&input, context)); + + context.resolve_method = ResolveMethod::Panic; + convert_bigquery(&input, context); } #[test] @@ -69,12 +72,15 @@ fn bigquery_test_array_with_complex() { } ] "#; - let context = Context { - resolve_method: ResolveMethod::Cast, + let mut context = Context { + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); assert_eq!(expected, convert_bigquery(&input, context)); + + context.resolve_method = ResolveMethod::Panic; + convert_bigquery(&input, context); } #[test] @@ -93,12 +99,15 @@ fn bigquery_test_atomic() { } ] "#; - let context = Context { - resolve_method: ResolveMethod::Cast, + let mut context = Context { + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); assert_eq!(expected, convert_bigquery(&input, context)); + + context.resolve_method = ResolveMethod::Panic; + convert_bigquery(&input, context); } #[test] @@ -120,15 +129,19 @@ fn bigquery_test_atomic_with_null() { } ] "#; - let context = Context { - resolve_method: ResolveMethod::Cast, + let mut context = Context { + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); assert_eq!(expected, convert_bigquery(&input, context)); + + context.resolve_method = ResolveMethod::Panic; + convert_bigquery(&input, context); } #[test] +#[should_panic] fn bigquery_test_incompatible_atomic_multitype() { let input_data = r#" { @@ -147,15 +160,19 @@ fn bigquery_test_incompatible_atomic_multitype() { } ] "#; - let context = Context { - resolve_method: ResolveMethod::Cast, + let mut context = Context { + 
..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); assert_eq!(expected, convert_bigquery(&input, context)); + + context.resolve_method = ResolveMethod::Panic; + convert_bigquery(&input, context); } #[test] +#[should_panic] fn bigquery_test_incompatible_atomic_multitype_with_null() { let input_data = r#" { @@ -175,12 +192,15 @@ fn bigquery_test_incompatible_atomic_multitype_with_null() { } ] "#; - let context = Context { - resolve_method: ResolveMethod::Cast, + let mut context = Context { + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); assert_eq!(expected, convert_bigquery(&input, context)); + + context.resolve_method = ResolveMethod::Panic; + convert_bigquery(&input, context); } #[test] @@ -200,12 +220,15 @@ fn bigquery_test_datetime() { } ] "#; - let context = Context { - resolve_method: ResolveMethod::Cast, + let mut context = Context { + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); assert_eq!(expected, convert_bigquery(&input, context)); + + context.resolve_method = ResolveMethod::Panic; + convert_bigquery(&input, context); } #[test] @@ -239,12 +262,15 @@ fn bigquery_test_map_with_atomics() { } ] "#; - let context = Context { - resolve_method: ResolveMethod::Cast, + let mut context = Context { + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); assert_eq!(expected, convert_bigquery(&input, context)); + + context.resolve_method = ResolveMethod::Panic; + convert_bigquery(&input, context); } #[test] @@ -298,12 +324,15 @@ fn bigquery_test_map_with_complex() { } ] "#; - let context = Context { - resolve_method: ResolveMethod::Cast, + let mut context = 
Context { + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); assert_eq!(expected, convert_bigquery(&input, context)); + + context.resolve_method = ResolveMethod::Panic; + convert_bigquery(&input, context); } #[test] @@ -340,12 +369,15 @@ fn bigquery_test_map_with_pattern_properties() { } ] "#; - let context = Context { - resolve_method: ResolveMethod::Cast, + let mut context = Context { + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); assert_eq!(expected, convert_bigquery(&input, context)); + + context.resolve_method = ResolveMethod::Panic; + convert_bigquery(&input, context); } #[test] @@ -384,15 +416,19 @@ fn bigquery_test_map_with_pattern_and_additional_properties() { } ] "#; - let context = Context { - resolve_method: ResolveMethod::Cast, + let mut context = Context { + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); assert_eq!(expected, convert_bigquery(&input, context)); + + context.resolve_method = ResolveMethod::Panic; + convert_bigquery(&input, context); } #[test] +#[should_panic] fn bigquery_test_incompatible_map_with_pattern_properties() { let input_data = r#" { @@ -429,15 +465,19 @@ fn bigquery_test_incompatible_map_with_pattern_properties() { } ] "#; - let context = Context { - resolve_method: ResolveMethod::Cast, + let mut context = Context { + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); assert_eq!(expected, convert_bigquery(&input, context)); + + context.resolve_method = ResolveMethod::Panic; + convert_bigquery(&input, context); } #[test] +#[should_panic] fn bigquery_test_incompatible_map_with_pattern_and_additional_properties() { 
let input_data = r#" { @@ -473,12 +513,15 @@ fn bigquery_test_incompatible_map_with_pattern_and_additional_properties() { } ] "#; - let context = Context { - resolve_method: ResolveMethod::Cast, + let mut context = Context { + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); assert_eq!(expected, convert_bigquery(&input, context)); + + context.resolve_method = ResolveMethod::Panic; + convert_bigquery(&input, context); } #[test] @@ -526,12 +569,15 @@ fn bigquery_test_object_with_atomics_is_sorted() { } ] "#; - let context = Context { - resolve_method: ResolveMethod::Cast, + let mut context = Context { + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); assert_eq!(expected, convert_bigquery(&input, context)); + + context.resolve_method = ResolveMethod::Panic; + convert_bigquery(&input, context); } #[test] @@ -575,12 +621,15 @@ fn bigquery_test_object_with_atomics_required() { } ] "#; - let context = Context { - resolve_method: ResolveMethod::Cast, + let mut context = Context { + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); assert_eq!(expected, convert_bigquery(&input, context)); + + context.resolve_method = ResolveMethod::Panic; + convert_bigquery(&input, context); } #[test] @@ -627,12 +676,15 @@ fn bigquery_test_object_with_atomics_required_with_null() { } ] "#; - let context = Context { - resolve_method: ResolveMethod::Cast, + let mut context = Context { + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); assert_eq!(expected, convert_bigquery(&input, context)); + + context.resolve_method = ResolveMethod::Panic; + convert_bigquery(&input, context); } 
#[test] @@ -676,15 +728,19 @@ fn bigquery_test_object_with_complex() { } ] "#; - let context = Context { - resolve_method: ResolveMethod::Cast, + let mut context = Context { + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); assert_eq!(expected, convert_bigquery(&input, context)); + + context.resolve_method = ResolveMethod::Panic; + convert_bigquery(&input, context); } #[test] +#[should_panic] fn bigquery_test_object_empty_record() { let input_data = r#" { @@ -701,12 +757,15 @@ fn bigquery_test_object_empty_record() { } ] "#; - let context = Context { - resolve_method: ResolveMethod::Cast, + let mut context = Context { + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); assert_eq!(expected, convert_bigquery(&input, context)); + + context.resolve_method = ResolveMethod::Panic; + convert_bigquery(&input, context); } #[test] @@ -732,12 +791,15 @@ fn bigquery_test_oneof_atomic() { } ] "#; - let context = Context { - resolve_method: ResolveMethod::Cast, + let mut context = Context { + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); assert_eq!(expected, convert_bigquery(&input, context)); + + context.resolve_method = ResolveMethod::Panic; + convert_bigquery(&input, context); } #[test] @@ -763,15 +825,19 @@ fn bigquery_test_oneof_atomic_with_null() { } ] "#; - let context = Context { - resolve_method: ResolveMethod::Cast, + let mut context = Context { + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); assert_eq!(expected, convert_bigquery(&input, context)); + + context.resolve_method = ResolveMethod::Panic; + convert_bigquery(&input, context); } #[test] 
+#[should_panic] fn bigquery_test_incompatible_oneof_atomic() { let input_data = r#" { @@ -794,15 +860,19 @@ fn bigquery_test_incompatible_oneof_atomic() { } ] "#; - let context = Context { - resolve_method: ResolveMethod::Cast, + let mut context = Context { + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); assert_eq!(expected, convert_bigquery(&input, context)); + + context.resolve_method = ResolveMethod::Panic; + convert_bigquery(&input, context); } #[test] +#[should_panic] fn bigquery_test_incompatible_oneof_atomic_with_null() { let input_data = r#" { @@ -828,12 +898,15 @@ fn bigquery_test_incompatible_oneof_atomic_with_null() { } ] "#; - let context = Context { - resolve_method: ResolveMethod::Cast, + let mut context = Context { + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); assert_eq!(expected, convert_bigquery(&input, context)); + + context.resolve_method = ResolveMethod::Panic; + convert_bigquery(&input, context); } #[test] @@ -880,12 +953,15 @@ fn bigquery_test_oneof_object_with_atomics() { } ] "#; - let context = Context { - resolve_method: ResolveMethod::Cast, + let mut context = Context { + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); assert_eq!(expected, convert_bigquery(&input, context)); + + context.resolve_method = ResolveMethod::Panic; + convert_bigquery(&input, context); } #[test] @@ -937,12 +1013,15 @@ fn bigquery_test_oneof_object_merge() { } ] "#; - let context = Context { - resolve_method: ResolveMethod::Cast, + let mut context = Context { + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); assert_eq!(expected, 
convert_bigquery(&input, context)); + + context.resolve_method = ResolveMethod::Panic; + convert_bigquery(&input, context); } #[test] @@ -1032,15 +1111,19 @@ fn bigquery_test_oneof_object_merge_with_complex() { } ] "#; - let context = Context { - resolve_method: ResolveMethod::Cast, + let mut context = Context { + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); assert_eq!(expected, convert_bigquery(&input, context)); + + context.resolve_method = ResolveMethod::Panic; + convert_bigquery(&input, context); } #[test] +#[should_panic] fn bigquery_test_incompatible_oneof_atomic_and_object() { let input_data = r#" { @@ -1068,15 +1151,19 @@ fn bigquery_test_incompatible_oneof_atomic_and_object() { } ] "#; - let context = Context { - resolve_method: ResolveMethod::Cast, + let mut context = Context { + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); assert_eq!(expected, convert_bigquery(&input, context)); + + context.resolve_method = ResolveMethod::Panic; + convert_bigquery(&input, context); } #[test] +#[should_panic] fn bigquery_test_incompatible_oneof_object() { let input_data = r#" { @@ -1109,15 +1196,19 @@ fn bigquery_test_incompatible_oneof_object() { } ] "#; - let context = Context { - resolve_method: ResolveMethod::Cast, + let mut context = Context { + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); assert_eq!(expected, convert_bigquery(&input, context)); + + context.resolve_method = ResolveMethod::Panic; + convert_bigquery(&input, context); } #[test] +#[should_panic] fn bigquery_test_incompatible_oneof_object_with_complex() { let input_data = r#" { @@ -1166,12 +1257,15 @@ fn bigquery_test_incompatible_oneof_object_with_complex() { } ] "#; - let context = 
Context { - resolve_method: ResolveMethod::Cast, + let mut context = Context { + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); assert_eq!(expected, convert_bigquery(&input, context)); + + context.resolve_method = ResolveMethod::Panic; + convert_bigquery(&input, context); } #[test] @@ -1229,10 +1323,13 @@ fn bigquery_test_oneof_object_merge_nullability() { } ] "#; - let context = Context { - resolve_method: ResolveMethod::Cast, + let mut context = Context { + ..Default::default() }; let input: Value = serde_json::from_str(input_data).unwrap(); let expected: Value = serde_json::from_str(expected_data).unwrap(); assert_eq!(expected, convert_bigquery(&input, context)); + + context.resolve_method = ResolveMethod::Panic; + convert_bigquery(&input, context); } From 80a25051799b4bd69ba6b4d9be2dcf1fd341d220 Mon Sep 17 00:00:00 2001 From: Anthony Miyaguchi Date: Mon, 8 Jul 2019 17:25:01 -0700 Subject: [PATCH 04/19] Add failing test for normalizing casing --- tests/normalize_case.rs | 101 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 tests/normalize_case.rs diff --git a/tests/normalize_case.rs b/tests/normalize_case.rs new file mode 100644 index 0000000..a8e5c9e --- /dev/null +++ b/tests/normalize_case.rs @@ -0,0 +1,101 @@ +use jst::{convert_avro, convert_bigquery}; +use jst::{Context, ResolveMethod}; +use pretty_assertions::assert_eq; +use serde_json::Value; + +fn test_data() -> Value { + serde_json::from_str( + r#" + { + "type": "object", + "properties": { + "test_snake_case": {"type": "boolean"}, + "testCamelCase": {"type": "boolean"}, + "TestPascalCase": {"type": "boolean"}, + "TEST_SCREAMING_SNAKE_CASE": {"type": "boolean"} + }, + "required": [ + "test_snake_case", + "testCamelCase", + "TestPascalCase", + "TEST_SCREAMING_SNAKE_CASE" + ] + } + "#, + ) + .unwrap() +} + +#[test] +fn test_bigquery_normalize_snake_casing() 
{ + let context = Context { + normalize_case: true, + resolve_method: ResolveMethod::Panic, + }; + let expected: Value = serde_json::from_str( + r#" + [ + { + "mode": "REQUIRED", + "name": "test_camel_case", + "type": "BOOLEAN" + }, + { + "mode": "REQUIRED", + "name": "test_pascal_case", + "type": "BOOLEAN" + }, + { + "mode": "REQUIRED", + "name": "test_snake_case", + "type": "BOOLEAN" + }, + { + "mode": "REQUIRED", + "name": "test_screaming_snake_case", + "type": "BOOLEAN" + } + ] + "#, + ) + .unwrap(); + + assert_eq!(expected, convert_bigquery(&test_data(), context)); +} + +#[test] +fn test_avro_normalize_snake_casing() { + let context = Context { + resolve_method: ResolveMethod::Cast, + ..Default::default() + }; + let expected: Value = serde_json::from_str( + r#" + { + "fields": [ + { + "name": "test_camel_case", + "type": {"type": "boolean"} + }, + { + "name": "test_pascal_case", + "type": {"type": "boolean"} + }, + { + "name": "test_snake_case", + "type": {"type": "boolean"} + }, + { + "name": "test_screaming_snake_case", + "type": {"type": "boolean"} + } + ], + "name": "root", + "type": "record" + } + "#, + ) + .unwrap(); + + assert_eq!(expected, convert_avro(&test_data(), context)); +} From 2333ccc88bcb1f8416b6108a95740cc9b2d1c2c5 Mon Sep 17 00:00:00 2001 From: Anthony Miyaguchi Date: Tue, 9 Jul 2019 14:27:08 -0700 Subject: [PATCH 05/19] Add normalize_case to function definitions --- src/ast.rs | 83 ++++++++++++++++++++++++------------------------- src/avro.rs | 2 +- src/bigquery.rs | 2 +- 3 files changed, 43 insertions(+), 44 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index 02ff11a..ec5d328 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -353,14 +353,6 @@ impl Tag { } } - /// Sets a tag with references to the name and the namespace. 
- fn set_name(&mut self, name: &str, namespace: &str) { - self.name = Some(name.to_string()); - if !namespace.is_empty() { - self.namespace = Some(namespace.to_string()); - } - } - /// Renames a column name so it contains only letters, numbers, and /// underscores while starting with a letter or underscore. This requirement /// is enforced by BigQuery during table creation. @@ -387,7 +379,7 @@ impl Tag { /// /// This modifies field names as well as required fields. /// See: https://cloud.google.com/bigquery/docs/schemas - pub fn normalize_properties(&mut self) { + pub fn normalize_properties(&mut self, normalize_case: bool) { if let Type::Object(ref mut object) = self.data_type { let fields = &mut object.fields; let keys: Vec = fields.keys().cloned().collect(); @@ -423,17 +415,25 @@ impl Tag { } } + /// Sets a tag with references to the name and the namespace. + fn set_name(&mut self, name: &str, namespace: &str) { + self.name = Some(name.to_string()); + if !namespace.is_empty() { + self.namespace = Some(namespace.to_string()); + } + } + /// A helper function for calculating the names and namespaces within the /// schema. /// /// The namespaces are built from the top-down and follows the depth-first /// traversal of the schema. - fn recurse_infer_name(&mut self, namespace: String) { - self.normalize_properties(); + fn recurse_infer_name(&mut self, namespace: String, normalize_case: bool) { + self.normalize_properties(normalize_case); let set_and_recurse = |tag: &mut Tag, name: &str| { tag.set_name(name, &namespace); - tag.recurse_infer_name(format!("{}.{}", &namespace, name)) + tag.recurse_infer_name(format!("{}.{}", &namespace, name), normalize_case) }; match &mut self.data_type { @@ -459,12 +459,12 @@ impl Tag { } /// Assign names and namespaces to tags from parent tags. 
- pub fn infer_name(&mut self) { + pub fn infer_name(&mut self, normalize_case: bool) { let namespace = match &self.name { Some(name) => name.clone(), None => "".into(), }; - self.recurse_infer_name(namespace); + self.recurse_infer_name(namespace, normalize_case); } /// Infer whether the current tag in the schema allows for the value to be @@ -535,9 +535,9 @@ impl Tag { impl TranslateFrom for Tag { type Error = &'static str; - fn translate_from(tag: jsonschema::Tag, _context: Context) -> Result { + fn translate_from(tag: jsonschema::Tag, context: Context) -> Result { let mut tag = tag.type_into_ast(); - tag.infer_name(); + tag.infer_name(context.normalize_case); tag.infer_nullability(); tag.is_root = true; Ok(tag) @@ -548,7 +548,7 @@ impl TranslateFrom for Tag { mod tests { use super::*; use pretty_assertions::assert_eq; - use serde_json::json; + use serde_json::{json, Value}; #[test] fn test_serialize_null() { @@ -928,6 +928,12 @@ mod tests { } } + fn assert_infer_name(expect: Value, actual: Value) { + let mut tag: Tag = serde_json::from_value(actual).unwrap(); + tag.infer_name(false); + assert_eq!(expect, json!(tag)) + } + #[test] fn test_tag_infer_name_object() { let data = json!({ @@ -938,8 +944,6 @@ mod tests { "atom_1": {"type": {"atom": "integer"}}, "atom_2": {"type": {"atom": "integer"}}, }}}}); - let mut tag: Tag = serde_json::from_value(data).unwrap(); - tag.infer_name(); let expect = json!({ "nullable": false, "type": { @@ -949,7 +953,7 @@ mod tests { "atom_1": {"name": "atom_1", "type": {"atom": "integer"}, "nullable": false}, "atom_2": {"name": "atom_2", "type": {"atom": "integer"}, "nullable": false}, }}}}); - assert_eq!(expect, json!(tag)); + assert_infer_name(expect, data); } #[test] @@ -964,8 +968,6 @@ mod tests { "fields": { "bar": {"type": {"atom": "integer"}} }}}}}}}); - let mut tag: Tag = serde_json::from_value(data).unwrap(); - tag.infer_name(); let expect = json!({ "nullable": false, "name": "foo", @@ -985,7 +987,7 @@ mod tests { "namespace": 
"foo.items", "type": {"atom": "integer"}} }}}}}}}); - assert_eq!(expect, json!(tag)); + assert_infer_name(expect, data); } #[test] @@ -1001,8 +1003,6 @@ mod tests { "fields": { "bar": {"type": {"atom": "integer"}} }}}}}}}); - let mut tag: Tag = serde_json::from_value(data).unwrap(); - tag.infer_name(); let expect = json!({ "nullable": false, "name": "foo", @@ -1030,12 +1030,11 @@ mod tests { "namespace": "foo.value", "type": {"atom": "integer"}} }}}}}}}); - assert_eq!(expect, json!(tag)); + assert_infer_name(expect, data); } - #[test] - fn test_tag_infer_name_union_object() { - let data = json!({ + fn fixture_union_object() -> Value { + json!({ "name": "foo", "type": { "union": { @@ -1052,9 +1051,11 @@ mod tests { "fields": { "baz": {"type": {"atom": "boolean"}} }}}}, - ]}}}); - let mut tag: Tag = serde_json::from_value(data.clone()).unwrap(); - tag.infer_name(); + ]}}}) + } + + #[test] + fn test_tag_infer_name_union_object() { let expect = json!({ "nullable": false, "name": "foo", @@ -1090,7 +1091,7 @@ mod tests { "type": {"atom": "boolean"}} }}}}, ]}}}); - assert_eq!(expect, json!(tag)); + assert_infer_name(expect, fixture_union_object()); let collapse_expect = json!({ "nullable": false, @@ -1111,19 +1112,19 @@ mod tests { "type": {"atom": "boolean"}}, }}}}); // collapse and infer name - let mut tag_collapse: Tag = serde_json::from_value(data.clone()).unwrap(); + let mut tag_collapse: Tag = serde_json::from_value(fixture_union_object()).unwrap(); tag_collapse.collapse(); - tag_collapse.infer_name(); + tag_collapse.infer_name(false); assert_eq!(collapse_expect, json!(tag_collapse)); // infer and then collapse // NOTE: The behavior is not the same, the name and namespace need to be inferred again - tag_collapse = serde_json::from_value(data.clone()).unwrap(); - tag_collapse.infer_name(); + tag_collapse = serde_json::from_value(fixture_union_object()).unwrap(); + tag_collapse.infer_name(false); tag_collapse.collapse(); assert_ne!(collapse_expect, 
json!(tag_collapse)); - tag_collapse.infer_name(); + tag_collapse.infer_name(false); assert_eq!(collapse_expect, json!(tag_collapse)); } @@ -1140,8 +1141,6 @@ mod tests { "bar": { "type": "null" }}}}}}}}}); - let mut tag: Tag = serde_json::from_value(data).unwrap(); - tag.infer_name(); let expect = json!({ "nullable": false, "type": { @@ -1160,7 +1159,7 @@ mod tests { "type": "null", "nullable": false, }}}}}}}}}); - assert_eq!(expect, json!(tag)); + assert_infer_name(expect, data); } #[test] @@ -1181,7 +1180,7 @@ mod tests { "64bit", ]}}}); let mut tag: Tag = serde_json::from_value(data).unwrap(); - tag.normalize_properties(); + tag.normalize_properties(false); if let Type::Object(object) = &tag.data_type { let expected: HashSet = ["valid_name", "renamed_value_0", "_64bit"] .iter() diff --git a/src/avro.rs b/src/avro.rs index 3618efd..45cd42c 100644 --- a/src/avro.rs +++ b/src/avro.rs @@ -116,7 +116,7 @@ impl TranslateFrom for Type { // top-down approach. tag.collapse(); tag.name = Some("root".into()); - tag.infer_name(); + tag.infer_name(context.normalize_case); } tag.infer_nullability(); diff --git a/src/bigquery.rs b/src/bigquery.rs index d468146..40daf8f 100644 --- a/src/bigquery.rs +++ b/src/bigquery.rs @@ -61,7 +61,7 @@ impl TranslateFrom for Tag { fn translate_from(tag: ast::Tag, context: Context) -> Result { let mut tag = tag; tag.collapse(); - tag.infer_name(); + tag.infer_name(context.normalize_case); tag.infer_nullability(); let fmt_reason = From a8a5e5a6220ec90a415c892e229c8fed23400fe5 Mon Sep 17 00:00:00 2001 From: Anthony Miyaguchi Date: Tue, 9 Jul 2019 14:51:29 -0700 Subject: [PATCH 06/19] Fix broken tests and sort property names properly --- tests/normalize_case.rs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/normalize_case.rs b/tests/normalize_case.rs index a8e5c9e..9938f2d 100644 --- a/tests/normalize_case.rs +++ b/tests/normalize_case.rs @@ -38,22 +38,22 @@ fn 
test_bigquery_normalize_snake_casing() { { "mode": "REQUIRED", "name": "test_camel_case", - "type": "BOOLEAN" + "type": "BOOL" }, { "mode": "REQUIRED", "name": "test_pascal_case", - "type": "BOOLEAN" + "type": "BOOL" }, { "mode": "REQUIRED", - "name": "test_snake_case", - "type": "BOOLEAN" + "name": "test_screaming_snake_case", + "type": "BOOL" }, { "mode": "REQUIRED", - "name": "test_screaming_snake_case", - "type": "BOOLEAN" + "name": "test_snake_case", + "type": "BOOL" } ] "#, @@ -66,8 +66,8 @@ fn test_bigquery_normalize_snake_casing() { #[test] fn test_avro_normalize_snake_casing() { let context = Context { - resolve_method: ResolveMethod::Cast, - ..Default::default() + normalize_case: true, + resolve_method: ResolveMethod::Panic, }; let expected: Value = serde_json::from_str( r#" @@ -82,11 +82,11 @@ fn test_avro_normalize_snake_casing() { "type": {"type": "boolean"} }, { - "name": "test_snake_case", + "name": "test_screaming_snake_case", "type": {"type": "boolean"} }, { - "name": "test_screaming_snake_case", + "name": "test_snake_case", "type": {"type": "boolean"} } ], From 95024dc77e07051b71bb8a6672051a5ad5abe98d Mon Sep 17 00:00:00 2001 From: Anthony Miyaguchi Date: Tue, 9 Jul 2019 14:52:10 -0700 Subject: [PATCH 07/19] Use heck to snake_case column names --- Cargo.lock | 16 ++++++++++++++++ Cargo.toml | 1 + src/ast.rs | 16 ++++++++++++++-- 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index daf29c9..d0a08bf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -76,6 +76,14 @@ dependencies = [ "termcolor 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "heck" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "unicode-segmentation 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "humantime" version = "1.2.0" @@ -95,6 +103,7 @@ version = "1.1.0" dependencies = [ "clap 2.32.0 
(registry+https://github.com/rust-lang/crates.io-index)", "env_logger 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", + "heck 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", "pretty_assertions 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.1.7 (registry+https://github.com/rust-lang/crates.io-index)", @@ -291,6 +300,11 @@ name = "ucd-util" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "unicode-segmentation" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "unicode-width" version = "0.1.5" @@ -357,6 +371,7 @@ dependencies = [ "checksum ctor 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "3b4c17619643c1252b5f690084b82639dd7fac141c57c8e77a00e0148132092c" "checksum difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198" "checksum env_logger 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "aafcde04e90a5226a6443b7aabdb016ba2f8307c847d524724bd9b346dd1a2d3" +"checksum heck 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205" "checksum humantime 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3ca7e5f2e110db35f93b837c81797f3714500b81d517bf20c431b16d3ca4f114" "checksum itoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "501266b7edd0174f8530248f87f99c88fbe60ca4ef3dd486835b8d8d53136f7f" "checksum lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bc5729f27f159ddd61f4df6228e827e86643d4d3e7c32183cb30a1c08f604a14" @@ -384,6 +399,7 @@ dependencies = [ "checksum textwrap 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = 
"307686869c93e71f94da64286f9a9524c0f308a9e1c87a583de8e9c9039ad3f6" "checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b" "checksum ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86" +"checksum unicode-segmentation 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1967f4cdfc355b37fd76d2a954fb2ed3871034eb4f26d60537d88795cfc332a9" "checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526" "checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" "checksum utf8-ranges 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "9d50aa7650df78abf942826607c62468ce18d9019673d4a2ebe1865dbb96ffde" diff --git a/Cargo.toml b/Cargo.toml index 0640169..09c1b8d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,7 @@ name = "jst" [dependencies] clap = "~2.32" env_logger = "0.6.1" +heck = "0.3.1" log = "0.4" regex = "1" serde = { version = "1.0", features = ["derive"] } diff --git a/src/ast.rs b/src/ast.rs index ec5d328..242a221 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -1,6 +1,9 @@ +extern crate heck; + use super::jsonschema; use super::Context; use super::TranslateFrom; +use heck::SnakeCase; use regex::Regex; use std::collections::{HashMap, HashSet}; @@ -386,7 +389,12 @@ impl Tag { for key in keys { // Replace property names with the normalized property name - if let Some(renamed) = Tag::normalize_name_bigquery(&key) { + if let Some(mut renamed) = Tag::normalize_name_bigquery(&key) { + renamed = if normalize_case { + renamed.to_snake_case() + } else { + renamed + }; if renamed.as_str() != key.as_str() { warn!("{} replaced with {}", key, renamed); fields.insert(renamed, 
fields[&key].clone()); @@ -408,7 +416,11 @@ impl Tag { .filter(Option::is_some) .map(Option::unwrap) .collect(); - Some(renamed) + if normalize_case { + Some(renamed.iter().map(|s| s.to_snake_case()).collect()) + } else { + Some(renamed) + } } None => None, }; From cae22801863efa8f4d151b7cefff2ef55f47ee50 Mon Sep 17 00:00:00 2001 From: Anthony Miyaguchi Date: Tue, 9 Jul 2019 15:07:04 -0700 Subject: [PATCH 08/19] Add a new test-case asserting names that start with numbers --- src/ast.rs | 49 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index 242a221..d93ebb4 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -356,15 +356,22 @@ impl Tag { } } + /// If a name starts with a number, prefix it with an underscore. + fn prefix_numeric(name: String) -> String { + if name.chars().next().unwrap().is_numeric() { + format!("_{}", name) + } else { + name + } + } + /// Renames a column name so it contains only letters, numbers, and /// underscores while starting with a letter or underscore. This requirement /// is enforced by BigQuery during table creation. 
fn normalize_name_bigquery(string: &str) -> Option { let re = Regex::new(r"^[a-zA-Z_][a-zA-Z0-9_]*$").unwrap(); let mut renamed = string.replace(".", "_").replace("-", "_"); - if renamed.chars().next().unwrap().is_numeric() { - renamed = format!("_{}", renamed); - }; + renamed = Tag::prefix_numeric(renamed); if re.is_match(&renamed) { Some(renamed) } else { @@ -391,7 +398,7 @@ impl Tag { // Replace property names with the normalized property name if let Some(mut renamed) = Tag::normalize_name_bigquery(&key) { renamed = if normalize_case { - renamed.to_snake_case() + Tag::prefix_numeric(renamed.to_snake_case()) } else { renamed }; @@ -417,7 +424,12 @@ impl Tag { .map(Option::unwrap) .collect(); if normalize_case { - Some(renamed.iter().map(|s| s.to_snake_case()).collect()) + Some( + renamed + .iter() + .map(|s| Tag::prefix_numeric(s.to_snake_case())) + .collect(), + ) } else { Some(renamed) } @@ -1176,6 +1188,17 @@ mod tests { #[test] fn test_tag_normalize_properties() { + fn assert_normalize(tag: &Tag, renamed: Vec<&str>) { + if let Type::Object(object) = &tag.data_type { + let expected: HashSet = renamed.iter().map(|x| x.to_string()).collect(); + let actual: HashSet = object.fields.keys().cloned().collect(); + assert_eq!(expected, actual); + assert_eq!(expected, object.required.clone().unwrap()); + } else { + panic!() + } + } + let data = json!({ "type": { "object": { @@ -1191,18 +1214,12 @@ mod tests { "$schema", "64bit", ]}}}); + let mut tag: Tag = serde_json::from_value(data).unwrap(); tag.normalize_properties(false); - if let Type::Object(object) = &tag.data_type { - let expected: HashSet = ["valid_name", "renamed_value_0", "_64bit"] - .iter() - .map(|x| x.to_string()) - .collect(); - let actual: HashSet = object.fields.keys().cloned().collect(); - assert_eq!(expected, actual); - assert_eq!(expected, object.required.clone().unwrap()); - } else { - panic!() - } + assert_normalize(&tag, vec!["valid_name", "renamed_value_0", "_64bit"]); + + 
tag.normalize_properties(true); + assert_normalize(&tag, vec!["valid_name", "renamed_value_0", "_64bit"]); } } From ac64a47b54364cf3e8903dbd363a1aad103c88e9 Mon Sep 17 00:00:00 2001 From: Anthony Miyaguchi Date: Tue, 9 Jul 2019 15:16:12 -0700 Subject: [PATCH 09/19] Rename prefix_numeric to normalize_numeric_prefix --- src/ast.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index d93ebb4..35cb30f 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -357,7 +357,7 @@ impl Tag { } /// If a name starts with a number, prefix it with an underscore. - fn prefix_numeric(name: String) -> String { + fn normalize_numeric_prefix(name: String) -> String { if name.chars().next().unwrap().is_numeric() { format!("_{}", name) } else { @@ -370,8 +370,7 @@ impl Tag { /// is enforced by BigQuery during table creation. fn normalize_name_bigquery(string: &str) -> Option { let re = Regex::new(r"^[a-zA-Z_][a-zA-Z0-9_]*$").unwrap(); - let mut renamed = string.replace(".", "_").replace("-", "_"); - renamed = Tag::prefix_numeric(renamed); + let renamed = Tag::normalize_numeric_prefix(string.replace(".", "_").replace("-", "_")); if re.is_match(&renamed) { Some(renamed) } else { @@ -398,7 +397,8 @@ impl Tag { // Replace property names with the normalized property name if let Some(mut renamed) = Tag::normalize_name_bigquery(&key) { renamed = if normalize_case { - Tag::prefix_numeric(renamed.to_snake_case()) + // heck::SnakeCase will strip all punctuation outside of word boundaries. 
+ Tag::normalize_numeric_prefix(renamed.to_snake_case()) } else { renamed }; @@ -427,7 +427,7 @@ impl Tag { Some( renamed .iter() - .map(|s| Tag::prefix_numeric(s.to_snake_case())) + .map(|s| Tag::normalize_numeric_prefix(s.to_snake_case())) .collect(), ) } else { From 5132a170f0041f657c532b54789fa60277b2fdfb Mon Sep 17 00:00:00 2001 From: Anthony Miyaguchi Date: Tue, 9 Jul 2019 15:38:39 -0700 Subject: [PATCH 10/19] Update context in main --- src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main.rs b/src/main.rs index 180138d..515ed16 100644 --- a/src/main.rs +++ b/src/main.rs @@ -61,7 +61,7 @@ fn main() { "drop" => ResolveMethod::Drop, _ => panic!("Unknown resolution method!"), }, - ..Default::default() + normalize_case: matches.is_present("normalize-case"), }; let output = match matches.value_of("type").unwrap() { From 5567a02ec12e8426d55642d69de00b46fe1951f1 Mon Sep 17 00:00:00 2001 From: Anthony Miyaguchi Date: Tue, 9 Jul 2019 15:43:41 -0700 Subject: [PATCH 11/19] Update scripts for generating a diff --- scripts/README.md | 9 ++++++++- scripts/mps-generate-avro-data-helper.py | 2 +- ...-generate-avro-schemas.sh => mps-generate-schemas.sh} | 8 +++++--- 3 files changed, 14 insertions(+), 5 deletions(-) rename scripts/{mps-generate-avro-schemas.sh => mps-generate-schemas.sh} (80%) diff --git a/scripts/README.md b/scripts/README.md index 4686a03..0bde5be 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -49,7 +49,14 @@ $ ./scripts/mps-download-sampled-schemas.py $ ./scripts/mps-download-sampled-data.py # Generates a folder avro/ -$ ./scripts/mps-generate-avro-schemas.sh +$ ./scripts/mps-generate-schemas.sh + +# Alternatively, specify a folder and pass flags +$ ./scripts/mps-generate-schemas.sh \ + bq_schemas \ + --type bigquery \ + --resolve drop \ + --normalize-case # Generates a folder avro-data/ $ ./scripts/mps-generate-avro-data.sh diff --git a/scripts/mps-generate-avro-data-helper.py 
b/scripts/mps-generate-avro-data-helper.py index 0ce2eb6..bb2d316 100755 --- a/scripts/mps-generate-avro-data-helper.py +++ b/scripts/mps-generate-avro-data-helper.py @@ -81,7 +81,7 @@ def convert(data, schema): if not os.path.exists(outdir): os.makedirs(outdir) -with open(f"avro/{document}.avro.json", "r") as f: +with open(f"avro/{document}.schema.json", "r") as f: schema_data = f.read() schema = avro.schema.Parse(schema_data) diff --git a/scripts/mps-generate-avro-schemas.sh b/scripts/mps-generate-schemas.sh similarity index 80% rename from scripts/mps-generate-avro-schemas.sh rename to scripts/mps-generate-schemas.sh index b42ffe7..33a010b 100755 --- a/scripts/mps-generate-avro-schemas.sh +++ b/scripts/mps-generate-schemas.sh @@ -9,7 +9,7 @@ if [[ ! -d "schemas/" ]]; then fi cargo build -bin="target/debug/jsonschema_transpiler" +bin="target/debug/jsonschema-transpiler" schemas=$(find schemas/ -name "*.schema.json") @@ -18,16 +18,18 @@ outdir=${1:-"avro"} if [[ -d $outdir ]]; then rm -r $outdir fi +shift; + mkdir $outdir total=0 failed=0 for schema in $schemas; do namespace=$(basename $(dirname $(dirname $schema))) - schema_filename=$(basename $schema | sed 's/schema.json/avro.json/g') + schema_filename=$(basename $schema) outfile="$outdir/$namespace.$schema_filename" - if ! $bin -f "$schema" --type avro > $outfile; then + if ! 
$bin "$@" "$schema" > $outfile; then echo "Failed on $schema" rm $outfile ((failed++)) From 708689f4ea465f4ba56d348041dcc7f1a70f2ff3 Mon Sep 17 00:00:00 2001 From: Anthony Miyaguchi Date: Mon, 15 Jul 2019 13:41:20 -0700 Subject: [PATCH 12/19] Add test cases for casing and move cases for translating schemas --- build.rs | 2 +- tests/resources/casing/alphanum_3.csv | 27 ++ .../resources/casing/mps-diff-integration.csv | 232 ++++++++++++++++ tests/resources/casing/word_4.csv | 256 ++++++++++++++++++ tests/resources/{ => translate}/array.json | 0 tests/resources/{ => translate}/atomic.json | 0 tests/resources/{ => translate}/map.json | 0 tests/resources/{ => translate}/object.json | 0 tests/resources/{ => translate}/oneof.json | 0 9 files changed, 516 insertions(+), 1 deletion(-) create mode 100644 tests/resources/casing/alphanum_3.csv create mode 100644 tests/resources/casing/mps-diff-integration.csv create mode 100644 tests/resources/casing/word_4.csv rename tests/resources/{ => translate}/array.json (100%) rename tests/resources/{ => translate}/atomic.json (100%) rename tests/resources/{ => translate}/map.json (100%) rename tests/resources/{ => translate}/object.json (100%) rename tests/resources/{ => translate}/oneof.json (100%) diff --git a/build.rs b/build.rs index efecc1b..a3ce9eb 100644 --- a/build.rs +++ b/build.rs @@ -142,7 +142,7 @@ fn bigquery_{name}() {{ } fn main() { - let test_cases = "tests/resources"; + let test_cases = "tests/resources/translate"; let mut avro_fp = File::create("tests/transpile_avro.rs").unwrap(); let mut bq_fp = File::create("tests/transpile_bigquery.rs").unwrap(); let format_tests = get_env_var_as_bool("FORMAT_TESTS", true); diff --git a/tests/resources/casing/alphanum_3.csv b/tests/resources/casing/alphanum_3.csv new file mode 100644 index 0000000..5c7a56a --- /dev/null +++ b/tests/resources/casing/alphanum_3.csv @@ -0,0 +1,27 @@ +AAA,aaa +AAa,a_aa +AA7,aa7 +AaA,aa_a +Aaa,aaa +Aa7,aa7 +A7A,a7a +A7a,a7a +A77,a77 +aAA,a_aa +aAa,a_aa 
+aA7,a_a7 +aaA,aa_a +aaa,aaa +aa7,aa7 +a7A,a7_a +a7a,a7a +a77,a77 +7AA,7aa +7Aa,7aa +7A7,7a7 +7aA,7a_a +7aa,7aa +7a7,7a7 +77A,77a +77a,77a +777,777 diff --git a/tests/resources/casing/mps-diff-integration.csv b/tests/resources/casing/mps-diff-integration.csv new file mode 100644 index 0000000..9449291 --- /dev/null +++ b/tests/resources/casing/mps-diff-integration.csv @@ -0,0 +1,232 @@ +AvailablePageFile,available_page_file +AvailablePhysicalMemory,available_physical_memory +AvailableVirtualMemory,available_virtual_memory +BuildID,build_id +D2DEnabled,d2d_enabled +DWriteEnabled,d_write_enabled +GPUActive,gpu_active +Headless,headless +IsGarbageCollecting,is_garbage_collecting +LowEndMachine,low_end_machine +ProductID,product_id +ProductName,product_name +RAM,ram +ReleaseChannel,release_channel +SecondsSinceLastCrash,seconds_since_last_crash +StartupCrash,startup_crash +SystemMemoryUsePercentage,system_memory_use_percentage +TotalPageFile,total_page_file +TotalPhysicalMemory,total_physical_memory +TotalVirtualMemory,total_virtual_memory +Version,version +acceptLanguages,accept_languages +accessibilityServices,accessibility_services +activeAddons,active_addons +activeExperiment,active_experiment +activeGMPlugins,active_gm_plugins +activePlugins,active_plugins +adHocTablesDir,ad_hoc_tables_dir +additionalProperties,additional_properties +addonCompatibilityCheckEnabled,addon_compatibility_check_enabled +addonId,addon_id +addonVersion,addon_version +advancedLayers,advanced_layers +allowAutoplay,allow_autoplay +apiCall,api_call +apiVersion,api_version +appDisabled,app_disabled +appLocales,app_locales +appName,app_name +appUpdateChannel,app_update_channel +appVersion,app_version +appleModelId,apple_model_id +applicationId,application_id +applicationName,application_name +architecturesInBinary,architectures_in_binary +autoDownload,auto_download +availableLocales,available_locales +baseAddress,base_address +blocklistEnabled,blocklist_enabled +buildId,build_id 
+certSubject,cert_subject +changedFiles,changed_files +changesetID,changeset_id +clientId,client_id +closedTS,closed_ts +connType,conn_type +crashDate,crash_date +createdDate,created_date +createdTimestamp,created_timestamp +creationDate,creation_date +debugID,debug_id +debugName,debug_name +defaultBrowser,default_browser +defaultSearch,default_search +defaultSearchEngine,default_search_engine +defaultSearchEngineData,default_search_engine_data +description,description +detectedUri,detected_uri +detectedVersion,detected_version +deviceID,device_id +displayVersion,display_version +distributionId,distribution_id +distributionVersion,distribution_version +distributorChannel,distributor_channel +driver,driver +driverDate,driver_date +driverVersion,driver_version +e10sCohort,e10s_cohort +e10sEnabled,e10s_enabled +ecosystemClientId,ecosystem_client_id +effectiveContentProcessLevel,effective_content_process_level +encryptedData,encrypted_data +encryptionKeyId,encryption_key_id +engagedTS,engaged_ts +engagementType,engagement_type +errorModules,error_modules +eventId,event_id +expiredTS,expired_ts +fileSize,file_size +fileVersion,file_version +firstUseDate,first_use_date +firstView,first_view +flashUsage,flash_usage +flowId,flow_id +globalSettings,global_settings +gpuProcess,gpu_process +hasBinaryComponents,has_binary_components +hasCrashEnvironment,has_crash_environment +hasSync,has_sync +hotfixVersion,hotfix_version +installDay,install_day +installYear,install_year +ipc_channel_error,ipc_channel_error +isDefaultBrowser,is_default_browser +isStartup,is_startup +isStubProfile,is_stub_profile +isSystem,is_system +isTablet,is_tablet +isWow64,is_wow64 +kernelVersion,kernel_version +keyedHistograms,keyed_histograms +l2cacheKB,l2cache_kb +l3cacheKB,l3cache_kb +landingSystem,landing_system +lastBuildId,last_build_id +lastVersion,last_version +launcherProcessState,launcher_process_state +learnMoreTS,learn_more_ts +loadDurationMS,load_duration_ms +loadPath,load_path 
+loaderName,loader_name +lostEventsCount,lost_events_count +memoryMB,memory_mb +mimeTypes,mime_types +moduleName,module_name +moduleTrustFlags,module_trust_flags +offeredTS,offered_ts +osName,os_name +osVersion,os_version +packetVersion,packet_version +pageId,page_id +pageSpecific,page_specific +partnerId,partner_id +partnerNames,partner_names +pingDiscardedForSize,ping_discarded_for_size +pioneerAddonMetadata,pioneer_addon_metadata +pioneerId,pioneer_id +pioneerUtilsVersion,pioneer_utils_version +placesBookmarksCount,places_bookmarks_count +placesPagesCount,places_pages_count +platformVersion,platform_version +pocketId,pocket_id +previousBuildId,previous_build_id +previousChannel,previous_channel +previousVersion,previous_version +prioData,prio_data +processStartTimestamp,process_start_timestamp +processType,process_type +processUptimeMS,process_uptime_ms +profileCreationDate,profile_creation_date +profileDate,profile_date +profileSubsessionCounter,profile_subsession_counter +promptResponse,prompt_response +pseudoDisplay,pseudo_display +pushDate,push_date +refreshRate,refresh_rate +regionalPrefsLocales,regional_prefs_locales +rememberCheckbox,remember_checkbox +requestedLocales,requested_locales +resetDate,reset_date +responseTime,response_time +reviewSystemUsed,review_system_used +runId,run_id +schemaName,schema_name +schemaVersion,schema_version +screenHeight,screen_height +screenWidth,screen_width +searchCohort,search_cohort +searchCounts,search_counts +sendFailure,send_failure +servicePackMajor,service_pack_major +servicePackMinor,service_pack_minor +sessionId,session_id +sessionState,session_state +settingsChanged,settings_changed +showTrackerStatsShare,show_tracker_stats_share +signedState,signed_state +sourcesJson,sources_json +spbeMaxConcurrentTabCount,spbe_max_concurrent_tab_count +spbeMaxConcurrentWindowCount,spbe_max_concurrent_window_count +spbeNavigationAboutNewtab,spbe_navigation_about_newtab +spbeNavigationContextmenu,spbe_navigation_contextmenu 
+spbeNavigationSearchbar,spbe_navigation_searchbar +spbeNavigationUrlbar,spbe_navigation_urlbar +spbeTabOpenEventCount,spbe_tab_open_event_count +spbeTotalUriCount,spbe_total_uri_count +spbeUnfilteredUriCount,spbe_unfiltered_uri_count +spbeUniqueDomainsCount,spbe_unique_domains_count +spbeWindowOpenEventCount,spbe_window_open_event_count +speedMHz,speed_m_hz +sqlTableName,sql_table_name +standardDeviation,standard_deviation +structVersion,struct_version +studyName,study_name +submissionURL,submission_url +subsessionId,subsession_id +subsessionLength,subsession_length +subsessionStartDate,subsession_start_date +subsysID,subsys_id +surveyId,survey_id +surveyVersion,survey_version +systemCpuCores,system_cpu_cores +systemCpuSpeedMhz,system_cpu_speed_mhz +systemGfxMonitors1ScreenWidth,system_gfx_monitors1_screen_width +systemGfxMonitors1ScreenWidthZeroIndexed,system_gfx_monitors1_screen_width_zero_indexed +systemLocales,system_locales +systemMemoryMb,system_memory_mb +tableName,table_name +targetBuildId,target_build_id +targetChannel,target_channel +targetDisplayVersion,target_display_version +targetVersion,target_version +telemetryEnabled,telemetry_enabled +textureSharing,texture_sharing +threadID,thread_id +threadName,thread_name +timezoneOffest,timezone_offest +totalBlockedAudibleMedia,total_blocked_audible_media +totalPages,total_pages +totalPagesAM,total_pages_am +totalTime,total_time +updateDay,update_day +updaterAvailable,updater_available +userDisabled,user_disabled +vendorID,vendor_id +virtualMaxMB,virtual_max_mb +votedTS,voted_ts +windowClosedTS,window_closed_ts +windowsBuildNumber,windows_build_number +windowsUBR,windows_ubr +xpcomAbi,xpcom_abi +xulLoadDurationMS,xul_load_duration_ms diff --git a/tests/resources/casing/word_4.csv b/tests/resources/casing/word_4.csv new file mode 100644 index 0000000..0b9e05f --- /dev/null +++ b/tests/resources/casing/word_4.csv @@ -0,0 +1,256 @@ +AAAA,aaaa +AAAa,aa_aa +AAA7,aaa7 +AAA_,aaa +AAaA,a_aa_a +AAaa,a_aaa +AAa7,a_aa7 
+AAa_,a_aa +AA7A,aa7a +AA7a,aa7a +AA77,aa77 +AA7_,aa7 +AA_A,aa_a +AA_a,aa_a +AA_7,aa_7 +AA__,aa +AaAA,aa_aa +AaAa,aa_aa +AaA7,aa_a7 +AaA_,aa_a +AaaA,aaa_a +Aaaa,aaaa +Aaa7,aaa7 +Aaa_,aaa +Aa7A,aa7_a +Aa7a,aa7a +Aa77,aa77 +Aa7_,aa7 +Aa_A,aa_a +Aa_a,aa_a +Aa_7,aa_7 +Aa__,aa +A7AA,a7aa +A7Aa,a7_aa +A7A7,a7a7 +A7A_,a7a +A7aA,a7a_a +A7aa,a7aa +A7a7,a7a7 +A7a_,a7a +A77A,a77a +A77a,a77a +A777,a777 +A77_,a77 +A7_A,a7_a +A7_a,a7_a +A7_7,a7_7 +A7__,a7 +A_AA,a_aa +A_Aa,a_aa +A_A7,a_a7 +A_A_,a_a +A_aA,a_a_a +A_aa,a_aa +A_a7,a_a7 +A_a_,a_a +A_7A,a_7a +A_7a,a_7a +A_77,a_77 +A_7_,a_7 +A__A,a_a +A__a,a_a +A__7,a_7 +A___,a +aAAA,a_aaa +aAAa,a_a_aa +aAA7,a_aa7 +aAA_,a_aa +aAaA,a_aa_a +aAaa,a_aaa +aAa7,a_aa7 +aAa_,a_aa +aA7A,a_a7a +aA7a,a_a7a +aA77,a_a77 +aA7_,a_a7 +aA_A,a_a_a +aA_a,a_a_a +aA_7,a_a_7 +aA__,a_a +aaAA,aa_aa +aaAa,aa_aa +aaA7,aa_a7 +aaA_,aa_a +aaaA,aaa_a +aaaa,aaaa +aaa7,aaa7 +aaa_,aaa +aa7A,aa7_a +aa7a,aa7a +aa77,aa77 +aa7_,aa7 +aa_A,aa_a +aa_a,aa_a +aa_7,aa_7 +aa__,aa +a7AA,a7_aa +a7Aa,a7_aa +a7A7,a7_a7 +a7A_,a7_a +a7aA,a7a_a +a7aa,a7aa +a7a7,a7a7 +a7a_,a7a +a77A,a77_a +a77a,a77a +a777,a777 +a77_,a77 +a7_A,a7_a +a7_a,a7_a +a7_7,a7_7 +a7__,a7 +a_AA,a_aa +a_Aa,a_aa +a_A7,a_a7 +a_A_,a_a +a_aA,a_a_a +a_aa,a_aa +a_a7,a_a7 +a_a_,a_a +a_7A,a_7a +a_7a,a_7a +a_77,a_77 +a_7_,a_7 +a__A,a_a +a__a,a_a +a__7,a_7 +a___,a +7AAA,7aaa +7AAa,7a_aa +7AA7,7aa7 +7AA_,7aa +7AaA,7aa_a +7Aaa,7aaa +7Aa7,7aa7 +7Aa_,7aa +7A7A,7a7a +7A7a,7a7a +7A77,7a77 +7A7_,7a7 +7A_A,7a_a +7A_a,7a_a +7A_7,7a_7 +7A__,7a +7aAA,7a_aa +7aAa,7a_aa +7aA7,7a_a7 +7aA_,7a_a +7aaA,7aa_a +7aaa,7aaa +7aa7,7aa7 +7aa_,7aa +7a7A,7a7_a +7a7a,7a7a +7a77,7a77 +7a7_,7a7 +7a_A,7a_a +7a_a,7a_a +7a_7,7a_7 +7a__,7a +77AA,77aa +77Aa,77aa +77A7,77a7 +77A_,77a +77aA,77a_a +77aa,77aa +77a7,77a7 +77a_,77a +777A,777a +777a,777a +7777,7777 +777_,777 +77_A,77_a +77_a,77_a +77_7,77_7 +77__,77 +7_AA,7_aa +7_Aa,7_aa +7_A7,7_a7 +7_A_,7_a +7_aA,7_a_a +7_aa,7_aa +7_a7,7_a7 +7_a_,7_a +7_7A,7_7a +7_7a,7_7a +7_77,7_77 +7_7_,7_7 +7__A,7_a +7__a,7_a 
+7__7,7_7 +7___,7 +_AAA,aaa +_AAa,a_aa +_AA7,aa7 +_AA_,aa +_AaA,aa_a +_Aaa,aaa +_Aa7,aa7 +_Aa_,aa +_A7A,a7a +_A7a,a7a +_A77,a77 +_A7_,a7 +_A_A,a_a +_A_a,a_a +_A_7,a_7 +_A__,a +_aAA,a_aa +_aAa,a_aa +_aA7,a_a7 +_aA_,a_a +_aaA,aa_a +_aaa,aaa +_aa7,aa7 +_aa_,aa +_a7A,a7_a +_a7a,a7a +_a77,a77 +_a7_,a7 +_a_A,a_a +_a_a,a_a +_a_7,a_7 +_a__,a +_7AA,7aa +_7Aa,7aa +_7A7,7a7 +_7A_,7a +_7aA,7a_a +_7aa,7aa +_7a7,7a7 +_7a_,7a +_77A,77a +_77a,77a +_777,777 +_77_,77 +_7_A,7_a +_7_a,7_a +_7_7,7_7 +_7__,7 +__AA,aa +__Aa,aa +__A7,a7 +__A_,a +__aA,a_a +__aa,aa +__a7,a7 +__a_,a +__7A,7a +__7a,7a +__77,77 +__7_,7 +___A,a +___a,a +___7,7 +____, diff --git a/tests/resources/array.json b/tests/resources/translate/array.json similarity index 100% rename from tests/resources/array.json rename to tests/resources/translate/array.json diff --git a/tests/resources/atomic.json b/tests/resources/translate/atomic.json similarity index 100% rename from tests/resources/atomic.json rename to tests/resources/translate/atomic.json diff --git a/tests/resources/map.json b/tests/resources/translate/map.json similarity index 100% rename from tests/resources/map.json rename to tests/resources/translate/map.json diff --git a/tests/resources/object.json b/tests/resources/translate/object.json similarity index 100% rename from tests/resources/object.json rename to tests/resources/translate/object.json diff --git a/tests/resources/oneof.json b/tests/resources/translate/oneof.json similarity index 100% rename from tests/resources/oneof.json rename to tests/resources/translate/oneof.json From 1ef4ced10239098f51331b24f122294ad74086b3 Mon Sep 17 00:00:00 2001 From: Anthony Miyaguchi Date: Mon, 15 Jul 2019 14:14:33 -0700 Subject: [PATCH 13/19] Expose snake casing as a public module for integration testing --- src/ast.rs | 8 +++---- src/lib.rs | 1 + tests/normalize_case.rs | 48 +++++++++++++++++++++++++++++++++++++++-- 3 files changed, 50 insertions(+), 7 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index 
35cb30f..4ebb579 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -1,9 +1,7 @@ -extern crate heck; - +use super::casing::to_snake_case; use super::jsonschema; use super::Context; use super::TranslateFrom; -use heck::SnakeCase; use regex::Regex; use std::collections::{HashMap, HashSet}; @@ -398,7 +396,7 @@ impl Tag { if let Some(mut renamed) = Tag::normalize_name_bigquery(&key) { renamed = if normalize_case { // heck::SnakeCase will strip all punctuation outside of word boundaries. - Tag::normalize_numeric_prefix(renamed.to_snake_case()) + Tag::normalize_numeric_prefix(to_snake_case(&renamed)) } else { renamed }; @@ -427,7 +425,7 @@ impl Tag { Some( renamed .iter() - .map(|s| Tag::normalize_numeric_prefix(s.to_snake_case())) + .map(|s| Tag::normalize_numeric_prefix(to_snake_case(&s))) .collect(), ) } else { diff --git a/src/lib.rs b/src/lib.rs index 1c15b46..79b1808 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,6 +11,7 @@ mod avro; mod bigquery; mod jsonschema; mod traits; +pub mod casing; use serde_json::{json, Value}; use traits::TranslateFrom; diff --git a/tests/normalize_case.rs b/tests/normalize_case.rs index 9938f2d..75280e4 100644 --- a/tests/normalize_case.rs +++ b/tests/normalize_case.rs @@ -1,8 +1,15 @@ -use jst::{convert_avro, convert_bigquery}; -use jst::{Context, ResolveMethod}; +use std::fs::File; +use std::io::{BufRead, BufReader}; +use std::path::PathBuf; + use pretty_assertions::assert_eq; use serde_json::Value; +use jst::{convert_avro, convert_bigquery}; +use jst::{Context, ResolveMethod}; +use jst::casing::to_snake_case; + + fn test_data() -> Value { serde_json::from_str( r#" @@ -26,6 +33,43 @@ fn test_data() -> Value { .unwrap() } +/// Get the resource path for all the casing tests +fn resource_path() -> PathBuf { + let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + path.push("tests/resources/casing"); + path +} + +/// Test the `to_snake_case` method against a test file in the format +/// `reference,expected` +fn snake_case_test(case_name: 
&str) { + let mut path = resource_path(); + path.push(case_name); + let file = File::open(&path).unwrap(); + let reader = BufReader::new(file); + for line in reader.lines() { + let line = line.unwrap().to_string(); + let cols: Vec<&str> = line.split(",").collect(); + assert_eq!(cols.len(), 2); + assert_eq!(to_snake_case(cols[0]), cols[1]); + } +} + +#[test] +fn test_snake_casing_alphanum_3() { + snake_case_test("alphanum_3.csv"); +} + +#[test] +fn test_snake_casing_word_4() { + snake_case_test("word_4.csv"); +} + +#[test] +fn test_snake_casing_mps_diff_integration() { + snake_case_test("mps-diff-integration.csv"); +} + #[test] fn test_bigquery_normalize_snake_casing() { let context = Context { From a50eda3372367a0081a5bbdc6d6405039604b199 Mon Sep 17 00:00:00 2001 From: Anthony Miyaguchi Date: Mon, 15 Jul 2019 15:14:09 -0700 Subject: [PATCH 14/19] Replace regex with oniguruma; implement to_snake_case with regexes --- Cargo.lock | 52 +++++++++++++++++++++++++++-------------- Cargo.toml | 3 +-- src/ast.rs | 2 +- src/lib.rs | 4 ++-- tests/normalize_case.rs | 3 +-- 5 files changed, 40 insertions(+), 24 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d0a08bf..6e1fe9d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -31,6 +31,11 @@ name = "bitflags" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "cc" +version = "1.0.37" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "cfg-if" version = "0.1.9" @@ -76,14 +81,6 @@ dependencies = [ "termcolor 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "heck" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "unicode-segmentation 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "humantime" version = "1.2.0" @@ -103,10 +100,9 @@ version = "1.1.0" dependencies = [ "clap 2.32.0 
(registry+https://github.com/rust-lang/crates.io-index)", "env_logger 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", - "heck 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", + "onig 4.3.2 (registry+https://github.com/rust-lang/crates.io-index)", "pretty_assertions 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 1.1.7 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.94 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.40 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -139,6 +135,26 @@ name = "numtoa" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "onig" +version = "4.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "bitflags 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.58 (registry+https://github.com/rust-lang/crates.io-index)", + "onig_sys 69.1.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "onig_sys" +version = "69.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "cc 1.0.37 (registry+https://github.com/rust-lang/crates.io-index)", + "pkg-config 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "output_vt100" version = "0.1.2" @@ -147,6 +163,11 @@ dependencies = [ "winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "pkg-config" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "pretty_assertions" version = "0.6.1" @@ -300,11 +321,6 @@ name = "ucd-util" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = 
"unicode-segmentation" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "unicode-width" version = "0.1.5" @@ -366,12 +382,12 @@ dependencies = [ "checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" "checksum atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "9a7d5b8723950951411ee34d271d99dddcc2035a16ab25310ea2c8cfd4369652" "checksum bitflags 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3d155346769a6855b86399e9bc3814ab343cd3d62c7e985113d46a0ec3c281fd" +"checksum cc 1.0.37 (registry+https://github.com/rust-lang/crates.io-index)" = "39f75544d7bbaf57560d2168f28fd649ff9c76153874db88bdbdfd839b1a7e7d" "checksum cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "b486ce3ccf7ffd79fdeb678eac06a9e6c09fc88d33836340becb8fffe87c5e33" "checksum clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b957d88f4b6a63b9d70d5f454ac8011819c6efa7727858f458ab71c756ce2d3e" "checksum ctor 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "3b4c17619643c1252b5f690084b82639dd7fac141c57c8e77a00e0148132092c" "checksum difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198" "checksum env_logger 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "aafcde04e90a5226a6443b7aabdb016ba2f8307c847d524724bd9b346dd1a2d3" -"checksum heck 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205" "checksum humantime 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3ca7e5f2e110db35f93b837c81797f3714500b81d517bf20c431b16d3ca4f114" "checksum itoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "501266b7edd0174f8530248f87f99c88fbe60ca4ef3dd486835b8d8d53136f7f" 
"checksum lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bc5729f27f159ddd61f4df6228e827e86643d4d3e7c32183cb30a1c08f604a14" @@ -379,7 +395,10 @@ dependencies = [ "checksum log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c84ec4b527950aa83a329754b01dbe3f58361d1c5efacd1f6d68c494d08a17c6" "checksum memchr 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2efc7bc57c883d4a4d6e3246905283d8dae951bb3bd32f49d6ef297f546e1c39" "checksum numtoa 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b8f8bdf33df195859076e54ab11ee78a1b208382d3a26ec40d142ffc1ecc49ef" +"checksum onig 4.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a646989adad8a19f49be2090374712931c3a59835cb5277b4530f48b417f26e7" +"checksum onig_sys 69.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "388410bf5fa341f10e58e6db3975f4bea1ac30247dd79d37a9e5ced3cb4cc3b0" "checksum output_vt100 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "53cdc5b785b7a58c5aad8216b3dfa114df64b0b06ae6e1501cef91df2fbdf8f9" +"checksum pkg-config 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "676e8eb2b1b4c9043511a9b7bea0915320d7e502b0a079fb03f9635a5252b18c" "checksum pretty_assertions 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3f81e1644e1b54f5a68959a29aa86cde704219254669da328ecfdf6a1f09d427" "checksum proc-macro2 0.4.30 (registry+https://github.com/rust-lang/crates.io-index)" = "cf3d2011ab5c909338f7887f4fc896d35932e29146c12c8d01da6b22a80ba759" "checksum quick-error 1.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "9274b940887ce9addde99c4eee6b5c44cc494b182b97e73dc8ffdcb3397fd3f0" @@ -399,7 +418,6 @@ dependencies = [ "checksum textwrap 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "307686869c93e71f94da64286f9a9524c0f308a9e1c87a583de8e9c9039ad3f6" "checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = 
"c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b" "checksum ucd-util 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "535c204ee4d8434478593480b8f86ab45ec9aae0e83c568ca81abf0fd0e88f86" -"checksum unicode-segmentation 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1967f4cdfc355b37fd76d2a954fb2ed3871034eb4f26d60537d88795cfc332a9" "checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526" "checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" "checksum utf8-ranges 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "9d50aa7650df78abf942826607c62468ce18d9019673d4a2ebe1865dbb96ffde" diff --git a/Cargo.toml b/Cargo.toml index 09c1b8d..fe7cc17 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,9 +13,8 @@ name = "jst" [dependencies] clap = "~2.32" env_logger = "0.6.1" -heck = "0.3.1" log = "0.4" -regex = "1" +onig = "4.3" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" diff --git a/src/ast.rs b/src/ast.rs index 4ebb579..a93d585 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -2,7 +2,7 @@ use super::casing::to_snake_case; use super::jsonschema; use super::Context; use super::TranslateFrom; -use regex::Regex; +use onig::Regex; use std::collections::{HashMap, HashSet}; #[derive(Serialize, Deserialize, Debug, Copy, Clone)] diff --git a/src/lib.rs b/src/lib.rs index 79b1808..117e8b3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,7 @@ #![recursion_limit = "128"] #[macro_use] extern crate log; -extern crate regex; +extern crate onig; #[macro_use] extern crate serde; extern crate serde_json; @@ -9,9 +9,9 @@ extern crate serde_json; mod ast; mod avro; mod bigquery; +pub mod casing; mod jsonschema; mod traits; -pub mod casing; use serde_json::{json, Value}; use traits::TranslateFrom; diff --git 
a/tests/normalize_case.rs b/tests/normalize_case.rs index 75280e4..354b0b0 100644 --- a/tests/normalize_case.rs +++ b/tests/normalize_case.rs @@ -5,10 +5,9 @@ use std::path::PathBuf; use pretty_assertions::assert_eq; use serde_json::Value; +use jst::casing::to_snake_case; use jst::{convert_avro, convert_bigquery}; use jst::{Context, ResolveMethod}; -use jst::casing::to_snake_case; - fn test_data() -> Value { serde_json::from_str( From 4a376fd1a657a75162049ae7c97cbf367b6a2532 Mon Sep 17 00:00:00 2001 From: Anthony Miyaguchi Date: Mon, 15 Jul 2019 15:43:31 -0700 Subject: [PATCH 15/19] Check-in latest implementation of casing; use static_lazy --- Cargo.lock | 1 + Cargo.toml | 1 + src/casing.rs | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 2 ++ 4 files changed, 52 insertions(+) create mode 100644 src/casing.rs diff --git a/Cargo.lock b/Cargo.lock index 6e1fe9d..eb61e41 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -100,6 +100,7 @@ version = "1.1.0" dependencies = [ "clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)", "env_logger 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", "onig 4.3.2 (registry+https://github.com/rust-lang/crates.io-index)", "pretty_assertions 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/Cargo.toml b/Cargo.toml index fe7cc17..25827e9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,6 +17,7 @@ log = "0.4" onig = "4.3" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" +lazy_static = "1.3.0" [build-dependencies] serde = { version = "1.0", features = ["derive"] } diff --git a/src/casing.rs b/src/casing.rs new file mode 100644 index 0000000..c102947 --- /dev/null +++ b/src/casing.rs @@ -0,0 +1,48 @@ +use onig::Regex; + +pub fn to_snake_case(input: &str) -> String { + lazy_static! 
{ + static ref EXTRA_SYMBOL: Regex = Regex::new(r"[^\w]|_").unwrap(); + static ref REV_WORD_BOUNDARY: Regex = Regex::new( + r"(?x) + \b # standard word boundary + |(?<=[a-z][A-Z])(?=\d*[A-Z]) # break on runs of uppercase e.g. A7Aa -> A7|Aa + |(?<=[a-z][A-Z])(?=\d*[a-z]) # break in runs of lowercase e.g a7Aa -> a7|Aa + |(?<=[A-Z])(?=\d*[a-z]) # ends with an uppercase e.g. a7A -> a7|A + ", + ) + .unwrap(); + } + let subbed: String = EXTRA_SYMBOL.replace_all(input, " ").chars().rev().collect(); + let words: Vec<&str> = REV_WORD_BOUNDARY + .split(&subbed) + .filter(|s| !s.trim().is_empty()) + .collect(); + words.join("_").to_lowercase().chars().rev().collect() +} + +#[cfg(test)] +mod tests { + use super::*; + + macro_rules! case { + ($test:expr, $expect:expr) => { + assert_eq!(to_snake_case($test), $expect) + }; + } + #[test] + fn test_to_snake_case() { + // one word + case!("Aa", "aa"); + // two words + case!("aA", "a_a"); + // underscores are word boundaries + case!("_a__a_", "a_a"); + // mnemonics are considered words + case!("RAM", "ram"); + // numbers can be lowercase + case!("a7aAa", "a7a_aa"); + // numbers can be uppercase + case!("A7AAa", "a7a_aa"); + } +} diff --git a/src/lib.rs b/src/lib.rs index 117e8b3..2c4807f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,6 +5,8 @@ extern crate onig; #[macro_use] extern crate serde; extern crate serde_json; +#[macro_use] +extern crate lazy_static; mod ast; mod avro; From c6f7e6f98a6e022f636e36d62399c60acc02d669 Mon Sep 17 00:00:00 2001 From: Anthony Miyaguchi Date: Mon, 15 Jul 2019 15:49:18 -0700 Subject: [PATCH 16/19] Add comment for test case when normalizing property names --- src/ast.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ast.rs b/src/ast.rs index a93d585..4b9c794 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -1217,6 +1217,8 @@ mod tests { tag.normalize_properties(false); assert_normalize(&tag, vec!["valid_name", "renamed_value_0", "_64bit"]); + // Test that numbers are properly prefixed with 
underscores after + // normalizing the case. tag.normalize_properties(true); assert_normalize(&tag, vec!["valid_name", "renamed_value_0", "_64bit"]); } From b8c9bb6e364970fdf2d62dee721f15de25ff697f Mon Sep 17 00:00:00 2001 From: Anthony Miyaguchi Date: Mon, 15 Jul 2019 16:17:53 -0700 Subject: [PATCH 17/19] Add docstring to to_snake_case --- src/casing.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/casing.rs b/src/casing.rs index c102947..6f2524c 100644 --- a/src/casing.rs +++ b/src/casing.rs @@ -1,8 +1,21 @@ use onig::Regex; +/// Normalize the casing of a string to be `snake_case`. +/// +/// This function produces strings that are transformed consistently from a +/// variety of different input casing. The rule-set for word boundaries are +/// derived from the withoutboats/heck crate. Underscores are considered word +/// boundaries in addition to the standard pattern e.g. `\b`. `camelCasing` is +/// detected by a lowercase followed by an uppercase. Numbers can take on either +/// case depending on the preceeding symbol. +/// +/// See: https://github.com/withoutboats/heck/blob/master/src/lib.rs#L7-L17 pub fn to_snake_case(input: &str) -> String { lazy_static! { static ref EXTRA_SYMBOL: Regex = Regex::new(r"[^\w]|_").unwrap(); + // This regex matches camelCase in reverse, since the lookbehind + // operation only accepts patterns of fixed length. Reversing let's us + // determine whether several digits will be uppercase or lowercase. 
static ref REV_WORD_BOUNDARY: Regex = Regex::new( r"(?x) \b # standard word boundary From d307cc37725b3bd05d305d9a495c45781f91375c Mon Sep 17 00:00:00 2001 From: Anthony Miyaguchi Date: Tue, 16 Jul 2019 12:57:59 -0700 Subject: [PATCH 18/19] Update documentation to be more specific --- src/ast.rs | 7 ++++--- src/casing.rs | 38 ++++++++++++++++++++++++++------------ tests/normalize_case.rs | 5 +++++ 3 files changed, 35 insertions(+), 15 deletions(-) diff --git a/src/ast.rs b/src/ast.rs index 4b9c794..a219b9b 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -322,7 +322,7 @@ impl Tag { } } - /// Get the path the the current tag in the context of the larger schema. + /// Get the path to the current tag in the context of the larger schema. /// /// Each tag in the schema can be unambiguously referenced by concatenating /// the name of tag with the tag's namespace. For example, a document may @@ -340,7 +340,7 @@ impl Tag { /// /// The fully qualified names are as follows: /// - /// * `root.attributes.timestamp` + /// * `root.environment.timestamp` /// * `root.payload.measurement` /// * `root.payload.timestamp` pub fn fully_qualified_name(&self) -> String { @@ -395,7 +395,8 @@ impl Tag { // Replace property names with the normalized property name if let Some(mut renamed) = Tag::normalize_name_bigquery(&key) { renamed = if normalize_case { - // heck::SnakeCase will strip all punctuation outside of word boundaries. + // snake_casing strips symbols outside of word + // boundaries e.g. _64bit -> 64bit Tag::normalize_numeric_prefix(to_snake_case(&renamed)) } else { renamed diff --git a/src/casing.rs b/src/casing.rs index 6f2524c..20137b9 100644 --- a/src/casing.rs +++ b/src/casing.rs @@ -1,27 +1,41 @@ use onig::Regex; -/// Normalize the casing of a string to be `snake_case`. +/// Normalize the case of a string to be `snake_case`. /// -/// This function produces strings that are transformed consistently from a -/// variety of different input casing. 
The rule-set for word boundaries are -/// derived from the withoutboats/heck crate. Underscores are considered word -/// boundaries in addition to the standard pattern e.g. `\b`. `camelCasing` is -/// detected by a lowercase followed by an uppercase. Numbers can take on either -/// case depending on the preceeding symbol. +/// This function produces internally-consistent snake-casing that performs well +/// in many situations. The rule-set for word boundaries is consistent with the +/// withoutboats/heck crate. Several benefits include treating embedded +/// mnemonics like `RAM` and `XMLHttpRequest` in an intuitive fashion. See +/// `tests/resources/casing/mps-diff-integration.csv` in the test sources for +/// empirical use of this casing logic. /// -/// See: https://github.com/withoutboats/heck/blob/master/src/lib.rs#L7-L17 +/// Underscores are considered word boundaries alongside the standard `\b` +/// pattern. Boundaries in `camelCasing` are found by instances of a lowercase +/// followed by an uppercase. Digits can be either lowercase or uppercase +/// depending on the case of the most recent letter. Sequences of underscores +/// are not significant and therefore cannot be used to encode other characters +/// e.g. `-` cannot be represented via `__` because `_` is a word boundary.
+/// +/// ## References +/// +/// * [Reference Python3 implementation](https://github.com/acmiyaguchi/test-casing/blob/8ca3d68db512fd3a17868c0b08cc84909ebebbc7/src/main.py#L1-L34) +/// * [[withoutboats/heck] - Definition of a word boundary](https://github.com/withoutboats/heck/blob/093d56fbf001e1506e56dbfa38631d99b1066df1/src/lib.rs#L7-L17) +/// * [[RexEgg] - Regex Boundaries and Delimiters—Standard and Advanced](https://www.rexegg.com/regex-boundaries.html) +/// * [[StackOverflow] - RegEx to split camelCase or TitleCase (advanced)](https://stackoverflow.com/a/7599674) +/// * [[StackOverflow] - What's the technical reason for “lookbehind assertion MUST be fixed length” in regex?](https://stackoverflow.com/a/40078049) pub fn to_snake_case(input: &str) -> String { lazy_static! { static ref EXTRA_SYMBOL: Regex = Regex::new(r"[^\w]|_").unwrap(); // This regex matches camelCase in reverse, since the lookbehind - // operation only accepts patterns of fixed length. Reversing let's us - // determine whether several digits will be uppercase or lowercase. + // operation only accepts patterns of fixed length. This "inverted" + // lookahead can help determine whether a digit is lowercase or + // uppercase. static ref REV_WORD_BOUNDARY: Regex = Regex::new( r"(?x) \b # standard word boundary |(?<=[a-z][A-Z])(?=\d*[A-Z]) # break on runs of uppercase e.g. A7Aa -> A7|Aa - |(?<=[a-z][A-Z])(?=\d*[a-z]) # break in runs of lowercase e.g a7Aa -> a7|Aa - |(?<=[A-Z])(?=\d*[a-z]) # ends with an uppercase e.g. a7A -> a7|A + |(?<=[a-z][A-Z])(?=\d*[a-z]) # break on runs of lowercase e.g a7Aa -> a7|Aa + |(?<=[A-Z])(?=\d*[a-z]) # break on final uppercase e.g. 
a7A -> a7|A ", ) .unwrap(); diff --git a/tests/normalize_case.rs b/tests/normalize_case.rs index 354b0b0..e99960d 100644 --- a/tests/normalize_case.rs +++ b/tests/normalize_case.rs @@ -56,16 +56,21 @@ fn snake_case_test(case_name: &str) { #[test] fn test_snake_casing_alphanum_3() { + // all strings of length 3 drawn from the alphabet "aA7" snake_case_test("alphanum_3.csv"); } #[test] fn test_snake_casing_word_4() { + // all strings of length 4 drawn from the alphabet "aA7_" snake_case_test("word_4.csv"); } #[test] fn test_snake_casing_mps_diff_integration() { + // all column names from mozilla-pipeline-schemas affected by snake_casing + // https://github.com/mozilla/jsonschema-transpiler/pull/79#issuecomment-509839572 + // https://gist.github.com/acmiyaguchi/3f526c440b67ebe469bcb6ab2da5123f#file-readme-md snake_case_test("mps-diff-integration.csv"); } From bc9c0ec4aa5f515f09a4f909ab071762ad975616 Mon Sep 17 00:00:00 2001 From: Anthony Miyaguchi Date: Tue, 16 Jul 2019 12:59:18 -0700 Subject: [PATCH 19/19] Bump version to 1.2.0 --- Cargo.lock | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index eb61e41..6068132 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -96,7 +96,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "jsonschema-transpiler" -version = "1.1.0" +version = "1.2.0" dependencies = [ "clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)", "env_logger 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/Cargo.toml b/Cargo.toml index 25827e9..a888817 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "jsonschema-transpiler" -version = "1.1.0" +version = "1.2.0" authors = ["Anthony Miyaguchi "] description = "A tool to transpile JSON Schema into schemas for data processing" license = "MPL-2.0"