Skip to content

Commit

Permalink
Merge pull request #76 from mozilla/drop-fields
Browse files Browse the repository at this point in the history
Add a flag to drop fields instead of casting
  • Loading branch information
acmiyaguchi committed Jul 2, 2019
2 parents a532e90 + 79e9768 commit 3e688be
Show file tree
Hide file tree
Showing 18 changed files with 943 additions and 321 deletions.
161 changes: 84 additions & 77 deletions Cargo.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "jsonschema-transpiler"
version = "1.0.0"
version = "1.1.0"
authors = ["Anthony Miyaguchi <amiyaguchi@mozilla.com>"]
description = "A tool to transpile JSON Schema into schemas for data processing"
license = "MPL-2.0"
Expand Down
25 changes: 22 additions & 3 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ struct TestCase {
#[serde(default, skip_serializing_if = "Value::is_null")]
description: Value,
name: String,
// True if the schema does not involve ambiguous sections
#[serde(default)]
compatible: bool,
test: TestData,
}

Expand Down Expand Up @@ -70,20 +73,31 @@ fn write_avro_tests(mut outfile: &File, suite: &TestSuite) {
for case in &suite.tests {
let formatted = format!(
r##"
#[test]
#[test]{should_panic}
fn avro_{name}() {{
let input_data = r#"
{input_data}
"#;
let expected_data = r#"
{expected}
"#;
let mut context = Context {{
resolve_method: ResolveMethod::Cast,
}};
let input: Value = serde_json::from_str(input_data).unwrap();
let expected: Value = serde_json::from_str(expected_data).unwrap();
assert_eq!(expected, convert_avro(&input));
assert_eq!(expected, convert_avro(&input, context));
context.resolve_method = ResolveMethod::Panic;
convert_avro(&input, context);
}}
"##,
name = case.name,
should_panic = if case.compatible {
""
} else {
"\n#[should_panic]"
},
input_data = format_json(case.test.json.clone()),
expected = format_json(case.test.avro.clone()),
);
Expand All @@ -103,9 +117,12 @@ fn bigquery_{name}() {{
let expected_data = r#"
{expected}
"#;
let context = Context {{
resolve_method: ResolveMethod::Cast,
}};
let input: Value = serde_json::from_str(input_data).unwrap();
let expected: Value = serde_json::from_str(expected_data).unwrap();
assert_eq!(expected, convert_bigquery(&input));
assert_eq!(expected, convert_bigquery(&input, context));
}}
"##,
name = case.name,
Expand All @@ -126,6 +143,7 @@ fn main() {
write!(
avro_fp,
r#"use jst::convert_avro;
use jst::{{Context, ResolveMethod}};
use pretty_assertions::assert_eq;
use serde_json::Value;
"#
Expand All @@ -135,6 +153,7 @@ use serde_json::Value;
write!(
bq_fp,
r#"use jst::convert_bigquery;
use jst::{{Context, ResolveMethod}};
use pretty_assertions::assert_eq;
use serde_json::Value;
"#
Expand Down
36 changes: 19 additions & 17 deletions src/ast.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use super::jsonschema;
use super::Context;
use super::TranslateFrom;
use regex::Regex;
use std::collections::{HashMap, HashSet};

Expand Down Expand Up @@ -77,14 +79,12 @@ impl Map {

#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Union {
items: Vec<Box<Tag>>,
items: Vec<Tag>,
}

impl Union {
pub fn new(items: Vec<Tag>) -> Self {
Union {
items: items.into_iter().map(Box::new).collect(),
}
Union { items }
}

/// Collapse a union of types into a structurally compatible type.
Expand All @@ -95,7 +95,7 @@ impl Union {
/// be consumed by the JSON type. In a similar fashion, a table schema is determined
/// to be nullable or required via occurances of null types in unions.
pub fn collapse(&self) -> Tag {
let is_null = self.items.iter().any(|x| x.is_null());
let is_null = self.items.iter().any(Tag::is_null);

if self.items.is_empty() {
panic!("empty union is not allowed")
Expand All @@ -109,15 +109,15 @@ impl Union {
};
}

let items: Vec<Box<Tag>> = self
let items: Vec<Tag> = self
.items
.iter()
.filter(|x| !x.is_null())
.map(|x| {
if let Type::Union(union) = &x.data_type {
let mut tag = union.collapse();
tag.name = x.name.clone();
Box::new(tag)
tag
} else {
x.clone()
}
Expand All @@ -128,7 +128,7 @@ impl Union {
// the preprocessing step, check for nullability based on the immediate level of tags
let nullable = is_null || items.iter().any(|tag| tag.nullable);

let data_type: Type = if items.iter().all(|x| x.is_atom()) {
let data_type: Type = if items.iter().all(Tag::is_atom) {
items
.into_iter()
.fold(Type::Null, |acc, x| match (acc, &x.data_type) {
Expand All @@ -153,7 +153,7 @@ impl Union {
Type::Atom(Atom::JSON)
}
})
} else if items.iter().all(|x| x.is_object()) {
} else if items.iter().all(Tag::is_object) {
items
.into_iter()
.fold(Type::Null, |acc, x| match (&acc, &x.data_type) {
Expand Down Expand Up @@ -185,7 +185,7 @@ impl Union {
let required: Option<HashSet<String>> =
match (&left.required, &right.required) {
(Some(x), Some(y)) => {
Some(x.intersection(&y).map(|x| x.to_string()).collect())
Some(x.intersection(&y).map(ToString::to_string).collect())
}
(Some(x), None) | (None, Some(x)) => Some(x.clone()),
_ => None,
Expand All @@ -201,7 +201,7 @@ impl Union {
Type::Atom(Atom::JSON)
}
})
} else if items.iter().all(|x| x.is_map()) {
} else if items.iter().all(Tag::is_map) {
let tags: Vec<Tag> = items
.into_iter()
.map(|x| match &x.data_type {
Expand All @@ -210,7 +210,7 @@ impl Union {
})
.collect();
Type::Map(Map::new(None, Union::new(tags).collapse()))
} else if items.iter().all(|x| x.is_array()) {
} else if items.iter().all(Tag::is_array) {
let tags: Vec<Tag> = items
.into_iter()
.map(|x| match &x.data_type {
Expand Down Expand Up @@ -494,7 +494,7 @@ impl Tag {
.map(String::as_str)
.map(Tag::rename_string_bigquery)
.filter(Option::is_some)
.map(|x| x.unwrap())
.map(Option::unwrap)
.collect();
Some(renamed)
}
Expand All @@ -504,13 +504,15 @@ impl Tag {
}
}

impl From<jsonschema::Tag> for Tag {
fn from(tag: jsonschema::Tag) -> Self {
impl TranslateFrom<jsonschema::Tag> for Tag {
type Error = &'static str;

fn translate_from(tag: jsonschema::Tag, _context: Context) -> Result<Self, Self::Error> {
let mut tag = tag.type_into_ast();
tag.infer_name();
tag.infer_nullability();
tag.is_root = true;
tag
Ok(tag)
}
}

Expand Down Expand Up @@ -674,7 +676,7 @@ mod tests {
};
let union = Tag {
data_type: Type::Union(Union {
items: vec![Box::new(test_int), Box::new(test_null)],
items: vec![test_int, test_null],
}),
..Default::default()
};
Expand Down
139 changes: 87 additions & 52 deletions src/avro.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
/// https://avro.apache.org/docs/current/spec.html
use super::ast;
use super::TranslateFrom;
use super::{Context, ResolveMethod};
use serde_json::{json, Value};

#[derive(Serialize, Deserialize, Debug)]
Expand Down Expand Up @@ -103,8 +105,10 @@ impl Default for Type {
}
}

impl From<ast::Tag> for Type {
fn from(tag: ast::Tag) -> Self {
impl TranslateFrom<ast::Tag> for Type {
type Error = String;

fn translate_from(tag: ast::Tag, context: Context) -> Result<Self, Self::Error> {
let mut tag = tag;
if tag.is_root {
// Name inference is run only from the root for the proper
Expand All @@ -115,6 +119,21 @@ impl From<ast::Tag> for Type {
tag.infer_name();
}
tag.infer_nullability();

let fmt_reason =
|reason: &str| -> String { format!("{} - {}", tag.fully_qualified_name(), reason) };
let handle_error = |reason: &str| -> Result<Type, Self::Error> {
let message = fmt_reason(reason);
match context.resolve_method {
ResolveMethod::Cast => {
warn!("{}", message);
Ok(Type::Primitive(Primitive::String))
}
ResolveMethod::Drop => Err(message),
ResolveMethod::Panic => panic!(message),
}
};

let data_type = match &tag.data_type {
ast::Type::Null => Type::Primitive(Primitive::Null),
ast::Type::Atom(atom) => Type::Primitive(match atom {
Expand All @@ -123,63 +142,76 @@ impl From<ast::Tag> for Type {
ast::Atom::Number => Primitive::Double,
ast::Atom::String => Primitive::String,
ast::Atom::Datetime => Primitive::String,
ast::Atom::JSON => {
warn!(
"{} - Treating subschema as JSON string",
tag.fully_qualified_name()
);
Primitive::String
}
ast::Atom::JSON => match handle_error("json atom") {
Ok(_) => Primitive::String,
Err(reason) => return Err(reason),
},
}),
ast::Type::Object(object) if object.fields.is_empty() => {
warn!(
"{} - Empty records are not supported, casting into a JSON string",
tag.fully_qualified_name()
);
Type::Primitive(Primitive::String)
}
ast::Type::Object(object) => {
let mut fields: Vec<Field> = object
.fields
.iter()
.map(|(k, v)| Field {
name: k.to_string(),
data_type: Type::from(*v.clone()),
default: if v.nullable { Some(json!(null)) } else { None },
..Default::default()
})
.collect();
fields.sort_by_key(|v| v.name.to_string());

let record = Record {
common: CommonAttributes {
// This is not a safe assumption
name: tag.name.clone().unwrap_or_else(|| "__UNNAMED__".into()),
namespace: tag.namespace.clone(),
..Default::default()
},
fields,
let mut fields: Vec<Field> = if object.fields.is_empty() {
Vec::new()
} else {
object
.fields
.iter()
.map(|(k, v)| {
let default = if v.nullable { Some(json!(null)) } else { None };
(
k.to_string(),
Type::translate_from(*v.clone(), context),
default,
)
})
.filter(|(_, v, _)| v.is_ok())
.map(|(name, data_type, default)| Field {
name,
data_type: data_type.unwrap(),
default,
..Default::default()
})
.collect()
};
if record.common.name == "__UNNAMED__" {
warn!("{} - Unnamed field", tag.fully_qualified_name());

if fields.is_empty() {
handle_error("empty object")?
} else {
fields.sort_by_key(|v| v.name.to_string());
let record = Record {
common: CommonAttributes {
// This is not a safe assumption
name: tag.name.clone().unwrap_or_else(|| "__UNNAMED__".into()),
namespace: tag.namespace.clone(),
..Default::default()
},
fields,
};
if record.common.name == "__UNNAMED__" {
warn!("{} - Unnamed field", tag.fully_qualified_name());
}
Type::Complex(Complex::Record(record))
}
Type::Complex(Complex::Record(record))
}
ast::Type::Array(array) => Type::Complex(Complex::Array(Array {
items: Box::new(Type::from(*array.items.clone())),
})),
ast::Type::Map(map) => Type::Complex(Complex::Map(Map {
values: Box::new(Type::from(*map.value.clone())),
})),
_ => {
warn!("{} - Unsupported conversion", tag.fully_qualified_name());
Type::Primitive(Primitive::String)
}
ast::Type::Array(array) => match Type::translate_from(*array.items.clone(), context) {
Ok(data_type) => Type::Complex(Complex::Array(Array {
items: Box::new(data_type),
})),
Err(_) => return Err(fmt_reason("untyped array")),
},
ast::Type::Map(map) => match Type::translate_from(*map.value.clone(), context) {
Ok(data_type) => Type::Complex(Complex::Map(Map {
values: Box::new(data_type),
})),
Err(_) => return Err(fmt_reason("untyped map value")),
},
_ => handle_error("unknown type")?,
};
if tag.nullable && !tag.is_null() {
Type::Union(vec![Type::Primitive(Primitive::Null), data_type])
Ok(Type::Union(vec![
Type::Primitive(Primitive::Null),
data_type,
]))
} else {
data_type
Ok(data_type)
}
}
}
Expand All @@ -199,8 +231,11 @@ mod tests {
}

fn assert_from_ast_eq(ast: Value, avro: Value) {
let context = Context {
resolve_method: ResolveMethod::Cast,
};
let tag: ast::Tag = serde_json::from_value(ast).unwrap();
let from_tag = Type::from(tag);
let from_tag = Type::translate_from(tag, context).unwrap();
assert_eq!(avro, json!(from_tag))
}

Expand Down
Loading

0 comments on commit 3e688be

Please sign in to comment.