Skip to content

Commit

Permalink
Align type coercion with missing number bounds
Browse files Browse the repository at this point in the history
When bounds are missing for numbers, both in XML schemata and JSON
schemata, the result is now always coerced the largest generic type.

The choice between LONG or DOUBLE depends on whether it is, or can be,
an integer number or not.

This fixes #1 and #2.
  • Loading branch information
opwvhk committed Sep 12, 2023
1 parent 36fcdca commit 6613374
Show file tree
Hide file tree
Showing 13 changed files with 74 additions and 20 deletions.
8 changes: 6 additions & 2 deletions src/main/java/opwvhk/avro/json/SchemaAnalyzer.java
Original file line number Diff line number Diff line change
Expand Up @@ -173,8 +173,9 @@ public ValueResolver createResolver(Schema readSchema) {
case INTEGER -> {
DecimalRange range = require(schemaProperties, SchemaProperties::isIntegerNumberRange,
"Integer numbers require an integer number range").numberRange();
int bitSize = range.integerBitSize(); // 0 if not specified
int bitSize = range.integerBitSize();
if (bitSize == 0) {
// Not specified: use LONG instead of throwing an error
yield Schema.create(Schema.Type.LONG);
} else if (bitSize <= 32) {
yield Schema.create(Schema.Type.INT);
Expand All @@ -187,7 +188,10 @@ public ValueResolver createResolver(Schema readSchema) {
case NUMBER -> {
DecimalRange range = schemaProperties.numberRange();
int precision = range.requiredPrecision();
if (precision < 7) {
if (precision == 0) {
// Not specified: use DOUBLE instead of throwing an error
yield Schema.create(Schema.Type.DOUBLE);
} else if (precision < 7) {
// Floats have a precision of ~7 digits
yield Schema.create(Schema.Type.FLOAT);
} else if (precision < 16) {
Expand Down
10 changes: 10 additions & 0 deletions src/main/java/opwvhk/avro/util/DecimalRange.java
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,15 @@ public DecimalRange restrictTo(DecimalRange other) {
return new DecimalRange(lowerBound, includeLowerBound, upperBound, includeUpperBound);
}

/**
* Determine whether the range is bounded. A range is bounded if there is both a lower and upper bound.
*
* @return {@code true} if this range is bounded
*/
public boolean isBounded() {
return lowerBound != null && upperBound != null;
}

/**
* Determine whether the range can be an integer range.
*
Expand All @@ -169,6 +178,7 @@ public int integerBitSize() {
return Stream.of(lowerBound, upperBound).filter(Objects::nonNull)
.map(bd -> bd.setScale(0, DOWN))
.map(BigDecimal::toBigInteger)
.map(BigInteger::abs)
.mapToInt(BigInteger::bitLength)
.max()
.orElse(-1) + 1;
Expand Down
20 changes: 15 additions & 5 deletions src/main/java/opwvhk/avro/xml/TypeBuildingVisitor.java
Original file line number Diff line number Diff line change
Expand Up @@ -196,9 +196,13 @@ private ScalarType mapScalarType(TypeData typeData, XmlSchemaTypeInfo typeInfo)
} else if (XSD_LONG.equals(recognizedType)) {
return DecimalType.LONG_TYPE;
} else if (XSD_DECIMAL.equals(recognizedType)) {
// XSD_INT and XSD_LONG would match this branch, but they're overridden.
final int fractionDigits = restriction(typeInfo, DIGITS_FRACTION, Integer::valueOf).orElseThrow(
() -> new IllegalArgumentException("xs:decimal without precision"));
// XSD_INT and XSD_LONG would match this branch if it weren't overridden via XsdAnalyzer.USER_RECOGNIZED_TYPES
Optional<Integer> fractionDigitsRestriction = restriction(typeInfo, DIGITS_FRACTION, Integer::valueOf);
if (fractionDigitsRestriction.isEmpty()) {
// xs:decimal without scale -> use a double instead
return FixedType.DOUBLE;
}
int fractionDigits = fractionDigitsRestriction.get();
Optional<Integer> totalDigits = restriction(typeInfo, DIGITS_TOTAL, Integer::valueOf);
UnaryOperator<BigDecimal> round = bd -> bd.setScale(fractionDigits, HALF_UP);
UnaryOperator<BigDecimal> incULP = bd -> new BigDecimal(bd.unscaledValue().add(ONE), fractionDigits);
Expand All @@ -210,8 +214,14 @@ private ScalarType mapScalarType(TypeData typeData, XmlSchemaTypeInfo typeInfo)
Optional<BigDecimal> maxDigits = totalDigits.map(BigDecimal.TEN::pow).map(decULP);
Optional<BigDecimal> minDigits = maxDigits.map(BigDecimal::negate);

int numberOfDigits = Stream.concat(totalDigits.stream(), Stream.of(minInclusive, minExclusive, maxInclusive, maxExclusive)
.flatMap(Optional::stream).map(BigDecimal::precision)).max(Integer::compareTo).orElse(Integer.MAX_VALUE);
Optional<Integer> calculatedNumberOfDigits = Stream.concat(totalDigits.stream(),
Stream.of(minInclusive, minExclusive, maxInclusive, maxExclusive).flatMap(Optional::stream).map(BigDecimal::precision))
.max(Integer::compareTo);
if (fractionDigits > 0 && calculatedNumberOfDigits.isEmpty()) {
// xs:decimal with scale, but without precision in any form -> use a double instead
return FixedType.DOUBLE;
}
int numberOfDigits = calculatedNumberOfDigits.orElse(Integer.MAX_VALUE);
if (fractionDigits > 0) {
return DecimalType.withFraction(numberOfDigits, fractionDigits);
} else {
Expand Down
1 change: 1 addition & 0 deletions src/main/java/opwvhk/avro/xml/XsdAnalyzer.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import org.apache.ws.commons.schema.utils.NamespacePrefixList;
import org.apache.ws.commons.schema.walker.FixedXmlSchemaWalker;
import org.apache.ws.commons.schema.walker.XmlSchemaVisitor;
import org.apache.ws.commons.schema.walker.XmlSchemaWalker;
import org.xml.sax.helpers.DefaultHandler;

import static java.nio.charset.StandardCharsets.UTF_8;
Expand Down
1 change: 1 addition & 0 deletions src/test/java/opwvhk/avro/SchemaManipulatorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ public void testSortedDocumentationViaJsonSchema() throws GenerationException, U
| singleFloat? | float | |
| doubleFloat? | double | |
| fixedPoint? | decimal(17,6) | |
| defaultNumber? | double | |
| choice | enum | |
| date? | date | |
| time? | time-millis | |
Expand Down
10 changes: 5 additions & 5 deletions src/test/java/opwvhk/avro/json/JsonAsAvroParserTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ public void testMostTypesWithJsonSchema() throws IOException, GenerationExceptio

assertThat(fullRecord.toString()).isEqualTo(
("{'bool': true, 'shortInt': 42, 'longInt': 6789012345, 'hugeInt': 123456789012345678901, 'defaultInt': 4242, " +
"'singleFloat': 123.456, 'doubleFloat': 1234.56789, 'fixedPoint': 12345678901.123456, 'choice': 'maybe', " +
"'date': '2023-04-17', 'time': '17:08:34.567123Z', 'timestamp': '2023-04-17T17:08:34.567123Z', " +
"'singleFloat': 123.456, 'doubleFloat': 1234.56789, 'fixedPoint': 12345678901.123456, 'defaultNumber': 98765.4321, " +
"'choice': 'maybe', 'date': '2023-04-17', 'time': '17:08:34.567123Z', 'timestamp': '2023-04-17T17:08:34.567123Z', " +
"'binary': 'Hello World!', 'hexBytes': 'Hello World!', 'texts': ['Hello', 'World!'], 'weirdStuff': {" +
"'explanation': 'No reason. I just felt like it.', 'fancy': '\uD83D\uDE04! You are on Candid Camera! \uD83D\uDCF9\uD83C\uDF4C', " +
"'rabbitHole': null}}").replace('\'', '"'));
Expand Down Expand Up @@ -95,8 +95,8 @@ public void testMostTypesFromAvroSchema() throws IOException {

assertThat(fullRecord.toString()).isEqualTo(
("{'bool': true, 'shortInt': 42, 'longInt': 6789012345, 'hugeInt': 123456789012345678901, 'defaultInt': 4242, " +
"'singleFloat': 123.456, 'doubleFloat': 1234.56789, 'fixedPoint': 12345678901.123456, 'choice': 'maybe', " +
"'date': '2023-04-17', 'time': '17:08:34.567123Z', 'timestamp': '2023-04-17T17:08:34.567123Z', " +
"'singleFloat': 123.456, 'doubleFloat': 1234.56789, 'fixedPoint': 12345678901.123456, 'defaultNumber': 98765.4321, " +
"'choice': 'maybe', 'date': '2023-04-17', 'time': '17:08:34.567123Z', 'timestamp': '2023-04-17T17:08:34.567123Z', " +
"'texts': ['Hello', 'World!'], 'weirdStuff': {'explanation': 'No reason. I just felt like it.', " +
"'fancy': '\uD83D\uDE04! You are on Candid Camera! \uD83D\uDCF9\uD83C\uDF4C', 'rabbitHole': null}}").replace('\'', '"'));
}
Expand All @@ -117,7 +117,7 @@ public void testMinimalRecordFromAvroSchema() throws IOException {

assertThat(minimalRecord.toString()).isEqualTo(
("{'bool': false, 'shortInt': null, 'longInt': null, 'hugeInt': null, 'defaultInt': 42, " +
"'singleFloat': null, 'doubleFloat': null, 'fixedPoint': null, 'choice': 'no', " +
"'singleFloat': null, 'doubleFloat': null, 'fixedPoint': null, 'defaultNumber': 4.2, 'choice': 'no', " +
"'date': null, 'time': null, 'timestamp': null, " +
"'texts': [], 'weirdStuff': {'explanation': 'Please explain why', " +
"'fancy': null, 'rabbitHole': null}}").replace('\'', '"'));
Expand Down
17 changes: 15 additions & 2 deletions src/test/java/opwvhk/avro/util/DecimalRangeTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ public void testRangeInspection() {
assertThat(new DecimalRange(ZERO, true, ONE, true).isIntegerRange(true)).isTrue();

DecimalRange leftOpen = openClosed(null, ONE_AND_A_HALF);
assertThat(leftOpen.isBounded()).isFalse();
assertThat(leftOpen.isIntegerRange(false)).isFalse();
assertThat(leftOpen.isIntegerRange(true)).isFalse();
assertThat(leftOpen.requiredPrecision()).isEqualTo(2);
assertThat(leftOpen.requiredScale()).isEqualTo(1);

Expand All @@ -58,8 +61,18 @@ public void testRangeInspection() {
assertThat(rightOpen.requiredScale()).isEqualTo(1);

// Required bits include a sign bit
assertThat(closedOpen(ONE, new BigDecimal("31.00")).integerBitSize()).isEqualTo(6);
assertThat(closedOpen(ONE, null).integerBitSize()).isEqualTo(2);
DecimalRange range_1_31 = closedOpen(ONE, new BigDecimal("31.00"));
assertThat(range_1_31.integerBitSize()).isEqualTo(6);
assertThat(range_1_31.isBounded()).isTrue();
assertThat(range_1_31.isIntegerRange(false)).isFalse();
assertThat(range_1_31.isIntegerRange(true)).isTrue();

DecimalRange range_m1_null = closedOpen(ONE.negate(), null);
assertThat(range_m1_null.integerBitSize()).isEqualTo(2);
assertThat(range_m1_null.isBounded()).isFalse();
assertThat(range_m1_null.isIntegerRange(false)).isTrue();
assertThat(range_m1_null.isIntegerRange(true)).isTrue();

assertThat(openClosed(null, ONE).integerBitSize()).isEqualTo(2);
assertThat(openOpen(null, null).integerBitSize()).isEqualTo(0);
}
Expand Down
15 changes: 10 additions & 5 deletions src/test/java/opwvhk/avro/xml/XsdAnalyzerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import java.util.Objects;

import opwvhk.avro.xml.datamodel.DecimalType;
import opwvhk.avro.xml.datamodel.FixedType;
import opwvhk.avro.xml.datamodel.StructType;
import opwvhk.avro.xml.datamodel.Type;
import org.apache.avro.Schema;
Expand Down Expand Up @@ -581,7 +582,10 @@ public void unknownTypesAreNotAllowed() {

@Test
public void unconstrainedDecimalAttributesAreNotAllowed() {
assertFailureCreatingStructTypeFor("unconstrainedDecimalAttribute");
Type type = analyzer.typeOf("unconstrainedDecimalAttribute");
assertThat(type).isEqualTo(struct("namespace.unconstrainedDecimalAttribute").withFields(
optional("value", DOUBLE)
));
}

@Test
Expand All @@ -593,14 +597,15 @@ public void unconstrainedIntegerAttributesAreCoercedToLong() {
}

@Test
public void decimalsMustHaveConstraints() {
assertFailureCreatingStructTypeFor("unconstrainedDecimal");
public void unconstrainedDecimalsBecomeDouble() {
Type type = analyzer.typeOf("unconstrainedDecimal");
assertThat(type).isEqualTo(DOUBLE);
}

@Test
public void decimalsCanBeInfinite() {
public void infiniteDecimalsBecomeDouble() {
Type type = analyzer.typeOf("unboundedDecimal");
assertThat(type).is(decimalWithPrecisionAndScale(Integer.MAX_VALUE, 6));
assertThat(type).isEqualTo(DOUBLE);
}

@Test
Expand Down
1 change: 1 addition & 0 deletions src/test/resources/opwvhk/avro/json/TestRecord-full.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"singleFloat": 123.456,
"doubleFloat": 1234.56789,
"fixedPoint": 12345678901.123456,
"defaultNumber": 98765.4321,
"choice": "maybe",
"date": "2023-04-17",
"time": "17:08:34.567123",
Expand Down
3 changes: 2 additions & 1 deletion src/test/resources/opwvhk/avro/json/TestRecord.avsc
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@
{ "name": "shortInt", "type": ["null", "int"], "default": null },
{ "name": "longInt", "type": ["null", "long"], "default": null },
{ "name": "hugeInt", "type": ["null", { "type": "bytes", "logicalType": "decimal", "precision": 21, "scale": 0 }], "default": null },
{ "name": "defaultInt", "type": "long", "default": 42, "$comment": "the default differs from the JSON schema" },
{ "name": "defaultInt", "type": "long", "default": 24, "$comment": "the default differs from the JSON schema" },
{ "name": "singleFloat", "type": ["null", "float"], "default": null },
{ "name": "doubleFloat", "type": ["null", "double"], "default": null },
{ "name": "fixedPoint", "type": ["null", { "type": "bytes", "logicalType": "decimal", "precision": 17, "scale": 6 }], "default": null },
{ "name": "defaultNumber", "type": "double", "default": 2.4, "$comment": "the default differs from the JSON schema" },
{ "name": "choice", "type": { "type": "enum", "name": "choice", "symbols": ["yes", "no", "maybe"] } },
{ "name": "date", "type": ["null", { "type": "int", "logicalType": "date" }], "default": null },
{ "name": "time", "type": ["null", { "type": "int", "logicalType": "time-millis" }], "default": null },
Expand Down
6 changes: 6 additions & 0 deletions src/test/resources/opwvhk/avro/json/TestRecord.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
},
"defaultInt": {
"type": "integer",
"$comment": "no bounds, so the type defaults to LONG",
"default": 42
},
"singleFloat": {
Expand All @@ -45,6 +46,11 @@
"minimum": -9999999999.999999,
"maximum": 99999999999.99999
},
"defaultNumber": {
"type": "number",
"$comment": "no bounds, so the type defaults to DOUBLE",
"default": 4.2
},
"choice": {
"enum": ["yes", "no", "maybe"]
},
Expand Down
1 change: 1 addition & 0 deletions src/test/resources/opwvhk/avro/json/TestRecordAll.avsc
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
{ "name": "singleFloat", "type": ["null", "float"], "default": null },
{ "name": "doubleFloat", "type": ["null", "double"], "default": null },
{ "name": "fixedPoint", "type": ["null", { "type": "bytes", "logicalType": "decimal", "precision": 17, "scale": 6 }], "default": null },
{ "name": "defaultNumber", "type": ["double", "null"], "default": 4.2 },
{ "name": "choice", "type": { "type": "enum", "name": "choice", "symbols": ["yes", "no", "maybe"] } },
{ "name": "date", "type": ["null", { "type": "int", "logicalType": "date" }], "default": null },
{ "name": "time", "type": ["null", { "type": "int", "logicalType": "time-millis" }], "default": null },
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
{ "name": "singleFloat", "type": ["null", "float"], "default": null },
{ "name": "doubleFloat", "type": ["null", "double"], "default": null },
{ "name": "fixedPoint", "type": ["null", { "type": "bytes", "logicalType": "decimal", "precision": 17, "scale": 6 }], "default": null },
{ "name": "defaultNumber", "type": ["double", "null"], "default": 4.2 },
{ "name": "choice", "type": { "type": "enum", "name": "choice", "symbols": ["yes", "no", "maybe"] } },
{ "name": "date", "type": ["null", { "type": "int", "logicalType": "date" }], "default": null },
{ "name": "time", "type": ["null", { "type": "int", "logicalType": "time-millis" }], "default": null },
Expand Down

0 comments on commit 6613374

Please sign in to comment.