Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed two bugs in FastDeserializerGenerator #46

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -363,9 +363,11 @@ private void processRecord(JVar recordSchemaVar, String recordName, final Schema

private void updateActualExceptions(JMethod method) {
Set<Class<? extends Exception>> exceptionFromMethod = exceptionFromMethodMap.get(method);
for (Class<? extends Exception> exceptionClass : exceptionFromMethod) {
method._throws(exceptionClass);
schemaAssistant.getExceptionsFromStringable().add(exceptionClass);
if (exceptionFromMethod != null) {
for (Class<? extends Exception> exceptionClass : exceptionFromMethod) {
method._throws(exceptionClass);
schemaAssistant.getExceptionsFromStringable().add(exceptionClass);
}
}
}

Expand Down Expand Up @@ -696,9 +698,20 @@ private void processMap(JVar mapSchemaVar, final String name, final Schema mapSc
JBlock parentBody, FieldAction action, BiConsumer<JBlock, JExpression> putMapIntoParent,
Supplier<JExpression> reuseSupplier) {

/**
* Determine the action symbol for Map value. {@link ResolvingGrammarGenerator} generates
* resolving grammar symbols with reversed order of production sequence. If this symbol is
* a terminal, its production list will be <tt>null</tt>. Otherwise the production list
* holds the the sequence of the symbols that forms this symbol.
*
* The {@link FastDeserializerGenerator.generateDeserializer} tries to proceed as a depth-first,
* left-to-right traversal of the schema. So for a nested Map, we need to iterate production list
* in reverse order to get the correct "map-end" symbol of internal Maps.
*/
if (action.getShouldRead()) {
Symbol valuesActionSymbol = null;
for (Symbol symbol : action.getSymbol().production) {
for (int i = action.getSymbol().production.length - 1; i >= 0; --i) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you explain more about this change?
Why sequence matters in this situation?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's for nested Map deserializer generation. The "symbol.production" generated by ResolvingGrammarGenerator is in reversed order, and the REPEATER symbol holds the sequence of the symbols that forms the production for it.

Resolving grammar of nested Map is like:

- symbol/"map-end"/"REPEATER"                ----    1st layer Map
  - production
    - symbol/"map-end"/"REPEATER"            ----    1st layer Map
    - symbol/"map-end"/"REPEATER"            ----    2nd layer Map
    - symbol/"map-start"/"TERMINAL"          ----    2nd layer Map
- symbol/"map-start"/"TERMINAL"              ----    1st layer Map

The fastDeserializer generator tries to proceed as a depth-first, left-to-right traversal of the schema. So when it is processing the 2nd layer of Map (production of 1st layer Map Symbol), we need to iterate production list in reverse order to get the correct 2nd layer "map-end" symbol.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

needs at least some comment in the code :-)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed and good catch!
This logic seems tricky, and You can put your explanation as the javadoc for this section.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. Added comments for the change.

Symbol symbol = action.getSymbol().production[i];
if (Symbol.Kind.REPEATER.equals(symbol.kind) && "map-end".equals(
getSymbolPrintName(((Symbol.Repeater) symbol).end))) {
valuesActionSymbol = symbol;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -677,6 +677,70 @@ public void shouldReadMapOfRecords(Boolean whetherUseFastDeserializer) {
Assert.assertEquals(new Utf8("abc"), map.get(new Utf8("2")).get("field"));
}

@Test(groups = {"deserializationTest"}, dataProvider = "SlowFastDeserializer")
public void shouldReadNestedMap(Boolean whetherUseFastDeserializer) {
// given
Schema nestedMapSchema = createRecord("record", createMapFieldSchema(
"mapField", Schema.createMap(Schema.createArray(Schema.create(Schema.Type.INT)))));

Map<String, List> value = new HashMap<>();
value.put("subKey1", Arrays.asList(1));
value.put("subKey2", Arrays.asList(2));
Map<String, Map<String, List>> mapField = new HashMap<>();
mapField.put("key1", value);

GenericData.Record recordData = new GenericData.Record(nestedMapSchema);
recordData.put("mapField", mapField);

// when
GenericData.Record decodedRecord = null;
if (whetherUseFastDeserializer) {
decodedRecord = decodeRecordFast(nestedMapSchema, nestedMapSchema,
FastSerdeTestsSupport.genericDataAsDecoder(recordData, nestedMapSchema));
} else {
decodedRecord = decodeRecordSlow(nestedMapSchema, nestedMapSchema,
FastSerdeTestsSupport.genericDataAsDecoder(recordData, nestedMapSchema));
}

// then
Object decodedMapField = decodedRecord.get("mapField");
Assert.assertEquals("{key1={subKey1=[1], subKey2=[2]}}", decodedMapField.toString());
Assert.assertTrue(decodedMapField instanceof Map);
Object subMap = ((Map) decodedMapField).get(new Utf8("key1"));
Assert.assertTrue(subMap instanceof Map);
Assert.assertEquals(Arrays.asList(1), ((List) ((Map) subMap).get(new Utf8("subKey1"))));
Assert.assertEquals(Arrays.asList(2), ((List) ((Map) subMap).get(new Utf8("subKey2"))));
}

@Test(groups = {"deserializationTest"}, dataProvider = "SlowFastDeserializer")
public void shouldReadRecursiveUnionRecord(Boolean whetherUseFastDeserializer) {
// given
Schema unionRecordSchema = Schema.parse("{\"type\":\"record\",\"name\":\"recordName\",\"namespace\":\"com.linkedin.avro.fastserde.generated.avro\",\"fields\":[{\"name\":\"strField\",\"type\":\"string\"},{\"name\":\"unionField\",\"type\":[\"null\",\"recordName\"]}]}");

GenericData.Record recordData = new GenericData.Record(unionRecordSchema);
recordData.put("strField", "foo");

GenericData.Record unionField = new GenericData.Record(unionRecordSchema);
unionField.put("strField", "bar");
recordData.put("unionField", unionField);

// when
GenericData.Record decodedRecord = null;
if (whetherUseFastDeserializer) {
decodedRecord = decodeRecordFast(unionRecordSchema, unionRecordSchema,
FastSerdeTestsSupport.genericDataAsDecoder(recordData, unionRecordSchema));
} else {
decodedRecord = decodeRecordSlow(unionRecordSchema, unionRecordSchema,
FastSerdeTestsSupport.genericDataAsDecoder(recordData, unionRecordSchema));
}

// then
Assert.assertEquals(new Utf8("foo"), decodedRecord.get("strField"));
Object decodedUnionField = decodedRecord.get("unionField");
Assert.assertTrue(decodedUnionField instanceof GenericData.Record);
Assert.assertEquals(new Utf8("bar"), ((GenericData.Record) decodedUnionField).get("strField"));
}

public <T> T decodeRecordFast(Schema writerSchema, Schema readerSchema, Decoder decoder) {
FastDeserializer<T> deserializer =
new FastGenericDeserializerGenerator<T>(writerSchema, readerSchema, tempDir, classLoader,
Expand Down