Permalink
Browse files

SI-1952: (1) Enable file resolution for SchemaTranslator and (2) prop…

…agate validation options to all parsers spawned during schema translation.

RB=504603
R=cpettitt,gogenc,kjin,brajkuma,mtagle
A=kjin,brajkuma
  • Loading branch information...
1 parent d002cbc commit 169b8d14a1c7ef7641a48e03f889576c1fecc738 @chrisfong chrisfong committed Jun 18, 2015
View
@@ -1,7 +1,12 @@
-2.6.4
+2.7.1
-----
+2.7.0
+-----
+(RB=504603)
+Enable file resolution for SchemaTranslator and propagate validation options to all parsers spawned during schema translation.
+
2.6.3
-----
(RB=501374)
@@ -21,3 +21,8 @@ idea.module.iml.whenMerged { module ->
}
module.dependencies = jarDeps + module.dependencies
}
+
// Share the :data-avro project's test resources (the .avsc fixtures) with this
// module's test source set so the schema files are on the classpath during tests.
sourceSets.test.resources {
  // evaluationDependsOn forces :data-avro to be configured first so its
  // sourceSets are populated when this block runs.
  final dataAvroProject = project.evaluationDependsOn(':data-avro')
  it.source(dataAvroProject.sourceSets.test.resources)
}
@@ -60,5 +60,23 @@ public AvroToDataSchemaTranslationMode getTranslationMode()
return _translationMode;
}
/**
 * Sets the Avro schema file search paths, delimited by the default path separator.
 *
 * When set, referenced schemas that are not defined inline are resolved from
 * .avsc files found under these paths.
 *
 * @param schemaResolverPaths search paths joined with the default path separator.
 * @return this options instance, to allow call chaining.
 */
public AvroToDataSchemaTranslationOptions setFileResolutionPaths(String schemaResolverPaths)
{
  _schemaResolverPaths = schemaResolverPaths;
  return this;
}
+
/**
 * Returns the Avro schema file search paths, delimited by the default path
 * separator, or null if file resolution has not been enabled.
 */
public String getFileResolutionPaths()
{
  return _schemaResolverPaths;
}
+
private AvroToDataSchemaTranslationMode _translationMode;
+ private String _schemaResolverPaths = null;
}
@@ -25,6 +25,7 @@
import com.linkedin.data.schema.ArrayDataSchema;
import com.linkedin.data.schema.DataSchema;
import com.linkedin.data.schema.DataSchemaConstants;
+import com.linkedin.data.schema.DataSchemaResolver;
import com.linkedin.data.schema.DataSchemaTraverse;
import com.linkedin.data.schema.EnumDataSchema;
import com.linkedin.data.schema.FixedDataSchema;
@@ -33,6 +34,9 @@
import com.linkedin.data.schema.SchemaParser;
import com.linkedin.data.schema.SchemaParserFactory;
import com.linkedin.data.schema.UnionDataSchema;
+import com.linkedin.data.schema.resolver.DefaultDataSchemaResolver;
+import com.linkedin.data.schema.resolver.FileDataSchemaResolver;
+import com.linkedin.data.schema.validation.ValidationOptions;
import com.linkedin.data.template.DataTemplateUtil;
import java.util.ArrayList;
@@ -58,6 +62,7 @@
public static final String DATA_PROPERTY = "com.linkedin.data";
public static final String SCHEMA_PROPERTY = "schema";
public static final String OPTIONAL_DEFAULT_MODE_PROPERTY = "optionalDefaultMode";
+ public static final String AVRO_FILE_EXTENSION = ".avsc";
private SchemaTranslator()
{
@@ -114,8 +119,13 @@ private SchemaTranslator()
public static DataSchema avroToDataSchema(String avroSchemaInJson, AvroToDataSchemaTranslationOptions options)
throws IllegalArgumentException
{
- SchemaParser parser = SchemaParserFactory.instance().create(null);
- parser.getValidationOptions().setAvroUnionMode(true);
+ ValidationOptions validationOptions = SchemaParser.getDefaultSchemaParserValidationOptions();
+ validationOptions.setAvroUnionMode(true);
+
+ SchemaParserFactory parserFactory = SchemaParserFactory.instance(validationOptions);
+
+ DataSchemaResolver resolver = getResolver(parserFactory, options);
+ SchemaParser parser = parserFactory.create(resolver);
parser.parse(avroSchemaInJson);
if (parser.hasError())
{
@@ -314,6 +324,24 @@ public static String dataToAvroSchemaJson(DataSchema dataSchema, DataToAvroSchem
return schemaJson;
}
+ /**
+ * Allows caller to specify a file path for schema resolution.
+ */
+ private static DataSchemaResolver getResolver(SchemaParserFactory parserFactory, AvroToDataSchemaTranslationOptions options)
+ {
+ String resolverPath = options.getFileResolutionPaths();
+ if (resolverPath != null)
+ {
+ FileDataSchemaResolver resolver = new FileDataSchemaResolver(parserFactory, resolverPath);
+ resolver.setExtension(AVRO_FILE_EXTENSION);
+ return resolver;
+ }
+ else
+ {
+ return new DefaultDataSchemaResolver(parserFactory);
+ }
+ }
+
interface FieldDefaultValueProvider
{
Object defaultValue(RecordDataSchema.Field field);
@@ -16,7 +16,6 @@
package com.linkedin.data.avro;
-
import com.linkedin.data.Data;
import com.linkedin.data.DataMap;
import com.linkedin.data.TestUtil;
@@ -25,10 +24,11 @@
import com.linkedin.data.schema.SchemaParser;
import com.linkedin.data.schema.SchemaToJsonEncoder;
import com.linkedin.data.schema.validation.ValidationOptions;
-
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
import java.io.IOException;
import java.util.Arrays;
-
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
@@ -47,6 +47,8 @@
public class TestSchemaTranslator
{
+ private static final String FS = File.separator;
+
static public GenericRecord genericRecordFromString(String jsonString, Schema writerSchema, Schema readerSchema) throws IOException
{
GenericDatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(writerSchema, readerSchema);
@@ -1582,7 +1584,73 @@ public void testUnionDefaultValues() throws IOException
parser.parse(readerSchemaText);
if (debug) System.out.println(parser.errorMessage());
assertFalse(parser.hasError());
+ }
+ }
+
+ @Test
+ public void testAvroUnionModeChaining() throws IOException
+ {
+ String expectedSchema = "{ " +
+ " \"type\" : \"record\", " +
+ " \"name\" : \"A\", " +
+ " \"namespace\" : \"com.linkedin.pegasus.test\", " +
+ " \"fields\" : [ " +
+ " { " +
+ " \"name\" : \"someBorC\", " +
+ " \"type\" : [ " +
+ " { " +
+ " \"type\" : \"record\", " +
+ " \"name\" : \"B\", " +
+ " \"fields\" : [ " +
+ " { " +
+ " \"name\" : \"someAorC\", " +
+ " \"type\" : [ " +
+ " \"A\", " +
+ " { " +
+ " \"type\" : \"record\", " +
+ " \"name\" : \"C\", " +
+ " \"fields\" : [ " +
+ " { " +
+ " \"name\" : \"something\", " +
+ " \"type\" : \"int\", " +
+ " \"optional\" : true, " +
+ " \"default\" : 42" +
+ " } " +
+ " ] " +
+ " } " +
+ " ] " +
+ " } " +
+ " ] " +
+ " }, " +
+ " \"C\" " +
+ " ] " +
+ " } " +
+ " ]" +
+ "}";
+ String avroRootUrl = getClass().getClassLoader().getResource("avro").getFile();
+ String avroRootDir = new File(avroRootUrl).getAbsolutePath();
+ String avroFilePath = avroRootDir + FS + "com" + FS + "linkedin" + FS + "pegasus" + FS + "test" + FS + "A.avsc";
+ File avroFile = new File(avroFilePath);
+
+ String schema = readFile(avroFile);
+ AvroToDataSchemaTranslationOptions options =
+ new AvroToDataSchemaTranslationOptions(AvroToDataSchemaTranslationMode.TRANSLATE).setFileResolutionPaths(avroRootDir);
+ DataSchema pdscSchema = SchemaTranslator.avroToDataSchema(schema, options);
+ DataMap actual = TestUtil.dataMapFromString(pdscSchema.toString());
+ DataMap expected = TestUtil.dataMapFromString(expectedSchema);
+ assertEquals(actual, expected);
+ }
+
+ private static String readFile(File file) throws IOException
+ {
+ BufferedReader br = new BufferedReader(new FileReader(file));
+ StringBuilder sb = new StringBuilder();
+ String line;
+ while((line = br.readLine()) != null)
+ {
+ sb.append(line + "\n");
}
+ return sb.toString();
}
}
@@ -0,0 +1,9 @@
+{
+ "type" : "record",
+ "name" : "A",
+ "namespace" : "com.linkedin.pegasus.test",
+ "fields" : [ {
+ "name" : "someBorC",
+ "type" : ["B", "C"]
+ } ]
+}
@@ -0,0 +1,9 @@
+{
+ "type" : "record",
+ "name" : "B",
+ "namespace" : "com.linkedin.pegasus.test",
+ "fields" : [ {
+ "name" : "someAorC",
+ "type" : ["A", "C"]
+ } ]
+}
@@ -0,0 +1,10 @@
+{
+ "type" : "record",
+ "name" : "C",
+ "namespace" : "com.linkedin.pegasus.test",
+ "fields" : [ {
+ "name" : "something",
+ "type" : ["int", "null"],
+ "default" : 42
+ } ]
+}
@@ -1151,6 +1151,11 @@ public StringBuilder errorMessageBuilder()
return _dataLocationMap;
}
+ public static final ValidationOptions getDefaultSchemaParserValidationOptions()
+ {
+ return new ValidationOptions(RequiredMode.CAN_BE_ABSENT_IF_HAS_DEFAULT, CoercionMode.NORMAL);
+ }
+
/**
* Current namespace, used to determine full name from unqualified name.
*/
@@ -1160,5 +1165,5 @@ public StringBuilder errorMessageBuilder()
private final DataSchemaResolver _resolver;
private final Map<Object, DataLocation> _dataLocationMap = new IdentityHashMap<Object, DataLocation>();
private StringBuilder _errorMessageBuilder = new StringBuilder();
- private ValidationOptions _validationOptions = new ValidationOptions(RequiredMode.CAN_BE_ABSENT_IF_HAS_DEFAULT, CoercionMode.NORMAL);
+ private ValidationOptions _validationOptions = getDefaultSchemaParserValidationOptions();
}
@@ -16,27 +16,53 @@
package com.linkedin.data.schema;
+import com.linkedin.data.schema.validation.ValidationOptions;
+import java.util.HashMap;
+import java.util.Map;
+
public class SchemaParserFactory
{
/**
- * Create a new parser that will use the specified resolver.
+ * Create a new parser that will use the specified resolver and validation options.
*
* @param resolver to be provided to the parser.
* @return a new parser.
*/
public SchemaParser create(DataSchemaResolver resolver)
{
- return new SchemaParser(resolver);
+ SchemaParser parser = new SchemaParser(resolver);
+ if (_validationOptions != null)
+ {
+ parser.setValidationOptions(_validationOptions);
+ }
+ return parser;
}
- protected SchemaParserFactory()
+ protected SchemaParserFactory(ValidationOptions validationOptions)
{
+ _validationOptions = validationOptions;
}
static public final SchemaParserFactory instance()
{
- return _instance;
+ return instance(null);
+ }
+
+ static public final SchemaParserFactory instance(ValidationOptions validationOptions)
+ {
+ if (factoryMap.containsKey(validationOptions))
+ {
+ return factoryMap.get(validationOptions);
+ }
+ else
+ {
+ SchemaParserFactory factory = new SchemaParserFactory(validationOptions);
+ factoryMap.put(validationOptions, factory);
+ return factory;
+ }
}
- static private final SchemaParserFactory _instance = new SchemaParserFactory();
+ static private final Map<ValidationOptions, SchemaParserFactory> factoryMap =
+ new HashMap<ValidationOptions, SchemaParserFactory>();
+ private final ValidationOptions _validationOptions;
}
@@ -338,4 +338,4 @@ public String toString()
private final Set<DataSchemaLocation> _resolvedLocations = new HashSet<DataSchemaLocation>();
protected static final PrintStream out = new PrintStream(new FileOutputStream(FileDescriptor.out));
-}
+}
@@ -18,6 +18,7 @@
import com.linkedin.data.schema.DataSchemaLocation;
+import com.linkedin.data.schema.SchemaParserFactory;
import java.io.InputStream;
import java.util.Collections;
import java.util.Iterator;
@@ -35,6 +36,11 @@ public DefaultDataSchemaResolver()
super(null);
}
/**
 * Creates a resolver that passes the given parser factory to the base
 * resolver, so that any parsers created during resolution carry the
 * factory's configuration.
 *
 * @param parserFactory factory handed to the base resolver.
 */
public DefaultDataSchemaResolver(SchemaParserFactory parserFactory)
{
  super(parserFactory);
}
+
private static final Iterator<DataSchemaLocation> _it = Collections.<DataSchemaLocation>emptyList().iterator();
@Override
@@ -121,7 +121,7 @@ public RequiredMode getRequiredMode()
/**
* Set the required mode that indicates how required fields should be handled during validation.
- *
+ *
* @param requiredMode specifies the required mode.
*/
public void setRequiredMode(RequiredMode requiredMode)
@@ -132,10 +132,10 @@ public void setRequiredMode(RequiredMode requiredMode)
/**
* Set Avro union mode.
- *
+ *
* If Avro union mode is enabled, a union uses the name (instead of full name) of the
* member type as the key to specify the type of the value in the union.
- *
+ *
* @param value set to true to enable Avro union mode.
*/
public void setAvroUnionMode(boolean value)
Oops, something went wrong.

0 comments on commit 169b8d1

Please sign in to comment.