
SI-1952: (1) Enable file resolution for SchemaTranslator and (2) propagate validation options to all parsers spawned during schema translation.

RB=504603
R=cpettitt,gogenc,kjin,brajkuma,mtagle
A=kjin,brajkuma
chrisfong committed Jun 18, 2015
1 parent d002cbc commit 169b8d14a1c7ef7641a48e03f889576c1fecc738
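
For context, a minimal caller-side sketch of the new file-resolution option, mirroring the testAvroUnionModeChaining test added below; the class name, schema text, and directory path are illustrative, not part of the commit:

import com.linkedin.data.avro.AvroToDataSchemaTranslationMode;
import com.linkedin.data.avro.AvroToDataSchemaTranslationOptions;
import com.linkedin.data.avro.SchemaTranslator;
import com.linkedin.data.schema.DataSchema;

public class AvroTranslationExample
{
  public static void main(String[] args)
  {
    // Avro schema that refers to other named schemas ("B", "C") by reference only.
    String avroSchemaJson =
        "{ \"type\" : \"record\", \"name\" : \"A\", \"namespace\" : \"com.linkedin.pegasus.test\", "
        + "\"fields\" : [ { \"name\" : \"someBorC\", \"type\" : [ \"B\", \"C\" ] } ] }";

    // Hypothetical schema root laid out by namespace (e.g. com/linkedin/pegasus/test/B.avsc).
    String avroSchemaDir = "/path/to/avro/schemas";

    // New in this commit: setFileResolutionPaths() tells the translator where to
    // resolve referenced Avro schemas (*.avsc) from disk.
    AvroToDataSchemaTranslationOptions options =
        new AvroToDataSchemaTranslationOptions(AvroToDataSchemaTranslationMode.TRANSLATE)
            .setFileResolutionPaths(avroSchemaDir);

    DataSchema pegasusSchema = SchemaTranslator.avroToDataSchema(avroSchemaJson, options);
    System.out.println(pegasusSchema);
  }
}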
@@ -1,7 +1,12 @@
2.6.4
2.7.1
-----
2.7.0
-----
(RB=504603)
Enable file resolution for SchemaTranslator and propagate validation options to all parsers spawned during schema translation.
2.6.3
-----
(RB=501374)
@@ -21,3 +21,8 @@ idea.module.iml.whenMerged { module ->
}
module.dependencies = jarDeps + module.dependencies
}
sourceSets.test.resources {
final dataAvroProject = project.evaluationDependsOn(':data-avro')
it.source(dataAvroProject.sourceSets.test.resources)
}
@@ -60,5 +60,23 @@ public AvroToDataSchemaTranslationMode getTranslationMode()
return _translationMode;
}
/**
* Set the Avro schema search paths, delimited by the default path separator.
*/
public AvroToDataSchemaTranslationOptions setFileResolutionPaths(String schemaResolverPaths)
{
_schemaResolverPaths = schemaResolverPaths;
return this;
}
/**
* Returns the Avro schema search paths, delimited by the default path separator.
*/
public String getFileResolutionPaths()
{
return _schemaResolverPaths;
}
private AvroToDataSchemaTranslationMode _translationMode;
private String _schemaResolverPaths = null;
}
@@ -25,6 +25,7 @@
import com.linkedin.data.schema.ArrayDataSchema;
import com.linkedin.data.schema.DataSchema;
import com.linkedin.data.schema.DataSchemaConstants;
import com.linkedin.data.schema.DataSchemaResolver;
import com.linkedin.data.schema.DataSchemaTraverse;
import com.linkedin.data.schema.EnumDataSchema;
import com.linkedin.data.schema.FixedDataSchema;
@@ -33,6 +34,9 @@
import com.linkedin.data.schema.SchemaParser;
import com.linkedin.data.schema.SchemaParserFactory;
import com.linkedin.data.schema.UnionDataSchema;
import com.linkedin.data.schema.resolver.DefaultDataSchemaResolver;
import com.linkedin.data.schema.resolver.FileDataSchemaResolver;
import com.linkedin.data.schema.validation.ValidationOptions;
import com.linkedin.data.template.DataTemplateUtil;
import java.util.ArrayList;
@@ -58,6 +62,7 @@
public static final String DATA_PROPERTY = "com.linkedin.data";
public static final String SCHEMA_PROPERTY = "schema";
public static final String OPTIONAL_DEFAULT_MODE_PROPERTY = "optionalDefaultMode";
public static final String AVRO_FILE_EXTENSION = ".avsc";
private SchemaTranslator()
{
@@ -114,8 +119,13 @@ private SchemaTranslator()
public static DataSchema avroToDataSchema(String avroSchemaInJson, AvroToDataSchemaTranslationOptions options)
throws IllegalArgumentException
{
SchemaParser parser = SchemaParserFactory.instance().create(null);
parser.getValidationOptions().setAvroUnionMode(true);
ValidationOptions validationOptions = SchemaParser.getDefaultSchemaParserValidationOptions();
validationOptions.setAvroUnionMode(true);
SchemaParserFactory parserFactory = SchemaParserFactory.instance(validationOptions);
DataSchemaResolver resolver = getResolver(parserFactory, options);
SchemaParser parser = parserFactory.create(resolver);
parser.parse(avroSchemaInJson);
if (parser.hasError())
{
@@ -314,6 +324,24 @@ public static String dataToAvroSchemaJson(DataSchema dataSchema, DataToAvroSchem
return schemaJson;
}
/**
* Allows caller to specify a file path for schema resolution.
*/
private static DataSchemaResolver getResolver(SchemaParserFactory parserFactory, AvroToDataSchemaTranslationOptions options)
{
String resolverPath = options.getFileResolutionPaths();
if (resolverPath != null)
{
FileDataSchemaResolver resolver = new FileDataSchemaResolver(parserFactory, resolverPath);
resolver.setExtension(AVRO_FILE_EXTENSION);
return resolver;
}
else
{
return new DefaultDataSchemaResolver(parserFactory);
}
}
interface FieldDefaultValueProvider
{
Object defaultValue(RecordDataSchema.Field field);
@@ -16,7 +16,6 @@
package com.linkedin.data.avro;
import com.linkedin.data.Data;
import com.linkedin.data.DataMap;
import com.linkedin.data.TestUtil;
@@ -25,10 +24,11 @@
import com.linkedin.data.schema.SchemaParser;
import com.linkedin.data.schema.SchemaToJsonEncoder;
import com.linkedin.data.schema.validation.ValidationOptions;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Arrays;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
@@ -47,6 +47,8 @@
public class TestSchemaTranslator
{
private static final String FS = File.separator;
static public GenericRecord genericRecordFromString(String jsonString, Schema writerSchema, Schema readerSchema) throws IOException
{
GenericDatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(writerSchema, readerSchema);
@@ -1582,7 +1584,73 @@ public void testUnionDefaultValues() throws IOException
parser.parse(readerSchemaText);
if (debug) System.out.println(parser.errorMessage());
assertFalse(parser.hasError());
}
}
@Test
public void testAvroUnionModeChaining() throws IOException
{
String expectedSchema = "{ " +
" \"type\" : \"record\", " +
" \"name\" : \"A\", " +
" \"namespace\" : \"com.linkedin.pegasus.test\", " +
" \"fields\" : [ " +
" { " +
" \"name\" : \"someBorC\", " +
" \"type\" : [ " +
" { " +
" \"type\" : \"record\", " +
" \"name\" : \"B\", " +
" \"fields\" : [ " +
" { " +
" \"name\" : \"someAorC\", " +
" \"type\" : [ " +
" \"A\", " +
" { " +
" \"type\" : \"record\", " +
" \"name\" : \"C\", " +
" \"fields\" : [ " +
" { " +
" \"name\" : \"something\", " +
" \"type\" : \"int\", " +
" \"optional\" : true, " +
" \"default\" : 42" +
" } " +
" ] " +
" } " +
" ] " +
" } " +
" ] " +
" }, " +
" \"C\" " +
" ] " +
" } " +
" ]" +
"}";
String avroRootUrl = getClass().getClassLoader().getResource("avro").getFile();
String avroRootDir = new File(avroRootUrl).getAbsolutePath();
String avroFilePath = avroRootDir + FS + "com" + FS + "linkedin" + FS + "pegasus" + FS + "test" + FS + "A.avsc";
File avroFile = new File(avroFilePath);
String schema = readFile(avroFile);
AvroToDataSchemaTranslationOptions options =
new AvroToDataSchemaTranslationOptions(AvroToDataSchemaTranslationMode.TRANSLATE).setFileResolutionPaths(avroRootDir);
DataSchema pdscSchema = SchemaTranslator.avroToDataSchema(schema, options);
DataMap actual = TestUtil.dataMapFromString(pdscSchema.toString());
DataMap expected = TestUtil.dataMapFromString(expectedSchema);
assertEquals(actual, expected);
}
private static String readFile(File file) throws IOException
{
BufferedReader br = new BufferedReader(new FileReader(file));
StringBuilder sb = new StringBuilder();
String line;
while((line = br.readLine()) != null)
{
sb.append(line + "\n");
}
return sb.toString();
}
}
@@ -0,0 +1,9 @@
{
"type" : "record",
"name" : "A",
"namespace" : "com.linkedin.pegasus.test",
"fields" : [ {
"name" : "someBorC",
"type" : ["B", "C"]
} ]
}
@@ -0,0 +1,9 @@
{
"type" : "record",
"name" : "B",
"namespace" : "com.linkedin.pegasus.test",
"fields" : [ {
"name" : "someAorC",
"type" : ["A", "C"]
} ]
}
@@ -0,0 +1,10 @@
{
"type" : "record",
"name" : "C",
"namespace" : "com.linkedin.pegasus.test",
"fields" : [ {
"name" : "something",
"type" : ["int", "null"],
"default" : 42
} ]
}
@@ -1151,6 +1151,11 @@ public StringBuilder errorMessageBuilder()
return _dataLocationMap;
}
public static final ValidationOptions getDefaultSchemaParserValidationOptions()
{
return new ValidationOptions(RequiredMode.CAN_BE_ABSENT_IF_HAS_DEFAULT, CoercionMode.NORMAL);
}
/**
* Current namespace, used to determine full name from unqualified name.
*/
@@ -1160,5 +1165,5 @@ public StringBuilder errorMessageBuilder()
private final DataSchemaResolver _resolver;
private final Map<Object, DataLocation> _dataLocationMap = new IdentityHashMap<Object, DataLocation>();
private StringBuilder _errorMessageBuilder = new StringBuilder();
private ValidationOptions _validationOptions = new ValidationOptions(RequiredMode.CAN_BE_ABSENT_IF_HAS_DEFAULT, CoercionMode.NORMAL);
private ValidationOptions _validationOptions = getDefaultSchemaParserValidationOptions();
}
@@ -16,27 +16,53 @@
package com.linkedin.data.schema;
import com.linkedin.data.schema.validation.ValidationOptions;
import java.util.HashMap;
import java.util.Map;
public class SchemaParserFactory
{
/**
* Create a new parser that will use the specified resolver.
* Create a new parser that will use the specified resolver and validation options.
*
* @param resolver to be provided to the parser.
* @return a new parser.
*/
public SchemaParser create(DataSchemaResolver resolver)
{
return new SchemaParser(resolver);
SchemaParser parser = new SchemaParser(resolver);
if (_validationOptions != null)
{
parser.setValidationOptions(_validationOptions);
}
return parser;
}
protected SchemaParserFactory()
protected SchemaParserFactory(ValidationOptions validationOptions)
{
_validationOptions = validationOptions;
}
static public final SchemaParserFactory instance()
{
return _instance;
return instance(null);
}
static public final SchemaParserFactory instance(ValidationOptions validationOptions)
{
if (factoryMap.containsKey(validationOptions))
{
return factoryMap.get(validationOptions);
}
else
{
SchemaParserFactory factory = new SchemaParserFactory(validationOptions);
factoryMap.put(validationOptions, factory);
return factory;
}
}
static private final SchemaParserFactory _instance = new SchemaParserFactory();
static private final Map<ValidationOptions, SchemaParserFactory> factoryMap =
new HashMap<ValidationOptions, SchemaParserFactory>();
private final ValidationOptions _validationOptions;
}
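
Taken together with the SchemaTranslator change above, this factory keyed by ValidationOptions is how one set of options reaches every parser spawned during resolution. A minimal sketch restating that pattern (the default resolver is used here; SchemaTranslator swaps in a FileDataSchemaResolver when file resolution paths are set):

ValidationOptions validationOptions = SchemaParser.getDefaultSchemaParserValidationOptions();
validationOptions.setAvroUnionMode(true);
SchemaParserFactory parserFactory = SchemaParserFactory.instance(validationOptions);
DataSchemaResolver resolver = new DefaultDataSchemaResolver(parserFactory);
SchemaParser parser = parserFactory.create(resolver);  // parser inherits the options
// Any further parsers the resolver creates via parserFactory inherit them as well.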
@@ -338,4 +338,4 @@ public String toString()
private final Set<DataSchemaLocation> _resolvedLocations = new HashSet<DataSchemaLocation>();
protected static final PrintStream out = new PrintStream(new FileOutputStream(FileDescriptor.out));
}
}
@@ -18,6 +18,7 @@
import com.linkedin.data.schema.DataSchemaLocation;
import com.linkedin.data.schema.SchemaParserFactory;
import java.io.InputStream;
import java.util.Collections;
import java.util.Iterator;
@@ -35,6 +36,11 @@ public DefaultDataSchemaResolver()
super(null);
}
public DefaultDataSchemaResolver(SchemaParserFactory parserFactory)
{
super(parserFactory);
}
private static final Iterator<DataSchemaLocation> _it = Collections.<DataSchemaLocation>emptyList().iterator();
@Override
@@ -121,7 +121,7 @@ public RequiredMode getRequiredMode()
/**
* Set the required mode that indicates how required fields should be handled during validation.
*
*
* @param requiredMode specifies the required mode.
*/
public void setRequiredMode(RequiredMode requiredMode)
@@ -132,10 +132,10 @@ public void setRequiredMode(RequiredMode requiredMode)
/**
* Set Avro union mode.
*
*
* If Avro union mode is enabled, a union uses the name (instead of full name) of the
* member type as the key to specify the type of the value in the union.
*
*
* @param value set to true to enable Avro union mode.
*/
public void setAvroUnionMode(boolean value)