Skip to content

Commit

Permalink
Remove 'type' based methods in UnionDataSchema and cleanup pegasus co…
Browse files Browse the repository at this point in the history
…debase that uses those methods.

RB=1022062
G=si-dev
R=kbalasub,nshankar,mnchen
A=kbalasub,mnchen
  • Loading branch information
saponniah committed Jul 10, 2017
1 parent f43087c commit 3f4e7c4
Show file tree
Hide file tree
Showing 16 changed files with 105 additions and 204 deletions.
Expand Up @@ -205,7 +205,7 @@ else if (currentElement.getValue().getClass() == DataList.class && component.get
childSchema = ((MapDataSchema) schema).getValues();
break;
case UNION:
childSchema = ((UnionDataSchema) schema).getType((String) name);
childSchema = ((UnionDataSchema) schema).getTypeByMemberKey((String) name);
break;
case RECORD:
RecordDataSchema.Field field = ((RecordDataSchema) schema).getField((String) name);
Expand Down
Expand Up @@ -262,7 +262,7 @@ private DataSchema currentSchema()
schema = (field == null ? null : field.getType());
break;
case UNION:
schema = ((UnionDataSchema) dereferencedSchema).getType(_currentEntry.getKey());
schema = ((UnionDataSchema) dereferencedSchema).getTypeByMemberKey(_currentEntry.getKey());
break;
case MAP:
schema = ((MapDataSchema) dereferencedSchema).getValues();
Expand Down
Expand Up @@ -497,7 +497,7 @@ public boolean setFields(List<Field> fields, StringBuilder errorMessageBuilder)
field.getType().getDereferencedType() == DataSchema.Type.UNION)
{
UnionDataSchema unionDataSchema = (UnionDataSchema) field.getType().getDereferencedDataSchema();
if (field.getOptional() == true && unionDataSchema.getType(DataSchemaConstants.NULL_TYPE) != null)
if (field.getOptional() == true && unionDataSchema.getTypeByMemberKey(DataSchemaConstants.NULL_TYPE) != null)
{
errorMessageBuilder.append("Field \"").append(field.getName());
errorMessageBuilder.append("\" is optional and its type is a union with null.\n");
Expand Down
Expand Up @@ -350,10 +350,10 @@ private void writeArray(ArrayDataSchema schema) throws IOException
private void writeUnion(UnionDataSchema schema) throws IOException
{
write("union[");
for(Iterator<DataSchema> iter = schema.getTypes().iterator(); iter.hasNext();)
for(Iterator<UnionDataSchema.Member> iter = schema.getMembers().iterator(); iter.hasNext();)
{
DataSchema member = iter.next();
writeReferenceOrInline(member, schema.isTypeDeclaredInline(member));
UnionDataSchema.Member member = iter.next();
writeReferenceOrInline(member.getType(), member.isDeclaredInline());
if (iter.hasNext())
{
write(", ");
Expand Down Expand Up @@ -655,9 +655,9 @@ else if (schema instanceof TyperefDataSchema)
else if (schema instanceof UnionDataSchema)
{
UnionDataSchema unionSchema = (UnionDataSchema) schema;
for (DataSchema member : unionSchema.getTypes())
for (UnionDataSchema.Member member : unionSchema.getMembers())
{
computeImports(member, unionSchema.isTypeDeclaredInline(member), importsAcc);
computeImports(member.getType(), member.isDeclaredInline(), importsAcc);
}
}
else if (schema instanceof MapDataSchema)
Expand Down
187 changes: 41 additions & 146 deletions data/src/main/java/com/linkedin/data/schema/UnionDataSchema.java
Expand Up @@ -16,15 +16,13 @@

package com.linkedin.data.schema;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;

import static com.linkedin.data.schema.DataSchemaConstants.FIELD_NAME_PATTERN;
import static com.linkedin.data.schema.DataSchemaConstants.RESTRICTED_UNION_ALIASES;
Expand Down Expand Up @@ -208,6 +206,39 @@ public boolean hasError()
return _hasError;
}

@Override
public boolean equals(Object object)
{
boolean result = false;
if (this == object)
{
result = true;
}
else if (object != null && object.getClass() == Member.class)
{
Member other = (Member) object;
result = ((_alias == null) ? other._alias == null : _alias.equals(other._alias)) &&
_type.equals(other._type) &&
_doc.equals(other._doc) &&
_properties.equals(other._properties) &&
_declaredInline == other._declaredInline &&
_hasError == other._hasError;
}

return result;
}

@Override
public int hashCode()
{
return (_alias == null) ? 0 : _alias.hashCode() ^
_type.hashCode() ^
_doc.hashCode() ^
_properties.hashCode() ^
(_declaredInline ? 0xAAAAAAAA : 0x55555555) ^
(_hasError ? 0xAAAAAAAA : 0x55555555);
}

private String _alias = null;
private DataSchema _type = DataSchemaConstants.NULL_DATA_SCHEMA;
private String _doc = "";
Expand All @@ -232,11 +263,7 @@ public boolean setMembers(List<Member> members, StringBuilder errorMessageBuilde
{
boolean ok = true;

List<DataSchema> types = new ArrayList<>(members.size());
Set<DataSchema> typesDeclaredInline = new HashSet<>(members.size());

Map<String, Integer> typeMap = new HashMap<>(members.size());
Map<String, Integer> nameMap = new HashMap<>(members.size());
Set<String> avroMemberKeys = new HashSet<>(members.size());
Map<String, Integer> memberKeyMap = new HashMap<>(members.size());

Optional<Boolean> areMembersAliased = Optional.empty();
Expand Down Expand Up @@ -281,32 +308,19 @@ else if (memberHasAlias)
}
else
{
String name = avroUnionMemberKey(memberType);
existing = nameMap.put(name, index);
if (existing != null && !memberHasAlias)
String avroMemberKey = avroUnionMemberKey(memberType);
boolean unique = avroMemberKeys.add(avroMemberKey);
if (!unique && !memberHasAlias)
{
errorMessageBuilder.append(memberType).append(" has name ").append(name).append(" that appears more than once in a union, this may cause compatibility problems with Avro.\n");
errorMessageBuilder.append(memberType).append(" has name ").append(avroMemberKey).append(" that appears more than once in a union, this may cause compatibility problems with Avro.\n");
ok = false;
}
}

types.add(memberType);
typeMap.put(memberType.getUnionMemberKey(), index);

if (member.isDeclaredInline())
{
typesDeclaredInline.add(memberType);
}

index++;
}

setTypesDeclaredInline(typesDeclaredInline);

_members = Collections.unmodifiableList(members);
_types = Collections.unmodifiableList(types);
_typesToIndexMap = Collections.unmodifiableMap(typeMap);
_namesToIndexMap = Collections.unmodifiableMap(nameMap);
_memberKeyToIndexMap = Collections.unmodifiableMap(memberKeyMap);
_membersAliased = areMembersAliased.orElse(false);

Expand All @@ -318,39 +332,6 @@ else if (memberHasAlias)
return ok;
}

/**
* Sets the types of the union.
*
* @param types that may be members of the union in the order they are defined.
* @param errorMessageBuilder to append error message to.
* @return true if types were set successfully, false otherwise.
*
* TODO (aponniah): Mark this method as deprecated.
* (@deprecated) Deprecated when the support for aliasing Union members was introduced.
* Use {@link #setMembers(List, StringBuilder)} ()} instead.
*/
public boolean setTypes(List<DataSchema> types, StringBuilder errorMessageBuilder)
{
List<Member> members = types.stream()
.map(Member::new)
.collect(Collectors.toList());
return setMembers(members, errorMessageBuilder);
}

/**
* Union members in the order declared.
*
* @return union members in the the order declared.
*
* TODO (aponniah): Mark this method as deprecated.
* (@deprecated) Deprecated when the support for aliasing Union members was introduced. Use
* {@link #getMembers()} instead.
*/
public List<DataSchema> getTypes()
{
return _types;
}

/**
* Union members in the order declared.
*
Expand All @@ -361,22 +342,6 @@ public List<Member> getMembers()
return _members;
}

/**
* Returns the index of a member.
*
* @param type to obtain index for.
* @return positive integer which is the index of the member if found else return -1.
*
* TODO (aponniah): Mark this method as deprecated.
* (@deprecated) Deprecated when the support for aliasing Union members was introduced. For Unions declared with more
* than one member of the same type this method will return a wrong index.
*/
public int index(String type)
{
Integer index = _typesToIndexMap.get(type);
return (index == null ? -1 : index);
}

/**
* Returns whether the passed in member key maps to one of the members of the union. The key will be matched
* against the contained member's key returned from {@link Member#getUnionMemberKey()}.
Expand All @@ -389,24 +354,6 @@ public boolean contains(String memberKey)
return _memberKeyToIndexMap.containsKey(memberKey);
}

/**
* Returns the {@link DataSchema} for a member. For {@link NamedDataSchema} types, the method expects
* its fully qualified name (with namespace) and for others, the default union member key returned from
* {@link DataSchema#getUnionMemberKey()}.
*
* @param type Fully qualified name of the member's type to return.
* @return the {@link DataSchema} if type is a member of the union, else return null.
*
* TODO (aponniah): Mark this method as deprecated.
* (@deprecated) Deprecated when the support for aliasing Union members was introduced.
* Use {@link #getTypeByMemberKey(String)} instead.
*/
public DataSchema getType(String type)
{
Integer index = _typesToIndexMap.get(type);
return (index != null ? _types.get(index) : null);
}

/**
* Returns the {@link DataSchema} for a member identified by its member key returned
* from {@link Member#getUnionMemberKey()}.
Expand All @@ -417,55 +364,7 @@ public DataSchema getType(String type)
public DataSchema getTypeByMemberKey(String memberKey)
{
Integer index = _memberKeyToIndexMap.get(memberKey);
return (index != null ? _types.get(index) : null);
}

/**
* Returns the {@link DataSchema} for a member. For {@link NamedDataSchema} types, the method expects
* its simple name (without namespace) and for others, the default union member key returned from
* {@link DataSchema#getUnionMemberKey()}.
*
* @param typeName Simple name of the member's type to return
* @return the {@link DataSchema} if type is a member of the union, else return null.
*
* @see #avroUnionMemberKey(DataSchema)
*
* TODO (aponniah): Mark this method as deprecated.
* (@deprecated) Deprecated when the support for aliasing Union members was introduced.
* Use {@link #getTypeByMemberKey(String)} instead.
*/
public DataSchema getTypeByName(String typeName)
{
Integer index = _namesToIndexMap.get(typeName);
return (index != null ? _types.get(index) : null);
}

/**
* Sets the union member types that are declared inline in the schema.
*
* @param typesDeclaredInline provides a set of member type that are declared inline.
*
* TODO (aponniah): Mark this method as deprecated.
* (@deprecated) Deprecated when the support for aliasing Union members was introduced.
* Replaced by {@link #setMembers(List, StringBuilder)} to set the union members.
*/
public void setTypesDeclaredInline(Set<DataSchema> typesDeclaredInline)
{
_typesDeclaredInline = Collections.unmodifiableSet(typesDeclaredInline);
}

/**
* Checks if a union member type is declared inline.
*
* @return true if the union member type is declared inline, false if it is referenced by name.
*
* TODO (aponniah): Mark this method as deprecated.
* (@deprecated) Deprecated when the support for aliasing Union members was introduced. This method can potentially
* return a wrong value on Unions declared with aliases. Use {@link Member#isTypeDeclaredInline(DataSchema)} instead.
*/
public boolean isTypeDeclaredInline(DataSchema type)
{
return _typesDeclaredInline.contains(type);
return (index != null ? _members.get(index).getType() : null);
}

/**
Expand Down Expand Up @@ -496,15 +395,15 @@ public boolean equals(Object object)
if (object != null && object.getClass() == UnionDataSchema.class)
{
UnionDataSchema other = (UnionDataSchema) object;
return super.equals(other) && _types.equals(other._types);
return super.equals(other) && _members.equals(other._members);
}
return false;
}

@Override
public int hashCode()
{
return super.hashCode() ^ _types.hashCode();
return super.hashCode() ^ _members.hashCode();
}

/**
Expand Down Expand Up @@ -534,11 +433,7 @@ public static String avroUnionMemberKey(DataSchema schema)
}

private List<Member> _members = Collections.emptyList();
private List<DataSchema> _types = Collections.emptyList();
private Map<String, Integer> _typesToIndexMap = _emptyTypesToIndexMap;
private Map<String, Integer> _namesToIndexMap = _emptyTypesToIndexMap;
private Map<String, Integer> _memberKeyToIndexMap = _emptyTypesToIndexMap;
private Set<DataSchema> _typesDeclaredInline = Collections.emptySet();
private boolean _membersAliased = false;

private static final Map<String, Integer> _emptyTypesToIndexMap = Collections.emptyMap();
Expand Down
Expand Up @@ -33,12 +33,12 @@
import com.linkedin.data.schema.TyperefDataSchema;
import com.linkedin.data.schema.UnionDataSchema;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.stream.Collectors;


/**
Expand Down Expand Up @@ -342,15 +342,15 @@ private static Object buildData(ParentSchemas parentSchemas,
break;
case UNION:
final UnionDataSchema unionSchema = (UnionDataSchema) derefSchema;
final List<DataSchema> types = removeAlreadyTraversedSchemasFromUnionMemberList(parentSchemas, unionSchema.getTypes());
final int unionIndex = _random.nextInt(types.size());
final DataSchema unionItemSchema = types.get(unionIndex);
data = buildData(parentSchemas, unionItemSchema, fieldName, spec);
final List<UnionDataSchema.Member> members = removeAlreadyTraversedSchemasFromUnionMemberList(parentSchemas, unionSchema.getMembers());
final int unionIndex = _random.nextInt(members.size());
final UnionDataSchema.Member unionMember = members.get(unionIndex);
data = buildData(parentSchemas, unionMember.getType(), fieldName, spec);

if (data != null)
{
final DataMap unionMap = new DataMap();
unionMap.put(unionItemSchema.getUnionMemberKey(), data);
unionMap.put(unionMember.getUnionMemberKey(), data);
data = unionMap;
}
break;
Expand Down Expand Up @@ -403,10 +403,9 @@ private static DataGenerationOptions preventRecursionIntoAlreadyTraversedSchemas
return spec;
}

private static List<DataSchema> removeAlreadyTraversedSchemasFromUnionMemberList(ParentSchemas parentSchemas, List<DataSchema> unionMembers)
private static List<UnionDataSchema.Member> removeAlreadyTraversedSchemasFromUnionMemberList(ParentSchemas parentSchemas, List<UnionDataSchema.Member> unionMembers)
{
final ArrayList<DataSchema> copy = new ArrayList<DataSchema>(unionMembers);
copy.removeAll(parentSchemas.getAllReferenced());
final List<UnionDataSchema.Member> copy = unionMembers.stream().filter(member -> !parentSchemas.contains(member.getType())).collect(Collectors.toList());
if(copy.isEmpty()) return unionMembers; // eek, cannot safely filter out already traversed schemas, this code path will likely result in IllegalArgumentException being thrown from preventRecursionIntoAlreadyTraversedSchemas (which is the correct way to handle this).
else return copy;
}
Expand Down

0 comments on commit 3f4e7c4

Please sign in to comment.