-
Notifications
You must be signed in to change notification settings - Fork 2.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improve speed when listing columns in BigQuery #21830
Conversation
|
||
private static ObjectAssert<TypeInfo> assertTypeInfo(String typeString, String toString) | ||
{ | ||
assertThat(getBigQueryTypeInfo(typeString)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this is testing the same thing twice:
getBigQueryTypeInfo(typeString) is TypeInfoUtils.getTypeInfoFromTypeString(typeString)
|
||
public abstract Category getCategory(); | ||
|
||
public abstract String getTypeName(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do we need that? It's only used for test purpose.
{ | ||
protected TypeInfo() {} | ||
|
||
public abstract Category getCategory(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We don't need a category - this information is already encoded in the sealed class hierarchy. Instead of a switch (typeInfo.getCategory())
we can justswitch (typeInfo)
private TypeSignature toTypeSignature(TypeInfo typeInfo) | ||
{ | ||
switch (typeInfo.getCategory()) { | ||
case PRIMITIVE: | ||
Type primitiveType = fromPrimitiveType((PrimitiveTypeInfo) typeInfo); | ||
if (primitiveType == null) { | ||
break; | ||
} | ||
return primitiveType.getTypeSignature(); | ||
case ARRAY: | ||
ArrayTypeInfo arrayTypeInfo = (ArrayTypeInfo) typeInfo; | ||
TypeSignature elementType = toTypeSignature(arrayTypeInfo.getListElementTypeInfo()); | ||
return arrayType(typeParameter(elementType)); | ||
case STRUCT: | ||
StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; | ||
List<TypeInfo> fieldTypes = structTypeInfo.getAllStructFieldTypeInfos(); | ||
List<String> fieldNames = structTypeInfo.getAllStructFieldNames(); | ||
verify(fieldTypes.size() == fieldNames.size(), "Invalid BigQuery struct type: %s", typeInfo); | ||
return rowType(Streams.zip( | ||
fieldNames.stream(), | ||
fieldTypes.stream().map(this::toTypeSignature), | ||
TypeSignatureParameter::namedField) | ||
.collect(Collectors.toList())); | ||
} | ||
throw new TrinoException(NOT_SUPPORTED, format("Unsupported BigQuery type: %s", typeInfo)); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
private TypeSignature toTypeSignature(TypeInfo typeInfo) | |
{ | |
switch (typeInfo.getCategory()) { | |
case PRIMITIVE: | |
Type primitiveType = fromPrimitiveType((PrimitiveTypeInfo) typeInfo); | |
if (primitiveType == null) { | |
break; | |
} | |
return primitiveType.getTypeSignature(); | |
case ARRAY: | |
ArrayTypeInfo arrayTypeInfo = (ArrayTypeInfo) typeInfo; | |
TypeSignature elementType = toTypeSignature(arrayTypeInfo.getListElementTypeInfo()); | |
return arrayType(typeParameter(elementType)); | |
case STRUCT: | |
StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; | |
List<TypeInfo> fieldTypes = structTypeInfo.getAllStructFieldTypeInfos(); | |
List<String> fieldNames = structTypeInfo.getAllStructFieldNames(); | |
verify(fieldTypes.size() == fieldNames.size(), "Invalid BigQuery struct type: %s", typeInfo); | |
return rowType(Streams.zip( | |
fieldNames.stream(), | |
fieldTypes.stream().map(this::toTypeSignature), | |
TypeSignatureParameter::namedField) | |
.collect(Collectors.toList())); | |
} | |
throw new TrinoException(NOT_SUPPORTED, format("Unsupported BigQuery type: %s", typeInfo)); | |
} | |
private TypeSignature toTypeSignature(TypeInfo typeInfo) | |
{ | |
return switch (typeInfo) { | |
case PrimitiveTypeInfo primitiveTypeInfo: | |
Type primitiveType = fromPrimitiveType(primitiveTypeInfo); | |
if (primitiveType == null) { | |
throw new TrinoException(NOT_SUPPORTED, "Unsupported primitive type: " + primitiveTypeInfo); | |
} | |
yield primitiveType.getTypeSignature(); | |
case ArrayTypeInfo arrayTypeInfo: | |
TypeSignature elementType = toTypeSignature(arrayTypeInfo.getListElementTypeInfo()); | |
yield arrayType(typeParameter(elementType)); | |
case StructTypeInfo structTypeInfo: | |
List<TypeInfo> fieldTypes = structTypeInfo.getAllStructFieldTypeInfos(); | |
List<String> fieldNames = structTypeInfo.getAllStructFieldNames(); | |
verify(fieldTypes.size() == fieldNames.size(), "Invalid BigQuery struct type: %s", typeInfo); | |
yield rowType(Streams.zip( | |
fieldNames.stream(), | |
fieldTypes.stream().map(this::toTypeSignature), | |
TypeSignatureParameter::namedField) | |
.collect(Collectors.toList())); | |
}; | |
} |
case GEOGRAPHY -> VARCHAR; | ||
case JSON -> jsonType; | ||
case TIMESTAMP -> BIGINT; | ||
default -> null; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
throw new TrinoException("Unsupported type"); then return null handling at the call-site
*/ | ||
package io.trino.plugin.bigquery.type; | ||
|
||
public enum Category |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not needed - we already encode the category information in the sealed classes hierarchy
return getTypeName(); | ||
} | ||
|
||
@Override |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need hashCode and equals? Where is it used?
Closing as @nineinchnick is going to take over this PR. |
Description
Using mini parser because BigQuery Java SDK doesn't support translating string to BigQuery type as far as I asked Google engineers.
This PR improves listing columns. Test with 169 tables is improved from 22s to 1.5s.
Release notes
(x) Release notes are required, with the following suggested text: