Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce the identifier length in generated code for nested columns #537

Merged
merged 1 commit into from
Apr 30, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,23 @@ public static SqlTypeBytecodeExpression constantType(CallSiteBinder callSiteBind
return new SqlTypeBytecodeExpression(type, binding, BOOTSTRAP_METHOD);
}

private static String generateName(Type type)
{
String name = type.getTypeSignature().toString();
if (name.length() > 20) {
// Use type base to reduce the identifier size in generated code
name = type.getTypeSignature().getBase();
}
return name.replaceAll("\\W+", "_");
}

private final Type type;
private final Binding binding;
private final Method bootstrapMethod;

private SqlTypeBytecodeExpression(Type type, Binding binding, Method bootstrapMethod)
{
super(type(Type.class));

this.type = requireNonNull(type, "type is null");
this.binding = requireNonNull(binding, "binding is null");
this.bootstrapMethod = requireNonNull(bootstrapMethod, "bootstrapMethod is null");
Expand All @@ -56,7 +65,7 @@ private SqlTypeBytecodeExpression(Type type, Binding binding, Method bootstrapMe
@Override
public BytecodeNode getBytecode(MethodGenerationContext generationContext)
{
return InvokeInstruction.invokeDynamic(type.getTypeSignature().toString().replaceAll("\\W+", "_"), binding.getType(), bootstrapMethod, binding.getBindingId());
return InvokeInstruction.invokeDynamic(generateName(type), binding.getType(), bootstrapMethod, binding.getBindingId());
}

@Override
Expand Down
48 changes: 48 additions & 0 deletions presto-main/src/test/java/io/prestosql/block/BlockAssertions.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import io.prestosql.spi.block.Block;
import io.prestosql.spi.block.BlockBuilder;
import io.prestosql.spi.block.DictionaryBlock;
import io.prestosql.spi.block.RowBlockBuilder;
import io.prestosql.spi.block.RunLengthEncodedBlock;
import io.prestosql.spi.type.ArrayType;
import io.prestosql.spi.type.DecimalType;
Expand All @@ -30,6 +31,7 @@
import java.util.List;

import static com.google.common.base.Preconditions.checkArgument;
import static io.airlift.slice.Slices.utf8Slice;
import static io.prestosql.spi.type.BigintType.BIGINT;
import static io.prestosql.spi.type.BooleanType.BOOLEAN;
import static io.prestosql.spi.type.DateType.DATE;
Expand Down Expand Up @@ -276,6 +278,52 @@ public static Block createIntsBlock(Iterable<Integer> values)
return builder.build();
}

public static Block createRowBlock(List<Type> fieldTypes, Object[]... rows)
{
BlockBuilder rowBlockBuilder = new RowBlockBuilder(fieldTypes, null, 1);
for (Object[] row : rows) {
if (row == null) {
rowBlockBuilder.appendNull();
continue;
}
BlockBuilder singleRowBlockWriter = rowBlockBuilder.beginBlockEntry();
for (Object fieldValue : row) {
if (fieldValue == null) {
singleRowBlockWriter.appendNull();
continue;
}

if (fieldValue instanceof String) {
VARCHAR.writeSlice(singleRowBlockWriter, utf8Slice((String) fieldValue));
}
else if (fieldValue instanceof Slice) {
VARBINARY.writeSlice(singleRowBlockWriter, (Slice) fieldValue);
}
else if (fieldValue instanceof Double) {
DOUBLE.writeDouble(singleRowBlockWriter, ((Double) fieldValue).doubleValue());
}
else if (fieldValue instanceof Long) {
BIGINT.writeLong(singleRowBlockWriter, ((Long) fieldValue).longValue());
}
else if (fieldValue instanceof Boolean) {
BOOLEAN.writeBoolean(singleRowBlockWriter, ((Boolean) fieldValue).booleanValue());
}
else if (fieldValue instanceof Block) {
singleRowBlockWriter.appendStructure((Block) fieldValue);
}
else if (fieldValue instanceof Integer) {
INTEGER.writeLong(singleRowBlockWriter, ((Integer) fieldValue).intValue());
}
else {
throw new IllegalArgumentException();
}
}
rowBlockBuilder.closeEntry();
}

return rowBlockBuilder.build();
}

public static Block createEmptyLongsBlock()
{
return BIGINT.createFixedSizeBlockBuilder(0).build();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
import io.prestosql.spi.connector.RecordPageSource;
import io.prestosql.spi.connector.RecordSet;
import io.prestosql.spi.predicate.Utils;
import io.prestosql.spi.type.RowType;
import io.prestosql.spi.type.TimeZoneKey;
import io.prestosql.spi.type.Type;
import io.prestosql.split.PageSourceProvider;
Expand Down Expand Up @@ -91,6 +92,7 @@
import java.lang.reflect.Modifier;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
Expand All @@ -110,6 +112,7 @@
import static io.prestosql.block.BlockAssertions.createDoublesBlock;
import static io.prestosql.block.BlockAssertions.createIntsBlock;
import static io.prestosql.block.BlockAssertions.createLongsBlock;
import static io.prestosql.block.BlockAssertions.createRowBlock;
import static io.prestosql.block.BlockAssertions.createSlicesBlock;
import static io.prestosql.block.BlockAssertions.createStringsBlock;
import static io.prestosql.block.BlockAssertions.createTimestampsWithTimezoneBlock;
Expand Down Expand Up @@ -153,6 +156,11 @@ public final class FunctionAssertions

private static final SqlParser SQL_PARSER = new SqlParser();

// Increase the number of fields to generate a wide column
private static final int TEST_ROW_NUMBER_OF_FIELDS = 2500;
private static final RowType TEST_ROW_TYPE = createTestRowType(TEST_ROW_NUMBER_OF_FIELDS);
private static final Block TEST_ROW_DATA = createTestRowData(TEST_ROW_TYPE);

private static final Page SOURCE_PAGE = new Page(
createLongsBlock(1234L),
createStringsBlock("hello"),
Expand All @@ -163,7 +171,8 @@ public final class FunctionAssertions
createStringsBlock((String) null),
createTimestampsWithTimezoneBlock(packDateTimeWithZone(new DateTime(1970, 1, 1, 0, 1, 0, 999, DateTimeZone.UTC).getMillis(), TimeZoneKey.getTimeZoneKey("Z"))),
createSlicesBlock(Slices.wrappedBuffer((byte) 0xab)),
createIntsBlock(1234));
createIntsBlock(1234),
TEST_ROW_DATA);

private static final Page ZERO_CHANNEL_PAGE = new Page(1);

Expand All @@ -178,6 +187,7 @@ public final class FunctionAssertions
.put(new Symbol("bound_timestamp_with_timezone"), TIMESTAMP_WITH_TIME_ZONE)
.put(new Symbol("bound_binary_literal"), VARBINARY)
.put(new Symbol("bound_integer"), INTEGER)
.put(new Symbol("bound_row"), TEST_ROW_TYPE)
.build();

private static final Map<Symbol, Integer> INPUT_MAPPING = ImmutableMap.<Symbol, Integer>builder()
Expand All @@ -191,6 +201,7 @@ public final class FunctionAssertions
.put(new Symbol("bound_timestamp_with_timezone"), 7)
.put(new Symbol("bound_binary_literal"), 8)
.put(new Symbol("bound_integer"), 9)
.put(new Symbol("bound_row"), 10)
.build();

private static final PageSourceProvider PAGE_SOURCE_PROVIDER = new TestPageSourceProvider();
Expand Down Expand Up @@ -1021,7 +1032,7 @@ public ConnectorPageSource createPageSource(Session session, Split split, TableH
assertInstanceOf(split.getConnectorSplit(), FunctionAssertions.TestSplit.class);
FunctionAssertions.TestSplit testSplit = (FunctionAssertions.TestSplit) split.getConnectorSplit();
if (testSplit.isRecordSet()) {
RecordSet records = InMemoryRecordSet.builder(ImmutableList.of(BIGINT, VARCHAR, DOUBLE, BOOLEAN, BIGINT, VARCHAR, VARCHAR, TIMESTAMP_WITH_TIME_ZONE, VARBINARY, INTEGER))
RecordSet records = InMemoryRecordSet.builder(ImmutableList.of(BIGINT, VARCHAR, DOUBLE, BOOLEAN, BIGINT, VARCHAR, VARCHAR, TIMESTAMP_WITH_TIME_ZONE, VARBINARY, INTEGER, TEST_ROW_TYPE))
.addRow(
1234L,
"hello",
Expand All @@ -1032,7 +1043,8 @@ public ConnectorPageSource createPageSource(Session session, Split split, TableH
null,
packDateTimeWithZone(new DateTime(1970, 1, 1, 0, 1, 0, 999, DateTimeZone.UTC).getMillis(), TimeZoneKey.getTimeZoneKey("Z")),
Slices.wrappedBuffer((byte) 0xab),
1234)
1234,
TEST_ROW_DATA.getObject(0, Block.class))
.build();
return new RecordPageSource(records);
}
Expand All @@ -1052,6 +1064,45 @@ private static Split createNormalSplit()
return new Split(new CatalogName("test"), new TestSplit(false), Lifespan.taskWide());
}

private static RowType createTestRowType(int numberOfFields)
{
Iterator<Type> types = Iterables.<Type>cycle(
BIGINT,
INTEGER,
VARCHAR,
DOUBLE,
BOOLEAN,
VARBINARY,
RowType.from(ImmutableList.of(RowType.field("nested_nested_column", VARCHAR)))).iterator();

List<RowType.Field> fields = new ArrayList<>();
for (int fieldIdx = 0; fieldIdx < numberOfFields; fieldIdx++) {
fields.add(new RowType.Field(Optional.of("nested_column_" + fieldIdx), types.next()));
}

return RowType.from(fields);
}

private static Block createTestRowData(RowType rowType)
{
Iterator<Object> values = Iterables.cycle(
1234L,
34,
"hello",
12.34d,
true,
Slices.wrappedBuffer((byte) 0xab),
createRowBlock(ImmutableList.of(VARCHAR), Collections.singleton("innerFieldValue").toArray()).getObject(0, Block.class)).iterator();

final int numFields = rowType.getFields().size();
Object[] rowValues = new Object[numFields];
for (int fieldIdx = 0; fieldIdx < numFields; fieldIdx++) {
rowValues[fieldIdx] = values.next();
}

return createRowBlock(rowType.getTypeParameters(), rowValues);
}

private static class TestSplit
implements ConnectorSplit
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -621,6 +621,35 @@ public void testBinaryOperatorsString()
Futures.allAsList(futures).get();
}

@Test
public void testNestedColumnFilter()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Make sure this test fails without the changes to SqlTypeBytecodeExpression.java

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure @findepi. Setting FunctionAssertions.TEST_ROW_NUMBER_OF_FIELDS to a large number (eg. 4000) reproes the issue. I didn't include it because FunctionAssertions is used in lot of tests, not sure of the implications of the having such large schema on test runtimes. Let me see if I can separate out the wide row into a separate test data variable.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@findepi Updated commit to repro the issue in test case. I tried to find a better way to simplify the test using CAST( ROW(1.0, 2.0, .... n) AS ROW(c1 DOUBLE, c2 DOUBLE,... cn DOUBLE)). This reproes the issue, but hits another limit (generated method for CAST is too big). I ran the TestExpressionComplier and TestRowOperators with and without the large row data and I only see increase of ms in test runtime. Hope this is ok. Let me know if there is a simpler way to test this.

{
assertFilter("bound_row.nested_column_0 = 1234", true);
assertFilter("bound_row.nested_column_0 = 1223", false);
assertFilter("bound_row.nested_column_1 = 34", true);
assertFilter("bound_row.nested_column_1 = 33", false);
assertFilter("bound_row.nested_column_2 = 'hello'", true);
assertFilter("bound_row.nested_column_2 = 'value1'", false);
assertFilter("bound_row.nested_column_3 = 12.34", true);
assertFilter("bound_row.nested_column_3 = 34.34", false);
assertFilter("bound_row.nested_column_4 = true", true);
assertFilter("bound_row.nested_column_4 = false", false);
assertFilter("bound_row.nested_column_6.nested_nested_column = 'innerFieldValue'", true);
assertFilter("bound_row.nested_column_6.nested_nested_column != 'innerFieldValue'", false);

// combination of types in one filter
assertFilter(
ImmutableList.of(
"bound_row.nested_column_0 = 1234", "bound_row.nested_column_7 >= 1234",
"bound_row.nested_column_1 = 34", "bound_row.nested_column_8 >= 33",
"bound_row.nested_column_2 = 'hello'", "bound_row.nested_column_9 >= 'hello'",
"bound_row.nested_column_3 = 12.34", "bound_row.nested_column_10 >= 12.34",
"bound_row.nested_column_4 = true", "NOT (bound_row.nested_column_11 = false)",
"bound_row.nested_column_6.nested_nested_column = 'innerFieldValue'", "bound_row.nested_column_13.nested_nested_column LIKE 'innerFieldValue'")
.stream().collect(joining(" AND ")),
true);
}

private static VarcharType varcharType(String... values)
{
return varcharType(Arrays.asList(values));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,10 @@ else if (type.getTypeSignature().getBase().equals("array")) {
checkArgument(value instanceof Block,
"Expected value %d to be an instance of Block, but is a %s", i, value.getClass().getSimpleName());
}
else if (type.getTypeSignature().getBase().equals("row")) {
checkArgument(value instanceof Block,
"Expected value %d to be an instance of Block, but is a %s", i, value.getClass().getSimpleName());
}
else {
throw new IllegalStateException("Unsupported column type " + types.get(i));
}
Expand Down