Skip to content

Commit

Permalink
Optimize Hive Connector ORC Readers and Filters using Java Vector API
Browse files Browse the repository at this point in the history
  • Loading branch information
AbhijitKulkarni1 committed Jun 17, 2024
1 parent e12162a commit 213e86c
Show file tree
Hide file tree
Showing 24 changed files with 1,249 additions and 28 deletions.
108 changes: 108 additions & 0 deletions presto-common/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -115,4 +115,112 @@
<scope>test</scope>
</dependency>
</dependencies>

<profiles>
<profile>
<id>jdk19PrestoCommon</id>
<activation>
<jdk>19</jdk>
</activation>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<version>3.6.1</version>
<dependencies>
<dependency>
<groupId>org.apache.maven.shared</groupId>
<artifactId>maven-dependency-analyzer</artifactId>
<version>1.13.2</version>
</dependency>
</dependencies>
<configuration>
<ignoredUnusedDeclaredDependencies>
<ignoredUnusedDeclaredDependency>javax.inject:javax.inject</ignoredUnusedDeclaredDependency>
</ignoredUnusedDeclaredDependencies>
<ignoredUsedUndeclaredDependencies>
<ignoredUsedUndeclaredDependency>javax.inject:javax.inject</ignoredUsedUndeclaredDependency>
</ignoredUsedUndeclaredDependencies>
<failOnWarning>${air.check.fail-dependency}</failOnWarning>
<ignoreNonCompile>true</ignoreNonCompile>
</configuration>
<executions>
<execution>
<id>default</id>
<phase>process-test-classes</phase>
<goals>
<goal>analyze-only</goal>
<goal>analyze-duplicate</goal>
<goal>analyze-dep-mgt</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.gaul</groupId>
<artifactId>modernizer-maven-plugin</artifactId>
<version>2.7.0</version>
<configuration>
<violationsFiles>
<violationsFile>${air.main.basedir}/src/modernizer/violations.xml</violationsFile>
</violationsFiles>
<exclusionPatterns>
<exclusionPattern>org/joda/time/.*</exclusionPattern>
<exclusionPattern>com/google/common/.*</exclusionPattern>
</exclusionPatterns>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<argLine>--add-opens java.base/java.util=ALL-UNNAMED --add-opens java.base/java.text=ALL-UNNAMED --add-opens java.base/java.net=ALL-UNNAMED --add-opens java.base/java.nio=ALL-UNNAMED --add-opens java.base/java.lang=ALL-UNNAMED --add-modules=jdk.incubator.vector</argLine>
</configuration>
</plugin>
</plugins>
</build>
</profile>
<profile>
<id>lessThanJava19PrestoCommon</id>
<activation>
<jdk>[1.8,19)</jdk>
</activation>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<executions>
<execution>
<id>default-compile</id>
<phase>compile</phase>
<goals>
<goal>compile</goal>
</goals>
<configuration>
<excludes>
<exclude>**/com/facebook/presto/common/predicate/vector/**</exclude>
</excludes>
</configuration>
</execution>
<execution>
<id>default-testCompile</id>
<phase>test-compile</phase>
<goals>
<goal>testCompile</goal>
</goals>
<configuration>
<testExcludes>
<exclude>**/com/facebook/presto/common/predicate/vector/**</exclude>
</testExcludes>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
</profiles>

</project>
Original file line number Diff line number Diff line change
Expand Up @@ -744,8 +744,8 @@ public boolean getUpperExclusive()
class DoubleRange
extends AbstractRange
{
private final double lower;
private final double upper;
protected final double lower;
protected final double upper;

protected DoubleRange(double lower, boolean lowerUnbounded, boolean lowerExclusive, double upper, boolean upperUnbounded, boolean upperExclusive, boolean nullAllowed)
{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.common.predicate.vector;

import com.facebook.presto.common.predicate.TupleDomainFilter;
import jdk.incubator.vector.DoubleVector;
import jdk.incubator.vector.VectorMask;

import static jdk.incubator.vector.VectorOperators.GE;
import static jdk.incubator.vector.VectorOperators.GT;
import static jdk.incubator.vector.VectorOperators.LE;
import static jdk.incubator.vector.VectorOperators.LT;
import static jdk.incubator.vector.VectorOperators.NE;

/**
 * Vectorized counterpart of {@link TupleDomainFilter.DoubleRange}: evaluates the
 * range predicate on a whole SIMD lane-group of doubles at once using the
 * incubating {@code jdk.incubator.vector} API.
 */
public class DoubleRangeVector
        extends TupleDomainFilter.DoubleRange
        implements TupleDomainFilterVector
{
    protected DoubleRangeVector(double lower, boolean lowerUnbounded, boolean lowerExclusive, double upper, boolean upperUnbounded, boolean upperExclusive, boolean nullAllowed)
    {
        super(lower, lowerUnbounded, lowerExclusive, upper, upperUnbounded, upperExclusive, nullAllowed);
    }

    public static DoubleRangeVector of(double lower, boolean lowerUnbounded, boolean lowerExclusive, double upper, boolean upperUnbounded, boolean upperExclusive, boolean nullAllowed)
    {
        return new DoubleRangeVector(lower, lowerUnbounded, lowerExclusive, upper, upperUnbounded, upperExclusive, nullAllowed);
    }

    /**
     * Adapts an existing scalar {@code DoubleRange} into its vectorized form,
     * copying all bounds and null-handling flags.
     */
    public DoubleRangeVector(DoubleRange doubleRange)
    {
        super(doubleRange.getLower(),
                doubleRange.getLowerUnbounded(),
                doubleRange.getLowerExclusive(),
                doubleRange.getUpper(),
                doubleRange.getUpperUnbounded(),
                doubleRange.getUpperExclusive(),
                doubleRange.testNull());
    }

    /**
     * Tests every lane of {@code values} against this range.
     *
     * @param values vector of double values to test
     * @return mask with a lane set for each value inside the range; NaN lanes are never set
     */
    @Override
    public VectorMask<Double> testDoubleVector(DoubleVector values)
    {
        // NaN is never inside any range; start from a mask that excludes NaN lanes.
        // Previously the lower-bound comparison *replaced* this mask instead of
        // AND-ing with it, which was only correct because NaN fails every ordered
        // comparison (GT/GE/LT/LE) under IEEE 754. Make the conjunction explicit
        // so correctness does not depend on that accident.
        VectorMask<Double> passing = values.compare(NE, Double.NaN);
        if (!lowerUnbounded) {
            if (lowerExclusive) {
                passing = passing.and(values.compare(GT, lower));
            }
            else {
                passing = passing.and(values.compare(GE, lower));
            }
        }
        if (!upperUnbounded) {
            if (upperExclusive) {
                passing = passing.and(values.compare(LT, upper));
            }
            else {
                passing = passing.and(values.compare(LE, upper));
            }
        }
        return passing;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.common.predicate.vector;

import com.facebook.presto.common.predicate.TupleDomainFilter;
import jdk.incubator.vector.DoubleVector;
import jdk.incubator.vector.VectorMask;

/**
 * A {@link TupleDomainFilter} that can additionally evaluate the filter against
 * a whole SIMD vector of values at once using the incubating
 * {@code jdk.incubator.vector} API.
 */
public interface TupleDomainFilterVector
        extends TupleDomainFilter
{
    /**
     * Tests each lane of {@code values} against this filter.
     *
     * @param values vector of double values to test
     * @return mask with a set lane for every value that passes the filter
     */
    VectorMask<Double> testDoubleVector(DoubleVector values);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.common.predicate.vector;

import jdk.incubator.vector.DoubleVector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorSpecies;
import org.testng.annotations.Test;

import static org.testng.Assert.assertEquals;

/**
 * Verifies that the vectorized double-range filter produces lane-by-lane results
 * identical to the scalar {@code testDouble} path.
 */
public class TestTupleDomainFilterVector
{
    @Test
    public void testDoubleRangeVector()
    {
        VectorSpecies<Double> speciesDouble = DoubleVector.SPECIES_MAX;
        // Fill one full species-width vector with the values 0, 1, 2, ...
        double[] values = new double[speciesDouble.length()];
        for (int i = 0; i < values.length; i++) {
            values[i] = i;
        }
        DoubleVector valuesVector = DoubleVector.fromArray(speciesDouble, values, 0);

        // Degenerate single-point range [1, 1]
        assertVectorMatchesScalar(DoubleRangeVector.of(1, false, false, 1, false, false, false), values, valuesVector);
        // Wider range [1, 10]
        assertVectorMatchesScalar(DoubleRangeVector.of(1, false, false, 10, false, false, false), values, valuesVector);
    }

    // Asserts that every lane of the vector result agrees with the scalar filter.
    private static void assertVectorMatchesScalar(TupleDomainFilterVector filter, double[] values, DoubleVector valuesVector)
    {
        VectorMask<Double> result = filter.testDoubleVector(valuesVector);
        for (int i = 0; i < values.length; i++) {
            assertEquals(filter.testDouble(values[i]), result.laneIsSet(i));
        }
    }
}
2 changes: 2 additions & 0 deletions presto-docs/src/main/sphinx/connector/hive.rst
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,8 @@ Property Name Description

``hive.skip-empty-files`` Enable skipping empty files. Otherwise, it will produce an ``false``
error iterating through empty files.

``hive.orc-use-vector-filter`` Enable use of vector ORC readers in compilation and execution ``false``
================================================== ============================================================ ============

Metastore Configuration Properties
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,7 @@ public class HiveClientConfig
private int quickStatsMaxConcurrentCalls = 100;
private DataSize affinitySchedulingFileSectionSize = new DataSize(256, MEGABYTE);
private boolean legacyTimestampBucketing;
private boolean orcUseVectorFilter;

@Min(0)
public int getMaxInitialSplits()
Expand Down Expand Up @@ -1861,4 +1862,17 @@ public HiveClientConfig setLegacyTimestampBucketing(boolean legacyTimestampBucke
this.legacyTimestampBucketing = legacyTimestampBucketing;
return this;
}

    // Backs the "hive.orc-use-vector-filter" config property; returns this for
    // the fluent-setter style used by the other setters in this config class.
    @Config("hive.orc-use-vector-filter")
    @ConfigDescription("Experimental: enable vector path for orc filters")
    public HiveClientConfig setOrcUseVectorFilter(boolean orcUseVectorFilter)
    {
        this.orcUseVectorFilter = orcUseVectorFilter;
        return this;
    }

    // Whether the experimental vectorized ORC filter path is enabled (default false).
    public boolean getOrcUseVectorFilter()
    {
        return orcUseVectorFilter;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ public final class HiveSessionProperties
private static final String ORC_OPTIMIZED_WRITER_STRING_DICTIONARY_SORTING_ENABLED = "orc_optimized_writer_string_dictionary_sorting_enabled";
private static final String ORC_OPTIMIZED_WRITER_FLAT_MAP_WRITER_ENABLED = "orc_optimized_writer_flat_map_writer_enabled";
private static final String ORC_OPTIMIZED_WRITER_COMPRESSION_LEVEL = "orc_optimized_writer_compression_level";
private static final String ORC_USE_VECTOR_FILTER = "orc_use_vector_filter";
private static final String PAGEFILE_WRITER_MAX_STRIPE_SIZE = "pagefile_writer_max_stripe_size";
public static final String HIVE_STORAGE_FORMAT = "hive_storage_format";
private static final String COMPRESSION_CODEC = "compression_codec";
Expand Down Expand Up @@ -215,6 +216,11 @@ public HiveSessionProperties(HiveClientConfig hiveClientConfig, OrcFileWriterCon
"Experimental: ORC: Compression level, works only for ZSTD and ZLIB compression kinds",
orcFileWriterConfig.getCompressionLevel(),
false),
booleanProperty(
ORC_USE_VECTOR_FILTER,
"Experimental: enable vector path for orc filters",
hiveClientConfig.getOrcUseVectorFilter(),
false),
dataSizeSessionProperty(
PAGEFILE_WRITER_MAX_STRIPE_SIZE,
"PAGEFILE: Max stripe size",
Expand Down Expand Up @@ -1140,4 +1146,9 @@ public static boolean isLegacyTimestampBucketing(ConnectorSession session)
{
return session.getProperty(LEGACY_TIMESTAMP_BUCKETING, Boolean.class);
}

    // Reads the "orc_use_vector_filter" session property (seeded from
    // hive.orc-use-vector-filter in HiveClientConfig).
    public static boolean getOrcUseVectorFilter(ConnectorSession session)
    {
        return session.getProperty(ORC_USE_VECTOR_FILTER, Boolean.class);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@
import static com.facebook.presto.hive.HiveCommonSessionProperties.isOrcBloomFiltersEnabled;
import static com.facebook.presto.hive.HiveCommonSessionProperties.isOrcZstdJniDecompressionEnabled;
import static com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_BUCKET_FILES;
import static com.facebook.presto.hive.HiveSessionProperties.getOrcUseVectorFilter;
import static com.facebook.presto.hive.HiveSessionProperties.isAdaptiveFilterReorderingEnabled;
import static com.facebook.presto.hive.HiveSessionProperties.isLegacyTimestampBucketing;
import static com.facebook.presto.hive.HiveUtil.getPhysicalHiveColumnHandles;
Expand Down Expand Up @@ -365,6 +366,9 @@ public static ConnectorPageSource createOrcPageSource(

List<FilterFunction> filterFunctions = toFilterFunctions(replaceExpression(remainingPredicate, variableToInput), bucketAdapter, session, rowExpressionService.getDeterminismEvaluator(), rowExpressionService.getPredicateCompiler());

boolean orcUseVectorFilter = getOrcUseVectorFilter(session);
Map<String, Boolean> orcReaderUserOptions = new HashMap<String, Boolean>();
orcReaderUserOptions.put(OrcReader.ORC_USE_VECTOR_FILTER, orcUseVectorFilter);
OrcSelectiveRecordReader recordReader = reader.createSelectiveRecordReader(
columnTypes,
outputIndices,
Expand All @@ -380,7 +384,8 @@ public static ConnectorPageSource createOrcPageSource(
hiveStorageTimeZone,
systemMemoryUsage,
Optional.empty(),
INITIAL_BATCH_SIZE);
INITIAL_BATCH_SIZE,
orcReaderUserOptions);

return new OrcSelectivePageSource(
recordReader,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,8 @@ public void testDefaults()
.setCteVirtualBucketCount(128)
.setSkipEmptyFilesEnabled(false)
.setAffinitySchedulingFileSectionSize(new DataSize(256, MEGABYTE))
.setLegacyTimestampBucketing(false));
.setLegacyTimestampBucketing(false)
.setOrcUseVectorFilter(false));
}

@Test
Expand Down Expand Up @@ -296,6 +297,7 @@ public void testExplicitPropertyMappings()
.put("hive.affinity-scheduling-file-section-size", "512MB")
.put("hive.skip-empty-files", "true")
.put("hive.legacy-timestamp-bucketing", "true")
.put("hive.orc-use-vector-filter", "true")
.build();

HiveClientConfig expected = new HiveClientConfig()
Expand Down Expand Up @@ -420,7 +422,8 @@ public void testExplicitPropertyMappings()
.setSkipEmptyFilesEnabled(true)
.setCteVirtualBucketCount(256)
.setAffinitySchedulingFileSectionSize(new DataSize(512, MEGABYTE))
.setLegacyTimestampBucketing(true);
.setLegacyTimestampBucketing(true)
.setOrcUseVectorFilter(true);

ConfigAssertions.assertFullMapping(properties, expected);
}
Expand Down
Loading

0 comments on commit 213e86c

Please sign in to comment.