New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support persistent CTE's #20887
Support persistent CTE's #20887
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,12 @@ | |
import com.facebook.presto.Session; | ||
import com.facebook.presto.spi.PrestoException; | ||
import com.facebook.presto.sql.relational.SqlToRowExpressionTranslator; | ||
import com.facebook.presto.sql.tree.Query; | ||
import com.google.common.annotations.VisibleForTesting; | ||
|
||
import java.util.HashMap; | ||
import java.util.Map; | ||
import java.util.Stack; | ||
|
||
import static com.facebook.presto.SystemSessionProperties.getMaxLeafNodesInPlan; | ||
import static com.facebook.presto.SystemSessionProperties.isLeafNodeLimitEnabled; | ||
|
@@ -28,10 +34,18 @@ public class SqlPlannerContext | |
private int leafNodesInLogicalPlan; | ||
private final SqlToRowExpressionTranslator.Context translatorContext; | ||
|
||
private final NestedCteStack nestedCteStack; | ||
|
||
public SqlPlannerContext(int leafNodesInLogicalPlan) | ||
{ | ||
this.leafNodesInLogicalPlan = leafNodesInLogicalPlan; | ||
this.translatorContext = new SqlToRowExpressionTranslator.Context(); | ||
this.nestedCteStack = new NestedCteStack(); | ||
} | ||
|
||
public NestedCteStack getNestedCteStack() | ||
{ | ||
return nestedCteStack; | ||
} | ||
|
||
public SqlToRowExpressionTranslator.Context getTranslatorContext() | ||
|
@@ -49,4 +63,58 @@ public void incrementLeafNodes(Session session) | |
} | ||
} | ||
} | ||
|
||
public class NestedCteStack | ||
{ | ||
@VisibleForTesting | ||
public static final String delimiter = "_*%$_"; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. naming convention note: static final fields should be in all caps (so DELIMETER) |
||
private final Stack<String> cteStack; | ||
private final Map<String, String> rawCtePathMap; | ||
|
||
public NestedCteStack() | ||
{ | ||
this.cteStack = new Stack<>(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. initialize these where they are declared. They don't need anything from the constructor. |
||
this.rawCtePathMap = new HashMap<>(); | ||
} | ||
|
||
public void push(String cteName, Query query) | ||
{ | ||
this.cteStack.push(cteName); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. style note: we generally only use the "this" prefix in the constructor or setters. Otherwise, we refer to fields without the "this" prefix as long as there is no conflict requiring it. (not sure why, i think just to be concise). |
||
if (query.getWith().isPresent()) { | ||
// All ctes defined in this context should have their paths updated | ||
query.getWith().get().getQueries().forEach(with -> this.addNestedCte(with.getName().toString())); | ||
} | ||
} | ||
|
||
public void pop(Query query) | ||
{ | ||
this.cteStack.pop(); | ||
if (query.getWith().isPresent()) { | ||
query.getWith().get().getQueries().forEach(with -> this.removeNestedCte(with.getName().toString())); | ||
} | ||
} | ||
|
||
public String getRawPath(String cteName) | ||
{ | ||
if (!this.rawCtePathMap.containsKey(cteName)) { | ||
return cteName; | ||
} | ||
return this.rawCtePathMap.get(cteName); | ||
} | ||
|
||
private void addNestedCte(String cteName) | ||
{ | ||
this.rawCtePathMap.put(cteName, getCurrentRelativeCtePath() + delimiter + cteName); | ||
} | ||
|
||
private void removeNestedCte(String cteName) | ||
{ | ||
this.rawCtePathMap.remove(cteName); | ||
} | ||
|
||
public String getCurrentRelativeCtePath() | ||
{ | ||
return String.join(delimiter, cteStack); | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,9 @@ | |
import com.facebook.presto.spi.VariableAllocator; | ||
import com.facebook.presto.spi.WarningCollector; | ||
import com.facebook.presto.spi.plan.AggregationNode; | ||
import com.facebook.presto.spi.plan.CteConsumerNode; | ||
import com.facebook.presto.spi.plan.CteProducerNode; | ||
import com.facebook.presto.spi.plan.CteReferenceNode; | ||
import com.facebook.presto.spi.plan.DistinctLimitNode; | ||
import com.facebook.presto.spi.plan.ExceptNode; | ||
import com.facebook.presto.spi.plan.FilterNode; | ||
|
@@ -55,6 +58,9 @@ public class ApplyConnectorOptimization | |
implements PlanOptimizer | ||
{ | ||
static final Set<Class<? extends PlanNode>> CONNECTOR_ACCESSIBLE_PLAN_NODES = ImmutableSet.of( | ||
CteProducerNode.class, | ||
CteConsumerNode.class, | ||
CteReferenceNode.class, | ||
Comment on lines
+61
to
+63
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why this change? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For connector optimizers to apply in cases where plan contains these nodes |
||
DistinctLimitNode.class, | ||
FilterNode.class, | ||
TableScanNode.class, | ||
|
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
@@ -0,0 +1,76 @@ | ||||
/* | ||||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||||
* you may not use this file except in compliance with the License. | ||||
* You may obtain a copy of the License at | ||||
* | ||||
* http://www.apache.org/licenses/LICENSE-2.0 | ||||
* | ||||
* Unless required by applicable law or agreed to in writing, software | ||||
* distributed under the License is distributed on an "AS IS" BASIS, | ||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
* See the License for the specific language governing permissions and | ||||
* limitations under the License. | ||||
*/ | ||||
package com.facebook.presto.sql.planner.optimizations; | ||||
|
||||
import com.facebook.presto.common.type.RowType; | ||||
import com.facebook.presto.common.type.VarcharType; | ||||
import com.facebook.presto.common.type.Varchars; | ||||
import com.facebook.presto.spi.PrestoException; | ||||
import com.facebook.presto.spi.relation.VariableReferenceExpression; | ||||
|
||||
import java.util.List; | ||||
|
||||
import static com.facebook.presto.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR; | ||||
|
||||
public class CteUtils | ||||
{ | ||||
private CteUtils() | ||||
{ | ||||
} | ||||
|
||||
// Determines whether the CTE can be materialized. | ||||
public static boolean isCteMaterializable(List<VariableReferenceExpression> outputVariables) | ||||
{ | ||||
return outputVariables.stream().anyMatch(CteUtils::isVariableMaterializable) | ||||
&& outputVariables.stream() | ||||
.allMatch(variableReferenceExpression -> { | ||||
if (Varchars.isVarcharType(variableReferenceExpression.getType())) { | ||||
return isSupportedVarcharType((VarcharType) variableReferenceExpression.getType()); | ||||
} | ||||
return true; | ||||
}); | ||||
} | ||||
|
||||
/* | ||||
Fetches the index of the first variable that can be materialized. | ||||
ToDo: Implement usage of NDV (number of distinct values) statistics to identify the best partitioning variable, | ||||
as temporary tables are bucketed. | ||||
*/ | ||||
public static Integer getCtePartitionIndex(List<VariableReferenceExpression> outputVariables) | ||||
{ | ||||
for (int i = 0; i < outputVariables.size(); i++) { | ||||
if (isVariableMaterializable(outputVariables.get(i))) { | ||||
return i; | ||||
} | ||||
} | ||||
throw new PrestoException(GENERIC_INTERNAL_ERROR, "No Partitioning index found"); | ||||
} | ||||
|
||||
/* | ||||
Currently, Hive bucketing does not support the Presto type 'ROW'. | ||||
*/ | ||||
public static boolean isVariableMaterializable(VariableReferenceExpression var) | ||||
{ | ||||
return !(var.getType() instanceof RowType); | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what about other types not supported by Hive such as TIME? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. also, can we add tests for these types? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The recommended table format is Pagefile which doesn't require translation to hivetypes. Also, there special handling already that uses the format pagefile for these types that are not supported by hive.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add some tests like this:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added a test based on ttwithtz, thanks |
||||
} | ||||
|
||||
/* | ||||
While Presto supports Varchar of length 0 (as discussed in https://github.com/trinodb/trino/issues/1136), | ||||
Hive does not support this. | ||||
*/ | ||||
private static boolean isSupportedVarcharType(VarcharType varcharType) | ||||
{ | ||||
return (varcharType.isUnbounded() || varcharType.getLengthSafe() != 0); | ||||
} | ||||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
NestedCteStack is responsible for managing the scope of the CTE
cteStack keeps track of the current CTEs that are accessible outside this scope.
The rawCtePathMap holds the localCtes along with their respective scopes, keeping them updated.