HIVE-24167: Test. How about relaxing validations
okumin committed May 13, 2024
1 parent 0afb02b commit dd5be15
Showing 17 changed files with 1,530 additions and 896 deletions.
2 changes: 2 additions & 0 deletions common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -5674,6 +5674,8 @@ public static enum ConfVars {
         "If runtime stats are stored in metastore; the maximal batch size per round during load."),
     HIVE_QUERY_REEXECUTION_STATS_CACHE_SIZE("hive.query.reexecution.stats.cache.size", 100_000,
         "Size of the runtime statistics cache. Unit is: OperatorStat entry; a query plan consist ~100."),
+    HIVE_QUERY_PLANMAPPER_STRICT_VALIDATION("hive.query.planmapper.strict.validation", false,
+        "Internal use only. Whether to raise an error when unexpected links are found."),
     HIVE_QUERY_PLANMAPPER_LINK_RELNODES("hive.query.planmapper.link.relnodes", true,
         "Whether to link Calcite nodes to runtime statistics."),
     HIVE_QUERY_MAX_RECOMPILATION_COUNT("hive.query.recompilation.max.count", 1,
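The new flag is the heart of this change: with the default of false, an unexpected link in the PlanMapper is logged instead of aborting the query, while the test configurations below re-enable the old fail-fast behavior. A minimal, hedged sketch of reading the flag (not part of this commit; assumes a Hive classpath; the StrictValidationProbe class is hypothetical):

import org.apache.hadoop.hive.conf.HiveConf;

// Illustrative only: probes the new flag the same way PlanMapper does below.
public class StrictValidationProbe {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    boolean strict = HiveConf.getBoolVar(
        conf, HiveConf.ConfVars.HIVE_QUERY_PLANMAPPER_STRICT_VALIDATION);
    // Defaults to false; the test hive-site.xml files in this commit set it to true.
    System.out.println("hive.query.planmapper.strict.validation = " + strict);
  }
}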
5 changes: 5 additions & 0 deletions data/conf/hive-site.xml
@@ -336,6 +336,11 @@
   <value>TEST</value>
 </property>
 
+<property>
+  <name>hive.query.planmapper.strict.validation</name>
+  <value>true</value>
+</property>
+
 <property>
   <name>iceberg.hive.keep.stats</name>
   <value>true</value>
5 changes: 5 additions & 0 deletions data/conf/iceberg/llap/hive-site.xml
@@ -385,6 +385,11 @@
   <value>TEST</value>
 </property>
 
+<property>
+  <name>hive.query.planmapper.strict.validation</name>
+  <value>true</value>
+</property>
+
 <property>
   <name>iceberg.hive.keep.stats</name>
   <value>true</value>
5 changes: 5 additions & 0 deletions data/conf/iceberg/tez/hive-site.xml
@@ -309,6 +309,11 @@
   <value>TEST</value>
 </property>
 
+<property>
+  <name>hive.query.planmapper.strict.validation</name>
+  <value>true</value>
+</property>
+
 <property>
   <name>iceberg.hive.keep.stats</name>
   <value>true</value>
5 changes: 5 additions & 0 deletions data/conf/llap/hive-site.xml
@@ -399,6 +399,11 @@
   <value>TEST</value>
 </property>
 
+<property>
+  <name>hive.query.planmapper.strict.validation</name>
+  <value>true</value>
+</property>
+
 <property>
   <name>hive.txn.xlock.ctas</name>
   <value>false</value>
5 changes: 5 additions & 0 deletions data/conf/rlist/hive-site.xml
@@ -30,6 +30,11 @@
   <description>Internal marker for test. Used for masking env-dependent values</description>
 </property>
 
+<property>
+  <name>hive.query.planmapper.strict.validation</name>
+  <value>true</value>
+</property>
+
 <!-- Hive Configuration can either be stored in this file or in the hadoop configuration files -->
 <!-- that are implied by Hadoop setup variables. -->
 <!-- Aside from Hadoop setup variables - this file is provided as a convenience so that Hive -->
5 changes: 5 additions & 0 deletions data/conf/tez/hive-site.xml
@@ -310,6 +310,11 @@
   <value>TEST</value>
 </property>
 
+<property>
+  <name>hive.query.planmapper.strict.validation</name>
+  <value>true</value>
+</property>
+
 <property>
   <name>hive.txn.xlock.ctas</name>
   <value>false</value>
3 changes: 2 additions & 1 deletion ql/src/java/org/apache/hadoop/hive/ql/Context.java
@@ -176,7 +176,7 @@ public class Context {
   private WmContext wmContext;
 
   private boolean isExplainPlan = false;
-  private PlanMapper planMapper = new PlanMapper();
+  private PlanMapper planMapper;
   private StatsSource statsSource;
   private int executionIndex;
 
@@ -417,6 +417,7 @@ private Context(Configuration conf, String executionId) {
         HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_ENABLE_AUTO_REWRITING_SQL) ||
         HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_MATERIALIZED_VIEW_ENABLE_AUTO_REWRITING_SUBQUERY_SQL);
     scheduledQuery = false;
+    planMapper = new PlanMapper(conf);
   }
 
   protected Context(Context ctx) {
@@ -38,6 +38,10 @@ public class RuntimeStatsPersistenceCheckerHook implements ExecuteWithHookContext
   public void run(HookContext hookContext) throws Exception {
 
     PlanMapper pm = ((PrivateHookContext) hookContext).getContext().getPlanMapper();
+    if (pm.isBroken()) {
+      LOG.warn("Skip checking signatures. The PlanMapper is broken");
+      return;
+    }
 
     List<OpTreeSignature> sigs = pm.getAll(OpTreeSignature.class);
 
33 changes: 0 additions & 33 deletions ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
@@ -26,7 +26,6 @@
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.IdentityHashMap;
-import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.LinkedHashSet;
 import java.util.LinkedList;
@@ -100,7 +99,6 @@
 import org.apache.hadoop.hive.ql.optimizer.SharedWorkOptimizer;
 import org.apache.hadoop.hive.ql.optimizer.SortedDynPartitionOptimizer;
 import org.apache.hadoop.hive.ql.optimizer.topnkey.TopNKeyProcessor;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
 import org.apache.hadoop.hive.ql.optimizer.correlation.ReduceSinkDeDuplication;
 import org.apache.hadoop.hive.ql.optimizer.topnkey.TopNKeyPushdownProcessor;
 import org.apache.hadoop.hive.ql.optimizer.correlation.ReduceSinkJoinDeDuplication;
@@ -117,7 +115,6 @@
 import org.apache.hadoop.hive.ql.optimizer.physical.SerializeFilter;
 import org.apache.hadoop.hive.ql.optimizer.physical.StageIDsRearranger;
 import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer;
-import org.apache.hadoop.hive.ql.optimizer.signature.OpTreeSignature;
 import org.apache.hadoop.hive.ql.optimizer.stats.annotation.AnnotateWithStatistics;
 import org.apache.hadoop.hive.ql.plan.AggregationDesc;
 import org.apache.hadoop.hive.ql.plan.AppMasterEventDesc;
@@ -139,7 +136,6 @@
 import org.apache.hadoop.hive.ql.plan.TezWork;
 import org.apache.hadoop.hive.ql.plan.mapper.AuxOpTreeSignature;
 import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper;
-import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper.EquivGroup;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
 import org.apache.hadoop.hive.ql.stats.OperatorStats;
@@ -991,35 +987,6 @@ private void removeSemiJoinIfNoStats(OptimizeTezProcContext procCtx)
     ogw.startWalking(topNodes, null);
   }
 
-  private static class CollectAll implements SemanticNodeProcessor {
-    private PlanMapper planMapper;
-
-    @Override
-    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, Object... nodeOutputs)
-        throws SemanticException {
-      ParseContext pCtx = ((OptimizeTezProcContext) procCtx).parseContext;
-      planMapper = pCtx.getContext().getPlanMapper();
-      FilterOperator fop = (FilterOperator) nd;
-      OpTreeSignature sig = planMapper.getSignatureOf(fop);
-      List<EquivGroup> ar = getGroups(planMapper, HiveFilter.class);
-
-
-      return nd;
-    }
-
-    private List<EquivGroup> getGroups(PlanMapper planMapper2, Class<HiveFilter> class1) {
-      Iterator<EquivGroup> it = planMapper.iterateGroups();
-      List<EquivGroup> ret = new ArrayList<PlanMapper.EquivGroup>();
-      while (it.hasNext()) {
-        EquivGroup g = it.next();
-        if (g.getAll(class1).size() > 0) {
-          ret.add(g);
-        }
-      }
-      return ret;
-    }
-  }
-
   private static class MarkRuntimeStatsAsIncorrect implements SemanticNodeProcessor {
 
     private PlanMapper planMapper;
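The deleted CollectAll processor was dead debugging code: its process method computed an OpTreeSignature and a list of equivalence groups but used neither before returning the node unchanged, so dropping it, together with the now-unused Iterator, HiveFilter, OpTreeSignature, and EquivGroup imports above, has no behavioral effect.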
35 changes: 24 additions & 11 deletions ql/src/java/org/apache/hadoop/hive/ql/plan/mapper/PlanMapper.java
@@ -32,21 +32,32 @@
 import java.util.Objects;
 import java.util.Set;
 
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.optimizer.signature.OpTreeSignature;
 import org.apache.hadoop.hive.ql.optimizer.signature.OpTreeSignatureFactory;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.Sets;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * Enables to connect related objects to eachother.
  *
  * Most importantly it aids to connect Operators to OperatorStats and probably RelNodes.
  */
 public class PlanMapper {
+  private static final Logger LOG = LoggerFactory.getLogger(PlanMapper.class);
 
-  Set<EquivGroup> groups = new HashSet<>();
-  private Map<Object, EquivGroup> objectMap = new CompositeMap<>(OpTreeSignature.class, AuxOpTreeSignature.class);
+  private final Set<EquivGroup> groups = new HashSet<>();
+  private final Map<Object, EquivGroup> objectMap = new CompositeMap<>(OpTreeSignature.class, AuxOpTreeSignature.class);
+  private final boolean failsOnBroken;
+  private boolean isBroken = false;
+
+  public PlanMapper(Configuration conf) {
+    failsOnBroken = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_QUERY_PLANMAPPER_STRICT_VALIDATION);
+  }
 
   /**
    * Specialized class which can compare by identity or value; based on the key type.
@@ -217,7 +228,11 @@ private void link(Object o1, Object o2, boolean mayMerge) {
     }
     if (mGroups.size() > 1) {
       if (!mayMerge) {
-        throw new RuntimeException("equivalence mapping violation");
+        LOG.error("Illegally linking {} and {}", o1, o2);
+        if (failsOnBroken) {
+          throw new RuntimeException("equivalence mapping violation");
+        }
+        isBroken = true;
       }
       EquivGroup newGrp = new EquivGroup();
       newGrp.add(o1);
@@ -248,6 +263,10 @@ private Object getKeyFor(Object o) {
     return o;
   }
 
+  public boolean isBroken() {
+    return isBroken;
+  }
+
   public <T> List<T> getAll(Class<T> clazz) {
     List<T> ret = new ArrayList<>();
     for (EquivGroup g : groups) {
@@ -256,20 +275,15 @@ public <T> List<T> getAll(Class<T> clazz) {
     return ret;
   }
 
-  public void runMapper(GroupTransformer mapper) {
-    for (EquivGroup equivGroup : groups) {
-      mapper.map(equivGroup);
-    }
-  }
 
-  public <T> List<T> lookupAll(Class<T> clazz, Object key) {
+  private <T> List<T> lookupAll(Class<T> clazz, Object key) {
     EquivGroup group = objectMap.get(key);
     if (group == null) {
       throw new NoSuchElementException(Objects.toString(key));
     }
     return group.getAll(clazz);
   }
 
-  @VisibleForTesting
   public <T> T lookup(Class<T> clazz, Object key) {
     List<T> all = lookupAll(clazz, key);
     if (all.size() != 1) {
@@ -279,7 +293,6 @@ public <T> T lookup(Class<T> clazz, Object key) {
     return all.get(0);
   }
 
-  @VisibleForTesting
   public Iterator<EquivGroup> iterateGroups() {
     return groups.iterator();
 
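For context, a hedged sketch (not part of the commit) of the consumer-side contract these methods establish: anything that reads runtime statistics out of a PlanMapper should first ask isBroken() and degrade gracefully, exactly as the hooks in this commit do. The BrokenAwareConsumer class and its consume method are hypothetical; PlanMapper and OperatorStats are the existing Hive classes.

import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper;
import org.apache.hadoop.hive.ql.stats.OperatorStats;

public class BrokenAwareConsumer {
  static void consume(PlanMapper pm) {
    if (pm.isBroken()) {
      // An illegal link was observed, so stats may be attached to the
      // wrong operators; skip them rather than feed bad numbers downstream.
      return;
    }
    for (OperatorStats stats : pm.getAll(OperatorStats.class)) {
      System.out.println(stats); // e.g. persist them or drive re-optimization
    }
  }
}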
@@ -77,6 +77,11 @@ public static StatsSource getStatsSourceContaining(StatsSource currentStatsSource
 
   private static ImmutableList<PersistedRuntimeStats> extractStatsFromPlanMapper(PlanMapper pm) {
     Builder<PersistedRuntimeStats> li = ImmutableList.builder();
+    if (pm.isBroken()) {
+      LOG.warn("Don't generate any stats. This PlanMapper is broken");
+      return li.build();
+    }
+
     Iterator<EquivGroup> it = pm.iterateGroups();
     while (it.hasNext()) {
       EquivGroup e = it.next();
@@ -102,6 +102,14 @@ public void prepareToReExecute() {
 
   @Override
   public boolean shouldReExecuteAfterCompile(int executionNum, PlanMapper oldPlanMapper, PlanMapper newPlanMapper) {
+    if (oldPlanMapper.isBroken() || newPlanMapper.isBroken()) {
+      LOG.warn(
+          "Giving up a re-execution. The old plan mapper is {}, and the new one is {}",
+          oldPlanMapper.isBroken() ? "broken" : "not broken",
+          newPlanMapper.isBroken() ? "broken" : "not broken");
+      return false;
+    }
+
     boolean planDidChange = !planEquals(oldPlanMapper, newPlanMapper);
     LOG.info("planDidChange: {}", planDidChange);
     return planDidChange;
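The guard above is small enough to restate in isolation. A hedged, dependency-free sketch (shouldReExecute is a hypothetical stand-in for shouldReExecuteAfterCompile): a broken plan mapper on either side vetoes re-execution, because comparing plans through misattributed statistics would be meaningless.

public class ReExecGuardSketch {
  static boolean shouldReExecute(boolean oldBroken, boolean newBroken, boolean planDidChange) {
    if (oldBroken || newBroken) {
      return false; // give up re-execution, as the plugin change above does
    }
    return planDidChange;
  }

  public static void main(String[] args) {
    System.out.println(shouldReExecute(false, false, true)); // true
    System.out.println(shouldReExecute(true, false, true));  // false
  }
}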
1 change: 0 additions & 1 deletion ql/src/test/queries/clientpositive/perf/cbo_query14.q
@@ -1,4 +1,3 @@
---! qt:disabled:HIVE-24167
 set hive.mapred.mode=nonstrict;
 -- start query 1 in stream 0 using template query14.tpl and seed 1819994127
 explain cbo
1 change: 0 additions & 1 deletion ql/src/test/queries/clientpositive/perf/query14.q
@@ -1,4 +1,3 @@
---! qt:disabled:HIVE-24167
 set hive.mapred.mode=nonstrict;
 -- start query 1 in stream 0 using template query14.tpl and seed 1819994127
 explain
