-
Notifications
You must be signed in to change notification settings - Fork 241
/
relaxed.rs
171 lines (156 loc) · 7.28 KB
/
relaxed.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
use sql::reuse::helpers::predicate_implication::complex_predicate_implies;
use sql::reuse::{ReuseConfiguration, ReuseType};
use sql::query_graph::{QueryGraph, QueryGraphEdge};
use mir::query::MirQuery;
use std::vec::Vec;
use std::collections::HashMap;
/// Implementation of reuse algorithm with relaxed constraints.
/// While Finkelstein checks if queries are compatible for direct extension,
/// this algorithm considers the possibility of reuse of internal views.
/// For example, given the queries:
/// 1) select * from Paper, PaperReview
/// where Paper.paperId = PaperReview.paperId
/// and PaperReview.reviewType = 1;
/// 2) select * from Paper, PaperReview where Paper.paperId = PaperReview.paperId;
///
/// Finkelstein reuse would be conditional on the order the queries are added,
/// since 1) is a direct extension of 2), but not the other way around.
///
/// This relaxed version of the reuse algorithm considers cases where a query might
/// reuse just a prefix of another. First, it checks if the queries perform the
/// same joins, then it checks predicate implication and at last, checks group
/// by compatibility.
/// If all checks pass, them the algorithm works like Finkelstein and the query
/// is a direct extension of the other. However, if not all, but at least one
/// check passes, then the algorithm returns that the queries have a prefix in
/// common.
pub struct Relaxed;
impl ReuseConfiguration for Relaxed {
fn reuse_candidates<'a>(
qg: &QueryGraph,
query_graphs: &'a HashMap<u64, (QueryGraph, MirQuery)>,
) -> Vec<(ReuseType, &'a QueryGraph)> {
let mut reuse_candidates = Vec::new();
for &(ref existing_qg, _) in query_graphs.values() {
if existing_qg
.signature()
.is_weak_generalization_of(&qg.signature())
{
match Self::check_compatibility(&qg, existing_qg) {
Some(reuse) => {
// QGs are compatible, we can reuse `existing_qg` as part of `qg`!
reuse_candidates.push((reuse, existing_qg));
}
None => (),
}
}
}
reuse_candidates
}
}
impl Relaxed {
fn check_compatibility(new_qg: &QueryGraph, existing_qg: &QueryGraph) -> Option<ReuseType> {
// 1. NQG's nodes is subset of EQG's nodes
// -- already established via signature check
assert!(
existing_qg
.signature()
.is_weak_generalization_of(&new_qg.signature())
);
// Check if the queries are join compatible -- if the new query
// performs a superset of the joins in the existing query.
// TODO 1: this currently only checks that the joins use the same
// tables. some possible improvements are:
// 1) relaxing to fail only on non-disjoint join sets
// 2) constraining to also check implication of join predicates
// TODO 2: malte's suggestion of possibly reuse LeftJoin as a
// plain Join by simply adding a filter that discards rows with
// NULLs in the right side columns
for (srcdst, ex_qge) in &existing_qg.edges {
match *ex_qge {
QueryGraphEdge::Join(_) => {
if !new_qg.edges.contains_key(srcdst) {
return None;
}
let new_qge = &new_qg.edges[srcdst];
match *new_qge {
QueryGraphEdge::Join(_) => {}
// If there is no matching Join edge, we cannot reuse
_ => return None,
}
}
QueryGraphEdge::LeftJoin(_) => {
if !new_qg.edges.contains_key(srcdst) {
return None;
}
let new_qge = &new_qg.edges[srcdst];
match *new_qge {
QueryGraphEdge::LeftJoin(_) => {}
// If there is no matching LeftJoin edge, we cannot reuse
_ => return None,
}
}
_ => continue,
}
}
// Checks group by compatibility between queries.
for (srcdst, ex_qge) in &existing_qg.edges {
match *ex_qge {
QueryGraphEdge::GroupBy(ref ex_columns) => {
if !new_qg.edges.contains_key(srcdst) {
return Some(ReuseType::PrefixReuse);
}
let new_qge = &new_qg.edges[srcdst];
match *new_qge {
QueryGraphEdge::GroupBy(ref new_columns) => {
// GroupBy implication holds if the new QG groups by the same columns as
// the original one, or by a *superset* (as we can always apply more
// grouped operatinos on top of earlier ones)
if new_columns.len() < ex_columns.len() {
// more columns in existing QG's GroupBy, so we're done
// however, we can still reuse joins and predicates.
return Some(ReuseType::PrefixReuse);
}
for ex_col in ex_columns {
// EQG groups by a column that we don't group by, so we can't reuse
// the group by nodes, but we can still reuse joins and predicates.
if !new_columns.contains(ex_col) {
return Some(ReuseType::PrefixReuse);
}
}
}
// If there is no matching GroupBy edge, we cannot reuse the group by clause
_ => return Some(ReuseType::PrefixReuse),
}
}
_ => continue,
}
}
// Check that the new query's predicates imply the existing query's predicate.
for (name, ex_qgn) in &existing_qg.relations {
if !new_qg.relations.contains_key(name) {
return Some(ReuseType::PrefixReuse);
}
let new_qgn = &new_qg.relations[name];
// iterate over predicates and ensure that each
// matching one on the existing QG is implied by the new one
for ep in &ex_qgn.predicates {
let mut matched = false;
for np in &new_qgn.predicates {
if complex_predicate_implies(np, ep) {
matched = true;
break;
}
}
if !matched {
// We found no matching predicate for np, so we give up now.
// However, we can still reuse the join nodes from the existing query.
return Some(ReuseType::PrefixReuse);
}
}
}
// projected columns don't influence the reuse opportunities in this case, since
// we are only trying to reuse the query partially, not completely extending it.
return Some(ReuseType::DirectExtension);
}
}