-
Notifications
You must be signed in to change notification settings - Fork 241
/
finkelstein.rs
210 lines (188 loc) · 8.16 KB
/
finkelstein.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
use sql::reuse::helpers::predicate_implication::complex_predicate_implies;
use sql::reuse::{ReuseConfiguration, ReuseType};
use sql::query_graph::{QueryGraph, QueryGraphEdge};
use mir::query::MirQuery;
use std::vec::Vec;
use std::collections::HashMap;
pub struct Finkelstein;
/// Finkelstein reuse algorithm.
/// This algorithm checks if any existing query graphs are generalizations
/// of the query graph of the new query being added.
/// For Soup's purpose this algorithm is sometimes too strict as it only
/// considers reuse of final materializations, not intermediate ones.
impl ReuseConfiguration for Finkelstein {
fn reuse_candidates<'a>(
qg: &QueryGraph,
query_graphs: &'a HashMap<u64, (QueryGraph, MirQuery)>,
) -> Vec<(ReuseType, &'a QueryGraph)> {
let mut reuse_candidates = Vec::new();
for &(ref existing_qg, _) in query_graphs.values() {
if existing_qg
.signature()
.is_generalization_of(&qg.signature())
{
match Self::check_compatibility(&qg, existing_qg) {
Some(reuse) => {
// QGs are compatible, we can reuse `existing_qg` as part of `qg`!
reuse_candidates.push((reuse, existing_qg));
}
None => (),
}
}
}
if reuse_candidates.len() > 0 {
vec![Self::choose_best_option(reuse_candidates)]
} else {
reuse_candidates
}
}
}
impl Finkelstein {
fn choose_best_option<'a>(
options: Vec<(ReuseType, &'a QueryGraph)>,
) -> (ReuseType, &'a QueryGraph) {
let mut best_choice = None;
let mut best_score = 0;
for (o, qg) in options {
let mut score = 0;
// crude scoring: direct extension always preferrable over backjoins; reusing larger
// queries is also preferrable as they are likely to cover a larger fraction of the new
// query's nodes. Edges (group by, join) count for more than extra relations.
match o {
ReuseType::DirectExtension => {
score += 2 * qg.relations.len() + 4 * qg.edges.len() + 10;
}
ReuseType::BackjoinRequired(_) => {
score += qg.relations.len() + 3 * qg.edges.len();
}
_ => unreachable!(),
}
if score > best_score {
best_score = score;
best_choice = Some((o, qg));
}
}
assert!(best_score > 0);
best_choice.unwrap()
}
fn check_compatibility(new_qg: &QueryGraph, existing_qg: &QueryGraph) -> Option<ReuseType> {
// 1. NQG's nodes is subset of EQG's nodes
// -- already established via signature check
// 2. NQG's attributes is subset of NQG's edges
// -- already established via signature check
assert!(
existing_qg
.signature()
.is_generalization_of(&new_qg.signature())
);
// 3. NQC's edges are superset of EQG's
// (N.B.: this does not yet consider the relationships of the edge predicates; we do that
// below in the next step.)
for e in &existing_qg.edges {
if !new_qg.edges.contains_key(e.0) {
return None;
}
}
// 4. NQG's predicates imply EQG's
// 4a. on nodes
for (name, ex_qgn) in &existing_qg.relations {
let new_qgn = &new_qg.relations[name];
// iterate over predicates and ensure that each matching
// one on the existing QG is implied by the new one
for ep in &ex_qgn.predicates {
let mut matched = false;
for np in &new_qgn.predicates {
if complex_predicate_implies(np, ep) {
matched = true;
break;
}
}
if !matched {
// We found no matching predicate for np, so we give up now.
// trace!(log, "Failed: no matching predicate for {:#?}", ep);
return None;
}
}
}
// 4b. on edges
for (srcdst, ex_qge) in &existing_qg.edges {
let new_qge = &new_qg.edges[srcdst];
match *ex_qge {
QueryGraphEdge::GroupBy(ref ex_columns) => {
match *new_qge {
QueryGraphEdge::GroupBy(ref new_columns) => {
// GroupBy implication holds if the new QG groups by the same columns as
// the original one, or by a *superset* (as we can always apply more
// grouped operatinos on top of earlier ones)
if new_columns.len() < ex_columns.len() {
// more columns in existing QG's GroupBy, so we're done
return None;
}
for ex_col in ex_columns {
// EQG groups by a column that we don't group by, so we can't reuse
if !new_columns.contains(ex_col) {
return None;
}
}
}
// If there is no matching GroupBy edge, we cannot reuse
_ => return None,
}
}
QueryGraphEdge::Join(_) => {
match *new_qge {
QueryGraphEdge::Join(_) => {}
// If there is no matching Join edge, we cannot reuse
_ => return None,
}
}
QueryGraphEdge::LeftJoin(_) => {
match *new_qge {
QueryGraphEdge::LeftJoin(_) => {}
// If there is no matching LeftJoin edge, we cannot reuse
_ => return None,
}
}
}
}
// we don't need to check projected columns to reuse a prefix of the query
return Some(ReuseType::DirectExtension);
// 5. Consider projected columns
// 5a. NQG projects a subset of EQG's edges --> can use directly
// for (name, ex_qgn) in &existing_qg.relations {
// let new_qgn = &new_qg.relations[name];
// // does EQG already have *all* the columns we project?
// let all_projected = new_qgn
// .columns
// .iter()
// .all(|nc| ex_qgn.columns.contains(nc));
// if all_projected {
// // if so, super -- we can extend directly
// return Some(ReuseType::DirectExtension);
// } else {
// if name == "computed_columns" {
// // NQG has some extra columns, and they're computed ones
// // (i.e., grouped/function columns).
// // We can recompute those, but not via a backjoin.
// // TODO(malte): be cleverer about this situation
// return None;
// }
// // find the extra columns in the EQG to identify backjoins required
// let backjoin_tables: Vec<_> = new_qgn
// .columns
// .iter()
// .filter(|nc| !ex_qgn.columns.contains(nc) && nc.table.is_some())
// .map(|c| Table::from(c.table.as_ref().unwrap().as_str()))
// .collect();
// if backjoin_tables.len() > 0 {
// return Some(ReuseType::BackjoinRequired(backjoin_tables));
// } else {
// panic!("expected to find some backjoin tables!");
// }
// }
// }
// XXX(malte): 5b. NQG projects a superset of EQG's edges --> need backjoin
// XXX(malte): should this be a positive? If so, what?
// None
}
}