Skip to content

Commit

Permalink
Flatten onepass member of the OnePassCompiler
Browse files Browse the repository at this point in the history
Embedding the OnePass DFA to be compiled in the OnePassCompiler
caused a few values to be unnecessarily duplicated and added an
extra level of indirection. This patch resolves that issue and
takes advantage of these move semantics I'm always hearing about.
  • Loading branch information
Ethan Pailes committed Apr 24, 2018
1 parent 103f20e commit 223e14c
Showing 1 changed file with 39 additions and 38 deletions.
77 changes: 39 additions & 38 deletions src/onepass.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,22 +50,17 @@ pub struct OnePass {
/// DFA states.
table: Vec<StatePtr>,
/// The prefixes.
///
/// TODO(ethan):yakshaving This guy is getting computed 4 times
/// at the moment, which seems a bit extra.
/// talk with @burntsushi to figure out how
/// to drop the duplicated work.
prefixes: LiteralSearcher,
/// The stride.
num_byte_classes: usize,
/// The byte classes of this regex.
byte_classes: Vec<u8>,
/// The starting state.
start_state: StatePtr,
/// True if the regex is anchored at the end.
is_anchored_end: bool,
/// True if the regex is anchored at the start.
is_anchored_start: bool,
/// True if the regex is anchored at the end.
is_anchored_end: bool,
/// True if this regex ought to only accept utf8 strings.
only_utf8: bool,
}
Expand Down Expand Up @@ -468,13 +463,19 @@ impl OnePass {

/// Compiler for a OnePass DFA
pub struct OnePassCompiler {
onepass: OnePass,
/// The flattened transition table AKA the baked form of the DFA.
table: Vec<StatePtr>,

num_byte_classes: usize,
only_utf8: bool,

/// The program to be compiled.
prog: Program,

/// A mapping from instruction indicies to their transitions
/// A mapping from instruction indices to their transitions
transitions: Vec<Option<TransitionTable>>,

/// A mapping from instruction indicies to flags indicating
/// A mapping from instruction indices to flags indicating
/// if they should have the STATE_MATCH flag set.
accepting_states: Vec<bool>,

Expand Down Expand Up @@ -555,16 +556,10 @@ impl OnePassCompiler {

trace!("new compiler for:\n{:?}", prog);
Ok(OnePassCompiler {
onepass: OnePass {
table: vec![],
prefixes: prog.prefixes.clone(),
num_byte_classes: num_byte_classes,
byte_classes: prog.byte_classes.clone(),
start_state: 0,
is_anchored_end: prog.is_anchored_end,
is_anchored_start: prog.is_anchored_start,
only_utf8: only_utf8,
},
table: vec![],
num_byte_classes: num_byte_classes,
only_utf8: only_utf8,

transitions: {
let mut x = Vec::new();
for _ in 0..prog.len() {
Expand Down Expand Up @@ -594,9 +589,17 @@ impl OnePassCompiler {
// Now emit the transitions in a form that we can actually
// execute.
self.emit_transitions();
self.onepass.start_state = 0 | STATE_ACTION;

Ok(self.onepass)
Ok(OnePass {
table: self.table,
prefixes: self.prog.prefixes,
num_byte_classes: self.num_byte_classes,
byte_classes: self.prog.byte_classes,
start_state: 0 | STATE_ACTION,
is_anchored_start: self.prog.is_anchored_start,
is_anchored_end: self.prog.is_anchored_end,
only_utf8: self.only_utf8,
})
}

/// Compile the stage 1 transition table for the state corresponding
Expand Down Expand Up @@ -629,7 +632,7 @@ impl OnePassCompiler {

let mut trans = TransitionTable(
vec![Transition { tgt: TransitionTarget::Die, priority: 0 };
self.onepass.num_byte_classes]);
self.num_byte_classes]);

// Start at priority 1 because everything is higher priority than
// the initial list of `TransitionTarget::Die` pointers.
Expand All @@ -648,7 +651,7 @@ impl OnePassCompiler {
// closed-open ranges.
for byte in (inst.start as usize)..(inst.end as usize + 1) {
let byte = byte as u8;
let bc = self.onepass.byte_classes[byte as usize];
let bc = self.prog.byte_classes[byte as usize];
trans.0[bc as usize] = Transition {
tgt: TransitionTarget::BytesInst(child_idx),
priority: priority
Expand Down Expand Up @@ -784,19 +787,19 @@ impl OnePassCompiler {
/// Once all the per-instruction transition tables have been worked
/// out, we can bake them into the single flat transition table we
/// are going to use for the actual DFA. This function creates the
/// baked form, storing it in `self.onepass.table`.
/// baked form, storing it in `self.table`.
fn emit_transitions(&mut self) {
// pre-compute the state indices
let mut state_starts = Vec::with_capacity(self.prog.len());
let mut off = 0;
for inst_idx in 0..self.prog.len() {
state_starts.push(off);
if self.transitions[inst_idx].is_some() {
off += self.onepass.num_byte_classes;
off += self.num_byte_classes;

match &self.prog[inst_idx] {
&Inst::EmptyLook(_) | &Inst::Save(_) => {
off += self.onepass.num_byte_classes;
off += self.num_byte_classes;
}
_ => {}
}
Expand All @@ -811,12 +814,10 @@ impl OnePassCompiler {
p
};

self.onepass.table.reserve(
state_starts[state_starts.len() - 1]
+ self.onepass.num_byte_classes);
self.table.reserve(state_starts[state_starts.len() - 1]
+ self.num_byte_classes);
for inst_idx in 0..self.prog.len() {
let mut trans = Vec::with_capacity(
self.onepass.num_byte_classes * 2);
let mut trans = Vec::with_capacity(self.num_byte_classes * 2);

match &self.transitions[inst_idx] {
&None => continue,
Expand All @@ -834,26 +835,26 @@ impl OnePassCompiler {
}
}

self.onepass.table.extend(trans);
self.table.extend(trans);

// emit all the right window dressing for the action, if
// there is one.
match &self.prog[inst_idx] {
&Inst::Save(ref inst) => {
debug_assert!(self.onepass.num_byte_classes >= 2);
debug_assert!(self.num_byte_classes >= 2);

let mut save_args = vec![
Action::Save as StatePtr,
inst.slot as StatePtr];
save_args.extend(vec![STATE_POISON;
self.onepass.num_byte_classes - 2]);
self.onepass.table.extend(save_args);
self.num_byte_classes - 2]);
self.table.extend(save_args);
}
&Inst::EmptyLook(ref inst) => {
let mut el_args = vec![self.empty_look_action(inst.look)];
el_args.extend(vec![STATE_POISON;
self.onepass.num_byte_classes - 1]);
self.onepass.table.extend(el_args);
self.num_byte_classes - 1]);
self.table.extend(el_args);
}
_ => {}
}
Expand Down

0 comments on commit 223e14c

Please sign in to comment.