Skip to content

Commit

Permalink
Implement a basic version of %parse-param.
Browse files Browse the repository at this point in the history
We previously could parse `%parse_param` but not do anything with it.
This commit adds support for `%parse-param` (using Bison's spelling)
which, if defined, allows the user to pass an extra parameter to the
parser. See `lrpar/cttests/src/parseparam.test` for an example:

```
%start S
%parse-param p: &'input u64
%%
S -> u64:
    'INT' { *p + $lexer.span_str($1.unwrap().span()).parse::<u64>().unwrap() }
;
%%
```

Now the generated parser can be called with two parameters
`parse(&lexer, &1234)`.

Since Rust has tuples, `parse-param` only needs to take a single
parameter (whereas Bison allows multiple extra parameters), which is
fortunate, because Rust doesn't have varargs, which would make dealing
with multiple parameters extremely difficult.

This commit is a bit simple in some ways: the type of the extra
parameter must implement `Copy` and, if it's a reference, it must be
tied to the `'input` lifetime. Perhaps these restrictions could be
relaxed one day, but this is a start, and better than nothing.
  • Loading branch information
ltratt committed Oct 7, 2021
1 parent 21e2532 commit 3298c50
Show file tree
Hide file tree
Showing 10 changed files with 169 additions and 165 deletions.
6 changes: 2 additions & 4 deletions cfgrammar/src/lib/yacc/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,11 @@ pub struct GrammarAST {
pub precs: HashMap<String, Precedence>,
pub avoid_insert: Option<HashSet<String>>,
pub implicit_tokens: Option<HashSet<String>>,
pub parse_param_bindings: Option<Vec<(String, String)>>,
pub parse_param_lifetimes: Option<HashSet<String>>,
// Error pretty-printers
pub epp: HashMap<String, String>,
pub expect: Option<usize>,
pub expectrr: Option<usize>,
pub parse_param: Option<(String, String)>,
pub programs: Option<String>,
}

Expand Down Expand Up @@ -123,9 +122,8 @@ impl GrammarAST {
epp: HashMap::new(),
expect: None,
expectrr: None,
parse_param: None,
programs: None,
parse_param_bindings: None,
parse_param_lifetimes: None,
}
}

Expand Down
22 changes: 5 additions & 17 deletions cfgrammar/src/lib/yacc/grammar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,9 @@ pub struct YaccGrammar<StorageT = u32> {
implicit_rule: Option<RIdx<StorageT>>,
/// User defined Rust programs which can be called within actions
actions: Vec<Option<String>>,
/// Extra Parameters to the parse function, for user actions.
param_args: Vec<(String, String)>,
/// A `(name, type)` pair defining an extra parameter to pass to action functions.
parse_param: Option<(String, String)>,
/// Lifetimes for `param_args`
param_lifetimes: Vec<String>,
/// The programs section of a grammar, if specified; otherwise `None`.
programs: Option<String>,
/// The actiontypes of rules (one per rule).
actiontypes: Vec<Option<String>>,
Expand Down Expand Up @@ -343,13 +341,7 @@ where
prod_precs: prod_precs.into_iter().map(Option::unwrap).collect(),
implicit_rule: implicit_rule.map(|x| rule_map[&x]),
actions,
param_args: ast.parse_param_bindings.iter().flatten().cloned().collect(),
param_lifetimes: ast
.parse_param_lifetimes
.iter()
.flatten()
.cloned()
.collect(),
parse_param: ast.parse_param,
programs: ast.programs,
avoid_insert,
actiontypes,
Expand Down Expand Up @@ -492,12 +484,8 @@ where
&self.actiontypes[usize::from(ridx)]
}

pub fn param_args(&self) -> &Vec<(String, String)> {
&self.param_args
}

pub fn param_lifetimes(&self) -> &Vec<String> {
&self.param_lifetimes
pub fn parse_param(&self) -> &Option<(String, String)> {
&self.parse_param
}

/// Get the programs part of the grammar
Expand Down
130 changes: 18 additions & 112 deletions cfgrammar/src/lib/yacc/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -225,8 +225,18 @@ impl YaccParser {
}
continue;
}
if let Some(j) = self.lookahead_is("%parse_param", i) {
i = self.parse_param(j)?;
if let Some(j) = self.lookahead_is("%parse-param", i) {
i = self.parse_ws(j, false)?;
let (j, name) = self.parse_to_single_colon(i)?;
match self.lookahead_is(":", j) {
Some(j) => i = self.parse_ws(j, false)?,
None => {
return Err(self.mk_error(YaccParserErrorKind::MissingColon, j));
}
}
let (j, ty) = self.parse_to_eol(i)?;
self.ast.parse_param = Some((name, ty));
i = self.parse_ws(j, true)?;
continue;
}
if let YaccKind::Eco = self.yacc_kind {
Expand Down Expand Up @@ -449,95 +459,6 @@ impl YaccParser {
Ok(i)
}

// Handle parse_param declarations of the form:
// %parse_param <'a>(x: u32, y : (u32, u32))
//
// `i` is the offset in `self.src` at which to start (presumably just past the
// `%parse_param` keyword -- confirm against the caller). An optional `<...>`
// lifetime list is stored in `self.ast.parse_param_lifetimes`, an optional
// `(pattern: type, ...)` list is stored in `self.ast.parse_param_bindings`, and
// whatever remains on the line is consumed and discarded. Returns the offset
// produced by the final `parse_ws` call.
fn parse_param(&mut self, mut i: usize) -> YaccResult<usize> {
i = self.parse_ws(i, false)?;
// First gobble up all of the '<' lifetime ',' ... '>'
// `j` marks the start of the lifetime currently being scanned (presumably just
// past the '<' -- depends on what `lookahead_is` returns) and `k` is the cursor.
if let Some(mut j) = self.lookahead_is("<", i) {
let mut k = j;
let mut lifetimes = HashSet::new();
// Records the trimmed text `src[j..k]` as a lifetime and returns the offset
// just past the delimiter `c` found at `k`.
let mut add_lifetime = |j, k, c: char| {
let s = self.src[j..k].trim().to_string();
lifetimes.insert(s);
k + c.len_utf8()
};

// NOTE(review): if the input ends before a '>' is seen, this loop exits
// without an error and whatever was collected so far is stored below.
while k < self.src.len() {
let c = self.src[k..].chars().next().unwrap();
match c {
// The lifetime list must be closed on the line it was opened on.
'\n' | '\r' => return Err(self.mk_error(YaccParserErrorKind::ReachedEOL, k)),
',' => {
k = add_lifetime(j, k, ',');
// The next lifetime starts immediately after the comma.
j = k;
}
'>' => {
k = add_lifetime(j, k, '>');
break;
}
_ => k += c.len_utf8(),
}
}
self.ast.parse_param_lifetimes = Some(lifetimes);
i = k;
}

// Next, the '(' pattern : type, ... ')'
i = self.parse_ws(i, false)?;
if self.lookahead_is("(", i).is_some() {
// `j` starts on the '(' itself; each iteration first steps over one
// delimiter character: the '(' on the first pass and, on later passes, the
// ',' that `parse_param_rust_type` stopped on.
let mut j = i;
let mut bindings: Vec<(String, String)> = Vec::new();
while j < self.src.len() && self.lookahead_is(")", j).is_none() {
let c = self.src[j..].chars().next().unwrap();
j += c.len_utf8();

// Some binding name, or pattern.
j = self.parse_ws(j, false)?;
let (k, binding) = self.parse_to_single_colon(j)?;
// Skip the ':' separating the pattern from its type.
let (k, typ) = self.parse_param_rust_type(k + ':'.len_utf8())?;
j = k;
bindings.push((binding.trim_end().to_string(), typ));
}
// An empty `()` leaves `parse_param_bindings` untouched.
if !bindings.is_empty() {
self.ast.parse_param_bindings = Some(bindings);
}
i = j;
}
// Anything left on the line (including the closing ')') is read and ignored.
let (i, _) = self.parse_to_eol(i)?;
self.parse_ws(i, true)
}

// Parse a Rust type, followed by either a ',' character or an unbalanced ')'.
// Returns the byte index of that trailing character (which is not consumed)
// together with the type's text, with trailing whitespace trimmed.
//
// Errors:
//   * `ReachedEOL` if a newline or carriage return is seen before the type ends.
//   * `MismatchedBrace` if a closing '}', '>' or ']' is seen at nesting depth 0.
//   * `PrematureEnd` if the input ends before a terminating ',' or ')'.
fn parse_param_rust_type(&mut self, i: usize) -> YaccResult<(usize, String)> {
let i = self.parse_ws(i, false)?;
let mut j = i;
// Nesting depth across all bracket kinds; openers and closers are counted
// but not checked against each other (e.g. '(' followed by ']' balances).
let mut brace_count = 0;

while j < self.src.len() {
let c = self.src[j..].chars().next().unwrap();
match c {
'\n' | '\r' => return Err(self.mk_error(YaccParserErrorKind::ReachedEOL, j)),
// A ',' or ')' outside any nesting terminates the type.
')' | ',' if brace_count == 0 => {
return Ok((j, self.src[i..j].trim_end().to_string()));
}
'(' | '{' | '[' | '<' => {
brace_count += 1;
j += c.len_utf8();
}
// A ')' only reaches this arm when nested, because the guarded arm above
// catches it at depth 0; the depth check here therefore fires for '}', '>'
// and ']' only.
')' | '}' | '>' | ']' => {
if brace_count == 0 {
return Err(self.mk_error(YaccParserErrorKind::MismatchedBrace, j));
}
brace_count -= 1;
j += c.len_utf8();
}
c => j += c.len_utf8(),
}
}
Err(self.mk_error(YaccParserErrorKind::PrematureEnd, j))
}

/// Parse up to (but do not include) the end of line (or, if it comes sooner, the end of file).
fn parse_to_eol(&mut self, i: usize) -> YaccResult<(usize, String)> {
let mut j = i;
Expand Down Expand Up @@ -751,8 +672,6 @@ impl YaccParser {

#[cfg(test)]
mod test {
use std::{collections::HashSet, iter::FromIterator};

use super::{
super::{
ast::{GrammarAST, Production, Symbol},
Expand Down Expand Up @@ -1844,28 +1763,15 @@ x"
#[test]
fn test_parse_param() {
let src = "
%parse_param <'a, 'b> (x: &'a (), (y, z) : (Result<((), ()), ((), ())>, ((u32, u32), &'b ())))
%parse-param a::b: (u64, u64)
%%
A: 'a';
";
let grm = parse(
YaccKind::Original(YaccOriginalActionKind::GenericParseTree),
&src,
)
.unwrap();
let grm = parse(YaccKind::Original(YaccOriginalActionKind::UserAction), &src).unwrap();

let expect_lifetimes = HashSet::from_iter([&"'a", &"'b"].iter().map(|s| s.to_string()));
let expect_bindings = [
("x", "&'a ()"),
(
"(y, z)",
"(Result<((), ()), ((), ())>, ((u32, u32), &'b ()))",
),
]
.iter()
.map(|(v, t)| (v.to_string(), t.to_string()))
.collect::<Vec<(String, String)>>();
assert_eq!(grm.parse_param_lifetimes, Some(expect_lifetimes));
assert_eq!(grm.parse_param_bindings, Some(expect_bindings));
assert_eq!(
grm.parse_param,
Some(("a::b".to_owned(), "(u64, u64)".to_owned()))
);
}
}
25 changes: 25 additions & 0 deletions doc/src/actioncode.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,28 @@ make use of the following:
to the lexer and return them from rules / store them in structs without
copying. `Lexer::span_str` returns such strings and the typical idiom of use
is `&'input str`.


## Additional parse parameter

A single extra parameter can be passed to action functions if the `%parse-param
<var>: <type>` declaration is used. The variable `<var>` is then visible in all
action code. Note that `<type>` must implement the [`Copy`
trait](https://doc.rust-lang.org/std/marker/trait.Copy.html). If you wish to
pass a reference it must currently be tied to the `'input` lifetime (i.e.
`%parse-param x: &'input ...`).

For example if a grammar has a declaration:

```
%parse-param p: u64
```

then the statically generated `parse` function will take two parameters
`(lexer: &..., p: u64)` and the variable `p` can be used in action code e.g.:

```
R -> ...:
'ID' { format!("{}{}", p, ...) }
;
```
4 changes: 4 additions & 0 deletions doc/src/yacccompatibility.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ There are several differences between Yacc and grmtools including:
* grmtools allows both Yacc's `%expect` and Bison's `%expect-rr` declarations
in its base "Yacc" mode.

* Bison's `%parse-param` can take multiple arguments. grmtools' `%parse-param`
takes a single argument which can be a tuple, thus emulating multiple
arguments while integrating naturally into Rust's type system.

* Although rare, it is possible to generate accept/reduce conflicts (e.g. for
a grammar with the sole rule `A: A;`). grmtools considers accept/reduce
conflicts to be a hard error, and refuses to generate anything for the
Expand Down
13 changes: 13 additions & 0 deletions lrpar/cttests/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ lrpar_mod!("lexer_lifetime.y");
lrlex_mod!("multitypes.l");
lrpar_mod!("multitypes.y");

lrlex_mod!("parseparam.l");
lrpar_mod!("parseparam.y");

lrlex_mod!("passthrough.l");
lrpar_mod!("passthrough.y");

Expand Down Expand Up @@ -230,6 +233,16 @@ fn test_span() {
}
}

// Checks that the value passed as the extra `%parse-param` argument reaches
// action code: the `parseparam` grammar's action adds `*p` to the parsed `INT`,
// so lexing "101" and passing `&3` must yield 104.
#[test]
fn test_parseparam() {
let lexerdef = parseparam_l::lexerdef();
let lexer = lexerdef.lexer("101");
// The second argument is the extra parse parameter declared in the grammar.
match parseparam_y::parse(&lexer, &3) {
(Some(i), _) if i == 104 => (),
// Any other outcome (no result, or a different sum) fails the test via panic.
_ => unreachable!(),
}
}

#[test]
fn test_passthrough() {
let lexerdef = passthrough_l::lexerdef();
Expand Down
13 changes: 13 additions & 0 deletions lrpar/cttests/src/parseparam.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
name: Test %parse-param
yacckind: Grmtools
grammar: |
%start S
%parse-param p: &'input u64
%%
S -> u64:
'INT' { *p + $lexer.span_str($1.unwrap().span()).parse::<u64>().unwrap() }
;
%%
lexer: |
%%
[0-9]+ 'INT'
26 changes: 17 additions & 9 deletions lrpar/src/lib/cpctplus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,18 +106,20 @@ struct CPCTPlus<
LexemeT: Lexeme<StorageT>,
StorageT: 'static + Eq + Hash,
ActionT: 'a,
ParamT: Copy,
> {
parser: &'a Parser<'a, 'b, 'input, LexemeT, StorageT, ActionT>,
parser: &'a Parser<'a, 'b, 'input, LexemeT, StorageT, ActionT, ParamT>,
}

pub(super) fn recoverer<
'a,
LexemeT: Lexeme<StorageT>,
StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
ActionT: 'a,
ParamT: Copy,
>(
parser: &'a Parser<LexemeT, StorageT, ActionT>,
) -> Box<dyn Recoverer<LexemeT, StorageT, ActionT> + 'a>
parser: &'a Parser<LexemeT, StorageT, ActionT, ParamT>,
) -> Box<dyn Recoverer<LexemeT, StorageT, ActionT, ParamT> + 'a>
where
usize: AsPrimitive<StorageT>,
{
Expand All @@ -131,14 +133,16 @@ impl<
LexemeT: Lexeme<StorageT>,
StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
ActionT: 'a,
> Recoverer<LexemeT, StorageT, ActionT> for CPCTPlus<'a, 'b, 'input, LexemeT, StorageT, ActionT>
ParamT: Copy,
> Recoverer<LexemeT, StorageT, ActionT, ParamT>
for CPCTPlus<'a, 'b, 'input, LexemeT, StorageT, ActionT, ParamT>
where
usize: AsPrimitive<StorageT>,
{
fn recover(
&self,
finish_by: Instant,
parser: &Parser<LexemeT, StorageT, ActionT>,
parser: &Parser<LexemeT, StorageT, ActionT, ParamT>,
in_laidx: usize,
mut in_pstack: &mut Vec<StIdx>,
mut astack: &mut Vec<AStackType<LexemeT, ActionT>>,
Expand Down Expand Up @@ -264,7 +268,8 @@ impl<
LexemeT: Lexeme<StorageT>,
StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
ActionT: 'a,
> CPCTPlus<'a, 'b, 'input, LexemeT, StorageT, ActionT>
ParamT: Copy,
> CPCTPlus<'a, 'b, 'input, LexemeT, StorageT, ActionT, ParamT>
where
usize: AsPrimitive<StorageT>,
{
Expand Down Expand Up @@ -450,8 +455,9 @@ fn apply_repairs<
LexemeT: Lexeme<StorageT>,
StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
ActionT: 'a,
ParamT: Copy,
>(
parser: &Parser<LexemeT, StorageT, ActionT>,
parser: &Parser<LexemeT, StorageT, ActionT, ParamT>,
mut laidx: usize,
mut pstack: &mut Vec<StIdx>,
mut astack: &mut Option<&mut Vec<AStackType<LexemeT, ActionT>>>,
Expand Down Expand Up @@ -496,8 +502,9 @@ fn simplify_repairs<
LexemeT: Lexeme<StorageT>,
StorageT: 'static + Hash + PrimInt + Unsigned,
ActionT,
ParamT: Copy,
>(
parser: &Parser<LexemeT, StorageT, ActionT>,
parser: &Parser<LexemeT, StorageT, ActionT, ParamT>,
all_rprs: &mut Vec<Vec<ParseRepair<LexemeT, StorageT>>>,
) where
usize: AsPrimitive<StorageT>,
Expand Down Expand Up @@ -555,8 +562,9 @@ fn rank_cnds<
LexemeT: Lexeme<StorageT>,
StorageT: 'static + Debug + Hash + PrimInt + Unsigned,
ActionT: 'a,
ParamT: Copy,
>(
parser: &Parser<LexemeT, StorageT, ActionT>,
parser: &Parser<LexemeT, StorageT, ActionT, ParamT>,
finish_by: Instant,
in_laidx: usize,
in_pstack: &[StIdx],
Expand Down
Loading

0 comments on commit 3298c50

Please sign in to comment.