Skip to content

Commit

Permalink
Modified recursive descent parser to return a derivation, and added a…
Browse files Browse the repository at this point in the history
… pretty-printer for such derivations
  • Loading branch information
chris committed May 28, 2010
1 parent 51430d3 commit 3046db3
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 64 deletions.
6 changes: 5 additions & 1 deletion abnf_cmd.ml
Expand Up @@ -80,7 +80,11 @@ let _ =
) tsorted_nodes;
printf "</body></html>"
|RDParse ->
Abnf_recursive_descent.parse_file_with_grammar !file_to_parse all_rules !starting_nonterminal
Abnf_ops.Text.prettyprint_derivation 0 (Abnf_recursive_descent.parse_file_with_grammar
!file_to_parse
all_rules !
starting_nonterminal
)
end;
eprintf "Done parsing file: %s\n" file;
()
Expand Down
16 changes: 16 additions & 0 deletions abnf_ops.ml
Expand Up @@ -91,6 +91,22 @@ module Text = struct
in register_terminal rule
) rds;
Hashtbl.fold (fun k _ a -> (string_of_terminal k) :: a) terms []

let rec prettyprint_derivation offset = function
| D_terminal (term, str) ->
printf "%s%s: %S\n" (String.make offset ' ') (string_of_terminal term) str
| D_string (str) ->
printf "%s\"%s\"\n" (String.make offset ' ') str
| D_hex_range (low, high, str) ->
printf "%s%x-%x: %S\n" (String.make offset ' ') low high str
| D_concat (d1, d2) ->
prettyprint_derivation offset d1;
prettyprint_derivation offset d2
| D_reference (nt_name, d) ->
printf "%s%s->\n" (String.make offset ' ') nt_name;
prettyprint_derivation (offset + 2) d
| D_repetition (ds) ->
List.iter (prettyprint_derivation offset) ds

end

Expand Down
125 changes: 62 additions & 63 deletions abnf_recursive_descent.ml
Expand Up @@ -50,33 +50,28 @@ let parse_file_with_grammar infile grammar starting_nonterminal = (
in let find_named_rule name =
(Hashtbl.find grammar name)

in let rec rule_nullable = (function
| S_terminal term -> (match term with
| LWSP -> true
| _ -> false)
| S_string str -> false
| S_concat (rl1, rl2) -> (rule_nullable rl1) && (rule_nullable rl2)
| S_alt (rl1, rl2) -> (rule_nullable rl1) || (rule_nullable rl2)
| S_seq (rl1, rl2) -> (rule_nullable rl1) && (rule_nullable rl2)
| S_repetition(min, max, rl) -> (match min with
| None | Some 0 -> true
| Some n -> false)
| S_reference r -> rule_nullable (find_named_rule r)
| S_any_except (r1, r2) -> rule_nullable r1
| S_hex_range (f, t) -> false
)

in let rec parse_terminal term = (match term with
| LWSP -> while (let next_byte = rd_input_byte fin in
(next_byte = 9 || next_byte = 10 || next_byte = 13))
do () done
| CRLF -> (parse_terminal CR; parse_terminal LF)
| LWSP -> let outstr = Buffer.create(1) in
while
(
let next_byte = rd_input_byte fin in
if (next_byte = 9 || next_byte = 10 || next_byte = 13) then
(Buffer.add_char outstr (chr next_byte); true)
else false
)
do () done;
D_terminal(LWSP, Buffer.contents outstr)
| CRLF -> (let _ = parse_terminal CR in
let _ = parse_terminal LF in
D_terminal (CRLF, "\r\n"))
| _ ->
(let next_byte = rd_input_byte fin in
let possible_bytes = possible_chars_of_terminal term in
if exists (fun x -> x = next_byte) possible_bytes then
(eprintf "Consumed %C\n" (chr next_byte);
())
(
eprintf "Consumed %C\n" (chr next_byte);
D_terminal (term, String.make 1 (chr next_byte))
)
else raise (RecursiveDescentParseFailure
(sprintf "Terminal \"%s\" cannot accept %C"
(Text.string_of_terminal term)
Expand All @@ -93,16 +88,46 @@ let parse_file_with_grammar infile grammar starting_nonterminal = (
done
)

in let rec parse_rule rule =
(let restart_pos = pos_in fin in
in let rec parse_repetition min max rl accum =
(
eprintf "Matching %s\n" (Text.string_of_rule (S_repetition (min, max, rl)));
if max = Some 0 then (rev accum) else
let may_skip = match min with | None -> true | Some 0 -> true | _ -> false in
let new_min = match min with | None -> None | Some 0 -> Some 0 | Some n -> Some (n - 1) in
let new_max = match max with | None -> None | Some 0 -> Some 0 | Some n -> Some (n - 1) in
let new_derivation = ref None in
let restart_pos = pos_in fin in
(eprintf "Trying matching against %s\n" (Text.string_of_rule rl);
(try (new_derivation := Some (parse_rule rl)) with RecursiveDescentParseFailure str ->
if may_skip then
(eprintf "Matching %s failed, but skipping permitted\n" (Text.string_of_rule rl);
seek_in fin restart_pos;
)
else raise (RecursiveDescentParseFailure
(sprintf "Failure parsing \"%s\"'s inner expression: %s"
(Text.string_of_rule (S_repetition (min, max, rl)))
str
)
)
);
match !new_derivation with
|None -> rev accum
|Some deriv -> parse_repetition new_min new_max rl (deriv::accum)
)
)

and parse_rule rule =
(let restart_pos = pos_in fin in
try
(match rule with
| S_terminal term -> eprintf "Matching terminal %s\n" (Text.string_of_terminal term);
parse_terminal term
| S_string str -> eprintf "Consuming string %s\n" str; consume_string str
| S_string str -> eprintf "Consuming string %s\n" str; consume_string str; D_string(str)
| S_concat (rl1, rl2) -> eprintf "Matching %s then %s\n"
(Text.string_of_rule rl1) (Text.string_of_rule rl2);
parse_rule rl1; parse_rule rl2
let rl1_d = parse_rule rl1 in
let rl2_d = parse_rule rl2 in
D_concat(rl1_d, rl2_d)
| S_alt (rl1, rl2) ->
(eprintf "Choice: match %s or %s\nTrying %s\n"
(Text.string_of_rule rl1) (Text.string_of_rule rl2) (Text.string_of_rule rl1);
Expand All @@ -118,52 +143,25 @@ let parse_file_with_grammar infile grammar starting_nonterminal = (
)
)
)
| S_seq (rl1, rl2) -> eprintf "Matching %s then %s\n"
(Text.string_of_rule rl1) (Text.string_of_rule rl2);
parse_rule rl1; parse_rule rl2
| S_repetition (min, max, rl) -> (
eprintf "Matching %s\n" (Text.string_of_rule (S_repetition (min, max, rl)));
if max = Some 0 then () else
let may_skip = match min with | None -> true | Some 0 -> true | _ -> false in
let new_min = match min with | None -> None | Some 0 -> Some 0 | Some n -> Some (n - 1) in
let new_max = match max with | None -> None | Some 0 -> Some 0 | Some n -> Some (n - 1) in
let should_continue = ref true in
(eprintf "Trying matching against %s\n" (Text.string_of_rule rl);
(try parse_rule rl with RecursiveDescentParseFailure str ->
if may_skip then
(eprintf "Matching %s failed, but skipping permitted\n" (Text.string_of_rule rl);
seek_in fin restart_pos;
should_continue := false;
())
else raise (RecursiveDescentParseFailure
(sprintf "Failure parsing \"%s\"'s inner expression: %s"
(Text.string_of_rule rule)
str
)
)
);
if !should_continue then
parse_rule (S_repetition (new_min, new_max, rl))
else
()
)
)
| S_seq (rl1, rl2) -> parse_rule (S_concat (rl1, rl2))
| S_repetition (min, max, rl) ->
D_repetition (parse_repetition min max rl [])
| S_reference r -> eprintf "Expanding %s to %s\n"
r (Text.string_of_rule (find_named_rule r));
parse_rule (find_named_rule r)
D_reference (r, parse_rule (find_named_rule r))
| S_any_except (r1, r2) ->
eprintf "Matching against %s but not %s\n"
(Text.string_of_rule r1)
(Text.string_of_rule r2);
parse_rule r1;
let result_derivation = parse_rule r1 in
let after_success_pos = pos_in fin in
let should_raise = ref true in
seek_in fin restart_pos;
eprintf "Successfully matched %s; checking can't match %s\n"
(Text.string_of_rule r1)
(Text.string_of_rule r2);
(try
parse_rule r2
(try
let _ = parse_rule r2 in ()
with RecursiveDescentParseFailure str ->
eprintf "Good: failed to match %s with error \"%s\"\n" (Text.string_of_rule r2) str;
(* Put the file pointer where it was after the r1 matching success *)
Expand All @@ -177,10 +175,11 @@ let parse_file_with_grammar infile grammar starting_nonterminal = (
)
)
else
()
result_derivation
| S_hex_range (f, t) -> eprintf "Consuming byte between %x and %x\n" f t;
let next_byte = rd_input_byte fin in
if next_byte >= f && next_byte <= t then ()
if next_byte >= f && next_byte <= t then
D_hex_range(f, t, String.make 1 (chr next_byte))
else raise (RecursiveDescentParseFailure
(sprintf "Rule \"%s\" not satisfied by byte %C"
(Text.string_of_rule (S_hex_range (f, t)))
Expand All @@ -193,7 +192,7 @@ let parse_file_with_grammar infile grammar starting_nonterminal = (
raise (RecursiveDescentParseFailure str) (* reraise! *)
)

in parse_rule (find_named_rule starting_nonterminal)
in parse_rule (S_reference starting_nonterminal)

)

9 changes: 9 additions & 0 deletions abnf_syntaxtree.ml
Expand Up @@ -34,6 +34,15 @@ type rule =
| S_hex_range of int * int
| S_any_except of rule * rule (* any rule except rule *)

type derivation =
| D_terminal of terminal * string
| D_hex_range of int * int * string
| D_string of string
| D_concat of derivation * derivation
| D_reference of string * derivation
| D_repetition of derivation list
(* No any_except or alt, of which all branches are not represented, or seq/concat differentiation *)

(* Each line in an ABNF file is defined here *)
type rule_definition = {
s_name: string;
Expand Down

0 comments on commit 3046db3

Please sign in to comment.