Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Optimize "ocamllex -ml" #1585

Merged
merged 11 commits into from
Jan 30, 2018
3 changes: 3 additions & 0 deletions Changes
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,9 @@ Working version
structures and signatures (e.g. "include struct … include(struct … end) … end")
(Florian Angeletti, review by Gabriel Scherer, report by Christophe Raffalli)

- GPR#1585: optimize output of "ocamllex -ml"
(Alain Frisch, review by Frédéric Bour and Gabriel Scherer)

### Manual and documentation:

- PR#7647, GPR#1384: emphasize ocaml.org website and forum in README
Expand Down
34 changes: 34 additions & 0 deletions experimental/frisch/Makefile
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
include ../../config/Makefile

ROOT=../..
OCAMLC=$(ROOT)/boot/ocamlrun $(ROOT)/ocamlc -I $(ROOT)/stdlib -I $(ROOT)/parsing -I $(ROOT)/utils -I $(ROOT)/tools -I $(ROOT)/typing -I $(ROOT)/driver -I $(ROOT)/toplevel -w A-4-9-42
COMMON=$(ROOT)/compilerlibs/ocamlcommon.cma
Expand Down Expand Up @@ -77,3 +79,35 @@ nomli:
matches:
$(OCAMLC) -linkall -o ppx_matches.exe $(COMMON) ppx_matches.ml
$(OCAMLC) -c -dsource -ppx ./ppx_matches.exe test_matches.ml


## Benchmark ocamllex

.PHONY: bench_ocamllex bench_ocamllex2

# Command-line arguments shared by the ocamlc and ocamlopt invocations of
# bench_ocamllex2 (which rewrites the .cma suffixes to .cmxa for ocamlopt).
ARGS=-o bench.exe -nostdlib \
  -I $(ROOT)/otherlibs/$(UNIXLIB) -I $(ROOT)/stdlib -I $(ROOT)/parsing -I $(ROOT)/utils \
  $(ROOT)/compilerlibs/ocamlcommon.cma unix.cma \
  my_lexer.ml bench.ml

# Generate my_lexer.ml twice (with and without the -ml flag) and run the
# benchmark after each generation.
#
# The two cp commands both write my_lexer.mll, so the second one wins: as
# written, the simplified lexer (my_lexer2.mll) is the one being measured.
# NOTE(review): this looks like a manual toggle; drop the second cp to
# benchmark the real OCaml lexer instead -- confirm intent.
#
# Sub-makes go through $(MAKE) so that the same make binary is used and
# command-line flags are propagated to the recursive invocations.
bench_ocamllex:
	@cp $(ROOT)/parsing/lexer.mll my_lexer.mll
	cp my_lexer2.mll my_lexer.mll
	@(cd $(ROOT)/lex && $(MAKE) ocamllex)
	@$(ROOT)/boot/ocamlrun $(ROOT)/lex/ocamllex -ml my_lexer.mll
	@echo WITH -ml flag:
	@$(MAKE) -s bench_ocamllex2
	@$(ROOT)/boot/ocamlrun $(ROOT)/lex/ocamllex -q my_lexer.mll
	@echo WITHOUT -ml flag:
	@$(MAKE) -s bench_ocamllex2

# Build bench.exe from the current my_lexer.ml in three configurations
# (ocamlopt -inline 1000, ocamlopt -inline 10, ocamlc -custom) and run it
# after each build. Each label is printed without a trailing newline so that
# the figures printed by bench.exe end up on the same line.
#
# printf is used instead of "echo -n" because the latter is not portable
# across /bin/sh implementations.
bench_ocamllex2:
	@printf '%s' " NATIVE, -inline 1000: "
	@$(ROOT)/boot/ocamlrun $(ROOT)/ocamlopt -inline 1000 $(ARGS:.cma=.cmxa)
	@./bench.exe

	@printf '%s' " NATIVE, -inline 10 : "
	@$(ROOT)/boot/ocamlrun $(ROOT)/ocamlopt -inline 10 $(ARGS:.cma=.cmxa)
	@./bench.exe

	@printf '%s' " BYTECODE : "
	@$(ROOT)/boot/ocamlrun $(ROOT)/ocamlc -custom $(ARGS)
	@./bench.exe
39 changes: 39 additions & 0 deletions experimental/frisch/bench.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
(* [lexing s] runs [My_lexer.token] over the string [s] until it returns
   [Parser.EOF]. Every other token is discarded: only the cost of lexing is
   of interest here. *)
let lexing s =
  let lexbuf = Lexing.from_string s in
  (* NOTE(review): presumably this enables the "dummy_pos" mode described in
     bench_ocamllex_optims.md, where the generated code skips position
     updates. That document talks about lex_start_p whereas lex_curr_p is
     set here -- confirm which field the generated code actually tests. *)
  lexbuf.Lexing.lex_curr_p <- Lexing.dummy_pos;
  let rec loop () =
    match My_lexer.token lexbuf with
    | Parser.EOF -> ()
    | _ -> loop ()
  in
  loop ()

(* Contents of ../../typing/typecore.ml, read once at start-up, line by line.
   A newline character is appended after every line returned by
   [input_line]. *)
let s =
  let ic = open_in "../../typing/typecore.ml" in
  let contents = Buffer.create 16 in
  (* Tail-recursive: the recursive call sits in a value branch of the
     match, outside the scope of the exception handler. *)
  let rec slurp () =
    match input_line ic with
    | line ->
        Buffer.add_string contents line;
        Buffer.add_char contents '\n';
        slurp ()
    | exception End_of_file -> ()
  in
  slurp ();
  close_in ic;
  Buffer.contents contents

(* Benchmark driver: lex [s] a fixed number of times, then print the
   throughput (Mb/s), its inverse (ms/Mb) and the number of bytes allocated
   per byte of input. *)
let () =
  let iterations = 100 in
  let bytes_per_mb = 1024. *. 1024. in
  (* Snapshot the allocation counter first, then the clock. *)
  let allocated_before = Gc.allocated_bytes () in
  let started_at = Unix.gettimeofday () in
  for _ = 1 to iterations do
    lexing s
  done;
  let elapsed = Unix.gettimeofday () -. started_at in
  let allocated = Gc.allocated_bytes () -. allocated_before in

  (* Total number of input bytes processed over all iterations. *)
  let total_bytes =
    float_of_int (String.length s) *. float_of_int iterations
  in
  let megabytes = total_bytes /. bytes_per_mb in
  let throughput = megabytes /. elapsed in
  let ms_per_mb = elapsed *. 1000. /. megabytes in
  let alloc_ratio = allocated /. total_bytes in
  Printf.printf " % 8.02f Mb/s % 8.02f ms/Mb alloc x % 8.02f \n%!"
    throughput ms_per_mb alloc_ratio
110 changes: 110 additions & 0 deletions experimental/frisch/bench_ocamllex_optims.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
Some benchmarks to evaluate the speedup of `ocamllex -ml`.

In all tests, we tokenize the `typecore.ml` file (first loaded in
memory) using either:

- the OCaml lexer, or

- a simpler lexer with trivial actions (to eliminate the cost of
actions themselves, which is not under the control of ocamllex).

We run the output of:

- `ocamllex` without the -ml flag, i.e. using tables interpreted at
runtime by the C support code

- `ocamllex -ml`, i.e. the automaton is translated to OCaml code;
this is measured both before and after the optimizations.

In addition, since it turned out that the automatic update of lex_start_p by
the generated code is quite costly, there is now logic so that
the generated code does not update this field and lex_curr_p when
lex_start_p is initially physically equal to Lexing.dummy_pos. This is also
tested (only for the simpler lexer, since the OCaml one updates the field in
its actions).

For each case, we compile the benchmark with:

- `ocamlc`

- `ocamlopt -inline 10`

- `ocamlopt -inline 1000`

(flambda disabled).

The tables below show:

- the throughput (Mb of source code tokenized
per second) -- higher is better;

- its inverse (number of milliseconds to parse one Mb) -- lower is better;

- the allocation ratio (number of bytes allocated by the GC for each byte of source code)


Conclusions:

- In native code, the "-ml" mode is slightly slower than the table
mode before the optimizations, but it becomes significantly faster
after the optimizations, obviously even more so when the
lexer actions are trivial (throughput 58.44 -> 98.30).

- In bytecode, the "-ml" mode is always much slower than the table
mode, but the optimizations reduce the gap a little bit.

- Not tested here, but it is likely that the optimizations produce
code which would be more friendly to Javascript backends
(js_of_ocaml and Bucklescript), as they reduce quite a bit
the number of function calls and mutations.

Note:

- The "refill handler" mode has been lightly tested only.


OCaml lexer:

````
WITHOUT -ml flag:
NATIVE, -inline 1000: 38.07 Mb/s 26.27 ms/Mb alloc x 36.79
NATIVE, -inline 10 : 35.42 Mb/s 28.23 ms/Mb alloc x 36.79
BYTECODE : 7.84 Mb/s 127.54 ms/Mb alloc x 35.48


WITH -ml flag, TRUNK:
NATIVE, -inline 1000: 34.36 Mb/s 29.11 ms/Mb alloc x 36.79
NATIVE, -inline 10 : 34.12 Mb/s 29.31 ms/Mb alloc x 36.79
BYTECODE : 4.08 Mb/s 244.93 ms/Mb alloc x 35.48


WITH -ml flag, BRANCH:
NATIVE, -inline 1000: 45.56 Mb/s 21.95 ms/Mb alloc x 36.79
NATIVE, -inline 10 : 43.19 Mb/s 23.15 ms/Mb alloc x 36.79
BYTECODE : 4.35 Mb/s 229.91 ms/Mb alloc x 35.48
````


Simpler lexer (trivial actions):

````
WITHOUT -ml flag:
NATIVE, -inline 1000: 58.44 Mb/s 17.11 ms/Mb alloc x 21.94
NATIVE, -inline 10 : 58.24 Mb/s 17.17 ms/Mb alloc x 21.94
BYTECODE : 12.63 Mb/s 79.21 ms/Mb alloc x 21.93

WITH -ml flag, TRUNK:
NATIVE, -inline 1000: 55.14 Mb/s 18.13 ms/Mb alloc x 21.94
NATIVE, -inline 10 : 50.76 Mb/s 19.70 ms/Mb alloc x 21.94
BYTECODE : 5.74 Mb/s 174.22 ms/Mb alloc x 21.93

WITH -ml flag, BRANCH:
NATIVE, -inline 1000: 98.30 Mb/s 10.17 ms/Mb alloc x 21.94
NATIVE, -inline 10 : 87.16 Mb/s 11.47 ms/Mb alloc x 21.94
BYTECODE : 6.48 Mb/s 154.43 ms/Mb alloc x 21.93

WITH -ml flag, BRANCH, dummy_pos:
NATIVE, -inline 1000: 152.68 Mb/s 6.55 ms/Mb alloc x 1.00
NATIVE, -inline 10 : 133.97 Mb/s 7.46 ms/Mb alloc x 1.00
BYTECODE : 7.42 Mb/s 134.81 ms/Mb alloc x 1.00
````
89 changes: 89 additions & 0 deletions experimental/frisch/my_lexer2.mll
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
{
open Parser
}
(* Named regular expressions.
   The token rule of this file only references lowercase and identchar.
   NOTE(review): the remaining definitions are not used by that rule and
   were presumably carried over from parsing/lexer.mll -- confirm before
   removing any of them. *)

(* Any number of CR characters (code 13) followed by one LF (code 10). *)
let newline = ('\013'* '\010')
(* Space, horizontal tab (code 9) or form feed (code 12). *)
let blank = [' ' '\009' '\012']
(* ASCII identifier character classes. *)
let lowercase = ['a'-'z' '_']
let uppercase = ['A'-'Z']
let identchar = ['A'-'Z' 'a'-'z' '_' '\'' '0'-'9']
(* Same classes extended with characters in the 192-255 code range. *)
let lowercase_latin1 = ['a'-'z' '\223'-'\246' '\248'-'\255' '_']
let uppercase_latin1 = ['A'-'Z' '\192'-'\214' '\216'-'\222']
let identchar_latin1 =
['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '\'' '0'-'9']
(* Operator characters; dotsymbolchar is symbolchar without the dot and
   the less-than sign. *)
let symbolchar =
['!' '$' '%' '&' '*' '+' '-' '.' '/' ':' '<' '=' '>' '?' '@' '^' '|' '~']
let dotsymbolchar =
['!' '$' '%' '&' '*' '+' '-' '/' ':' '=' '>' '?' '@' '^' '|' '~']
(* Integer literals: decimal, hexadecimal (0x), octal (0o) and binary (0b),
   with underscores allowed after the first digit. *)
let decimal_literal =
['0'-'9'] ['0'-'9' '_']*
let hex_digit =
['0'-'9' 'A'-'F' 'a'-'f']
let hex_literal =
'0' ['x' 'X'] ['0'-'9' 'A'-'F' 'a'-'f']['0'-'9' 'A'-'F' 'a'-'f' '_']*
let oct_literal =
'0' ['o' 'O'] ['0'-'7'] ['0'-'7' '_']*
let bin_literal =
'0' ['b' 'B'] ['0'-'1'] ['0'-'1' '_']*
let int_literal =
decimal_literal | hex_literal | oct_literal | bin_literal
(* Floating-point literals: digits, then an optional fractional part and an
   optional exponent (e or E for decimal, p or P for hexadecimal). *)
let float_literal =
['0'-'9'] ['0'-'9' '_']*
('.' ['0'-'9' '_']* )?
(['e' 'E'] ['+' '-']? ['0'-'9'] ['0'-'9' '_']* )?
let hex_float_literal =
'0' ['x' 'X']
['0'-'9' 'A'-'F' 'a'-'f'] ['0'-'9' 'A'-'F' 'a'-'f' '_']*
('.' ['0'-'9' 'A'-'F' 'a'-'f' '_']* )?
(['p' 'P'] ['+' '-']? ['0'-'9'] ['0'-'9' '_']* )?
(* One letter in the ranges G-Z or g-z. *)
let literal_modifier = ['G'-'Z' 'g'-'z']

(* Single entry point of the simplified benchmark lexer.
   Every action just returns a fixed Parser token, so that running this
   lexer measures the generated automaton rather than the actions. The
   benchmark driver (bench.ml) only distinguishes EOF from the other tokens,
   so the particular token returned by each case does not matter there. *)
rule token = parse
| eof { EOF }
(* Identifier-shaped lexemes: a lowercase start followed by identifier
   characters. The constant TILDE is returned whatever the lexeme is. *)
| lowercase identchar * { TILDE }
(* Fixed punctuation and operator strings. *)
| "&" { AMPERSAND }
| "&&" { AMPERAMPER }
| "`" { BACKQUOTE }
| "\'" { QUOTE }
| "(" { LPAREN }
| ")" { RPAREN }
| "*" { STAR }
| "," { COMMA }
| "->" { MINUSGREATER }
| "." { DOT }
| ".." { DOTDOT }
| ":" { COLON }
| "::" { COLONCOLON }
| ":=" { COLONEQUAL }
| ":>" { COLONGREATER }
| ";" { SEMI }
| ";;" { SEMISEMI }
| "<" { LESS }
| "<-" { LESSMINUS }
| "=" { EQUAL }
| "[" { LBRACKET }
| "[|" { LBRACKETBAR }
| "[<" { LBRACKETLESS }
| "[>" { LBRACKETGREATER }
| "]" { RBRACKET }
| "{" { LBRACE }
| "{<" { LBRACELESS }
| "|" { BAR }
| "||" { BARBAR }
| "|]" { BARRBRACKET }
| ">" { GREATER }
| ">]" { GREATERRBRACKET }
| "}" { RBRACE }
| ">}" { GREATERRBRACE }
| "[@" { LBRACKETAT }
| "[@@" { LBRACKETATAT }
| "[@@@" { LBRACKETATATAT }
| "[%" { LBRACKETPERCENT }
| "[%%" { LBRACKETPERCENTPERCENT }
| "!" { BANG }
| "!=" { INFIXOP0 "!=" }
| "+" { PLUS }
| "+." { PLUSDOT }
| "+=" { PLUSEQ }
| "-" { MINUS }
| "-." { MINUSDOT }
(* Catch-all: any single character not matched above (blanks, digits,
   uppercase letters, ...) yields the constant EOL. *)
| _ { EOL }
Loading