Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

parser: Don't accept code block results without delim #1149

Merged
merged 3 commits into from
Jul 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@
- `Odoc_html_frontend` does not use tyxml, for smaller javascript sizes.
(@EmileTrotignon, #1072)
- Overhaul of module-type-of expansions and shadowing code (@jonludlam, #1081)
- Allow `][` in code blocks (@Julow, #1149)
Previously, `][` was always interpreted as the start of a result section (a "code block with result"). Result sections now require a delimiter:
`{delim@lang[ code ]delim[ result ]}`

### Fixed

Expand Down
37 changes: 26 additions & 11 deletions src/parser/lexer.mll
Original file line number Diff line number Diff line change
Expand Up @@ -412,7 +412,7 @@ and token input = parse
emit ~start_offset input token }

| "{["
{ code_block (Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf) None (Buffer.create 256) "" input lexbuf }
{ code_block false (Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf) None (Buffer.create 256) "" input lexbuf }

| (("{" (delim_char* as delim) "@" horizontal_space*) as prefix) (language_tag_char+ as lang_tag_)
{
Expand All @@ -424,21 +424,31 @@ and token input = parse
let empty_content = with_location_adjustments (fun _ -> Loc.at) input "" in
emit ~start_offset input (`Code_block (Some (lang_tag, None), delim, empty_content, false))
in
(* Disallow result block sections for code blocks without a delimiter.
This avoids the surprising parsing of '][' ending the code block. *)
let allow_result_block = delim <> "" in
let code_block_with_metadata metadata =
let content_offset = Lexing.lexeme_end lexbuf in
let metadata = Some (lang_tag, metadata) in
let prefix = Buffer.create 256 in
code_block allow_result_block start_offset content_offset metadata
prefix delim input lexbuf
in
match code_block_metadata_tail input lexbuf with
| `Ok metadata -> code_block start_offset (Lexing.lexeme_end lexbuf) (Some (lang_tag, metadata)) (Buffer.create 256) delim input lexbuf
| `Ok metadata -> code_block_with_metadata metadata
| `Eof ->
warning input ~start_offset Parse_error.truncated_code_block_meta;
emit_truncated_code_block ()
| `Invalid_char c ->
warning input ~start_offset
(Parse_error.language_tag_invalid_char lang_tag_ c);
code_block start_offset (Lexing.lexeme_end lexbuf) (Some (lang_tag, None)) (Buffer.create 256) delim input lexbuf
code_block_with_metadata None
}

| "{@" horizontal_space* '['
{
warning input Parse_error.no_language_tag_in_meta;
code_block (Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf) None (Buffer.create 256) "" input lexbuf
code_block false (Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf) None (Buffer.create 256) "" input lexbuf
}

| "{v"
Expand Down Expand Up @@ -737,20 +747,24 @@ and code_block_metadata_tail input = parse
| eof
{ `Eof }

and code_block start_offset content_offset metadata prefix delim input = parse
and code_block allow_result_block start_offset content_offset metadata prefix delim input = parse
| ("]" (delim_char* as delim') "[") as terminator
{ if delim = delim'
{ if delim = delim' && allow_result_block
then emit_code_block ~start_offset content_offset input metadata delim terminator prefix true
else
(Buffer.add_string prefix terminator;
code_block start_offset content_offset metadata prefix delim input lexbuf) }
else (
Buffer.add_string prefix terminator;
code_block allow_result_block start_offset content_offset metadata
prefix delim input lexbuf
)
}
| ("]" (delim_char* as delim') "}") as terminator
{
if delim = delim'
then emit_code_block ~start_offset content_offset input metadata delim terminator prefix false
else (
Buffer.add_string prefix terminator;
code_block start_offset content_offset metadata prefix delim input lexbuf
code_block allow_result_block start_offset content_offset metadata
prefix delim input lexbuf
)
}
| eof
Expand All @@ -761,5 +775,6 @@ and code_block start_offset content_offset metadata prefix delim input = parse
| (_ as c)
{
Buffer.add_char prefix c;
code_block start_offset content_offset metadata prefix delim input lexbuf
code_block allow_result_block start_offset content_offset metadata
prefix delim input lexbuf
}
85 changes: 70 additions & 15 deletions src/parser/test/test.ml
Original file line number Diff line number Diff line change
Expand Up @@ -2711,15 +2711,65 @@ let%expect_test _ =
(warnings ())) |}]

let code_block_with_output =
test "{delim@ocaml[foo]delim[output {b foo}]}";
[%expect
{|
((output
(((f.ml (1 0) (1 39))
(code_block (((f.ml (1 7) (1 12)) ocaml) ()) ((f.ml (1 13) (1 16)) foo)
((paragraph
(((f.ml (1 23) (1 29)) (word output)) ((f.ml (1 29) (1 30)) space)
((f.ml (1 30) (1 37)) (bold (((f.ml (1 33) (1 36)) (word foo))))))))))))
(warnings ())) |}]

(* Code block contains ']['. *)
let code_block_with_output_without_delim =
test "{[foo][output {b foo}]}";
[%expect
{|
((output
(((f.ml (1 0) (1 23))
(code_block ((f.ml (1 2) (1 21)) "foo][output {b foo}")))))
(warnings ())) |}]

(* Code block contains ']['. *)
let code_block_with_output_and_lang_without_delim =
test "{@ocaml[foo][output {b foo}]}";
[%expect
{|
((output
(((f.ml (1 0) (1 29))
(code_block (((f.ml (1 2) (1 7)) ocaml) ()) ((f.ml (1 8) (1 11)) foo)
((paragraph
(((f.ml (1 13) (1 19)) (word output)) ((f.ml (1 19) (1 20)) space)
((f.ml (1 20) (1 27)) (bold (((f.ml (1 23) (1 26)) (word foo))))))))))))
(code_block (((f.ml (1 2) (1 7)) ocaml) ())
((f.ml (1 8) (1 27)) "foo][output {b foo}")))))
(warnings ())) |}]

let code_block_with_output_unexpected_delim =
test "{[foo]unexpected[output {b foo}]}";
[%expect
{|
((output
(((f.ml (1 0) (1 33))
(code_block ((f.ml (1 2) (1 31)) "foo]unexpected[output {b foo}")))))
(warnings ())) |}]

let code_block_with_output_lang_unexpected_delim =
test "{@ocaml[foo]unexpected[output {b foo}]}";
[%expect
{|
((output
(((f.ml (1 0) (1 39))
(code_block (((f.ml (1 2) (1 7)) ocaml) ())
((f.ml (1 8) (1 37)) "foo]unexpected[output {b foo}")))))
(warnings ())) |}]

let code_block_with_output_wrong_delim =
test "{delim@ocaml[foo]wrong[output {b foo}]delim}";
[%expect
{|
((output
(((f.ml (1 0) (1 44))
(code_block (((f.ml (1 7) (1 12)) ocaml) ())
((f.ml (1 13) (1 37)) "foo]wrong[output {b foo}")))))
(warnings ())) |}]

let code_block_empty_meta =
Expand Down Expand Up @@ -2850,17 +2900,22 @@ let%expect_test _ =

let code_block_with_output =
test
{|{@ocaml[ let x = ][ {err@mdx-error[ here's the error ]} ]err}
]}|};
[%expect
"\n\
\ ((output\n\
\ (((f.ml (1 0) (2 10))\n\
\ (code_block (((f.ml (1 2) (1 7)) ocaml) ())\n\
\ ((f.ml (1 8) (1 17)) \"let x = \")\n\
\ ((code_block (((f.ml (1 25) (1 34)) mdx-error) ())\n\
\ ((f.ml (1 35) (1 56)) \"here's the error ]} \")))))))\n\
\ (warnings ()))"]
{|{delim@ocaml[ let x = ]delim[ {err@mdx-error[ here's the error ]} ]err}
]delim}|};
[%expect
"
((output
(((f.ml (1 0) (2 15))
(code_block (((f.ml (1 7) (1 12)) ocaml) ())
((f.ml (1 13) (1 22)) \"let x = \")
((code_block (((f.ml (1 35) (1 44)) mdx-error) ())
((f.ml (1 45) (1 66)) \"here's the error ]} \"))
(paragraph
(((f.ml (2 8) (2 9)) (word ])) ((f.ml (2 9) (2 14)) (word delim)))))))))
(warnings
( \"File \\\"f.ml\\\", line 2, characters 8-9:\\
\\nUnpaired ']' (end of code).\\
\\nSuggestion: try '\\\\]'.\")))"]

let delimited_code_block_with_output =
test "{delim@ocaml[ foo ]delim[ ]}";
Expand Down
Loading