ocaml · panglesd · Jul 5, 2024 · Jun 21, 2024 · Jun 21, 2024 · Jul 1, 2024
diff --git a/CHANGES.md b/CHANGES.md
@@ -25,6 +25,9 @@
 - `Odoc_html_frontend` does not use tyxml, for smaller javascript sizes.
   (@EmileTrotignon, #1072)
 - Overhaul of module-type-of expansions and shadowing code (@jonludlam, #1081)
+- Allow `][` in code blocks (@Julow, #1149)
+  Previously, `][` began the result section of a "code block with result"; that syntax now requires a delimiter:
+  `{delim@lang[ code ]delim[ result ]}`
 
 ### Fixed
 

diff --git a/src/parser/lexer.mll b/src/parser/lexer.mll
@@ -412,7 +412,7 @@ and token input = parse
       emit ~start_offset input token }
 
   | "{["
-    { code_block (Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf) None (Buffer.create 256) "" input lexbuf }
+    { code_block false (Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf) None (Buffer.create 256) "" input lexbuf }
 
   | (("{" (delim_char* as delim) "@" horizontal_space*) as prefix) (language_tag_char+ as lang_tag_)
     {
@@ -424,21 +424,31 @@ and token input = parse
         let empty_content = with_location_adjustments (fun _ -> Loc.at) input "" in
         emit ~start_offset input (`Code_block (Some (lang_tag, None), delim, empty_content, false))
       in
+      (* Disallow result block sections for code blocks without a delimiter.
+         This avoids the surprising parsing of '][' ending the code block. *)
+      let allow_result_block = delim <> "" in
+      let code_block_with_metadata metadata =
+        let content_offset = Lexing.lexeme_end lexbuf in
+        let metadata = Some (lang_tag, metadata) in
+        let prefix = Buffer.create 256 in
+        code_block allow_result_block start_offset content_offset metadata
+          prefix delim input lexbuf
+      in
       match code_block_metadata_tail input lexbuf with
-      | `Ok metadata -> code_block start_offset (Lexing.lexeme_end lexbuf) (Some (lang_tag, metadata)) (Buffer.create 256) delim input lexbuf
+      | `Ok metadata -> code_block_with_metadata metadata
       | `Eof ->
           warning input ~start_offset Parse_error.truncated_code_block_meta;
           emit_truncated_code_block ()
       | `Invalid_char c ->
           warning input ~start_offset
             (Parse_error.language_tag_invalid_char lang_tag_ c);
-          code_block start_offset (Lexing.lexeme_end lexbuf) (Some (lang_tag, None)) (Buffer.create 256) delim input lexbuf
+          code_block_with_metadata None
     }
 
   | "{@" horizontal_space* '['
     {
       warning input Parse_error.no_language_tag_in_meta;
-      code_block (Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf) None (Buffer.create 256) "" input lexbuf
+      code_block false (Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf) None (Buffer.create 256) "" input lexbuf
     }
 
   | "{v"
@@ -737,20 +747,24 @@ and code_block_metadata_tail input = parse
   | eof
     { `Eof }
 
-and code_block start_offset content_offset metadata prefix delim input = parse
+and code_block allow_result_block start_offset content_offset metadata prefix delim input = parse
   | ("]" (delim_char* as delim') "[") as terminator
-    { if delim = delim'
+    { if delim = delim' && allow_result_block
       then emit_code_block ~start_offset content_offset input metadata delim terminator prefix true
-      else
-        (Buffer.add_string prefix terminator;
-        code_block start_offset content_offset metadata prefix delim input lexbuf) }
+      else (
+        Buffer.add_string prefix terminator;
+        code_block allow_result_block start_offset content_offset metadata
+          prefix delim input lexbuf
+      )
+    }
   | ("]" (delim_char* as delim') "}") as terminator
     { 
       if delim = delim'
       then emit_code_block ~start_offset content_offset input metadata delim terminator prefix false
       else (
         Buffer.add_string prefix terminator;
-        code_block start_offset content_offset metadata prefix delim input lexbuf
+        code_block allow_result_block start_offset content_offset metadata
+          prefix delim input lexbuf
       )
     }
   | eof
@@ -761,5 +775,6 @@ and code_block start_offset content_offset metadata prefix delim input = parse
   | (_ as c)
     {
       Buffer.add_char prefix c;
-      code_block start_offset content_offset metadata prefix delim input lexbuf
+      code_block allow_result_block start_offset content_offset metadata
+        prefix delim input lexbuf
     }
diff --git a/src/parser/test/test.ml b/src/parser/test/test.ml
@@ -2711,15 +2711,65 @@ let%expect_test _ =
          (warnings ())) |}]
 
     let code_block_with_output =
+      test "{delim@ocaml[foo]delim[output {b foo}]}";
+      [%expect
+        {|
+        ((output
+          (((f.ml (1 0) (1 39))
+            (code_block (((f.ml (1 7) (1 12)) ocaml) ()) ((f.ml (1 13) (1 16)) foo)
+             ((paragraph
+               (((f.ml (1 23) (1 29)) (word output)) ((f.ml (1 29) (1 30)) space)
+                ((f.ml (1 30) (1 37)) (bold (((f.ml (1 33) (1 36)) (word foo))))))))))))
+         (warnings ())) |}]
+
+    (* Without a delimiter, '][' is kept as part of the code block content. *)
+    let code_block_with_output_without_delim =
+      test "{[foo][output {b foo}]}";
+      [%expect
+        {|
+        ((output
+          (((f.ml (1 0) (1 23))
+            (code_block ((f.ml (1 2) (1 21)) "foo][output {b foo}")))))
+         (warnings ())) |}]
+
+    (* A language tag without a delimiter: '][' is still part of the content. *)
+    let code_block_with_output_and_lang_without_delim =
       test "{@ocaml[foo][output {b foo}]}";
       [%expect
         {|
         ((output
           (((f.ml (1 0) (1 29))
-            (code_block (((f.ml (1 2) (1 7)) ocaml) ()) ((f.ml (1 8) (1 11)) foo)
-             ((paragraph
-               (((f.ml (1 13) (1 19)) (word output)) ((f.ml (1 19) (1 20)) space)
-                ((f.ml (1 20) (1 27)) (bold (((f.ml (1 23) (1 26)) (word foo))))))))))))
+            (code_block (((f.ml (1 2) (1 7)) ocaml) ())
+             ((f.ml (1 8) (1 27)) "foo][output {b foo}")))))
+         (warnings ())) |}]
+
+    let code_block_with_output_unexpected_delim =
+      test "{[foo]unexpected[output {b foo}]}";
+      [%expect
+        {|
+        ((output
+          (((f.ml (1 0) (1 33))
+            (code_block ((f.ml (1 2) (1 31)) "foo]unexpected[output {b foo}")))))
+         (warnings ())) |}]
+
+    let code_block_with_output_lang_unexpected_delim =
+      test "{@ocaml[foo]unexpected[output {b foo}]}";
+      [%expect
+        {|
+        ((output
+          (((f.ml (1 0) (1 39))
+            (code_block (((f.ml (1 2) (1 7)) ocaml) ())
+             ((f.ml (1 8) (1 37)) "foo]unexpected[output {b foo}")))))
+         (warnings ())) |}]
+
+    let code_block_with_output_wrong_delim =
+      test "{delim@ocaml[foo]wrong[output {b foo}]delim}";
+      [%expect
+        {|
+        ((output
+          (((f.ml (1 0) (1 44))
+            (code_block (((f.ml (1 7) (1 12)) ocaml) ())
+             ((f.ml (1 13) (1 37)) "foo]wrong[output {b foo}")))))
          (warnings ())) |}]
 
     let code_block_empty_meta =
@@ -2850,17 +2900,22 @@ let%expect_test _ =
 
     let code_block_with_output =
       test
-        {|{@ocaml[ let x = ][ {err@mdx-error[ here's the error ]} ]err}
-        ]}|};
-      [%expect
-        "\n\
-        \        ((output\n\
-        \          (((f.ml (1 0) (2 10))\n\
-        \            (code_block (((f.ml (1 2) (1 7)) ocaml) ())\n\
-        \             ((f.ml (1 8) (1 17)) \"let x = \")\n\
-        \             ((code_block (((f.ml (1 25) (1 34)) mdx-error) ())\n\
-        \               ((f.ml (1 35) (1 56)) \"here's the error ]} \")))))))\n\
-        \         (warnings ()))"]
+        {|{delim@ocaml[ let x = ]delim[ {err@mdx-error[ here's the error ]} ]err}
+        ]delim}|};
+      [%expect
+        "
+        ((output
+          (((f.ml (1 0) (2 15))
+            (code_block (((f.ml (1 7) (1 12)) ocaml) ())
+             ((f.ml (1 13) (1 22)) \"let x = \")
+             ((code_block (((f.ml (1 35) (1 44)) mdx-error) ())
+               ((f.ml (1 45) (1 66)) \"here's the error ]} \"))
+              (paragraph
+               (((f.ml (2 8) (2 9)) (word ])) ((f.ml (2 9) (2 14)) (word delim)))))))))
+         (warnings
+          ( \"File \\\"f.ml\\\", line 2, characters 8-9:\\
+           \\nUnpaired ']' (end of code).\\
+           \\nSuggestion: try '\\\\]'.\")))"]
 
     let delimited_code_block_with_output =
       test "{delim@ocaml[ foo ]delim[ ]}";