Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor handling of comments #2371

Merged
merged 63 commits into from Nov 8, 2023
Merged
Show file tree
Hide file tree
Changes from 26 commits
Commits
Show all changes
63 commits
Select commit Hold shift + click to select a range
7df9213
WIP: Factorize interpretation of comments
Julow Apr 27, 2023
9caf607
Fix lost ending space
Julow May 26, 2023
207f471
Promote
Julow May 26, 2023
86ff2e4
WIP: Rewrite parsing of normal and asterisk prefixed comments
Julow May 26, 2023
f5cce1a
Test 'error4' requires one more iteration
Julow May 30, 2023
032f6c3
Fix added newline in cinaps comments
Julow May 30, 2023
124a89b
Strip trailing spaces of cinaps comments
Julow May 31, 2023
2d0930d
Promote
Julow May 31, 2023
b207454
Strip heading/trailing empty lines
Julow May 31, 2023
e8875b5
Small cleanup in docstring fmt function
Julow May 31, 2023
c42280a
Fix incorrect unindenting of cinaps comments
Julow May 31, 2023
4e4ef13
Normalize comments in code in comments
Julow May 30, 2023
7e9e6ab
Revert "Normalize comments in code in comments"
Julow May 31, 2023
6061527
Fix unindenting when first line is empty
Julow Jun 1, 2023
520a35c
Don't unindent doc comments
Julow Jun 1, 2023
2095965
Revert "Test 'error4' requires one more iteration"
Julow Jun 1, 2023
606447a
Trim trailing empty lines and whitespaces
Julow Jun 1, 2023
4572014
Preserve empty trailing lines in doc comments
Julow Jun 2, 2023
3468395
Restore formatting of cinaps comments
Julow Jun 2, 2023
aa80541
Preserve pro/epi break on comments as doc
Julow Jun 2, 2023
364fc75
Fix regressions on unwrapped comments
Julow Jun 5, 2023
e009001
Fix parsing of asterisk prefixed comments
Julow Jun 5, 2023
7bff959
Restore break before preceding multi-line comments
Julow Jun 5, 2023
f8719ba
Preserve leading/trailing newlines in unwrapped comments
Julow Jun 5, 2023
69f2a8e
Tests: Remove no longer necessary `--max-iter`
Julow Jun 6, 2023
5a56bf1
Tests: Remove empty .err files
Julow Jun 6, 2023
37bf3f0
Fix parsing and printing of header-like comments
Julow Jun 6, 2023
8bf52f2
Update changes
Julow Jun 6, 2023
cf878da
Fix parsing of asterisk prefixed comments too open
Julow Jun 6, 2023
00a77f3
Merge branch 'main' into cmts-factor
Julow Jun 6, 2023
6e0ae10
Cleanup Cmt
gpetiot Jun 7, 2023
9a5bb99
Don't check the margin to group comments
gpetiot Jun 7, 2023
fe93ffd
Merge pull request #1 from gpetiot/cmts-factor-dont-check-margin
Julow Jun 7, 2023
58e8d0b
Even less open parsing of asterisk prefixed
Julow Jun 7, 2023
f4f64ca
Change the baseline indentation for unwrapped comments
Julow Jun 7, 2023
017935d
Fix interference between f4f64ca5 and 58e8d0b3
Julow Jun 7, 2023
1ee288d
Don't unindent unwrapped comments
Julow Jun 7, 2023
09a9638
Fix last line of asterisk prefixed
Julow Jun 7, 2023
b92c62f
Make Cmt.t abstract
Julow Jun 7, 2023
25aa14c
Don't mix comments and docstrings
Julow Jun 7, 2023
cd00fe6
Make Cmt.t abstract
Julow Jun 7, 2023
ef208e9
Don't mix comments and docstrings
Julow Jun 7, 2023
2ef7270
Merge branch 'cmts-docstrings' into cmts-factor
Julow Jun 8, 2023
7633f3e
Merge branch 'main' into cmts-factor
Julow Jun 8, 2023
c37b6ad
Normalize comments inside comments
Julow Jun 9, 2023
8a4e1cf
Add #2372 to changelog
Julow Jun 12, 2023
e000e31
Merge branch 'main' into cmts-factor
Julow Jun 12, 2023
e273379
Merge branch 'main' into cmts-factor
Julow Jun 22, 2023
ca35745
Move break out of `fmt_cmt`
Julow Jun 23, 2023
27a0074
Revert change to test break_separators.ml
Julow Jun 23, 2023
fcf2b80
Remove unnecessary test 'asterisk_prefixed_cmts'
Julow Sep 19, 2023
0c95f7e
Merge branch 'MAIN' into cmts-factor
Julow Sep 19, 2023
418f8c3
Merge branch 'MAIN' into cmts-factor
Julow Oct 26, 2023
8f3d799
Add testcase for https://github.com/ocaml-ppx/ocamlformat/issues/2468
Julow Oct 26, 2023
971df85
Add test variant for unwrapped comments
Julow Nov 2, 2023
bce8df8
Preserve trailing empty line of unwrapped comments
Julow Nov 2, 2023
20f4fb8
Merge branch 'MAIN' into cmts-factor
Julow Nov 6, 2023
c86b00c
test: Add non-stabilizing comment
Julow Nov 6, 2023
907d70a
Fix non-stabilizing comment
Julow Nov 6, 2023
a400535
Fix regression (break introduced before the end of a paragraph)
gpetiot Nov 7, 2023
a0c9aae
Don't trim first line of asterisk prefixed comment
Julow Nov 7, 2023
5f5532e
Add test case from #2469
Julow Nov 7, 2023
86e12c3
test: Fix spacing in cinaps.ml
Julow Nov 7, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 2 additions & 1 deletion lib-rpc-server/ocamlformat_rpc.ml
Expand Up @@ -82,7 +82,8 @@ let run_format conf x =
(* The formatting functions are ordered in such a way that the ones
expecting a keyword first (like signatures) are placed before the more
general ones (like toplevel phrases). Parsing a file as `--impl` with
`ocamlformat` processes it as a use file (toplevel phrases) anyway.
`ocamlformat` processes it as a use file (toplevel phrases)
anyway.
gpetiot marked this conversation as resolved.
Show resolved Hide resolved

`ocaml-lsp` should use core types, module types and signatures.
`ocaml-mdx` should use toplevel phrases, expressions and
Expand Down
93 changes: 86 additions & 7 deletions lib/Cmt.ml
Expand Up @@ -88,25 +88,104 @@ end

type pos = Before | Within | After

let unindent_lines ~offset first_line tl_lines =
(** Classification of the text of a comment, as produced by [decode]. Each
    constructor carries the content associated with that class. *)
type decoded_kind =
| Verbatim of string
(** Content kept as is. Used for texts starting with [=], texts starting
    with [$] immediately followed by a non-whitespace character, and
    blank or one-character texts. *)
| Doc of string
(** Content to be handled as documentation: the text following a leading
    asterisk, or the whole text when comments are parsed as doc. *)
| Normal of string
(** Regular comment. For texts of at least two characters, the lines are
    unindented and joined back with newlines. *)
| Code of string
(** Body of a comment starting with [$] followed by whitespace: lines are
    unindented, right-stripped, and leading blank lines are dropped. *)
| Asterisk_prefixed of string list
(** Lines of a comment in which every line but the first starts with an
    asterisk. That asterisk is removed from each of those lines. *)

(** A decoded comment. [prefix] is the text placed just after the opening
    of the comment, [suffix] the text placed just before its closing, and
    [kind] the classified content. *)
type decoded = {prefix: string; suffix: string; kind: decoded_kind}

(** [opn_offset] indicates at which column the body of the comment starts. *)
let unindent_lines ~opn_offset first_line tl_lines =
let indent_of_line s =
(* index of first non-whitespace is indentation, None means white line *)
String.lfindi s ~f:(fun _ c -> not (Char.is_whitespace c))
in
(* The indentation of the first line must account for the location of the
comment opening *)
let fl_spaces = Option.value ~default:0 (indent_of_line first_line) in
let fl_indent = fl_spaces + offset in
comment opening. Don't account for the first line if it's empty. *)
let fl_spaces, fl_indent =
match indent_of_line first_line with
| Some i -> (i, i + opn_offset)
| None -> (String.length first_line, Stdlib.max_int)
in
let min_indent =
List.fold_left ~init:fl_indent
~f:(fun acc s ->
Option.value_map ~default:acc ~f:(min acc) (indent_of_line s) )
match indent_of_line s with Some i -> min acc i | None -> acc )
tl_lines
in
(* Completely trim the first line *)
String.drop_prefix first_line fl_spaces
:: List.map ~f:(fun s -> String.drop_prefix s min_indent) tl_lines

let unindent_lines ~offset = function
let unindent_lines ~opn_offset txt =
match String.split ~on:'\n' txt with
| [] -> []
| hd :: tl -> unindent_lines ~offset hd tl
| hd :: tl -> unindent_lines ~opn_offset hd tl

(** Recognise a comment whose continuation lines all start with an asterisk.

    Given the lines of a comment, return [Some lines'] when there are at
    least two lines and every line after the first starts with ["*"]. In
    [lines'] the first line is unchanged and the first character of each
    following line is dropped. Return [None] otherwise. *)
let split_asterisk_prefixed lines =
  match lines with
  | [] | [_] -> None
  | first :: rest ->
      let starts_with_asterisk line = String.is_prefix line ~prefix:"*" in
      if List.for_all rest ~f:starts_with_asterisk then
        let stripped = List.map rest ~f:(fun line -> String.drop_prefix line 1) in
        Some (first :: stripped)
      else None

(** [mk ?prefix ?suffix kind] builds a decoded comment. [prefix] and
    [suffix] both default to the empty string. *)
let mk ?prefix ?suffix kind =
  let or_empty = function Some s -> s | None -> "" in
  {prefix= or_empty prefix; suffix= or_empty suffix; kind}

(** Whether [s] contains no character other than whitespace. The empty
    string qualifies. *)
let is_all_whitespace s =
  let is_visible c = not (Char.is_whitespace c) in
  not (String.exists s ~f:is_visible)

(** [decode ~parse_comments_as_doc cmt] classifies the text of [cmt] and
    computes the prefix and suffix to print around its content.

    Carriage returns are removed from the text first. Texts shorter than
    two characters are handled by a dedicated table. Otherwise the first
    character selects the kind:
    - [$] followed by a non-whitespace character, or [=]: kept verbatim;
    - [$] followed by whitespace: code block, unindented and stripped;
    - an asterisk: doc content, with the asterisk moved to the prefix;
    - anything else: a single space if the text is blank, doc content if
      [parse_comments_as_doc] is set, and otherwise a regular or
      asterisk-prefixed comment with its lines unindented. *)
let decode ~parse_comments_as_doc {txt; loc} =
  (* Windows compatibility: carriage returns are ignored. *)
  let txt = String.filter txt ~f:(fun c -> not (Char.equal c '\r')) in
  (* Column at which the body starts: column of the comment opening plus the
     two characters of the opening itself. *)
  let opn_offset =
    let start = loc.Location.loc_start in
    start.Lexing.pos_cnum - start.Lexing.pos_bol + 2
  in
  (* Texts of less than two characters. *)
  let decode_short () =
    match txt with
    (* "(**)" is not parsed as a docstring but as a regular comment
       containing '*' and would be rewritten as "(***)" *)
    | "*" when Location.width loc = 4 -> mk (Verbatim "")
    | "*" | "$" -> mk (Verbatim txt)
    | "\n" | " " -> mk (Verbatim " ")
    | _ -> mk (Normal txt)
  in
  (* Texts starting with a dollar sign followed by whitespace. *)
  let decode_code () =
    let total = String.length txt in
    let ends_with_dollar = Char.equal txt.[total - 1] '$' in
    (* Drop the leading dollar, and the trailing one if present. *)
    let suffix, dropped = if ends_with_dollar then ("$", 2) else ("", 1) in
    let body = String.sub txt ~pos:1 ~len:(total - dropped) in
    let code =
      (* The body starts one column after the dollar sign. *)
      unindent_lines ~opn_offset:(opn_offset + 1) body
      |> List.map ~f:String.rstrip
      |> List.drop_while ~f:is_all_whitespace
      |> String.concat ~sep:"\n"
    in
    mk ~prefix:"$" ~suffix (Code code)
  in
  (* Regular comments, possibly with asterisk-prefixed lines. *)
  let decode_regular () =
    let lines = unindent_lines ~opn_offset txt in
    (* Don't add a space to the prefix if the first line was only
       spaces. *)
    let prefix =
      if
        String.starts_with_whitespace txt
        && not (String.is_empty (List.hd_exn lines))
      then " "
      else ""
    in
    match split_asterisk_prefixed lines with
    | None -> mk ~prefix (Normal (String.concat ~sep:"\n" lines))
    | Some deprefixed_lines ->
        mk ~prefix (Asterisk_prefixed deprefixed_lines)
  in
  if String.length txt < 2 then decode_short ()
  else
    match txt.[0] with
    | '$' ->
        if Char.is_whitespace txt.[1] then decode_code ()
        else mk (Verbatim txt)
    | '=' -> mk (Verbatim txt)
    | '*' -> mk ~prefix:"*" (Doc (String.drop_prefix txt 1))
    | _ ->
        (* A blank comment is kept as a single space so that it is not
           turned into an empty docstring. *)
        if is_all_whitespace txt then mk (Verbatim " ")
        else if parse_comments_as_doc then mk (Doc txt)
        else decode_regular ()
19 changes: 16 additions & 3 deletions lib/Cmt.mli
Expand Up @@ -37,6 +37,19 @@ module Comparator_no_loc : sig
include Comparator.S with type t := t
end

val unindent_lines : offset:int -> string list -> string list
(** Detect and remove the baseline indentation of a comment or a code block.
[offset] is the column number at which the first line starts. *)
(** Classification of the text of a comment, as computed by {!decode}. *)
type decoded_kind =
| Verbatim of string (** Original content. *)
| Doc of string
(** Original content, without the leading asterisk when there is one. *)
| Normal of string
(** Original content with indentation trimmed. Trailing spaces are not
removed. *)
| Code of string (** Source code with indentation removed. *)
| Asterisk_prefixed of string list
(** Lines of the comment, with the leading asterisks removed. *)

(** A comment split into the text surrounding its content and the content
    itself. *)
type decoded =
{ prefix: string (** Just after the opening. *)
; suffix: string (** Just before the closing. *)
; kind: decoded_kind (** Classified content of the comment. *) }

val decode : parse_comments_as_doc:bool -> t -> decoded
(** [decode ~parse_comments_as_doc cmt] classifies the text of [cmt] and
    computes the prefix and suffix surrounding its content. When
    [parse_comments_as_doc] is [true], comments that would otherwise be
    decoded as [Normal] or [Asterisk_prefixed] are decoded as [Doc]. *)