Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor handling of comments #2371

Merged
merged 63 commits into from Nov 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
63 commits
Select commit Hold shift + click to select a range
7df9213
WIP: Factorize interpretation of comments
Julow Apr 27, 2023
9caf607
Fix lost ending space
Julow May 26, 2023
207f471
Promote
Julow May 26, 2023
86ff2e4
WIP: Rewrite parsing of normal and asterisk prefixed comments
Julow May 26, 2023
f5cce1a
Test 'error4' requires one more iteration
Julow May 30, 2023
032f6c3
Fix added newline in cinaps comments
Julow May 30, 2023
124a89b
Strip trailing spaces of cinaps comments
Julow May 31, 2023
2d0930d
Promote
Julow May 31, 2023
b207454
Strip heading/trailing empty lines
Julow May 31, 2023
e8875b5
Small cleanup in docstring fmt function
Julow May 31, 2023
c42280a
Fix incorrect unindenting of cinaps comments
Julow May 31, 2023
4e4ef13
Normalize comments in code in comments
Julow May 30, 2023
7e9e6ab
Revert "Normalize comments in code in comments"
Julow May 31, 2023
6061527
Fix unindenting when first line is empty
Julow Jun 1, 2023
520a35c
Don't unindent doc comments
Julow Jun 1, 2023
2095965
Revert "Test 'error4' requires one more iteration"
Julow Jun 1, 2023
606447a
Trim trailing empty lines and whitespaces
Julow Jun 1, 2023
4572014
Preserve empty trailing lines in doc comments
Julow Jun 2, 2023
3468395
Restore formatting of cinaps comments
Julow Jun 2, 2023
aa80541
Preserve pro/epi break on comments as doc
Julow Jun 2, 2023
364fc75
Fix regressions on unwrapped comments
Julow Jun 5, 2023
e009001
Fix parsing of asterisk prefixed comments
Julow Jun 5, 2023
7bff959
Restore break before preceeding multi-line comments
Julow Jun 5, 2023
f8719ba
Preserve leading/trailing newlines in unwrapped comments
Julow Jun 5, 2023
69f2a8e
Tests: Remove no longer necessary `--max-iter`
Julow Jun 6, 2023
5a56bf1
Tests: Remove empty .err files
Julow Jun 6, 2023
37bf3f0
Fix parsing and printing of header-like comments
Julow Jun 6, 2023
8bf52f2
Update changes
Julow Jun 6, 2023
cf878da
Fix parsing of asterisk prefixed comments too open
Julow Jun 6, 2023
00a77f3
Merge branch 'main' into cmts-factor
Julow Jun 6, 2023
6e0ae10
Cleanup Cmt
gpetiot Jun 7, 2023
9a5bb99
Don't check the margin to group comments
gpetiot Jun 7, 2023
fe93ffd
Merge pull request #1 from gpetiot/cmts-factor-dont-check-margin
Julow Jun 7, 2023
58e8d0b
Even less open parsing of asterisk prefixed
Julow Jun 7, 2023
f4f64ca
Change the baseline indentation for unwrapped comments
Julow Jun 7, 2023
017935d
Fix interference between f4f64ca5 and 58e8d0b3
Julow Jun 7, 2023
1ee288d
Don't unindent unwrapped comments
Julow Jun 7, 2023
09a9638
Fix last line of asterisk prefixed
Julow Jun 7, 2023
b92c62f
Make Cmt.t abstract
Julow Jun 7, 2023
25aa14c
Don't mix comments and docstrings
Julow Jun 7, 2023
cd00fe6
Make Cmt.t abstract
Julow Jun 7, 2023
ef208e9
Don't mix comments and docstrings
Julow Jun 7, 2023
2ef7270
Merge branch 'cmts-docstrings' into cmts-factor
Julow Jun 8, 2023
7633f3e
Merge branch 'main' into cmts-factor
Julow Jun 8, 2023
c37b6ad
Normalize comments inside comments
Julow Jun 9, 2023
8a4e1cf
Add #2372 to changelog
Julow Jun 12, 2023
e000e31
Merge branch 'main' into cmts-factor
Julow Jun 12, 2023
e273379
Merge branch 'main' into cmts-factor
Julow Jun 22, 2023
ca35745
Move break out of `fmt_cmt`
Julow Jun 23, 2023
27a0074
Revert change to test break_separators.ml
Julow Jun 23, 2023
fcf2b80
Remove unecessary test 'asterisk_prefixed_cmts'
Julow Sep 19, 2023
0c95f7e
Merge branch 'MAIN' into cmts-factor
Julow Sep 19, 2023
418f8c3
Merge branch 'MAIN' into cmts-factor
Julow Oct 26, 2023
8f3d799
Add testcase for https://github.com/ocaml-ppx/ocamlformat/issues/2468
Julow Oct 26, 2023
971df85
Add test variant for unwrapped comments
Julow Nov 2, 2023
bce8df8
Preserve trailing empty line of unwrapped comments
Julow Nov 2, 2023
20f4fb8
Merge branch 'MAIN' into cmts-factor
Julow Nov 6, 2023
c86b00c
test: Add non-stabilizing comment
Julow Nov 6, 2023
907d70a
Fix non-stabilizing comment
Julow Nov 6, 2023
a400535
Fix regression (break introduced before the end of a paragraph)
gpetiot Nov 7, 2023
a0c9aae
Don't trim first line of asterisk prefixed comment
Julow Nov 7, 2023
5f5532e
Add test case from #2469
Julow Nov 7, 2023
86e12c3
test: Fix spacing in cinaps.ml
Julow Nov 7, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES.md
Expand Up @@ -8,6 +8,7 @@ profile. This started with version 0.26.0.

### Changed

- \* Consistent formatting of comments (#2371, @Julow)
- Documentation comments are now formatted by default (#2390, @Julow)
Use the option `parse-docstrings = false` to disable.
- \* Janestreet profile: do not break `fun _ -> function` (#2460, @tdelvecchio-jsc)
Expand Down
134 changes: 106 additions & 28 deletions lib/Cmt.ml
Expand Up @@ -82,40 +82,118 @@ let pp_error fs {kind; cmt_kind} =
formatting using the option --no-parse-docstrings.\n\
%!" )

module T_no_loc = struct
include T

let compare =
Comparable.lexicographic [Comparable.lift String.compare ~f:txt]
end

type loc = t

module Comparator_no_loc = struct
type t = loc

include Comparator.Make (T_no_loc)
end

type pos = Before | Within | After

let unindent_lines ~offset first_line tl_lines =
(* The indentation of the first line must account for the location of the
comment opening *)
let fl_spaces =
Option.value ~default:0 (String.indent_of_line first_line)
in
let fl_indent = fl_spaces + offset in
let min_indent =
List.fold_left ~init:fl_indent
type decoded_kind =
| Verbatim of string
| Doc of string
| Normal of string
| Code of string
| Asterisk_prefixed of string list

type decoded = {prefix: string; suffix: string; kind: decoded_kind}

(** [~content_offset] indicates at which column the body of the comment
starts (1-indexed). [~max_idnent] indicates the maximum amount of
indentation to trim. *)
let unindent_lines ?(max_indent = Stdlib.max_int) ~content_offset first_line
tl_lines =
let tl_indent =
List.fold_left ~init:max_indent
~f:(fun acc s ->
Option.value_map ~default:acc ~f:(min acc) (String.indent_of_line s) )
tl_lines
in
(* Completely trim the first line *)
String.drop_prefix first_line fl_spaces
(* The indentation of the first line must account for the location of the
comment opening. Don't account for the first line if it's empty.
[fl_trim] is the number of characters to remove from the first line. *)
let fl_trim, fl_indent =
match String.indent_of_line first_line with
| Some i ->
(max 0 (min i (tl_indent - content_offset)), i + content_offset - 1)
| None -> (String.length first_line, max_indent)
in
let min_indent = min tl_indent fl_indent in
let first_line = String.drop_prefix first_line fl_trim in
first_line
:: List.map ~f:(fun s -> String.drop_prefix s min_indent) tl_lines

let unindent_lines ~offset = function
let unindent_lines ?max_indent ~content_offset txt =
match String.split ~on:'\n' txt with
| [] -> []
| hd :: tl -> unindent_lines ~offset hd tl
| hd :: tl -> unindent_lines ?max_indent ~content_offset hd tl

let is_all_whitespace s = String.for_all s ~f:Char.is_whitespace

let split_asterisk_prefixed =
let prefix = "*" in
let drop_prefix s = String.drop_prefix s (String.length prefix) in
let rec lines_are_asterisk_prefixed = function
| [] -> true
(* Allow the last line to be empty *)
| [last] when is_all_whitespace last -> true
| hd :: tl ->
String.is_prefix hd ~prefix && lines_are_asterisk_prefixed tl
in
function
(* Check whether the second line is not empty to avoid matching a comment
with no asterisks. *)
| fst_line :: (snd_line :: _ as tl)
when lines_are_asterisk_prefixed tl && not (is_all_whitespace snd_line)
->
Some (fst_line :: List.map tl ~f:drop_prefix)
| _ -> None

let mk ?(prefix = "") ?(suffix = "") kind = {prefix; suffix; kind}

let decode_comment ~parse_comments_as_doc txt loc =
let txt =
(* Windows compatibility *)
let f = function '\r' -> false | _ -> true in
String.filter txt ~f
in
let opn_offset =
let {Lexing.pos_cnum; pos_bol; _} = loc.Location.loc_start in
pos_cnum - pos_bol + 1
in
if String.length txt >= 2 then
match txt.[0] with
| '$' when not (Char.is_whitespace txt.[1]) -> mk (Verbatim txt)
| '$' ->
let dollar_suf = Char.equal txt.[String.length txt - 1] '$' in
let suffix = if dollar_suf then "$" else "" in
let code =
let len = String.length txt - if dollar_suf then 2 else 1 in
String.sub ~pos:1 ~len txt
in
mk ~prefix:"$" ~suffix (Code code)
| '=' -> mk (Verbatim txt)
| _ when is_all_whitespace txt ->
mk (Verbatim " ") (* Make sure not to format to [(**)]. *)
| _ when parse_comments_as_doc -> mk (Doc txt)
| _ -> (
let lines =
let content_offset = opn_offset + 2 in
unindent_lines ~content_offset txt
in
match split_asterisk_prefixed lines with
| Some deprefixed_lines -> mk (Asterisk_prefixed deprefixed_lines)
| None -> mk (Normal txt) )
else
match txt with
(* "(**)" is not parsed as a docstring but as a regular comment
containing '*' and would be rewritten as "(***)" *)
| "*" when Location.width loc = 4 -> mk (Verbatim "")
| ("*" | "$") as txt -> mk (Verbatim txt)
| "\n" | " " -> mk (Verbatim " ")
| _ -> mk (Normal txt)

let decode_docstring _loc = function
| "" -> mk (Verbatim "")
| ("*" | "$") as txt -> mk (Verbatim txt)
| "\n" | " " -> mk (Verbatim " ")
| txt -> mk ~prefix:"*" (Doc txt)

let decode ~parse_comments_as_doc = function
| Comment {txt; loc} -> decode_comment ~parse_comments_as_doc txt loc
| Docstring {txt; loc} -> decode_docstring loc txt
27 changes: 16 additions & 11 deletions lib/Cmt.mli
Expand Up @@ -33,14 +33,19 @@ val pp_error : Format.formatter -> error -> unit

type pos = Before | Within | After

type loc = t

module Comparator_no_loc : sig
type t = loc

include Comparator.S with type t := t
end

val unindent_lines : offset:int -> string list -> string list
(** Detect and remove the baseline indentation of a comment or a code block.
[offset] is the column number at which the first line starts. *)
type decoded_kind =
| Verbatim of string (** Original content. *)
| Doc of string (** Original content. *)
| Normal of string
(** Original content with indentation trimmed. Trailing spaces are not
removed. *)
| Code of string (** Source code with indentation removed. *)
| Asterisk_prefixed of string list
(** Line splitted with asterisks removed. *)

type decoded =
{ prefix: string (** Just after the opening. *)
; suffix: string (** Just before the closing. *)
; kind: decoded_kind }

val decode : parse_comments_as_doc:bool -> t -> decoded