Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature: allow utf8 encoding #919

Merged
merged 1 commit into from
Nov 17, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@

- Add custom ocamllsp/hoverExtended request (#561)

- Support utf-8 position encoding clients (#919)

# 1.14.2

## Fixes
Expand Down
124 changes: 96 additions & 28 deletions lsp/src/text_document.ml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,64 @@ module String = StringLabels

exception Invalid_utf8

let find_offset ~utf8 ~utf16_range:range =
exception Outside

let find_nth_nl =
let rec find_nth_nl str nth pos len =
if nth = 0 then pos
else if pos >= len then raise Outside
else if str.[pos] = '\n' then find_nth_nl str (nth - 1) (pos + 1) len
else find_nth_nl str nth (pos + 1) len
in
fun s ~nth ~start ->
let len = String.length s in
match find_nth_nl s nth start len with
| n -> n
| exception Outside -> len

let find_utf8_pos =
let rec find_pos char dec =
if char = 0 || Uutf.decoder_line dec = 2 then Uutf.decoder_byte_count dec
else
match Uutf.decode dec with
| `Malformed _ | `Await -> raise Invalid_utf8
| `End -> assert false
| `Uchar _ -> find_pos (char - 1) dec
in
fun s ~start ~character ->
let dec =
Uutf.decoder ~nln:(`ASCII (Uchar.of_char '\n')) ~encoding:`UTF_8 `Manual
in
Uutf.Manual.src
dec
(Bytes.unsafe_of_string s)
start
(String.length s - start);
assert (Uutf.decoder_line dec = 1);
find_pos character dec + start

let find_offset_8 ~utf8 ~utf8_range:range =
let { Range.start; end_ } = range in
let start_line_offset = find_nth_nl utf8 ~nth:start.line ~start:0 in
let end_line_offset =
if end_.line = start.line then start_line_offset
else if end_.line > start.line then
find_nth_nl utf8 ~nth:(end_.line - start.line) ~start:start_line_offset
else invalid_arg "inverted range"
in
let make_offset ~start ~character =
if start = String.length utf8 then start
else find_utf8_pos utf8 ~start ~character
in
let start_offset =
make_offset ~start:start_line_offset ~character:start.character
in
let end_offset =
make_offset ~start:end_line_offset ~character:end_.character
in
(start_offset, end_offset)

let find_offset_16 ~utf8 ~utf16_range:range =
let dec =
Uutf.decoder
~nln:(`ASCII (Uchar.of_char '\n'))
Expand Down Expand Up @@ -57,39 +114,50 @@ let find_offset ~utf8 ~utf16_range:range =
computed based on UTF-16. Therefore we reencode every file into utf16 for
analysis. *)

type t = TextDocumentItem.t
type t =
{ document : TextDocumentItem.t
; position_encoding : [ `UTF8 | `UTF16 ]
}

let text (t : TextDocumentItem.t) = t.text
let text (t : t) = t.document.text

let make (t : DidOpenTextDocumentParams.t) = t.textDocument
let make ~position_encoding (t : DidOpenTextDocumentParams.t) =
{ document = t.textDocument; position_encoding }

let documentUri (t : TextDocumentItem.t) = t.uri
let documentUri (t : t) = t.document.uri

let version (t : TextDocumentItem.t) = t.version
let version (t : t) = t.document.version

let languageId (t : TextDocumentItem.t) = t.languageId
let languageId (t : t) = t.document.languageId

let apply_content_change ?version (t : TextDocumentItem.t)
let apply_content_change ?version (t : t)
(change : TextDocumentContentChangeEvent.t) =
(* Changes can only be applied using utf16 offsets *)
let version =
let document =
match change.range with
| None -> { t.document with text = change.text }
| Some range ->
let start_offset, end_offset =
let utf8 = t.document.text in
match t.position_encoding with
| `UTF16 -> find_offset_16 ~utf8 ~utf16_range:range
| `UTF8 -> find_offset_8 ~utf8 ~utf8_range:range
in
let text =
String.concat
~sep:""
[ String.sub t.document.text ~pos:0 ~len:start_offset
; change.text
; String.sub
t.document.text
~pos:end_offset
~len:(String.length t.document.text - end_offset)
]
in
{ t.document with text }
in
let document =
match version with
| None -> t.version + 1
| Some version -> version
| None -> document
| Some version -> { document with version }
in
match change.range with
| None -> { t with version; text = change.text }
| Some utf16_range ->
let start_offset, end_offset = find_offset ~utf8:t.text ~utf16_range in
let text =
String.concat
~sep:""
[ String.sub t.text ~pos:0 ~len:start_offset
; change.text
; String.sub
t.text
~pos:end_offset
~len:(String.length t.text - end_offset)
]
in
{ t with text; version }
{ t with document }
3 changes: 2 additions & 1 deletion lsp/src/text_document.mli
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@ open Types

type t

val make : DidOpenTextDocumentParams.t -> t
val make :
position_encoding:[ `UTF8 | `UTF16 ] -> DidOpenTextDocumentParams.t -> t

val languageId : t -> string

Expand Down
82 changes: 61 additions & 21 deletions lsp/test/text_document_tests.ml
Original file line number Diff line number Diff line change
Expand Up @@ -12,60 +12,100 @@ let tuple_range start end_ =
}

let test text range ~change =
let td =
let uri = DocumentUri.of_path "" in
let version = 1 in
let languageId = "fake language" in
let textDocument = { TextDocumentItem.uri; version; languageId; text } in
Text_document.make { DidOpenTextDocumentParams.textDocument }
let test position_encoding =
let td =
let uri = DocumentUri.of_path "" in
let version = 1 in
let languageId = "fake language" in
let textDocument = { TextDocumentItem.uri; version; languageId; text } in
Text_document.make
~position_encoding
{ DidOpenTextDocumentParams.textDocument }
in
let td =
Text_document.apply_content_change
td
(TextDocumentContentChangeEvent.create ?range ~text:change ())
in
(match position_encoding with
| `UTF8 -> print_endline "UTF8:"
| `UTF16 -> print_endline "UTF16:");
print_endline (String.escaped (Text_document.text td))
in
let td =
Text_document.apply_content_change
td
(TextDocumentContentChangeEvent.create ?range ~text:change ())
in
print_endline (String.escaped (Text_document.text td))
test `UTF16;
test `UTF8

let%expect_test "first line insert" =
let range = tuple_range (0, 1) (0, 3) in
test "foo bar baz" range ~change:"XXXX";
[%expect {| fXXXX bar baz |}]
[%expect {|
UTF16:
fXXXX bar baz
UTF8:
fXXXX bar baz |}]

let%expect_test "no range" =
let range = None in
test "foo bar baz" range ~change:"XXXX";
[%expect {| XXXX |}]
[%expect {|
UTF16:
XXXX
UTF8:
XXXX |}]

let%expect_test "replace second line" =
let range = tuple_range (1, 0) (2, 0) in
test "foo\n\bar\nbaz\n" range ~change:"XXXX\n";
[%expect {|
UTF16:
foo\nXXXX\nbaz\n
UTF8:
foo\nXXXX\nbaz\n |}]

let%expect_test "edit in second line" =
let range = tuple_range (1, 1) (1, 2) in
test "foo\n\bar\nbaz\n" range ~change:"-XXX-";
[%expect {|
foo\n\b-XXX-r\nbaz\n |}]
test "foo\nbar\nbaz\n" range ~change:"-XXX-";
[%expect
{|
UTF16:
foo\nb-XXX-r\nbaz\n
UTF8:
foo\nb-XXX-r\nbaz\n |}]

let%expect_test "insert at the end" =
let range = tuple_range (3, 1) (4, 0) in
test "foo\n\bar\nbaz\n" range ~change:"XXX";
[%expect {|
[%expect
{|
UTF16:
foo\n\bar\nbaz\nXXX
UTF8:
foo\n\bar\nbaz\nXXX |}]

let%expect_test "insert at the beginning" =
let range = tuple_range (0, 0) (0, 0) in
test "foo\n\bar\nbaz\n" range ~change:"XXX\n";
[%expect {|
[%expect
{|
UTF16:
XXX\nfoo\n\bar\nbaz\n
UTF8:
XXX\nfoo\n\bar\nbaz\n |}]

let%expect_test "replace first line" =
let range = tuple_range (0, 0) (1, 0) in
test "foo\nbar\n" range ~change:"baz\n";
[%expect {| baz\nbar\n |}]
[%expect {|
UTF16:
baz\nbar\n
UTF8:
baz\nbar\n |}]

let%expect_test "beyond max char" =
let range = tuple_range (0, 0) (0, 100) in
test "foo\nbar\n" range ~change:"baz\n";
[%expect {| baz\n |}]
[%expect {|
UTF16:
baz\n
UTF8:
baz\nbar\n |}]
5 changes: 3 additions & 2 deletions ocaml-lsp-server/src/document.ml
Original file line number Diff line number Diff line change
Expand Up @@ -216,9 +216,10 @@ let make_merlin wheel merlin_db pipeline tdoc syntax =
in
Merlin { merlin_config; tdoc; pipeline; timer; syntax }

let make wheel config pipeline (doc : DidOpenTextDocumentParams.t) =
let make wheel config pipeline (doc : DidOpenTextDocumentParams.t)
~position_encoding =
Fiber.of_thunk (fun () ->
let tdoc = Text_document.make doc in
let tdoc = Text_document.make ~position_encoding doc in
let syntax = Syntax.of_text_document tdoc in
match syntax with
| Ocaml | Reason -> make_merlin wheel config pipeline tdoc syntax
Expand Down
1 change: 1 addition & 0 deletions ocaml-lsp-server/src/document.mli
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ val make :
-> Merlin_config.DB.t
-> Single_pipeline.t
-> DidOpenTextDocumentParams.t
-> position_encoding:[ `UTF8 | `UTF16 ]
-> t Fiber.t

val uri : t -> Uri.t
Expand Down
1 change: 1 addition & 0 deletions ocaml-lsp-server/src/import.ml
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ include struct
module OptionalVersionedTextDocumentIdentifier =
OptionalVersionedTextDocumentIdentifier
module ParameterInformation = ParameterInformation
module PositionEncodingKind = PositionEncodingKind
module ProgressParams = ProgressParams
module ProgressToken = ProgressToken
module PublishDiagnosticsParams = PublishDiagnosticsParams
Expand Down
2 changes: 2 additions & 0 deletions ocaml-lsp-server/src/inference.ml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,9 @@ let open_document_from_file (state : State.t) uri =
DidOpenTextDocumentParams.create ~textDocument:text_document
in
let+ doc =
let position_encoding = State.position_encoding state in
Document.make
~position_encoding
(State.wheel state)
state.merlin_config
state.merlin
Expand Down
33 changes: 29 additions & 4 deletions ocaml-lsp-server/src/ocaml_lsp_server.ml
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,15 @@ let initialize_info (client_capabilities : ClientCapabilities.t) :
~full
()))
in
let positionEncoding =
let open Option.O in
let* general = client_capabilities.general in
let* options = general.positionEncodings in
List.find_map
([ UTF8; UTF16 ] : PositionEncodingKind.t list)
~f:(fun encoding ->
Option.some_if (List.mem options ~equal:Poly.equal encoding) encoding)
in
ServerCapabilities.create
~textDocumentSync
~hoverProvider:(`Bool true)
Expand All @@ -139,6 +148,7 @@ let initialize_info (client_capabilities : ClientCapabilities.t) :
~renameProvider
~workspace
~executeCommandProvider
?positionEncoding
()
in
let serverInfo =
Expand Down Expand Up @@ -236,7 +246,16 @@ let on_initialize server (ip : InitializeParams.t) =
let+ () = Fiber.Pool.task state.detached ~f:(fun () -> Dune.run dune) in
dune
in
let state = State.initialize state ip workspaces dune diagnostics in
let initialize_info = initialize_info ip.capabilities in
let state =
let position_encoding =
match initialize_info.capabilities.positionEncoding with
| None | Some UTF16 -> `UTF16
| Some UTF8 -> `UTF8
| Some UTF32 | Some (Other _) -> assert false
in
State.initialize state ~position_encoding ip workspaces dune diagnostics
in
let state =
match ip.trace with
| None -> state
Expand All @@ -254,7 +273,7 @@ let on_initialize server (ip : InitializeParams.t) =
; _
} ->
Reply.later (fun send ->
let* () = send (initialize_info ip.capabilities) in
let* () = send initialize_info in
let register =
RegistrationParams.create
~registrations:
Expand All @@ -279,7 +298,7 @@ let on_initialize server (ip : InitializeParams.t) =
Server.request
server
(Server_request.ClientRegisterCapability register))
| _ -> Reply.now (initialize_info ip.capabilities)
| _ -> Reply.now initialize_info
in
(resp, state)

Expand Down Expand Up @@ -911,7 +930,13 @@ let on_notification server (notification : Client_notification.t) :
match notification with
| TextDocumentDidOpen params ->
let* doc =
Document.make (State.wheel state) state.merlin_config state.merlin params
let position_encoding = State.position_encoding state in
Document.make
~position_encoding
(State.wheel state)
state.merlin_config
state.merlin
params
in
assert (Document_store.get_opt store params.textDocument.uri = None);
let* () = Document_store.open_document store doc in
Expand Down
Loading