Skip to content

Commit

Permalink
feature: allow utf8 encoding
Browse files Browse the repository at this point in the history
Signed-off-by: Rudi Grinberg <me@rgrinberg.com>

ps-id: ac0de96a-2c5f-4c90-b1af-40acdc1a8344
  • Loading branch information
rgrinberg committed Nov 16, 2022
1 parent ded4b06 commit f48f53d
Show file tree
Hide file tree
Showing 11 changed files with 200 additions and 46 deletions.
2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

- Add custom ocamllsp/hoverExtended request (#561)

- Support UTF8 encoding (#919)

# 1.14.2

## Fixes
Expand Down
100 changes: 84 additions & 16 deletions lsp/src/text_document.ml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,63 @@ module String = StringLabels

exception Invalid_utf8

let find_offset ~utf8 ~utf16_range:range =
exception Outside

let rec find_nth_nl str nth pos len =
if nth = 0 then pos
else if pos >= len then raise Outside
else if str.[pos] = '\n' then find_nth_nl str (nth - 1) (pos + 1) len
else find_nth_nl str nth (pos + 1) len

let find_nth_nl s ~nth ~start =
let len = String.length s in
match find_nth_nl s nth start len with
| n -> n
| exception Outside -> len

let find_utf8_pos =
let rec find_pos char dec =
if char = 0 || Uutf.decoder_line dec = 2 then Uutf.decoder_byte_count dec
else
match Uutf.decode dec with
| `Malformed _ | `Await -> raise Invalid_utf8
| `End -> assert false
| `Uchar _ -> find_pos (char - 1) dec
in
fun s ~start ~character ->
let dec =
Uutf.decoder ~nln:(`ASCII (Uchar.of_char '\n')) ~encoding:`UTF_8 `Manual
in
Uutf.Manual.src
dec
(Bytes.unsafe_of_string s)
start
(String.length s - start);
assert (Uutf.decoder_line dec = 1);
find_pos character dec + start

let find_offset_8 ~utf8 ~utf8_range:range =
let { Range.start; end_ } = range in
let start_line_offset = find_nth_nl utf8 ~nth:start.line ~start:0 in
let end_line_offset =
if end_.line = start.line then start_line_offset
else if end_.line > start.line then
find_nth_nl utf8 ~nth:(end_.line - start.line) ~start:start_line_offset
else invalid_arg "inverted range"
in
let make_offset ~start ~character =
if start = String.length utf8 then start
else find_utf8_pos utf8 ~start ~character
in
let start_offset =
make_offset ~start:start_line_offset ~character:start.character
in
let end_offset =
make_offset ~start:end_line_offset ~character:end_.character
in
(start_offset, end_offset)

let find_offset_16 ~utf8 ~utf16_range:range =
let dec =
Uutf.decoder
~nln:(`ASCII (Uchar.of_char '\n'))
Expand Down Expand Up @@ -57,39 +113,51 @@ let find_offset ~utf8 ~utf16_range:range =
computed based on UTF-16. Therefore we reencode every file into utf16 for
analysis. *)

type t = TextDocumentItem.t
type t =
{ document : TextDocumentItem.t
; encoding : [ `UTF8 | `UTF16 ]
}

let text (t : TextDocumentItem.t) = t.text
let text (t : t) = t.document.text

let make (t : DidOpenTextDocumentParams.t) = t.textDocument
let make ~encoding (t : DidOpenTextDocumentParams.t) =
{ document = t.textDocument; encoding }

let documentUri (t : TextDocumentItem.t) = t.uri
let documentUri (t : t) = t.document.uri

let version (t : TextDocumentItem.t) = t.version
let version (t : t) = t.document.version

let languageId (t : TextDocumentItem.t) = t.languageId
let languageId (t : t) = t.document.languageId

let apply_content_change ?version (t : TextDocumentItem.t)
let apply_content_change ?version (t : t)
(change : TextDocumentContentChangeEvent.t) =
(* Changes can only be applied using utf16 offsets *)
let version =
match version with
| None -> t.version + 1
| None -> t.document.version + 1
| Some version -> version
in
match change.range with
| None -> { t with version; text = change.text }
| Some utf16_range ->
let start_offset, end_offset = find_offset ~utf8:t.text ~utf16_range in
| None ->
let document = { t.document with version; text = change.text } in
{ t with document }
| Some range ->
let start_offset, end_offset =
let utf8 = t.document.text in
match t.encoding with
| `UTF16 -> find_offset_16 ~utf8 ~utf16_range:range
| `UTF8 -> find_offset_8 ~utf8 ~utf8_range:range
in
let text =
String.concat
~sep:""
[ String.sub t.text ~pos:0 ~len:start_offset
[ String.sub t.document.text ~pos:0 ~len:start_offset
; change.text
; String.sub
t.text
t.document.text
~pos:end_offset
~len:(String.length t.text - end_offset)
~len:(String.length t.document.text - end_offset)
]
in
{ t with text; version }
let document = { t.document with text; version } in
{ t with document }
2 changes: 1 addition & 1 deletion lsp/src/text_document.mli
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ open Types

type t

val make : DidOpenTextDocumentParams.t -> t
val make : encoding:[ `UTF8 | `UTF16 ] -> DidOpenTextDocumentParams.t -> t

val languageId : t -> string

Expand Down
80 changes: 59 additions & 21 deletions lsp/test/text_document_tests.ml
Original file line number Diff line number Diff line change
Expand Up @@ -12,60 +12,98 @@ let tuple_range start end_ =
}

let test text range ~change =
let td =
let uri = DocumentUri.of_path "" in
let version = 1 in
let languageId = "fake language" in
let textDocument = { TextDocumentItem.uri; version; languageId; text } in
Text_document.make { DidOpenTextDocumentParams.textDocument }
let test encoding =
let td =
let uri = DocumentUri.of_path "" in
let version = 1 in
let languageId = "fake language" in
let textDocument = { TextDocumentItem.uri; version; languageId; text } in
Text_document.make ~encoding { DidOpenTextDocumentParams.textDocument }
in
let td =
Text_document.apply_content_change
td
(TextDocumentContentChangeEvent.create ?range ~text:change ())
in
(match encoding with
| `UTF8 -> print_endline "UTF8:"
| `UTF16 -> print_endline "UTF16:");
print_endline (String.escaped (Text_document.text td))
in
let td =
Text_document.apply_content_change
td
(TextDocumentContentChangeEvent.create ?range ~text:change ())
in
print_endline (String.escaped (Text_document.text td))
test `UTF16;
test `UTF8

let%expect_test "first line insert" =
let range = tuple_range (0, 1) (0, 3) in
test "foo bar baz" range ~change:"XXXX";
[%expect {| fXXXX bar baz |}]
[%expect {|
UTF16:
fXXXX bar baz
UTF8:
fXXXX bar baz |}]

let%expect_test "no range" =
let range = None in
test "foo bar baz" range ~change:"XXXX";
[%expect {| XXXX |}]
[%expect {|
UTF16:
XXXX
UTF8:
XXXX |}]

let%expect_test "replace second line" =
let range = tuple_range (1, 0) (2, 0) in
test "foo\n\bar\nbaz\n" range ~change:"XXXX\n";
[%expect {|
UTF16:
foo\nXXXX\nbaz\n
UTF8:
foo\nXXXX\nbaz\n |}]

let%expect_test "edit in second line" =
let range = tuple_range (1, 1) (1, 2) in
test "foo\n\bar\nbaz\n" range ~change:"-XXX-";
[%expect {|
foo\n\b-XXX-r\nbaz\n |}]
test "foo\nbar\nbaz\n" range ~change:"-XXX-";
[%expect
{|
UTF16:
foo\nb-XXX-r\nbaz\n
UTF8:
foo\nb-XXX-r\nbaz\n |}]

let%expect_test "insert at the end" =
let range = tuple_range (3, 1) (4, 0) in
test "foo\n\bar\nbaz\n" range ~change:"XXX";
[%expect {|
[%expect
{|
UTF16:
foo\n\bar\nbaz\nXXX
UTF8:
foo\n\bar\nbaz\nXXX |}]

let%expect_test "insert at the beginning" =
let range = tuple_range (0, 0) (0, 0) in
test "foo\n\bar\nbaz\n" range ~change:"XXX\n";
[%expect {|
[%expect
{|
UTF16:
XXX\nfoo\n\bar\nbaz\n
UTF8:
XXX\nfoo\n\bar\nbaz\n |}]

let%expect_test "replace first line" =
let range = tuple_range (0, 0) (1, 0) in
test "foo\nbar\n" range ~change:"baz\n";
[%expect {| baz\nbar\n |}]
[%expect {|
UTF16:
baz\nbar\n
UTF8:
baz\nbar\n |}]

let%expect_test "beyond max char" =
let range = tuple_range (0, 0) (0, 100) in
test "foo\nbar\n" range ~change:"baz\n";
[%expect {| baz\n |}]
[%expect {|
UTF16:
baz\n
UTF8:
baz\nbar\n |}]
4 changes: 2 additions & 2 deletions ocaml-lsp-server/src/document.ml
Original file line number Diff line number Diff line change
Expand Up @@ -216,9 +216,9 @@ let make_merlin wheel merlin_db pipeline tdoc syntax =
in
Merlin { merlin_config; tdoc; pipeline; timer; syntax }

let make wheel config pipeline (doc : DidOpenTextDocumentParams.t) =
let make wheel config pipeline (doc : DidOpenTextDocumentParams.t) ~encoding =
Fiber.of_thunk (fun () ->
let tdoc = Text_document.make doc in
let tdoc = Text_document.make ~encoding doc in
let syntax = Syntax.of_text_document tdoc in
match syntax with
| Ocaml | Reason -> make_merlin wheel config pipeline tdoc syntax
Expand Down
1 change: 1 addition & 0 deletions ocaml-lsp-server/src/document.mli
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ val make :
-> Merlin_config.DB.t
-> Single_pipeline.t
-> DidOpenTextDocumentParams.t
-> encoding:[ `UTF8 | `UTF16 ]
-> t Fiber.t

val uri : t -> Uri.t
Expand Down
1 change: 1 addition & 0 deletions ocaml-lsp-server/src/import.ml
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ include struct
module OptionalVersionedTextDocumentIdentifier =
OptionalVersionedTextDocumentIdentifier
module ParameterInformation = ParameterInformation
module PositionEncodingKind = PositionEncodingKind
module ProgressParams = ProgressParams
module ProgressToken = ProgressToken
module PublishDiagnosticsParams = PublishDiagnosticsParams
Expand Down
2 changes: 2 additions & 0 deletions ocaml-lsp-server/src/inference.ml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,9 @@ let open_document_from_file (state : State.t) uri =
DidOpenTextDocumentParams.create ~textDocument:text_document
in
let+ doc =
let encoding = State.encoding state in
Document.make
~encoding
(State.wheel state)
state.merlin_config
state.merlin
Expand Down
33 changes: 29 additions & 4 deletions ocaml-lsp-server/src/ocaml_lsp_server.ml
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,15 @@ let initialize_info (client_capabilities : ClientCapabilities.t) :
~full
()))
in
let positionEncoding =
let open Option.O in
let* general = client_capabilities.general in
let* options = general.positionEncodings in
List.find_map
([ UTF8; UTF16 ] : PositionEncodingKind.t list)
~f:(fun encoding ->
Option.some_if (List.mem options ~equal:Poly.equal encoding) encoding)
in
ServerCapabilities.create
~textDocumentSync
~hoverProvider:(`Bool true)
Expand All @@ -139,6 +148,7 @@ let initialize_info (client_capabilities : ClientCapabilities.t) :
~renameProvider
~workspace
~executeCommandProvider
?positionEncoding
()
in
let serverInfo =
Expand Down Expand Up @@ -236,7 +246,16 @@ let on_initialize server (ip : InitializeParams.t) =
let+ () = Fiber.Pool.task state.detached ~f:(fun () -> Dune.run dune) in
dune
in
let state = State.initialize state ip workspaces dune diagnostics in
let initialize_info = initialize_info ip.capabilities in
let state =
let encoding =
match initialize_info.capabilities.positionEncoding with
| None | Some UTF16 -> `UTF16
| Some UTF8 -> `UTF8
| Some UTF32 | Some (Other _) -> assert false
in
State.initialize state ~encoding ip workspaces dune diagnostics
in
let state =
match ip.trace with
| None -> state
Expand All @@ -254,7 +273,7 @@ let on_initialize server (ip : InitializeParams.t) =
; _
} ->
Reply.later (fun send ->
let* () = send (initialize_info ip.capabilities) in
let* () = send initialize_info in
let register =
RegistrationParams.create
~registrations:
Expand All @@ -279,7 +298,7 @@ let on_initialize server (ip : InitializeParams.t) =
Server.request
server
(Server_request.ClientRegisterCapability register))
| _ -> Reply.now (initialize_info ip.capabilities)
| _ -> Reply.now initialize_info
in
(resp, state)

Expand Down Expand Up @@ -911,7 +930,13 @@ let on_notification server (notification : Client_notification.t) :
match notification with
| TextDocumentDidOpen params ->
let* doc =
Document.make (State.wheel state) state.merlin_config state.merlin params
let encoding = State.encoding state in
Document.make
~encoding
(State.wheel state)
state.merlin_config
state.merlin
params
in
assert (Document_store.get_opt store params.textDocument.uri = None);
let* () = Document_store.open_document store doc in
Expand Down
Loading

0 comments on commit f48f53d

Please sign in to comment.