From d24b0ba3309f6b207cc19781c7e40b211a1e96a0 Mon Sep 17 00:00:00 2001 From: Johannes Altmanninger Date: Wed, 3 Apr 2024 14:33:35 +0200 Subject: [PATCH] Fix off-by-one error converting to LSP UTF8 offsets with multi-byte char MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On this file, ```rust fn main() { let 된장 = 1; } ``` when using `"positionEncodings":["utf-16"]` I get an "unused variable" diagnostic on the variable name (codepoint offset range `8..10`). So far so good. When using `positionEncodings":["utf-8"]`, I expect to get the equivalent range in bytes (LSP: "Character offsets count UTF-8 code units (e.g bytes)."), which is `8..14`, because both characters are 3 bytes in UTF-8. However I actually get `10..14`. Looks like this is because we accidentally treat a 1-based index as an offset value: when converting from our internal char-indices to LSP byte offsets, we look at one character to many. This causes wrong results if the extra character is a multi-byte one, such as when computing the start coordinate of 된장. Fix that by actually passing an offset. While at it, fix the variable name of the line number, which is not an offset (yet). Originally reported at https://github.com/kakoune-lsp/kakoune-lsp/issues/740 --- .../rust-analyzer/src/diagnostics/to_proto.rs | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/crates/rust-analyzer/src/diagnostics/to_proto.rs b/crates/rust-analyzer/src/diagnostics/to_proto.rs index 7c4deac93f21..3d3f94401991 100644 --- a/crates/rust-analyzer/src/diagnostics/to_proto.rs +++ b/crates/rust-analyzer/src/diagnostics/to_proto.rs @@ -68,8 +68,13 @@ fn location( let range = { let position_encoding = snap.config.position_encoding(); lsp_types::Range::new( - position(&position_encoding, span, span.line_start, span.column_start), - position(&position_encoding, span, span.line_end, span.column_end), + position( + &position_encoding, + span, + span.line_start, + span.column_start.saturating_sub(1), + ), + position(&position_encoding, span, span.line_end, span.column_end.saturating_sub(1)), ) }; lsp_types::Location::new(uri, range) @@ -78,10 +83,10 @@ fn location( fn position( position_encoding: &PositionEncoding, span: &DiagnosticSpan, - line_offset: usize, + line_number: usize, column_offset_utf32: usize, ) -> lsp_types::Position { - let line_index = line_offset - span.line_start; + let line_index = line_number - span.line_start; let column_offset_encoded = match span.text.get(line_index) { // Fast path. @@ -104,8 +109,8 @@ fn position( }; lsp_types::Position { - line: (line_offset as u32).saturating_sub(1), - character: (column_offset_encoded as u32).saturating_sub(1), + line: (line_number as u32).saturating_sub(1), + character: column_offset_encoded as u32, } }