Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(lsp): add lsp.util.split_lines and lsp.util.split_lines_iter #16286

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
92 changes: 89 additions & 3 deletions runtime/lua/vim/lsp/util.lua
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,95 @@ local function get_border_size(opts)
return { height = height, width = width }
end

---@private
local function split_lines(value)
return split(value, '\n', true)
--- Splits the given text into lines according to LSP's definition of text
--- lines. Quoting the specification: "To ensure that both client and server
--- split the string into the same line representation the protocol specifies
--- the following end-of-line sequences: '\n', '\r\n' and '\r'."
---
--- The result is built by collecting every line yielded by
--- |vim.lsp.util.split_lines_iter()| into a list, in iteration order; the
--- start indexes yielded by the iterator are discarded.
---
---@param text string Text to split into lines.
---@param opts table|nil Optional table passed unchanged to
---             |vim.lsp.util.split_lines_iter()|, which validates the boolean
---             fields `keep_line_endings` and `keep_final_eol`.
---@return table List of lines.
---@see |vim.lsp.util.split_lines_iter()|
---@see https://microsoft.github.io/language-server-protocol/specifications/specification-current/#textDocuments
function M.split_lines(text, opts)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given we don't internally handle \r according to the specification, I'm not sure this is correct

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you talking about the function or the usages of it?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm talking about we shouldn't be splitting lines on \r before sending to the server

local lines = {}
local i = 1
for _, line in M.split_lines_iter(text, opts) do
lines[i] = line
i = i + 1
end
return lines
end

--- Same as |vim.lsp.util.split_lines()|, but returns an iterator instead of a
--- list of lines.
---
--- Every call of the iterator yields two values: the 1-based byte index in
--- {text} at which the line starts, and the line itself. '\n', '\r\n' and '\r'
--- are all treated as line endings; a '\r' immediately followed by '\n' counts
--- as a single two-byte ending.
---
---@param text string Text to split into lines.
---@param opts table|nil Optional parameters:
---             - keep_line_endings (boolean, off when omitted): include the
---               end-of-line sequence at the end of each yielded line.
---             - keep_final_eol (boolean, off when omitted): when {text} ends
---               with a line ending, also yield the empty line that follows it.
---@return function Iterator yielding `start_idx, line` pairs.
function M.split_lines_iter(text, opts)
  validate {
    text = { text, 's' };
    opts = { opts, 't', true };
  }
  opts = opts or {}
  validate {
    ['opts.keep_line_endings'] = { opts.keep_line_endings, 'b', true };
    ['opts.keep_final_eol'] = { opts.keep_final_eol, 'b', true };
  }
  local keep_line_endings = opts.keep_line_endings
  local keep_final_eol = opts.keep_final_eol

  local string_find = string.find
  local string_sub = string.sub
  local math_min = math.min

  -- Cached position of the next '\n' / '\r' in `text`:
  --   number -> index of the first occurrence found by the last search,
  --   false  -> no occurrence is left in the rest of the text.
  -- The initial 0 lies before every valid start index, which forces the first
  -- search. Start indexes only ever grow, so a cached position that is still
  -- >= the current start index remains the first occurrence from there and
  -- does not need to be searched for again. Without the cache, a single
  -- far-away '\r' in otherwise LF-separated text (or a far-away '\n' in
  -- CR-separated text) would be re-scanned for on every line, which is
  -- quadratic in the size of the text.
  local next_lf_idx = 0
  local next_cr_idx = 0

  -- NOTE: string.find is implemented using memchr in LuaJIT, which will most
  -- likely be faster than running our own loops.
  --
  -- Returns the index of the first line ending at or after `start_idx` and its
  -- length in bytes (2 for '\r\n', otherwise 1). The index is nil when there
  -- is no line ending left.
  local function find_curr_line_ending(start_idx)
    if next_lf_idx and next_lf_idx < start_idx then
      next_lf_idx = string_find(text, '\n', start_idx, true) or false
    end
    if next_cr_idx and next_cr_idx < start_idx then
      next_cr_idx = string_find(text, '\r', start_idx, true) or false
    end

    if next_lf_idx and next_cr_idx then
      if next_cr_idx + 1 == next_lf_idx then
        return next_cr_idx, 2
      end
      return math_min(next_lf_idx, next_cr_idx), 1
    end
    return next_lf_idx or next_cr_idx or nil, 1
  end

  local line_start_idx = 1
  local done = false
  return function()
    if done then
      return
    end

    local line_ending_idx, line_ending_len = find_curr_line_ending(line_start_idx)
    if not line_ending_idx then
      -- No line ending left: the remainder of the text (possibly empty) is the
      -- last line.
      done = true
      local line = string_sub(text, line_start_idx)
      return line_start_idx, line
    end

    local line_end_idx = line_ending_idx - 1
    if keep_line_endings then
      line_end_idx = line_end_idx + line_ending_len
    end
    local line = string_sub(text, line_start_idx, line_end_idx)

    local prev_line_start_idx = line_start_idx
    line_start_idx = line_ending_idx + line_ending_len
    -- The text ended with this line ending. Unless the caller asked for the
    -- trailing empty line, stop here instead of yielding it on the next call.
    if line_start_idx > #text and not keep_final_eol then
      done = true
    end
    return prev_line_start_idx, line
  end
end

--- Replaces text in a range with new text.
Expand Down
87 changes: 87 additions & 0 deletions test/functional/plugin/lsp_spec.lua
Original file line number Diff line number Diff line change
Expand Up @@ -2578,4 +2578,91 @@ describe('LSP', function()
}
end)
end)

describe('lsp.util.split_lines', function()
  -- Evaluates vim.lsp.util.split_lines() with the given arguments through
  -- exec_lua and asserts (via eq) that it returns `expected_lines`.
  local function test_split_lines(expected_lines, ...)
    eq(expected_lines, exec_lua('return vim.lsp.util.split_lines(...)', ...))
  end

  it('handles empty strings', function()
    test_split_lines({''}, '')
  end)

  it('handles strings without line ending characters', function()
    test_split_lines({'hi!'}, 'hi!')
  end)

  it('handles strings without a final EOL', function()
    test_split_lines({'abcdef', 'ghijkl'}, 'abcdef\nghijkl')
    test_split_lines({'a', 'b', 'd'}, 'a\r\nb\nd')
  end)

  -- Fixture mixing all three line endings. The body lines are indented by
  -- exactly two spaces: the byte offsets asserted in the "line start indexes"
  -- test below (44, 69, 70, 82, 83) are only correct with that indentation.
  local test_text = table.concat({
    '#include <stdio.h>\n',
    'int main() {\r\n',
    '  printf(\r"привет мир");\r\n\r', -- Let's throw in some multibyte because why not
    '  return 0;\n\r',
    '}\n',
  })

  it('handles text with mixed newlines', function()
    test_split_lines({
      '#include <stdio.h>',
      'int main() {',
      '  printf(',
      '"привет мир");',
      '',
      '  return 0;',
      '',
      '}',
    }, test_text)
  end)

  it('handles keep_line_endings', function()
    test_split_lines({
      '#include <stdio.h>\n',
      'int main() {\r\n',
      '  printf(\r',
      '"привет мир");\r\n',
      '\r',
      '  return 0;\n',
      '\r',
      '}\n',
    }, test_text, { keep_line_endings = true })
  end)

  it('handles keep_final_eol', function()
    test_split_lines({ 'абв', 'где', '' }, 'абв\nгде\n', { keep_final_eol = true })
    test_split_lines({ 'абв', 'где' }, 'абв\nгде', { keep_final_eol = true })
    test_split_lines({ 'абв\n', 'где\n', '' }, 'абв\nгде\n', { keep_final_eol = true, keep_line_endings = true })
    test_split_lines({ 'абв\n', 'где' }, 'абв\nгде', { keep_final_eol = true, keep_line_endings = true })
  end)

  it('handles a final CRLF', function()
    test_split_lines({ 'test' }, 'test\r\n')
  end)

  it('handles the no_carrige_returns_left optimization correctly', function()
    test_split_lines({ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h' }, 'a\r\nb\r\nc\rd\ne\nf\ng\nh\n')
  end)

  it('the iterator returns correct line start indexes', function()
    -- Start indexes are 1-based byte offsets into test_text; the Cyrillic
    -- characters take two bytes each.
    eq({
      { 1, '#include <stdio.h>' },
      { 20, 'int main() {' },
      { 34, '  printf(' },
      { 44, '"привет мир");' },
      { 69, '' },
      { 70, '  return 0;' },
      { 82, '' },
      { 83, '}' },
    }, exec_lua([[
local results = {}
for start_idx, line in vim.lsp.util.split_lines_iter(...) do
table.insert(results, { start_idx, line })
end
return results
]], test_text))
  end)
end)
end)