Skip to content

Commit

Permalink
shell: handle split-up UTF-8 sequences
Browse files Browse the repository at this point in the history
  • Loading branch information
bfredl committed Feb 8, 2018
1 parent 2e6f06d commit 3d26213
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 7 deletions.
28 changes: 21 additions & 7 deletions src/nvim/os/shell.c
Expand Up @@ -422,7 +422,7 @@ static void out_data_ring(char *output, size_t size)
}

if (output == NULL && size == SIZE_MAX) { // Print mode
out_data_append_to_screen(last_skipped, last_skipped_len, true);
out_data_append_to_screen(last_skipped, &last_skipped_len, true);
return;
}

Expand Down Expand Up @@ -450,30 +450,40 @@ static void out_data_ring(char *output, size_t size)
/// @param output Data to append to screen lines.
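/// @param[in,out] count Size of data. If a trailing incomplete UTF-8 sequence
///                      is held back, set to the number of bytes consumed.
/// @param eof If true, no more data will follow, so a trailing incomplete
///            UTF-8 sequence is output instead of being held back.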
static void out_data_append_to_screen(char *output, size_t remaining,
bool new_line)
static void out_data_append_to_screen(char *output, size_t *count,
bool eof)
{
char *p = output, *end = output + remaining;
char *p = output, *end = output + *count;
while (p < end) {
if (*p == '\n' || *p == '\r' || *p == TAB || *p == BELL) {
msg_putchar_attr((uint8_t)(*p), 0);
p++;
} else {
// Note: this is not 100% precise:
// 1. we don't check if received continuation bytes are already invalid
// and we thus do some buffering that could be avoided
// 2. we don't compose chars over buffer boundaries, even if we see an
// incomplete UTF-8 sequence that could be composing with the last
// complete sequence.
// This will be corrected when we switch to vterm based implementation
int i = *p ? mb_ptr2len_len((char_u *)p, (int)(end-p)) : 1;
if (!eof && i == 1 && utf8len_tab_zero[*(uint8_t *)p] > (end-p) && *p) {
*count = (size_t)(p - output);
goto end;
}

(void)msg_outtrans_len_attr((char_u *)p, i, 0);
p += i;
}
}

end:
ui_flush();
}

static void out_data_cb(Stream *stream, RBuffer *buf, size_t count, void *data,
bool eof)
{
// We always output the whole buffer, so the buffer can never
// wrap around.
size_t cnt;
char *ptr = rbuffer_read_ptr(buf, &cnt);

Expand All @@ -482,12 +492,16 @@ static void out_data_cb(Stream *stream, RBuffer *buf, size_t count, void *data,
// Save the skipped output. If it is the final chunk, we display it later.
out_data_ring(ptr, cnt);
} else {
out_data_append_to_screen(ptr, cnt, eof);
out_data_append_to_screen(ptr, &cnt, eof);
}

if (cnt) {
rbuffer_consumed(buf, cnt);
}

// Move remaining data to start of buffer, so the buffer can never
// wrap around.
rbuffer_reset(buf);
}

/// Parses a command string into a sequence of words, taking quotes into
Expand Down
23 changes: 23 additions & 0 deletions test/functional/fixtures/shell-test.c
Expand Up @@ -4,6 +4,13 @@
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>

static void wait(void)
{
fflush(stdout);
usleep(10*1000);
}

static void help(void)
{
Expand Down Expand Up @@ -61,6 +68,22 @@ int main(int argc, char **argv)
for (uint8_t i = 0; i < number; i++) {
printf("%d: %s\n", (int) i, argv[3]);
}
} else if (strcmp(argv[1], "UTF-8") == 0) {
// test split-up UTF-8 sequence
printf("\xc3"); wait();
printf("\xa5\n"); wait();

// split up a 2+2 byte grapheme cluster in all possible ways
printf("ref: \xc3\xa5\xcc\xb2\n"); wait();

printf("1: \xc3"); wait();
printf("\xa5\xcc\xb2\n"); wait();

printf("2: \xc3\xa5"); wait();
printf("\xcc\xb2\n"); wait();

printf("3: \xc3\xa5\xcc"); wait();
printf("\xb2\n"); wait();
} else {
fprintf(stderr, "Unknown first argument\n");
return 3;
Expand Down
25 changes: 25 additions & 0 deletions test/functional/ui/output_spec.lua
Expand Up @@ -9,6 +9,7 @@ local feed_command = helpers.feed_command
local iswin = helpers.iswin
local clear = helpers.clear
local command = helpers.command
local nvim_dir = helpers.nvim_dir

describe("shell command :!", function()
if helpers.pending_win32(pending) then return end
Expand Down Expand Up @@ -196,5 +197,29 @@ describe("shell command :!", function()
end)
end)

it('handles multibyte sequences split over buffer boundaries', function()
  -- Run from nvim_dir so the relative "./shell-test" path below resolves.
  command('cd '..nvim_dir)
  -- The Windows variant is invoked without the "./" prefix.
  local cmd = iswin() and '!shell-test UTF-8 ' or '!./shell-test UTF-8'
  feed_command(cmd)
  -- Expected screen: the "ref" and "1" lines show the base character with its
  -- combining mark attached; on the "2" and "3" lines the combining mark is
  -- rendered in a cell of its own instead of composing with the base.
  screen:expect([[
{1:~ }|
{1:~ }|
:]]..cmd..[[ |
å |
ref: å̲ |
1: å̲ |
2: å ̲ |
3: å ̲ |
|
{3:Press ENTER or type command to continue}^ |
]])
end)

end)
end)

0 comments on commit 3d26213

Please sign in to comment.