diff --git a/src/nvim/os/shell.c b/src/nvim/os/shell.c index 166e06b1a40c3a..3214fd138846f9 100644 --- a/src/nvim/os/shell.c +++ b/src/nvim/os/shell.c @@ -422,7 +422,7 @@ static void out_data_ring(char *output, size_t size) } if (output == NULL && size == SIZE_MAX) { // Print mode - out_data_append_to_screen(last_skipped, last_skipped_len, true); + out_data_append_to_screen(last_skipped, &last_skipped_len, true); return; } @@ -450,30 +450,40 @@ static void out_data_ring(char *output, size_t size) /// @param output Data to append to screen lines. /// @param remaining Size of data. /// @param new_line If true, next data output will be on a new line. -static void out_data_append_to_screen(char *output, size_t remaining, - bool new_line) +static void out_data_append_to_screen(char *output, size_t *count, + bool eof) { - char *p = output, *end = output + remaining; + char *p = output, *end = output + *count; while (p < end) { if (*p == '\n' || *p == '\r' || *p == TAB || *p == BELL) { msg_putchar_attr((uint8_t)(*p), 0); p++; } else { + // Note: this is not 100% precise: + // 1. we don't check if received continuation bytes are already invalid + // and we thus do some buffering that could be avoided + // 2. we don't compose chars over buffer boundaries, even if we see an + // incomplete UTF-8 sequence that could be composing with the last + // complete sequence. + // This will be corrected when we switch to vterm based implementation int i = *p ? mb_ptr2len_len((char_u *)p, (int)(end-p)) : 1; + if (!eof && i == 1 && utf8len_tab_zero[*(uint8_t *)p] > (end-p) && *p) { + *count = (size_t)(p - output); + goto end; + } (void)msg_outtrans_len_attr((char_u *)p, i, 0); p += i; } } +end: ui_flush(); } static void out_data_cb(Stream *stream, RBuffer *buf, size_t count, void *data, bool eof) { - // We always output the whole buffer, so the buffer can never - // wrap around. size_t cnt; char *ptr = rbuffer_read_ptr(buf, &cnt); @@ -482,12 +492,16 @@ static void out_data_cb(Stream *stream, RBuffer *buf, size_t count, void *data, // Save the skipped output. If it is the final chunk, we display it later. out_data_ring(ptr, cnt); } else { - out_data_append_to_screen(ptr, cnt, eof); + out_data_append_to_screen(ptr, &cnt, eof); } if (cnt) { rbuffer_consumed(buf, cnt); } + + // Move remaining data to start of buffer, so the buffer can never + // wrap around. + rbuffer_reset(buf); } /// Parses a command string into a sequence of words, taking quotes into diff --git a/test/functional/fixtures/shell-test.c b/test/functional/fixtures/shell-test.c index 8dbec2aaeefbd5..38695ce76b0757 100644 --- a/test/functional/fixtures/shell-test.c +++ b/test/functional/fixtures/shell-test.c @@ -4,6 +4,13 @@ #include #include #include +#include + +static void wait(void) +{ + fflush(stdout); + usleep(10*1000); +} static void help(void) { @@ -61,6 +68,22 @@ int main(int argc, char **argv) for (uint8_t i = 0; i < number; i++) { printf("%d: %s\n", (int) i, argv[3]); } + } else if (strcmp(argv[1], "UTF-8") == 0) { + // test split-up UTF-8 sequence + printf("\xc3"); wait(); + printf("\xa5\n"); wait(); + + // split up a 2+2 grapheme clusters all possible ways + printf("ref: \xc3\xa5\xcc\xb2\n"); wait(); + + printf("1: \xc3"); wait(); + printf("\xa5\xcc\xb2\n"); wait(); + + printf("2: \xc3\xa5"); wait(); + printf("\xcc\xb2\n"); wait(); + + printf("3: \xc3\xa5\xcc"); wait(); + printf("\xb2\n"); wait(); } else { fprintf(stderr, "Unknown first argument\n"); return 3; diff --git a/test/functional/ui/output_spec.lua b/test/functional/ui/output_spec.lua index a1190d786b8d6a..749f6cd42f37d7 100644 --- a/test/functional/ui/output_spec.lua +++ b/test/functional/ui/output_spec.lua @@ -9,6 +9,7 @@ local feed_command = helpers.feed_command local iswin = helpers.iswin local clear = helpers.clear local command = helpers.command +local nvim_dir = helpers.nvim_dir describe("shell command :!", function() if helpers.pending_win32(pending) then return end @@ -196,5 +197,23 @@ describe("shell command :!", function() end) end) + it('handles multibyte sequences split over buffer boundaries', function() + command('cd '..nvim_dir) + feed_command('!./shell-test UTF-8') + -- Note: only the first example of split composed char works + screen:expect([[ + {1:~ }| + {1:~ }| + :!./shell-test UTF-8 | + å | + ref: å̲ | + 1: å̲ | + 2: å ̲ | + 3: å ̲ | + | + {3:Press ENTER or type command to continue}^ | + ]]) + end) + end) end)