diff --git a/runtime/autoload/health.vim b/runtime/autoload/health.vim index cbfd7c76a1b609..346b6e386eb72d 100644 --- a/runtime/autoload/health.vim +++ b/runtime/autoload/health.vim @@ -9,13 +9,20 @@ function! s:enhance_syntax() abort highlight link healthInfo ModeMsg syntax keyword healthSuccess SUCCESS - highlight link healthSuccess Function + highlight link healthSuccess ModeMsg syntax keyword healthSuggestion SUGGESTIONS highlight link healthSuggestion String + syntax match healthHelp "|.\{-}|" contains=healthBar + syntax match healthBar "|" contained conceal + highlight link healthHelp Identifier + " We do not care about markdown syntax errors in :CheckHealth output. highlight! link markdownError Normal + + " We don't need code blocks. + syntax clear markdownCodeBlock endfunction " Runs the specified healthchecks. @@ -28,6 +35,8 @@ function! health#check(plugin_names) abort tabnew setlocal wrap breakindent setlocal filetype=markdown bufhidden=wipe + setlocal conceallevel=2 concealcursor=nc + setlocal keywordprg=:help call s:enhance_syntax() if empty(healthchecks) @@ -78,6 +87,11 @@ function! s:indent_after_line1(s, columns) abort return join(lines, "\n") endfunction +" Changes ':help clipboard' to '|clipboard|'. Also removes surrounding quotes. +function! s:help_to_link(s) abort + return substitute(a:s, '\v[''"]?:h%[elp] ([^''"]+)[''"]?', '|\1|', 'g') +endfunction + " Format a message for a specific report item function! s:format_report_message(status, msg, ...) abort " {{{ let output = ' - ' . a:status . ': ' . s:indent_after_line1(a:msg, 4) @@ -99,7 +113,7 @@ function! s:format_report_message(status, msg, ...) abort " {{{ let output .= "\n - " . 
s:indent_after_line1(suggestion, 10) endfor - return output + return s:help_to_link(output) endfunction " }}} " Use {msg} to report information in the current section diff --git a/runtime/autoload/health/provider.vim b/runtime/autoload/health/provider.vim index d4b2f07a17ed1b..b3af3d50bc2619 100644 --- a/runtime/autoload/health/provider.vim +++ b/runtime/autoload/health/provider.vim @@ -45,6 +45,19 @@ function! s:download(url) abort return 'missing `curl` and `python`, cannot make pypi request' endfunction +" Check for clipboard tools. +function! s:check_clipboard() abort + call health#report_start('Clipboard') + + let clipboard_tool = provider#clipboard#Executable() + if empty(clipboard_tool) + call health#report_warn( + \ "No clipboard tool found. Using the system clipboard won't work.", + \ ['See |clipboard|.']) + else + call health#report_ok('Clipboard tool found: '. clipboard_tool) + endif +endfunction " Get the latest Neovim Python client version from PyPI. function! s:latest_pypi_version() abort @@ -371,6 +384,7 @@ function! s:check_ruby() abort endfunction function! health#provider#check() abort + call s:check_clipboard() call s:check_python(2) call s:check_python(3) call s:check_ruby() diff --git a/runtime/autoload/provider/clipboard.vim b/runtime/autoload/provider/clipboard.vim index 0f4aa78ddd0fb5..f63ad5730b9e78 100644 --- a/runtime/autoload/provider/clipboard.vim +++ b/runtime/autoload/provider/clipboard.vim @@ -31,34 +31,51 @@ function! s:try_cmd(cmd, ...) 
endfunction let s:cache_enabled = 1 -if executable('pbcopy') - let s:copy['+'] = 'pbcopy' - let s:paste['+'] = 'pbpaste' - let s:copy['*'] = s:copy['+'] - let s:paste['*'] = s:paste['+'] - let s:cache_enabled = 0 -elseif exists('$DISPLAY') && executable('xsel') - let s:copy['+'] = 'xsel --nodetach -i -b' - let s:paste['+'] = 'xsel -o -b' - let s:copy['*'] = 'xsel --nodetach -i -p' - let s:paste['*'] = 'xsel -o -p' -elseif exists('$DISPLAY') && executable('xclip') - let s:copy['+'] = 'xclip -quiet -i -selection clipboard' - let s:paste['+'] = 'xclip -o -selection clipboard' - let s:copy['*'] = 'xclip -quiet -i -selection primary' - let s:paste['*'] = 'xclip -o -selection primary' -elseif executable('lemonade') - let s:copy['+'] = 'lemonade copy' - let s:paste['+'] = 'lemonade paste' - let s:copy['*'] = 'lemonade copy' - let s:paste['*'] = 'lemonade paste' -elseif executable('doitclient') - let s:copy['+'] = 'doitclient wclip' - let s:paste['+'] = 'doitclient wclip -r' - let s:copy['*'] = s:copy['+'] - let s:paste['*'] = s:paste['+'] -else - echom 'clipboard: No clipboard tool available. See :help clipboard' +let s:err = '' + +function! provider#clipboard#Error() abort + return s:err +endfunction + +function! 
provider#clipboard#Executable() abort + if executable('pbcopy') + let s:copy['+'] = 'pbcopy' + let s:paste['+'] = 'pbpaste' + let s:copy['*'] = s:copy['+'] + let s:paste['*'] = s:paste['+'] + let s:cache_enabled = 0 + return 'pbcopy' + elseif exists('$DISPLAY') && executable('xsel') + let s:copy['+'] = 'xsel --nodetach -i -b' + let s:paste['+'] = 'xsel -o -b' + let s:copy['*'] = 'xsel --nodetach -i -p' + let s:paste['*'] = 'xsel -o -p' + return 'xsel' + elseif exists('$DISPLAY') && executable('xclip') + let s:copy['+'] = 'xclip -quiet -i -selection clipboard' + let s:paste['+'] = 'xclip -o -selection clipboard' + let s:copy['*'] = 'xclip -quiet -i -selection primary' + let s:paste['*'] = 'xclip -o -selection primary' + return 'xclip' + elseif executable('lemonade') + let s:copy['+'] = 'lemonade copy' + let s:paste['+'] = 'lemonade paste' + let s:copy['*'] = 'lemonade copy' + let s:paste['*'] = 'lemonade paste' + return 'lemonade' + elseif executable('doitclient') + let s:copy['+'] = 'doitclient wclip' + let s:paste['+'] = 'doitclient wclip -r' + let s:copy['*'] = s:copy['+'] + let s:paste['*'] = s:paste['+'] + return 'doitclient' + endif + + let s:err = 'clipboard: No clipboard tool available. See :help clipboard' + return '' +endfunction + +if empty(provider#clipboard#Executable()) finish endif diff --git a/runtime/doc/eval.txt b/runtime/doc/eval.txt index 3ce0d1cd87c17b..5c704837d6cfdc 100644 --- a/runtime/doc/eval.txt +++ b/runtime/doc/eval.txt @@ -1029,8 +1029,8 @@ A string constant accepts these special characters: \x. byte specified with one hex number (must be followed by non-hex char) \X.. same as \x.. \X. same as \x. -\u.... character specified with up to 4 hex numbers, stored according to the - current value of 'encoding' (e.g., "\u02a4") +\u.... character specified with up to 4 hex numbers, stored as UTF-8 + (e.g., "\u02a4") \U.... same as \u but allows up to 8 hex numbers. 
\b backspace \e escape @@ -1045,8 +1045,7 @@ A string constant accepts these special characters: utf-8 character, use \uxxxx as mentioned above. Note that "\xff" is stored as the byte 255, which may be invalid in some -encodings. Use "\u00ff" to store character 255 according to the current value -of 'encoding'. +encodings. Use "\u00ff" to store character 255 correctly as UTF-8. Note that "\000" and "\x00" force the end of the string. @@ -2532,8 +2531,6 @@ byteidxcomp({expr}, {nr}) *byteidxcomp()* < The first and third echo result in 3 ('e' plus composing character is 3 bytes), the second echo results in 1 ('e' is one byte). - Only works different from byteidx() when 'encoding' is set to - a Unicode encoding. call({func}, {arglist} [, {dict}]) *call()* *E699* Call function {func} with the items in |List| {arglist} as @@ -2568,11 +2565,11 @@ char2nr({expr}[, {utf8}]) *char2nr()* Return number value of the first char in {expr}. Examples: > char2nr(" ") returns 32 char2nr("ABC") returns 65 -< When {utf8} is omitted or zero, the current 'encoding' is used. - Example for "utf-8": > char2nr("á") returns 225 char2nr("á"[0]) returns 195 -< With {utf8} set to 1, always treat as utf-8 characters. +< Non-ASCII characters are always treated as UTF-8 characters. + {utf8} has no effect, and exists only for + backwards-compatibility. A combining character is a separate character. |nr2char()| does the opposite. @@ -4225,11 +4222,7 @@ iconv({expr}, {from}, {to}) *iconv()* Most conversions require Vim to be compiled with the |+iconv| feature. Otherwise only UTF-8 to latin1 conversion and back can be done. - This can be used to display messages with special characters, - no matter what 'encoding' is set to. Write the message in - UTF-8 and use: > - echo iconv(utf8_str, "utf-8", &enc) -< Note that Vim uses UTF-8 for all Unicode encodings, conversion + Note that Vim uses UTF-8 for all Unicode encodings, conversion from/to UCS-2 is automatically changed to use UTF-8. 
You cannot use UCS-2 in a string anyway, because of the NUL bytes. {only available when compiled with the |+multi_byte| feature} @@ -4513,9 +4506,7 @@ join({list} [, {sep}]) *join()* json_decode({expr}) *json_decode()* Convert {expr} from JSON object. Accepts |readfile()|-style list as the input, as well as regular string. May output any - Vim value. When 'encoding' is not UTF-8 string is converted - from UTF-8 to 'encoding', failing conversion fails - json_decode(). In the following cases it will output + Vim value. In the following cases it will output |msgpack-special-dict|: 1. Dictionary contains duplicate key. 2. Dictionary contains empty key. @@ -4523,33 +4514,22 @@ json_decode({expr}) *json_decode()* dictionary and for string will be emitted in case string with NUL byte was a dictionary key. - Note: function treats its input as UTF-8 always regardless of - 'encoding' value. This is needed because JSON source is - supposed to be external (e.g. |readfile()|) and JSON standard - allows only a few encodings, of which UTF-8 is recommended and - the only one required to be supported. Non-UTF-8 characters - are an error. + Note: function treats its input as UTF-8 always. The JSON + standard allows only a few encodings, of which UTF-8 is + recommended and the only one required to be supported. + Non-UTF-8 characters are an error. json_encode({expr}) *json_encode()* Convert {expr} into a JSON string. Accepts - |msgpack-special-dict| as the input. Converts from 'encoding' - to UTF-8 when encoding strings. Will not convert |Funcref|s, + |msgpack-special-dict| as the input. Will not convert |Funcref|s, mappings with non-string keys (can be created as |msgpack-special-dict|), values with self-referencing containers, strings which contain non-UTF-8 characters, pseudo-UTF-8 strings which contain codepoints reserved for surrogate pairs (such strings are not valid UTF-8 strings). 
- When converting 'encoding' is taken into account, if it is not - "utf-8", then conversion is performed before encoding strings. Non-printable characters are converted into "\u1234" escapes or special escapes like "\t", other are dumped as-is. - Note: all characters above U+0079 are considered non-printable - when 'encoding' is not UTF-8. This function always outputs - UTF-8 strings as required by the standard thus when 'encoding' - is not unicode resulting string will look incorrect if - "\u1234" notation is not used. - keys({dict}) *keys()* Return a |List| with all the keys of {dict}. The |List| is in arbitrary order. @@ -4651,9 +4631,9 @@ line2byte({lnum}) *line2byte()* Return the byte count from the start of the buffer for line {lnum}. This includes the end-of-line character, depending on the 'fileformat' option for the current buffer. The first - line returns 1. 'encoding' matters, 'fileencoding' is ignored. - This can also be used to get the byte count for the line just - below the last line: > + line returns 1. UTF-8 encoding is used, 'fileencoding' is + ignored. This can also be used to get the byte count for the + line just below the last line: > line2byte(line("$") + 1) < This is the buffer size plus one. If 'fileencoding' is empty it is the file size plus one. @@ -5172,10 +5152,10 @@ nr2char({expr}[, {utf8}]) *nr2char()* value {expr}. Examples: > nr2char(64) returns "@" nr2char(32) returns " " -< When {utf8} is omitted or zero, the current 'encoding' is used. - Example for "utf-8": > +< Example for "utf-8": > nr2char(300) returns I with bow character -< With {utf8} set to 1, always return utf-8 characters. +< UTF-8 encoding is always used, {utf8} option has no effect, + and exists only for backwards-compatibility. Note that a NUL character in the file is specified with nr2char(10), because NULs are represented with newline characters. 
nr2char(0) is a real NUL and terminates the @@ -5417,7 +5397,7 @@ py3eval({expr}) *py3eval()* converted to Vim data structures. Numbers and strings are returned as they are (strings are copied though, Unicode strings are additionally converted to - 'encoding'). + UTF-8). Lists are represented as Vim |List| type. Dictionaries are represented as Vim |Dictionary| type with keys converted to strings. @@ -5467,8 +5447,7 @@ readfile({fname} [, {binary} [, {max}]]) Otherwise: - CR characters that appear before a NL are removed. - Whether the last line ends in a NL or not does not matter. - - When 'encoding' is Unicode any UTF-8 byte order mark is - removed from the text. + - Any UTF-8 byte order mark is removed from the text. When {max} is given this specifies the maximum number of lines to be read. Useful if you only want to check the first ten lines of a file: > @@ -6621,8 +6600,7 @@ string({expr}) Return {expr} converted to a String. If {expr} is a Number, for infinite and NaN floating-point values representations which use |str2float()|. Strings are also dumped literally, only single quote is escaped, which does not allow using YAML - for parsing back binary strings (including text when - 'encoding' is not UTF-8). |eval()| should always work for + for parsing back binary strings. |eval()| should always work for strings and floats though and this is the only official method, use |msgpackdump()| or |json_encode()| if you need to share data with other application. diff --git a/runtime/doc/mbyte.txt b/runtime/doc/mbyte.txt index c87ed317d455be..3bdb682a314f04 100644 --- a/runtime/doc/mbyte.txt +++ b/runtime/doc/mbyte.txt @@ -70,29 +70,24 @@ See |mbyte-locale| for details. ENCODING -If your locale works properly, Vim will try to set the 'encoding' option -accordingly. If this doesn't work you can overrule its value: > +Nvim always uses UTF-8 internally. Thus 'encoding' option is always set +to "utf-8" and cannot be changed. 
- :set encoding=utf-8 +All the text that is used inside Vim will be in UTF-8. Not only the text in +the buffers, but also in registers, variables, etc. -See |encoding-values| for a list of acceptable values. - -The result is that all the text that is used inside Vim will be in this -encoding. Not only the text in the buffers, but also in registers, variables, -etc. 'encoding' is read-only after startup because changing it would make the -existing text invalid. - -You can edit files in another encoding than what 'encoding' is set to. Vim +You can edit files in different encodings than UTF-8. Nvim will convert the file when you read it and convert it back when you write it. See 'fileencoding', 'fileencodings' and |++enc|. DISPLAY AND FONTS -If you are working in a terminal (emulator) you must make sure it accepts the -same encoding as which Vim is working with. +If you are working in a terminal (emulator) you must make sure it accepts +UTF-8, the encoding which Vim is working with. Otherwise only ASCII can +be displayed and edited correctly. -For the GUI you must select fonts that work with the current 'encoding'. This +For the GUI you must select fonts that work with UTF-8. This is the difficult part. It depends on the system you are using, the locale and a few other things. See the chapters on fonts: |mbyte-fonts-X11| for X-Windows and |mbyte-fonts-MSwin| for MS-Windows. @@ -216,10 +211,9 @@ You could make a small shell script for this. ============================================================================== 3. Encoding *mbyte-encoding* -Vim uses the 'encoding' option to specify how characters are identified and -encoded when they are used inside Vim. This applies to all the places where -text is used, including buffers (files loaded into memory), registers and -variables. +In Nvim UTF-8 is always used internally to encode characters. + This applies to all the places where text is used, including buffers (files + loaded into memory), registers and variables. 
*charset* *codeset* Charset is another name for encoding. There are subtle differences, but these @@ -240,7 +234,7 @@ matter what language is used. Thus you might see the right text even when the encoding was set wrong. *encoding-names* -Vim can use many different character encodings. There are three major groups: +Vim can edit files in different character encodings. There are three major groups: 1 8bit Single-byte encodings, 256 different characters. Mostly used in USA and Europe. Example: ISO-8859-1 (Latin1). All @@ -255,11 +249,10 @@ u Unicode Universal encoding, can replace all others. ISO 10646. Millions of different characters. Example: UTF-8. The relation between bytes and screen cells is complex. -Other encodings cannot be used by Vim internally. But files in other +Only UTF-8 is used by Vim internally. But files in other encodings can be edited by using conversion, see 'fileencoding'. -Note that all encodings must use ASCII for the characters up to 128. -Supported 'encoding' values are: *encoding-values* +Recognized 'fileencoding' values include: *encoding-values* 1 latin1 8-bit characters (ISO 8859-1, also used for cp1252) 1 iso-8859-n ISO_8859 variant (n = 2 to 15) 1 koi8-r Russian @@ -311,11 +304,11 @@ u ucs-4 32 bit UCS-4 encoded Unicode (ISO/IEC 10646-1) u ucs-4le like ucs-4, little endian The {name} can be any encoding name that your system supports. It is passed -to iconv() to convert between the encoding of the file and the current locale. +to iconv() to convert between UTF-8 and the encoding of the file. For MS-Windows "cp{number}" means using codepage {number}. Examples: > - :set encoding=8bit-cp1252 - :set encoding=2byte-cp932 + :set fileencoding=8bit-cp1252 + :set fileencoding=2byte-cp932 The MS-Windows codepage 1252 is very similar to latin1. For practical reasons the same encoding is used and it's called latin1. 
'isprint' can be used to @@ -337,8 +330,7 @@ u ucs-2be same as ucs-2 (big endian) u ucs-4be same as ucs-4 (big endian) u utf-32 same as ucs-4 u utf-32le same as ucs-4le - default stands for the default value of 'encoding', depends on the - environment + default the encoding of the current locale. For the UCS codes the byte order matters. This is tricky, use UTF-8 whenever you can. The default is to use big-endian (most significant byte comes @@ -363,13 +355,12 @@ or when conversion is not possible: CONVERSION *charset-conversion* Vim will automatically convert from one to another encoding in several places: -- When reading a file and 'fileencoding' is different from 'encoding' -- When writing a file and 'fileencoding' is different from 'encoding' +- When reading a file and 'fileencoding' is different from "utf-8" +- When writing a file and 'fileencoding' is different from "utf-8" - When displaying messages and the encoding used for LC_MESSAGES differs from - 'encoding' (requires a gettext version that supports this). + "utf-8" (requires a gettext version that supports this). - When reading a Vim script where |:scriptencoding| is different from - 'encoding'. -- When reading or writing a |shada| file. + "utf-8". Most of these require the |+iconv| feature. Conversion for reading and writing files may also be specified with the 'charconvert' option. @@ -408,11 +399,11 @@ Useful utilities for converting the charset: *mbyte-conversion* -When reading and writing files in an encoding different from 'encoding', +When reading and writing files in an encoding different from "utf-8", conversion needs to be done. These conversions are supported: - All conversions between Latin-1 (ISO-8859-1), UTF-8, UCS-2 and UCS-4 are handled internally. -- For MS-Windows, when 'encoding' is a Unicode encoding, conversion from and +- For MS-Windows, conversion from and to any codepage should work. 
- Conversion specified with 'charconvert' - Conversion with the iconv library, if it is available. @@ -468,8 +459,6 @@ and you will have a working UTF-8 terminal emulator. Try both > with the demo text that comes with ucs-fonts.tar.gz in order to see whether there are any problems with UTF-8 in your xterm. -For Vim you may need to set 'encoding' to "utf-8". - ============================================================================== 5. Fonts on X11 *mbyte-fonts-X11* @@ -864,11 +853,11 @@ between two keyboard settings. The value of the 'keymap' option specifies a keymap file to use. The name of this file is one of these two: - keymap/{keymap}_{encoding}.vim + keymap/{keymap}_utf-8.vim keymap/{keymap}.vim -Here {keymap} is the value of the 'keymap' option and {encoding} of the -'encoding' option. The file name with the {encoding} included is tried first. +Here {keymap} is the value of the 'keymap' option. +The file name with "utf-8" included is tried first. 'runtimepath' is used to find these files. To see an overview of all available keymap files, use this: > @@ -950,7 +939,7 @@ this is unusual. But you can use various ways to specify the character: > A octal value x special key name -The characters are assumed to be encoded for the current value of 'encoding'. +The characters are assumed to be encoded in UTF-8. It's possible to use ":scriptencoding" when all characters are given literally. That doesn't work when using the construct, because the conversion is done on the keymap file, not on the resulting character. @@ -1170,21 +1159,13 @@ Useful commands: message is truncated, use ":messages"). - "g8" shows the bytes used in a UTF-8 character, also the composing characters, as hex numbers. -- ":set encoding=utf-8 fileencodings=" forces using UTF-8 for all files. The - default is to use the current locale for 'encoding' and set 'fileencodings' - to automatically detect the encoding of a file. +- ":set fileencodings=" forces using UTF-8 for all files. 
The + default is to automatically detect the encoding of a file. STARTING VIM -If your current locale is in an utf-8 encoding, Vim will automatically start -in utf-8 mode. - -If you are using another locale: > - - set encoding=utf-8 - -You might also want to select the font used for the menus. Unfortunately this +You might want to select the font used for the menus. Unfortunately this doesn't always work. See the system specific remarks below, and 'langmenu'. @@ -1245,10 +1226,9 @@ not everybody is able to type a composing character. These options are relevant for editing multi-byte files. Check the help in options.txt for detailed information. -'encoding' Encoding used for the keyboard and display. It is also the - default encoding for files. +'encoding' Internal text encoding, always "utf-8". -'fileencoding' Encoding of a file. When it's different from 'encoding' +'fileencoding' Encoding of a file. When it's different from "utf-8" conversion is done when reading or writing the file. 'fileencodings' List of possible encodings of a file. When opening a file diff --git a/runtime/doc/options.txt b/runtime/doc/options.txt index 79e5ff090f2b86..94c5e2d11d1cf2 100644 --- a/runtime/doc/options.txt +++ b/runtime/doc/options.txt @@ -52,7 +52,6 @@ achieve special effects. These options come in three forms: :se[t] all& Set all options to their default value. The values of these options are not changed: 'columns' - 'encoding' 'lines' Warning: This may have a lot of side effects. @@ -615,7 +614,6 @@ A jump table for the options with a short description can be found at |Q_op|. global {only available when compiled with the |+multi_byte| feature} - Only effective when 'encoding' is "utf-8" or another Unicode encoding. Tells Vim what to do with characters with East Asian Width Class Ambiguous (such as Euro, Registered Sign, Copyright Sign, Greek letters, Cyrillic letters). @@ -668,7 +666,6 @@ A jump table for the options with a short description can be found at |Q_op|. 
- Set the 'keymap' option to "arabic"; in Insert mode CTRL-^ toggles between typing English and Arabic key mapping. - Set the 'delcombine' option - Note that 'encoding' must be "utf-8" for working with Arabic text. Resetting this option will: - Reset the 'rightleft' option. @@ -1078,8 +1075,7 @@ A jump table for the options with a short description can be found at |Q_op|. {not available when compiled without the |+linebreak| feature} This option lets you choose which characters might cause a line - break if 'linebreak' is on. Only works for ASCII and also for 8-bit - characters when 'encoding' is an 8-bit encoding. + break if 'linebreak' is on. Only works for ASCII characters. *'breakindent'* *'bri'* 'breakindent' 'bri' boolean (default off) @@ -1214,11 +1210,9 @@ A jump table for the options with a short description can be found at |Q_op|. Specifies details about changing the case of letters. It may contain these words, separated by a comma: internal Use internal case mapping functions, the current - locale does not change the case mapping. This only - matters when 'encoding' is a Unicode encoding, - "latin1" or "iso-8859-15". When "internal" is - omitted, the towupper() and towlower() system library - functions are used when available. + locale does not change the case mapping. When + "internal" is omitted, the towupper() and towlower() + system library functions are used when available. keepascii For the ASCII characters (0x00 to 0x7f) use the US case mapping, the current locale is not effective. This probably only matters for Turkish. @@ -1271,13 +1265,12 @@ A jump table for the options with a short description can be found at |Q_op|. file to convert from. You will have to save the text in a file first. The expression must return zero or an empty string for success, non-zero for failure. - The possible encoding names encountered are in 'encoding'. + See |encoding-names| for possible encoding names. 
Additionally, names given in 'fileencodings' and 'fileencoding' are used. Conversion between "latin1", "unicode", "ucs-2", "ucs-4" and "utf-8" is done internally by Vim, 'charconvert' is not used for this. - 'charconvert' is also used to convert the shada file, if 'encoding' is - not "utf-8". Also used for Unicode conversion. + Also used for Unicode conversion. Example: > set charconvert=CharConvert() fun CharConvert() @@ -1292,8 +1285,6 @@ A jump table for the options with a short description can be found at |Q_op|. v:fname_in name of the input file v:fname_out name of the output file Note that v:fname_in and v:fname_out will never be the same. - Note that v:charconvert_from and v:charconvert_to may be different - from 'encoding'. Vim internally uses UTF-8 instead of UCS-2 or UCS-4. This option cannot be set from a |modeline| or in the |sandbox|, for security reasons. @@ -2140,44 +2131,14 @@ A jump table for the options with a short description can be found at |Q_op|. *'encoding'* *'enc'* *E543* -'encoding' 'enc' string (default: "utf-8") - global - {only available when compiled with the |+multi_byte| - feature} - Sets the character encoding used inside Vim. It applies to text in - the buffers, registers, Strings in expressions, text stored in the - shada file, etc. It sets the kind of characters which Vim can work - with. See |encoding-names| for the possible values. +'encoding' 'enc' Removed. |vim-differences| {Nvim} + Nvim always uses UTF-8 internally. RPC communication + (remote plugins/GUIs) must use UTF-8 strings. - 'encoding' cannot be changed after startup, because (1) it causes - non-ASCII text inside Vim to become invalid, and (2) it complicates - runtime logic. The recommended 'encoding' is "utf-8". Remote plugins - and GUIs only support utf-8. See |multibyte|. - - The character encoding of files can be different from 'encoding'. + The character encoding of files can be different than UTF-8. This is specified with 'fileencoding'. 
The conversion is done with iconv() or as specified with 'charconvert'. - If you need to know whether 'encoding' is a multi-byte encoding, you - can use: > - if has("multi_byte_encoding") -< - When you set this option, it fires the |EncodingChanged| autocommand - event so that you can set up fonts if necessary. - - When the option is set, the value is converted to lowercase. Thus - you can set it with uppercase values too. Underscores are translated - to '-' signs. - When the encoding is recognized, it is changed to the standard name. - For example "Latin-1" becomes "latin1", "ISO_88592" becomes - "iso-8859-2" and "utf8" becomes "utf-8". - - When "unicode", "ucs-2" or "ucs-4" is used, Vim internally uses utf-8. - You don't notice this while editing, but it does matter for the - |shada-file|. And Vim expects the terminal to use utf-8 too. Thus - setting 'encoding' to one of these values instead of utf-8 only has - effect for encoding used for files when 'fileencoding' is empty. - *'endofline'* *'eol'* *'noendofline'* *'noeol'* 'endofline' 'eol' boolean (default on) local to buffer @@ -2304,20 +2265,14 @@ A jump table for the options with a short description can be found at |Q_op|. feature} Sets the character encoding for the file of this buffer. - When 'fileencoding' is different from 'encoding', conversion will be + When 'fileencoding' is different from "utf-8", conversion will be done when writing the file. For reading see below. - When 'fileencoding' is empty, the same value as 'encoding' will be - used (no conversion when reading or writing a file). - Conversion will also be done when 'encoding' and 'fileencoding' are - both a Unicode encoding and 'fileencoding' is not utf-8. That's - because internally Unicode is always stored as utf-8. - WARNING: Conversion can cause loss of information! When - 'encoding' is "utf-8" or another Unicode encoding, conversion - is most likely done in a way that the reverse conversion - results in the same text. 
When 'encoding' is not "utf-8" some - characters may be lost! - - See 'encoding' for the possible values. Additionally, values may be + When 'fileencoding' is empty, the file will be saved with utf-8 + encoding (no conversion when reading or writing a file). + WARNING: Conversion to a non-Unicode encoding can cause loss of + information! + + See |encoding-names| for the possible values. Additionally, values may be specified that can be handled by the converter, see |mbyte-conversion|. @@ -2330,8 +2285,8 @@ A jump table for the options with a short description can be found at |Q_op|. Prepending "8bit-" and "2byte-" has no meaning here, they are ignored. When the option is set, the value is converted to lowercase. Thus you can set it with uppercase values too. '_' characters are - replaced with '-'. If a name is recognized from the list for - 'encoding', it is replaced by the standard name. For example + replaced with '-'. If a name is recognized from the list at + |encoding-names|, it is replaced by the standard name. For example "ISO8859-2" becomes "iso-8859-2". When this option is set, after starting to edit a file, the 'modified' @@ -2354,12 +2309,8 @@ A jump table for the options with a short description can be found at |Q_op|. mentioned character encoding. If an error is detected, the next one in the list is tried. When an encoding is found that works, 'fileencoding' is set to it. If all fail, 'fileencoding' is set to - an empty string, which means the value of 'encoding' is used. - WARNING: Conversion can cause loss of information! When - 'encoding' is "utf-8" (or one of the other Unicode variants) - conversion is most likely done in a way that the reverse - conversion results in the same text. When 'encoding' is not - "utf-8" some non-ASCII characters may be lost! You can use + an empty string, which means that UTF-8 is used. + WARNING: Conversion can cause loss of information! 
You can use the |++bad| argument to specify what is done with characters that can't be converted. For an empty file or a file with only ASCII characters most encodings @@ -2385,11 +2336,11 @@ A jump table for the options with a short description can be found at |Q_op|. because Vim cannot detect an error, thus the encoding is always accepted. The special value "default" can be used for the encoding from the - environment. It is useful when 'encoding' is set to "utf-8" and - your environment uses a non-latin1 encoding, such as Russian. - When 'encoding' is "utf-8" and a file contains an illegal byte - sequence it won't be recognized as UTF-8. You can use the |8g8| - command to find the illegal byte sequence. + environment. It is useful when your environment uses a non-latin1 + encoding, such as Russian. + When a file contains an illegal UTF-8 byte sequence it won't be + recognized as "utf-8". You can use the |8g8| command to find the + illegal byte sequence. WRONG VALUES: WHAT'S WRONG: latin1,utf-8 "latin1" will always be used utf-8,ucs-bom,latin1 BOM won't be recognized in an utf-8 @@ -3048,8 +2999,7 @@ A jump table for the options with a short description can be found at |Q_op|. Note: The size of these fonts must be exactly twice as wide as the one specified with 'guifont' and the same height. - 'guifontwide' is only used when 'encoding' is set to "utf-8" and - 'guifontset' is empty or invalid. + 'guifontwide' is only used when 'guifontset' is empty or invalid. When 'guifont' is set and a valid font is found in it and 'guifontwide' is empty Vim will attempt to find a matching double-width font and set 'guifontwide' to it. @@ -3711,7 +3661,7 @@ A jump table for the options with a short description can be found at |Q_op|. 128 - 159 "~@" - "~_" 160 - 254 "| " - "|~" 255 "~?" - When 'encoding' is a Unicode one, illegal bytes from 128 to 255 are + Illegal bytes from 128 to 255 (invalid UTF-8) are displayed as <xx>, with the hexadecimal value of the byte.
When 'display' contains "uhex" all unprintable characters are displayed as <xx>. @@ -3990,8 +3940,7 @@ A jump table for the options with a short description can be found at |Q_op|. omitted. The characters ':' and ',' should not be used. UTF-8 characters can - be used when 'encoding' is "utf-8", otherwise only printable - characters are allowed. All characters must be single width. + be used. All characters must be single width. Examples: > :set lcs=tab:>-,trail:- @@ -4088,7 +4037,6 @@ A jump table for the options with a short description can be found at |Q_op|. {only available when compiled with the |+multi_byte| feature} The maximum number of combining characters supported for displaying. - Only used when 'encoding' is "utf-8". The default is OK for most languages. Hebrew may require 4. Maximum value is 6. Even when this option is set to 2 you can still edit text with more @@ -5835,9 +5783,6 @@ A jump table for the options with a short description can be found at |Q_op|. (_xx is an underscore, two letters and followed by a non-letter). This is mainly for testing purposes. You must make sure the correct encoding is used, Vim doesn't check it. - When 'encoding' is set the word lists are reloaded. Thus it's a good - idea to set 'spelllang' after setting 'encoding' to avoid loading the - files twice. How the related spell files are found is explained here: |spell-load|. If the |spellfile.vim| plugin is active and you use a language name diff --git a/runtime/doc/vim_diff.txt b/runtime/doc/vim_diff.txt index 07729a43ee8489..fc0659ef6f12cb 100644 --- a/runtime/doc/vim_diff.txt +++ b/runtime/doc/vim_diff.txt @@ -40,7 +40,6 @@ these differences.
- 'complete' doesn't include "i" - 'directory' defaults to ~/.local/share/nvim/swap// (|xdg|), auto-created - 'display' defaults to "lastline" -- 'encoding' defaults to "utf-8" - 'formatoptions' defaults to "tcqj" - 'history' defaults to 10000 (the maximum) - 'hlsearch' is set by default @@ -124,15 +123,12 @@ Functions: |msgpackdump()|, |msgpackparse()| provide msgpack de/serialization Events: - |TabNew| |TabNewEntered| - |TabClosed| |TermOpen| |TermClose| |TextYankPost| Highlight groups: - |hl-EndOfBuffer| |hl-Substitute| |hl-QuickFixLine| |hl-TermCursor| @@ -164,7 +160,7 @@ are always available and may be used simultaneously in separate plugins. The 'p')) mkdir() will silently exit. In Vim this was an error. 3. mkdir() error messages now include strerror() text when mkdir fails. -'encoding' cannot be changed after startup. +'encoding' is always "utf-8". |string()| and |:echo| behaviour changed: 1. No maximum recursion depth limit is applied to nested container @@ -271,6 +267,7 @@ Highlight groups: Other options: 'antialias' 'cpoptions' ("g", "w", "H", "*", "-", "j", and all POSIX flags were removed) + 'encoding' ("utf-8" is always used) 'guioptions' "t" flag was removed *'guipty'* (Nvim uses pipes and PTYs consistently on all platforms.) *'imactivatefunc'* *'imaf'* diff --git a/scripts/gendispatch.lua b/scripts/gendispatch.lua index 94789e1ef00e8e..397ccc9aaf1f45 100644 --- a/scripts/gendispatch.lua +++ b/scripts/gendispatch.lua @@ -232,8 +232,14 @@ for i = 1, #functions do converted = 'arg_'..j local rt = real_type(param[1]) if rt ~= 'Object' then - output:write('\n if (args.items['..(j - 1)..'].type == kObjectType'..rt..') {') - output:write('\n '..converted..' = args.items['..(j - 1)..'].data.'..rt:lower()..';') + if rt:match('^Buffer$') or rt:match('^Window$') or rt:match('^Tabpage$') then + -- Buffer, Window, and Tabpage have a specific type, but are stored in integer + output:write('\n if (args.items['..(j - 1)..'].type == kObjectType'..rt..' 
&& args.items['..(j - 1)..'].data.integer >= 0) {') + output:write('\n '..converted..' = (handle_T)args.items['..(j - 1)..'].data.integer;') + else + output:write('\n if (args.items['..(j - 1)..'].type == kObjectType'..rt..') {') + output:write('\n '..converted..' = args.items['..(j - 1)..'].data.'..rt:lower()..';') + end if rt:match('^Buffer$') or rt:match('^Window$') or rt:match('^Tabpage$') or rt:match('^Boolean$') then -- accept nonnegative integers for Booleans, Buffers, Windows and Tabpages output:write('\n } else if (args.items['..(j - 1)..'].type == kObjectTypeInteger && args.items['..(j - 1)..'].data.integer >= 0) {') diff --git a/src/nvim/api/private/defs.h b/src/nvim/api/private/defs.h index 1d5ecd3071a0c9..223aab09dced74 100644 --- a/src/nvim/api/private/defs.h +++ b/src/nvim/api/private/defs.h @@ -91,9 +91,6 @@ typedef enum { struct object { ObjectType type; union { - Buffer buffer; - Window window; - Tabpage tabpage; Boolean boolean; Integer integer; Float floating; diff --git a/src/nvim/api/private/helpers.c b/src/nvim/api/private/helpers.c index 208c3b53c8eeec..bd83b1ff1d9137 100644 --- a/src/nvim/api/private/helpers.c +++ b/src/nvim/api/private/helpers.c @@ -616,13 +616,14 @@ bool object_to_vim(Object obj, typval_T *tv, Error *err) case kObjectTypeWindow: case kObjectTypeTabpage: case kObjectTypeInteger: - if (obj.data.integer > INT_MAX || obj.data.integer < INT_MIN) { + if (obj.data.integer > VARNUMBER_MAX + || obj.data.integer < VARNUMBER_MIN) { api_set_error(err, Validation, _("Integer value outside range")); return false; } tv->v_type = VAR_NUMBER; - tv->vval.v_number = (int)obj.data.integer; + tv->vval.v_number = (varnumber_T)obj.data.integer; break; case kObjectTypeFloat: diff --git a/src/nvim/api/private/helpers.h b/src/nvim/api/private/helpers.h index a946e35149c3d8..9fe8c351cff78e 100644 --- a/src/nvim/api/private/helpers.h +++ b/src/nvim/api/private/helpers.h @@ -37,15 +37,15 @@ #define BUFFER_OBJ(s) ((Object) { \ .type = 
kObjectTypeBuffer, \ - .data.buffer = s }) + .data.integer = s }) #define WINDOW_OBJ(s) ((Object) { \ .type = kObjectTypeWindow, \ - .data.window = s }) + .data.integer = s }) #define TABPAGE_OBJ(s) ((Object) { \ .type = kObjectTypeTabpage, \ - .data.tabpage = s }) + .data.integer = s }) #define ARRAY_OBJ(a) ((Object) { \ .type = kObjectTypeArray, \ diff --git a/src/nvim/api/window.c b/src/nvim/api/window.c index ef881fa0eb4839..1f555a6a050bcb 100644 --- a/src/nvim/api/window.c +++ b/src/nvim/api/window.c @@ -348,7 +348,7 @@ Tabpage nvim_win_get_tabpage(Window window, Error *err) /// @return Window number Integer nvim_win_get_number(Window window, Error *err) { - Integer rv = 0; + int rv = 0; win_T *win = find_window_by_handle(window, err); if (!win) { @@ -356,7 +356,7 @@ Integer nvim_win_get_number(Window window, Error *err) } int tabnr; - win_get_tabwin(window, &tabnr, (int *)&rv); + win_get_tabwin(window, &tabnr, &rv); return rv; } diff --git a/src/nvim/buffer_defs.h b/src/nvim/buffer_defs.h index ab5987612c3239..2e6316c74a91cf 100644 --- a/src/nvim/buffer_defs.h +++ b/src/nvim/buffer_defs.h @@ -488,9 +488,9 @@ struct file_buffer { bool file_id_valid; FileID file_id; - bool b_changed; /* 'modified': Set to true if something in the - file has been changed and not written out. */ - int b_changedtick; /* incremented for each change, also for undo */ + int b_changed; // 'modified': Set to true if something in the + // file has been changed and not written out. + int b_changedtick; // incremented for each change, also for undo bool b_saving; /* Set to true if we are in the middle of saving the buffer. 
*/ @@ -655,7 +655,7 @@ struct file_buffer { long b_p_sts; ///< 'softtabstop' long b_p_sts_nopaste; ///< b_p_sts saved for paste mode char_u *b_p_sua; ///< 'suffixesadd' - bool b_p_swf; ///< 'swapfile' + int b_p_swf; ///< 'swapfile' long b_p_smc; ///< 'synmaxcol' char_u *b_p_syn; ///< 'syntax' long b_p_ts; ///< 'tabstop' diff --git a/src/nvim/charset.c b/src/nvim/charset.c index 61c5b108082165..c501b7e83fdc99 100644 --- a/src/nvim/charset.c +++ b/src/nvim/charset.c @@ -1612,9 +1612,7 @@ bool vim_islower(int c) return false; } - if (enc_latin1like) { - return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER; - } + return (latin1flags[c] & LATIN1LOWER) == LATIN1LOWER; } return islower(c); } @@ -1643,9 +1641,7 @@ bool vim_isupper(int c) return false; } - if (enc_latin1like) { - return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER; - } + return (latin1flags[c] & LATIN1UPPER) == LATIN1UPPER; } return isupper(c); } @@ -1670,9 +1666,7 @@ int vim_toupper(int c) return c; } - if (enc_latin1like) { - return latin1upper[c]; - } + return latin1upper[c]; } return TOUPPER_LOC(c); } @@ -1697,9 +1691,7 @@ int vim_tolower(int c) return c; } - if (enc_latin1like) { - return latin1lower[c]; - } + return latin1lower[c]; } return TOLOWER_LOC(c); } diff --git a/src/nvim/eval.c b/src/nvim/eval.c index 512555eac1d622..5d4241c8af1529 100644 --- a/src/nvim/eval.c +++ b/src/nvim/eval.c @@ -15612,6 +15612,39 @@ static void f_strftime(typval_T *argvars, typval_T *rettv, FunPtr fptr) } } +// "strgetchar()" function +static void f_strgetchar(typval_T *argvars, typval_T *rettv, FunPtr fptr) +{ + char_u *str; + int len; + int error = false; + int charidx; + + rettv->vval.v_number = -1; + str = get_tv_string_chk(&argvars[0]); + if (str == NULL) { + return; + } + len = (int)STRLEN(str); + charidx = get_tv_number_chk(&argvars[1], &error); + if (error) { + return; + } + + { + int byteidx = 0; + + while (charidx >= 0 && byteidx < len) { + if (charidx == 0) { + rettv->vval.v_number = mb_ptr2char(str + 
byteidx); + break; + } + charidx--; + byteidx += mb_cptr2len(str + byteidx); + } + } +} + /* * "stridx()" function */ @@ -15712,6 +15745,64 @@ static void f_strwidth(typval_T *argvars, typval_T *rettv, FunPtr fptr) rettv->vval.v_number = (varnumber_T) mb_string2cells(s); } +// "strcharpart()" function +static void f_strcharpart(typval_T *argvars, typval_T *rettv, FunPtr fptr) { + char_u *p; + int nchar; + int nbyte = 0; + int charlen; + int len = 0; + int slen; + int error = false; + + p = get_tv_string(&argvars[0]); + slen = (int)STRLEN(p); + + nchar = get_tv_number_chk(&argvars[1], &error); + if (!error) { + if (nchar > 0) { + while (nchar > 0 && nbyte < slen) { + nbyte += mb_cptr2len(p + nbyte); + nchar--; + } + } else { + nbyte = nchar; + } + } + if (argvars[2].v_type != VAR_UNKNOWN) { + charlen = get_tv_number(&argvars[2]); + while (charlen > 0 && nbyte + len < slen) { + int off = nbyte + len; + + if (off < 0) { + len += 1; + } else { + len += mb_cptr2len(p + off); + } + charlen--; + } + } else { + len = slen - nbyte; // default: all bytes that are available. + } + + // Only return the overlap between the specified part and the actual + // string. 
+ if (nbyte < 0) { + len += nbyte; + nbyte = 0; + } else if (nbyte > slen) { + nbyte = slen; + } + if (len < 0) { + len = 0; + } else if (nbyte + len > slen) { + len = slen - nbyte; + } + + rettv->v_type = VAR_STRING; + rettv->vval.v_string = vim_strnsave(p + nbyte, len); +} + /* * "strpart()" function */ diff --git a/src/nvim/eval.lua b/src/nvim/eval.lua index eaaee8153344b6..bea25b36f3bfae 100644 --- a/src/nvim/eval.lua +++ b/src/nvim/eval.lua @@ -268,9 +268,11 @@ return { sqrt={args=1, func="float_op_wrapper", data="&sqrt"}, str2float={args=1}, str2nr={args={1, 2}}, + strcharpart={args={2, 3}}, strchars={args={1,2}}, strdisplaywidth={args={1, 2}}, strftime={args={1, 2}}, + strgetchar={args={2, 2}}, stridx={args={2, 3}}, string={args=1}, strlen={args=1}, diff --git a/src/nvim/ex_cmds.c b/src/nvim/ex_cmds.c index f8bb29db50f7ff..c96ab581bdfadc 100644 --- a/src/nvim/ex_cmds.c +++ b/src/nvim/ex_cmds.c @@ -4130,61 +4130,66 @@ void ex_global(exarg_T *eap) vim_regfree(regmatch.regprog); } -/* - * Execute "cmd" on lines marked with ml_setmarked(). - */ +/// Execute `cmd` on lines marked with ml_setmarked(). void global_exe(char_u *cmd) { - linenr_T old_lcount; /* b_ml.ml_line_count before the command */ - buf_T *old_buf = curbuf; /* remember what buffer we started in */ - linenr_T lnum; /* line number according to old situation */ - - /* - * Set current position only once for a global command. - * If global_busy is set, setpcmark() will not do anything. - * If there is an error, global_busy will be incremented. - */ + linenr_T old_lcount; // b_ml.ml_line_count before the command + buf_T *old_buf = curbuf; // remember what buffer we started in + linenr_T lnum; // line number according to old situation + int save_mapped_ctrl_c = mapped_ctrl_c; + + // Set current position only once for a global command. + // If global_busy is set, setpcmark() will not do anything. + // If there is an error, global_busy will be incremented. 
setpcmark(); - /* When the command writes a message, don't overwrite the command. */ - msg_didout = TRUE; + // When the command writes a message, don't overwrite the command. + msg_didout = true; + // Disable CTRL-C mapping, let it interrupt (potentially long output). + mapped_ctrl_c = 0; sub_nsubs = 0; sub_nlines = 0; - global_need_beginline = FALSE; + global_need_beginline = false; global_busy = 1; old_lcount = curbuf->b_ml.ml_line_count; + while (!got_int && (lnum = ml_firstmarked()) != 0 && global_busy == 1) { curwin->w_cursor.lnum = lnum; curwin->w_cursor.col = 0; - if (*cmd == NUL || *cmd == '\n') + if (*cmd == NUL || *cmd == '\n') { do_cmdline((char_u *)"p", NULL, NULL, DOCMD_NOWAIT); - else + } else { do_cmdline(cmd, NULL, NULL, DOCMD_NOWAIT); + } os_breakcheck(); } + mapped_ctrl_c = save_mapped_ctrl_c; global_busy = 0; - if (global_need_beginline) + if (global_need_beginline) { beginline(BL_WHITE | BL_FIX); - else - check_cursor(); /* cursor may be beyond the end of the line */ + } else { + check_cursor(); // cursor may be beyond the end of the line + } - /* the cursor may not have moved in the text but a change in a previous - * line may move it on the screen */ + // the cursor may not have moved in the text but a change in a previous + // line may move it on the screen changed_line_abv_curs(); - /* If it looks like no message was written, allow overwriting the - * command with the report for number of changes. */ - if (msg_col == 0 && msg_scrolled == 0) - msg_didout = FALSE; + // If it looks like no message was written, allow overwriting the + // command with the report for number of changes. + if (msg_col == 0 && msg_scrolled == 0) { + msg_didout = false; + } - /* If substitutes done, report number of substitutes, otherwise report - * number of extra or deleted lines. - * Don't report extra or deleted lines in the edge case where the buffer - * we are in after execution is different from the buffer we started in. 
*/ + if (!do_sub_msg(false) && curbuf == old_buf) + // If substitutes done, report number of substitutes, otherwise report + // number of extra or deleted lines. + // Don't report extra or deleted lines in the edge case where the buffer + // we are in after execution is different from the buffer we started in. + if (!do_sub_msg(false) && curbuf == old_buf) { msgmore(curbuf->b_ml.ml_line_count - old_lcount); + } } #if defined(EXITFREE) diff --git a/src/nvim/fileio.c b/src/nvim/fileio.c index d6e669a67b3097..934d81dcd02559 100644 --- a/src/nvim/fileio.c +++ b/src/nvim/fileio.c @@ -4165,9 +4165,8 @@ static bool need_conversion(const char_u *fenc) same_encoding = (enc_flags != 0 && fenc_flags == enc_flags); } if (same_encoding) { - /* Specified encoding matches with 'encoding'. This requires - * conversion when 'encoding' is Unicode but not UTF-8. */ - return enc_unicode != 0; + // Specified file encoding matches UTF-8. + return false; } /* Encodings differ. However, conversion is not needed when 'enc' is any diff --git a/src/nvim/globals.h b/src/nvim/globals.h index 690be70c4de1b0..301a2c1663fe0f 100644 --- a/src/nvim/globals.h +++ b/src/nvim/globals.h @@ -778,44 +778,18 @@ EXTERN int vr_lines_changed INIT(= 0); /* #Lines changed by "gR" so far */ # define DBCS_2BYTE 1 /* 2byte- */ # define DBCS_DEBUG -1 -EXTERN int enc_dbcs INIT(= 0); /* One of DBCS_xxx values if - DBCS encoding */ -EXTERN int enc_unicode INIT(= 0); /* 2: UCS-2 or UTF-16, 4: UCS-4 */ -EXTERN bool enc_utf8 INIT(= false); /* UTF-8 encoded Unicode */ -EXTERN int enc_latin1like INIT(= TRUE); /* 'encoding' is latin1 comp. */ -EXTERN int has_mbyte INIT(= 0); /* any multi-byte encoding */ +// mbyte flags that used to depend on 'encoding'. These are now deprecated, as +// 'encoding' is always "utf-8". Code that uses them can be refactored to +// remove dead code.
+#define enc_dbcs false +#define enc_utf8 true +#define has_mbyte true /// Encoding used when 'fencs' is set to "default" EXTERN char_u *fenc_default INIT(= NULL); -/* - * To speed up BYTELEN() we fill a table with the byte lengths whenever - * enc_utf8 or enc_dbcs changes. - */ -EXTERN char mb_bytelen_tab[256]; - -/* - * Function pointers, used to quickly get to the right function. Each has - * three possible values: latin_ (8-bit), utfc_ or utf_ (utf-8) and dbcs_ - * (DBCS). - * The value is set in mb_init(); - */ -/* length of char in bytes, including following composing chars */ -EXTERN int (*mb_ptr2len)(const char_u *p) INIT(= latin_ptr2len); -/* idem, with limit on string length */ -EXTERN int (*mb_ptr2len_len)(const char_u *p, int size) INIT(= latin_ptr2len_len); -/* byte length of char */ -EXTERN int (*mb_char2len)(int c) INIT(= latin_char2len); -/* convert char to bytes, return the length */ -EXTERN int (*mb_char2bytes)(int c, char_u *buf) INIT(= latin_char2bytes); -EXTERN int (*mb_ptr2cells)(const char_u *p) INIT(= latin_ptr2cells); -EXTERN int (*mb_ptr2cells_len)(const char_u *p, int size) INIT( - = latin_ptr2cells_len); -EXTERN int (*mb_char2cells)(int c) INIT(= latin_char2cells); -EXTERN int (*mb_off2cells)(unsigned off, unsigned max_off) INIT( - = latin_off2cells); -EXTERN int (*mb_ptr2char)(const char_u *p) INIT(= latin_ptr2char); -EXTERN int (*mb_head_off)(const char_u *base, const char_u *p) INIT(= latin_head_off); +// To speed up BYTELEN() we keep a table with the byte lengths for utf-8 +EXTERN char utf8len_tab[256]; # if defined(USE_ICONV) && defined(DYNAMIC_ICONV) /* Pointers to functions and variables to be loaded at runtime */ diff --git a/src/nvim/macros.h b/src/nvim/macros.h index 503daa964805f8..79e545771eab05 100644 --- a/src/nvim/macros.h +++ b/src/nvim/macros.h @@ -122,32 +122,29 @@ /* Whether to draw the vertical bar on the right side of the cell. 
*/ # define CURSOR_BAR_RIGHT (curwin->w_p_rl && (!(State & CMDLINE) || cmdmsg_rl)) -/* - * mb_ptr_adv(): advance a pointer to the next character, taking care of - * multi-byte characters if needed. - * mb_ptr_back(): backup a pointer to the previous character, taking care of - * multi-byte characters if needed. - * MB_COPY_CHAR(f, t): copy one char from "f" to "t" and advance the pointers. - * PTR2CHAR(): get character from pointer. - */ -/* Get the length of the character p points to */ -# define MB_PTR2LEN(p) (has_mbyte ? (*mb_ptr2len)(p) : 1) -/* Advance multi-byte pointer, skip over composing chars. */ -# define mb_ptr_adv(p) (p += has_mbyte ? (*mb_ptr2len)((char_u *)p) : 1) -/* Advance multi-byte pointer, do not skip over composing chars. */ -# define mb_cptr_adv(p) (p += \ - enc_utf8 ? utf_ptr2len(p) : has_mbyte ? (*mb_ptr2len)(p) : 1) -/* Backup multi-byte pointer. Only use with "p" > "s" ! */ -# define mb_ptr_back(s, p) (p -= has_mbyte ? ((*mb_head_off)((char_u *)s, (char_u *)p - 1) + 1) : 1) -/* get length of multi-byte char, not including composing chars */ -# define mb_cptr2len(p) (enc_utf8 ? utf_ptr2len(p) : (*mb_ptr2len)(p)) - -# define MB_COPY_CHAR(f, t) \ - if (has_mbyte) mb_copy_char((const char_u **)(&f), &t); \ - else *t++ = *f++ -# define MB_CHARLEN(p) (has_mbyte ? mb_charlen(p) : (int)STRLEN(p)) -# define MB_CHAR2LEN(c) (has_mbyte ? mb_char2len(c) : 1) -# define PTR2CHAR(p) (has_mbyte ? mb_ptr2char(p) : (int)*(p)) +// mb_ptr_adv(): advance a pointer to the next character, taking care of +// multi-byte characters if needed. +// mb_ptr_back(): backup a pointer to the previous character, taking care of +// multi-byte characters if needed. +// MB_COPY_CHAR(f, t): copy one char from "f" to "t" and advance the pointers. +// PTR2CHAR(): get character from pointer. + +// Get the length of the character p points to +# define MB_PTR2LEN(p) mb_ptr2len(p) +// Advance multi-byte pointer, skip over composing chars. 
+# define mb_ptr_adv(p) (p += mb_ptr2len((char_u *)p)) +// Advance multi-byte pointer, do not skip over composing chars. +# define mb_cptr_adv(p) (p += utf_ptr2len(p)) +// Backup multi-byte pointer. Only use with "p" > "s" ! +# define mb_ptr_back(s, p) (p -= mb_head_off((char_u *)s, (char_u *)p - 1) + 1) +// get length of multi-byte char, not including composing chars +# define mb_cptr2len(p) utf_ptr2len(p) + +# define MB_COPY_CHAR(f, t) mb_copy_char((const char_u **)(&f), &t); + +# define MB_CHARLEN(p) mb_charlen(p) +# define MB_CHAR2LEN(c) mb_char2len(c) +# define PTR2CHAR(p) mb_ptr2char(p) # define RESET_BINDING(wp) (wp)->w_p_scb = FALSE; (wp)->w_p_crb = FALSE diff --git a/src/nvim/main.c b/src/nvim/main.c index eb67483d08ac2e..ffd93532529742 100644 --- a/src/nvim/main.c +++ b/src/nvim/main.c @@ -177,7 +177,6 @@ void early_init(void) fs_init(); handle_init(); - (void)mb_init(); // init mb_bytelen_tab[] to ones eval_init(); // init global variables // Init the table of Normal mode commands. diff --git a/src/nvim/mbyte.c b/src/nvim/mbyte.c index e6312f9c00fdb7..7be0be71067c6c 100644 --- a/src/nvim/mbyte.c +++ b/src/nvim/mbyte.c @@ -1,68 +1,27 @@ -/* - * mbyte.c: Code specifically for handling multi-byte characters. - * Multibyte extensions partly by Sung-Hoon Baek - * - * The encoding used in the core is set with 'encoding'. When 'encoding' is - * changed, the following four variables are set (for speed). - * Currently these types of character encodings are supported: - * - * "enc_dbcs" When non-zero it tells the type of double byte character - * encoding (Chinese, Korean, Japanese, etc.). - * The cell width on the display is equal to the number of - * bytes. (exception: DBCS_JPNU with first byte 0x8e) - * Recognizing the first or second byte is difficult, it - * requires checking a byte sequence from the start. - * "enc_utf8" When TRUE use Unicode characters in UTF-8 encoding. - * The cell width on the display needs to be determined from - * the character value. 
- * Recognizing bytes is easy: 0xxx.xxxx is a single-byte - * char, 10xx.xxxx is a trailing byte, 11xx.xxxx is a leading - * byte of a multi-byte character. - * To make things complicated, up to six composing characters - * are allowed. These are drawn on top of the first char. - * For most editing the sequence of bytes with composing - * characters included is considered to be one character. - * "enc_unicode" When 2 use 16-bit Unicode characters (or UTF-16). - * When 4 use 32-but Unicode characters. - * Internally characters are stored in UTF-8 encoding to - * avoid NUL bytes. Conversion happens when doing I/O. - * "enc_utf8" will also be TRUE. - * - * "has_mbyte" is set when "enc_dbcs" or "enc_utf8" is non-zero. - * - * If none of these is TRUE, 8-bit bytes are used for a character. The - * encoding isn't currently specified (TODO). - * - * 'encoding' specifies the encoding used in the core. This is in registers, - * text manipulation, buffers, etc. Conversion has to be done when characters - * in another encoding are received or send: - * - * clipboard - * ^ - * | (2) - * V - * +---------------+ - * (1) | | (3) - * keyboard ----->| core |-----> display - * | | - * +---------------+ - * ^ - * | (4) - * V - * file - * - * (1) Typed characters arrive in the current locale. - * (2) Text will be made available with the encoding specified with - * 'encoding'. If this is not sufficient, system-specific conversion - * might be required. - * (3) For the GUI the correct font must be selected, no conversion done. - * (4) The encoding of the file is specified with 'fileencoding'. Conversion - * is to be done when it's different from 'encoding'. - * - * The ShaDa file is a special case: Only text is converted, not file names. - * Vim scripts may contain an ":encoding" command. This has an effect for - * some commands, like ":menutrans" - */ +/// mbyte.c: Code specifically for handling multi-byte characters. 
+/// Multibyte extensions partly by Sung-Hoon Baek +/// +/// The encoding used in nvim is always UTF-8. "enc_utf8" and "has_mbyte" are +/// thus always true. "enc_dbcs" is always zero. The 'encoding' option is +/// read-only and always reads "utf-8". +/// +/// The cell width on the display needs to be determined from the character +/// value. Recognizing UTF-8 bytes is easy: 0xxx.xxxx is a single-byte char, +/// 10xx.xxxx is a trailing byte, 11xx.xxxx is a leading byte of a multi-byte +/// character. To make things complicated, up to six composing characters +/// are allowed. These are drawn on top of the first char. For most editing +/// the sequence of bytes with composing characters included is considered to +/// be one character. +/// +/// UTF-8 is used everywhere in the core. This is in registers, text +/// manipulation, buffers, etc. Nvim core communicates with external plugins +/// and GUIs in this encoding. +/// +/// The encoding of a file is specified with 'fileencoding'. Conversion +/// is to be done when it's different from "utf-8". +/// +/// Vim scripts may contain a ":scriptencoding" command. This has an effect +/// for some commands, like ":menutrans". #include #include @@ -115,7 +74,7 @@ struct interval { * Bytes which are illegal when used as the first byte have a 1. * The NUL byte has length 1. */ -static char utf8len_tab[256] = +char utf8len_tab[256] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, @@ -384,207 +343,6 @@ int enc_canon_props(const char_u *name) return 0; } -/* - * Set up for using multi-byte characters. - * Called in three cases: - * - by main() to initialize (p_enc == NULL) - * - by set_init_1() after 'encoding' was set to its default. - * - by do_set() when 'encoding' has been set. - * p_enc must have been passed through enc_canonize() already. - * Sets the "enc_unicode", "enc_utf8", "enc_dbcs" and "has_mbyte" flags.
- * Fills mb_bytelen_tab[] and returns NULL when there are no problems. - * When there is something wrong: Returns an error message and doesn't change - * anything. - */ -char_u * mb_init(void) -{ - int i; - int idx; - int n; - int enc_dbcs_new = 0; -#if defined(USE_ICONV) && !defined(WIN3264) && !defined(WIN32UNIX) \ - && !defined(MACOS) -# define LEN_FROM_CONV - vimconv_T vimconv; - char_u *p; -#endif - - if (p_enc == NULL) { - /* Just starting up: set the whole table to one's. */ - for (i = 0; i < 256; ++i) - mb_bytelen_tab[i] = 1; - return NULL; - } else if (STRNCMP(p_enc, "8bit-", 5) == 0 - || STRNCMP(p_enc, "iso-8859-", 9) == 0) { - /* Accept any "8bit-" or "iso-8859-" name. */ - enc_unicode = 0; - enc_utf8 = false; - } else if (STRNCMP(p_enc, "2byte-", 6) == 0) { - /* Unix: accept any "2byte-" name, assume current locale. */ - enc_dbcs_new = DBCS_2BYTE; - } else if ((idx = enc_canon_search(p_enc)) >= 0) { - i = enc_canon_table[idx].prop; - if (i & ENC_UNICODE) { - /* Unicode */ - enc_utf8 = true; - if (i & (ENC_2BYTE | ENC_2WORD)) - enc_unicode = 2; - else if (i & ENC_4BYTE) - enc_unicode = 4; - else - enc_unicode = 0; - } else if (i & ENC_DBCS) { - /* 2byte, handle below */ - enc_dbcs_new = enc_canon_table[idx].codepage; - } else { - /* Must be 8-bit. */ - enc_unicode = 0; - enc_utf8 = false; - } - } else /* Don't know what encoding this is, reject it. */ - return e_invarg; - - if (enc_dbcs_new != 0) { - enc_unicode = 0; - enc_utf8 = false; - } - enc_dbcs = enc_dbcs_new; - has_mbyte = (enc_dbcs != 0 || enc_utf8); - - - /* Detect an encoding that uses latin1 characters. */ - enc_latin1like = (enc_utf8 || STRCMP(p_enc, "latin1") == 0 - || STRCMP(p_enc, "iso-8859-15") == 0); - - /* - * Set the function pointers. 
- */ - if (enc_utf8) { - mb_ptr2len = utfc_ptr2len; - mb_ptr2len_len = utfc_ptr2len_len; - mb_char2len = utf_char2len; - mb_char2bytes = utf_char2bytes; - mb_ptr2cells = utf_ptr2cells; - mb_ptr2cells_len = utf_ptr2cells_len; - mb_char2cells = utf_char2cells; - mb_off2cells = utf_off2cells; - mb_ptr2char = utf_ptr2char; - mb_head_off = utf_head_off; - } else if (enc_dbcs != 0) { - mb_ptr2len = dbcs_ptr2len; - mb_ptr2len_len = dbcs_ptr2len_len; - mb_char2len = dbcs_char2len; - mb_char2bytes = dbcs_char2bytes; - mb_ptr2cells = dbcs_ptr2cells; - mb_ptr2cells_len = dbcs_ptr2cells_len; - mb_char2cells = dbcs_char2cells; - mb_off2cells = dbcs_off2cells; - mb_ptr2char = dbcs_ptr2char; - mb_head_off = dbcs_head_off; - } else { - mb_ptr2len = latin_ptr2len; - mb_ptr2len_len = latin_ptr2len_len; - mb_char2len = latin_char2len; - mb_char2bytes = latin_char2bytes; - mb_ptr2cells = latin_ptr2cells; - mb_ptr2cells_len = latin_ptr2cells_len; - mb_char2cells = latin_char2cells; - mb_off2cells = latin_off2cells; - mb_ptr2char = latin_ptr2char; - mb_head_off = latin_head_off; - } - - /* - * Fill the mb_bytelen_tab[] for MB_BYTE2LEN(). - */ -#ifdef LEN_FROM_CONV - /* When 'encoding' is different from the current locale mblen() won't - * work. Use conversion to "utf-8" instead. */ - vimconv.vc_type = CONV_NONE; - if (enc_dbcs) { - p = enc_locale(); - if (p == NULL || STRCMP(p, p_enc) != 0) { - convert_setup(&vimconv, p_enc, (char_u *)"utf-8"); - vimconv.vc_fail = true; - } - xfree(p); - } -#endif - - for (i = 0; i < 256; ++i) { - /* Our own function to reliably check the length of UTF-8 characters, - * independent of mblen(). 
*/ - if (enc_utf8) - n = utf8len_tab[i]; - else if (enc_dbcs == 0) - n = 1; - else { - char buf[MB_MAXBYTES + 1]; - if (i == NUL) /* just in case mblen() can't handle "" */ - n = 1; - else { - buf[0] = i; - buf[1] = 0; -#ifdef LEN_FROM_CONV - if (vimconv.vc_type != CONV_NONE) { - /* - * string_convert() should fail when converting the first - * byte of a double-byte character. - */ - p = string_convert(&vimconv, (char_u *)buf, NULL); - if (p != NULL) { - xfree(p); - n = 1; - } else - n = 2; - } else -#endif - { - /* - * mblen() should return -1 for invalid (means the leading - * multibyte) character. However there are some platforms - * where mblen() returns 0 for invalid character. - * Therefore, following condition includes 0. - */ - ignored = mblen(NULL, 0); /* First reset the state. */ - if (mblen(buf, (size_t)1) <= 0) - n = 2; - else - n = 1; - } - } - } - mb_bytelen_tab[i] = n; - } - -#ifdef LEN_FROM_CONV - convert_setup(&vimconv, NULL, NULL); -#endif - - /* The cell width depends on the type of multi-byte characters. */ - (void)init_chartab(); - - /* When enc_utf8 is set or reset, (de)allocate ScreenLinesUC[] */ - screenalloc(false); - -#ifdef HAVE_WORKING_LIBINTL - /* GNU gettext 0.10.37 supports this feature: set the codeset used for - * translated messages independently from the current locale. */ - (void)bind_textdomain_codeset(PROJECT_NAME, - enc_utf8 ? "utf-8" : (char *)p_enc); -#endif - - - /* Fire an autocommand to let people do custom font setup. This must be - * after Vim has been setup for the new encoding. 
*/ - apply_autocmds(EVENT_ENCODINGCHANGED, NULL, (char_u *)"", FALSE, curbuf); - - /* Need to reload spell dictionaries */ - spell_reload(); - - return NULL; -} - /* * Return the size of the BOM for the current buffer: * 0 - no BOM @@ -597,20 +355,15 @@ int bomb_size(void) int n = 0; if (curbuf->b_p_bomb && !curbuf->b_p_bin) { - if (*curbuf->b_p_fenc == NUL) { - if (enc_utf8) { - if (enc_unicode != 0) - n = enc_unicode; - else - n = 3; - } - } else if (STRCMP(curbuf->b_p_fenc, "utf-8") == 0) + if (*curbuf->b_p_fenc == NUL + || STRCMP(curbuf->b_p_fenc, "utf-8") == 0) { n = 3; - else if (STRNCMP(curbuf->b_p_fenc, "ucs-2", 5) == 0 - || STRNCMP(curbuf->b_p_fenc, "utf-16", 6) == 0) + } else if (STRNCMP(curbuf->b_p_fenc, "ucs-2", 5) == 0 + || STRNCMP(curbuf->b_p_fenc, "utf-16", 6) == 0) { n = 2; - else if (STRNCMP(curbuf->b_p_fenc, "ucs-4", 5) == 0) + } else if (STRNCMP(curbuf->b_p_fenc, "ucs-4", 5) == 0) { n = 4; + } } return n; } @@ -803,99 +556,6 @@ int dbcs_class(unsigned lead, unsigned trail) return 3; } -/* - * mb_char2len() function pointer. - * Return length in bytes of character "c". - * Returns 1 for a single-byte character. - */ -int latin_char2len(int c) -{ - return 1; -} - -static int dbcs_char2len(int c) -{ - if (c >= 0x100) - return 2; - return 1; -} - -/* - * mb_char2bytes() function pointer. - * Convert a character to its bytes. - * Returns the length in bytes. - */ -int latin_char2bytes(int c, char_u *buf) -{ - buf[0] = c; - return 1; -} - -static int dbcs_char2bytes(int c, char_u *buf) -{ - if (c >= 0x100) { - buf[0] = (unsigned)c >> 8; - buf[1] = c; - /* Never use a NUL byte, it causes lots of trouble. It's an invalid - * character anyway. */ - if (buf[1] == NUL) - buf[1] = '\n'; - return 2; - } - buf[0] = c; - return 1; -} - -/* - * mb_ptr2len() function pointer. - * Get byte length of character at "*p" but stop at a NUL. - * For UTF-8 this includes following composing characters. - * Returns 0 when *p is NUL. 
- */ -int latin_ptr2len(const char_u *p) -{ - return MB_BYTE2LEN(*p); -} - -static int dbcs_ptr2len(const char_u *p) -{ - int len; - - /* Check if second byte is not missing. */ - len = MB_BYTE2LEN(*p); - if (len == 2 && p[1] == NUL) - len = 1; - return len; -} - -/* - * mb_ptr2len_len() function pointer. - * Like mb_ptr2len(), but limit to read "size" bytes. - * Returns 0 for an empty string. - * Returns 1 for an illegal char or an incomplete byte sequence. - */ -int latin_ptr2len_len(const char_u *p, int size) -{ - if (size < 1 || *p == NUL) - return 0; - return 1; -} - -static int dbcs_ptr2len_len(const char_u *p, int size) -{ - int len; - - if (size < 1 || *p == NUL) - return 0; - if (size == 1) - return 1; - /* Check that second byte is not missing. */ - len = MB_BYTE2LEN(*p); - if (len == 2 && p[1] == NUL) - len = 1; - return len; -} - /* * Return true if "c" is in "table". */ @@ -963,16 +623,8 @@ int utf_char2cells(int c) return 1; } -/* - * mb_ptr2cells() function pointer. - * Return the number of display cells character at "*p" occupies. - * This doesn't take care of unprintable characters, use ptr2cells() for that. - */ -int latin_ptr2cells(const char_u *p) -{ - return 1; -} - +/// Return the number of display cells character at "*p" occupies. +/// This doesn't take care of unprintable characters, use ptr2cells() for that. int utf_ptr2cells(const char_u *p) { int c; @@ -991,26 +643,9 @@ int utf_ptr2cells(const char_u *p) return 1; } -int dbcs_ptr2cells(const char_u *p) -{ - /* Number of cells is equal to number of bytes, except for euc-jp when - * the first byte is 0x8e. */ - if (enc_dbcs == DBCS_JPNU && *p == 0x8e) - return 1; - return MB_BYTE2LEN(*p); -} - -/* - * mb_ptr2cells_len() function pointer. - * Like mb_ptr2cells(), but limit string length to "size". - * For an empty string or truncated character returns 1. 
- */ -int latin_ptr2cells_len(const char_u *p, int size) -{ - return 1; -} - -static int utf_ptr2cells_len(const char_u *p, int size) +/// Like utf_ptr2cells(), but limit string length to "size". +/// For an empty string or truncated character returns 1. +int utf_ptr2cells_len(const char_u *p, int size) { int c; @@ -1030,35 +665,6 @@ static int utf_ptr2cells_len(const char_u *p, int size) return 1; } -static int dbcs_ptr2cells_len(const char_u *p, int size) -{ - /* Number of cells is equal to number of bytes, except for euc-jp when - * the first byte is 0x8e. */ - if (size <= 1 || (enc_dbcs == DBCS_JPNU && *p == 0x8e)) - return 1; - return MB_BYTE2LEN(*p); -} - -/* - * mb_char2cells() function pointer. - * Return the number of display cells character "c" occupies. - * Only takes care of multi-byte chars, not "^C" and such. - */ -int latin_char2cells(int c) -{ - return 1; -} - -static int dbcs_char2cells(int c) -{ - /* Number of cells is equal to number of bytes, except for euc-jp when - * the first byte is 0x8e. */ - if (enc_dbcs == DBCS_JPNU && ((unsigned)c >> 8) == 0x8e) - return 1; - /* use the first byte */ - return MB_BYTE2LEN((unsigned)c >> 8); -} - /// Calculate the number of cells occupied by string `str`. /// /// @param str The source string, may not be NULL, must be a NUL-terminated @@ -1075,50 +681,13 @@ size_t mb_string2cells(const char_u *str) return clen; } -/* - * mb_off2cells() function pointer. - * Return number of display cells for char at ScreenLines[off]. - * We make sure that the offset used is less than "max_off". - */ -int latin_off2cells(unsigned off, unsigned max_off) -{ - return 1; -} - -int dbcs_off2cells(unsigned off, unsigned max_off) -{ - /* never check beyond end of the line */ - if (off >= max_off) - return 1; - - /* Number of cells is equal to number of bytes, except for euc-jp when - * the first byte is 0x8e. 
*/ - if (enc_dbcs == DBCS_JPNU && ScreenLines[off] == 0x8e) - return 1; - return MB_BYTE2LEN(ScreenLines[off]); -} - +/// Return number of display cells for char at ScreenLines[off]. +/// We make sure that the offset used is less than "max_off". int utf_off2cells(unsigned off, unsigned max_off) { return (off + 1 < max_off && ScreenLines[off + 1] == 0) ? 2 : 1; } -/* - * mb_ptr2char() function pointer. - * Convert a byte sequence into a character. - */ -int latin_ptr2char(const char_u *p) -{ - return *p; -} - -static int dbcs_ptr2char(const char_u *p) -{ - if (MB_BYTE2LEN(*p) > 1 && p[1] != NUL) - return (p[0] << 8) + p[1]; - return *p; -} - /* * Convert a UTF-8 byte sequence to a wide character. * If the sequence is illegal or truncated by a NUL the first byte is @@ -2065,68 +1634,9 @@ void show_utf8(void) msg(IObuff); } -/* - * mb_head_off() function pointer. - * Return offset from "p" to the first byte of the character it points into. - * If "p" points to the NUL at the end of the string return 0. - * Returns 0 when already at the first byte of a character. - */ -int latin_head_off(const char_u *base, const char_u *p) -{ - return 0; -} - -int dbcs_head_off(const char_u *base, const char_u *p) -{ - /* It can't be a trailing byte when not using DBCS, at the start of the - * string or the previous byte can't start a double-byte. */ - if (p <= base || MB_BYTE2LEN(p[-1]) == 1 || *p == NUL) { - return 0; - } - - /* This is slow: need to start at the base and go forward until the - * byte we are looking for. Return 1 when we went past it, 0 otherwise. */ - const char_u *q = base; - while (q < p) { - q += dbcs_ptr2len(q); - } - - return (q == p) ? 0 : 1; -} - -/* - * Special version of dbcs_head_off() that works for ScreenLines[], where - * single-width DBCS_JPNU characters are stored separately. 
- */ -int dbcs_screen_head_off(const char_u *base, const char_u *p) -{ - /* It can't be a trailing byte when not using DBCS, at the start of the - * string or the previous byte can't start a double-byte. - * For euc-jp an 0x8e byte in the previous cell always means we have a - * lead byte in the current cell. */ - if (p <= base - || (enc_dbcs == DBCS_JPNU && p[-1] == 0x8e) - || MB_BYTE2LEN(p[-1]) == 1 - || *p == NUL) - return 0; - - /* This is slow: need to start at the base and go forward until the - * byte we are looking for. Return 1 when we went past it, 0 otherwise. - * For DBCS_JPNU look out for 0x8e, which means the second byte is not - * stored as the next byte. */ - const char_u *q = base; - while (q < p) { - if (enc_dbcs == DBCS_JPNU && *q == 0x8e) { - ++q; - } - else { - q += dbcs_ptr2len(q); - } - } - - return (q == p) ? 0 : 1; -} - +/// Return offset from "p" to the first byte of the character it points into. +/// If "p" points to the NUL at the end of the string return 0. +/// Returns 0 when already at the first byte of a character. int utf_head_off(const char_u *base, const char_u *p) { int c; @@ -2232,26 +1742,20 @@ int mb_tail_off(char_u *base, char_u *p) if (*p == NUL) return 0; - if (enc_utf8) { - /* Find the last character that is 10xx.xxxx */ - for (i = 0; (p[i + 1] & 0xc0) == 0x80; ++i) - ; - /* Check for illegal sequence. */ - for (j = 0; p - j > base; ++j) - if ((p[-j] & 0xc0) != 0x80) - break; - if (utf8len_tab[p[-j]] != i + j + 1) - return 0; - return i; + // Find the last character that is 10xx.xxxx + for (i = 0; (p[i + 1] & 0xc0) == 0x80; i++) {} + + // Check for illegal sequence. + for (j = 0; p - j > base; j++) { + if ((p[-j] & 0xc0) != 0x80) { + break; + } } - /* It can't be the first byte if a double-byte when not using DBCS, at the - * end of the string or the byte can't start a double-byte. 
*/ - if (enc_dbcs == 0 || p[1] == NUL || MB_BYTE2LEN(*p) == 1) + if (utf8len_tab[p[-j]] != i + j + 1) { return 0; - - /* Return 1 when on the lead byte, 0 when on the tail byte. */ - return 1 - dbcs_head_off(base, p); + } + return i; } /* @@ -2466,13 +1970,10 @@ int mb_fix_col(int col, int row) { col = check_col(col); row = check_row(row); - if (has_mbyte && ScreenLines != NULL && col > 0 - && ((enc_dbcs - && ScreenLines[LineOffset[row] + col] != NUL - && dbcs_screen_head_off(ScreenLines + LineOffset[row], - ScreenLines + LineOffset[row] + col)) - || (enc_utf8 && ScreenLines[LineOffset[row] + col] == 0))) + if (ScreenLines != NULL && col > 0 + && ScreenLines[LineOffset[row] + col] == 0) { return col - 1; + } return col; } diff --git a/src/nvim/mbyte.h b/src/nvim/mbyte.h index 0cfe2c4babdca9..2c92a0fbb2588c 100644 --- a/src/nvim/mbyte.h +++ b/src/nvim/mbyte.h @@ -9,8 +9,8 @@ * MB_BYTE2LEN_CHECK() can be used to count a special key as one byte. * Don't call MB_BYTE2LEN(b) with b < 0 or b > 255! */ -#define MB_BYTE2LEN(b) mb_bytelen_tab[b] -#define MB_BYTE2LEN_CHECK(b) (((b) < 0 || (b) > 255) ? 1 : mb_bytelen_tab[b]) +#define MB_BYTE2LEN(b) utf8len_tab[b] +#define MB_BYTE2LEN_CHECK(b) (((b) < 0 || (b) > 255) ? 1 : utf8len_tab[b]) /* properties used in enc_canon_table[] (first three mutually exclusive) */ #define ENC_8BIT 0x01 @@ -28,6 +28,18 @@ #define ENC_LATIN9 0x400 /* Latin9 */ #define ENC_MACROMAN 0x800 /* Mac Roman (not Macro Man! 
:-) */ +// TODO(bfredl): eventually we should keep only one of the namings +#define mb_ptr2len utfc_ptr2len +#define mb_ptr2len_len utfc_ptr2len_len +#define mb_char2len utf_char2len +#define mb_char2bytes utf_char2bytes +#define mb_ptr2cells utf_ptr2cells +#define mb_ptr2cells_len utf_ptr2cells_len +#define mb_char2cells utf_char2cells +#define mb_off2cells utf_off2cells +#define mb_ptr2char utf_ptr2char +#define mb_head_off utf_head_off + #ifdef INCLUDE_GENERATED_DECLARATIONS # include "mbyte.h.generated.h" #endif diff --git a/src/nvim/msgpack_rpc/helpers.c b/src/nvim/msgpack_rpc/helpers.c index 14e1c2d9782996..c3a909692f8662 100644 --- a/src/nvim/msgpack_rpc/helpers.c +++ b/src/nvim/msgpack_rpc/helpers.c @@ -21,7 +21,8 @@ static msgpack_zone zone; static msgpack_sbuffer sbuffer; #define HANDLE_TYPE_CONVERSION_IMPL(t, lt) \ - bool msgpack_rpc_to_##lt(const msgpack_object *const obj, t *const arg) \ + bool msgpack_rpc_to_##lt(const msgpack_object *const obj, \ + Integer *const arg) \ FUNC_ATTR_NONNULL_ALL \ { \ if (obj->type != MSGPACK_OBJECT_EXT \ @@ -44,12 +45,12 @@ static msgpack_sbuffer sbuffer; return true; \ } \ \ - void msgpack_rpc_from_##lt(t o, msgpack_packer *res) \ + void msgpack_rpc_from_##lt(Integer o, msgpack_packer *res) \ FUNC_ATTR_NONNULL_ARG(2) \ { \ msgpack_packer pac; \ msgpack_packer_init(&pac, &sbuffer, msgpack_sbuffer_write); \ - msgpack_pack_int64(&pac, o); \ + msgpack_pack_int64(&pac, (handle_T)o); \ msgpack_pack_ext(res, sbuffer.size, kObjectType##t); \ msgpack_pack_ext_body(res, sbuffer.data, sbuffer.size); \ msgpack_sbuffer_clear(&sbuffer); \ @@ -213,17 +214,17 @@ bool msgpack_rpc_to_object(const msgpack_object *const obj, Object *const arg) switch (cur.mobj->via.ext.type) { case kObjectTypeBuffer: { cur.aobj->type = kObjectTypeBuffer; - ret = msgpack_rpc_to_buffer(cur.mobj, &cur.aobj->data.buffer); + ret = msgpack_rpc_to_buffer(cur.mobj, &cur.aobj->data.integer); break; } case kObjectTypeWindow: { cur.aobj->type = kObjectTypeWindow; - 
ret = msgpack_rpc_to_window(cur.mobj, &cur.aobj->data.window); + ret = msgpack_rpc_to_window(cur.mobj, &cur.aobj->data.integer); break; } case kObjectTypeTabpage: { cur.aobj->type = kObjectTypeTabpage; - ret = msgpack_rpc_to_tabpage(cur.mobj, &cur.aobj->data.tabpage); + ret = msgpack_rpc_to_tabpage(cur.mobj, &cur.aobj->data.integer); break; } } @@ -369,15 +370,15 @@ void msgpack_rpc_from_object(const Object result, msgpack_packer *const res) break; } case kObjectTypeBuffer: { - msgpack_rpc_from_buffer(cur.aobj->data.buffer, res); + msgpack_rpc_from_buffer(cur.aobj->data.integer, res); break; } case kObjectTypeWindow: { - msgpack_rpc_from_window(cur.aobj->data.window, res); + msgpack_rpc_from_window(cur.aobj->data.integer, res); break; } case kObjectTypeTabpage: { - msgpack_rpc_from_tabpage(cur.aobj->data.tabpage, res); + msgpack_rpc_from_tabpage(cur.aobj->data.integer, res); break; } case kObjectTypeArray: { diff --git a/src/nvim/ops.c b/src/nvim/ops.c index 388a72adcebfce..0263bd15da94e6 100644 --- a/src/nvim/ops.c +++ b/src/nvim/ops.c @@ -1936,8 +1936,7 @@ int swapchar(int op_type, pos_T *pos) if (c >= 0x80 && op_type == OP_ROT13) return FALSE; - if (op_type == OP_UPPER && c == 0xdf - && (enc_latin1like || STRCMP(p_enc, "iso-8859-2") == 0)) { + if (op_type == OP_UPPER && c == 0xdf) { pos_T sp = curwin->w_cursor; /* Special handling of German sharp s: change to "SS". */ diff --git a/src/nvim/option.c b/src/nvim/option.c index ca66f84a7028e1..3bd1ce217ec1cf 100644 --- a/src/nvim/option.c +++ b/src/nvim/option.c @@ -781,14 +781,11 @@ void set_init_1(void) } fenc_default = p; - // Initialize multibyte (utf-8) handling - mb_init(); - - // Don't change &encoding when resetting to defaults with ":set all&". 
- opt_idx = findoption((char_u *)"encoding"); - if (opt_idx >= 0) { - options[opt_idx].flags |= P_NODEFAULT; - } +#ifdef HAVE_WORKING_LIBINTL + // GNU gettext 0.10.37 supports this feature: set the codeset used for + // translated messages independently from the current locale. + (void)bind_textdomain_codeset(PROJECT_NAME, (char *)p_enc); +#endif /* Set the default for 'helplang'. */ set_helplang_default(get_mess_lang()); @@ -2528,7 +2525,7 @@ did_set_string_option ( else if (varp == &p_sbo) { if (check_opt_strings(p_sbo, p_scbopt_values, TRUE) != OK) errmsg = e_invarg; - } else if (varp == &p_ambw || (bool *)varp == &p_emoji) { + } else if (varp == &p_ambw || (int *)varp == &p_emoji) { // 'ambiwidth' if (check_opt_strings(p_ambw, p_ambw_values, false) != OK) { errmsg = e_invarg; @@ -2581,19 +2578,17 @@ did_set_string_option ( errmsg = e_invarg; /* 'encoding' and 'fileencoding' */ } else if (varp == &p_enc || gvarp == &p_fenc) { - if (varp == &p_enc && did_source_startup_scripts) { - errmsg = e_afterinit; - } else if (gvarp == &p_fenc) { - if (!MODIFIABLE(curbuf) && opt_flags != OPT_GLOBAL) + if (gvarp == &p_fenc) { + if (!MODIFIABLE(curbuf) && opt_flags != OPT_GLOBAL) { errmsg = e_modifiable; - else if (vim_strchr(*varp, ',') != NULL) - /* No comma allowed in 'fileencoding'; catches confusing it - * with 'fileencodings'. */ + } else if (vim_strchr(*varp, ',') != NULL) { + // No comma allowed in 'fileencoding'; catches confusing it + // with 'fileencodings'. errmsg = e_invarg; - else { - /* May show a "+" in the title now. */ + } else { + // May show a "+" in the title now. redraw_titles(); - /* Add 'fileencoding' to the swap file. */ + // Add 'fileencoding' to the swap file. 
ml_setflags(curbuf); } } @@ -2604,17 +2599,12 @@ did_set_string_option ( xfree(*varp); *varp = p; if (varp == &p_enc) { - errmsg = mb_init(); - redraw_titles(); + // only encoding=utf-8 allowed + if (STRCMP(p_enc, "utf-8") != 0) { + errmsg = e_invarg; + } } } - - if (errmsg == NULL) { - /* When 'keymap' is used and 'encoding' changes, reload the keymap - * (with another encoding). */ - if (varp == &p_enc && *curbuf->b_p_keymap != NUL) - (void)keymap_init(); - } } else if (varp == &p_penc) { /* Canonize printencoding if VIM standard one */ p = enc_canonize(p_penc); @@ -3711,23 +3701,19 @@ set_bool_option ( } } } - } - - /* - * When 'lisp' option changes include/exclude '-' in - * keyword characters. - */ - else if (varp == (char_u *)&(curbuf->b_p_lisp)) { - (void)buf_init_chartab(curbuf, FALSE); /* ignore errors */ - } - /* when 'title' changed, may need to change the title; same for 'icon' */ - else if ((int *)varp == &p_title) { - did_set_title(FALSE); + } else if (varp == (char_u *)&(curbuf->b_p_lisp)) { + // When 'lisp' option changes include/exclude '-' in + // keyword characters. 
+ (void)buf_init_chartab(curbuf, false); // ignore errors + } else if ((int *)varp == &p_title) { + // when 'title' changed, may need to change the title; same for 'icon' + did_set_title(false); } else if ((int *)varp == &p_icon) { - did_set_title(TRUE); - } else if ((bool *)varp == &curbuf->b_changed) { - if (!value) - save_file_ff(curbuf); /* Buffer is unchanged */ + did_set_title(true); + } else if ((int *)varp == &curbuf->b_changed) { + if (!value) { + save_file_ff(curbuf); // Buffer is unchanged + } redraw_titles(); modified_was_set = value; } @@ -3755,11 +3741,12 @@ set_bool_option ( else if ((int *)varp == &curwin->w_p_wrap) { if (curwin->w_p_wrap) curwin->w_leftcol = 0; - } else if ((bool *)varp == &p_ea) { - if (p_ea && !old_value) + } else if ((int *)varp == &p_ea) { + if (p_ea && !old_value) { win_equal(curwin, false, 0); - } else if ((bool *)varp == &p_acd) { - /* Change directories when the 'acd' option is set now. */ + } + } else if ((int *)varp == &p_acd) { + // Change directories when the 'acd' option is set now. do_autochdir(); } /* 'diff' */ @@ -4518,10 +4505,11 @@ get_option_value ( else { /* Special case: 'modified' is b_changed, but we also want to consider * it set when 'ff' or 'fenc' changed. */ - if ((bool *)varp == &curbuf->b_changed) + if ((int *)varp == &curbuf->b_changed) { *numval = curbufIsChanged(); - else + } else { *numval = *(int *)varp; + } } return 1; } @@ -4889,14 +4877,15 @@ showoneopt ( varp = get_varp_scope(p, opt_flags); - /* for 'modified' we also need to check if 'ff' or 'fenc' changed. */ - if ((p->flags & P_BOOL) && ((bool *)varp == &curbuf->b_changed - ? !curbufIsChanged() : !*(bool *)varp)) + // for 'modified' we also need to check if 'ff' or 'fenc' changed. + if ((p->flags & P_BOOL) && ((int *)varp == &curbuf->b_changed + ? 
!curbufIsChanged() : !*(int *)varp)) { MSG_PUTS("no"); - else if ((p->flags & P_BOOL) && *(int *)varp < 0) + } else if ((p->flags & P_BOOL) && *(int *)varp < 0) { MSG_PUTS("--"); - else + } else { MSG_PUTS(" "); + } MSG_PUTS(p->fullname); if (!(p->flags & P_BOOL)) { msg_putchar('='); diff --git a/src/nvim/option_defs.h b/src/nvim/option_defs.h index 57ad5f5d1a7a82..6e89a093c8f12a 100644 --- a/src/nvim/option_defs.h +++ b/src/nvim/option_defs.h @@ -1,8 +1,6 @@ #ifndef NVIM_OPTION_DEFS_H #define NVIM_OPTION_DEFS_H -#include - #include "nvim/types.h" #include "nvim/macros.h" // For EXTERN @@ -296,16 +294,16 @@ enum { * The following are actual variables for the options */ -EXTERN long p_aleph; /* 'aleph' */ -EXTERN bool p_acd; /* 'autochdir' */ -EXTERN char_u *p_ambw; /* 'ambiwidth' */ -EXTERN int p_ar; /* 'autoread' */ -EXTERN int p_aw; /* 'autowrite' */ -EXTERN int p_awa; /* 'autowriteall' */ -EXTERN char_u *p_bs; /* 'backspace' */ -EXTERN char_u *p_bg; /* 'background' */ -EXTERN int p_bk; /* 'backup' */ -EXTERN char_u *p_bkc; /* 'backupcopy' */ +EXTERN long p_aleph; // 'aleph' +EXTERN int p_acd; // 'autochdir' +EXTERN char_u *p_ambw; // 'ambiwidth' +EXTERN int p_ar; // 'autoread' +EXTERN int p_aw; // 'autowrite' +EXTERN int p_awa; // 'autowriteall' +EXTERN char_u *p_bs; // 'backspace' +EXTERN char_u *p_bg; // 'background' +EXTERN int p_bk; // 'backup' +EXTERN char_u *p_bkc; // 'backupcopy' EXTERN unsigned int bkc_flags; ///< flags from 'backupcopy' #ifdef IN_OPTION_C static char *(p_bkc_values[]) = @@ -403,9 +401,9 @@ static char *(p_dy_values[]) = { "lastline", "truncate", "uhex", NULL }; #define DY_TRUNCATE 0x002 #define DY_UHEX 0x004 EXTERN int p_ed; // 'edcompatible' -EXTERN bool p_emoji; // 'emoji' +EXTERN int p_emoji; // 'emoji' EXTERN char_u *p_ead; // 'eadirection' -EXTERN bool p_ea; // 'equalalways' +EXTERN int p_ea; // 'equalalways' EXTERN char_u *p_ep; // 'equalprg' EXTERN int p_eb; // 'errorbells' EXTERN char_u *p_ef; // 'errorfile' @@ -417,7 +415,7 @@ 
EXTERN int p_ek; // 'esckeys' EXTERN int p_exrc; // 'exrc' EXTERN char_u *p_fencs; // 'fileencodings' EXTERN char_u *p_ffs; // 'fileformats' -EXTERN bool p_fic; // 'fileignorecase' +EXTERN int p_fic; // 'fileignorecase' EXTERN char_u *p_fcl; // 'foldclose' EXTERN long p_fdls; // 'foldlevelstart' EXTERN char_u *p_fdo; // 'foldopen' @@ -623,7 +621,7 @@ EXTERN long p_titlelen; ///< 'titlelen' EXTERN char_u *p_titleold; ///< 'titleold' EXTERN char_u *p_titlestring; ///< 'titlestring' EXTERN char_u *p_tsr; ///< 'thesaurus' -EXTERN bool p_tgc; ///< 'termguicolors' +EXTERN int p_tgc; ///< 'termguicolors' EXTERN int p_ttimeout; ///< 'ttimeout' EXTERN long p_ttm; ///< 'ttimeoutlen' EXTERN char_u *p_udir; ///< 'undodir' @@ -652,26 +650,26 @@ char_u *p_vfile = (char_u *)""; /* used before options are initialized */ #else extern char_u *p_vfile; /* 'verbosefile' */ #endif -EXTERN int p_warn; /* 'warn' */ -EXTERN char_u *p_wop; /* 'wildoptions' */ -EXTERN long p_window; /* 'window' */ -EXTERN char_u *p_wak; /* 'winaltkeys' */ -EXTERN char_u *p_wig; /* 'wildignore' */ -EXTERN char_u *p_ww; /* 'whichwrap' */ -EXTERN long p_wc; /* 'wildchar' */ -EXTERN long p_wcm; /* 'wildcharm' */ -EXTERN bool p_wic; ///< 'wildignorecase' -EXTERN char_u *p_wim; /* 'wildmode' */ -EXTERN int p_wmnu; /* 'wildmenu' */ -EXTERN long p_wh; /* 'winheight' */ -EXTERN long p_wmh; /* 'winminheight' */ -EXTERN long p_wmw; /* 'winminwidth' */ -EXTERN long p_wiw; /* 'winwidth' */ -EXTERN bool p_ws; /* 'wrapscan' */ -EXTERN int p_write; /* 'write' */ -EXTERN int p_wa; /* 'writeany' */ -EXTERN int p_wb; /* 'writebackup' */ -EXTERN long p_wd; /* 'writedelay' */ +EXTERN int p_warn; // 'warn' +EXTERN char_u *p_wop; // 'wildoptions' +EXTERN long p_window; // 'window' +EXTERN char_u *p_wak; // 'winaltkeys' +EXTERN char_u *p_wig; // 'wildignore' +EXTERN char_u *p_ww; // 'whichwrap' +EXTERN long p_wc; // 'wildchar' +EXTERN long p_wcm; // 'wildcharm' +EXTERN int p_wic; // 'wildignorecase' +EXTERN char_u *p_wim; // 
'wildmode' +EXTERN int p_wmnu; // 'wildmenu' +EXTERN long p_wh; // 'winheight' +EXTERN long p_wmh; // 'winminheight' +EXTERN long p_wmw; // 'winminwidth' +EXTERN long p_wiw; // 'winwidth' +EXTERN int p_ws; // 'wrapscan' +EXTERN int p_write; // 'write' +EXTERN int p_wa; // 'writeany' +EXTERN int p_wb; // 'writebackup' +EXTERN long p_wd; // 'writedelay' EXTERN int p_force_on; ///< options that cannot be turned off. EXTERN int p_force_off; ///< options that cannot be turned on. diff --git a/src/nvim/regexp_nfa.c b/src/nvim/regexp_nfa.c index 35308b74116704..384568259f8186 100644 --- a/src/nvim/regexp_nfa.c +++ b/src/nvim/regexp_nfa.c @@ -4560,9 +4560,11 @@ static int recursive_regmatch(nfa_state_T *state, nfa_pim_T *pim, nfa_regprog_T if (REG_MULTI) regline = reg_getline(reglnum); reginput = regline + save_reginput_col; - nfa_match = save_nfa_match; + if (result != NFA_TOO_EXPENSIVE) { + nfa_match = save_nfa_match; + nfa_listid = save_nfa_listid; + } nfa_endp = save_nfa_endp; - nfa_listid = save_nfa_listid; #ifdef REGEXP_DEBUG log_fd = fopen(NFA_REGEXP_RUN_LOG, "a"); diff --git a/src/nvim/screen.c b/src/nvim/screen.c index 3e4d016fe75fe5..cee3c62f43958b 100644 --- a/src/nvim/screen.c +++ b/src/nvim/screen.c @@ -5292,7 +5292,7 @@ void screen_puts_len(char_u *text, int textlen, int row, int col, int attr) int force_redraw_next = FALSE; int need_redraw; - const int l_has_mbyte = has_mbyte; + const bool l_has_mbyte = has_mbyte; const bool l_enc_utf8 = enc_utf8; const int l_enc_dbcs = enc_dbcs; @@ -5459,9 +5459,6 @@ void screen_puts_len(char_u *text, int textlen, int row, int col, int attr) /* If we detected the next character needs to be redrawn, but the text * doesn't extend up to there, update the character here. 
*/ if (force_redraw_next && col < screen_Columns) { - if (l_enc_dbcs != 0 && dbcs_off2cells(off, max_off) > 1) - screen_char_2(off, row, col); - else screen_char(off, row, col); } } diff --git a/src/nvim/spell.c b/src/nvim/spell.c index ba7f31be25d53f..d9cdce8ca475dd 100644 --- a/src/nvim/spell.c +++ b/src/nvim/spell.c @@ -9266,9 +9266,7 @@ static void allcap_copy(char_u *word, char_u *wcopy) else c = *s++; - // We only change 0xdf to SS when we are certain latin1 is used. It - // would cause weird errors in other 8-bit encodings. - if (enc_latin1like && c == 0xdf) { + if (c == 0xdf) { c = 'S'; if (d - wcopy >= MAXWLEN - 1) break; @@ -12602,7 +12600,7 @@ static int spell_edit_score(slang_T *slang, char_u *badword, char_u *goodword) char_u *p; int wbadword[MAXWLEN]; int wgoodword[MAXWLEN]; - const int l_has_mbyte = has_mbyte; + const bool l_has_mbyte = has_mbyte; if (l_has_mbyte) { // Get the characters from the multi-byte strings and put them in an diff --git a/src/nvim/testdir/test64.in b/src/nvim/testdir/test64.in index c4585ecbceb0ac..ec11e15e351b8b 100644 --- a/src/nvim/testdir/test64.in +++ b/src/nvim/testdir/test64.in @@ -20,6 +20,7 @@ STARTTEST :"""" Previously written tests """""""""""""""""""""""""""""""" :"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" :" +:set noautoindent :call add(tl, [2, 'ab', 'aab', 'ab']) :call add(tl, [2, 'b', 'abcdef', 'b']) :call add(tl, [2, 'bc*', 'abccccdef', 'bcccc']) @@ -577,7 +578,7 @@ Gop:" :" Check patterns matching cursor position. :func! 
Postest() new - call setline(1, ['ffooooo', 'boboooo', 'zoooooo', 'koooooo', 'moooooo', "\t\t\tfoo", 'abababababababfoo', 'bababababababafoo', '********_']) + call setline(1, ['ffooooo', 'boboooo', 'zoooooo', 'koooooo', 'moooooo', "\t\t\tfoo", 'abababababababfoo', 'bababababababafoo', '********_', ' xxxxxxxxxxxx xxxx xxxxxx xxxxxxx x xxxxxxxxx xx xxxxxx xxxxxx xxxxx xxxxxxx xx xxxx xxxxxxxx xxxx xxxxxxxxxxx xxx xxxxxxx xxxxxxxxx xx xxxxxx xx xxxxxxx xxxxxxxxxxxxxxxx xxxxxxxxx xxx xxxxxxxx xxxxxxxxx xxxx xxx xxxx xxx xxx xxxxx xxxxxxxxxxxx xxxx xxxxxxxxx xxxxxxxxxxx xx xxxxx xxx xxxxxxxx xxxxxx xxx xxx xxxxxxxxx xxxxxxx x xxxxxxxxx xx xxxxxx xxxxxxx xxxxxxxxxxxxxxxxxx xxxxxxx xxxxxxx xxx xxx xxxxxxxx xxxxxxx xxxx xxx xxxxxx xxxxx xxxxx xx xxxxxx xxxxxxx xxx xxxxxxxxxxxx xxxx xxxxxxxxx xxxxxx xxxxxx xxxxx xxx xxxxxxx xxxxxxxxxxxxxxxx xxxxxxxxx xxxxxxxxxx xxxx xx xxxxxxxx xxx xxxxxxxxxxx xxxxx']) call setpos('.', [0, 1, 0, 0]) s/\%>3c.//g call setpos('.', [0, 2, 4, 0]) @@ -589,6 +590,7 @@ Gop:" %s/\%>6l\%3c./!/g %s/\%>7l\%12c./?/g %s/\%>7l\%<9l\%>5v\%<8v./#/g + $s/\%(|\u.*\)\@<=[^|\t]\+$//ge 1,$yank quit! endfunc diff --git a/src/nvim/testdir/test64.ok b/src/nvim/testdir/test64.ok index 92f06ea9f323a2..c218f8ea170096 100644 --- a/src/nvim/testdir/test64.ok +++ b/src/nvim/testdir/test64.ok @@ -1076,6 +1076,7 @@ moooooo ab!babababababfoo ba!ab##abab?bafoo **!*****_ + ! 
xxx?xxxxxxxx xxxx xxxxxx xxxxxxx x xxxxxxxxx xx xxxxxx xxxxxx xxxxx xxxxxxx xx xxxx xxxxxxxx xxxx xxxxxxxxxxx xxx xxxxxxx xxxxxxxxx xx xxxxxx xx xxxxxxx xxxxxxxxxxxxxxxx xxxxxxxxx xxx xxxxxxxx xxxxxxxxx xxxx xxx xxxx xxx xxx xxxxx xxxxxxxxxxxx xxxx xxxxxxxxx xxxxxxxxxxx xx xxxxx xxx xxxxxxxx xxxxxx xxx xxx xxxxxxxxx xxxxxxx x xxxxxxxxx xx xxxxxx xxxxxxx xxxxxxxxxxxxxxxxxx xxxxxxx xxxxxxx xxx xxx xxxxxxxx xxxxxxx xxxx xxx xxxxxx xxxxx xxxxx xx xxxxxx xxxxxxx xxx xxxxxxxxxxxx xxxx xxxxxxxxx xxxxxx xxxxxx xxxxx xxx xxxxxxx xxxxxxxxxxxxxxxx xxxxxxxxx xxxxxxxxxx xxxx xx xxxxxxxx xxx xxxxxxxxxxx xxxxx -1- ffo bob @@ -1086,6 +1087,7 @@ moooooo ab!babababababfoo ba!ab##abab?bafoo **!*****_ + ! xxx?xxxxxxxx xxxx xxxxxx xxxxxxx x xxxxxxxxx xx xxxxxx xxxxxx xxxxx xxxxxxx xx xxxx xxxxxxxx xxxx xxxxxxxxxxx xxx xxxxxxx xxxxxxxxx xx xxxxxx xx xxxxxxx xxxxxxxxxxxxxxxx xxxxxxxxx xxx xxxxxxxx xxxxxxxxx xxxx xxx xxxx xxx xxx xxxxx xxxxxxxxxxxx xxxx xxxxxxxxx xxxxxxxxxxx xx xxxxx xxx xxxxxxxx xxxxxx xxx xxx xxxxxxxxx xxxxxxx x xxxxxxxxx xx xxxxxx xxxxxxx xxxxxxxxxxxxxxxxxx xxxxxxx xxxxxxx xxx xxx xxxxxxxx xxxxxxx xxxx xxx xxxxxx xxxxx xxxxx xx xxxxxx xxxxxxx xxx xxxxxxxxxxxx xxxx xxxxxxxxx xxxxxx xxxxxx xxxxx xxx xxxxxxx xxxxxxxxxxxxxxxx xxxxxxxxx xxxxxxxxxx xxxx xx xxxxxxxx xxx xxxxxxxxxxx xxxxx -2- ffo bob @@ -1096,6 +1098,7 @@ moooooo ab!babababababfoo ba!ab##abab?bafoo **!*****_ + ! 
xxx?xxxxxxxx xxxx xxxxxx xxxxxxx x xxxxxxxxx xx xxxxxx xxxxxx xxxxx xxxxxxx xx xxxx xxxxxxxx xxxx xxxxxxxxxxx xxx xxxxxxx xxxxxxxxx xx xxxxxx xx xxxxxxx xxxxxxxxxxxxxxxx xxxxxxxxx xxx xxxxxxxx xxxxxxxxx xxxx xxx xxxx xxx xxx xxxxx xxxxxxxxxxxx xxxx xxxxxxxxx xxxxxxxxxxx xx xxxxx xxx xxxxxxxx xxxxxx xxx xxx xxxxxxxxx xxxxxxx x xxxxxxxxx xx xxxxxx xxxxxxx xxxxxxxxxxxxxxxxxx xxxxxxx xxxxxxx xxx xxx xxxxxxxx xxxxxxx xxxx xxx xxxxxx xxxxx xxxxx xx xxxxxx xxxxxxx xxx xxxxxxxxxxxx xxxx xxxxxxxxx xxxxxx xxxxxx xxxxx xxx xxxxxxx xxxxxxxxxxxxxxxx xxxxxxxxx xxxxxxxxxx xxxx xx xxxxxxxx xxx xxxxxxxxxxx xxxxx Test Test END EN diff --git a/src/nvim/testdir/test_alot.vim b/src/nvim/testdir/test_alot.vim index 036a4c0470f1ee..87c1cd2c58f33b 100644 --- a/src/nvim/testdir/test_alot.vim +++ b/src/nvim/testdir/test_alot.vim @@ -4,6 +4,8 @@ source test_assign.vim source test_cursor_func.vim source test_ex_undo.vim +source test_expr.vim +source test_expr_utf8.vim source test_feedkeys.vim source test_cmdline.vim source test_menu.vim diff --git a/src/nvim/testdir/test_expr.vim b/src/nvim/testdir/test_expr.vim new file mode 100644 index 00000000000000..7ea4ebc7df60f6 --- /dev/null +++ b/src/nvim/testdir/test_expr.vim @@ -0,0 +1,25 @@ +" Tests for expressions. 
+ +func Test_strgetchar() + call assert_equal(char2nr('a'), strgetchar('axb', 0)) + call assert_equal(char2nr('x'), strgetchar('axb', 1)) + call assert_equal(char2nr('b'), strgetchar('axb', 2)) + + call assert_equal(-1, strgetchar('axb', -1)) + call assert_equal(-1, strgetchar('axb', 3)) + call assert_equal(-1, strgetchar('', 0)) +endfunc + +func Test_strcharpart() + call assert_equal('a', strcharpart('axb', 0, 1)) + call assert_equal('x', strcharpart('axb', 1, 1)) + call assert_equal('b', strcharpart('axb', 2, 1)) + call assert_equal('xb', strcharpart('axb', 1)) + + call assert_equal('', strcharpart('axb', 1, 0)) + call assert_equal('', strcharpart('axb', 1, -1)) + call assert_equal('', strcharpart('axb', -1, 1)) + call assert_equal('', strcharpart('axb', -2, 2)) + + call assert_equal('a', strcharpart('axb', -1, 2)) +endfunc diff --git a/src/nvim/testdir/test_expr_utf8.vim b/src/nvim/testdir/test_expr_utf8.vim new file mode 100644 index 00000000000000..7bdcb4f65f8cdd --- /dev/null +++ b/src/nvim/testdir/test_expr_utf8.vim @@ -0,0 +1,38 @@ +" Tests for expressions using utf-8. 
+if !has('multi_byte') + finish +endif +scriptencoding utf-8 + +func Test_strgetchar_utf8() + call assert_equal(char2nr('á'), strgetchar('áxb', 0)) + call assert_equal(char2nr('x'), strgetchar('áxb', 1)) + + call assert_equal(char2nr('a'), strgetchar('àxb', 0)) + call assert_equal(char2nr('̀'), strgetchar('àxb', 1)) + call assert_equal(char2nr('x'), strgetchar('àxb', 2)) + + call assert_equal(char2nr('あ'), strgetchar('あaい', 0)) + call assert_equal(char2nr('a'), strgetchar('あaい', 1)) + call assert_equal(char2nr('い'), strgetchar('あaい', 2)) +endfunc + +func Test_strcharpart_utf8() + call assert_equal('áxb', strcharpart('áxb', 0)) + call assert_equal('á', strcharpart('áxb', 0, 1)) + call assert_equal('x', strcharpart('áxb', 1, 1)) + + call assert_equal('いうeお', strcharpart('あいうeお', 1)) + call assert_equal('い', strcharpart('あいうeお', 1, 1)) + call assert_equal('いう', strcharpart('あいうeお', 1, 2)) + call assert_equal('いうe', strcharpart('あいうeお', 1, 3)) + call assert_equal('いうeお', strcharpart('あいうeお', 1, 4)) + call assert_equal('eお', strcharpart('あいうeお', 3)) + call assert_equal('e', strcharpart('あいうeお', 3, 1)) + + call assert_equal('あ', strcharpart('あいうeお', -3, 4)) + + call assert_equal('a', strcharpart('àxb', 0, 1)) + call assert_equal('̀', strcharpart('àxb', 1, 1)) + call assert_equal('x', strcharpart('àxb', 2, 1)) +endfunc diff --git a/src/nvim/tui/input.c b/src/nvim/tui/input.c index 740716f0efa14b..9dc66420b019ca 100644 --- a/src/nvim/tui/input.c +++ b/src/nvim/tui/input.c @@ -31,8 +31,8 @@ void term_input_init(TermInput *input, Loop *loop) if (!term) { term = ""; // termkey_new_abstract assumes non-null (#2745) } - int enc_flag = enc_utf8 ? 
TERMKEY_FLAG_UTF8 : TERMKEY_FLAG_RAW; - input->tk = termkey_new_abstract(term, enc_flag); + + input->tk = termkey_new_abstract(term, TERMKEY_FLAG_UTF8); int curflags = termkey_get_canonflags(input->tk); termkey_set_canonflags(input->tk, curflags | TERMKEY_CANON_DELBS); diff --git a/src/nvim/tui/tui.c b/src/nvim/tui/tui.c index f252b00be2a967..5e30517c5a430f 100644 --- a/src/nvim/tui/tui.c +++ b/src/nvim/tui/tui.c @@ -611,6 +611,7 @@ static void suspend_event(void **argv) bool enable_mouse = data->mouse_enabled; tui_terminal_stop(ui); data->cont_received = false; + stream_set_blocking(input_global_fd(), true); // normalize stream (#2598) kill(0, SIGTSTP); while (!data->cont_received) { // poll the event loop until SIGCONT is received @@ -620,6 +621,7 @@ static void suspend_event(void **argv) if (enable_mouse) { tui_mouse_on(ui); } + stream_set_blocking(input_global_fd(), false); // libuv expects this // resume the main thread CONTINUE(data->bridge); } diff --git a/src/nvim/version.c b/src/nvim/version.c index 7ab8c84569718f..055a997cd48273 100644 --- a/src/nvim/version.c +++ b/src/nvim/version.c @@ -475,7 +475,7 @@ static int included_patches[] = { // 1970, // 1969 NA // 1968, - // 1967, + 1967, // 1966, // 1965 NA // 1964, @@ -661,10 +661,10 @@ static int included_patches[] = { // 1785, // 1784 NA // 1783, - // 1782, + 1782, // 1781, // 1780, - // 1779, + 1779, // 1778 NA // 1777 NA // 1776 NA @@ -702,19 +702,19 @@ static int included_patches[] = { // 1745 NA // 1744 NA // 1743 NA - // 1742, - // 1741, + 1742, + 1741, 1740, // 1739, // 1738, // 1737 NA // 1736 NA // 1735, - // 1734, + 1734, // 1733 NA 1732, // 1731, - // 1730, + 1730, // 1729 NA 1728, // 1727, diff --git a/test/functional/eval/json_functions_spec.lua b/test/functional/eval/json_functions_spec.lua index 159d775ff1e0a8..4a6758019be60b 100644 --- a/test/functional/eval/json_functions_spec.lua +++ b/test/functional/eval/json_functions_spec.lua @@ -489,18 +489,6 @@ describe('json_decode() function', 
function() '{"b": 3, "a": 1, "c": 4, "d": 2, "\\u0000": 4}') end) - it('converts strings to latin1 when &encoding is latin1', function() - restart('--cmd', 'set encoding=latin1') - eq('\171', funcs.json_decode('"\\u00AB"')) - sp_decode_eq({_TYPE='string', _VAL={'\n\171\n'}}, '"\\u0000\\u00AB\\u0000"') - end) - - it('fails to convert string to latin1 if it is impossible', function() - restart('--cmd', 'set encoding=latin1') - eq('Vim(call):E474: Failed to convert string "ꯍ" from UTF-8', - exc_exec('call json_decode(\'"\\uABCD"\')')) - end) - it('parses U+00C3 correctly', function() eq('\195\131', funcs.json_decode('"\195\131"')) end) @@ -528,14 +516,6 @@ describe('json_decode() function', function() eq({key={'val', 'val2'}, key2=1}, funcs.json_decode(str)) end) - it('always treats input as UTF-8', function() - -- When &encoding is latin1 string "«" is U+00C2 U+00AB U+00C2: «Â. So if - -- '"«"' was parsed as latin1 json_decode would return three characters, and - -- only one U+00AB when this string is parsed as latin1. 
- restart('--cmd', 'set encoding=latin1') - eq(('%c'):format(0xAB), funcs.json_decode('"«"')) - end) - it('does not overflow when writing error message about decoding ["", ""]', function() eq('\nE474: Attempt to decode a blank string' @@ -762,12 +742,6 @@ describe('json_encode() function', function() exc_exec('call json_encode(["", ""], 1)')) end) - it('converts strings from latin1 when &encoding is latin1', function() - clear('--cmd', 'set encoding=latin1') - eq('"\\u00AB"', funcs.json_encode('\171')) - eq('"\\u0000\\u00AB\\u0000"', eval('json_encode({"_TYPE": v:msgpack_types.string, "_VAL": ["\\n\171\\n"]})')) - end) - it('ignores improper values in &isprint', function() meths.set_option('isprint', '1') eq(1, eval('"\1" =~# "\\\\p"')) diff --git a/test/functional/ex_cmds/encoding_spec.lua b/test/functional/ex_cmds/encoding_spec.lua index e2b3e7e31df463..87ed7a2d0adc8b 100644 --- a/test/functional/ex_cmds/encoding_spec.lua +++ b/test/functional/ex_cmds/encoding_spec.lua @@ -15,27 +15,26 @@ describe('&encoding', function() execute('set encoding=latin1') -- error message expected feed('') - neq(nil, string.find(eval('v:errmsg'), '^E905:')) + neq(nil, string.find(eval('v:errmsg'), '^E474:')) eq('utf-8', eval('&encoding')) -- check nvim is still in utf-8 mode eq(3, eval('strwidth("Bär")')) end) - it('can be changed before startup', function() + it('cannot be changed before startup', function() clear('--cmd', 'set enc=latin1') - execute('set encoding=utf-8') -- error message expected feed('') - eq('latin1', eval('&encoding')) - eq(4, eval('strwidth("Bär")')) + neq(nil, string.find(eval('v:errmsg'), '^E474:')) + eq('utf-8', eval('&encoding')) + eq(3, eval('strwidth("Bär")')) end) - it('is not changed by `set all&`', function() - -- we need to set &encoding to something non-default. 
Use 'latin1' - clear('--cmd', 'set enc=latin1') - execute('set all&') - eq('latin1', eval('&encoding')) - eq(4, eval('strwidth("Bär")')) - end) + it('can be set to utf-8 without error', function() + execute('set encoding=utf-8') + eq("", eval('v:errmsg')) + clear('--cmd', 'set enc=utf-8') + eq("", eval('v:errmsg')) + end) end) diff --git a/test/functional/ex_cmds/global_spec.lua b/test/functional/ex_cmds/global_spec.lua new file mode 100644 index 00000000000000..81a0ef32489ad5 --- /dev/null +++ b/test/functional/ex_cmds/global_spec.lua @@ -0,0 +1,74 @@ +local helpers = require('test.functional.helpers')(after_each) +local Screen = require('test.functional.ui.screen') +local clear, feed, source = helpers.clear, helpers.feed, helpers.source + +if helpers.pending_win32(pending) then return end + +describe(':global', function() + before_each(function() + clear() + end) + + it('is interrupted by mapped CTRL-C', function() + if os.getenv("TRAVIS") and os.getenv("CLANG_SANITIZER") == "ASAN_UBSAN" then + -- XXX: ASAN_UBSAN is too slow to react to the CTRL-C. + pending("", function() end) + return + end + + source([[ + set nomore + set undolevels=-1 + nnoremap + for i in range(0, 99999) + put ='XXX' + endfor + put ='ZZZ' + 1 + .delete + ]]) + + local screen = Screen.new(52, 6) + screen:attach() + screen:set_default_attr_ids({ + [0] = {foreground = Screen.colors.White, + background = Screen.colors.Red}, + [1] = {bold = true, + foreground = Screen.colors.SeaGreen} + }) + + screen:expect([[ + ^XXX | + XXX | + XXX | + XXX | + XXX | + | + ]]) + + local function test_ctrl_c(ms) + feed(":global/^/p") + helpers.sleep(ms) + feed("") + screen:expect([[ + XXX | + XXX | + XXX | + XXX | + {0:Interrupted} | + Interrupt: {1:Press ENTER or type command to continue}^ | + ]]) + end + + -- The test is time-sensitive. Try with different sleep values. 
+ local ms_values = {10, 50, 100} + for i, ms in ipairs(ms_values) do + if i < #ms_values then + local status, _ = pcall(test_ctrl_c, ms) + if status then break end + else -- Call the last attempt directly. + test_ctrl_c(ms) + end + end + end) +end) diff --git a/test/functional/shada/history_spec.lua b/test/functional/shada/history_spec.lua index 22e653b1d6555e..c4be9e563dea2e 100644 --- a/test/functional/shada/history_spec.lua +++ b/test/functional/shada/history_spec.lua @@ -4,9 +4,7 @@ local nvim_command, funcs, meths, nvim_feed, eq = helpers.command, helpers.funcs, helpers.meths, helpers.feed, helpers.eq local shada_helpers = require('test.functional.shada.helpers') -local reset, set_additional_cmd, clear = - shada_helpers.reset, shada_helpers.set_additional_cmd, - shada_helpers.clear +local reset, clear = shada_helpers.reset, shada_helpers.clear describe('ShaDa support code', function() before_each(reset) @@ -173,158 +171,57 @@ describe('ShaDa support code', function() eq('goo', funcs.getline(1)) end) - it('dumps and loads history correctly when &encoding is not UTF-8', function() - set_additional_cmd('set encoding=latin1') + it('dumps and loads history with UTF-8 characters', function() reset() - -- \171 is U+00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK in latin1 - nvim_feed(':echo "\171"\n') - nvim_command('qall') - reset() - eq('echo "\171"', funcs.histget(':', -1)) - end) - - it('dumps and loads history correctly when &encoding /= UTF-8 when dumping', - function() - set_additional_cmd('set encoding=latin1') - reset() - -- \171 is U+00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK in latin1 - nvim_feed(':echo "\171"\n') - set_additional_cmd('') - nvim_command('qall') - reset() - eq('echo "«"', funcs.histget(':', -1)) - end) - - it('dumps and loads history correctly when &encoding /= UTF-8 when loading', - function() - -- \171 is U+00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK in latin1 nvim_feed(':echo "«"\n') - set_additional_cmd('set encoding=latin1') 
nvim_command('qall') reset() - eq('echo "\171"', funcs.histget(':', -1)) + eq('echo "«"', funcs.histget(':', -1)) end) - it('dumps and loads replacement correctly when &encoding is not UTF-8', + it('dumps and loads replacement with UTF-8 characters', function() - set_additional_cmd('set encoding=latin1') - reset() - -- \171 is U+00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK in latin1 - nvim_command('substitute/./\171/ge') + nvim_command('substitute/./«/ge') nvim_command('qall!') reset() funcs.setline('.', {'.'}) nvim_command('&') - eq('\171', funcs.getline('.')) - end) - - it('dumps&loads replacement correctly when &encoding /= UTF-8 when dumping', - function() - set_additional_cmd('set encoding=latin1') - reset() - -- \171 is U+00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK in latin1 - nvim_command('substitute/./\171/ge') - set_additional_cmd('') - nvim_command('qall') - reset() - funcs.setline('.', {'.'}) - nvim_command('&') eq('«', funcs.getline('.')) end) - it('dumps&loads replacement correctly when &encoding /= UTF-8 when loading', - function() - -- \171 is U+00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK in latin1 - nvim_command('substitute/./«/ge') - set_additional_cmd('set encoding=latin1') - nvim_command('qall') - reset() - funcs.setline('.', {'.'}) - nvim_command('&') - eq('\171', funcs.getline('.')) - end) - - it('dumps and loads substitute pattern correctly when &encoding is not UTF-8', + it('dumps and loads substitute pattern with UTF-8 characters', function() - set_additional_cmd('set encoding=latin1') - reset() - -- \171 is U+00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK in latin1 - nvim_command('substitute/\171/./ge') + nvim_command('substitute/«/./ge') nvim_command('qall!') reset() - funcs.setline('.', {'\171«'}) - nvim_command('&') - eq('.«', funcs.getline('.')) - end) - - it('dumps&loads s/pattern correctly when &encoding /= UTF-8 when dumping', - function() - set_additional_cmd('set encoding=latin1') - reset() - -- \171 is U+00AB LEFT-POINTING 
DOUBLE ANGLE QUOTATION MARK in latin1 - nvim_command('substitute/\171/./ge') - set_additional_cmd('') - nvim_command('qall') - reset() funcs.setline('.', {'«\171'}) nvim_command('&') eq('.\171', funcs.getline('.')) end) - it('dumps&loads s/pattern correctly when &encoding /= UTF-8 when loading', + it('dumps and loads search pattern with UTF-8 characters', function() - -- \171 is U+00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK in latin1 - nvim_command('substitute/«/./ge') - set_additional_cmd('set encoding=latin1') - nvim_command('qall') - reset() - funcs.setline('.', {'\171«'}) - nvim_command('&') - eq('.«', funcs.getline('.')) - end) - - it('dumps and loads search pattern correctly when &encoding is not UTF-8', - function() - set_additional_cmd('set encoding=latin1') - reset() - -- \171 is U+00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK in latin1 - nvim_command('silent! /\171/') + nvim_command('silent! /«/') nvim_command('set shada+=/0') nvim_command('qall!') reset() funcs.setline('.', {'\171«'}) nvim_command('~&') - eq('«', funcs.getline('.')) - eq('', funcs.histget('/', -1)) - end) - - it('dumps&loads /pattern correctly when &encoding /= UTF-8 when dumping', - function() - set_additional_cmd('set encoding=latin1') - reset() - -- \171 is U+00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK in latin1 - nvim_command('silent! /\171/') - nvim_command('set shada+=/0') - set_additional_cmd('') - nvim_command('qall') - reset() - funcs.setline('.', {'«\171'}) - nvim_command('~&') eq('\171', funcs.getline('.')) eq('', funcs.histget('/', -1)) end) - it('dumps&loads /pattern correctly when &encoding /= UTF-8 when loading', + it('dumps and loads search pattern with 8-bit single-byte', function() -- \171 is U+00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK in latin1 - nvim_command('silent! /«/') + nvim_command('silent! 
/\171/') nvim_command('set shada+=/0') - set_additional_cmd('set encoding=latin1') - nvim_command('qall') + nvim_command('qall!') reset() funcs.setline('.', {'\171«'}) nvim_command('~&') eq('«', funcs.getline('.')) eq('', funcs.histget('/', -1)) end) + end) diff --git a/test/functional/shada/registers_spec.lua b/test/functional/shada/registers_spec.lua index f1c587c640a304..fc812f799c38ca 100644 --- a/test/functional/shada/registers_spec.lua +++ b/test/functional/shada/registers_spec.lua @@ -128,36 +128,24 @@ describe('ShaDa support code', function() eq({{}, ''}, getreg('h')) end) - it('dumps and loads register correctly when &encoding is not UTF-8', + it('dumps and loads register correctly with utf-8 contents', function() - set_additional_cmd('set encoding=latin1') reset() - -- \171 is U+00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK in latin1 - setreg('e', {'\171'}, 'c') + setreg('e', {'«'}, 'c') nvim_command('qall') reset() - eq({{'\171'}, 'v'}, getreg('e')) + eq({{'«'}, 'v'}, getreg('e')) end) - it('dumps and loads history correctly when &encoding /= UTF-8 when dumping', + it('dumps and loads history correctly with 8-bit single-byte', function() - set_additional_cmd('set encoding=latin1') reset() -- \171 is U+00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK in latin1 - setreg('e', {'\171'}, 'c') + setreg('e', {'\171«'}, 'c') set_additional_cmd('') nvim_command('qall') reset() - eq({{'«'}, 'v'}, getreg('e')) + eq({{'\171«'}, 'v'}, getreg('e')) end) - it('dumps and loads history correctly when &encoding /= UTF-8 when loading', - function() - -- \171 is U+00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK in latin1 - setreg('e', {'«'}, 'c') - set_additional_cmd('set encoding=latin1') - nvim_command('qall') - reset() - eq({{'\171'}, 'v'}, getreg('e')) - end) end) diff --git a/test/functional/shada/variables_spec.lua b/test/functional/shada/variables_spec.lua index 40101baf8f0274..15502f0b717d76 100644 --- a/test/functional/shada/variables_spec.lua +++ 
b/test/functional/shada/variables_spec.lua @@ -91,35 +91,13 @@ describe('ShaDa support code', function() eq(0, funcs.exists('g:str_var')) end) - it('dumps and loads variables correctly when &encoding is not UTF-8', + it('dumps and loads variables correctly with utf-8 strings', function() - set_additional_cmd('set encoding=latin1') reset() - -- \171 is U+00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK in latin1 - meths.set_var('STRVAR', '\171') - meths.set_var('LSTVAR', {'\171'}) - meths.set_var('DCTVAR', {['\171']='\171'}) - meths.set_var('NESTEDVAR', {['\171']={{'\171'}, {['\171']='\171'}, - {a='Test'}}}) - nvim_command('qall') - reset() - eq('\171', meths.get_var('STRVAR')) - eq({'\171'}, meths.get_var('LSTVAR')) - eq({['\171']='\171'}, meths.get_var('DCTVAR')) - eq({['\171']={{'\171'}, {['\171']='\171'}, {a='Test'}}}, - meths.get_var('NESTEDVAR')) - end) - - it('dumps and loads variables correctly when &encoding /= UTF-8 when dumping', - function() - set_additional_cmd('set encoding=latin1') - reset() - -- \171 is U+00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK in latin1 - meths.set_var('STRVAR', '\171') - meths.set_var('LSTVAR', {'\171'}) - meths.set_var('DCTVAR', {['\171']='\171'}) - meths.set_var('NESTEDVAR', {['\171']={{'\171'}, {['\171']='\171'}, - {a='Test'}}}) + meths.set_var('STRVAR', '«') + meths.set_var('LSTVAR', {'«'}) + meths.set_var('DCTVAR', {['«']='«'}) + meths.set_var('NESTEDVAR', {['«']={{'«'}, {['«']='«'}, {a='Test'}}}) set_additional_cmd('') nvim_command('qall') reset() @@ -129,20 +107,22 @@ describe('ShaDa support code', function() eq({['«']={{'«'}, {['«']='«'}, {a='Test'}}}, meths.get_var('NESTEDVAR')) end) - it('dumps and loads variables correctly when &encoding /= UTF-8 when loading', + it('dumps and loads variables correctly with 8-bit strings', function() + reset() -- \171 is U+00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK in latin1 - meths.set_var('STRVAR', '«') - meths.set_var('LSTVAR', {'«'}) - meths.set_var('DCTVAR', {['«']='«'}) - 
meths.set_var('NESTEDVAR', {['«']={{'«'}, {['«']='«'}, {a='Test'}}}) - set_additional_cmd('set encoding=latin1') + -- This is invalid unicode, but we should still dump and restore it. + meths.set_var('STRVAR', '\171') + meths.set_var('LSTVAR', {'\171'}) + meths.set_var('DCTVAR', {['«\171']='«\171'}) + meths.set_var('NESTEDVAR', {['\171']={{'\171«'}, {['\171']='\171'}, + {a='Test'}}}) nvim_command('qall') reset() eq('\171', meths.get_var('STRVAR')) eq({'\171'}, meths.get_var('LSTVAR')) - eq({['\171']='\171'}, meths.get_var('DCTVAR')) - eq({['\171']={{'\171'}, {['\171']='\171'}, {a='Test'}}}, + eq({['«\171']='«\171'}, meths.get_var('DCTVAR')) + eq({['\171']={{'\171«'}, {['\171']='\171'}, {a='Test'}}}, meths.get_var('NESTEDVAR')) end)