Permalink
Please sign in to comment.
Browse files
Prepare to implement 'read -r' and IFS splitting for 'read'.
Aboriginal Linux uses both of these mechanisms. - More spec tests for the read builtin. As usual, I uncovered shell incompatibilities, like whether they recognize C backslash-escapes in read input! - Add unit tests for the read builtin. For example, \ escapes IFS chars as well as newline. - Create a legacy.py module for splitting by IFS. 'read' and unquoted word splicing will both share some of this logic. At the very least, they both have the concept of "IFS whitespace".
- Loading branch information...
Showing
with
323 additions
and 177 deletions.
- +41 −3 core/builtin.py
- +34 −0 core/builtin_test.py
- +102 −0 core/legacy.py
- +92 −0 core/legacy_test.py
- +3 −94 core/word_eval.py
- +2 −80 core/word_eval_test.py
- +49 −0 spec/builtin-io.test.sh
| @@ -0,0 +1,102 @@ | ||
| #!/usr/bin/python | ||
| """ | ||
| legacy.py | ||
| """ | ||
| import re | ||
| from core import runtime | ||
| value_e = runtime.value_e | ||
| def GetIfs(mem): | ||
| """ | ||
| Used for splitting words in Splitter. | ||
| """ | ||
| val = mem.GetVar('IFS') | ||
| if val.tag == value_e.Undef: | ||
| return '' | ||
| elif val.tag == value_e.Str: | ||
| return val.s | ||
| else: | ||
| # TODO: Raise proper error | ||
| raise AssertionError("IFS shouldn't be an array") | ||
| def _Split(s, ifs): | ||
| """Helper function for IFS split.""" | ||
| parts = [''] | ||
| for c in s: | ||
| if c in ifs: | ||
| parts.append('') | ||
| else: | ||
| parts[-1] += c | ||
| return parts | ||
| def IfsSplit(s, ifs): | ||
| """ | ||
| http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_06_05 | ||
| https://www.gnu.org/software/bash/manual/bashref.html#Word-Splitting | ||
| Summary: | ||
| 1. ' \t\n' is special. Whitespace is trimmed off the front and back. | ||
| 2. if IFS is '', no field splitting is performed. | ||
| 3. Otherwise, suppose IFS = ' ,\t'. Then IFS whitespace is space or comma. | ||
| a. IFS whitespace isgnored at beginning and end. | ||
| b. any other IFS char delimits the field, along with adjacent IFS | ||
| whitespace. | ||
| c. IFS whitespace shall delimit a field. | ||
| # Can we do this be regex or something? Use regex match? | ||
| """ | ||
| assert isinstance(ifs, str), ifs | ||
| if not ifs: | ||
| return [s] # no splitting | ||
| # print("IFS SPLIT %r %r" % (s, ifs)) | ||
| # TODO: This detect if it's ALL whitespace? If ifs_other is empty? | ||
| if ifs == ' \t\n': | ||
| return _Split(s, ifs) | ||
| # Detect IFS whitespace | ||
| ifs_whitespace = '' | ||
| ifs_other = '' | ||
| for c in ifs: | ||
| if c in ' \t\n': | ||
| ifs_whitespace += c | ||
| else: | ||
| ifs_other += c | ||
| # TODO: Rule 3a. Ignore leading and trailing IFS whitespace? | ||
| # hack to make an RE | ||
| # Hm this escapes \t as \\\t? I guess that works. | ||
| ws_re = re.escape(ifs_whitespace) | ||
| other_re = re.escape(ifs_other) | ||
| #print('chars', repr(ifs_whitespace), repr(ifs_other)) | ||
| #print('RE', repr(ws_re), repr(other_re)) | ||
| # BUG: re.split() is the wrong model. It works with the 'delimiting' model. | ||
| # Forward iteration. TODO: grep for IFS in dash/mksh/bash/ash. | ||
| # ifs_ws | ifs_ws* non_ws_ifs ifs_ws* | ||
| if ifs_whitespace and ifs_other: | ||
| # first alternative is rule 3c. | ||
| # BUG: It matches the whitespace first? | ||
| pat = '[%s]+|[%s]*[%s][%s]*' % (ws_re, ws_re, other_re, ws_re) | ||
| elif ifs_whitespace: | ||
| pat = '[%s]+' % ws_re | ||
| elif ifs_other: | ||
| pat = '[%s]' % other_re | ||
| else: | ||
| raise AssertionError | ||
| #print('PAT', repr(pat)) | ||
| regex = re.compile(pat) | ||
| frags = regex.split(s) | ||
| #log('split %r by %r -> frags %s', s, pat, frags) | ||
| return frags |
| @@ -0,0 +1,92 @@ | ||
| #!/usr/bin/python -S | ||
| """ | ||
| legacy_test.py: Tests for legacy.py | ||
| """ | ||
| import unittest | ||
| from core import legacy # module under test | ||
| class SplitTest(unittest.TestCase): | ||
| def testIfsSplitEmpty(self): | ||
| self.assertEqual( | ||
| [''], legacy.IfsSplit('', ' \t\n')) | ||
| self.assertEqual( | ||
| ['', ''], legacy.IfsSplit(' ', ' \t\n')) | ||
| self.assertEqual( | ||
| [''], legacy.IfsSplit('', ' ')) | ||
| # No word splitting when no IFS. Hm. | ||
| self.assertEqual( | ||
| [''], legacy.IfsSplit('', '')) | ||
| def testIfsSplit(self): | ||
| self.assertEqual( | ||
| ['', 'foo', 'bar', ''], | ||
| legacy.IfsSplit('\tfoo bar\n', ' \t\n')) | ||
| self.assertEqual( | ||
| ['\tfoo bar\n'], | ||
| legacy.IfsSplit('\tfoo bar\n', '')) | ||
| self.assertEqual( | ||
| ['a', '', 'd'], | ||
| legacy.IfsSplit('abcd', 'bc')) | ||
| def testIfsSplit_Mixed(self): | ||
| self.assertEqual( | ||
| ['a', 'cd'], | ||
| legacy.IfsSplit('abcd', ' b')) | ||
| # IFS whitespace rule | ||
| self.assertEqual( | ||
| ['a', 'c'], | ||
| legacy.IfsSplit('abc', 'b ')) | ||
| self.assertEqual( | ||
| ['a', 'c'], | ||
| legacy.IfsSplit('a c', 'b ')) | ||
| self.assertEqual( | ||
| ['a', '', 'c'], | ||
| legacy.IfsSplit('abbc', 'b ')) | ||
| self.assertEqual( | ||
| ['', 'a', '', '', 'cd', ''], | ||
| legacy.IfsSplit('\ta b\tb cd\n', 'b \t\n')) | ||
| self.assertEqual( | ||
| ['', 'a', 'cd', ''], | ||
| legacy.IfsSplit('\tabcd\n', 'b \t\n')) | ||
| def testIfsSplit_Mixed2(self): | ||
| # Doesn't work yet | ||
| return | ||
| self.assertEqual( | ||
| ['a', '', '', 'b'], | ||
| legacy.IfsSplit('a _ _ _ b', '_ ')) | ||
| def testIfsSplitWhitespaceOnly(self): | ||
| # No non-whitespace IFS | ||
| self.assertEqual( | ||
| ['', 'a', 'c', ''], | ||
| legacy.IfsSplit(' a c ', ' ')) | ||
| self.assertEqual( | ||
| ['', 'c'], | ||
| legacy.IfsSplit(' c', ' \t\n')) | ||
| def testIfsSplitNonWhitespaceOnly(self): | ||
| self.assertEqual( | ||
| ['a', 'c'], | ||
| legacy.IfsSplit('a_c', '_')) | ||
| self.assertEqual( | ||
| ['', ''], | ||
| legacy.IfsSplit('_', '_')) | ||
| if __name__ == '__main__': | ||
| unittest.main() |
Oops, something went wrong.
0 comments on commit
672988d