diff --git a/Lib/shlex.py b/Lib/shlex.py index 5959f52dd12639..1b622166519c87 100644 --- a/Lib/shlex.py +++ b/Lib/shlex.py @@ -49,14 +49,14 @@ def __init__(self, instream=None, infile=None, posix=False, self.token = '' self.filestack = deque() self.source = None + # _pushback_chars is a push back queue used by lookahead logic + self._pushback_chars = deque() if not punctuation_chars: punctuation_chars = '' elif punctuation_chars is True: punctuation_chars = '();<>|&' self._punctuation_chars = punctuation_chars if punctuation_chars: - # _pushback_chars is a push back queue used by lookahead logic - self._pushback_chars = deque() # these chars added because allowed in file names, args, wildcards self.wordchars += '~-./*?=' #remove any punctuation chars from wordchars @@ -132,7 +132,7 @@ def read_token(self): quoted = False escapedstate = ' ' while True: - if self.punctuation_chars and self._pushback_chars: + if self._pushback_chars: nextchar = self._pushback_chars.pop() else: nextchar = self.instream.read(1) @@ -156,8 +156,18 @@ def read_token(self): else: continue elif nextchar in self.commenters: - self.instream.readline() - self.lineno += 1 + if self.posix: + # Consume comment until newline or end of file + while True: + nextchar = self.instream.read(1) + if not nextchar: + break + if nextchar == '\n': + self._pushback_chars.append(nextchar) + break + else: + self.instream.readline() + self.lineno += 1 elif self.posix and nextchar in self.escape: escapedstate = 'a' self.state = nextchar @@ -226,14 +236,23 @@ def read_token(self): else: continue elif nextchar in self.commenters: - self.instream.readline() - self.lineno += 1 if self.posix: + # Consume comment until newline or end of file + while True: + nextchar = self.instream.read(1) + if not nextchar: + break + if nextchar == '\n': + self._pushback_chars.append(nextchar) + break self.state = ' ' if self.token or (self.posix and quoted): break # emit current token else: continue + else: + self.instream.readline() + self.lineno += 1 elif self.state == 'c': if nextchar in self.punctuation_chars: self.token += nextchar diff --git a/Lib/test/test_shlex.py b/Lib/test/test_shlex.py index 2a355abdeeb30f..bdc57c3333d34b 100644 --- a/Lib/test/test_shlex.py +++ b/Lib/test/test_shlex.py @@ -368,6 +368,15 @@ def testPunctuationCharsReadOnly(self): with self.assertRaises(AttributeError): shlex_instance.punctuation_chars = False + def testNewlineAfterComment(self): + """Test that newline after comment is not consumed (POSIX compliance)""" + # When whitespace is customized to exclude newlines, newlines should + # be treated as tokens, even when following a comment + s = shlex.shlex('a # comment \n b', posix=True) + s.whitespace = ' ' + result = list(s) + self.assertEqual(result, ['a', '\n', 'b']) + @cpython_only def test_lazy_imports(self): import_helper.ensure_lazy_imports('shlex', {'collections', 're', 'os'}) diff --git a/Misc/NEWS.d/next/Library/2025-11-15-22-30-13.gh-issue-51338.8XI4Hk.rst b/Misc/NEWS.d/next/Library/2025-11-15-22-30-13.gh-issue-51338.8XI4Hk.rst new file mode 100644 index 00000000000000..998b26ef5d1fab --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-11-15-22-30-13.gh-issue-51338.8XI4Hk.rst @@ -0,0 +1 @@ +Fix :mod:`shlex` to preserve newlines after comments in POSIX mode.