Skip to content

Commit

Permalink
gh-62090: Simplify argparse usage formatting (GH-105039)
Browse files Browse the repository at this point in the history
Rationale
=========

argparse performs a complex formatting of the usage for argument grouping
and for line wrapping to fit the terminal width. This formatting has been
a constant source of bugs for at least 10 years (see linked issues below)
where defensive assertion errors are triggered or brackets and paranthesis
are not properly handeled.

Problem
=======

The current implementation of argparse usage formatting relies on regular
expressions to group arguments usage only to separate them again later
with another set of regular expressions. This is a complex and error prone
approach that caused all the issues linked below. Special casing certain
argument formats has not solved the problem. The following are some of
the most common issues:
- empty `metavar`
- mutually exclusive groups with `SUPPRESS`ed arguments
- metavars with whitespace
- metavars with brackets or paranthesis

Solution
========

The following two comments summarize the solution:
- #82091 (comment)
- #77048 (comment)

Mainly, the solution is to rewrite the usage formatting to avoid the
group-then-separate approach. Instead, the usage parts are kept separate
and only joined together at the end. This allows for a much simpler
implementation that is easier to understand and maintain. It avoids the
regular expressions approach and fixes the corresponding issues.

This closes the following GitHub issues:
-  #62090
-  #62549
-  #77048
-  #82091
-  #89743
-  #96310
-  #98666

These PRs become obsolete:
-  #15372
-  #96311
  • Loading branch information
hamdanal committed May 7, 2024
1 parent 49258ef commit de1428f
Show file tree
Hide file tree
Showing 3 changed files with 164 additions and 72 deletions.
100 changes: 28 additions & 72 deletions Lib/argparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,17 +328,8 @@ def _format_usage(self, usage, actions, groups, prefix):
if len(prefix) + len(usage) > text_width:

# break usage into wrappable parts
part_regexp = (
r'\(.*?\)+(?=\s|$)|'
r'\[.*?\]+(?=\s|$)|'
r'\S+'
)
opt_usage = format(optionals, groups)
pos_usage = format(positionals, groups)
opt_parts = _re.findall(part_regexp, opt_usage)
pos_parts = _re.findall(part_regexp, pos_usage)
assert ' '.join(opt_parts) == opt_usage
assert ' '.join(pos_parts) == pos_usage
opt_parts = self._get_actions_usage_parts(optionals, groups)
pos_parts = self._get_actions_usage_parts(positionals, groups)

# helper for wrapping lines
def get_lines(parts, indent, prefix=None):
Expand Down Expand Up @@ -391,65 +382,36 @@ def get_lines(parts, indent, prefix=None):
return '%s%s\n\n' % (prefix, usage)

def _format_actions_usage(self, actions, groups):
return ' '.join(self._get_actions_usage_parts(actions, groups))

def _get_actions_usage_parts(self, actions, groups):
# find group indices and identify actions in groups
group_actions = set()
inserts = {}
for group in groups:
if not group._group_actions:
raise ValueError(f'empty group {group}')

if all(action.help is SUPPRESS for action in group._group_actions):
continue

try:
start = actions.index(group._group_actions[0])
except ValueError:
continue
else:
group_action_count = len(group._group_actions)
end = start + group_action_count
end = start + len(group._group_actions)
if actions[start:end] == group._group_actions:

suppressed_actions_count = 0
for action in group._group_actions:
group_actions.add(action)
if action.help is SUPPRESS:
suppressed_actions_count += 1

exposed_actions_count = group_action_count - suppressed_actions_count
if not exposed_actions_count:
continue

if not group.required:
if start in inserts:
inserts[start] += ' ['
else:
inserts[start] = '['
if end in inserts:
inserts[end] += ']'
else:
inserts[end] = ']'
elif exposed_actions_count > 1:
if start in inserts:
inserts[start] += ' ('
else:
inserts[start] = '('
if end in inserts:
inserts[end] += ')'
else:
inserts[end] = ')'
for i in range(start + 1, end):
inserts[i] = '|'
group_actions.update(group._group_actions)
inserts[start, end] = group

# collect all actions format strings
parts = []
for i, action in enumerate(actions):
for action in actions:

# suppressed arguments are marked with None
# remove | separators for suppressed arguments
if action.help is SUPPRESS:
parts.append(None)
if inserts.get(i) == '|':
inserts.pop(i)
elif inserts.get(i + 1) == '|':
inserts.pop(i + 1)
part = None

# produce all arg strings
elif not action.option_strings:
Expand All @@ -461,9 +423,6 @@ def _format_actions_usage(self, actions, groups):
if part[0] == '[' and part[-1] == ']':
part = part[1:-1]

# add the action string to the list
parts.append(part)

# produce the first way to invoke the option in brackets
else:
option_string = action.option_strings[0]
Expand All @@ -484,26 +443,23 @@ def _format_actions_usage(self, actions, groups):
if not action.required and action not in group_actions:
part = '[%s]' % part

# add the action string to the list
parts.append(part)
# add the action string to the list
parts.append(part)

# insert things at the necessary indices
for i in sorted(inserts, reverse=True):
parts[i:i] = [inserts[i]]

# join all the action items with spaces
text = ' '.join([item for item in parts if item is not None])

# clean up separators for mutually exclusive groups
open = r'[\[(]'
close = r'[\])]'
text = _re.sub(r'(%s) ' % open, r'\1', text)
text = _re.sub(r' (%s)' % close, r'\1', text)
text = _re.sub(r'%s *%s' % (open, close), r'', text)
text = text.strip()
# group mutually exclusive actions
for start, end in sorted(inserts, reverse=True):
group = inserts[start, end]
group_parts = [item for item in parts[start:end] if item is not None]
if group.required:
open, close = "()" if len(group_parts) > 1 else ("", "")
else:
open, close = "[]"
parts[start] = open + " | ".join(group_parts) + close
for i in range(start + 1, end):
parts[i] = None

# return the text
return text
# return the usage parts
return [item for item in parts if item is not None]

def _format_text(self, text):
if '%(prog)' in text:
Expand Down
134 changes: 134 additions & 0 deletions Lib/test/test_argparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -4255,6 +4255,140 @@ class TestHelpUsagePositionalsOnlyWrap(HelpTestCase):
version = ''


class TestHelpUsageMetavarsSpacesParentheses(HelpTestCase):
# https://github.com/python/cpython/issues/62549
# https://github.com/python/cpython/issues/89743
parser_signature = Sig(prog='PROG')
argument_signatures = [
Sig('-n1', metavar='()', help='n1'),
Sig('-o1', metavar='(1, 2)', help='o1'),
Sig('-u1', metavar=' (uu) ', help='u1'),
Sig('-v1', metavar='( vv )', help='v1'),
Sig('-w1', metavar='(w)w', help='w1'),
Sig('-x1', metavar='x(x)', help='x1'),
Sig('-y1', metavar='yy)', help='y1'),
Sig('-z1', metavar='(zz', help='z1'),
Sig('-n2', metavar='[]', help='n2'),
Sig('-o2', metavar='[1, 2]', help='o2'),
Sig('-u2', metavar=' [uu] ', help='u2'),
Sig('-v2', metavar='[ vv ]', help='v2'),
Sig('-w2', metavar='[w]w', help='w2'),
Sig('-x2', metavar='x[x]', help='x2'),
Sig('-y2', metavar='yy]', help='y2'),
Sig('-z2', metavar='[zz', help='z2'),
]

usage = '''\
usage: PROG [-h] [-n1 ()] [-o1 (1, 2)] [-u1 (uu) ] [-v1 ( vv )] [-w1 (w)w]
[-x1 x(x)] [-y1 yy)] [-z1 (zz] [-n2 []] [-o2 [1, 2]] [-u2 [uu] ]
[-v2 [ vv ]] [-w2 [w]w] [-x2 x[x]] [-y2 yy]] [-z2 [zz]
'''
help = usage + '''\
options:
-h, --help show this help message and exit
-n1 () n1
-o1 (1, 2) o1
-u1 (uu) u1
-v1 ( vv ) v1
-w1 (w)w w1
-x1 x(x) x1
-y1 yy) y1
-z1 (zz z1
-n2 [] n2
-o2 [1, 2] o2
-u2 [uu] u2
-v2 [ vv ] v2
-w2 [w]w w2
-x2 x[x] x2
-y2 yy] y2
-z2 [zz z2
'''
version = ''


class TestHelpUsageNoWhitespaceCrash(TestCase):

def test_all_suppressed_mutex_followed_by_long_arg(self):
# https://github.com/python/cpython/issues/62090
# https://github.com/python/cpython/issues/96310
parser = argparse.ArgumentParser(prog='PROG')
mutex = parser.add_mutually_exclusive_group()
mutex.add_argument('--spam', help=argparse.SUPPRESS)
parser.add_argument('--eggs-eggs-eggs-eggs-eggs-eggs')
usage = textwrap.dedent('''\
usage: PROG [-h]
[--eggs-eggs-eggs-eggs-eggs-eggs EGGS_EGGS_EGGS_EGGS_EGGS_EGGS]
''')
self.assertEqual(parser.format_usage(), usage)

def test_newline_in_metavar(self):
# https://github.com/python/cpython/issues/77048
mapping = ['123456', '12345', '12345', '123']
parser = argparse.ArgumentParser('11111111111111')
parser.add_argument('-v', '--verbose',
help='verbose mode', action='store_true')
parser.add_argument('targets',
help='installation targets',
nargs='+',
metavar='\n'.join(mapping))
usage = textwrap.dedent('''\
usage: 11111111111111 [-h] [-v]
123456
12345
12345
123 [123456
12345
12345
123 ...]
''')
self.assertEqual(parser.format_usage(), usage)

def test_empty_metavar_required_arg(self):
# https://github.com/python/cpython/issues/82091
parser = argparse.ArgumentParser(prog='PROG')
parser.add_argument('--nil', metavar='', required=True)
parser.add_argument('--a', metavar='A' * 70)
usage = (
'usage: PROG [-h] --nil \n'
' [--a AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'
'AAAAAAAAAAAAAAAAAAAAAAA]\n'
)
self.assertEqual(parser.format_usage(), usage)

def test_all_suppressed_mutex_with_optional_nargs(self):
# https://github.com/python/cpython/issues/98666
parser = argparse.ArgumentParser(prog='PROG')
mutex = parser.add_mutually_exclusive_group()
mutex.add_argument(
'--param1',
nargs='?', const='default', metavar='NAME', help=argparse.SUPPRESS)
mutex.add_argument(
'--param2',
nargs='?', const='default', metavar='NAME', help=argparse.SUPPRESS)
usage = 'usage: PROG [-h]\n'
self.assertEqual(parser.format_usage(), usage)

def test_nested_mutex_groups(self):
parser = argparse.ArgumentParser(prog='PROG')
g = parser.add_mutually_exclusive_group()
g.add_argument("--spam")
with warnings.catch_warnings():
warnings.simplefilter('ignore', DeprecationWarning)
gg = g.add_mutually_exclusive_group()
gg.add_argument("--hax")
gg.add_argument("--hox", help=argparse.SUPPRESS)
gg.add_argument("--hex")
g.add_argument("--eggs")
parser.add_argument("--num")

usage = textwrap.dedent('''\
usage: PROG [-h] [--spam SPAM | [--hax HAX | --hex HEX] | --eggs EGGS]
[--num NUM]
''')
self.assertEqual(parser.format_usage(), usage)


class TestHelpVariableExpansion(HelpTestCase):
"""Test that variables are expanded properly in help messages"""

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fix assertion errors caused by whitespace in metavars or ``SUPPRESS``-ed groups
in :mod:`argparse` by simplifying usage formatting. Patch by Ali Hamdan.

0 comments on commit de1428f

Please sign in to comment.