Skip to content

Commit 2e6150a

Browse files
[3.9] gh-136065: Fix quadratic complexity in os.path.expandvars() (GH-134952) (GH-140839)
(cherry picked from commit f029e8d)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
Co-authored-by: Łukasz Langa <lukasz@langa.pl>
1 parent 798eaca commit 2e6150a

File tree

5 files changed

+96
-116
lines changed

5 files changed

+96
-116
lines changed

Lib/ntpath.py

Lines changed: 41 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -335,17 +335,23 @@ def expanduser(path):
335335
# XXX With COMMAND.COM you can use any characters in a variable name,
336336
# XXX except '^|<>='.
337337

338+
_varpattern = r"'[^']*'?|%(%|[^%]*%?)|\$(\$|[-\w]+|\{[^}]*\}?)"
339+
_varsub = None
340+
_varsubb = None
341+
338342
def expandvars(path):
339343
"""Expand shell variables of the forms $var, ${var} and %var%.
340344
341345
Unknown variables are left unchanged."""
342346
path = os.fspath(path)
347+
global _varsub, _varsubb
343348
if isinstance(path, bytes):
344349
if b'$' not in path and b'%' not in path:
345350
return path
346-
import string
347-
varchars = bytes(string.ascii_letters + string.digits + '_-', 'ascii')
348-
quote = b'\''
351+
if not _varsubb:
352+
import re
353+
_varsubb = re.compile(_varpattern.encode(), re.ASCII).sub
354+
sub = _varsubb
349355
percent = b'%'
350356
brace = b'{'
351357
rbrace = b'}'
@@ -354,94 +360,44 @@ def expandvars(path):
354360
else:
355361
if '$' not in path and '%' not in path:
356362
return path
357-
import string
358-
varchars = string.ascii_letters + string.digits + '_-'
359-
quote = '\''
363+
if not _varsub:
364+
import re
365+
_varsub = re.compile(_varpattern, re.ASCII).sub
366+
sub = _varsub
360367
percent = '%'
361368
brace = '{'
362369
rbrace = '}'
363370
dollar = '$'
364371
environ = os.environ
365-
res = path[:0]
366-
index = 0
367-
pathlen = len(path)
368-
while index < pathlen:
369-
c = path[index:index+1]
370-
if c == quote: # no expansion within single quotes
371-
path = path[index + 1:]
372-
pathlen = len(path)
373-
try:
374-
index = path.index(c)
375-
res += c + path[:index + 1]
376-
except ValueError:
377-
res += c + path
378-
index = pathlen - 1
379-
elif c == percent: # variable or '%'
380-
if path[index + 1:index + 2] == percent:
381-
res += c
382-
index += 1
383-
else:
384-
path = path[index+1:]
385-
pathlen = len(path)
386-
try:
387-
index = path.index(percent)
388-
except ValueError:
389-
res += percent + path
390-
index = pathlen - 1
391-
else:
392-
var = path[:index]
393-
try:
394-
if environ is None:
395-
value = os.fsencode(os.environ[os.fsdecode(var)])
396-
else:
397-
value = environ[var]
398-
except KeyError:
399-
value = percent + var + percent
400-
res += value
401-
elif c == dollar: # variable or '$$'
402-
if path[index + 1:index + 2] == dollar:
403-
res += c
404-
index += 1
405-
elif path[index + 1:index + 2] == brace:
406-
path = path[index+2:]
407-
pathlen = len(path)
408-
try:
409-
index = path.index(rbrace)
410-
except ValueError:
411-
res += dollar + brace + path
412-
index = pathlen - 1
413-
else:
414-
var = path[:index]
415-
try:
416-
if environ is None:
417-
value = os.fsencode(os.environ[os.fsdecode(var)])
418-
else:
419-
value = environ[var]
420-
except KeyError:
421-
value = dollar + brace + var + rbrace
422-
res += value
423-
else:
424-
var = path[:0]
425-
index += 1
426-
c = path[index:index + 1]
427-
while c and c in varchars:
428-
var += c
429-
index += 1
430-
c = path[index:index + 1]
431-
try:
432-
if environ is None:
433-
value = os.fsencode(os.environ[os.fsdecode(var)])
434-
else:
435-
value = environ[var]
436-
except KeyError:
437-
value = dollar + var
438-
res += value
439-
if c:
440-
index -= 1
372+
373+
def repl(m):
374+
lastindex = m.lastindex
375+
if lastindex is None:
376+
return m[0]
377+
name = m[lastindex]
378+
if lastindex == 1:
379+
if name == percent:
380+
return name
381+
if not name.endswith(percent):
382+
return m[0]
383+
name = name[:-1]
441384
else:
442-
res += c
443-
index += 1
444-
return res
385+
if name == dollar:
386+
return name
387+
if name.startswith(brace):
388+
if not name.endswith(rbrace):
389+
return m[0]
390+
name = name[1:-1]
391+
392+
try:
393+
if environ is None:
394+
return os.fsencode(os.environ[os.fsdecode(name)])
395+
else:
396+
return environ[name]
397+
except KeyError:
398+
return m[0]
399+
400+
return sub(repl, path)
445401

446402

447403
# Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A\B.

Lib/posixpath.py

Lines changed: 20 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -275,56 +275,53 @@ def expanduser(path):
275275
# This expands the forms $variable and ${variable} only.
276276
# Non-existent variables are left unchanged.
277277

278-
_varprog = None
279-
_varprogb = None
278+
_varpattern = r'\$(\w+|\{[^}]*\}?)'
279+
_varsub = None
280+
_varsubb = None
280281

281282
def expandvars(path):
282283
"""Expand shell variables of form $var and ${var}. Unknown variables
283284
are left unchanged."""
284285
path = os.fspath(path)
285-
global _varprog, _varprogb
286+
global _varsub, _varsubb
286287
if isinstance(path, bytes):
287288
if b'$' not in path:
288289
return path
289-
if not _varprogb:
290+
if not _varsubb:
290291
import re
291-
_varprogb = re.compile(br'\$(\w+|\{[^}]*\})', re.ASCII)
292-
search = _varprogb.search
292+
_varsubb = re.compile(_varpattern.encode(), re.ASCII).sub
293+
sub = _varsubb
293294
start = b'{'
294295
end = b'}'
295296
environ = getattr(os, 'environb', None)
296297
else:
297298
if '$' not in path:
298299
return path
299-
if not _varprog:
300+
if not _varsub:
300301
import re
301-
_varprog = re.compile(r'\$(\w+|\{[^}]*\})', re.ASCII)
302-
search = _varprog.search
302+
_varsub = re.compile(_varpattern, re.ASCII).sub
303+
sub = _varsub
303304
start = '{'
304305
end = '}'
305306
environ = os.environ
306-
i = 0
307-
while True:
308-
m = search(path, i)
309-
if not m:
310-
break
311-
i, j = m.span(0)
312-
name = m.group(1)
313-
if name.startswith(start) and name.endswith(end):
307+
308+
def repl(m):
309+
name = m[1]
310+
if name.startswith(start):
311+
if not name.endswith(end):
312+
return m[0]
314313
name = name[1:-1]
315314
try:
316315
if environ is None:
317316
value = os.fsencode(os.environ[os.fsdecode(name)])
318317
else:
319318
value = environ[name]
320319
except KeyError:
321-
i = j
320+
return m[0]
322321
else:
323-
tail = path[j:]
324-
path = path[:i] + value
325-
i = len(path)
326-
path += tail
327-
return path
322+
return value
323+
324+
return sub(repl, path)
328325

329326

330327
# Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A/B.

Lib/test/test_genericpath.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import warnings
1010
from test import support
1111
from test.support.script_helper import assert_python_ok
12-
from test.support import FakePath
12+
from test.support import FakePath, EnvironmentVarGuard
1313

1414

1515
def create_file(filename, data=b'foo'):
@@ -374,7 +374,7 @@ def test_splitdrive(self):
374374

375375
def test_expandvars(self):
376376
expandvars = self.pathmodule.expandvars
377-
with support.EnvironmentVarGuard() as env:
377+
with EnvironmentVarGuard() as env:
378378
env.clear()
379379
env["foo"] = "bar"
380380
env["{foo"] = "baz1"
@@ -408,7 +408,7 @@ def test_expandvars_nonascii(self):
408408
expandvars = self.pathmodule.expandvars
409409
def check(value, expected):
410410
self.assertEqual(expandvars(value), expected)
411-
with support.EnvironmentVarGuard() as env:
411+
with EnvironmentVarGuard() as env:
412412
env.clear()
413413
nonascii = support.FS_NONASCII
414414
env['spam'] = nonascii
@@ -429,6 +429,19 @@ def check(value, expected):
429429
os.fsencode('$bar%s bar' % nonascii))
430430
check(b'$spam}bar', os.fsencode('%s}bar' % nonascii))
431431

432+
@support.requires_resource('cpu')
433+
def test_expandvars_large(self):
434+
expandvars = self.pathmodule.expandvars
435+
with EnvironmentVarGuard() as env:
436+
env.clear()
437+
env["A"] = "B"
438+
n = 100_000
439+
self.assertEqual(expandvars('$A'*n), 'B'*n)
440+
self.assertEqual(expandvars('${A}'*n), 'B'*n)
441+
self.assertEqual(expandvars('$A!'*n), 'B!'*n)
442+
self.assertEqual(expandvars('${A}A'*n), 'BA'*n)
443+
self.assertEqual(expandvars('${'*10*n), '${'*10*n)
444+
432445
def test_abspath(self):
433446
self.assertIn("foo", self.pathmodule.abspath("foo"))
434447
with warnings.catch_warnings():

Lib/test/test_ntpath.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
11
import ntpath
22
import os
3-
import subprocess
43
import sys
54
import unittest
65
import warnings
76
from ntpath import ALLOW_MISSING
8-
from test.support import TestFailed, FakePath
7+
from test.support import TestFailed, FakePath, EnvironmentVarGuard
98
from test import support, test_genericpath
109
from tempfile import TemporaryFile
1110

@@ -642,7 +641,7 @@ def test_realpath_cwd(self):
642641
ntpath.realpath("file.txt", **kwargs))
643642

644643
def test_expandvars(self):
645-
with support.EnvironmentVarGuard() as env:
644+
with EnvironmentVarGuard() as env:
646645
env.clear()
647646
env["foo"] = "bar"
648647
env["{foo"] = "baz1"
@@ -671,7 +670,7 @@ def test_expandvars(self):
671670
def test_expandvars_nonascii(self):
672671
def check(value, expected):
673672
tester('ntpath.expandvars(%r)' % value, expected)
674-
with support.EnvironmentVarGuard() as env:
673+
with EnvironmentVarGuard() as env:
675674
env.clear()
676675
nonascii = support.FS_NONASCII
677676
env['spam'] = nonascii
@@ -687,10 +686,23 @@ def check(value, expected):
687686
check('%spam%bar', '%sbar' % nonascii)
688687
check('%{}%bar'.format(nonascii), 'ham%sbar' % nonascii)
689688

689+
@support.requires_resource('cpu')
690+
def test_expandvars_large(self):
691+
expandvars = ntpath.expandvars
692+
with EnvironmentVarGuard() as env:
693+
env.clear()
694+
env["A"] = "B"
695+
n = 100_000
696+
self.assertEqual(expandvars('%A%'*n), 'B'*n)
697+
self.assertEqual(expandvars('%A%A'*n), 'BA'*n)
698+
self.assertEqual(expandvars("''"*n + '%%'), "''"*n + '%')
699+
self.assertEqual(expandvars("%%"*n), "%"*n)
700+
self.assertEqual(expandvars("$$"*n), "$"*n)
701+
690702
def test_expanduser(self):
691703
tester('ntpath.expanduser("test")', 'test')
692704

693-
with support.EnvironmentVarGuard() as env:
705+
with EnvironmentVarGuard() as env:
694706
env.clear()
695707
tester('ntpath.expanduser("~test")', '~test')
696708

@@ -908,6 +920,7 @@ def test_nt_helpers(self):
908920
self.assertIsInstance(b_final_path, bytes)
909921
self.assertGreater(len(b_final_path), 0)
910922

923+
911924
class NtCommonTest(test_genericpath.CommonTest, unittest.TestCase):
912925
pathmodule = ntpath
913926
attributes = ['relpath']
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix quadratic complexity in :func:`os.path.expandvars`.

0 commit comments

Comments
 (0)