Skip to content

Commit

Permalink
Teach lit to expand glob expressions.
Browse files Browse the repository at this point in the history
This will enable removing hacks throughout the codebase
in clang and compiler-rt that feed multiple inputs to a
testing utility by globbing, all of which are either currently
disabled on Windows or rely on xargs / find hacks.

Differential Revision: https://reviews.llvm.org/D30380

llvm-svn: 296904
  • Loading branch information
Zachary Turner committed Mar 3, 2017
1 parent 0a4ec55 commit b471d4f
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 9 deletions.
Empty file.
28 changes: 28 additions & 0 deletions llvm/test/Other/lit-globbing.ll
@@ -0,0 +1,28 @@
RUN: echo TA > %T/TA.txt
RUN: echo TB > %T/TB.txt
RUN: echo TAB > %T/TAB.txt

RUN: echo %T/TA* | FileCheck -check-prefix=STAR %s
RUN: echo %T/'TA'* | FileCheck -check-prefix=STAR %s
RUN: echo %T/T'A'* | FileCheck -check-prefix=STAR %s

RUN: echo %T/T?.txt | FileCheck -check-prefix=QUESTION %s
RUN: echo %T/'T'?.txt | FileCheck -check-prefix=QUESTION %s

RUN: echo %T/T??.txt | FileCheck -check-prefix=QUESTION2 %s
RUN: echo %T/'T'??.txt | FileCheck -check-prefix=QUESTION2 %s

RUN: echo 'T*' 'T?.txt' 'T??.txt' | FileCheck -check-prefix=QUOTEDARGS %s

STAR-NOT: TB.txt
STAR: {{(TA.txt.*TAB.txt|TAB.txt.*TA.txt)}}

QUESTION-NOT: TAB.txt
QUESTION: {{(TA.txt.*TB.txt|TB.txt.*TA.txt)}}

QUESTION2-NOT: TA.txt
QUESTION2-NOT: TB.txt
QUESTION2: TAB.txt

QUOTEDARGS-NOT: .txt
QUOTEDARGS: T* T?.txt T??.txt
18 changes: 18 additions & 0 deletions llvm/utils/lit/lit/ShCommands.py
Expand Up @@ -35,6 +35,24 @@ def toShell(self, file):
else:
file.write("%s%s '%s'" % (r[0][1], r[0][0], r[1]))

class GlobItem:
    """A command argument that holds a glob pattern to be expanded later.

    The pattern is kept as a plain string; expansion against the
    filesystem happens only when resolve() is called.
    """

    def __init__(self, pattern):
        """Store the glob pattern string (e.g. one containing '*' or '?')."""
        self.pattern = pattern

    def __repr__(self):
        """Return the raw pattern text."""
        return self.pattern

    def __eq__(self, other):
        """Two GlobItems are equal when their patterns are equal.

        Anything that is not a GlobItem compares unequal.
        """
        # Compare against GlobItem itself; checking for an unrelated class
        # here would make every GlobItem-to-GlobItem comparison return False.
        if not isinstance(other, GlobItem):
            return False

        return self.pattern == other.pattern

    def resolve(self):
        """Expand the pattern with glob.glob.

        Returns the list of matching paths, or a one-element list holding
        the unexpanded pattern when nothing matches.
        """
        import glob
        results = glob.glob(self.pattern)
        return results if results else [self.pattern]

class Pipeline:
def __init__(self, commands, negate=False, pipe_err=False):
self.commands = commands
Expand Down
36 changes: 27 additions & 9 deletions llvm/utils/lit/lit/ShUtil.py
Expand Up @@ -2,7 +2,7 @@
import itertools

import lit.util
from lit.ShCommands import Command, Pipeline, Seq
from lit.ShCommands import Command, GlobItem, Pipeline, Seq

class ShLexer:
def __init__(self, data, win32Escapes = False):
Expand Down Expand Up @@ -40,13 +40,15 @@ def lex_arg_fast(self, c):
return None

self.pos = self.pos - 1 + len(chunk)
return chunk
return GlobItem(chunk) if '*' in chunk or '?' in chunk else chunk

def lex_arg_slow(self, c):
if c in "'\"":
str = self.lex_arg_quoted(c)
else:
str = c
unquoted_glob_char = False
quoted_glob_char = False
while self.pos != self.end:
c = self.look()
if c.isspace() or c in "|&;":
Expand All @@ -65,12 +67,12 @@ def lex_arg_slow(self, c):
tok = self.lex_one_token()
assert isinstance(tok, tuple) and len(tok) == 1
return (tok[0], num)
elif c == '"':
elif c == '"' or c == "'":
self.eat()
str += self.lex_arg_quoted('"')
elif c == "'":
self.eat()
str += self.lex_arg_quoted("'")
quoted_arg = self.lex_arg_quoted(c)
if '*' in quoted_arg or '?' in quoted_arg:
quoted_glob_char = True
str += quoted_arg
elif not self.win32Escapes and c == '\\':
# Outside of a string, '\\' escapes everything.
self.eat()
Expand All @@ -79,9 +81,25 @@ def lex_arg_slow(self, c):
"escape at end of quoted argument in: %r" % self.data)
return str
str += self.eat()
elif c in '*?':
unquoted_glob_char = True
str += self.eat()
else:
str += self.eat()
return str
# If a quote character is present, lex_arg_quoted will remove the quotes
# and append the argument directly. This causes a problem when the
# quoted portion contains a glob character, as the character will no
# longer be treated literally. If glob characters occur *only* inside
# of quotes, then we can handle this by not globbing at all, and if
# glob characters occur *only* outside of quotes, we can still glob just
# fine. But if a glob character occurs both inside and outside of
# quotes this presents a problem. In practice this is such an obscure
# edge case that it doesn't seem worth the added complexity to support.
# By adding an assertion, it means some bot somewhere will catch this
# and flag the user of a non-portable test (which could almost certainly
# be re-written to work correctly without triggering this).
assert not (quoted_glob_char and unquoted_glob_char)
return GlobItem(str) if unquoted_glob_char else str

def lex_arg_quoted(self, delim):
str = ''
Expand Down Expand Up @@ -202,7 +220,7 @@ def parse_command(self):
break

# If this is an argument, just add it to the current command.
if isinstance(tok, str):
if isinstance(tok, (str, GlobItem)):
args.append(self.lex())
continue

Expand Down
13 changes: 13 additions & 0 deletions llvm/utils/lit/lit/TestRunner.py
Expand Up @@ -5,6 +5,7 @@
import tempfile
import threading

from lit.ShCommands import GlobItem
import lit.ShUtil as ShUtil
import lit.Test as Test
import lit.util
Expand Down Expand Up @@ -141,6 +142,15 @@ def executeShCmd(cmd, shenv, results, timeout=0):

return (finalExitCode, timeoutInfo)

def expand_glob_expressions(cmd, args):
    """Return a copy of args with every GlobItem replaced by its expansion.

    The first element of args is copied through untouched. Each later
    element that is a GlobItem contributes whatever its resolve() returns;
    any other element is kept as-is. The cmd parameter is not used here.
    """
    expanded = [args[0]]
    for item in args[1:]:
        if not isinstance(item, GlobItem):
            expanded.append(item)
            continue
        expanded.extend(item.resolve())
    return expanded

def quote_windows_command(seq):
"""
Reimplement Python's private subprocess.list2cmdline for MSys compatibility
Expand Down Expand Up @@ -372,6 +382,9 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper):
named_temp_files.append(f.name)
args[i] = f.name

# Expand all glob expressions
args = expand_glob_expressions(j, args)

# On Windows, do our own command line quoting for better compatibility
# with some core utility distributions.
if kIsWindows:
Expand Down

0 comments on commit b471d4f

Please sign in to comment.