Permalink
Browse files

only do separate cats when really needed

  • Loading branch information...
1 parent 535ae79 commit 46b92b03b5b8701a2b32c6b65f2771a8654e1018 @klbostee committed Jul 26, 2010
Showing with 8 additions and 3 deletions.
  1. +8 −3 dumbo/backends/streaming.py
@@ -220,9 +220,14 @@ def cat(self, path, opts):
shortcuts=dict(configopts('jars')))
try:
import typedbytes
- ls = os.popen('%s %s/bin/hadoop dfs -ls %s' % (hadenv, self.hadoop, path))
- for line in ls:
- subpath = line.split()[-1]
+ if sum(c in path for c in ("*", "?", "{")) > 0:
+ # cat each file separately when the path contains special chars
+ ls = os.popen('%s %s/bin/hadoop dfs -ls %s' % \
+ (hadenv, self.hadoop, path))
+ subpaths = (line.split()[-1] for line in ls)
+ else:
+ subpaths = [path]
+ for subpath in subpaths:
if not subpath.startswith("/"):
continue
dumptb = os.popen('%s %s/bin/hadoop jar %s dumptb %s 2> /dev/null'

0 comments on commit 46b92b0

Please sign in to comment.