Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Loading…

Fix typedbytes.pyc file path should start with file:// #64

Closed
wants to merge 1 commit into from

2 participants

@d2207197

dumbo generates the wrong typedbytes argument: -jobconf 'tmpfiles=/usr/local/lib/python2.7/dist-packages/typedbytes.pyc'
The correct one should be -file '/usr/local/lib/python2.7/dist-packages/typedbytes.pyc'

dumbo error messages:

EXEC: HADOOP_CLASSPATH=":$HADOOP_CLASSPATH" /usr/bin/hadoop jar /usr/lib/hadoop-mapreduce/hadoop-streaming.jar -outputformat 'org.apache.hadoop.mapred.SequenceFileOutputFormat' -inputformat 'org.apache.hadoop.streaming.AutoInputFormat' -reducer 'python -m tf-idf-dumbo red 0 262144000' -mapper 'python -m tf-idf-dumbo map 0 262144000' -file '/home/external/joe/nlp/lab11/vocab.mail.txt' -file '/home/external/joe/nlp/lab11/tf-idf-dumbo.py' -file '/usr/local/lib/python2.7/dist-packages/dumbo/backends/common.pyc' -output 'enron-tf-idf_pre1' -jobconf 'stream.map.input=typedbytes' -jobconf 'stream.reduce.input=typedbytes' -jobconf 'stream.map.output=typedbytes' -jobconf 'stream.reduce.output=typedbytes' -jobconf 'mapred.job.name=tf-idf-dumbo.py (1/2)' -jobconf 'tmpfiles=/usr/local/lib/python2.7/dist-packages/typedbytes.pyc' -input '/tmp/enronsample' -cmdenv 'dumbo_mrbase_class=dumbo.backends.common.MapRedBase' -cmdenv 'dumbo_jk_class=dumbo.backends.common.JoinKey' -cmdenv 'dumbo_runinfo_class=dumbo.backends.streaming.StreamingRunInfo' -cmdenv ''PYTHONPATH=common.pyc'
...
12/12/15 23:47:15 ERROR security.UserGroupInformation: PriviledgedActionException as:joe (auth:SIMPLE) cause:java.io.FileNotFoundException: File does not exist: /usr/local/lib/python2.7/dist-packages/typedbytes.pyc
12/12/15 23:47:15 ERROR security.UserGroupInformation: PriviledgedActionException as:joe (auth:SIMPLE) cause:java.io.FileNotFoundException: File does not exist: /usr/local/lib/python2.7/dist-packages/typedbytes.pyc
12/12/15 23:47:15 ERROR streaming.StreamJob: Error launching job , bad input path : File does not exist: /usr/local/lib/python2.7/dist-packages/typedbytes.pyc
Streaming Command Failed!
@klbostee
Owner

Did pretty much the same thing in:

b67a7b1

Thanks anyway though!

@klbostee klbostee closed this
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on Dec 15, 2012
  1. @d2207197
This page is out of date. Refresh to see the latest.
Showing with 10 additions and 10 deletions.
  1. +10 −10 dumbo/backends/streaming.py
View
20 dumbo/backends/streaming.py
@@ -24,10 +24,10 @@
class StreamingBackend(Backend):
-
+
def matches(self, opts):
return bool(opts['hadoop'])
-
+
def create_iteration(self, opts):
return StreamingIteration(opts.pop('prog')[0], opts)
@@ -77,7 +77,7 @@ def run(self):
if modpath.endswith('.egg'):
addedopts.add('libegg', modpath)
else:
- opts.add('file', modpath)
+ opts.add('file', 'file://' + modpath)
opts.add('jobconf', 'stream.map.input=typedbytes')
opts.add('jobconf', 'stream.reduce.input=typedbytes')
@@ -204,11 +204,11 @@ def run(self):
return retval
class StreamingFileSystem(FileSystem):
-
+
def __init__(self, hadoop):
self.hadoop = hadoop
self.hdfs = hadoop + '/bin/hadoop fs'
-
+
def cat(self, path, opts):
streamingjar = findjar(self.hadoop, 'streaming',
opts['hadooplib'] if 'hadooplib' in opts else None)
@@ -237,23 +237,23 @@ def cat(self, path, opts):
except IOError:
pass # ignore
return 0
-
+
def ls(self, path, opts):
return execute("%s -ls '%s'" % (self.hdfs, path),
printcmd=False)
-
+
def exists(self, path, opts):
shellcmd = "%s -stat '%s' >/dev/null 2>&1"
return 1 - int(execute(shellcmd % (self.hdfs, path), printcmd=False) == 0)
-
+
def rm(self, path, opts):
return execute("%s -rmr '%s'" % (self.hdfs, path),
printcmd=False)
-
+
def put(self, path1, path2, opts):
return execute("%s -put '%s' '%s'" % (self.hdfs, path1,
path2), printcmd=False)
-
+
def get(self, path1, path2, opts):
return execute("%s -get '%s' '%s'" % (self.hdfs, path1,
path2), printcmd=False)
Something went wrong with that request. Please try again.