diff --git a/tests/mr_os_walk_job.py b/tests/mr_os_walk_job.py new file mode 100644 index 000000000..be40218a9 --- /dev/null +++ b/tests/mr_os_walk_job.py @@ -0,0 +1,39 @@ +# Copyright 2013 David Marin +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import os.path + +from mrjob.job import MRJob +from mrjob.protocol import RawValueProtocol + + +class MROSWalkJob(MRJob): + """Recursively return the name and size of each file in the current dir.""" + + def mapper_final(self): + # hook for test_local.LocalRunnerSetupTestCase.test_python_archive() + try: + import foo + except ImportError: + pass + + for dirpath, _, filenames in os.walk('.'): + for filename in filenames: + path = os.path.join(dirpath, filename) + size = os.path.getsize(path) + yield path, size + + +if __name__ == '__main__': + MROSWalkJob.run() diff --git a/tests/test_local.py b/tests/test_local.py index a1def7c58..1134dd2d1 100644 --- a/tests/test_local.py +++ b/tests/test_local.py @@ -1,4 +1,4 @@ -# Copyright 2009-2012 Yelp and Contributors +# Copyright 2009-2013 Yelp and Contributors # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -36,15 +36,17 @@ from mrjob.util import bash_wrap from mrjob.util import cmd_line from mrjob.util import read_file +from mrjob.util import tar_and_gzip from tests.mr_cmd_job import CmdJob from tests.mr_counting_job import MRCountingJob from tests.mr_exit_42_job import MRExit42Job from tests.mr_filter_job import FilterJob from tests.mr_job_where_are_you import MRJobWhereAreYou +from tests.mr_os_walk_job import MROSWalkJob from tests.mr_test_jobconf import MRTestJobConf -from tests.mr_word_count import MRWordCount from tests.mr_two_step_job import MRTwoStepJob from tests.mr_verbose_job import MRVerboseJob +from tests.mr_word_count import MRWordCount from tests.quiet import no_handlers_for_logger from tests.sandbox import mrjob_conf_patcher from tests.sandbox import EmptyMrjobConfTestCase @@ -753,3 +755,113 @@ def test_reducer_pre_filter(self): lines = [line.strip() for line in list(r.stream_output())] self.assertItemsEqual(lines, ['x$', 'y$', 'z$']) + + +class LocalRunnerSetupTestCase(SandboxedTestCase): + + def setUp(self): + super(LocalRunnerSetupTestCase, self).setUp() + + os.mkdir(os.path.join(self.tmp_dir, 'foo')) + + self.foo_py = os.path.join(self.tmp_dir, 'foo', 'foo.py') + + # if our job can import foo, getsize will return 2x as many bytes + with open(self.foo_py, 'w') as foo_py: + foo_py.write('import os.path\n' + 'from os.path import getsize as _real_getsize\n' + 'os.path.getsize = lambda p: _real_getsize(p) * 2') + + self.foo_sh = os.path.join(self.tmp_dir, 'foo', 'foo.sh') + + with open(self.foo_sh, 'w') as foo_sh: + foo_sh.write('#!/bin/sh\n' + 'touch foo.sh-made-this\n') + os.chmod(self.foo_sh, stat.S_IRWXU) + + self.foo_tar_gz = os.path.join(self.tmp_dir, 'foo.tar.gz') + tar_and_gzip(os.path.join(self.tmp_dir, 'foo'), self.foo_tar_gz) + + self.foo_py_size = os.path.getsize(self.foo_py) + self.foo_sh_size = os.path.getsize(self.foo_sh) + self.foo_tar_gz_size = os.path.getsize(self.foo_tar_gz) + + def test_file_upload(self): + job = MROSWalkJob(['--runner=local', '--no-bootstrap-mrjob', + '--file', self.foo_sh, + '--file', self.foo_sh + '#bar.sh', + ]) + job.sandbox() + + with job.make_runner() as r: + r.run() + + path_to_size = dict(job.parse_output_line(line) + for line in r.stream_output()) + + self.assertEqual(path_to_size.get('./foo.sh'), self.foo_sh_size) + self.assertEqual(path_to_size.get('./bar.sh'), self.foo_sh_size) + + def test_archive_upload(self): + job = MROSWalkJob(['--runner=local', '--no-bootstrap-mrjob', + '--archive', self.foo_tar_gz, + '--archive', self.foo_tar_gz + '#foo', + ]) + job.sandbox() + + with job.make_runner() as r: + r.run() + + path_to_size = dict(job.parse_output_line(line) + for line in r.stream_output()) + + self.assertEqual(path_to_size.get('./foo.tar.gz/foo.py'), + self.foo_py_size) + self.assertEqual(path_to_size.get('./foo/foo.py'), + self.foo_py_size) + + def test_python_archive(self): + job = MROSWalkJob( + ['--runner=local', '--no-bootstrap-mrjob', + '--python-archive', self.foo_tar_gz]) + job.sandbox() + + with job.make_runner() as r: + r.run() + + path_to_size = dict(job.parse_output_line(line) + for line in r.stream_output()) + + # foo.py should be there, and getsize() should be patched to return + # double the number of bytes + self.assertEqual(path_to_size.get('./foo.tar.gz/foo.py'), + self.foo_py_size * 2) + + def test_setup_cmd(self): + job = MROSWalkJob( + ['--runner=local', '--no-bootstrap-mrjob', + '--setup-cmd', 'touch bar']) + job.sandbox() + + with job.make_runner() as r: + r.run() + + path_to_size = dict(job.parse_output_line(line) + for line in r.stream_output()) + + self.assertIn('./bar', path_to_size) + + def test_setup_script(self): + job = MROSWalkJob( + ['--runner=local', '--no-bootstrap-mrjob', + '--setup-script', self.foo_sh]) + job.sandbox() + + with job.make_runner() as r: + r.run() + + path_to_size = dict(job.parse_output_line(line) + for line in r.stream_output()) + + self.assertEqual(path_to_size.get('./foo.sh'), self.foo_sh_size) + self.assertIn('./foo.sh-made-this', path_to_size)