# test_input.py -- forked from discoproject/disco
# (upstream listing: 70 lines, 56 loc, 2.35 KB)
from disco.job import JobChain
from disco.error import JobError
from disco.test import TestCase, TestJob
from disco.worker import task_io
class MapJob(TestJob):
    """Map-only test job that tags every input entry with ``'against_me'``."""

    # Class-level partition setting; several tests in this file also pass
    # an explicit ``partitions`` value to ``run()``.
    partitions = 3

    @staticmethod
    def map(e, params):
        """Emit one ``(e, 'against_me')`` pair for the input entry *e*.

        *params* is accepted but not used.
        """
        tagged = (e, 'against_me')
        yield tagged
class ReduceJob(TestJob):
    """Reduce-only test job that pairs every incoming item with ``'mmm'``."""

    @staticmethod
    def reduce(iter, params):
        """Yield ``(item, 'mmm')`` for each item of *iter*, in order.

        *params* is accepted but not used.
        """
        # NOTE: ``iter`` shadows the builtin of the same name; it is part of
        # the signature and is therefore left unchanged.
        for entry in iter:
            yield (entry, 'mmm')
class MergeReduceJob(ReduceJob):
    """``ReduceJob`` under a distinct class name; adds no behaviour of its own."""
class MapReduceJob(MapJob, ReduceJob):
    """Job that inherits ``map`` from ``MapJob`` and ``reduce`` from ``ReduceJob``."""
class InputTestCase(TestCase):
    """Run the test jobs against empty, ``raw://`` and test-server inputs."""

    def serve(self, path):
        """Return the fixed payload ``b'smoothies'`` regardless of *path*."""
        return b'smoothies'

    def test_empty_map(self):
        # Map-only job with an empty input list: expect an empty result list.
        no_input = []
        self.job = MapJob().run(input=no_input)
        self.assertResults(self.job, [])

    def test_empty_reduce(self):
        # Reduce-only job with an empty input list: expect an empty result list.
        no_input = []
        self.job = ReduceJob().run(input=no_input)
        self.assertResults(self.job, [])

    def test_empty_mapreduce(self):
        # Combined map/reduce job with an empty input list: expect an empty
        # result list.
        no_input = []
        self.job = MapReduceJob().run(input=no_input)
        self.assertResults(self.job, [])

    def test_partitioned_map(self):
        # One raw:// input mapped with partitions=2; the single mapped pair
        # is the expected result.
        mapper = MapJob()
        self.job = mapper.run(input=['raw://organic_vodka'], partitions=2)
        expected = [('organic_vodka', 'against_me')]
        self.assertResults(self.job, expected)

    def test_nonpartitioned_map(self):
        # Same input as the partitioned case, but with partitions=None; the
        # expected result is identical.
        mapper = MapJob()
        self.job = mapper.run(input=['raw://organic_vodka'], partitions=None)
        expected = [('organic_vodka', 'against_me')]
        self.assertResults(self.job, expected)

    def test_nonpartitioned_reduce(self):
        # Reduce directly over a test-server URL, with partitions=None and
        # reduce_reader=None; the expected item is the b'smoothies' payload.
        reducer = ReduceJob()
        urls = self.test_server.urls(['test'])
        self.job = reducer.run(input=urls,
                               partitions=None,
                               reduce_reader=None)
        expected = [(b'smoothies', 'mmm')]
        self.assertResults(self.job, expected)

    def test_partitioned_mapreduce(self):
        # Map then reduce over a test-server URL with partitions=8 and the
        # chain reader; the expected result nests the map pair inside the
        # reduce pair.
        job = MapReduceJob()
        urls = self.test_server.urls(['test'])
        self.job = job.run(input=urls,
                           partitions=8,
                           reduce_reader=task_io.chain_reader)
        expected = [((b'smoothies', 'against_me'), 'mmm')]
        self.assertResults(self.job, expected)

    def test_partitioned_reduce(self):
        # Two map jobs over the same raw:// inputs feed both a ReduceJob and
        # a MergeReduceJob; the two reducers must produce the same results
        # once sorted.
        beers = ['sam_adams', 'trader_jose', 'boont_esb']
        beer_urls = ['raw://{0}'.format(name) for name in beers]
        first_map, second_map = MapJob(), MapJob()
        reducer, merge_reducer = ReduceJob(), MergeReduceJob()
        chain = {
            first_map: beer_urls,
            second_map: beer_urls,
            reducer: [first_map, second_map],
            merge_reducer: [first_map, second_map],
        }
        self.job = JobChain(chain)
        self.job.wait()
        reduced = sorted(self.results(reducer))
        merge_reduced = sorted(self.results(merge_reducer))
        self.assertAllEqual(reduced, merge_reduced)