Skip to content
Newer
Older
100644 371 lines (305 sloc) 9.38 KB
640886c Polymorphic MapReduce!
Eric Holk authored
1 /**
2 A parallel word-frequency counting program.
3
4 This is meant primarily to demonstrate Rust's MapReduce framework.
5
6 It takes a list of files on the command line and outputs a list of
7 words along with how many times each word is used.
8
9 */
10
05543fd @eholk Make tests pass
eholk authored
11 // xfail-pretty
12
640886c Polymorphic MapReduce!
Eric Holk authored
13 use std;
14
e5d095d @catamorphism Change option::t to option
catamorphism authored
15 import option = option;
fa9ad98 @graydon Copy first batch of material from libstd to libcore.
graydon authored
16 import option::some;
17 import option::none;
18 import str;
1a276db @eholk Switch map-reduce control protocol to use pipes. This exposed a bug i…
eholk authored
19 import std::map;
20 import std::map::hashmap;
fa9ad98 @graydon Copy first batch of material from libstd to libcore.
graydon authored
21 import vec;
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
22 import io;
23 import io::{reader_util, writer_util};
640886c Polymorphic MapReduce!
Eric Holk authored
24
25 import std::time;
fa9ad98 @graydon Copy first batch of material from libstd to libcore.
graydon authored
26 import u64;
27
28 import task;
29 import comm;
30 import comm::chan;
31 import comm::port;
32 import comm::recv;
33 import comm::send;
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
34 import comm::methods;
35
1a276db @eholk Switch map-reduce control protocol to use pipes. This exposed a bug i…
eholk authored
// Expands to an `unsafe` block that takes the address of `$x`, moves the
// pointed-to value into a fresh local `y` with `<-`, and evaluates to `y`.
// NOTE(review): presumably a workaround for moving a value out of a pattern
// binding (see its uses in `map_task` and `map_reduce`) -- confirm.
36 macro_rules! move {
37 { $x:expr } => { unsafe { let y <- *ptr::addr_of($x); y } }
38 }
39
a00478b @eholk word-count-generic now generates random words in benchmark mode.
eholk authored
// A source of words. `read_word` yields `some(word)` or `none`; the only
// consumer in this file (`map`) stops reading at the first `none`.
40 trait word_reader {
41 fn read_word() -> option<str>;
42 }
43
1a276db @eholk Switch map-reduce control protocol to use pipes. This exposed a bug i…
eholk authored
// Interface a type must provide to be used as a key by `mk_hash`:
// a hash value for the receiver and an equality test against another value
// of the same type.
44 trait hash_key {
45 fn hash() -> uint;
46 fn eq(self) -> bool;
47 }
48
// Builds an empty `map::hashmap<K, V>` whose hashing and equality are
// delegated to the key type's `hash_key` implementation.
49 fn mk_hash<K: const hash_key, V: copy>() -> map::hashmap<K, V> {
// Free-function adapter so the method `k.hash()` can be passed as the
// hash function argument.
50 fn hashfn<K: const hash_key>(k: K) -> uint { k.hash() }
51
52 map::hashmap(hashfn::<K>, |x, y| x.eq(y))
53 }
54
// `hash_key` for strings, forwarding to `str::hash` and `str::eq`.
55 impl of hash_key for str {
56 fn hash() -> uint { str::hash(self) }
57 fn eq(&&x: str) -> bool { str::eq(self, x) }
58 }
59
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
60 // These used to be in task, but they disappeared.
// A joinable task is represented by the port on which the spawned task
// sends `()` once its body has run (see `spawn_joinable` and `join`).
61 type joinable_task = port<()>;
d1fc2b5 @brson Convert to new closure syntax
brson authored
// Spawns a task that runs `f` and then sends `()` on a channel; returns the
// matching port so the caller can wait for that message via `join`.
62 fn spawn_joinable(+f: fn~()) -> joinable_task {
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
63 let p = port();
64 let c = chan(p);
d1fc2b5 @brson Convert to new closure syntax
brson authored
65 do task::spawn() |move f| {
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
66 f();
// Completion signal: sent only after `f` has returned.
67 c.send(());
68 }
69 p
70 }
71
// Waits for a task started with `spawn_joinable` by receiving the `()`
// it sends on completion.
72 fn join(t: joinable_task) {
73 t.recv()
74 }
640886c Polymorphic MapReduce!
Eric Holk authored
75
a00478b @eholk word-count-generic now generates random words in benchmark mode.
eholk authored
// Makes any `io::reader` a `word_reader` by forwarding to the free function
// `read_word` defined later in this file.
76 impl of word_reader for io::reader {
77 fn read_word() -> option<str> { read_word(self) }
78 }
79
// Opens `filename` with `io::file_reader` and returns it as a `word_reader`.
// Fails the task, with the error formatted via `%?`, if the file cannot be
// opened.
80 fn file_word_reader(filename: str) -> word_reader {
81 alt io::file_reader(filename) {
82 result::ok(f) { f as word_reader }
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
83 result::err(e) { fail #fmt("%?", e) }
a00478b @eholk word-count-generic now generates random words in benchmark mode.
eholk authored
84 }
85 }
640886c Polymorphic MapReduce!
Eric Holk authored
86
a00478b @eholk word-count-generic now generates random words in benchmark mode.
eholk authored
// Map step of the word count. Calls `f` to obtain a `word_reader`, then
// emits the pair `(word, 1)` for every word read, stopping when the reader
// returns `none`.
87 fn map(f: fn~() -> word_reader, emit: map_reduce::putter<str, int>) {
// Shadows the factory closure with the reader it produces.
88 let f = f();
321fd80 @catamorphism Add an infinite loop construct
catamorphism authored
89 loop {
a00478b @eholk word-count-generic now generates random words in benchmark mode.
eholk authored
90 alt f.read_word() {
df83a79 @eholk In generic word count, use str instead of [u8], and use built in is_a…
eholk authored
91 some(w) { emit(w, 1); }
04a2887 @catamorphism Remove '.' after nullary tags in patterns
catamorphism authored
92 none { break; }
640886c Polymorphic MapReduce!
Eric Holk authored
93 }
94 }
95 }
96
df83a79 @eholk In generic word count, use str instead of [u8], and use built in is_a…
eholk authored
// Reduce step of the word count. Drains `get` until it returns `none`,
// counting how many values arrive, then prints `word`, a tab, and the count.
97 fn reduce(&&word: str, get: map_reduce::getter<int>) {
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
98 let mut count = 0;
640886c Polymorphic MapReduce!
Eric Holk authored
99
321fd80 @catamorphism Add an infinite loop construct
catamorphism authored
// The received value itself is ignored; each one adds 1 to the count
// (`map` in this file always emits 1).
100 loop { alt get() { some(_) { count += 1; } none { break; } } }
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
101
df83a79 @eholk In generic word count, use str instead of [u8], and use built in is_a…
eholk authored
102 io::println(#fmt("%s\t%?", word, count));
640886c Polymorphic MapReduce!
Eric Holk authored
103 }
104
1a276db @eholk Switch map-reduce control protocol to use pipes. This exposed a bug i…
eholk authored
// A mutable cell holding an `option<T>`, used in this file to thread a pipe
// endpoint through a closure (see `map_task`).
105 class box<T> {
106 let mut contents: option<T>;
// Starts out holding `some(x)`.
107 new(+x: T) { self.contents = some(x); }
108
// Takes the current contents out (leaving `none` while `f` runs), passes
// them to `f`, and stores the value `f` returns.
109 fn swap(f: fn(+T) -> T) {
110 let mut tmp = none;
111 self.contents <-> tmp;
112 self.contents = some(f(option::unwrap(tmp)));
113 }
114
// Takes the contents out and returns them, leaving `none` behind.
// NOTE(review): a later `swap`/`unwrap` would call `option::unwrap` on
// `none`, which presumably fails -- confirm.
115 fn unwrap() -> T {
116 let mut tmp = none;
117 self.contents <-> tmp;
118 option::unwrap(tmp)
119 }
120 }
121
640886c Polymorphic MapReduce!
Eric Holk authored
122 mod map_reduce {
123 export putter;
124 export getter;
125 export mapper;
126 export reducer;
127 export map_reduce;
128
60ae159 @marijnh Switch to new param kind bound syntax
marijnh authored
// Callback a mapper uses to emit one intermediate (key, value) pair.
129 type putter<K: send, V: send> = fn(K, V);
640886c Polymorphic MapReduce!
Eric Holk authored
130
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
// User map function: receives one input of type K1 and a `putter` for
// emitting (K2, V) pairs.
131 type mapper<K1: send, K2: send, V: send> = fn~(K1, putter<K2, V>);
640886c Polymorphic MapReduce!
Eric Holk authored
132
60ae159 @marijnh Switch to new param kind bound syntax
marijnh authored
// Callback a reducer uses to pull values; `none` means no more values
// (see `get` inside `reduce_task`).
133 type getter<V: send> = fn() -> option<V>;
640886c Polymorphic MapReduce!
Eric Holk authored
134
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
// User reduce function: receives a key and a `getter` for that key's values.
135 type reducer<K: copy send, V: copy send> = fn~(K, getter<V>);
640886c Polymorphic MapReduce!
Eric Holk authored
136
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
// Control messages from a mapper to the master: ask for the reducer channel
// for a key (the reply goes on the supplied channel), or announce that the
// mapper has finished.
// NOTE(review): this enum shares its name with the `proto! ctrl_proto`
// declaration that follows, and the visible code uses the pipe protocol's
// `ctrl_proto::client` / `ctrl_proto::server` paths -- confirm whether this
// enum is still needed.
137 enum ctrl_proto<K: copy send, V: copy send> {
138 find_reducer(K, chan<chan<reduce_proto<V>>>),
139 mapper_done
640886c Polymorphic MapReduce!
Eric Holk authored
140 }
141
1a276db @eholk Switch map-reduce control protocol to use pipes. This exposed a bug i…
eholk authored
142
// Pipe protocol between a mapper (client) and the master (server).
143 proto! ctrl_proto {
// State `open`: the sender may either request the reducer for a key
// (moving to `reducer_response`) or announce completion (moving to
// `terminated`).
144 open: send<K: copy send, V: copy send> {
145 find_reducer(K) -> reducer_response<K, V>,
146 mapper_done -> terminated
147 }
148
// State `reducer_response`: the reply carries the reducer's channel and
// returns the protocol to `open`.
149 reducer_response: recv<K: copy send, V: copy send> {
150 reducer(chan<reduce_proto<V>>) -> open<K, V>
151 }
152
// State `terminated`: no messages are defined.
153 terminated: send { }
154 }
155
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
// Messages received by a reducer task (see `get` in `reduce_task`):
// `emit_val(v)` delivers a value, `done` marks that the master has finished,
// and `ref` / `release` increment / decrement the reducer's reference count.
156 enum reduce_proto<V: copy send> { emit_val(V), done, ref, release }
640886c Polymorphic MapReduce!
Eric Holk authored
157
1a276db @eholk Switch map-reduce control protocol to use pipes. This exposed a bug i…
eholk authored
// Starts one mapper task per element of `inputs`.
// For each input a fresh `ctrl_proto` pipe is created: the client end goes
// to the new `map_task`, the server end is pushed onto `ctrls` (passed with
// the `&` mode so the caller sees the additions).
// Returns the joinable handles of the spawned tasks.
158 fn start_mappers<K1: copy send, K2: const copy send hash_key,
159 V: copy send>(
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
160 map: mapper<K1, K2, V>,
1a276db @eholk Switch map-reduce control protocol to use pipes. This exposed a bug i…
eholk authored
161 &ctrls: ~[ctrl_proto::server::open<K2, V>],
162 inputs: ~[K1])
98e161f @msullivan Switch the compiler over to using ~[] notation instead of []/~. Closes
msullivan authored
163 -> ~[joinable_task]
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
164 {
98e161f @msullivan Switch the compiler over to using ~[] notation instead of []/~. Closes
msullivan authored
165 let mut tasks = ~[];
d1fc2b5 @brson Convert to new closure syntax
brson authored
166 for inputs.each |i| {
1a276db @eholk Switch map-reduce control protocol to use pipes. This exposed a bug i…
eholk authored
167 let (ctrl, ctrl_server) = ctrl_proto::init();
// Wrap the client endpoint in a `box` so `map_task` can swap it in and out.
168 let ctrl = box(ctrl);
bf88ff5 @eholk Remove slow vec+=, and make word-count difficulty harder.
eholk authored
169 vec::push(tasks, spawn_joinable(|| map_task(map, ctrl, i) ));
1a276db @eholk Switch map-reduce control protocol to use pipes. This exposed a bug i…
eholk authored
170 vec::push(ctrls, ctrl_server);
640886c Polymorphic MapReduce!
Eric Holk authored
171 }
172 ret tasks;
173 }
174
1a276db @eholk Switch map-reduce control protocol to use pipes. This exposed a bug i…
eholk authored
// Body of one mapper task.
// Runs `map` on `input`; every emitted (key, val) is forwarded as
// `emit_val(val)` to the reducer channel for `key`. Channels are obtained
// from the master over `ctrl` on first use of a key and cached locally.
// When `map` returns, `release` is sent to every cached channel and
// `mapper_done` is sent on the control pipe.
175 fn map_task<K1: copy send, K2: const copy send hash_key, V: copy send>(
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
176 map: mapper<K1, K2, V>,
1a276db @eholk Switch map-reduce control protocol to use pipes. This exposed a bug i…
eholk authored
177 ctrl: box<ctrl_proto::client::open<K2, V>>,
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
178 input: K1)
179 {
f0dfbe7 @graydon Register new snapshots, purge log_err and log_full in favour of log(.…
graydon authored
180 // log(error, "map_task " + input);
1a276db @eholk Switch map-reduce control protocol to use pipes. This exposed a bug i…
eholk authored
// Local cache: key -> reducer channel already obtained from the master.
181 let intermediates = mk_hash();
640886c Polymorphic MapReduce!
Eric Holk authored
182
1a276db @eholk Switch map-reduce control protocol to use pipes. This exposed a bug i…
eholk authored
183 do map(input) |key, val| {
184 let mut c = none;
185 alt intermediates.find(key) {
186 some(_c) { c = some(_c); }
04a2887 @catamorphism Remove '.' after nullary tags in patterns
catamorphism authored
187 none {
1a276db @eholk Switch map-reduce control protocol to use pipes. This exposed a bug i…
eholk authored
// Cache miss: take the endpoint out of the box, send `find_reducer(key)`,
// wait for the `reducer` reply, remember the channel in `c`, and hand the
// endpoint from the reply back to the box.
188 do ctrl.swap |ctrl| {
189 let ctrl = ctrl_proto::client::find_reducer(ctrl, key);
190 alt pipes::recv(ctrl) {
191 ctrl_proto::reducer(c_, ctrl) {
192 c = some(c_);
193 move!{ctrl}
194 }
195 }
196 }
197 intermediates.insert(key, c.get());
// First use of this reducer by this mapper: bump its reference count.
198 send(c.get(), ref);
640886c Polymorphic MapReduce!
Eric Holk authored
199 }
200 }
1a276db @eholk Switch map-reduce control protocol to use pipes. This exposed a bug i…
eholk authored
201 send(c.get(), emit_val(val));
640886c Polymorphic MapReduce!
Eric Holk authored
202 }
203
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
// NOTE(review): `finish` is not referenced anywhere in the visible code;
// the loop after it performs the same `send(v, release)` inline.
204 fn finish<K: copy send, V: copy send>(_k: K, v: chan<reduce_proto<V>>)
205 {
640886c Polymorphic MapReduce!
Eric Holk authored
206 send(v, release);
207 }
1a276db @eholk Switch map-reduce control protocol to use pipes. This exposed a bug i…
eholk authored
// Drop this mapper's reference on every reducer it used, then tell the
// master this mapper is finished.
208 for intermediates.each_value |v| { send(v, release) }
209 ctrl_proto::client::mapper_done(ctrl.unwrap());
640886c Polymorphic MapReduce!
Eric Holk authored
210 }
211
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
// Body of one reducer task.
// Creates a port, sends a channel to it on `out`, then runs `reduce` for
// `key` with a getter that pulls values from that port.
212 fn reduce_task<K: copy send, V: copy send>(
213 reduce: reducer<K, V>,
214 key: K,
215 out: chan<chan<reduce_proto<V>>>)
216 {
640886c Polymorphic MapReduce!
Eric Holk authored
217 let p = port();
218
// Publish the channel on which this reducer receives its messages.
219 send(out, chan(p));
220
4dcf84e @brson Remove bind. Issue #2189
brson authored
221 let mut ref_count = 0;
222 let mut is_done = false;
640886c Polymorphic MapReduce!
Eric Holk authored
223
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
// Returns the next value as `some(v)`, processing bookkeeping messages in
// between. Returns `none` once `done` has been seen and `ref_count` is no
// longer positive. `ref_count` and `is_done` are passed with the `&` mode,
// so updates persist across calls.
224 fn get<V: copy send>(p: port<reduce_proto<V>>,
225 &ref_count: int, &is_done: bool)
ca1df2b @marijnh Pretty-print for new arg-mode syntax
marijnh authored
226 -> option<V> {
640886c Polymorphic MapReduce!
Eric Holk authored
227 while !is_done || ref_count > 0 {
228 alt recv(p) {
229 emit_val(v) {
8b58095 @graydon Register snapshots and switch logging over to use of log_full or #err…
graydon authored
230 // #error("received %d", v);
640886c Polymorphic MapReduce!
Eric Holk authored
231 ret some(v);
232 }
04a2887 @catamorphism Remove '.' after nullary tags in patterns
catamorphism authored
233 done {
8b58095 @graydon Register snapshots and switch logging over to use of log_full or #err…
graydon authored
234 // #error("all done");
640886c Polymorphic MapReduce!
Eric Holk authored
235 is_done = true;
236 }
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
237 ref { ref_count += 1; }
238 release { ref_count -= 1; }
640886c Polymorphic MapReduce!
Eric Holk authored
239 }
240 }
241 ret none;
242 }
243
d1fc2b5 @brson Convert to new closure syntax
brson authored
244 reduce(key, || get(p, ref_count, is_done) );
640886c Polymorphic MapReduce!
Eric Holk authored
245 }
246
1a276db @eholk Switch map-reduce control protocol to use pipes. This exposed a bug i…
eholk authored
// Entry point of the framework.
// Starts one mapper task per input, then serves their control requests:
// `find_reducer` is answered with the channel of the reducer for that key
// (spawning the reducer on first request), `mapper_done` decrements the
// count of live mappers. Once no mappers remain, `done` is sent to every
// reducer and all spawned tasks are joined.
247 fn map_reduce<K1: copy send, K2: const copy send hash_key, V: copy send>(
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
248 map: mapper<K1, K2, V>,
249 reduce: reducer<K2, V>,
98e161f @msullivan Switch the compiler over to using ~[] notation instead of []/~. Closes
msullivan authored
250 inputs: ~[K1])
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
251 {
1a276db @eholk Switch map-reduce control protocol to use pipes. This exposed a bug i…
eholk authored
// Server ends of the mappers' control pipes; filled in by `start_mappers`.
252 let mut ctrl = ~[];
640886c Polymorphic MapReduce!
Eric Holk authored
253
254 // This task becomes the master control task. It spawns tasks
255 // to do the rest.
256
1a276db @eholk Switch map-reduce control protocol to use pipes. This exposed a bug i…
eholk authored
// key -> channel of the reducer task handling that key.
257 let reducers = mk_hash();
258 let mut tasks = start_mappers(map, ctrl, inputs);
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
259 let mut num_mappers = vec::len(inputs) as int;
640886c Polymorphic MapReduce!
Eric Holk authored
260
261 while num_mappers > 0 {
1a276db @eholk Switch map-reduce control protocol to use pipes. This exposed a bug i…
eholk authored
// NOTE(review): presumably `ctrls` holds the endpoints that did not
// deliver `message` -- confirm against `pipes::select`.
262 let (_ready, message, ctrls) = pipes::select(ctrl);
263 alt option::unwrap(message) {
264 ctrl_proto::mapper_done(_) {
8b58095 @graydon Register snapshots and switch logging over to use of log_full or #err…
graydon authored
265 // #error("received mapper terminated.");
640886c Polymorphic MapReduce!
Eric Holk authored
266 num_mappers -= 1;
1a276db @eholk Switch map-reduce control protocol to use pipes. This exposed a bug i…
eholk authored
267 ctrl = ctrls;
640886c Polymorphic MapReduce!
Eric Holk authored
268 }
1a276db @eholk Switch map-reduce control protocol to use pipes. This exposed a bug i…
eholk authored
269 ctrl_proto::find_reducer(k, cc) {
640886c Polymorphic MapReduce!
Eric Holk authored
270 let c;
f0dfbe7 @graydon Register new snapshots, purge log_err and log_full in favour of log(.…
graydon authored
271 // log(error, "finding reducer for " + k);
1a276db @eholk Switch map-reduce control protocol to use pipes. This exposed a bug i…
eholk authored
272 alt reducers.find(k) {
640886c Polymorphic MapReduce!
Eric Holk authored
273 some(_c) {
f0dfbe7 @graydon Register new snapshots, purge log_err and log_full in favour of log(.…
graydon authored
274 // log(error,
8b58095 @graydon Register snapshots and switch logging over to use of log_full or #err…
graydon authored
275 // "reusing existing reducer for " + k);
640886c Polymorphic MapReduce!
Eric Holk authored
276 c = _c;
277 }
04a2887 @catamorphism Remove '.' after nullary tags in patterns
catamorphism authored
278 none {
f0dfbe7 @graydon Register new snapshots, purge log_err and log_full in favour of log(.…
graydon authored
279 // log(error, "creating new reducer for " + k);
640886c Polymorphic MapReduce!
Eric Holk authored
280 let p = port();
a1ef79c @nikomatsakis update to use new spawn syntax
nikomatsakis authored
281 let ch = chan(p);
640886c Polymorphic MapReduce!
Eric Holk authored
282 let r = reduce, kk = k;
bf88ff5 @eholk Remove slow vec+=, and make word-count difficulty harder.
eholk authored
283 vec::push(tasks,
284 spawn_joinable(|| reduce_task(r, kk, ch) ));
640886c Polymorphic MapReduce!
Eric Holk authored
// Block until the new reducer reports the channel it listens on.
285 c = recv(p);
1a276db @eholk Switch map-reduce control protocol to use pipes. This exposed a bug i…
eholk authored
286 reducers.insert(k, c);
640886c Polymorphic MapReduce!
Eric Holk authored
287 }
288 }
1a276db @eholk Switch map-reduce control protocol to use pipes. This exposed a bug i…
eholk authored
// Reply to the mapper with the reducer channel and put the resulting
// endpoint back into the set being selected on.
289 ctrl = vec::append_one(
290 ctrls,
291 ctrl_proto::server::reducer(move!{cc}, c));
640886c Polymorphic MapReduce!
Eric Holk authored
292 }
293 }
294 }
295
1a276db @eholk Switch map-reduce control protocol to use pipes. This exposed a bug i…
eholk authored
// All mappers have reported `mapper_done`: tell every reducer.
296 for reducers.each_value |v| { send(v, done) }
640886c Polymorphic MapReduce!
Eric Holk authored
297
d1fc2b5 @brson Convert to new closure syntax
brson authored
// Wait for every mapper and reducer task to finish.
298 for tasks.each |t| { join(t); }
640886c Polymorphic MapReduce!
Eric Holk authored
299 }
300 }
301
98e161f @msullivan Switch the compiler over to using ~[] notation instead of []/~. Closes
msullivan authored
// Program entry point.
// With file names on the command line, counts the words in those files.
// With none, prints a usage line and returns unless the RUST_BENCH
// environment variable is set, in which case randomly generated words are
// counted instead. Logs the elapsed time of the map-reduce run.
302 fn main(argv: ~[str]) {
a00478b @eholk word-count-generic now generates random words in benchmark mode.
eholk authored
303 if vec::len(argv) < 2u && !os::getenv("RUST_BENCH").is_some() {
640886c Polymorphic MapReduce!
Eric Holk authored
304 let out = io::stdout();
305
5c49e4f @brson Reformat. Issue #855
brson authored
306 out.write_line(#fmt["Usage: %s <filename> ...", argv[0]]);
640886c Polymorphic MapReduce!
Eric Holk authored
307
308 ret;
309 }
310
a00478b @eholk word-count-generic now generates random words in benchmark mode.
eholk authored
// One reader factory per input; each factory is run inside its mapper task.
311 let readers: ~[fn~() -> word_reader] = if argv.len() >= 2 {
05543fd @eholk Make tests pass
eholk authored
312 // FIXME (#2880)
// Skip argv[0]; every remaining argument is treated as a file name.
313 vec::slice(argv, 1u, argv.len()).map(
a00478b @eholk word-count-generic now generates random words in benchmark mode.
eholk authored
314 |f| fn~() -> word_reader { file_word_reader(f) } )
315 }
316 else {
// Benchmark mode: 50 readers, each producing 600 random words.
317 let num_readers = 50;
1a276db @eholk Switch map-reduce control protocol to use pipes. This exposed a bug i…
eholk authored
318 let words_per_reader = 600;
a00478b @eholk word-count-generic now generates random words in benchmark mode.
eholk authored
319 vec::from_fn(
320 num_readers,
321 |_i| fn~() -> word_reader {
322 random_word_reader(words_per_reader) as word_reader
323 })
324 };
325
640886c Polymorphic MapReduce!
Eric Holk authored
326 let start = time::precise_time_ns();
327
a00478b @eholk word-count-generic now generates random words in benchmark mode.
eholk authored
328 map_reduce::map_reduce(map, reduce, readers);
640886c Polymorphic MapReduce!
Eric Holk authored
329 let stop = time::precise_time_ns();
330
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
// Nanoseconds to milliseconds.
331 let elapsed = (stop - start) / 1000000u64;
640886c Polymorphic MapReduce!
Eric Holk authored
332
f0dfbe7 @graydon Register new snapshots, purge log_err and log_full in favour of log(.…
graydon authored
333 log(error, "MapReduce completed in "
8b58095 @graydon Register snapshots and switch logging over to use of log_full or #err…
graydon authored
334 + u64::str(elapsed) + "ms");
640886c Polymorphic MapReduce!
Eric Holk authored
335 }
336
5c49e4f @brson Reformat. Issue #855
brson authored
// Reads the next word from `r`, one character at a time.
// Word characters (see `is_word_char`) are accumulated; the first non-word
// character after a non-empty accumulation ends the word and returns it.
// Non-word characters seen while nothing is accumulated are skipped.
// Returns `none` when `r.eof()` becomes true.
// NOTE(review): if the loop exits on eof while `w` is non-empty, that text
// is discarded; whether this can happen depends on when `r.eof()` turns
// true relative to the last `read_char` -- confirm.
337 fn read_word(r: io::reader) -> option<str> {
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
338 let mut w = "";
640886c Polymorphic MapReduce!
Eric Holk authored
339
340 while !r.eof() {
341 let c = r.read_char();
342
343 if is_word_char(c) {
ab6bb03 @brson Rename std::istr to std::str. Issue #855
brson authored
344 w += str::from_char(c);
5c49e4f @brson Reformat. Issue #855
brson authored
345 } else { if w != "" { ret some(w); } }
640886c Polymorphic MapReduce!
Eric Holk authored
346 }
347 ret none;
348 }
a00478b @eholk word-count-generic now generates random words in benchmark mode.
eholk authored
349
df83a79 @eholk In generic word count, use str instead of [u8], and use built in is_a…
eholk authored
// True when `c` may be part of a word: alphabetic, a digit, or an underscore.
350 fn is_word_char(c: char) -> bool {
a00478b @eholk word-count-generic now generates random words in benchmark mode.
eholk authored
351 char::is_alphabetic(c) || char::is_digit(c) || c == '_'
352 }
353
// A `word_reader` that produces `count` randomly generated strings and then
// returns `none`. Used by `main` when no input files are given.
354 class random_word_reader: word_reader {
// Number of words still to be produced.
355 let mut remaining: uint;
356 let rng: rand::rng;
357 new(count: uint) {
358 self.remaining = count;
359 self.rng = rand::rng();
360 }
361
362 fn read_word() -> option<str> {
363 if self.remaining > 0 {
364 self.remaining -= 1;
1a276db @eholk Switch map-reduce control protocol to use pipes. This exposed a bug i…
eholk authored
// Word length is drawn from `gen_uint_range(1, 4)`.
// NOTE(review): whether the upper bound is inclusive is not visible here.
365 let len = self.rng.gen_uint_range(1, 4);
366 some(self.rng.gen_str(len))
a00478b @eholk word-count-generic now generates random words in benchmark mode.
eholk authored
367 }
368 else { none }
369 }
370 }
Something went wrong with that request. Please try again.