Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 310 lines (256 sloc) 7.924 kb
640886c Polymorphic MapReduce!
Eric Holk authored
1 /**
2 A parallel word-frequency counting program.
3
4 This is meant primarily to demonstrate Rust's MapReduce framework.
5
6 It takes a list of files on the command line and outputs a list of
7 words along with how many times each word is used.
8
9 */
10
11 use std;
12
e5d095d @catamorphism Change option::t to option
catamorphism authored
13 import option = option;
fa9ad98 @graydon Copy first batch of material from libstd to libcore.
graydon authored
14 import option::some;
15 import option::none;
16 import str;
640886c Polymorphic MapReduce!
Eric Holk authored
17 import std::treemap;
fa9ad98 @graydon Copy first batch of material from libstd to libcore.
graydon authored
18 import vec;
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
19 import io;
20 import io::{reader_util, writer_util};
640886c Polymorphic MapReduce!
Eric Holk authored
21
22 import std::time;
fa9ad98 @graydon Copy first batch of material from libstd to libcore.
graydon authored
23 import u64;
24
25 import task;
26 import comm;
27 import comm::chan;
28 import comm::port;
29 import comm::recv;
30 import comm::send;
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
31 import comm::methods;
32
a00478b @eholk word-count-generic now generates random words in benchmark mode.
eholk authored
// A source of words. `read_word` returns `some(word)` for each word and
// `none` when the source is exhausted; `map` stops at the first `none`.
33 trait word_reader {
34 fn read_word() -> option<str>;
35 }
36
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
37 // These used to be in task, but they disappeared.
// A joinable task is represented by the port on which it announces that it
// has finished.
38 type joinable_task = port<()>;
d1fc2b5 @brson Convert to new closure syntax
brson authored
// Runs `f` in a newly spawned task and returns a port that receives a single
// unit value after `f` has returned. Pass the result to `join` to wait for it.
39 fn spawn_joinable(+f: fn~()) -> joinable_task {
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
40 let p = port();
41 let c = chan(p);
d1fc2b5 @brson Convert to new closure syntax
brson authored
42 do task::spawn() |move f| {
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
43 f();
// Completion signal: sent only after the task body has run.
44 c.send(());
45 }
46 p
47 }
48
// Waits for a task started with `spawn_joinable` by receiving the unit
// message that the task sends once its body has returned.
49 fn join(t: joinable_task) {
50 t.recv()
51 }
640886c Polymorphic MapReduce!
Eric Holk authored
52
a00478b @eholk word-count-generic now generates random words in benchmark mode.
eholk authored
// Lets any `io::reader` be used as a `word_reader` by delegating to the
// free function `read_word` defined later in this file.
53 impl of word_reader for io::reader {
54 fn read_word() -> option<str> { read_word(self) }
55 }
56
// Opens `filename` with `io::file_reader` and returns the reader as a
// `word_reader`. If the open does not succeed the task fails, with the error
// value formatted as the failure message.
57 fn file_word_reader(filename: str) -> word_reader {
58 alt io::file_reader(filename) {
59 result::ok(f) { f as word_reader }
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
60 result::err(e) { fail #fmt("%?", e) }
a00478b @eholk word-count-generic now generates random words in benchmark mode.
eholk authored
61 }
62 }
640886c Polymorphic MapReduce!
Eric Holk authored
63
a00478b @eholk word-count-generic now generates random words in benchmark mode.
eholk authored
// Word-count mapper. Calls `f` to obtain a `word_reader`, then emits the
// pair (word, 1) for every word it yields, stopping when `read_word`
// returns `none`.
64 fn map(f: fn~() -> word_reader, emit: map_reduce::putter<str, int>) {
// Shadows the factory closure with the reader it produces.
65 let f = f();
321fd80 @catamorphism Add an infinite loop construct
catamorphism authored
66 loop {
a00478b @eholk word-count-generic now generates random words in benchmark mode.
eholk authored
67 alt f.read_word() {
df83a79 @eholk In generic word count, use str instead of [u8], and use built in is_a…
eholk authored
68 some(w) { emit(w, 1); }
04a2887 @catamorphism Remove '.' after nullary tags in patterns
catamorphism authored
69 none { break; }
640886c Polymorphic MapReduce!
Eric Holk authored
70 }
71 }
72 }
73
df83a79 @eholk In generic word count, use str instead of [u8], and use built in is_a…
eholk authored
// Word-count reducer. Pulls values from `get` until it returns `none` and
// prints `word`, a tab, and the number of values received.
// Note: the values themselves are ignored; each one simply adds 1 to the
// count. This matches a sum only because `map` always emits the value 1.
74 fn reduce(&&word: str, get: map_reduce::getter<int>) {
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
75 let mut count = 0;
640886c Polymorphic MapReduce!
Eric Holk authored
76
321fd80 @catamorphism Add an infinite loop construct
catamorphism authored
77 loop { alt get() { some(_) { count += 1; } none { break; } } }
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
78
df83a79 @eholk In generic word count, use str instead of [u8], and use built in is_a…
eholk authored
79 io::println(#fmt("%s\t%?", word, count));
640886c Polymorphic MapReduce!
Eric Holk authored
80 }
81
82 mod map_reduce {
83 export putter;
84 export getter;
85 export mapper;
86 export reducer;
87 export map_reduce;
88
60ae159 @marijnh Switch to new param kind bound syntax
marijnh authored
// Callback handed to a mapper; the mapper calls it once per key/value pair
// it wants to emit.
89 type putter<K: send, V: send> = fn(K, V);
640886c Polymorphic MapReduce!
Eric Holk authored
90
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
// User-supplied map function: receives one input (K1) and a putter through
// which it emits intermediate (K2, V) pairs.
91 type mapper<K1: send, K2: send, V: send> = fn~(K1, putter<K2, V>);
640886c Polymorphic MapReduce!
Eric Holk authored
92
60ae159 @marijnh Switch to new param kind bound syntax
marijnh authored
// Callback handed to a reducer; each call yields the next value for the
// reducer's key, or `none` when there are no more values.
93 type getter<V: send> = fn() -> option<V>;
640886c Polymorphic MapReduce!
Eric Holk authored
94
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
// User-supplied reduce function: receives a key and a getter for the values
// emitted under that key.
95 type reducer<K: copy send, V: copy send> = fn~(K, getter<V>);
640886c Polymorphic MapReduce!
Eric Holk authored
96
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
// Messages sent by map tasks to the master loop in `map_reduce`:
//   find_reducer(key, reply) - ask for the channel of the reduce task that
//                              handles `key`; the answer is sent on `reply`.
//   mapper_done              - the sending map task has finished.
97 enum ctrl_proto<K: copy send, V: copy send> {
98 find_reducer(K, chan<chan<reduce_proto<V>>>),
99 mapper_done
640886c Polymorphic MapReduce!
Eric Holk authored
100 }
101
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
// Messages received by a reduce task:
//   emit_val(v) - a value for this reducer's key (sent by map tasks).
//   done        - sent by the master after every mapper has reported done.
//   ref         - a map task has started using this reducer's channel.
//   release     - a map task that sent `ref` is finished with the channel.
102 enum reduce_proto<V: copy send> { emit_val(V), done, ref, release }
640886c Polymorphic MapReduce!
Eric Holk authored
103
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
// Spawns one joinable `map_task` per element of `inputs`, each running `map`
// and reporting to the master over `ctrl`. Returns the handles of the
// spawned tasks so the caller can join them.
104 fn start_mappers<K1: copy send, K2: copy send, V: copy send>(
105 map: mapper<K1, K2, V>,
98e161f @msullivan Switch the compiler over to using ~[] notation instead of []/~. Close…
msullivan authored
106 ctrl: chan<ctrl_proto<K2, V>>, inputs: ~[K1])
107 -> ~[joinable_task]
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
108 {
98e161f @msullivan Switch the compiler over to using ~[] notation instead of []/~. Close…
msullivan authored
109 let mut tasks = ~[];
d1fc2b5 @brson Convert to new closure syntax
brson authored
110 for inputs.each |i| {
111 tasks += ~[spawn_joinable(|| map_task(map, ctrl, i) )];
640886c Polymorphic MapReduce!
Eric Holk authored
112 }
113 ret tasks;
114 }
115
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
// Body of a single map task. Runs `map` on `input`, routing every emitted
// (key, value) pair to the reduce task responsible for that key, then tells
// each reducer it used that it is finished (`release`) and tells the master
// that this mapper is done (`mapper_done`).
116 fn map_task<K1: copy send, K2: copy send, V: copy send>(
117 map: mapper<K1, K2, V>,
118 ctrl: chan<ctrl_proto<K2, V>>,
119 input: K1)
120 {
f0dfbe7 @graydon Register new snapshots, purge log_err and log_full in favour of log(.…
graydon authored
121 // log(error, "map_task " + input);
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
// Per-task cache: key -> channel of the reduce task handling that key.
122 let intermediates = treemap::treemap();
640886c Polymorphic MapReduce!
Eric Holk authored
123

0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
// Sends `val` to the reducer for `key`. On a cache miss the reducer channel
// is requested from the master via `find_reducer`, stored in `im`, and a
// `ref` message is sent to it before the first value.
124 fn emit<K2: copy send, V: copy send>(
125 im: treemap::treemap<K2, chan<reduce_proto<V>>>,
126 ctrl: chan<ctrl_proto<K2, V>>, key: K2, val: V)
127 {
640886c Polymorphic MapReduce!
Eric Holk authored
128 let c;
129 alt treemap::find(im, key) {
7298b8f @marijnh Insert omitted semicolons for statements
marijnh authored
130 some(_c) { c = _c; }
04a2887 @catamorphism Remove '.' after nullary tags in patterns
catamorphism authored
131 none {
640886c Polymorphic MapReduce!
Eric Holk authored
// Fresh reply port for the master's answer.
132 let p = port();
133 send(ctrl, find_reducer(key, chan(p)));
134 c = recv(p);
135 treemap::insert(im, key, c);
136 send(c, ref);
137 }
138 }
139 send(c, emit_val(val));
140 }
141
4dcf84e @brson Remove bind. Issue #2189
brson authored
142 map(input, {|a,b|emit(intermediates, ctrl, a, b)});
640886c Polymorphic MapReduce!
Eric Holk authored
143
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
// Traversal callback: sends `release` on one cached reducer channel.
144 fn finish<K: copy send, V: copy send>(_k: K, v: chan<reduce_proto<V>>)
145 {
640886c Polymorphic MapReduce!
Eric Holk authored
146 send(v, release);
147 }
148 treemap::traverse(intermediates, finish);
149 send(ctrl, mapper_done);
150 }
151
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
// Body of a single reduce task for `key`. Creates its own input port, sends
// the matching channel back on `out`, and then runs `reduce` with a getter
// that feeds it the values arriving on that port.
152 fn reduce_task<K: copy send, V: copy send>(
153 reduce: reducer<K, V>,
154 key: K,
155 out: chan<chan<reduce_proto<V>>>)
156 {
640886c Polymorphic MapReduce!
Eric Holk authored
157 let p = port();
158
159 send(out, chan(p));
160
4dcf84e @brson Remove bind. Issue #2189
brson authored
// ref_count: `ref` messages received minus `release` messages received.
// is_done: set once the `done` message has been received.
161 let mut ref_count = 0;
162 let mut is_done = false;
640886c Polymorphic MapReduce!
Eric Holk authored
163

0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
// Returns the next emitted value, or `none` once `done` has been received
// and ref_count is no longer positive. Control messages are consumed
// here and only update the two counters.
// NOTE(review): ref_count and is_done use the `&` argument mode, presumably
// so that updates persist between calls - confirm against the language docs.
164 fn get<V: copy send>(p: port<reduce_proto<V>>,
165 &ref_count: int, &is_done: bool)
ca1df2b @marijnh Pretty-print for new arg-mode syntax
marijnh authored
166 -> option<V> {
640886c Polymorphic MapReduce!
Eric Holk authored
167 while !is_done || ref_count > 0 {
168 alt recv(p) {
169 emit_val(v) {
8b58095 @graydon Register snapshots and switch logging over to use of log_full or #err…
graydon authored
170 // #error("received %d", v);
640886c Polymorphic MapReduce!
Eric Holk authored
171 ret some(v);
172 }
04a2887 @catamorphism Remove '.' after nullary tags in patterns
catamorphism authored
173 done {
8b58095 @graydon Register snapshots and switch logging over to use of log_full or #err…
graydon authored
174 // #error("all done");
640886c Polymorphic MapReduce!
Eric Holk authored
175 is_done = true;
176 }
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
177 ref { ref_count += 1; }
178 release { ref_count -= 1; }
640886c Polymorphic MapReduce!
Eric Holk authored
179 }
180 }
181 ret none;
182 }
183
d1fc2b5 @brson Convert to new closure syntax
brson authored
184 reduce(key, || get(p, ref_count, is_done) );
640886c Polymorphic MapReduce!
Eric Holk authored
185 }
186
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
// Entry point of the framework. Starts one map task per element of `inputs`,
// then acts as the master: it answers `find_reducer` requests (spawning a
// reduce task the first time a key is seen) until every mapper has sent
// `mapper_done`, sends `done` to every reducer, and joins all spawned tasks.
187 fn map_reduce<K1: copy send, K2: copy send, V: copy send>(
188 map: mapper<K1, K2, V>,
189 reduce: reducer<K2, V>,
98e161f @msullivan Switch the compiler over to using ~[] notation instead of []/~. Close…
msullivan authored
190 inputs: ~[K1])
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
191 {
640886c Polymorphic MapReduce!
Eric Holk authored
192 let ctrl = port();
193
194 // This task becomes the master control task. It spawns the worker
195 // tasks that do the rest.
196
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
// key -> channel of the reduce task already created for that key.
197 let reducers = treemap::treemap();
198 let mut tasks = start_mappers(map, chan(ctrl), inputs);
199 let mut num_mappers = vec::len(inputs) as int;
640886c Polymorphic MapReduce!
Eric Holk authored
200
// Serve control messages until every mapper has reported completion.
201 while num_mappers > 0 {
202 alt recv(ctrl) {
04a2887 @catamorphism Remove '.' after nullary tags in patterns
catamorphism authored
203 mapper_done {
8b58095 @graydon Register snapshots and switch logging over to use of log_full or #err…
graydon authored
204 // #error("received mapper terminated.");
640886c Polymorphic MapReduce!
Eric Holk authored
205 num_mappers -= 1;
206 }
207 find_reducer(k, cc) {
208 let c;
f0dfbe7 @graydon Register new snapshots, purge log_err and log_full in favour of log(.…
graydon authored
209 // log(error, "finding reducer for " + k);
640886c Polymorphic MapReduce!
Eric Holk authored
210 alt treemap::find(reducers, k) {
211 some(_c) {
f0dfbe7 @graydon Register new snapshots, purge log_err and log_full in favour of log(.…
graydon authored
212 // log(error,
8b58095 @graydon Register snapshots and switch logging over to use of log_full or #err…
graydon authored
213 // "reusing existing reducer for " + k);
640886c Polymorphic MapReduce!
Eric Holk authored
214 c = _c;
215 }
04a2887 @catamorphism Remove '.' after nullary tags in patterns
catamorphism authored
216 none {
f0dfbe7 @graydon Register new snapshots, purge log_err and log_full in favour of log(.…
graydon authored
217 // log(error, "creating new reducer for " + k);
640886c Polymorphic MapReduce!
Eric Holk authored
// The new reduce task sends its input channel back on `ch`.
218 let p = port();
a1ef79c @nikomatsakis update to use new spawn syntax
nikomatsakis authored
219 let ch = chan(p);
640886c Polymorphic MapReduce!
Eric Holk authored
220 let r = reduce, kk = k;
98e161f @msullivan Switch the compiler over to using ~[] notation instead of []/~. Close…
msullivan authored
221 tasks += ~[
d1fc2b5 @brson Convert to new closure syntax
brson authored
222 spawn_joinable(|| reduce_task(r, kk, ch) )
98e161f @msullivan Switch the compiler over to using ~[] notation instead of []/~. Close…
msullivan authored
223 ];
640886c Polymorphic MapReduce!
Eric Holk authored
224 c = recv(p);
225 treemap::insert(reducers, k, c);
226 }
227 }
// Reply to the requesting map task with the reducer's channel.
228 send(cc, c);
229 }
230 }
231 }
232
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
// Traversal callback: sends `done` on one reducer channel.
233 fn finish<K: copy send, V: copy send>(_k: K, v: chan<reduce_proto<V>>)
234 {
cfdf193 @marijnh Update our code to new type parameter kind syntax
marijnh authored
235 send(v, done);
236 }
640886c Polymorphic MapReduce!
Eric Holk authored
237 treemap::traverse(reducers, finish);
238
d1fc2b5 @brson Convert to new closure syntax
brson authored
// `tasks` holds both the map tasks and every reduce task spawned above.
239 for tasks.each |t| { join(t); }
640886c Polymorphic MapReduce!
Eric Holk authored
240 }
241 }
242
98e161f @msullivan Switch the compiler over to using ~[] notation instead of []/~. Close…
msullivan authored
// Program entry point. With file names on the command line, counts the words
// in those files; with no file names but RUST_BENCH set in the environment,
// counts randomly generated words instead. Logs the elapsed time of the
// map/reduce run in milliseconds.
243 fn main(argv: ~[str]) {
a00478b @eholk word-count-generic now generates random words in benchmark mode.
eholk authored
// No input files and not in benchmark mode: print usage and exit.
244 if vec::len(argv) < 2u && !os::getenv("RUST_BENCH").is_some() {
640886c Polymorphic MapReduce!
Eric Holk authored
245 let out = io::stdout();
246
5c49e4f @brson Reformat. Issue #855
brson authored
247 out.write_line(#fmt["Usage: %s <filename> ...", argv[0]]);
640886c Polymorphic MapReduce!
Eric Holk authored
248
249 ret;
250 }
251
a00478b @eholk word-count-generic now generates random words in benchmark mode.
eholk authored
// One reader factory per input. The factories are closures, so each reader
// is only created when `map` calls its factory.
252 let readers: ~[fn~() -> word_reader] = if argv.len() >= 2 {
// argv[0] is skipped; every remaining argument is treated as a file name.
253 vec::view(argv, 1u, argv.len()).map(
254 |f| fn~() -> word_reader { file_word_reader(f) } )
255 }
256 else {
// Benchmark mode: 50 readers, each limited to 400 random words.
257 let num_readers = 50;
258 let words_per_reader = 400;
259 vec::from_fn(
260 num_readers,
261 |_i| fn~() -> word_reader {
262 random_word_reader(words_per_reader) as word_reader
263 })
264 };
265
640886c Polymorphic MapReduce!
Eric Holk authored
266 let start = time::precise_time_ns();
267
a00478b @eholk word-count-generic now generates random words in benchmark mode.
eholk authored
268 map_reduce::map_reduce(map, reduce, readers);
640886c Polymorphic MapReduce!
Eric Holk authored
269 let stop = time::precise_time_ns();
270
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
// Nanoseconds to milliseconds.
271 let elapsed = (stop - start) / 1000000u64;
640886c Polymorphic MapReduce!
Eric Holk authored
272
f0dfbe7 @graydon Register new snapshots, purge log_err and log_full in favour of log(.…
graydon authored
273 log(error, "MapReduce completed in "
8b58095 @graydon Register snapshots and switch logging over to use of log_full or #err…
graydon authored
274 + u64::str(elapsed) + "ms");
640886c Polymorphic MapReduce!
Eric Holk authored
275 }
276
5c49e4f @brson Reformat. Issue #855
brson authored
// Reads the next word from `r`. Characters accepted by `is_word_char` are
// accumulated; the first other character after at least one word character
// ends the word and returns it. Other characters before a word are skipped.
// Returns `none` when the loop ends because `r.eof()` is true.
// NOTE(review): if `eof()` becomes true while a word is still being
// accumulated, that word is discarded. Whether this can happen depends on
// what `read_char` returns at end of input - confirm against io::reader.
277 fn read_word(r: io::reader) -> option<str> {
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
278 let mut w = "";
640886c Polymorphic MapReduce!
Eric Holk authored
279
280 while !r.eof() {
281 let c = r.read_char();
282
283 if is_word_char(c) {
ab6bb03 @brson Rename std::istr to std::str. Issue #855
brson authored
284 w += str::from_char(c);
5c49e4f @brson Reformat. Issue #855
brson authored
285 } else { if w != "" { ret some(w); } }
640886c Polymorphic MapReduce!
Eric Holk authored
286 }
287 ret none;
288 }
a00478b @eholk word-count-generic now generates random words in benchmark mode.
eholk authored
289
df83a79 @eholk In generic word count, use str instead of [u8], and use built in is_a…
eholk authored
// A word character is anything `char::is_alphabetic` or `char::is_digit`
// accepts, or an underscore.
290 fn is_word_char(c: char) -> bool {
a00478b @eholk word-count-generic now generates random words in benchmark mode.
eholk authored
291 char::is_alphabetic(c) || char::is_digit(c) || c == '_'
292 }
293
// A `word_reader` used in benchmark mode: it produces `count` words obtained
// from `rng.gen_str(5)` and then reports exhaustion with `none`.
294 class random_word_reader: word_reader {
// Number of words still to be produced.
295 let mut remaining: uint;
296 let rng: rand::rng;
297 new(count: uint) {
298 self.remaining = count;
299 self.rng = rand::rng();
300 }
301
// Returns the next generated word, or `none` once `remaining` reaches zero.
// NOTE(review): gen_str(5) presumably yields a 5-character string - confirm.
302 fn read_word() -> option<str> {
303 if self.remaining > 0 {
304 self.remaining -= 1;
305 some(self.rng.gen_str(5))
306 }
307 else { none }
308 }
309 }
Something went wrong with that request. Please try again.