Skip to content
Newer
Older
100644 270 lines (221 sloc) 6.89 KB
640886c Polymorphic MapReduce!
Eric Holk authored
1 /**
2 A parallel word-frequency counting program.
3
4 This is meant primarily to demonstrate Rust's MapReduce framework.
5
6 It takes a list of files on the command line and outputs a list of
7 words along with how many times each word is used.
8
9 */
10
11 use std;
12
e5d095d @catamorphism Change option::t to option
catamorphism authored
13 import option = option;
fa9ad98 @graydon Copy first batch of material from libstd to libcore.
graydon authored
14 import option::some;
15 import option::none;
16 import str;
640886c Polymorphic MapReduce!
Eric Holk authored
17 import std::treemap;
fa9ad98 @graydon Copy first batch of material from libstd to libcore.
graydon authored
18 import vec;
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
19 import io;
20 import io::{reader_util, writer_util};
640886c Polymorphic MapReduce!
Eric Holk authored
21
22 import std::time;
fa9ad98 @graydon Copy first batch of material from libstd to libcore.
graydon authored
23 import u64;
24
25 import task;
26 import comm;
27 import comm::chan;
28 import comm::port;
29 import comm::recv;
30 import comm::send;
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
31 import comm::methods;
32
33 // These used to be in task, but they disappeard.
34 type joinable_task = port<()>;
35 fn spawn_joinable(f: fn~()) -> joinable_task {
36 let p = port();
37 let c = chan(p);
38 task::spawn() {||
39 f();
40 c.send(());
41 }
42 p
43 }
44
45 fn join(t: joinable_task) {
46 t.recv()
47 }
640886c Polymorphic MapReduce!
Eric Holk authored
48
df83a79 @eholk In generic word count, use str instead of [u8], and use built in is_a…
eholk authored
49 fn map(&&filename: str, emit: map_reduce::putter<str, int>) {
50 let f = alt io::file_reader(filename) {
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
51 result::ok(f) { f }
52 result::err(e) { fail #fmt("%?", e) }
53 };
640886c Polymorphic MapReduce!
Eric Holk authored
54
321fd80 @catamorphism Add an infinite loop construct
catamorphism authored
55 loop {
640886c Polymorphic MapReduce!
Eric Holk authored
56 alt read_word(f) {
df83a79 @eholk In generic word count, use str instead of [u8], and use built in is_a…
eholk authored
57 some(w) { emit(w, 1); }
04a2887 @catamorphism Remove '.' after nullary tags in patterns
catamorphism authored
58 none { break; }
640886c Polymorphic MapReduce!
Eric Holk authored
59 }
60 }
61 }
62
df83a79 @eholk In generic word count, use str instead of [u8], and use built in is_a…
eholk authored
63 fn reduce(&&word: str, get: map_reduce::getter<int>) {
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
64 let mut count = 0;
640886c Polymorphic MapReduce!
Eric Holk authored
65
321fd80 @catamorphism Add an infinite loop construct
catamorphism authored
66 loop { alt get() { some(_) { count += 1; } none { break; } } }
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
67
df83a79 @eholk In generic word count, use str instead of [u8], and use built in is_a…
eholk authored
68 io::println(#fmt("%s\t%?", word, count));
640886c Polymorphic MapReduce!
Eric Holk authored
69 }
70
71 mod map_reduce {
72 export putter;
73 export getter;
74 export mapper;
75 export reducer;
76 export map_reduce;
77
60ae159 @marijnh Switch to new param kind bound syntax
marijnh authored
78 type putter<K: send, V: send> = fn(K, V);
640886c Polymorphic MapReduce!
Eric Holk authored
79
13d4b61 @catamorphism Comments only: annotate FIXMEs in tests
catamorphism authored
80 // FIXME: the first K1 parameter should probably be a - (#2599)
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
81 type mapper<K1: send, K2: send, V: send> = fn~(K1, putter<K2, V>);
640886c Polymorphic MapReduce!
Eric Holk authored
82
60ae159 @marijnh Switch to new param kind bound syntax
marijnh authored
83 type getter<V: send> = fn() -> option<V>;
640886c Polymorphic MapReduce!
Eric Holk authored
84
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
85 type reducer<K: copy send, V: copy send> = fn~(K, getter<V>);
640886c Polymorphic MapReduce!
Eric Holk authored
86
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
87 enum ctrl_proto<K: copy send, V: copy send> {
88 find_reducer(K, chan<chan<reduce_proto<V>>>),
89 mapper_done
640886c Polymorphic MapReduce!
Eric Holk authored
90 }
91
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
92 enum reduce_proto<V: copy send> { emit_val(V), done, ref, release }
640886c Polymorphic MapReduce!
Eric Holk authored
93
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
94 fn start_mappers<K1: copy send, K2: copy send, V: copy send>(
95 map: mapper<K1, K2, V>,
96 ctrl: chan<ctrl_proto<K2, V>>, inputs: [K1])
97 -> [joinable_task]
98 {
99 let mut tasks = [];
c902eaf @marijnh Convert old-style for loops to new-style
marijnh authored
100 for inputs.each {|i|
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
101 tasks += [spawn_joinable {|| map_task(map, ctrl, i)}];
640886c Polymorphic MapReduce!
Eric Holk authored
102 }
103 ret tasks;
104 }
105
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
106 fn map_task<K1: copy send, K2: copy send, V: copy send>(
107 map: mapper<K1, K2, V>,
108 ctrl: chan<ctrl_proto<K2, V>>,
109 input: K1)
110 {
f0dfbe7 @graydon Register new snapshots, purge log_err and log_full in favour of log(.…
graydon authored
111 // log(error, "map_task " + input);
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
112 let intermediates = treemap::treemap();
640886c Polymorphic MapReduce!
Eric Holk authored
113
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
114 fn emit<K2: copy send, V: copy send>(
115 im: treemap::treemap<K2, chan<reduce_proto<V>>>,
116 ctrl: chan<ctrl_proto<K2, V>>, key: K2, val: V)
117 {
640886c Polymorphic MapReduce!
Eric Holk authored
118 let c;
119 alt treemap::find(im, key) {
7298b8f @marijnh Insert omitted semicolons for statements
marijnh authored
120 some(_c) { c = _c; }
04a2887 @catamorphism Remove '.' after nullary tags in patterns
catamorphism authored
121 none {
640886c Polymorphic MapReduce!
Eric Holk authored
122 let p = port();
123 send(ctrl, find_reducer(key, chan(p)));
124 c = recv(p);
125 treemap::insert(im, key, c);
126 send(c, ref);
127 }
128 }
129 send(c, emit_val(val));
130 }
131
4dcf84e @brson Remove bind. Issue #2189
brson authored
132 map(input, {|a,b|emit(intermediates, ctrl, a, b)});
640886c Polymorphic MapReduce!
Eric Holk authored
133
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
134 fn finish<K: copy send, V: copy send>(_k: K, v: chan<reduce_proto<V>>)
135 {
640886c Polymorphic MapReduce!
Eric Holk authored
136 send(v, release);
137 }
138 treemap::traverse(intermediates, finish);
139 send(ctrl, mapper_done);
140 }
141
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
142 fn reduce_task<K: copy send, V: copy send>(
143 reduce: reducer<K, V>,
144 key: K,
145 out: chan<chan<reduce_proto<V>>>)
146 {
640886c Polymorphic MapReduce!
Eric Holk authored
147 let p = port();
148
149 send(out, chan(p));
150
4dcf84e @brson Remove bind. Issue #2189
brson authored
151 let mut ref_count = 0;
152 let mut is_done = false;
640886c Polymorphic MapReduce!
Eric Holk authored
153
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
154 fn get<V: copy send>(p: port<reduce_proto<V>>,
155 &ref_count: int, &is_done: bool)
ca1df2b @marijnh Pretty-print for new arg-mode syntax
marijnh authored
156 -> option<V> {
640886c Polymorphic MapReduce!
Eric Holk authored
157 while !is_done || ref_count > 0 {
158 alt recv(p) {
159 emit_val(v) {
8b58095 @graydon Register snapshots and switch logging over to use of log_full or #err…
graydon authored
160 // #error("received %d", v);
640886c Polymorphic MapReduce!
Eric Holk authored
161 ret some(v);
162 }
04a2887 @catamorphism Remove '.' after nullary tags in patterns
catamorphism authored
163 done {
8b58095 @graydon Register snapshots and switch logging over to use of log_full or #err…
graydon authored
164 // #error("all done");
640886c Polymorphic MapReduce!
Eric Holk authored
165 is_done = true;
166 }
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
167 ref { ref_count += 1; }
168 release { ref_count -= 1; }
640886c Polymorphic MapReduce!
Eric Holk authored
169 }
170 }
171 ret none;
172 }
173
4dcf84e @brson Remove bind. Issue #2189
brson authored
174 reduce(key, {||get(p, ref_count, is_done)});
640886c Polymorphic MapReduce!
Eric Holk authored
175 }
176
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
177 fn map_reduce<K1: copy send, K2: copy send, V: copy send>(
178 map: mapper<K1, K2, V>,
179 reduce: reducer<K2, V>,
180 inputs: [K1])
181 {
640886c Polymorphic MapReduce!
Eric Holk authored
182 let ctrl = port();
183
184 // This task becomes the master control task. It task::_spawns
185 // to do the rest.
186
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
187 let reducers = treemap::treemap();
188 let mut tasks = start_mappers(map, chan(ctrl), inputs);
189 let mut num_mappers = vec::len(inputs) as int;
640886c Polymorphic MapReduce!
Eric Holk authored
190
191 while num_mappers > 0 {
192 alt recv(ctrl) {
04a2887 @catamorphism Remove '.' after nullary tags in patterns
catamorphism authored
193 mapper_done {
8b58095 @graydon Register snapshots and switch logging over to use of log_full or #err…
graydon authored
194 // #error("received mapper terminated.");
640886c Polymorphic MapReduce!
Eric Holk authored
195 num_mappers -= 1;
196 }
197 find_reducer(k, cc) {
198 let c;
f0dfbe7 @graydon Register new snapshots, purge log_err and log_full in favour of log(.…
graydon authored
199 // log(error, "finding reducer for " + k);
640886c Polymorphic MapReduce!
Eric Holk authored
200 alt treemap::find(reducers, k) {
201 some(_c) {
f0dfbe7 @graydon Register new snapshots, purge log_err and log_full in favour of log(.…
graydon authored
202 // log(error,
8b58095 @graydon Register snapshots and switch logging over to use of log_full or #err…
graydon authored
203 // "reusing existing reducer for " + k);
640886c Polymorphic MapReduce!
Eric Holk authored
204 c = _c;
205 }
04a2887 @catamorphism Remove '.' after nullary tags in patterns
catamorphism authored
206 none {
f0dfbe7 @graydon Register new snapshots, purge log_err and log_full in favour of log(.…
graydon authored
207 // log(error, "creating new reducer for " + k);
640886c Polymorphic MapReduce!
Eric Holk authored
208 let p = port();
a1ef79c @nikomatsakis update to use new spawn syntax
nikomatsakis authored
209 let ch = chan(p);
640886c Polymorphic MapReduce!
Eric Holk authored
210 let r = reduce, kk = k;
a1ef79c @nikomatsakis update to use new spawn syntax
nikomatsakis authored
211 tasks += [
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
212 spawn_joinable {|| reduce_task(r, kk, ch) }
a1ef79c @nikomatsakis update to use new spawn syntax
nikomatsakis authored
213 ];
640886c Polymorphic MapReduce!
Eric Holk authored
214 c = recv(p);
215 treemap::insert(reducers, k, c);
216 }
217 }
218 send(cc, c);
219 }
220 }
221 }
222
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
223 fn finish<K: copy send, V: copy send>(_k: K, v: chan<reduce_proto<V>>)
224 {
cfdf193 @marijnh Update our code to new type parameter kind syntax
marijnh authored
225 send(v, done);
226 }
640886c Polymorphic MapReduce!
Eric Holk authored
227 treemap::traverse(reducers, finish);
228
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
229 for tasks.each {|t| join(t); }
640886c Polymorphic MapReduce!
Eric Holk authored
230 }
231 }
232
5c49e4f @brson Reformat. Issue #855
brson authored
233 fn main(argv: [str]) {
640886c Polymorphic MapReduce!
Eric Holk authored
234 if vec::len(argv) < 2u {
235 let out = io::stdout();
236
5c49e4f @brson Reformat. Issue #855
brson authored
237 out.write_line(#fmt["Usage: %s <filename> ...", argv[0]]);
640886c Polymorphic MapReduce!
Eric Holk authored
238
239 // TODO: run something just to make sure the code hasn't
240 // broken yet. This is the unit test mode of this program.
241
242 ret;
243 }
244
245 let start = time::precise_time_ns();
246
df83a79 @eholk In generic word count, use str instead of [u8], and use built in is_a…
eholk authored
247 map_reduce::map_reduce(map, reduce, vec::slice(argv, 1u, argv.len()));
640886c Polymorphic MapReduce!
Eric Holk authored
248 let stop = time::precise_time_ns();
249
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
250 let elapsed = (stop - start) / 1000000u64;
640886c Polymorphic MapReduce!
Eric Holk authored
251
f0dfbe7 @graydon Register new snapshots, purge log_err and log_full in favour of log(.…
graydon authored
252 log(error, "MapReduce completed in "
8b58095 @graydon Register snapshots and switch logging over to use of log_full or #err…
graydon authored
253 + u64::str(elapsed) + "ms");
640886c Polymorphic MapReduce!
Eric Holk authored
254 }
255
5c49e4f @brson Reformat. Issue #855
brson authored
256 fn read_word(r: io::reader) -> option<str> {
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1740
eholk authored
257 let mut w = "";
640886c Polymorphic MapReduce!
Eric Holk authored
258
259 while !r.eof() {
260 let c = r.read_char();
261
262 if is_word_char(c) {
ab6bb03 @brson Rename std::istr to std::str. Issue #855
brson authored
263 w += str::from_char(c);
5c49e4f @brson Reformat. Issue #855
brson authored
264 } else { if w != "" { ret some(w); } }
640886c Polymorphic MapReduce!
Eric Holk authored
265 }
266 ret none;
267 }
df83a79 @eholk In generic word count, use str instead of [u8], and use built in is_a…
eholk authored
268 fn is_word_char(c: char) -> bool {
269 char::is_alphabetic(c) || char::is_digit(c) || c == '_' }
Something went wrong with that request. Please try again.