Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100644 271 lines (222 sloc) 7.081 kb
640886c Polymorphic MapReduce!
Eric Holk authored
1 /**
2 A parallel word-frequency counting program.
3
4 This is meant primarily to demonstrate Rust's MapReduce framework.
5
6 It takes a list of files on the command line and outputs a list of
7 words along with how many times each word is used.
8
9 */
10
11 use std;
12
e5d095d @catamorphism Change option::t to option
catamorphism authored
13 import option = option;
fa9ad98 @graydon Copy first batch of material from libstd to libcore.
graydon authored
14 import option::some;
15 import option::none;
16 import str;
640886c Polymorphic MapReduce!
Eric Holk authored
17 import std::treemap;
fa9ad98 @graydon Copy first batch of material from libstd to libcore.
graydon authored
18 import vec;
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
19 import io;
20 import io::{reader_util, writer_util};
640886c Polymorphic MapReduce!
Eric Holk authored
21
22 import std::time;
fa9ad98 @graydon Copy first batch of material from libstd to libcore.
graydon authored
23 import u64;
24
25 import task;
26 import comm;
27 import comm::chan;
28 import comm::port;
29 import comm::recv;
30 import comm::send;
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
31 import comm::methods;
32
33 // These used to be in task, but they disappeard.
34 type joinable_task = port<()>;
35 fn spawn_joinable(f: fn~()) -> joinable_task {
36 let p = port();
37 let c = chan(p);
38 task::spawn() {||
39 f();
40 c.send(());
41 }
42 p
43 }
44
45 fn join(t: joinable_task) {
46 t.recv()
47 }
640886c Polymorphic MapReduce!
Eric Holk authored
48
df83a79 @eholk In generic word count, use str instead of [u8], and use built in is_a…
eholk authored
49 fn map(&&filename: str, emit: map_reduce::putter<str, int>) {
50 let f = alt io::file_reader(filename) {
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
51 result::ok(f) { f }
52 result::err(e) { fail #fmt("%?", e) }
53 };
640886c Polymorphic MapReduce!
Eric Holk authored
54
321fd80 @catamorphism Add an infinite loop construct
catamorphism authored
55 loop {
640886c Polymorphic MapReduce!
Eric Holk authored
56 alt read_word(f) {
df83a79 @eholk In generic word count, use str instead of [u8], and use built in is_a…
eholk authored
57 some(w) { emit(w, 1); }
04a2887 @catamorphism Remove '.' after nullary tags in patterns
catamorphism authored
58 none { break; }
640886c Polymorphic MapReduce!
Eric Holk authored
59 }
60 }
61 }
62
df83a79 @eholk In generic word count, use str instead of [u8], and use built in is_a…
eholk authored
63 fn reduce(&&word: str, get: map_reduce::getter<int>) {
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
64 let mut count = 0;
640886c Polymorphic MapReduce!
Eric Holk authored
65
321fd80 @catamorphism Add an infinite loop construct
catamorphism authored
66 loop { alt get() { some(_) { count += 1; } none { break; } } }
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
67
df83a79 @eholk In generic word count, use str instead of [u8], and use built in is_a…
eholk authored
68 io::println(#fmt("%s\t%?", word, count));
640886c Polymorphic MapReduce!
Eric Holk authored
69 }
70
71 mod map_reduce {
72 export putter;
73 export getter;
74 export mapper;
75 export reducer;
76 export map_reduce;
77
60ae159 @marijnh Switch to new param kind bound syntax
marijnh authored
78 type putter<K: send, V: send> = fn(K, V);
640886c Polymorphic MapReduce!
Eric Holk authored
79
80 // FIXME: the first K1 parameter should probably be a -, but that
81 // doesn't parse at the moment.
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
82 type mapper<K1: send, K2: send, V: send> = fn~(K1, putter<K2, V>);
640886c Polymorphic MapReduce!
Eric Holk authored
83
60ae159 @marijnh Switch to new param kind bound syntax
marijnh authored
84 type getter<V: send> = fn() -> option<V>;
640886c Polymorphic MapReduce!
Eric Holk authored
85
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
86 type reducer<K: copy send, V: copy send> = fn~(K, getter<V>);
640886c Polymorphic MapReduce!
Eric Holk authored
87
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
88 enum ctrl_proto<K: copy send, V: copy send> {
89 find_reducer(K, chan<chan<reduce_proto<V>>>),
90 mapper_done
640886c Polymorphic MapReduce!
Eric Holk authored
91 }
92
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
93 enum reduce_proto<V: copy send> { emit_val(V), done, ref, release }
640886c Polymorphic MapReduce!
Eric Holk authored
94
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
95 fn start_mappers<K1: copy send, K2: copy send, V: copy send>(
96 map: mapper<K1, K2, V>,
97 ctrl: chan<ctrl_proto<K2, V>>, inputs: [K1])
98 -> [joinable_task]
99 {
100 let mut tasks = [];
c902eaf @marijnh Convert old-style for loops to new-style
marijnh authored
101 for inputs.each {|i|
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
102 tasks += [spawn_joinable {|| map_task(map, ctrl, i)}];
640886c Polymorphic MapReduce!
Eric Holk authored
103 }
104 ret tasks;
105 }
106
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
107 fn map_task<K1: copy send, K2: copy send, V: copy send>(
108 map: mapper<K1, K2, V>,
109 ctrl: chan<ctrl_proto<K2, V>>,
110 input: K1)
111 {
f0dfbe7 @graydon Register new snapshots, purge log_err and log_full in favour of log(.…
graydon authored
112 // log(error, "map_task " + input);
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
113 let intermediates = treemap::treemap();
640886c Polymorphic MapReduce!
Eric Holk authored
114
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
115 fn emit<K2: copy send, V: copy send>(
116 im: treemap::treemap<K2, chan<reduce_proto<V>>>,
117 ctrl: chan<ctrl_proto<K2, V>>, key: K2, val: V)
118 {
640886c Polymorphic MapReduce!
Eric Holk authored
119 let c;
120 alt treemap::find(im, key) {
7298b8f @marijnh Insert omitted semicolons for statements
marijnh authored
121 some(_c) { c = _c; }
04a2887 @catamorphism Remove '.' after nullary tags in patterns
catamorphism authored
122 none {
640886c Polymorphic MapReduce!
Eric Holk authored
123 let p = port();
124 send(ctrl, find_reducer(key, chan(p)));
125 c = recv(p);
126 treemap::insert(im, key, c);
127 send(c, ref);
128 }
129 }
130 send(c, emit_val(val));
131 }
132
133 map(input, bind emit(intermediates, ctrl, _, _));
134
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
135 fn finish<K: copy send, V: copy send>(_k: K, v: chan<reduce_proto<V>>)
136 {
640886c Polymorphic MapReduce!
Eric Holk authored
137 send(v, release);
138 }
139 treemap::traverse(intermediates, finish);
140 send(ctrl, mapper_done);
141 }
142
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
143 fn reduce_task<K: copy send, V: copy send>(
144 reduce: reducer<K, V>,
145 key: K,
146 out: chan<chan<reduce_proto<V>>>)
147 {
640886c Polymorphic MapReduce!
Eric Holk authored
148 let p = port();
149
150 send(out, chan(p));
151
152 let ref_count = 0;
153 let is_done = false;
154
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
155 fn get<V: copy send>(p: port<reduce_proto<V>>,
156 &ref_count: int, &is_done: bool)
ca1df2b @marijnh Pretty-print for new arg-mode syntax
marijnh authored
157 -> option<V> {
640886c Polymorphic MapReduce!
Eric Holk authored
158 while !is_done || ref_count > 0 {
159 alt recv(p) {
160 emit_val(v) {
8b58095 @graydon Register snapshots and switch logging over to use of log_full or #err…
graydon authored
161 // #error("received %d", v);
640886c Polymorphic MapReduce!
Eric Holk authored
162 ret some(v);
163 }
04a2887 @catamorphism Remove '.' after nullary tags in patterns
catamorphism authored
164 done {
8b58095 @graydon Register snapshots and switch logging over to use of log_full or #err…
graydon authored
165 // #error("all done");
640886c Polymorphic MapReduce!
Eric Holk authored
166 is_done = true;
167 }
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
168 ref { ref_count += 1; }
169 release { ref_count -= 1; }
640886c Polymorphic MapReduce!
Eric Holk authored
170 }
171 }
172 ret none;
173 }
174
175 reduce(key, bind get(p, ref_count, is_done));
176 }
177
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
178 fn map_reduce<K1: copy send, K2: copy send, V: copy send>(
179 map: mapper<K1, K2, V>,
180 reduce: reducer<K2, V>,
181 inputs: [K1])
182 {
640886c Polymorphic MapReduce!
Eric Holk authored
183 let ctrl = port();
184
185 // This task becomes the master control task. It task::_spawns
186 // to do the rest.
187
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
188 let reducers = treemap::treemap();
189 let mut tasks = start_mappers(map, chan(ctrl), inputs);
190 let mut num_mappers = vec::len(inputs) as int;
640886c Polymorphic MapReduce!
Eric Holk authored
191
192 while num_mappers > 0 {
193 alt recv(ctrl) {
04a2887 @catamorphism Remove '.' after nullary tags in patterns
catamorphism authored
194 mapper_done {
8b58095 @graydon Register snapshots and switch logging over to use of log_full or #err…
graydon authored
195 // #error("received mapper terminated.");
640886c Polymorphic MapReduce!
Eric Holk authored
196 num_mappers -= 1;
197 }
198 find_reducer(k, cc) {
199 let c;
f0dfbe7 @graydon Register new snapshots, purge log_err and log_full in favour of log(.…
graydon authored
200 // log(error, "finding reducer for " + k);
640886c Polymorphic MapReduce!
Eric Holk authored
201 alt treemap::find(reducers, k) {
202 some(_c) {
f0dfbe7 @graydon Register new snapshots, purge log_err and log_full in favour of log(.…
graydon authored
203 // log(error,
8b58095 @graydon Register snapshots and switch logging over to use of log_full or #err…
graydon authored
204 // "reusing existing reducer for " + k);
640886c Polymorphic MapReduce!
Eric Holk authored
205 c = _c;
206 }
04a2887 @catamorphism Remove '.' after nullary tags in patterns
catamorphism authored
207 none {
f0dfbe7 @graydon Register new snapshots, purge log_err and log_full in favour of log(.…
graydon authored
208 // log(error, "creating new reducer for " + k);
640886c Polymorphic MapReduce!
Eric Holk authored
209 let p = port();
a1ef79c @nikomatsakis update to use new spawn syntax
nikomatsakis authored
210 let ch = chan(p);
640886c Polymorphic MapReduce!
Eric Holk authored
211 let r = reduce, kk = k;
a1ef79c @nikomatsakis update to use new spawn syntax
nikomatsakis authored
212 tasks += [
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
213 spawn_joinable {|| reduce_task(r, kk, ch) }
a1ef79c @nikomatsakis update to use new spawn syntax
nikomatsakis authored
214 ];
640886c Polymorphic MapReduce!
Eric Holk authored
215 c = recv(p);
216 treemap::insert(reducers, k, c);
217 }
218 }
219 send(cc, c);
220 }
221 }
222 }
223
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
224 fn finish<K: copy send, V: copy send>(_k: K, v: chan<reduce_proto<V>>)
225 {
cfdf193 @marijnh Update our code to new type parameter kind syntax
marijnh authored
226 send(v, done);
227 }
640886c Polymorphic MapReduce!
Eric Holk authored
228 treemap::traverse(reducers, finish);
229
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
230 for tasks.each {|t| join(t); }
640886c Polymorphic MapReduce!
Eric Holk authored
231 }
232 }
233
5c49e4f @brson Reformat. Issue #855
brson authored
234 fn main(argv: [str]) {
640886c Polymorphic MapReduce!
Eric Holk authored
235 if vec::len(argv) < 2u {
236 let out = io::stdout();
237
5c49e4f @brson Reformat. Issue #855
brson authored
238 out.write_line(#fmt["Usage: %s <filename> ...", argv[0]]);
640886c Polymorphic MapReduce!
Eric Holk authored
239
240 // TODO: run something just to make sure the code hasn't
241 // broken yet. This is the unit test mode of this program.
242
243 ret;
244 }
245
246 let start = time::precise_time_ns();
247
df83a79 @eholk In generic word count, use str instead of [u8], and use built in is_a…
eholk authored
248 map_reduce::map_reduce(map, reduce, vec::slice(argv, 1u, argv.len()));
640886c Polymorphic MapReduce!
Eric Holk authored
249 let stop = time::precise_time_ns();
250
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
251 let elapsed = (stop - start) / 1000000u64;
640886c Polymorphic MapReduce!
Eric Holk authored
252
f0dfbe7 @graydon Register new snapshots, purge log_err and log_full in favour of log(.…
graydon authored
253 log(error, "MapReduce completed in "
8b58095 @graydon Register snapshots and switch logging over to use of log_full or #err…
graydon authored
254 + u64::str(elapsed) + "ms");
640886c Polymorphic MapReduce!
Eric Holk authored
255 }
256
5c49e4f @brson Reformat. Issue #855
brson authored
257 fn read_word(r: io::reader) -> option<str> {
0c3a128 @eholk Update word-count-generic to latest syntax and un-xfail it. Closes #1…
eholk authored
258 let mut w = "";
640886c Polymorphic MapReduce!
Eric Holk authored
259
260 while !r.eof() {
261 let c = r.read_char();
262
263 if is_word_char(c) {
ab6bb03 @brson Rename std::istr to std::str. Issue #855
brson authored
264 w += str::from_char(c);
5c49e4f @brson Reformat. Issue #855
brson authored
265 } else { if w != "" { ret some(w); } }
640886c Polymorphic MapReduce!
Eric Holk authored
266 }
267 ret none;
268 }
df83a79 @eholk In generic word count, use str instead of [u8], and use built in is_a…
eholk authored
269 fn is_word_char(c: char) -> bool {
270 char::is_alphabetic(c) || char::is_digit(c) || c == '_' }
Something went wrong with that request. Please try again.