Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Newer
Older
100644 214 lines (165 sloc) 5.771 kb
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
1 // xfail-pretty
2
3 // multi tasking k-nucleotide
4
5 import io::reader_util;
6
7 use std;
8 import std::map;
9 import std::map::hashmap;
10 import std::sort;
11
22e955a @eholk Move streams into core.
eholk authored
12 import pipes::{stream, port, chan};
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
13
14 // given a map, print a sorted version of it
92743dc @msullivan Move the world over to using the new style string literals and types.…
msullivan authored
// Renders the frequency map `mm` as text. Every (key, count) entry becomes one
// line holding the upper-cased key followed by the count as a percentage of
// `total`, printed with the %0.3f format. Entries are ordered by sortKV (below)
// before they are formatted. Returns the accumulated string.
15 fn sort_and_fmt(mm: hashmap<~[u8], uint>, total: uint) -> ~str {
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
// xx expressed as a percentage of yy, computed in floating point
16 fn pct(xx: uint, yy: uint) -> float {
b355936 @brson Convert ret to return
brson authored
17 return (xx as float) * 100f / (yy as float);
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
18 }
19
97452c0 @nikomatsakis Remove modes from map API and replace with regions.
nikomatsakis authored
// Compares two pairs by their second element only.
// NOTE(review): the test is >= even though the name says le; presumably this
// makes merge_sort place the largest values first -- confirm against the
// contract of sort::merge_sort.
20 pure fn le_by_val<TT: copy, UU: copy>(kv0: &(TT,UU),
21 kv1: &(TT,UU)) -> bool {
22 let (_, v0) = *kv0;
23 let (_, v1) = *kv1;
b355936 @brson Convert ret to return
brson authored
24 return v0 >= v1;
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
25 }
26
97452c0 @nikomatsakis Remove modes from map API and replace with regions.
nikomatsakis authored
// Compares two pairs by their first element only, using <=.
27 pure fn le_by_key<TT: copy, UU: copy>(kv0: &(TT,UU),
28 kv1: &(TT,UU)) -> bool {
29 let (k0, _) = *kv0;
30 let (k1, _) = *kv1;
b355936 @brson Convert ret to return
brson authored
31 return k0 <= k1;
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
32 }
33
34 // sort by key, then by value
// The inner merge_sort (by key) runs first and the outer one (by value) is
// applied to its result. Presumably this relies on merge_sort being stable so
// that entries with equal values keep their key order -- TODO confirm.
35 fn sortKV<TT: copy, UU: copy>(orig: ~[(TT,UU)]) -> ~[(TT,UU)] {
b355936 @brson Convert ret to return
brson authored
36 return sort::merge_sort(le_by_val, sort::merge_sort(le_by_key, orig));
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
37 }
38
39 let mut pairs = ~[];
40
41 // map -> [(k,%)]
// each count is replaced by its percentage of `total` while collecting
42 mm.each(fn&(key: ~[u8], val: uint) -> bool {
43 vec::push(pairs, (key, pct(val, total)));
b355936 @brson Convert ret to return
brson authored
44 return true;
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
45 });
46
47 let pairs_sorted = sortKV(pairs);
48
92743dc @msullivan Move the world over to using the new style string literals and types.…
msullivan authored
49 let mut buffer = ~"";
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
50
// Append one formatted line per sorted pair. The closure is marked unsafe and
// calls str::unsafe::from_bytes to turn the key bytes back into a string.
51 pairs_sorted.each(fn&(kv: (~[u8], float)) -> bool unsafe {
52 let (k,v) = kv;
a9cc506 @paulstansifer Change syntax extension syntax: `#m[...]` -> `m!{...}`.
paulstansifer authored
53 buffer += (fmt!{"%s %0.3f\n", str::to_upper(str::unsafe::from_bytes(k)), v});
b355936 @brson Convert ret to return
brson authored
54 return true;
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
55 });
56
b355936 @brson Convert ret to return
brson authored
57 return buffer;
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
58 }
59
60 // given a map, search for the frequency of a pattern
92743dc @msullivan Move the world over to using the new style string literals and types.…
msullivan authored
// Returns the count stored in `mm` for `key`, or 0 when the key is absent.
// The key is lower-cased and converted to bytes before the lookup, so callers
// may pass the pattern in upper case.
61 fn find(mm: hashmap<~[u8], uint>, key: ~str) -> uint {
ecaf9e3 @brson Convert alt to match. Stop parsing alt
brson authored
62 match mm.find(str::bytes(str::to_lower(key))) {
025d866 @brson Switch alts to use arrows
brson authored
63 option::none => { return 0u; }
64 option::some(num) => { return num; }
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
65 }
66 }
67
68 // given a map, increment the counter for a key
// Adds one to the counter stored in `mm` under `key`; a key that is not yet
// present is inserted with a count of 1.
69 fn update_freq(mm: hashmap<~[u8], uint>, key: &[u8]) {
// full-range slice of the borrowed key; presumably this yields the owned
// ~[u8] that the map stores as its key type -- TODO confirm
70 let key = vec::slice(key, 0, key.len());
ecaf9e3 @brson Convert alt to match. Stop parsing alt
brson authored
71 match mm.find(key) {
025d866 @brson Switch alts to use arrows
brson authored
72 option::none => { mm.insert(key, 1u ); }
73 option::some(val) => { mm.insert(key, 1u + val); }
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
74 }
75 }
76
77 // given a ~[u8], for each window call a function
78 // i.e., for "hello" and windows of size four,
79 // run it("hell") and it("ello"), then return "llo"
c918bd0 @eholk Fix borrow check errors in k-nucleotide.
eholk authored
// Calls `it` once for every window of `nn` consecutive bytes of `bb`, advancing
// one byte at a time, and returns the final nn - 1 bytes of `bb` so the caller
// can prepend them to the next chunk of input.
80 fn windows_with_carry(bb: &[u8], nn: uint,
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
81 it: fn(window: &[u8])) -> ~[u8] {
82 let mut ii = 0u;
83
84 let len = vec::len(bb);
// NOTE(review): len and nn are unsigned, so len - (nn - 1u) would go below
// zero when len < nn - 1 (or when nn is 0). This looks reachable if the very
// first chunk is shorter than nn - 1 bytes -- confirm how callers guard it.
// The same expression is used for the returned slice below.
85 while ii < len - (nn - 1u) {
65beca4 @eholk Use iteration protocol for ebml, use vec::view in more places (issue …
eholk authored
86 it(vec::view(bb, ii, ii+nn));
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
87 ii += 1u;
88 }
89
b355936 @brson Convert ret to return
brson authored
// carry: the trailing nn - 1 bytes, too few to form a window on their own
90 return vec::slice(bb, len - (nn - 1u), len);
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
91 }
92
22e955a @eholk Move streams into core.
eholk authored
// Worker body for one window size `sz`. Receives chunks of sequence bytes on
// `from_parent` until an empty vector arrives, counts every window of `sz`
// bytes in `freqs`, then sends a single result string on `to_parent`.
93 fn make_sequence_processor(sz: uint, from_parent: pipes::port<~[u8]>,
92743dc @msullivan Move the world over to using the new style string literals and types.…
msullivan authored
94 to_parent: pipes::chan<~str>) {
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
95
96 let freqs: hashmap<~[u8], uint> = map::bytes_hash();
// bytes left over from the previous chunk (returned by windows_with_carry)
97 let mut carry: ~[u8] = ~[];
// number of windows counted so far
98 let mut total: uint = 0u;
99
100 let mut line: ~[u8];
101
102 loop {
103
104 line = from_parent.recv();
// an empty vector is the end-of-input marker
105 if line == ~[] { break; }
106
// prepend the carry so windows spanning two chunks are counted as well
107 carry = windows_with_carry(carry + line, sz, |window| {
108 update_freq(freqs, window);
109 total += 1u;
110 });
111 }
112
ecaf9e3 @brson Convert alt to match. Stop parsing alt
brson authored
// Sizes 1 and 2 report the whole table via sort_and_fmt; sizes 3, 4, 6, 12 and
// 18 report the count of one fixed pattern of that length; any other size
// yields an empty string.
113 let buffer = match sz {
025d866 @brson Switch alts to use arrows
brson authored
114 1u => { sort_and_fmt(freqs, total) }
115 2u => { sort_and_fmt(freqs, total) }
116 3u => { fmt!{"%u\t%s", find(freqs, ~"GGT"), ~"GGT"} }
117 4u => { fmt!{"%u\t%s", find(freqs, ~"GGTA"), ~"GGTA"} }
118 6u => { fmt!{"%u\t%s", find(freqs, ~"GGTATT"), ~"GGTATT"} }
119 12u => { fmt!{"%u\t%s", find(freqs, ~"GGTATTTTAATT"), ~"GGTATTTTAATT"} }
120 18u => { fmt!{"%u\t%s", find(freqs, ~"GGTATTTTAATTTATAGT"), ~"GGTATTTTAATTTATAGT"} }
121 _ => { ~"" }
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
122 };
123
a9cc506 @paulstansifer Change syntax extension syntax: `#m[...]` -> `m!{...}`.
paulstansifer authored
124 //comm::send(to_parent, fmt!{"yay{%u}", sz});
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
125 to_parent.send(buffer);
126 }
127
128 // given a FASTA file on stdin, process sequence THREE
92743dc @msullivan Move the world over to using the new style string literals and types.…
msullivan authored
// Entry point. Reads the input line by line, forwards the lines of the
// sequence whose header mentions THREE to one worker task per window size, and
// prints the result string each worker sends back. `args` is not used in this
// body.
129 fn main(args: ~[~str]) {
// input source: the bundled data file when RUST_BENCH is set in the
// environment, otherwise stdin
130 let rdr = if os::getenv(~"RUST_BENCH").is_some() {
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
131 // FIXME: Using this compile-time env variable is a crummy way to
132 // get to this massive data set, but #include_bin chokes on it (#2598)
133 let path = path::connect(
a9cc506 @paulstansifer Change syntax extension syntax: `#m[...]` -> `m!{...}`.
paulstansifer authored
134 env!{"CFG_SRC_DIR"},
92743dc @msullivan Move the world over to using the new style string literals and types.…
msullivan authored
135 ~"src/test/bench/shootout-k-nucleotide.data"
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
136 );
137 result::get(io::file_reader(path))
138 } else {
139 io::stdin()
140 };
141
142
143
144 // initialize each sequence sorter
145 let sizes = ~[1u,2u,3u,4u,6u,12u,18u];
// One result stream per size is created up front and wrapped in some(...);
// the closure below swaps each one out with <-> before unwrapping it
// (presumably so the stream can be moved out of the vector -- TODO confirm).
146 let streams = vec::map(sizes, |_sz| some(stream()));
147 let streams = vec::to_mut(streams);
// receiving ends of the result streams, in the same order as `sizes`
148 let mut from_child = ~[];
// to_child collects the sending ends used to feed each worker
149 let to_child = vec::mapi(sizes, |ii, sz| {
150 let mut stream = none;
151 stream <-> streams[ii];
152 let (to_parent_, from_child_) = option::unwrap(stream);
153
154 vec::push(from_child, from_child_);
155
22e955a @eholk Move streams into core.
eholk authored
// second stream for the other direction: sequence bytes from main to worker
156 let (to_child, from_parent) = pipes::stream();
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
157
// one task per window size runs make_sequence_processor
158 do task::spawn_with(from_parent) |from_parent| {
159 make_sequence_processor(sz, from_parent, to_parent_);
160 };
161
162 to_child
163 });
164
165
166 // latch stores true after we've started
167 // reading the sequence of interest
168 let mut proc_mode = false;
169
170 while !rdr.eof() {
92743dc @msullivan Move the world over to using the new style string literals and types.…
msullivan authored
171 let line: ~str = rdr.read_line();
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
172
c26d025 @graydon Switch 'cont' to 'again' everywhere. Close #2229.
graydon authored
// skip empty lines; this also keeps the line[0] access below in range
173 if str::len(line) == 0u { again; }
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
174
ecaf9e3 @brson Convert alt to match. Stop parsing alt
brson authored
// dispatch on the first byte of the line and on whether the sequence of
// interest has already started
175 match (line[0], proc_mode) {
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
176
177 // start processing if this is the one
025d866 @brson Switch alts to use arrows
brson authored
178 ('>' as u8, false) => {
ecaf9e3 @brson Convert alt to match. Stop parsing alt
brson authored
179 match str::find_str_from(line, ~"THREE", 1u) {
025d866 @brson Switch alts to use arrows
brson authored
180 option::some(_) => { proc_mode = true; }
181 option::none => { }
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
182 }
183 }
184
185 // break our processing
025d866 @brson Switch alts to use arrows
brson authored
186 ('>' as u8, true) => { break; }
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
187
188 // process the sequence for k-mers
025d866 @brson Switch alts to use arrows
brson authored
189 (_, true) => {
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
190 let line_bytes = str::bytes(line);
191
// every worker receives the same line; lb is a fresh binding per worker,
// presumably a copy since line_bytes is sent once per size -- TODO confirm
192 for sizes.eachi |ii, _sz| {
193 let mut lb = line_bytes;
194 to_child[ii].send(lb);
195 }
196 }
197
198 // any other line (before the sequence of interest starts) is ignored
025d866 @brson Switch alts to use arrows
brson authored
199 _ => { }
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
200 }
201 }
202
203 // finish: send every worker an empty vector as the end-of-input marker
204 for sizes.eachi |ii, _sz| {
205 to_child[ii].send(~[]);
206 }
207
208 // now fetch and print result messages
// results are read back in the order of `sizes`
209 for sizes.eachi |ii, _sz| {
210 io::println(from_child[ii].recv());
211 }
212 }
213
Something went wrong with that request. Please try again.