Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 210 lines (163 sloc) 5.743 kb
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
1 // xfail-pretty
2
3 // multi tasking k-nucleotide
4
5 use std;
f686896 @pcwalton test: "import" -> "use"
pcwalton authored
6 use std::map;
7 use std::map::hashmap;
8 use std::sort;
9 use io::ReaderUtil;
10 use pipes::{stream, Port, Chan};
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
11
12 // given a map, print a sorted version of it
//
// Turns every (key, count) entry of `mm` into (key, percentage of `total`),
// sorts the pairs with `sortKV`, and renders one line per pair: the key
// upper-cased, a space, the percentage formatted with "%0.3f", and a newline.
// Returns the concatenation of those lines.
92743dc @msullivan Move the world over to using the new style string literals and types. Cl...
msullivan authored
13 fn sort_and_fmt(mm: hashmap<~[u8], uint>, total: uint) -> ~str {
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
// xx as a percentage of yy, computed in floating point
14 fn pct(xx: uint, yy: uint) -> float {
b355936 @brson Convert ret to return
brson authored
15 return (xx as float) * 100f / (yy as float);
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
16 }
17
97452c0 @nikomatsakis Remove modes from map API and replace with regions.
nikomatsakis authored
// Ordering predicate on the second tuple element. Note the `>=`: handed to
// merge_sort as its "less or equal" test, it puts larger values first.
18 pure fn le_by_val<TT: copy, UU: copy>(kv0: &(TT,UU),
19 kv1: &(TT,UU)) -> bool {
20 let (_, v0) = *kv0;
21 let (_, v1) = *kv1;
b355936 @brson Convert ret to return
brson authored
22 return v0 >= v1;
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
23 }
24
97452c0 @nikomatsakis Remove modes from map API and replace with regions.
nikomatsakis authored
// Ordering predicate on the first tuple element (ascending keys).
25 pure fn le_by_key<TT: copy, UU: copy>(kv0: &(TT,UU),
26 kv1: &(TT,UU)) -> bool {
27 let (k0, _) = *kv0;
28 let (k1, _) = *kv1;
b355936 @brson Convert ret to return
brson authored
29 return k0 <= k1;
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
30 }
31
32 // sort by key, then by value
// The inner sort orders by key, the outer one re-sorts that result by value.
// NOTE(review): keeping key order among equal values presumably relies on
// sort::merge_sort being stable -- confirm.
33 fn sortKV<TT: copy, UU: copy>(orig: ~[(TT,UU)]) -> ~[(TT,UU)] {
b355936 @brson Convert ret to return
brson authored
34 return sort::merge_sort(le_by_val, sort::merge_sort(le_by_key, orig));
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
35 }
36
37 let mut pairs = ~[];
38
39 // map -> [(k,%)]
40 mm.each(fn&(key: ~[u8], val: uint) -> bool {
41 vec::push(pairs, (key, pct(val, total)));
b355936 @brson Convert ret to return
brson authored
// presumably `true` asks `each` to keep iterating -- confirm
42 return true;
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
43 });
44
45 let pairs_sorted = sortKV(pairs);
46
92743dc @msullivan Move the world over to using the new style string literals and types. Cl...
msullivan authored
47 let mut buffer = ~"";
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
48
// The closure is marked unsafe because it calls str::unsafe::from_bytes to
// turn the key bytes into a string.
49 pairs_sorted.each(fn&(kv: (~[u8], float)) -> bool unsafe {
50 let (k,v) = kv;
29f32b4 @paulstansifer `m1!{...}` -> `m1!(...)`
paulstansifer authored
51 buffer += (fmt!("%s %0.3f\n", str::to_upper(str::unsafe::from_bytes(k)), v));
b355936 @brson Convert ret to return
brson authored
52 return true;
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
53 });
54
b355936 @brson Convert ret to return
brson authored
55 return buffer;
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
56 }
57
58 // given a map, search for the frequency of a pattern
92743dc @msullivan Move the world over to using the new style string literals and types. Cl...
msullivan authored
59 fn find(mm: hashmap<~[u8], uint>, key: ~str) -> uint {
9f59131 @catamorphism Rename str::bytes to str::to_bytes
catamorphism authored
60 match mm.find(str::to_bytes(str::to_lower(key))) {
8337fa1 @brson Camel case the option type
brson authored
61 option::None => { return 0u; }
62 option::Some(num) => { return num; }
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
63 }
64 }
65
66 // given a map, increment the counter for a key
67 fn update_freq(mm: hashmap<~[u8], uint>, key: &[u8]) {
68 let key = vec::slice(key, 0, key.len());
ecaf9e3 @brson Convert alt to match. Stop parsing alt
brson authored
69 match mm.find(key) {
8337fa1 @brson Camel case the option type
brson authored
70 option::None => { mm.insert(key, 1u ); }
71 option::Some(val) => { mm.insert(key, 1u + val); }
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
72 }
73 }
74
75 // given a ~[u8], for each window call a function
76 // i.e., for "hello" and windows of size four,
77 // run it("hell") and it("ello"), then return "llo"
c918bd0 @eholk Fix borrow check errors in k-nucleotide.
eholk authored
78 fn windows_with_carry(bb: &[u8], nn: uint,
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
79 it: fn(window: &[u8])) -> ~[u8] {
80 let mut ii = 0u;
81
82 let len = vec::len(bb);
83 while ii < len - (nn - 1u) {
65beca4 @eholk Use iteration protocol for ebml, use vec::view in more places (issue #28...
eholk authored
84 it(vec::view(bb, ii, ii+nn));
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
85 ii += 1u;
86 }
87
b355936 @brson Convert ret to return
brson authored
88 return vec::slice(bb, len - (nn - 1u), len);
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
89 }
90
cfbc7cb @brson Convert core::pipes to camel case
brson authored
// Body of one worker task: counts every window of `sz` bytes in the data
// received on `from_parent` and sends a single report string on `to_parent`.
//
// sz:          the window width this worker is responsible for
// from_parent: delivers chunks of sequence bytes; an empty vector ends input
// to_parent:   receives the finished report
91 fn make_sequence_processor(sz: uint, from_parent: pipes::Port<~[u8]>,
92 to_parent: pipes::Chan<~str>) {
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
93
94 let freqs: hashmap<~[u8], uint> = map::bytes_hash();
// bytes returned by the previous windows_with_carry call; prepended to the
// next chunk
95 let mut carry: ~[u8] = ~[];
// number of windows counted so far (incremented once per callback)
96 let mut total: uint = 0u;
97
98 let mut line: ~[u8];
99
100 loop {
101
102 line = from_parent.recv();
// an empty vector is the end-of-input marker
103 if line == ~[] { break; }
104
105 carry = windows_with_carry(carry + line, sz, |window| {
106 update_freq(freqs, window);
107 total += 1u;
108 });
109 }
110
// Widths 1 and 2 report the full sorted table; the other known widths report
// the count of one fixed pattern followed by a tab and the pattern itself.
ecaf9e3 @brson Convert alt to match. Stop parsing alt
brson authored
111 let buffer = match sz {
025d866 @brson Switch alts to use arrows
brson authored
112 1u => { sort_and_fmt(freqs, total) }
113 2u => { sort_and_fmt(freqs, total) }
29f32b4 @paulstansifer `m1!{...}` -> `m1!(...)`
paulstansifer authored
114 3u => { fmt!("%u\t%s", find(freqs, ~"GGT"), ~"GGT") }
115 4u => { fmt!("%u\t%s", find(freqs, ~"GGTA"), ~"GGTA") }
116 6u => { fmt!("%u\t%s", find(freqs, ~"GGTATT"), ~"GGTATT") }
117 12u => { fmt!("%u\t%s", find(freqs, ~"GGTATTTTAATT"), ~"GGTATTTTAATT") }
118 18u => { fmt!("%u\t%s", find(freqs, ~"GGTATTTTAATTTATAGT"), ~"GGTATTTTAATTTATAGT") }
025d866 @brson Switch alts to use arrows
brson authored
// any other width yields an empty report
119 _ => { ~"" }
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
120 };
121
29f32b4 @paulstansifer `m1!{...}` -> `m1!(...)`
paulstansifer authored
122 //comm::send(to_parent, fmt!("yay{%u}", sz));
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
123 to_parent.send(buffer);
124 }
125
126 // given a FASTA file on stdin, process sequence THREE
//
// Spawns one worker task per window size, feeds every line of the selected
// sequence to all of them, then prints each worker's report in the order of
// `sizes`. When the RUST_BENCH environment variable is set, the input is
// read from the benchmark data file instead of stdin.
92743dc @msullivan Move the world over to using the new style string literals and types. Cl...
msullivan authored
127 fn main(args: ~[~str]) {
128 let rdr = if os::getenv(~"RUST_BENCH").is_some() {
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
129 // FIXME: Using this compile-time env variable is a crummy way to
130 // get to this massive data set, but #include_bin chokes on it (#2598)
c284b8b @graydon Start using core::path2::Path in a lot of places.
graydon authored
131 let path = Path(env!("CFG_SRC_DIR"))
132 .push_rel(&Path("src/test/bench/shootout-k-nucleotide.data"));
133 result::get(io::file_reader(&path))
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
134 } else {
135 io::stdin()
136 };
137
138
139
140 // initialize each sequence sorter
141 let sizes = ~[1u,2u,3u,4u,6u,12u,18u];
// One result stream per size, each wrapped in Some so that it can be swapped
// out of the vector (leaving None behind) inside the closure below.
8337fa1 @brson Camel case the option type
brson authored
142 let streams = vec::map(sizes, |_sz| Some(stream()));
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
143 let streams = vec::to_mut(streams);
// receiving ends on which the workers' reports arrive, in `sizes` order
144 let mut from_child = ~[];
// sending ends used to feed input to each worker, in `sizes` order
145 let to_child = vec::mapi(sizes, |ii, sz| {
8337fa1 @brson Camel case the option type
brson authored
146 let mut stream = None;
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
// swap this size's result stream out of the vector
147 stream <-> streams[ii];
148 let (to_parent_, from_child_) = option::unwrap(stream);
149
150 vec::push(from_child, from_child_);
151
// second stream, in the other direction: input from main to the worker
22e955a @eholk Move streams into core.
eholk authored
152 let (to_child, from_parent) = pipes::stream();
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
153
154 do task::spawn_with(from_parent) |from_parent| {
155 make_sequence_processor(sz, from_parent, to_parent_);
156 };
157
158 to_child
159 });
160
161
162 // latch stores true after we've started
163 // reading the sequence of interest
164 let mut proc_mode = false;
165
166 while !rdr.eof() {
92743dc @msullivan Move the world over to using the new style string literals and types. Cl...
msullivan authored
167 let line: ~str = rdr.read_line();
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
168
// skip empty lines; this also makes the line[0] access below safe
c26d025 @graydon Switch 'cont' to 'again' everywhere. Close #2229.
graydon authored
169 if str::len(line) == 0u { again; }
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
170
// dispatch on (first byte of the line, whether we are inside the sequence)
ecaf9e3 @brson Convert alt to match. Stop parsing alt
brson authored
171 match (line[0], proc_mode) {
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
172
173 // start processing if this is the one
// (a '>' header line that contains "THREE" somewhere after the '>')
025d866 @brson Switch alts to use arrows
brson authored
174 ('>' as u8, false) => {
ecaf9e3 @brson Convert alt to match. Stop parsing alt
brson authored
175 match str::find_str_from(line, ~"THREE", 1u) {
8337fa1 @brson Camel case the option type
brson authored
176 option::Some(_) => { proc_mode = true; }
177 option::None => { }
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
178 }
179 }
180
181 // break our processing
// (the next '>' header marks the end of the sequence of interest)
025d866 @brson Switch alts to use arrows
brson authored
182 ('>' as u8, true) => { break; }
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
183
184 // process the sequence for k-mers
025d866 @brson Switch alts to use arrows
brson authored
185 (_, true) => {
9f59131 @catamorphism Rename str::bytes to str::to_bytes
catamorphism authored
186 let line_bytes = str::to_bytes(line);
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
187
// every worker gets the bytes of this line
188 for sizes.eachi |ii, _sz| {
189 let mut lb = line_bytes;
190 to_child[ii].send(lb);
191 }
192 }
193
194 // anything else (a non-header line before the sequence starts) is ignored
025d866 @brson Switch alts to use arrows
brson authored
195 _ => { }
b925648 @eholk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
196 }
197 }
198
199 // finish: an empty vector tells each worker that its input is complete
200 for sizes.eachi |ii, _sz| {
201 to_child[ii].send(~[]);
202 }
203
204 // now fetch and print result messages
205 for sizes.eachi |ii, _sz| {
206 io::println(from_child[ii].recv());
207 }
208 }
209
Something went wrong with that request. Please try again.