Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 214 lines (165 sloc) 5.761 kb
b925648 Eric Holk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
1 // xfail-pretty
2
3 // multi tasking k-nucleotide
4
5 import io::reader_util;
6
7 use std;
8 import std::map;
9 import std::map::hashmap;
10 import std::sort;
11
22e955a Eric Holk Move streams into core.
eholk authored
12 import pipes::{stream, port, chan};
b925648 Eric Holk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
13
14 // given a map, print a sorted version of it
92743dc Michael J. Sullivan Move the world over to using the new style string literals and types. Cl...
msullivan authored
// Formats the frequency table `mm` as text and returns it.
// Each count in `mm` is turned into a percentage of `total`, the resulting
// (key, percentage) pairs are ordered by the nested `sortKV`, and one line per
// pair is appended to the result: the upper-cased key, a space, and the
// percentage printed with the %0.3f format, followed by a newline.
15 fn sort_and_fmt(mm: hashmap<~[u8], uint>, total: uint) -> ~str {
b925648 Eric Holk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
// xx expressed as a percentage of yy, computed in floating point
16 fn pct(xx: uint, yy: uint) -> float {
b355936 Brian Anderson Convert ret to return
brson authored
17 return (xx as float) * 100f / (yy as float);
b925648 Eric Holk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
18 }
19
97452c0 Niko Matsakis Remove modes from map API and replace with regions.
nikomatsakis authored
// ordering predicate on the second tuple element; note that it answers
// v0 >= v1, so presumably the sort ends up descending by value
// (TODO confirm against the predicate contract of sort::merge_sort)
20 pure fn le_by_val<TT: copy, UU: copy>(kv0: &(TT,UU),
21 kv1: &(TT,UU)) -> bool {
22 let (_, v0) = *kv0;
23 let (_, v1) = *kv1;
b355936 Brian Anderson Convert ret to return
brson authored
24 return v0 >= v1;
b925648 Eric Holk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
25 }
26
97452c0 Niko Matsakis Remove modes from map API and replace with regions.
nikomatsakis authored
// ordering predicate on the first tuple element (k0 <= k1)
27 pure fn le_by_key<TT: copy, UU: copy>(kv0: &(TT,UU),
28 kv1: &(TT,UU)) -> bool {
29 let (k0, _) = *kv0;
30 let (k1, _) = *kv1;
b355936 Brian Anderson Convert ret to return
brson authored
31 return k0 <= k1;
b925648 Eric Holk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
32 }
33
34 // sort by key, then by value
// The inner merge_sort orders by key and the outer one re-sorts that result
// by value. NOTE(review): pairs with equal values keep their key order only
// if sort::merge_sort is stable -- confirm.
35 fn sortKV<TT: copy, UU: copy>(orig: ~[(TT,UU)]) -> ~[(TT,UU)] {
b355936 Brian Anderson Convert ret to return
brson authored
36 return sort::merge_sort(le_by_val, sort::merge_sort(le_by_key, orig));
b925648 Eric Holk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
37 }
38
39 let mut pairs = ~[];
40
41 // map -> [(k,%)]
// every entry of mm contributes one (key, percentage-of-total) pair;
// the closure always returns true (presumably: keep iterating -- confirm)
42 mm.each(fn&(key: ~[u8], val: uint) -> bool {
43 vec::push(pairs, (key, pct(val, total)));
b355936 Brian Anderson Convert ret to return
brson authored
44 return true;
b925648 Eric Holk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
45 });
46
47 let pairs_sorted = sortKV(pairs);
48
92743dc Michael J. Sullivan Move the world over to using the new style string literals and types. Cl...
msullivan authored
49 let mut buffer = ~"";
b925648 Eric Holk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
50
// append one formatted line per sorted pair; the closure body is marked
// unsafe, presumably because of the str::unsafe::from_bytes call on the key
51 pairs_sorted.each(fn&(kv: (~[u8], float)) -> bool unsafe {
52 let (k,v) = kv;
a9cc506 Paul Stansifer Change syntax extension syntax: `#m[...]` -> `m!{...}`.
paulstansifer authored
53 buffer += (fmt!{"%s %0.3f\n", str::to_upper(str::unsafe::from_bytes(k)), v});
b355936 Brian Anderson Convert ret to return
brson authored
54 return true;
b925648 Eric Holk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
55 });
56
b355936 Brian Anderson Convert ret to return
brson authored
57 return buffer;
b925648 Eric Holk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
58 }
59
60 // given a map, search for the frequency of a pattern
92743dc Michael J. Sullivan Move the world over to using the new style string literals and types. Cl...
msullivan authored
// Looks up the count stored in `mm` for the pattern `key`.
// The pattern is lower-cased and converted to bytes before the lookup.
// Returns 0 when the map has no entry for it, otherwise the stored count.
61 fn find(mm: hashmap<~[u8], uint>, key: ~str) -> uint {
b925648 Eric Holk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
62 alt mm.find(str::bytes(str::to_lower(key))) {
025d866 Brian Anderson Switch alts to use arrows
brson authored
63 option::none => { return 0u; }
64 option::some(num) => { return num; }
b925648 Eric Holk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
65 }
66 }
67
68 // given a map, increment the counter for a key
// Adds one occurrence of `key` to the frequency table `mm`: a key that is
// not present yet is inserted with count 1, an existing key is re-inserted
// with its previous count plus 1.
69 fn update_freq(mm: hashmap<~[u8], uint>, key: &[u8]) {
// slice over the full range 0..key.len(); presumably this produces an owned
// copy of the borrowed bytes so it can be used as a map key -- TODO confirm
70 let key = vec::slice(key, 0, key.len());
71 alt mm.find(key) {
025d866 Brian Anderson Switch alts to use arrows
brson authored
72 option::none => { mm.insert(key, 1u ); }
73 option::some(val) => { mm.insert(key, 1u + val); }
b925648 Eric Holk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
74 }
75 }
76
77 // given a ~[u8], for each window call a function
78 // i.e., for "hello" and windows of size four,
79 // run it("hell") and it("ello"), then return "llo"
c918bd0 Eric Holk Fix borrow check errors in k-nucleotide.
eholk authored
// Calls `it` once for every window of `nn` consecutive bytes of `bb`, moving
// the window start forward one byte at a time, then returns the last
// nn - 1 bytes of `bb` so the caller can prepend them to its next chunk.
// NOTE(review): both `len - (nn - 1u)` expressions are unsigned subtractions;
// when bb holds fewer than nn - 1 bytes they would go below zero. Confirm
// that callers never pass such a short input, or add a guard.
80 fn windows_with_carry(bb: &[u8], nn: uint,
b925648 Eric Holk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
81 it: fn(window: &[u8])) -> ~[u8] {
82 let mut ii = 0u;
83
84 let len = vec::len(bb);
// ii ranges over every start index for which a full nn-byte window fits
85 while ii < len - (nn - 1u) {
65beca4 Eric Holk Use iteration protocol for ebml, use vec::view in more places (issue #28...
eholk authored
86 it(vec::view(bb, ii, ii+nn));
b925648 Eric Holk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
87 ii += 1u;
88 }
89
b355936 Brian Anderson Convert ret to return
brson authored
// the carry: the trailing nn - 1 bytes of bb
90 return vec::slice(bb, len - (nn - 1u), len);
b925648 Eric Holk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
91 }
92
22e955a Eric Holk Move streams into core.
eholk authored
// Worker body for one window size `sz`.
// Receives byte lines from `from_parent` until an empty vector arrives,
// counts every window of `sz` bytes across those lines in a local frequency
// table, then sends a single result string to `to_parent`.
// The result depends on `sz`: sizes 1 and 2 send the formatted table from
// sort_and_fmt, sizes 3, 4, 6, 12 and 18 send the count of one fixed pattern
// followed by a tab and the pattern itself, and any other size sends an
// empty string.
93 fn make_sequence_processor(sz: uint, from_parent: pipes::port<~[u8]>,
92743dc Michael J. Sullivan Move the world over to using the new style string literals and types. Cl...
msullivan authored
94 to_parent: pipes::chan<~str>) {
b925648 Eric Holk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
95
96 let freqs: hashmap<~[u8], uint> = map::bytes_hash();
// carry holds the tail returned by the previous windows_with_carry call
97 let mut carry: ~[u8] = ~[];
// total counts every window visited, across all received lines
98 let mut total: uint = 0u;
99
100 let mut line: ~[u8];
101
102 loop {
103
104 line = from_parent.recv();
// an empty vector is the end-of-input marker
105 if line == ~[] { break; }
106
// prepend the previous tail so windows that straddle two lines are counted
107 carry = windows_with_carry(carry + line, sz, |window| {
108 update_freq(freqs, window);
109 total += 1u;
110 });
111 }
112
113 let buffer = alt sz {
025d866 Brian Anderson Switch alts to use arrows
brson authored
114 1u => { sort_and_fmt(freqs, total) }
115 2u => { sort_and_fmt(freqs, total) }
116 3u => { fmt!{"%u\t%s", find(freqs, ~"GGT"), ~"GGT"} }
117 4u => { fmt!{"%u\t%s", find(freqs, ~"GGTA"), ~"GGTA"} }
118 6u => { fmt!{"%u\t%s", find(freqs, ~"GGTATT"), ~"GGTATT"} }
119 12u => { fmt!{"%u\t%s", find(freqs, ~"GGTATTTTAATT"), ~"GGTATTTTAATT"} }
120 18u => { fmt!{"%u\t%s", find(freqs, ~"GGTATTTTAATTTATAGT"), ~"GGTATTTTAATTTATAGT"} }
121 _ => { ~"" }
b925648 Eric Holk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
122 };
123
a9cc506 Paul Stansifer Change syntax extension syntax: `#m[...]` -> `m!{...}`.
paulstansifer authored
124 //comm::send(to_parent, fmt!{"yay{%u}", sz});
b925648 Eric Holk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
125 to_parent.send(buffer);
126 }
127
128 // given a FASTA file on stdin, process sequence THREE
92743dc Michael J. Sullivan Move the world over to using the new style string literals and types. Cl...
msullivan authored
// Program entry point.
// Chooses the input reader, starts one make_sequence_processor task per
// window size in `sizes`, forwards the lines of the selected sequence to
// every task, then signals the end of input with an empty vector and prints
// the result string received from each task, in the order of `sizes`.
// `args` is not read anywhere in this body.
129 fn main(args: ~[~str]) {
// when the RUST_BENCH environment variable is set, read the data file under
// CFG_SRC_DIR; otherwise read from stdin
130 let rdr = if os::getenv(~"RUST_BENCH").is_some() {
b925648 Eric Holk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
131 // FIXME: Using this compile-time env variable is a crummy way to
132 // get to this massive data set, but #include_bin chokes on it (#2598)
133 let path = path::connect(
a9cc506 Paul Stansifer Change syntax extension syntax: `#m[...]` -> `m!{...}`.
paulstansifer authored
134 env!{"CFG_SRC_DIR"},
92743dc Michael J. Sullivan Move the world over to using the new style string literals and types. Cl...
msullivan authored
135 ~"src/test/bench/shootout-k-nucleotide.data"
b925648 Eric Holk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
136 );
137 result::get(io::file_reader(path))
138 } else {
139 io::stdin()
140 };
141
142
143
144 // initialize each sequence sorter
145 let sizes = ~[1u,2u,3u,4u,6u,12u,18u];
// one result stream per size, each wrapped in some(...) and stored in a
// mutable vector so it can be swapped out inside the closure below
146 let streams = vec::map(sizes, |_sz| some(stream()));
147 let streams = vec::to_mut(streams);
148 let mut from_child = ~[];
149 let to_child = vec::mapi(sizes, |ii, sz| {
// exchange the stored stream with none, then unwrap it into its two ends
150 let mut stream = none;
151 stream <-> streams[ii];
152 let (to_parent_, from_child_) = option::unwrap(stream);
153
// the receiving end stays here so the results can be collected at the end
154 vec::push(from_child, from_child_);
155
22e955a Eric Holk Move streams into core.
eholk authored
// second stream, used to feed input lines to the spawned task
156 let (to_child, from_parent) = pipes::stream();
b925648 Eric Holk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
157
158 do task::spawn_with(from_parent) |from_parent| {
159 make_sequence_processor(sz, from_parent, to_parent_);
160 };
161
162 to_child
163 });
164
165
166 // latch stores true after we've started
167 // reading the sequence of interest
168 let mut proc_mode = false;
169
170 while !rdr.eof() {
92743dc Michael J. Sullivan Move the world over to using the new style string literals and types. Cl...
msullivan authored
171 let line: ~str = rdr.read_line();
b925648 Eric Holk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
172
c26d025 Graydon Hoare Switch 'cont' to 'again' everywhere. Close #2229.
graydon authored
173 if str::len(line) == 0u { again; }
b925648 Eric Holk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
174
// dispatch on the first byte of the line together with the latch
175 alt (line[0], proc_mode) {
176
177 // start processing if this is the one
025d866 Brian Anderson Switch alts to use arrows
brson authored
178 ('>' as u8, false) => {
92743dc Michael J. Sullivan Move the world over to using the new style string literals and types. Cl...
msullivan authored
179 alt str::find_str_from(line, ~"THREE", 1u) {
025d866 Brian Anderson Switch alts to use arrows
brson authored
180 option::some(_) => { proc_mode = true; }
181 option::none => { }
b925648 Eric Holk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
182 }
183 }
184
185 // break our processing
025d866 Brian Anderson Switch alts to use arrows
brson authored
186 ('>' as u8, true) => { break; }
b925648 Eric Holk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
187
188 // process the sequence for k-mers
025d866 Brian Anderson Switch alts to use arrows
brson authored
189 (_, true) => {
b925648 Eric Holk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
190 let line_bytes = str::bytes(line);
191
// every task gets the same line; presumably the let below makes a fresh
// copy of the bytes for each send -- TODO confirm
192 for sizes.eachi |ii, _sz| {
193 let mut lb = line_bytes;
194 to_child[ii].send(lb);
195 }
196 }
197
198 // whatever
025d866 Brian Anderson Switch alts to use arrows
brson authored
199 _ => { }
b925648 Eric Holk Added a k-nucleotide version that uses pipes. 31% speedup.
eholk authored
200 }
201 }
202
203 // finish...
// the empty vector is the marker make_sequence_processor stops on
204 for sizes.eachi |ii, _sz| {
205 to_child[ii].send(~[]);
206 }
207
208 // now fetch and print result messages
209 for sizes.eachi |ii, _sz| {
210 io::println(from_child[ii].recv());
211 }
212 }
213
Something went wrong with that request. Please try again.