Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100644 194 lines (151 sloc) 5.187 kB
c2a9cc9 @killerswan Add the Alioth k-nucleotide benchmark
killerswan authored
1 // multi tasking k-nucleotide
2
3 import io::reader_util;
4
5 use std;
6 import std::map;
7 import std::map::hashmap;
8 import std::sort;
9
10 // given a map, print a sorted version of it
11 fn sort_and_fmt(mm: hashmap<[u8], uint>, total: uint) -> str {
12 fn pct(xx: uint, yy: uint) -> float {
13 ret (xx as float) * 100f / (yy as float);
14 }
15
16 fn le_by_val<TT: copy, UU: copy>(kv0: (TT,UU), kv1: (TT,UU)) -> bool {
17 let (_, v0) = kv0;
18 let (_, v1) = kv1;
19 ret v0 >= v1;
20 }
21
22 fn le_by_key<TT: copy, UU: copy>(kv0: (TT,UU), kv1: (TT,UU)) -> bool {
23 let (k0, _) = kv0;
24 let (k1, _) = kv1;
25 ret k0 <= k1;
26 }
27
28 // sort by key, then by value
29 fn sortKV<TT: copy, UU: copy>(orig: [(TT,UU)]) -> [(TT,UU)] {
30 ret sort::merge_sort(le_by_val, sort::merge_sort(le_by_key, orig));
31 }
32
33 let mut pairs = [];
34
35 // map -> [(k,%)]
36 mm.each(fn&(key: [u8], val: uint) -> bool {
37 pairs += [(key, pct(val, total))];
38 ret true;
39 });
40
41 let pairs_sorted = sortKV(pairs);
42
43 let mut buffer = "";
44
45 pairs_sorted.each(fn&(kv: ([u8], float)) -> bool unsafe {
46 let (k,v) = kv;
47 buffer += (#fmt["%s %0.3f\n", str::to_upper(str::unsafe::from_bytes(k)), v]);
48 ret true;
49 });
50
51 ret buffer;
52 }
53
54 // given a map, search for the frequency of a pattern
55 fn find(mm: hashmap<[u8], uint>, key: str) -> uint {
56 alt mm.find(str::bytes(str::to_lower(key))) {
57 option::none { ret 0u; }
58 option::some(num) { ret num; }
59 }
60 }
61
62 // given a map, increment the counter for a key
63 fn update_freq(mm: hashmap<[u8], uint>, key: [u8]) {
64 alt mm.find(key) {
65 option::none { mm.insert(key, 1u ); }
66 option::some(val) { mm.insert(key, 1u + val); }
67 }
68 }
69
70 // given a [u8], for each window call a function
71 // i.e., for "hello" and windows of size four,
72 // run it("hell") and it("ello"), then return "llo"
73 fn windows_with_carry(bb: [const u8], nn: uint, it: fn(window: [u8])) -> [u8] {
74 let mut ii = 0u;
75
76 let len = vec::len(bb);
77 while ii < len - (nn - 1u) {
78 it(vec::slice(bb, ii, ii+nn));
79 ii += 1u;
80 }
81
82 ret vec::slice(bb, len - (nn - 1u), len);
83 }
84
85 fn make_sequence_processor(sz: uint, from_parent: comm::port<[u8]>, to_parent: comm::chan<str>) {
86
87 let freqs: hashmap<[u8], uint> = map::bytes_hash();
88 let mut carry: [u8] = [];
89 let mut total: uint = 0u;
90
91 let mut line: [u8];
92
93 loop {
94
95 line = comm::recv(from_parent);
96 if line == [] { break; }
97
98 carry = windows_with_carry(carry + line, sz, { |window|
99 update_freq(freqs, window);
100 total += 1u;
101 });
102 }
103
104 let buffer = alt sz {
105 1u { sort_and_fmt(freqs, total) }
106 2u { sort_and_fmt(freqs, total) }
107 3u { #fmt["%u\t%s", find(freqs, "GGT"), "GGT"] }
108 4u { #fmt["%u\t%s", find(freqs, "GGTA"), "GGTA"] }
109 6u { #fmt["%u\t%s", find(freqs, "GGTATT"), "GGTATT"] }
110 12u { #fmt["%u\t%s", find(freqs, "GGTATTTTAATT"), "GGTATTTTAATT"] }
111 18u { #fmt["%u\t%s", find(freqs, "GGTATTTTAATTTATAGT"), "GGTATTTTAATTTATAGT"] }
112 _ { "" }
113 };
114
115 //comm::send(to_parent, #fmt["yay{%u}", sz]);
116 comm::send(to_parent, buffer);
117 }
118
119 // given a FASTA file on stdin, process sequence THREE
120 fn main(args: [str]) {
121 let rdr = if os::getenv("RUST_BENCH").is_some() {
03e186c @brson bench: Modify shootout-k-nucleotide to not read from the filesystem
brson authored
122 // FIXME: Using this compile-time env variable is a crummy way to
123 // get to this massive data set, but #include_bin chokes on it
124 let path = path::connect(
125 #env("CFG_SRC_DIR"),
126 "src/test/bench/shootout-k-nucleotide.data"
127 );
128 result::get(io::file_reader(path))
c2a9cc9 @killerswan Add the Alioth k-nucleotide benchmark
killerswan authored
129 } else {
130 io::stdin()
131 };
132
133
134
135 // initialize each sequence sorter
136 let sizes = [1u,2u,3u,4u,6u,12u,18u];
137 let from_child = vec::map (sizes, { |_sz| comm::port() });
138 let to_parent = vec::mapi(sizes, { |ii, _sz| comm::chan(from_child[ii]) });
139 let to_child = vec::mapi(sizes, fn@(ii: uint, sz: uint) -> comm::chan<[u8]> {
140 ret task::spawn_listener { |from_parent|
141 make_sequence_processor(sz, from_parent, to_parent[ii]);
142 };
143 });
144
145
146 // latch stores true after we've started
147 // reading the sequence of interest
148 let mut proc_mode = false;
149
150 while !rdr.eof() {
151 let line: str = rdr.read_line();
152
153 if str::len(line) == 0u { cont; }
154
155 alt (line[0], proc_mode) {
156
157 // start processing if this is the one
158 ('>' as u8, false) {
159 alt str::find_str_from(line, "THREE", 1u) {
160 option::some(_) { proc_mode = true; }
161 option::none { }
162 }
163 }
164
165 // break our processing
166 ('>' as u8, true) { break; }
167
168 // process the sequence for k-mers
169 (_, true) {
170 let line_bytes = str::bytes(line);
171
172 for sizes.eachi { |ii, _sz|
173 let mut lb = line_bytes;
174 comm::send(to_child[ii], lb);
175 }
176 }
177
178 // whatever
179 _ { }
180 }
181 }
182
183 // finish...
184 for sizes.eachi { |ii, _sz|
185 comm::send(to_child[ii], []);
186 }
187
188 // now fetch and print result messages
189 for sizes.eachi { |ii, _sz|
190 io::println(comm::recv(from_child[ii]));
191 }
192 }
193
Something went wrong with that request. Please try again.