Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 20 additions & 11 deletions src/test/bench/shootout-fasta.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import vec;
import uint;
import int;
import str;
import io::writer_util;

// Number of sequence characters accumulated before an output line is
// written; the generators flush their buffer once it reaches this length.
fn LINE_LENGTH() -> uint {
    ret 60u;
}

Expand Down Expand Up @@ -42,46 +43,53 @@ fn select_random(r: u32, genelist: [aminoacids]) -> char {
ret bisect(genelist, 0u, vec::len::<aminoacids>(genelist) - 1u, r);
}

fn make_random_fasta(id: str, desc: str, genelist: [aminoacids], n: int) {
log(debug, ">" + id + " " + desc);
fn make_random_fasta(wr: io::writer, id: str, desc: str, genelist: [aminoacids], n: int) {
wr.write_line(">" + id + " " + desc);
let rng = @{mut last: std::rand::rng().next()};
let mut op: str = "";
for uint::range(0u, n as uint) {|_i|
str::push_char(op, select_random(myrandom_next(rng, 100u32),
genelist));
if str::len(op) >= LINE_LENGTH() {
log(debug, op);
wr.write_line(op);
op = "";
}
}
if str::len(op) > 0u { log(debug, op); }
if str::len(op) > 0u { wr.write_line(op); }
}

fn make_repeat_fasta(id: str, desc: str, s: str, n: int) unsafe {
log(debug, ">" + id + " " + desc);
fn make_repeat_fasta(wr: io::writer, id: str, desc: str, s: str, n: int) unsafe {
wr.write_line(">" + id + " " + desc);
let mut op: str = "";
let sl: uint = str::len(s);
for uint::range(0u, n as uint) {|i|
str::unsafe::push_byte(op, s[i % sl]);
if str::len(op) >= LINE_LENGTH() {
log(debug, op);
wr.write_line(op);
op = "";
}
}
if str::len(op) > 0u { log(debug, op); }
if str::len(op) > 0u { wr.write_line(op); }
}

// Builds one `aminoacids` record from a character and its `prob` value.
fn acid(ch: char, prob: u32) -> aminoacids {
    ret {ch: ch, prob: prob};
}

fn main(args: [str]) {
let args = if os::getenv("RUST_BENCH").is_some() {
// alioth tests k-nucleotide with this data at 25,000,000
["", "300000"]
} else if args.len() <= 1u {
["", "1000"]
} else {
args
};

let writer = if os::getenv("RUST_BENCH").is_some() {
result::get(io::file_writer("./shootout-fasta.data", [io::truncate, io::create]))
} else {
io::stdout()
};

let n = int::from_str(args[1]).get();

let iub: [aminoacids] =
Expand All @@ -101,7 +109,8 @@ fn main(args: [str]) {
"GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG" +
"AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC" +
"AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA";
make_repeat_fasta("ONE", "Homo sapiens alu", alu, n * 2);
make_random_fasta("TWO", "IUB ambiguity codes", iub, n * 3);
make_random_fasta("THREE", "Homo sapiens frequency", homosapiens, n * 5);
make_repeat_fasta(writer, "ONE", "Homo sapiens alu", alu, n * 2);
make_random_fasta(writer, "TWO", "IUB ambiguity codes", iub, n * 3);
make_random_fasta(writer, "THREE",
"Homo sapiens frequency", homosapiens, n * 5);
}
187 changes: 187 additions & 0 deletions src/test/bench/shootout-k-nucleotide.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
// multi tasking k-nucleotide

import io::reader_util;

use std;
import std::map;
import std::map::hashmap;
import std::sort;

// given a map, print a sorted version of it
fn sort_and_fmt(mm: hashmap<[u8], uint>, total: uint) -> str {
fn pct(xx: uint, yy: uint) -> float {
ret (xx as float) * 100f / (yy as float);
}

fn le_by_val<TT: copy, UU: copy>(kv0: (TT,UU), kv1: (TT,UU)) -> bool {
let (_, v0) = kv0;
let (_, v1) = kv1;
ret v0 >= v1;
}

fn le_by_key<TT: copy, UU: copy>(kv0: (TT,UU), kv1: (TT,UU)) -> bool {
let (k0, _) = kv0;
let (k1, _) = kv1;
ret k0 <= k1;
}

// sort by key, then by value
fn sortKV<TT: copy, UU: copy>(orig: [(TT,UU)]) -> [(TT,UU)] {
ret sort::merge_sort(le_by_val, sort::merge_sort(le_by_key, orig));
}

let mut pairs = [];

// map -> [(k,%)]
mm.each(fn&(key: [u8], val: uint) -> bool {
pairs += [(key, pct(val, total))];
ret true;
});

let pairs_sorted = sortKV(pairs);

let mut buffer = "";

pairs_sorted.each(fn&(kv: ([u8], float)) -> bool unsafe {
let (k,v) = kv;
buffer += (#fmt["%s %0.3f\n", str::to_upper(str::unsafe::from_bytes(k)), v]);
ret true;
});

ret buffer;
}

// given a map, search for the frequency of a pattern
fn find(mm: hashmap<[u8], uint>, key: str) -> uint {
alt mm.find(str::bytes(str::to_lower(key))) {
option::none { ret 0u; }
option::some(num) { ret num; }
}
}

// given a map, increment the counter for a key
fn update_freq(mm: hashmap<[u8], uint>, key: [u8]) {
alt mm.find(key) {
option::none { mm.insert(key, 1u ); }
option::some(val) { mm.insert(key, 1u + val); }
}
}

// Slides a window of `nn` elements over `bb`, calling `it` once per window
// from left to right, and returns the trailing `nn - 1` elements so the
// caller can prepend them to the next chunk of input.
// i.e., for "hello" and windows of size four,
// run it("hell") and it("ello"), then return "llo"
//
// When `bb` holds fewer than `nn` elements there is no complete window:
// `it` is not called and all of `bb` is returned as the carry, so nothing
// is lost before more input arrives.
//
// `nn` is expected to be at least 1 (`nn - 1u` is computed below).
fn windows_with_carry(bb: [const u8], nn: uint, it: fn(window: [u8])) -> [u8] {
    let len = vec::len(bb);

    // Guard before subtracting: with len < nn - 1u the expression
    // len - (nn - 1u) is not representable as a uint, and the slice
    // bounds derived from it would be invalid.
    if len < nn { ret vec::slice(bb, 0u, len); }

    // one past the start index of the last full window; also the start
    // of the carried tail
    let stop = len - (nn - 1u);

    let mut ii = 0u;
    while ii < stop {
        it(vec::slice(bb, ii, ii + nn));
        ii += 1u;
    }

    ret vec::slice(bb, stop, len);
}

fn make_sequence_processor(sz: uint, from_parent: comm::port<[u8]>, to_parent: comm::chan<str>) {

let freqs: hashmap<[u8], uint> = map::bytes_hash();
let mut carry: [u8] = [];
let mut total: uint = 0u;

let mut line: [u8];

loop {

line = comm::recv(from_parent);
if line == [] { break; }

carry = windows_with_carry(carry + line, sz, { |window|
update_freq(freqs, window);
total += 1u;
});
}

let buffer = alt sz {
1u { sort_and_fmt(freqs, total) }
2u { sort_and_fmt(freqs, total) }
3u { #fmt["%u\t%s", find(freqs, "GGT"), "GGT"] }
4u { #fmt["%u\t%s", find(freqs, "GGTA"), "GGTA"] }
6u { #fmt["%u\t%s", find(freqs, "GGTATT"), "GGTATT"] }
12u { #fmt["%u\t%s", find(freqs, "GGTATTTTAATT"), "GGTATTTTAATT"] }
18u { #fmt["%u\t%s", find(freqs, "GGTATTTTAATTTATAGT"), "GGTATTTTAATTTATAGT"] }
_ { "" }
};

//comm::send(to_parent, #fmt["yay{%u}", sz]);
comm::send(to_parent, buffer);
}

// Reads FASTA-formatted text and reports k-mer statistics for the sequence
// whose header line contains "THREE".
//
// Input comes from ./shootout-fasta.data when the RUST_BENCH environment
// variable is set, otherwise from stdin. `args` is not used.
// One worker task is spawned per window size; every sequence line is sent
// to all workers, and their reports are printed in the order of `sizes`.
fn main(args: [str]) {
let rdr = if os::getenv("RUST_BENCH").is_some() {
result::get(io::file_reader("./shootout-fasta.data"))
} else {
io::stdin()
};



// initialize each sequence sorter
// sizes: the window lengths to count, one worker per entry
let sizes = [1u,2u,3u,4u,6u,12u,18u];
// from_child[i]: port on which worker i's report is received
let from_child = vec::map (sizes, { |_sz| comm::port() });
// to_parent[i]: channel handed to worker i, feeding from_child[i]
let to_parent = vec::mapi(sizes, { |ii, _sz| comm::chan(from_child[ii]) });
// to_child[i]: channel for sending input lines to worker i, obtained by
// spawning the worker with task::spawn_listener
let to_child = vec::mapi(sizes, fn@(ii: uint, sz: uint) -> comm::chan<[u8]> {
ret task::spawn_listener { |from_parent|
make_sequence_processor(sz, from_parent, to_parent[ii]);
};
});


// latch stores true after we've started
// reading the sequence of interest
let mut proc_mode = false;

while !rdr.eof() {
let line: str = rdr.read_line();

// skip empty lines; line[0] below needs at least one byte
if str::len(line) == 0u { cont; }

// dispatch on (first byte of the line, already inside the target sequence?)
alt (line[0], proc_mode) {

// start processing if this is the one
// (a '>' header line containing "THREE" after its first byte)
('>' as u8, false) {
alt str::find_str_from(line, "THREE", 1u) {
option::some(_) { proc_mode = true; }
option::none { }
}
}

// break our processing
// (the next header after the target sequence ends the input loop)
('>' as u8, true) { break; }

// process the sequence for k-mers
(_, true) {
let line_bytes = str::bytes(line);

// every worker receives the bytes of every sequence line
for sizes.eachi { |ii, _sz|
let mut lb = line_bytes;
comm::send(to_child[ii], lb);
}
}

// whatever
// (any non-header line seen before the target sequence is ignored)
_ { }
}
}

// finish...
// an empty vector is the end-of-input signal that
// make_sequence_processor checks for
for sizes.eachi { |ii, _sz|
comm::send(to_child[ii], []);
}

// now fetch and print result messages
// (received in the order of `sizes`, one message per worker)
for sizes.eachi { |ii, _sz|
io::println(comm::recv(from_child[ii]));
}
}