Skip to content

Commit

Permalink
benchmarks relating to SBT-allsome publication
Browse files Browse the repository at this point in the history
  • Loading branch information
rsharris committed Dec 2, 2016
1 parent 48b65f7 commit 2f02493
Show file tree
Hide file tree
Showing 397 changed files with 799,824 additions and 0 deletions.
196 changes: 196 additions & 0 deletions paper_benchmarks/all_benchmarks.sh
@@ -0,0 +1,196 @@
#!/bin/bash
#
# Times every SBT query variant against a series of query files.
#
# For each query the script strips the FASTA header lines into
# temp/<query>.query, then, for each enabled variant, drops the OS page
# cache, runs ../src/bt under /usr/bin/time, and prints the second-to-last
# line of that run's log (presumably the /usr/bin/time timing line --
# verify against the installed time implementation).
#
# Paths are relative: run from the directory that contains drop_cache,
# temp/, dat/ and log/.

# Set exactly one of these to 1 to benchmark a single special query
# instead of the numbered per-dataset queries.
special_thousand=0
special_gencode=0

# Per-variant switches; any value other than 1 skips the variant.
do_original=1
do_original_opt=1
do_clust_alone=1
do_original_allsome=1
do_original_allsome_split=1
do_clust_allsome=1

special_query=
if [[ "$special_thousand" == "1" ]]; then
  special_query=thousand
elif [[ "$special_gencode" == "1" ]]; then
  special_query=gencode.v25.transcripts
fi

#######################################
# Runs one benchmark variant for one query.
# Arguments:
#   $1 - enable flag; the variant runs only when this is "1"
#   $2 - label echoed before the run
#   $3 - query name (may contain a directory component)
#   $4 - bt subcommand (query-original or query-redux)
#   $5 - topology file handed to bt
#   $6 - tag of the result file: dat/<query>.<tag>.dat
#   $7 - tag of the log file:    log/<query>.<tag>
# Outputs:
#   the label and the second-to-last line of the log, on stdout
#######################################
run_variant() {
  local enabled=$1 label=$2 query=$3 subcommand=$4 topology=$5
  local dat_tag=$6 log_tag=$7
  local log_file="log/$query.$log_tag"

  [[ "$enabled" == "1" ]] || return 0

  # Start every timed run with a cold page cache.
  bash drop_cache > /dev/null
  echo "$label"

  /usr/bin/time ../src/bt "$subcommand" \
    --query-threshold 0.9 \
    "$topology" \
    "temp/$query.query" \
    "dat/$query.$dat_tag.dat" \
    > "$log_file" 2>&1

  tail -n 2 "$log_file" | head -n 1
}

for dataset in individual_queries/individual
#ten hundred
do

  case "$dataset" in
    ten)
      lasti=9
      ;;
    individual_queries/individual)
      # generate_individual.sh samples 50 sequences and numbers the files
      # from 0, so the last valid index is 49 (this used to be 50, which
      # benchmarked a non-existent 51st query).
      lasti=49
      ;;
    *)
      lasti=2
      ;;
  esac

  for ((i = 0; i <= lasti; i++)); do

    query=${special_query:-$dataset.$i}
    fasta="/mnt/assemblage/sbt/$query.fa"

    echo "query: $query"

    if [[ ! -r "$fasta" ]]; then
      echo "cannot read $fasta; skipping $query" >&2
      if [[ -n "$special_query" ]]; then
        exit 1
      fi
      continue
    fi

    # $query may carry a directory prefix (e.g. individual_queries/...),
    # so make sure the matching sub-directories exist for all outputs.
    mkdir -p \
      "$(dirname "temp/$query")" \
      "$(dirname "dat/$query")" \
      "$(dirname "log/$query")"

    # query prep: keep only the sequence lines
    grep -v "^>" "$fasta" > "temp/$query.query"

    # original
    run_variant "$do_original" "original SK" "$query" query-original \
      /mnt/assemblage/sbt/compressedSBT/SBT_list.txt \
      unclust-rrr-original unclust-original

    # original + opt
    # NOTE(review): this writes the same .dat file as the "original SK"
    # run above and therefore overwrites its results -- confirm that this
    # is intended before relying on that file for correctness checks.
    run_variant "$do_original_opt" "original SK + opt" "$query" query-redux \
      /mnt/assemblage/sbt/compressedSBT/SBT_list.txt \
      unclust-rrr-original unclust-original-opt

    # clust alone
    run_variant "$do_clust_alone" "clust" "$query" query-original \
      /mnt/assemblage/sbt/clusteringSBT.conversion/sbt-rrr.txt \
      clust-rrr-original clust-original

    # old clust+allsome (disabled)

    #echo tableId = SBT+clust+allsome
    #echo queryId = clust-rrr-split
    #echo queryProcess = query-redux
    #echo topologyFile = /gpfs/cyberstar/pzm11/backup/sbt/clusteringSBT.compressed/sbt-rrr-split.txt

    #/usr/bin/time ../src/bt query-redux \
    # --query-threshold 0.9 \
    # /mnt/assemblage/sbt/clusteringSBT.compressed/sbt-rrr-split.txt \
    # temp/$query.query \
    # clust-rrr-split.$query.dat > log/$query.split 2>&1

    #tail -n 2 log/$query.split | head -n 1

    # original SK + allsome
    run_variant "$do_original_allsome" "original+allsome" "$query" query-redux \
      /mnt/assemblage/sbt/compressedSBT.allsome/sbt-rrr-allsome.txt \
      unclust-rrr-allsome unclust-allsome

    # original SK + allsome-split (2 files)
    run_variant "$do_original_allsome_split" "original+allsome-split" "$query" query-redux \
      /mnt/assemblage/sbt/compressedSBT.reconstructed/sbt-rrr-split.txt \
      unclust-rrr-split unclust-split

    # new clust+allsome
    run_variant "$do_clust_allsome" "clust+allsome (1-file)" "$query" query-redux \
      /mnt/assemblage/sbt/clusteringSBT.compressed/sbt-rrr-allsome.txt \
      clust-rrr-allsome clust-allsome

    # A special query is benchmarked exactly once.
    if [[ -n "$special_query" ]]; then
      exit 0
    fi

  done
done


5 changes: 5 additions & 0 deletions paper_benchmarks/check-correctness.sh
@@ -0,0 +1,5 @@
# Compares the results that three query variants produced for query ten.6.
# For each variant, the lines beginning with "*" are pulled out of its .dat
# file and sorted into a .sequences.dat file; the sorted files are then
# diffed pairwise, so any output from diff indicates a disagreement.
for variant in original split allsome
do
  grep "^*" clust-rrr-$variant.ten.6.dat \
    | sort \
    > clust-rrr-$variant.ten.6.sequences.dat
done

diff clust-rrr-original.ten.6.sequences.dat clust-rrr-split.ten.6.sequences.dat
diff clust-rrr-split.ten.6.sequences.dat clust-rrr-allsome.ten.6.sequences.dat
1 change: 1 addition & 0 deletions paper_benchmarks/drop_cache
@@ -0,0 +1 @@
# As root: show memory usage, flush pending writes with sync, write 3 to
# /proc/sys/vm/drop_caches, then show memory usage again. Each step runs
# only if the previous one succeeded.
sudo bash -c 'free && sync && echo 3 > /proc/sys/vm/drop_caches && free'
10 changes: 10 additions & 0 deletions paper_benchmarks/generate_individual.sh
@@ -0,0 +1,10 @@
#!/bin/sh
#
# Builds the individual query files: samples 50 sequences from
# gencode.v25.transcripts.fa with seqtk, splits the sample into two-line
# chunks, and renames the chunks to individual_queries/individual.<n>.fa,
# numbering from 0.
#
# Run from the directory that contains gencode.v25.transcripts.fa.

mkdir -p individual_queries || exit 1

# -f: on a first run there are no earlier *.fa files to remove.
rm -f individual_queries/*.fa

seqtk sample gencode.v25.transcripts.fa 50 > individual_queries/50_queries.fa || exit 1

# Stop here if the directory is unavailable; otherwise split and mv would
# operate on whatever the current directory happens to be.
cd individual_queries || exit 1

# Two lines per chunk: assumes every sampled record is exactly one header
# line plus one sequence line -- TODO confirm for the seqtk version in use.
split -l 2 50_queries.fa || exit 1

num=0
for file in x*; do
  # An unmatched glob stays as the literal "x*"; skip it.
  [ -e "$file" ] || continue
  mv "$file" "individual.$num.fa"
  # POSIX arithmetic instead of "let", which is a bashism: under a strict
  # /bin/sh the counter never advanced and every chunk was moved onto
  # individual.0.fa.
  num=$((num + 1))
done

0 comments on commit 2f02493

Please sign in to comment.