Skip to content

Commit

Permalink
Merge pull request #27 from karel-brinda/output
Browse files Browse the repository at this point in the history
Improve cli, output, and tests
  • Loading branch information
karel-brinda committed Apr 9, 2024
2 parents ded070c + bd940a4 commit 1004637
Show file tree
Hide file tree
Showing 6 changed files with 94 additions and 45 deletions.
17 changes: 8 additions & 9 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -67,30 +67,29 @@ Command-line parameters
.. code-block::
$ attotree -h
Program: attotree (rapid estimation of phylogenetic trees using sketching)
Version: 0.1.5
Version: 0.1.6
Author: Karel Brinda <karel.brinda@inria.fr>
usage: attotree [-k INT] [-s INT] [-t INT] [-o FILE] [-f STR] [-L] [-D] [-V] genomes [genomes ...]
usage: attotree [-k INT] [-s INT] [-t INT] [-o FILE] [-m STR] [-d DIR] [-L] [-D] [-V] genome [genome ...]
positional arguments:
genomes input genome file (fasta / gzipped fasta / list of files when "-L")
genome input genome file(s) (fasta / gzipped fasta / list of files when "-L")
options:
-h show this help message and exit
-v show program's version number and exit
-k INT kmer size [21]
-s INT sketch size [10000]
-t INT number of threads [10]
-o FILE newick output [stdout]
-f STR tree inference algorithm (nj/upgma) [nj]
-t INT number of threads [#cores, 10]
-o FILE newick output [-]
-m STR tree construction method (nj/upgma) [nj]
-d DIR tmp dir [default system, /var/folders/z6...]
-L input files are list(s) of files
-D debugging (don't remove tmp dir)
-V verbose output
Issues
------

Expand Down
52 changes: 32 additions & 20 deletions attotree/attotree.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
DEFAULT_S = 10000
DEFAULT_K = 21
DEFAULT_T = os.cpu_count()
DEFAULT_F = "nj"
DEFAULT_M = "nj"


def shorten_output(s):
Expand Down Expand Up @@ -267,13 +267,13 @@ def postprocess_quicktree_nw(nw_in_fn, nw_out_fn, verbose):
buffer.append(x)
s = "".join(buffer)
if nw_out_fn == "-":
print(s)
print(s, end="")
else:
with open(nw_out_fn, "w+") as fo:
print(s, file=fo)
fo.write(s)


def attotree(fns, newick_fn, k, s, t, phylogeny_algorithm, fof, verbose, debug):
def attotree(fns, newick_fn, k, s, t, phylogeny_algorithm, tmp_dir, fof, verbose, debug):
"""
Generate a phylogenetic tree using the given parameters.
Expand All @@ -284,6 +284,7 @@ def attotree(fns, newick_fn, k, s, t, phylogeny_algorithm, fof, verbose, debug):
s (int): Sketch size (forwarded to Mash).
t (int): Number of threads (forwarded to Mash).
phylogeny_algorithm (str): Name of the phylogeny algorithm to use.
tmp_dir (str): Temporary directory.
fof (bool): Flag indicating whether the input files are lists of files (file-of-files) rather than genome files.
verbose (bool): Flag indicating whether to enable verbose output.
debug (bool): Flag indicating whether to retain auxiliary files.
Expand All @@ -302,7 +303,9 @@ def attotree(fns, newick_fn, k, s, t, phylogeny_algorithm, fof, verbose, debug):
fmsg = ""
message(f"Attotree starting{fmsg}")

d = tempfile.mkdtemp()
message(str(tmp_dir))
d = tempfile.mkdtemp(dir=tmp_dir)
message(d)

message('Creating a temporary directory', d)
phylip1_fn = os.path.join(d, "distances.phylip0")
Expand All @@ -311,12 +314,13 @@ def attotree(fns, newick_fn, k, s, t, phylogeny_algorithm, fof, verbose, debug):
newick2_fn = newick_fn
if fof:
#This is to make the list of file pass to Mash even with
#process substitutions
old_fof_fn = fns[0]
#process substitutions and allows for merging multiple lists
new_fof_fn = os.path.join(d, "fof.txt")
with open(old_fof_fn) as f, open(new_fof_fn, 'w') as g:
g.write(f.read())
fns = [new_fof_fn]
with open(new_fof_fn, 'w') as g:
for old_fof_fn in fns:
with open(old_fof_fn) as f:
g.write(f.read().strip() + "\n")
fns = [new_fof_fn]
mash_triangle(fns, phylip1_fn, k=k, s=s, t=t, fof=fof, verbose=verbose)
postprocess_mash_phylip(phylip1_fn, phylip2_fn, verbose=verbose)
quicktree(phylip2_fn, newick1_fn, algorithm=phylogeny_algorithm, verbose=verbose)
Expand Down Expand Up @@ -408,7 +412,7 @@ def format_help(self):
metavar='INT',
dest='t',
default=DEFAULT_T,
help=f'number of threads [{DEFAULT_T}]',
help=f'number of threads [#cores, {DEFAULT_T}]',
)

parser.add_argument(
Expand All @@ -420,19 +424,27 @@ def format_help(self):
)

parser.add_argument(
'-f',
'-m',
metavar='STR',
dest='f',
default=DEFAULT_F,
dest='m',
default=DEFAULT_M,
choices=("nj", "upgma"),
help=f'tree inference algorithm (nj/upgma) [{DEFAULT_F}]',
help=f'tree construction method (nj/upgma) [{DEFAULT_M}]',
)

parser.add_argument(
'-d',
metavar='DIR',
dest='d',
default=None,
help=f'tmp dir [default system, {tempfile.gettempdir()[:15]+"..."}]',
)

parser.add_argument(
'-L',
action='store_true',
dest='L',
help=f'input files are list of files',
help=f'input files are list(s) of files',
)

parser.add_argument(
Expand All @@ -450,17 +462,17 @@ def format_help(self):
)

parser.add_argument(
'genomes',
'genome',
nargs="+",
help='input genome file (fasta / gzipped fasta / list of files when "-L")',
help='input genome file(s) (fasta / gzipped fasta / list of files when "-L")',
)

args = parser.parse_args()

#print(args)
attotree(
fns=args.genomes, k=args.k, s=args.s, t=args.t, newick_fn=args.o, phylogeny_algorithm=args.f, fof=args.L,
verbose=args.V, debug=args.D
fns=args.genome, k=args.k, s=args.s, t=args.t, newick_fn=args.o, phylogeny_algorithm=args.m, fof=args.L,
verbose=args.V, debug=args.D, tmp_dir=args.d
)

args = parser.parse_args()
Expand Down
2 changes: 1 addition & 1 deletion attotree/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
from __commit import *
except ImportError:
pass
VERSION = "0.1.5"
VERSION = "0.1.6"
63 changes: 49 additions & 14 deletions tests/02_simple_tree/Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.PHONY: all help clean cleanall view
.PHONY: all all1 all2 help clean cleanall view data

SHELL=/usr/bin/env bash -eo pipefail

Expand All @@ -8,53 +8,87 @@ SHELL=/usr/bin/env bash -eo pipefail

FASTAS=$(shell tar -tf test_spneumo.tar.xz)

###################
## Compare trees ##
###################
##########
## Main ##
##########
all: ## Run all tests
all: all1 all2

all: \
tree.precomputed.newick \
#w/diff
all1: \
tree.default.nw \
tree.default_local_tmp_dir.nw \
tree.default_verbose_debug.nw \
tree.file_list.nw \
tree.file_list_proc_subst.nw
tree.file_list_proc_subst.nw \
tree.file_list_proc_subst_two_lists.nw \
tree.default_o-param.nw
for x in $^; do \
2>&1 echo Testing "$$x"; \
diff $$x tree.precomputed.newick; \
done

###################################
# Different ways to compute trees #
###################################
#w/o diff
all2: \
tree.default_upgma.nw

data: ## Regenerate test data
data: tree.precomputed.newick file_list.txt


# Different ways to compute trees
tree.default.nw: file_list.txt
@ >&2 printf "=====\\n$@\\n=====\\n"
../../attotree/attotree.py $(shell cat $<) > "$@.tmp"
mv "$@.tmp" "$@"

tree.default_upgma.nw: file_list.txt
@ >&2 printf "=====\\n$@\\n=====\\n"
../../attotree/attotree.py $(shell cat $<) -m upgma> "$@.tmp"
mv "$@.tmp" "$@"

tree.default_local_tmp_dir.nw: file_list.txt
@ >&2 printf "=====\\n$@\\n=====\\n"
../../attotree/attotree.py $(shell cat $<) -D -d tmp_dir > "$@.tmp"
mv "$@.tmp" "$@"

tree.default_o-param.nw: file_list.txt
@ >&2 printf "=====\\n$@\\n=====\\n"
../../attotree/attotree.py $(shell cat $<) -o "$@.tmp"
mv "$@.tmp" "$@"

tree.default_verbose_debug.nw: file_list.txt
@ >&2 printf "=====\\n$@\\n=====\\n"
../../attotree/attotree.py -V -D $(shell cat $<) > "$@.tmp"
mv "$@.tmp" "$@"

tree.file_list.nw: file_list.txt
@ >&2 printf "=====\\n$@\\n=====\\n"
../../attotree/attotree.py -L "$<" > "$@.tmp"
mv "$@.tmp" "$@"

tree.file_list_proc_subst.nw: file_list.txt
@ >&2 printf "=====\\n$@\\n=====\\n"
../../attotree/attotree.py -L <(head -n 99999 "$<") > "$@.tmp"
mv "$@.tmp" "$@"

tree.file_list_proc_subst_two_lists.nw: file_list.txt
@ >&2 printf "=====\\n$@\\n=====\\n"
../../attotree/attotree.py -L <(head -n 2 "$<") <(tail -n+3 "$<") > "$@.tmp"
mv "$@.tmp" "$@"

tree.precomputed.newick: tree.default.nw
cp $< $@

##############
# Input data #
##############

file_list.txt: test_spneumo.tar.xz
tar xvf $<
tar -tf $< > file_list.txt.tmp
mv file_list.txt.tmp file_list.txt


###############
## Auxiliary ##
###############
help: ## Print help messages
@echo -e "$$(grep -hE '^\S*(:.*)?##' $(MAKEFILE_LIST) \
| sed \
Expand All @@ -67,6 +101,7 @@ help: ## Print help messages

clean: ## Clean
rm -f *.nw *.tmp
rm -fr tmp_dir/*

cleanall: clean ## Clean all
rm -f file_list.txt *.fa
Expand Down
3 changes: 3 additions & 0 deletions tests/02_simple_tree/tmp_dir/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
*
!.gitignore

2 changes: 1 addition & 1 deletion tests/02_simple_tree/tree.precomputed.newick
Original file line number Diff line number Diff line change
@@ -1 +1 @@
((203692:0.00537,001334:0.00473):0.00060,(302649:0.00543,101058:0.00531):0.00062,403790:0.01403);
((203692:0.00537,001334:0.00473):0.00060,(302649:0.00543,101058:0.00531):0.00062,403790:0.01403);

0 comments on commit 1004637

Please sign in to comment.