diff --git a/README.rst b/README.rst index f9d5659..a046591 100644 --- a/README.rst +++ b/README.rst @@ -67,30 +67,29 @@ Command-line parameters .. code-block:: $ attotree -h - + Program: attotree (rapid estimation of phylogenetic trees using sketching) - Version: 0.1.5 + Version: 0.1.6 Author: Karel Brinda - usage: attotree [-k INT] [-s INT] [-t INT] [-o FILE] [-f STR] [-L] [-D] [-V] genomes [genomes ...] + usage: attotree [-k INT] [-s INT] [-t INT] [-o FILE] [-m STR] [-d DIR] [-L] [-D] [-V] genome [genome ...] positional arguments: - genomes input genome file (fasta / gzipped fasta / list of files when "-L") + genome input genome file(s) (fasta / gzipped fasta / list of files when "-L") options: -h show this help message and exit -v show program's version number and exit -k INT kmer size [21] -s INT sketch size [10000] - -t INT number of threads [10] - -o FILE newick output [stdout] - -f STR tree inference algorithm (nj/upgma) [nj] + -t INT number of threads [#cores, 10] + -o FILE newick output [-] + -m STR tree construction method (nj/upgma) [nj] + -d DIR tmp dir [default system, /var/folders/z6...] -L input files are list of files -D debugging (don't remove tmp dir) -V verbose output - - Issues ------ diff --git a/attotree/attotree.py b/attotree/attotree.py index a49bd5f..8e26558 100755 --- a/attotree/attotree.py +++ b/attotree/attotree.py @@ -26,7 +26,7 @@ DEFAULT_S = 10000 DEFAULT_K = 21 DEFAULT_T = os.cpu_count() -DEFAULT_F = "nj" +DEFAULT_M = "nj" def shorten_output(s): @@ -267,13 +267,13 @@ def postprocess_quicktree_nw(nw_in_fn, nw_out_fn, verbose): buffer.append(x) s = "".join(buffer) if nw_out_fn == "-": - print(s) + print(s, end="") else: with open(nw_out_fn, "w+") as fo: - print(s, file=fo) + fo.write(s) -def attotree(fns, newick_fn, k, s, t, phylogeny_algorithm, fof, verbose, debug): +def attotree(fns, newick_fn, k, s, t, phylogeny_algorithm, tmp_dir, fof, verbose, debug): """ Generate a phylogenetic tree using the given parameters. 
@@ -284,6 +284,7 @@ def attotree(fns, newick_fn, k, s, t, phylogeny_algorithm, fof, verbose, debug): s (int): Value for parameter s. t (int): Value for parameter t. phylogeny_algorithm (str): Name of the phylogeny algorithm to use. + tmp_dir (str): Temporary directory. fof (bool): Flag indicating whether to use the fof parameter. verbose (bool): Flag indicating whether to enable verbose output. debug (bool): Flag indicating whether to retain auxiliary files. @@ -302,7 +303,9 @@ def attotree(fns, newick_fn, k, s, t, phylogeny_algorithm, fof, verbose, debug): fmsg = "" message(f"Attotree starting{fmsg}") - d = tempfile.mkdtemp() + message(str(tmp_dir)) + d = tempfile.mkdtemp(dir=tmp_dir) + message(d) message('Creating a temporary directory', d) phylip1_fn = os.path.join(d, "distances.phylip0") @@ -311,12 +314,13 @@ def attotree(fns, newick_fn, k, s, t, phylogeny_algorithm, fof, verbose, debug): newick2_fn = newick_fn if fof: #This is to make the list of file pass to Mash even with - #process substitutions - old_fof_fn = fns[0] + #process substitutions and allows for merging multiple lists new_fof_fn = os.path.join(d, "fof.txt") - with open(old_fof_fn) as f, open(new_fof_fn, 'w') as g: - g.write(f.read()) - fns = [new_fof_fn] + with open(new_fof_fn, 'w') as g: + for old_fof_fn in fns: + with open(old_fof_fn) as f: + g.write(f.read().strip() + "\n") + fns = [new_fof_fn] mash_triangle(fns, phylip1_fn, k=k, s=s, t=t, fof=fof, verbose=verbose) postprocess_mash_phylip(phylip1_fn, phylip2_fn, verbose=verbose) quicktree(phylip2_fn, newick1_fn, algorithm=phylogeny_algorithm, verbose=verbose) @@ -408,7 +412,7 @@ def format_help(self): metavar='INT', dest='t', default=DEFAULT_T, - help=f'number of threads [{DEFAULT_T}]', + help=f'number of threads [#cores, {DEFAULT_T}]', ) parser.add_argument( @@ -420,19 +424,27 @@ def format_help(self): ) parser.add_argument( - '-f', + '-m', metavar='STR', - dest='f', - default=DEFAULT_F, + dest='m', + default=DEFAULT_M, choices=("nj", 
"upgma"), - help=f'tree inference algorithm (nj/upgma) [{DEFAULT_F}]', + help=f'tree construction method (nj/upgma) [{DEFAULT_M}]', + ) + + parser.add_argument( + '-d', + metavar='DIR', + dest='d', + default=None, + help=f'tmp dir [default system, {tempfile.gettempdir()[:15]+"..."}]', ) parser.add_argument( '-L', action='store_true', dest='L', - help=f'input files are list of files', + help=f'input files are list(s) of files', ) parser.add_argument( @@ -450,17 +462,17 @@ def format_help(self): ) parser.add_argument( - 'genomes', + 'genome', nargs="+", - help='input genome file (fasta / gzipped fasta / list of files when "-L")', + help='input genome file(s) (fasta / gzipped fasta / list of files when "-L")', ) args = parser.parse_args() #print(args) attotree( - fns=args.genomes, k=args.k, s=args.s, t=args.t, newick_fn=args.o, phylogeny_algorithm=args.f, fof=args.L, - verbose=args.V, debug=args.D + fns=args.genome, k=args.k, s=args.s, t=args.t, newick_fn=args.o, phylogeny_algorithm=args.m, fof=args.L, + verbose=args.V, debug=args.D, tmp_dir=args.d ) args = parser.parse_args() diff --git a/attotree/version.py b/attotree/version.py index b26e858..8febed7 100644 --- a/attotree/version.py +++ b/attotree/version.py @@ -2,4 +2,4 @@ from __commit import * except ImportError: pass -VERSION = "0.1.5" \ No newline at end of file +VERSION = "0.1.6" \ No newline at end of file diff --git a/tests/02_simple_tree/Makefile b/tests/02_simple_tree/Makefile index defbf82..eef9879 100644 --- a/tests/02_simple_tree/Makefile +++ b/tests/02_simple_tree/Makefile @@ -1,4 +1,4 @@ -.PHONY: all help clean cleanall view +.PHONY: all all1 all2 help clean cleanall view data SHELL=/usr/bin/env bash -eo pipefail @@ -8,53 +8,87 @@ SHELL=/usr/bin/env bash -eo pipefail FASTAS=$(shell tar -tf test_spneumo.tar.xz) -################### -## Compare trees ## -################### +########## +## Main ## +########## +all: ## Run all tests +all: all1 all2 -all: \ - tree.precomputed.newick \ +#w/diff +all1: \ 
tree.default.nw \ + tree.default_local_tmp_dir.nw \ tree.default_verbose_debug.nw \ tree.file_list.nw \ - tree.file_list_proc_subst.nw + tree.file_list_proc_subst.nw \ + tree.file_list_proc_subst_two_lists.nw \ + tree.default_o-param.nw for x in $^; do \ 2>&1 echo Testing "$$x"; \ diff $$x tree.precomputed.newick; \ done -################################### -# Different ways to compute trees # -################################### +#w/o diff +all2: \ + tree.default_upgma.nw +data: ## Regenerate test data +data: tree.precomputed.newick file_list.txt + + +# Different ways to compute trees tree.default.nw: file_list.txt + @ >&2 printf "=====\\n$@\\n=====\\n" ../../attotree/attotree.py $(shell cat $<) > "$@.tmp" mv "$@.tmp" "$@" +tree.default_upgma.nw: file_list.txt + @ >&2 printf "=====\\n$@\\n=====\\n" + ../../attotree/attotree.py $(shell cat $<) -m upgma> "$@.tmp" + mv "$@.tmp" "$@" + +tree.default_local_tmp_dir.nw: file_list.txt + @ >&2 printf "=====\\n$@\\n=====\\n" + ../../attotree/attotree.py $(shell cat $<) -D -d tmp_dir > "$@.tmp" + mv "$@.tmp" "$@" + +tree.default_o-param.nw: file_list.txt + @ >&2 printf "=====\\n$@\\n=====\\n" + ../../attotree/attotree.py $(shell cat $<) -o "$@.tmp" + mv "$@.tmp" "$@" + tree.default_verbose_debug.nw: file_list.txt + @ >&2 printf "=====\\n$@\\n=====\\n" ../../attotree/attotree.py -V -D $(shell cat $<) > "$@.tmp" mv "$@.tmp" "$@" tree.file_list.nw: file_list.txt + @ >&2 printf "=====\\n$@\\n=====\\n" ../../attotree/attotree.py -L "$<" > "$@.tmp" mv "$@.tmp" "$@" tree.file_list_proc_subst.nw: file_list.txt + @ >&2 printf "=====\\n$@\\n=====\\n" ../../attotree/attotree.py -L <(head -n 99999 "$<") > "$@.tmp" mv "$@.tmp" "$@" +tree.file_list_proc_subst_two_lists.nw: file_list.txt + @ >&2 printf "=====\\n$@\\n=====\\n" + ../../attotree/attotree.py -L <(head -n 2 "$<") <(tail -n+3 "$<") > "$@.tmp" + mv "$@.tmp" "$@" + tree.precomputed.newick: tree.default.nw cp $< $@ -############## -# Input data # -############## - file_list.txt: 
test_spneumo.tar.xz tar xvf $< tar -tf $< > file_list.txt.tmp mv file_list.txt.tmp file_list.txt + +############### +## Auxiliary ## +############### help: ## Print help messages @echo -e "$$(grep -hE '^\S*(:.*)?##' $(MAKEFILE_LIST) \ | sed \ @@ -67,6 +101,7 @@ help: ## Print help messages clean: ## Clean rm -f *.nw *.tmp + rm -fr tmp_dir/* cleanall: clean ## Clean all rm -f file_list.txt *.fa diff --git a/tests/02_simple_tree/tmp_dir/.gitignore b/tests/02_simple_tree/tmp_dir/.gitignore new file mode 100644 index 0000000..a5baada --- /dev/null +++ b/tests/02_simple_tree/tmp_dir/.gitignore @@ -0,0 +1,3 @@ +* +!.gitignore + diff --git a/tests/02_simple_tree/tree.precomputed.newick b/tests/02_simple_tree/tree.precomputed.newick index 489a522..16dc601 100644 --- a/tests/02_simple_tree/tree.precomputed.newick +++ b/tests/02_simple_tree/tree.precomputed.newick @@ -1 +1 @@ -((203692:0.00537,001334:0.00473):0.00060,(302649:0.00543,101058:0.00531):0.00062,403790:0.01403); +((203692:0.00537,001334:0.00473):0.00060,(302649:0.00543,101058:0.00531):0.00062,403790:0.01403); \ No newline at end of file