Skip to content

Commit

Permalink
4.3.19
Browse files Browse the repository at this point in the history
  • Loading branch information
smirarab committed Jan 13, 2021
1 parent a5a9da6 commit 4056b39
Show file tree
Hide file tree
Showing 6 changed files with 51 additions and 6 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Expand Up @@ -5,7 +5,7 @@ os:
- linux
- osx
env:
- PYVERSION="3.5"
- PYVERSION="3.7"
- PYVERSION="3.8"
before_install:
- echo "$TRAVIS_OS_NAME"
Expand Down
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,6 @@
* Version 4.3.19:
* Enable `--symfrac` from config file
* Enable config files that overwrite the default values only when provided
* Version 4.3.18:
* UPP speed improvement for cases with super gappy backbone alignment (more can be done)
* Slightly better logging
Expand Down
11 changes: 11 additions & 0 deletions sepp/alignment.py
Expand Up @@ -332,12 +332,23 @@ def remove_columns(self, indexes):
self[name] = ''.join((
char for idx, char in enumerate(seq) if idx not in indexes))

def keep_columns(self, indexes):
    """
    Keep only the columns whose 0-based position is listed in `indexes`
    and drop every other column from every sequence, in place.

    `indexes` may be any iterable of column positions. It is materialized
    into a set once so that the per-character membership test is O(1)
    (instead of a linear scan of a list for every character) and so that a
    one-shot iterator is not exhausted while filtering the first sequence.
    """
    wanted = frozenset(indexes)
    # Only values of existing keys are reassigned, so the mapping does not
    # change size while it is being iterated.
    for name, seq in self.items():
        self[name] = ''.join(
            char for idx, char in enumerate(seq) if idx in wanted)

def get_all_gap_cols(self):
    """
    Return the list of 0-based column positions at which every sequence
    has the gap character '-', in increasing order.
    """
    candidates = list(range(0, self.get_length()))
    for row in self.values():
        # Narrow the candidates to the columns that are also a gap in
        # this sequence; the list object is updated in place.
        still_gap = []
        for col in candidates:
            if row[col] == '-':
                still_gap.append(col)
        candidates[:] = still_gap
    return candidates

def get_all_nongap_cols(self):
    """
    Return the list of 0-based column positions at which at least one
    sequence has a character other than the gap character '-', in
    increasing order.

    The all-gap columns are tracked in a set so that the final membership
    test is O(1) per column; testing against a list made this step
    quadratic in the alignment length for very gappy alignments.
    """
    length = self.get_length()
    all_gaps = set(range(length))
    for seq in self.values():
        if not all_gaps:
            # No all-gap candidate is left; remaining sequences cannot
            # change the result.
            break
        all_gaps = {i for i in all_gaps if seq[i] == '-'}
    return [x for x in range(length) if x not in all_gaps]

def delete_all_gap(self):
"""
Delete all sites that consists of nothing but gaps
Expand Down
7 changes: 6 additions & 1 deletion sepp/config.py
Expand Up @@ -59,6 +59,7 @@ def set_main_config_path(filename):


def _read_config_file(filename, opts, expand=None):
_LOG.debug("Reading config %s" %filename)
config_defaults = []
cparser = configparser.ConfigParser()
cparser.optionxform = str
Expand All @@ -72,7 +73,10 @@ def _read_config_file(filename, opts, expand=None):
for section in cparser.sections():
if section == "commandline":
continue
section_name_space = Namespace()
if getattr(opts, section, None):
section_name_space = getattr(opts, section)
else:
section_name_space = Namespace()
for (k, v) in cparser.items(section):
if expand and k == "path":
v = os.path.join(expand, v)
Expand Down Expand Up @@ -351,6 +355,7 @@ def error_callback(message):

parser.error = error_callback

_LOG.debug(str(input_args))
''' Read commandline options again to overwrite config file values'''
opts = parser.parse_args(input_args, namespace=opts)
random.seed(opts.seed)
Expand Down
11 changes: 7 additions & 4 deletions sepp/jobs.py
Expand Up @@ -265,12 +265,15 @@ def setup_for_subproblem(self, subproblem, symfrac=True,
self.options = kwargs['options']

def get_invocation(self):
invoc = [self.path, '--ere', '0.59', "--cpu", "1",
useroptions = self.options.split()
invoc = [self.path, "--cpu", "1",
"--%s" % self.molecule]
if self.symfrac is True:
if "--ere" not in useroptions:
invoc.extend(['--ere', '0.59'])
if self.symfrac is True and "--symfrac" not in useroptions:
invoc.extend(["--symfrac", "0.0"])
if self.options != "":
invoc.extend(self.options.split())
if useroptions:
invoc.extend(useroptions)
if self.informat == "fasta":
invoc.extend(['--informat', 'afa'])
invoc.extend([self.outfile, self.infile])
Expand Down
23 changes: 23 additions & 0 deletions tutorial/upp-tutorial.md
Expand Up @@ -224,6 +224,29 @@ run_upp.py -c sample.config -o config_example
```

---------



Hints for scalability
---
If UPP is too slow or takes too much memory, consider the following options:

* Use `-A` to increase alignment subset size. The default, 10, is too small for very large alignments.
For example, for 10,000 species, consider using `-A 100` instead of the default `-A 10`.
* If your backbone alignment (either what you provide or the PASTA alignment that UPP internally estimates) becomes
too long and gappy (e.g., more than 100,000 sites), you can try the following option.
* Make a config file (e.g., `upp.conf`) and put the following in it:
~~~
[hmmbuild]
options = --symfrac 0.05
~~~
* The `0.05` tells HMMBuild to treat sites that are more than 95% gaps (i.e., fewer than 5% residues) as insertion sites rather than match sites.
This can make the HMMs smaller. Choose a value that makes sense for your data, based on how gappy it is.
* Run UPP with the `-c upp.conf` option.
* If you have set `-B` to a very large value (say more than 10,000), consider lowering it.
Very large numbers of species in the backbone can make the PASTA alignment very gappy and memory intensive.


Contact
===

Expand Down

0 comments on commit 4056b39

Please sign in to comment.