Skip to content

Commit

Permalink
R compatibility
Browse files Browse the repository at this point in the history
This PR makes sure Phlorest phylogenies can be read by several commonly used
R packages (ape, treeio, rncl and tracerer).

closes #36
  • Loading branch information
xrotwang committed Nov 16, 2023
1 parent 1264f9f commit e3336ac
Show file tree
Hide file tree
Showing 9 changed files with 115 additions and 15 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
python-version: [3.8, 3.9, "3.10", "3.11"]

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,10 @@ cldfbench makecldf --glottolog-version vX.Y --with-cldfreadme cldfbench_<id>.py
pytest
cldfbench zenodo --communities phlorest cldfbench_<id>.py
cldfbench readme cldfbench_<id>.py
phlorest check cldfbench_<id>.py --with-R
git commit -a -m"release vX.Y"
git push origin
cldfbench release cldfbench_<id>.py vX.Y
phlorest release cldfbench_<id>.py vX.Y
```


Expand Down
6 changes: 3 additions & 3 deletions RELEASING.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@ Releasing phlorest
==================

- Do platform test via tox:
```
```shell
tox -r
```

- Make sure flake8 passes:
```
```shell
flake8 src
```

Expand All @@ -34,7 +34,7 @@ twine upload dist/*
```

- Push to github:
```
```shell
git push origin
git push --tags
```
Expand Down
5 changes: 3 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ classifiers =
Programming Language :: Python :: 3.9
Programming Language :: Python :: 3.10
Programming Language :: Python :: 3.11
Programming Language :: Python :: 3.12
License :: OSI Approved :: Apache Software License

[options]
Expand All @@ -33,7 +34,7 @@ package_dir =
python_requires = >=3.8
install_requires =
newick>=1.9
commonnexus>=1.5
commonnexus>=1.8
cldfviz>=0.10
pycldf>=1.33.0
clldutils
Expand Down Expand Up @@ -105,7 +106,7 @@ show_missing = true
skip_covered = true
[tox:tox]
envlist = py38, py39, py310, py311
envlist = py38, py39, py310, py311, py312
isolated_build = true
skip_missing_interpreter = true
Expand Down
58 changes: 58 additions & 0 deletions src/phlorest/check.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# install.packages('ape')
# if (!require("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
# BiocManager::install("treeio")
# install.packages('rncl')
# remotes::install_github("ropensci/tracerer")
# install.packages('testthat')

library(ape) # APE: the standard package, no annotations
suppressMessages(library(treeio)) # treeio: considered the 'newer' and 'better' package for trees
library(rncl) # rncl: based on nexus class library, so should be fast but no annotations.
library(tracerer) # tracerer: loads BEAST annotations

library(testthat)

# Usage: Rscript check.R <treefile> <ntrees>
#
#   treefile  path of the NEXUS file to read
#   ntrees    number of trees the file is expected to contain
#
# Each reader below is run on the file; the script exits with a non-zero
# status if any reader raises an error or returns an unexpected number of
# trees.
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 2) {
  cat("Usage: Rscript check.R <treefile> <ntrees>\n")
  quit(status = 2L)
}
treefile <- args[1]
ntrees <- strtoi(args[2], base = 10L)
if (is.na(ntrees)) {
  # strtoi returns NA for anything that is not a valid integer literal.
  cat(sprintf("Invalid number of trees: %s\n", args[2]))
  quit(status = 2L)
}

# Exit status of the script: set to 1 as soon as one reader fails.
res <- 0L

# Maps a reader name to a function taking the path of a NEXUS file and
# returning an object whose length is the number of trees read.
readers <- list(
  "ape" = function(x) ape::read.nexus(x, force.multi = TRUE),
  "treeio" = function(x) treeio::read.beast(x),
  "rncl" = function(x) {
    trees <- rncl::read_nexus_phylo(x)
    # We force multi "by hand".
    # `inherits` is used instead of comparing `class(trees)` directly: `class`
    # may return more than one element, and a condition of length > 1 in `if`
    # is an error in R >= 4.2.
    if (!inherits(trees, "multiPhylo")) {
      # Wrap the single tree in a one-element list.
      trees <- list(trees)
      class(trees) <- "multiPhylo"
    }
    trees
  },
  "tracerer" = function(x) tracerer::parse_beast_trees(x)
)

for (rdr in names(readers)) {
  # Only use tracerer with files with multiple trees, because it requires tree
  # names to start with STATE_ ...
  if (ntrees == 1 && rdr == "tracerer") {
    next
  }
  cat(sprintf("READER: %s -- %s\n", rdr, treefile))
  tryCatch(
    expr = withCallingHandlers(
      {
        trees <- readers[[rdr]](treefile)
        # Make sure we read the correct number of trees:
        testthat::expect_equal(length(trees), ntrees)
        cat("OK\n")
      },
      # Warnings are reported but must not abort the check: a calling handler
      # plus the "muffleWarning" restart resumes evaluation, so the tree count
      # is still verified. (A `warning` handler on tryCatch would unwind the
      # whole expression and leave the exit status at 0 without any check.)
      warning = function(w) {
        print(w)
        invokeRestart("muffleWarning")
      }
    ),
    error = function(e) {
      res <<- 1L
      print(e)
    }
  )
}
quit(status = res)
3 changes: 2 additions & 1 deletion src/phlorest/cldfwriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,8 @@ def add_posterior(self,
self.add_tree(
tree,
self.posterior,
'posterior-{}'.format(i),
# We use a name format that works with the `tracerer` package for R:
'STATE_{}'.format(i),
metadata,
log,
'sample',
Expand Down
40 changes: 39 additions & 1 deletion src/phlorest/commands/check.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,28 @@
"""
Checks datasets for compliance
"""
import shutil
import pathlib
import zipfile
import subprocess

from clldutils.path import TemporaryDirectory
from termcolor import colored
from cldfbench.cli_util import add_dataset_spec, get_dataset
from commonnexus import Nexus

from phlorest.check import run_checks


def register(parser):  # pragma: no cover
    """
    Register the command's arguments with `parser`.

    Adds the dataset spec arguments via `add_dataset_spec` and a boolean
    `--with-R` flag (off unless given on the command line).
    """
    with_r_help = (
        "Make sure the NEXUS files of the dataset can be read with commonly used R packages."
        "\nNOTE: This requires the Rscript command and an R installation with the relevant "
        "packages."
    )
    add_dataset_spec(parser)
    parser.add_argument('--with-R', action='store_true', default=False, help=with_r_help)


def run(args, d=None):
Expand All @@ -19,5 +33,29 @@ def run(args, d=None):
args.log.error("Unable to load %s - %s" % (args.dataset, e))
raise

msg, color = ('PASS', 'green') if run_checks(d, args.log) else ('FAIL', 'red')
success = True
if args.with_R: # pragma: no cover
for fname in {'summary.trees', 'posterior.trees.zip'}:
if d.cldf_dir.joinpath(fname).exists():
p = d.cldf_dir / fname
with TemporaryDirectory() as tmp:
if fname.endswith('.zip'):
with zipfile.ZipFile(p) as zip:
zip.extract(zip.infolist()[0], tmp)
assert tmp.joinpath(p.stem).exists()
p = tmp / p.stem
else:
shutil.copy(p, tmp / p.name)
p = tmp / p.name
ntrees = len(Nexus.from_file(p).TREES.commands['TREE'])
res = subprocess.call([
'Rscript',
str(pathlib.Path(__file__).parent.parent / 'check.R'),
str(p),
str(ntrees),
])
if res:
success = False

msg, color = ('PASS', 'green') if run_checks(d, args.log) and success else ('FAIL', 'red')
print('{} {}'.format(colored(msg, color, attrs=['bold']), d.id))
9 changes: 5 additions & 4 deletions src/phlorest/nexuslib.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@ def _rescaler(factor, n):
nwk = tree.newick
nwk.visit(functools.partial(_rescaler, RESCALE_TO_YEARS[orig_scaling]))
trees.append((tree.name, nwk, tree.rooted))
nex.replace_block(
nex.TREES,
Trees.from_data(*trees, **nex.TREES.TRANSLATE.mappings if nex.TREES.TRANSLATE else {}))
kwarg = nex.TREES.TRANSLATE.mappings if nex.TREES.TRANSLATE else {}
kwarg.update(lowercase_command=True)
nex.replace_block(nex.TREES, Trees.from_data(*trees, **kwarg))
return nex
raise ValueError('Cannot rescale {} to years.'.format(orig_scaling))

Expand Down Expand Up @@ -104,7 +104,8 @@ def __enter__(self):

def __exit__(self, exc_type, exc_val, exc_tb):
if self._trees:
nex = Nexus.from_blocks(Trees.from_data(*self._trees))
nex = Nexus.from_blocks(
Trees.from_data(*self._trees, **dict(lowercase_command=True)))
nex.to_file(self.path)
if self.zipped:
with zipfile.ZipFile(
Expand Down
4 changes: 2 additions & 2 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@


def test_check(dataset, caplog):
check.run(argparse.Namespace(log=logging.getLogger(__name__)), dataset)
check.run(argparse.Namespace(log=logging.getLogger(__name__), with_R=False), dataset)
assert len(caplog.records) >= 4


Expand All @@ -26,7 +26,7 @@ class DS(Dataset):
dir = tmp_repos
id = 'phy'

check.run(argparse.Namespace(log=logging.getLogger(__name__)), DS())
check.run(argparse.Namespace(log=logging.getLogger(__name__), with_R=False), DS())


def test_main(dataset):
Expand Down

0 comments on commit e3336ac

Please sign in to comment.