Skip to content

Commit

Permalink
Merge pull request #45 from mcveanlab/cli-prototype
Browse files Browse the repository at this point in the history
CLI for running simulations
  • Loading branch information
jeromekelleher committed May 21, 2019
2 parents eb42eae + 3e0dd4b commit c87eb1e
Show file tree
Hide file tree
Showing 11 changed files with 660 additions and 2 deletions.
2 changes: 2 additions & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ jobs:
python setup.py egg_info
python setup.py bdist_wheel
pip install dist/*.tar.gz
python3 -m stdpopsim -V
stdpopsim -V
- run:
name: Build docs
Expand Down
1 change: 1 addition & 0 deletions requirements/development.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ sphinx-argparse
sphinx_rtd_theme
msprime
appdirs
daiquiri
5 changes: 5 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@
keywords='simulations, recombination map, models',
packages=['stdpopsim'],
include_package_data=True,
entry_points={
'console_scripts': [
'stdpopsim=stdpopsim.cli:stdpopsim_main',
]
},
install_requires=["msprime", "appdirs"],
url='https://github.com/popgensims/stdpopsim',
project_urls={
Expand Down
4 changes: 4 additions & 0 deletions stdpopsim/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from . import cli

if __name__ == "__main__":
cli.stdpopsim_main()
29 changes: 29 additions & 0 deletions stdpopsim/citations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""
Citation management for stdpopsim. Provides utilities for printing
citation information associated with different entities derived from the
literature that are used within a simulation.
"""


class CitableMixin(object):
"""
A simple class that provides class attributes which define citation
information about an particular entity that should be acknowledged
by citation. This is intendted to be used as a simple mixin class;
it does not store or change any state of inheriting objects.
"""

doi = None
"""
The DOI for the publication providing the definitive reference.
"""

author = None
"""
Short author list.
"""

year = None
"""
Year of publication as a 4 digit integer, e.g. 2008.
"""
321 changes: 321 additions & 0 deletions stdpopsim/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,321 @@
"""
The command line interface for stdpopsim. Allows provides standard simulations
at the command line.
"""
import argparse
import json
import logging
import platform
import sys
import msprime
import tskit

import stdpopsim
from stdpopsim import homo_sapiens


logger = logging.getLogger(__name__)
log_format = '%(asctime)s %(levelname)s %(message)s'


def exit(message):
"""
Exit with the specified error message, setting error status.
"""
sys.exit("{}: {}".format(sys.argv[0], message))


def setup_logging(args):
log_level = "WARN"
if args.verbosity > 0:
log_level = "INFO"
if args.verbosity > 1:
log_level = "DEBUG"
logging.basicConfig(level=log_level, format=log_format)


def get_environment():
"""
Returns a dictionary describing the environment in which stdpopsim
is currently running.
"""
env = {
"os": {
"system": platform.system(),
"node": platform.node(),
"release": platform.release(),
"version": platform.version(),
"machine": platform.machine(),
},
"python": {
"implementation": platform.python_implementation(),
"version": platform.python_version(),
},
"libraries": {
"msprime": {"version": msprime.__version__},
"tskit": {"version": tskit.__version__},
}
}
return env


def get_provenance_dict():
"""
Returns a dictionary encoding an execution of stdpopsim conforming to the
tskit provenance schema.
"""
document = {
"schema_version": "1.0.0",
"software": {
"name": "stdpopsim",
"version": stdpopsim.__version__
},
"parameters": {
"command": sys.argv[0],
"args": sys.argv[1:]
},
"environment": get_environment()
}
return document


def write_output(ts, args):
"""
Adds provenance information to the specified tree sequence (ensuring that the
output is reproducible) and write the resulting tree sequence to output.
"""
tables = ts.dump_tables()
logger.debug("Updating provenance")
provenance = get_provenance_dict()
tables.provenances.add_row(json.dumps(provenance))
ts = tables.tree_sequence()
logger.info(f"Writing to {args.output}")
ts.dump(args.output)


def write_citations(chromosome, model, args):
"""
Write out citation information so that the user knows what papers to cite
for the simulation engine, the model and the mutation/recombination rate
information.
"""
if not args.quiet:
# TODO say this better
print(
"If you use this simulation in published work, please cite the following "
"papers:")
print("******************")
print("Simulation engine:")
print("******************")
print(
"\tmsprime: Kelleher et al. 2016: "
"https://doi.org/10.1371/journal.pcbi.1004842")
print("******************")
print("Genetic map:")
print("******************")
print("\tTODO")
# TODO need some way to get a GeneticMap instance from the chromosome. We'll also
# want to be able to output mutation map, and perhaps other information too, so
# we want to keep some flexibility for this in mind.
print("Simulation model:")
print(f"\t{model.name}: {model.author} {model.year} {model.doi}")


# TODO This setup is quite repetive and we can clearly abstract some of this
# logic about sampling into the model object. What we want is for the model
# to contain the names of the populations plus some documtation descriptions
# from which we automatically generate the CLI and this runner inferface
# can then be entirely generic.

def run_human_gutenkunst_three_pop_ooa(args):
if args.num_yri_samples + args.num_ceu_samples + args.num_chb_samples < 2:
exit("Must specify at least 2 samples from YRI, CEU or CHB populations")

chromosome = homo_sapiens.chromosome_factory(
args.chromosome, genetic_map=args.genetic_map,
length_multiplier=args.length_multiplier)
logger.info(
f"Running GutenkunstThreePopOutOfAfrica with YRI={args.num_yri_samples}"
f" CEU={args.num_ceu_samples} CHB={args.num_chb_samples}")
samples = (
[msprime.Sample(population=0, time=0)] * args.num_yri_samples +
[msprime.Sample(population=1, time=0)] * args.num_ceu_samples +
[msprime.Sample(population=2, time=0)] * args.num_chb_samples)
model = homo_sapiens.GutenkunstThreePopOutOfAfrica()
ts = msprime.simulate(
samples=samples,
recombination_map=chromosome.recombination_map,
mutation_rate=chromosome.mutation_rate,
**model.asdict())
write_output(ts, args)
write_citations(chromosome, model, args)


def run_tennessen_two_pop_ooa(args):
if args.num_european_samples + args.num_african_samples < 2:
exit("Must specify at least 2 samples")

chromosome = homo_sapiens.chromosome_factory(
args.chromosome, genetic_map=args.genetic_map,
length_multiplier=args.length_multiplier)
model = homo_sapiens.TennessenTwoPopOutOfAfrica()
logger.info(
f"Running TennessenTwoPopOutOfAfrica with European={args.num_european_samples}"
f" African={args.num_african_samples}")
samples = (
[msprime.Sample(population=0, time=0)] * args.num_european_samples +
[msprime.Sample(population=1, time=0)] * args.num_african_samples)
ts = msprime.simulate(
samples=samples,
recombination_map=chromosome.recombination_map,
mutation_rate=chromosome.mutation_rate,
**model.asdict())
write_output(ts, args)
write_citations(chromosome, model, args)


def run_browning_america(args):
total_samples = (
args.num_european_samples + args.num_african_samples
+ args.num_asian_samples + args.num_american_samples)
if total_samples < 2:
exit("Must specify at least 2 samples")

chromosome = homo_sapiens.chromosome_factory(
args.chromosome, genetic_map=args.genetic_map,
length_multiplier=args.length_multiplier)
logger.info(
f"Running BrowningAmerica with European={args.num_european_samples} "
f"African={args.num_african_samples}, Asian={args.num_asian_samples} "
f"African={args.num_american_samples}")

samples = (
[msprime.Sample(population=0, time=0)] * args.num_european_samples +
[msprime.Sample(population=1, time=0)] * args.num_african_samples +
[msprime.Sample(population=2, time=0)] * args.num_asian_samples +
[msprime.Sample(population=3, time=0)] * args.num_american_samples)

model = homo_sapiens.BrowningAmerica()
ts = msprime.simulate(
samples=samples,
recombination_map=chromosome.recombination_map,
mutation_rate=chromosome.mutation_rate,
**model.asdict())
write_output(ts, args)
write_citations(chromosome, model, args)


def add_output_argument(parser):
parser.add_argument(
"output",
help="The file to write simulated tree sequence to")


def stdpopsim_cli_parser():

# TODO the CLI defined by this hierarchical and clumsy, but it's the best
# I could figure out. It can definitely be improved!
top_parser = argparse.ArgumentParser(
description="Run simulations defined by stdpopsim from the command line")
top_parser.add_argument(
"-V", "--version", action='version',
version='%(prog)s {}'.format(stdpopsim.__version__))
top_parser.add_argument(
"-v", "--verbosity", action='count', default=0,
help="Increase the verbosity")
top_parser.add_argument(
"-q", "--quiet", action='store_true',
help="Do not write out citation information")
subparsers = top_parser.add_subparsers(dest="subcommand")
subparsers.required = True

species_parser = subparsers.add_parser(
"homo-sapiens",
help="Run simulations of human history.")
species_parser.add_argument(
"-g", "--genetic-map", default=None,
# TODO use the genetic map registry
choices=["HapmapII_GRCh37", "Decode_2010_sex_averaged"],
help="Specify a particular genetic map. Use a flat map by default.")
species_parser.add_argument(
"-c", "--chromosome", default="chr22",
help="Simulate a specific chromosome")
species_parser.add_argument(
"-l", "--length-multiplier", default=1, type=float,
help="Simulate a chromsome of length l times the named chromosome")
subsubparsers = species_parser.add_subparsers(dest="subcommand")
subsubparsers.required = True

parser = subsubparsers.add_parser(
"GutenkunstThreePopOutOfAfrica",
help="Runs the three population out-of-Africa model")
parser.set_defaults(runner=run_human_gutenkunst_three_pop_ooa)
add_output_argument(parser)
parser.add_argument(
"--num-yri-samples", default=0, type=int,
help="The number of samples to take from the YRI population")
parser.add_argument(
"--num-ceu-samples", default=0, type=int,
help="The number of samples to take from the CEU population")
parser.add_argument(
"--num-chb-samples", default=0, type=int,
help="The number of samples to take from the CHB population")

parser = subsubparsers.add_parser(
"TennessenTwoPopOutOfAfrica",
help="Runs the TennessenTwoPopOutOfAfrica model.")
add_output_argument(parser)
parser.add_argument(
"--num-african-samples", default=0, type=int,
help="The number of samples to take from the African population")
parser.add_argument(
"--num-european-samples", default=0, type=int,
help="The number of samples to take from the European population")
parser.set_defaults(runner=run_tennessen_two_pop_ooa)

parser = subsubparsers.add_parser(
"BrowningAmerica",
help="Runs the BrowningAmerica model.")
add_output_argument(parser)
parser.add_argument(
"--num-african-samples", default=0, type=int,
help="The number of samples to take from the African population")
parser.add_argument(
"--num-european-samples", default=0, type=int,
help="The number of samples to take from the European population")
parser.add_argument(
"--num-asian-samples", default=0, type=int,
help="The number of samples to take from the Asian population")
parser.add_argument(
"--num-american-samples", default=0, type=int,
help="The number of samples to take from Admixed American population")
parser.set_defaults(runner=run_browning_america)

# Add stubs for discussion
species_parser = subparsers.add_parser(
"arabadopsis-thaliana",
help="Run simulations of Arabadopsis thaliana.")

species_parser = subparsers.add_parser(
"e-coli",
help="Run simulations of E-coli.")

return top_parser


def stdpopsim_main(arg_list=None):
parser = stdpopsim_cli_parser()
args = parser.parse_args(arg_list)
setup_logging(args)
if not args.quiet:
print("*****************************")
print("** WARNING **")
print("*****************************")
print("This inferface is highly experimental and *will* change. ")
print("It is provided as a work-in-progress to get some feedback on ")
print("how to better structure the interfaces")
print("Disable this message with the -q option")
print("*****************************")
print("** END WARNING **")
print("*****************************")
args.runner(args)
3 changes: 2 additions & 1 deletion stdpopsim/genetic_maps.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import msprime

import stdpopsim
import stdpopsim.citations as citations

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -78,7 +79,7 @@ def __get__(self, obj, owner):
return self.f(owner)


class GeneticMap(object):
class GeneticMap(citations.CitableMixin):
"""
Class representing a genetic map for a species. Provides functionality for
downloading and cacheing recombination maps from a remote URL.
Expand Down

0 comments on commit c87eb1e

Please sign in to comment.