Skip to content

Commit

Permalink
Merge pull request #664 from apragsdale/mask_by_bed
Browse files Browse the repository at this point in the history
Masking simulated data
  • Loading branch information
grahamgower committed Nov 6, 2020
2 parents 7e1a88d + 4b8d29e commit c649d7f
Show file tree
Hide file tree
Showing 10 changed files with 858 additions and 337 deletions.
52 changes: 34 additions & 18 deletions stdpopsim/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,22 +371,33 @@ def add_simulate_species_parser(parser, species):
"Available maps: "
f"{', '.join(choices)}. "))

if len(species.genome.chromosomes) == 1:
species_parser.set_defaults(chromosome=species.genome.chromosomes[0].id)
else:
# To avoid listing too much stuff out in the help, we only list
# the actual IDs. We make all synonyms available as choices though.
choices = []
all_choices = []
for chrom in species.genome.chromosomes:
choices.append(chrom.id)
all_choices.extend([chrom.id] + chrom.synonyms)
species_parser.add_argument(
"-c", "--chromosome", choices=all_choices, metavar="", default=choices[0],
help=(
f"Simulate a specific chromosome. "
f"Options: {', '.join(choices)}. "
f"Default={choices[0]}."))
# To avoid listing too much stuff out in the help, we only list
# the actual IDs. We make all synonyms available as choices though.
choices = []
all_choices = []
for chrom in species.genome.chromosomes:
choices.append(chrom.id)
all_choices.extend([chrom.id] + chrom.synonyms)
species_parser.add_argument(
"-c", "--chromosome", choices=all_choices, metavar="", default=None,
help=(
f"Simulate a specific chromosome. If no chromosome is given, "
f"simulate a generic contig of given length, specified using --length. "
f"Options: {', '.join(choices)}. "
f"Default=None."))

species_parser.add_argument(
"-L", "--length", default=None, type=float,
help="Simulate a default contig of given length."
)
species_parser.add_argument(
"-i", "--inclusion-mask", default=None, type=str,
help="Path to inclusion mask specified in bed format."
)
species_parser.add_argument(
"-e", "--exclusion-mask", default=None, type=str,
help="Path to exclusion mask specified in bed format."
)
species_parser.add_argument(
"-l", "--length-multiplier", default=1, type=float,
help="Simulate a sequence of length l times the named chromosome's length, "
Expand Down Expand Up @@ -440,8 +451,13 @@ def run_simulation(args):
"populations")
samples = model.get_samples(*args.samples)
contig = species.get_contig(
args.chromosome, genetic_map=args.genetic_map,
length_multiplier=args.length_multiplier)
args.chromosome,
genetic_map=args.genetic_map,
length_multiplier=args.length_multiplier,
length=args.length,
inclusion_mask=args.inclusion_mask,
exclusion_mask=args.exclusion_mask,
)
engine = stdpopsim.get_engine(args.engine)
logger.info(
f"Running simulation model {model.id} for {species.id} on "
Expand Down
75 changes: 49 additions & 26 deletions stdpopsim/engines.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,13 @@ class Engine:
"""

def simulate(
self, demographic_model=None, contig=None, samples=None, seed=None,
dry_run=False):
self,
demographic_model=None,
contig=None,
samples=None,
seed=None,
dry_run=False,
):
"""
Simulates the model for the specified contig and samples.
Expand Down Expand Up @@ -99,25 +104,36 @@ class _MsprimeEngine(Engine):
id = "msprime" #:
description = "Msprime coalescent simulator" #:
citations = [
stdpopsim.Citation(
doi="https://doi.org/10.1371/journal.pcbi.1004842",
year="2016",
author="Kelleher et al.",
reasons={stdpopsim.CiteReason.ENGINE}),
]
stdpopsim.Citation(
doi="https://doi.org/10.1371/journal.pcbi.1004842",
year="2016",
author="Kelleher et al.",
reasons={stdpopsim.CiteReason.ENGINE},
)
]
# We default to the first model in the list.
supported_models = ["hudson", "dtwf", "smc", "smc_prime"]
model_citations = {"dtwf": [
stdpopsim.Citation(
doi="https://doi.org/10.1371/journal.pgen.1008619",
year="2020",
author="Nelson et al.",
reasons={stdpopsim.CiteReason.ENGINE}),
]}
model_citations = {
"dtwf": [
stdpopsim.Citation(
doi="https://doi.org/10.1371/journal.pgen.1008619",
year="2020",
author="Nelson et al.",
reasons={stdpopsim.CiteReason.ENGINE},
)
]
}

def simulate(
self, demographic_model=None, contig=None, samples=None, seed=None,
msprime_model=None, msprime_change_model=None, dry_run=False):
self,
demographic_model=None,
contig=None,
samples=None,
seed=None,
msprime_model=None,
msprime_change_model=None,
dry_run=False,
):
"""
Simulate the demographic model using msprime.
See :meth:`.Engine.simulate()` for definitions of parameters defined
Expand Down Expand Up @@ -154,15 +170,22 @@ def simulate(
demographic_events.sort(key=lambda x: x.time)

ts = msprime.simulate(
samples=samples,
recombination_map=contig.recombination_map,
mutation_rate=contig.mutation_rate,
population_configurations=demographic_model.population_configurations,
migration_matrix=demographic_model.migration_matrix,
demographic_events=demographic_events,
random_seed=seed,
model=msprime_model,
end_time=0 if dry_run else None)
samples=samples,
recombination_map=contig.recombination_map,
mutation_rate=contig.mutation_rate,
population_configurations=demographic_model.population_configurations,
migration_matrix=demographic_model.migration_matrix,
demographic_events=demographic_events,
random_seed=seed,
model=msprime_model,
end_time=0 if dry_run else None,
)

if contig.inclusion_mask is not None:
ts = stdpopsim.utils.mask_tree_sequence(ts, contig.inclusion_mask, False)
if contig.exclusion_mask is not None:
ts = stdpopsim.utils.mask_tree_sequence(ts, contig.exclusion_mask, True)

if dry_run:
ts = None
return ts
Expand Down
26 changes: 20 additions & 6 deletions stdpopsim/genomes.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ class Genome:
:ivar length: The total length of the genome.
:vartype length: int
"""

chromosomes = attr.ib(factory=list)
mutation_rate_citations = attr.ib(factory=list, kw_only=True)
recombination_rate_citations = attr.ib(factory=list, kw_only=True)
Expand Down Expand Up @@ -98,6 +99,7 @@ class Chromosome:
chromosome by ID, e.g. from the command line interface.
:vartype synonyms: list of str
"""

id = attr.ib(type=str, kw_only=True)
length = attr.ib(kw_only=True)
recombination_rate = attr.ib(type=float, kw_only=True)
Expand All @@ -119,9 +121,16 @@ class Contig:
<https://msprime.readthedocs.io/en/stable/api.html#msprime.RecombinationMap>`_
for more details.
:vartype recombination_map: msprime.simulations.RecombinationMap
:ivar mask_intervals: Intervals to keep in simulated tree sequence, as a list
of (left_position, right_position), such that intervals are non-overlapping
and in ascending order. Should have shape Nx2, where N is the number of
intervals.
:vartype mask_intervals: array-like (?)
:ivar exclude: If True, ``mask_intervals`` specify regions to exclude. If False,
``mask_intervals`` specify regions in keep.
:vartype exclude: bool
.. note::
To run stdpopsim simulations with alternative, user-specified mutation
or recombination rates, a new contig can be created based on an existing
one. For instance, the following will create a ``new_contig`` that,
Expand All @@ -135,17 +144,22 @@ class Contig:
genetic_map=old_contig.genetic_map,
)
"""

recombination_map = attr.ib(default=None, kw_only=True)
mutation_rate = attr.ib(default=None, type=float, kw_only=True)
genetic_map = attr.ib(default=None, kw_only=True)
inclusion_mask = attr.ib(default=None, kw_only=True)
exclusion_mask = attr.ib(default=None, kw_only=True)

def __str__(self):
gmap = "None" if self.genetic_map is None else self.genetic_map.id
s = (
"Contig(length={:.2G}, recombination_rate={:.2G}, "
"mutation_rate={:.2G}, genetic_map={})").format(
self.recombination_map.get_length(),
self.recombination_map.mean_recombination_rate,
self.mutation_rate,
gmap)
"mutation_rate={:.2G}, genetic_map={})"
).format(
self.recombination_map.get_length(),
self.recombination_map.mean_recombination_rate,
self.mutation_rate,
gmap,
)
return s

0 comments on commit c649d7f

Please sign in to comment.