Skip to content

Commit

Permalink
Merge pull request #959 from grahamgower/contig
Browse files Browse the repository at this point in the history
Make contigs fully_neutral() by default.
  • Loading branch information
grahamgower committed Jun 17, 2021
2 parents a497795 + 200692e commit de8c215
Show file tree
Hide file tree
Showing 19 changed files with 1,019 additions and 915 deletions.
2 changes: 1 addition & 1 deletion stdpopsim/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ def download(self):
os.rename(local_path, self.cache_path)
except (OSError, FileExistsError):
warnings.warn(
"Error occured renaming map directory. Are multiple processes"
"Error occured renaming map directory. Are multiple processes "
"downloading this map at the same time?"
)
return
Expand Down
3 changes: 2 additions & 1 deletion stdpopsim/catalog/HomSap/annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,5 @@
)
],
)
_species.add_annotations(_an)
# XXX: zarr deleted from amazon
# _species.add_annotations(_an)
154 changes: 136 additions & 18 deletions stdpopsim/genomes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,12 @@
regions to be simulated.
"""
import logging
import warnings
import attr

import numpy as np
import msprime

import stdpopsim

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -167,7 +171,7 @@ def __attrs_post_init__(self):
stdpopsim.utils.check_intervals_validity(self.intervals)


@attr.s
@attr.s(kw_only=True)
class Contig:
"""
Class representing a contiguous region of genome that is to be
Expand Down Expand Up @@ -211,13 +215,128 @@ class Contig:
)
"""

recombination_map = attr.ib(default=None, kw_only=True)
mutation_rate = attr.ib(default=None, type=float, kw_only=True)
genetic_map = attr.ib(default=None, kw_only=True)
inclusion_mask = attr.ib(default=None, kw_only=True)
exclusion_mask = attr.ib(default=None, kw_only=True)
genomic_element_types = attr.ib(factory=list, type=list, kw_only=True)
mutation_types = attr.ib(factory=list, type=list, kw_only=True)
recombination_map = attr.ib()
mutation_rate = attr.ib(type=float)
genetic_map = attr.ib(default=None)
inclusion_mask = attr.ib(default=None)
exclusion_mask = attr.ib(default=None)
genomic_element_types = attr.ib(factory=list)
mutation_types = attr.ib(factory=list)

def __attrs_post_init__(self):
self.fully_neutral()

@staticmethod
def basic_contig(*, length, mutation_rate=0, recombination_rate=0):
recomb_map = msprime.RateMap.uniform(length, recombination_rate)
return Contig(recombination_map=recomb_map, mutation_rate=mutation_rate)

@staticmethod
def species_contig(
*,
species,
chromosome=None,
genetic_map=None,
length_multiplier=1,
length=None,
mutation_rate=None,
inclusion_mask=None,
exclusion_mask=None,
):
"""
Build a Contig for a species.
"""
# TODO: add non-autosomal support
non_autosomal_lower = ["x", "y", "m", "mt", "chrx", "chry", "chrm"]
if chromosome is not None and chromosome.lower() in non_autosomal_lower:
warnings.warn(
stdpopsim.NonAutosomalWarning(
"Non-autosomal simulations are not yet supported. See "
"https://github.com/popsim-consortium/stdpopsim/issues/383 and "
"https://github.com/popsim-consortium/stdpopsim/issues/406"
)
)
if chromosome is None:
if genetic_map is not None:
raise ValueError("Cannot use genetic map with generic contic")
if length_multiplier != 1:
raise ValueError("Cannot use length multiplier for generic contig")
if inclusion_mask is not None or exclusion_mask is not None:
raise ValueError("Cannot use mask with generic contig")
if length is None:
raise ValueError("Must specify sequence length of generic contig")
L_tot = 0
r_tot = 0
u_tot = 0
for chrom_data in species.genome.chromosomes:
if chrom_data.id.lower() not in non_autosomal_lower:
L_tot += chrom_data.length
r_tot += chrom_data.length * chrom_data.recombination_rate
u_tot += chrom_data.length * chrom_data.mutation_rate
if mutation_rate is None:
mutation_rate = u_tot / L_tot
r = r_tot / L_tot
contig = Contig.basic_contig(
length=length,
mutation_rate=mutation_rate,
recombination_rate=r,
)
else:
if length is not None:
raise ValueError("Cannot specify sequence length for named contig")
if inclusion_mask is not None and exclusion_mask is not None:
raise ValueError("Cannot specify both inclusion and exclusion masks")
chrom = species.genome.get_chromosome(chromosome)
if genetic_map is None:
logger.debug(f"Making flat chromosome {length_multiplier} * {chrom.id}")
gm = None
recomb_map = msprime.RateMap.uniform(
round(chrom.length * length_multiplier), chrom.recombination_rate
)
else:
if length_multiplier != 1:
raise ValueError("Cannot use length multiplier with empirical maps")
logger.debug(f"Getting map for {chrom.id} from {genetic_map}")
gm = species.get_genetic_map(genetic_map)
recomb_map = gm.get_chromosome_map(chrom.id)

inclusion_intervals = None
exclusion_intervals = None
if inclusion_mask is not None:
if length_multiplier != 1:
raise ValueError("Cannot use length multiplier with mask")
if isinstance(inclusion_mask, str):
inclusion_intervals = stdpopsim.utils.read_bed(
inclusion_mask, chromosome
)
else:
inclusion_intervals = inclusion_mask
if exclusion_mask is not None:
if length_multiplier != 1:
raise ValueError("Cannot use length multiplier with mask")
if isinstance(exclusion_mask, str):
exclusion_intervals = stdpopsim.utils.read_bed(
exclusion_mask, chromosome
)
else:
exclusion_intervals = exclusion_mask

if mutation_rate is None:
mutation_rate = chrom.mutation_rate

contig = stdpopsim.Contig(
recombination_map=recomb_map,
mutation_rate=mutation_rate,
genetic_map=gm,
inclusion_mask=inclusion_intervals,
exclusion_mask=exclusion_intervals,
)

return contig

@property
def length(self):
return self.recombination_map.sequence_length

@property
def all_intervals_array(self):
Expand All @@ -242,11 +361,17 @@ def add_mutation_types(self, mutation_types, proportions, genomic_element_type_i
Adds mutation types with their respective proportions to the genomic
element type provided.
"""
if genomic_element_type_id >= len(self.genomic_element_types):
if not (0 <= genomic_element_type_id < len(self.genomic_element_types)):
raise ValueError(
f"Genomic element type of id {genomic_element_type_id} does not exist."
)

if len(mutation_types) != len(proportions):
raise ValueError(
f"number of mutation_types ({len(mutation_types)}) doesn't match "
f"the number of proportions ({len(proportions)})"
)

def _is_index(y, X):
for i, e in enumerate(X):
if e is y:
Expand All @@ -270,11 +395,6 @@ def _is_index(y, X):
)

def add_genomic_element_type(self, intervals, mutation_types, proportions):
if self.recombination_map is None:
raise ValueError(
"You cannot add genomic element types to a "
"contig that has no recombination map."
)
stdpopsim.utils.check_intervals_validity(intervals)
ge_type = stdpopsim.GenomicElementType(intervals=intervals)
self.genomic_element_types.append(ge_type)
Expand All @@ -289,10 +409,8 @@ def clear_genomic_mutation_types(self):

def fully_neutral(self, slim_mutations=False, convert_to_substitution=True):
self.clear_genomic_mutation_types()
if self.recombination_map is None:
raise ValueError("You must specify a recombination map.")
self.add_genomic_element_type(
intervals=np.array([[0, int(self.recombination_map.sequence_length)]]),
intervals=np.array([[0, int(self.length)]]),
mutation_types=[
stdpopsim.ext.MutationType(
convert_to_substitution=convert_to_substitution
Expand All @@ -308,7 +426,7 @@ def __str__(self):
"mutation_rate={:.2G}, genetic_map={}, genomic_element_types={}, "
"mutation_types={})"
).format(
self.recombination_map.sequence_length,
self.length,
self.recombination_map.mean_rate,
self.mutation_rate,
gmap,
Expand Down
16 changes: 0 additions & 16 deletions stdpopsim/slim_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -1135,14 +1135,6 @@ def simulate(
"compare results across different values of the scaling factor."
)
)
if contig.recombination_map is None:
raise ValueError(
"Your Contig object is not compatible with the SLiM "
"engine. You must specify a recombination map."
)
# if no GET were defined, fully neutral sim with overlay of muts using msprime
if len(contig.genomic_element_types) == 0:
contig.fully_neutral()

# figuring out mutations in SLiM and neutral mutations to be added
# by msprime later
Expand Down Expand Up @@ -1379,14 +1371,6 @@ def recap_and_rescale(
should be consulted to determine if the behaviour is appropriate
for your case.
"""
if contig.recombination_map is None:
raise ValueError(
"Your Contig object is not compatible with the SLiM "
"engine. You must specify a recombination map."
)
# if no GET were defined, fully neutral sim with overlay of muts using msprime
if len(contig.genomic_element_types) == 0:
contig.fully_neutral()
msp_rate_map, slim_rate_map = get_msp_and_slim_mutation_rate_maps(contig)

with open(os.devnull, "w") as script_file:
Expand Down
98 changes: 10 additions & 88 deletions stdpopsim/species.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,8 @@
organising the species catalog.
"""
import logging
import warnings

import attr
import msprime

import stdpopsim
import stdpopsim.utils
Expand Down Expand Up @@ -187,92 +185,16 @@ def get_contig(
:rtype: :class:`.Contig`
:return: A :class:`.Contig` describing the section of the genome.
"""
# TODO: add non-autosomal support
non_autosomal_lower = ["x", "y", "m", "mt", "chrx", "chry", "chrm"]
if chromosome is not None and chromosome.lower() in non_autosomal_lower:
warnings.warn(
stdpopsim.NonAutosomalWarning(
"Non-autosomal simulations are not yet supported. See "
"https://github.com/popsim-consortium/stdpopsim/issues/383 and "
"https://github.com/popsim-consortium/stdpopsim/issues/406"
)
)
if chromosome is None:
if genetic_map is not None:
raise ValueError("Cannot use genetic map with generic contic")
if length_multiplier != 1:
raise ValueError("Cannot use length multiplier for generic contig")
if inclusion_mask is not None or exclusion_mask is not None:
raise ValueError("Cannot use mask with generic contig")
if length is None:
raise ValueError("Must specify sequence length of generic contig")
L_tot = 0
r_tot = 0
u_tot = 0
for chrom_data in self.genome.chromosomes:
if chrom_data.id.lower() not in non_autosomal_lower:
L_tot += chrom_data.length
r_tot += chrom_data.length * chrom_data.recombination_rate
u_tot += chrom_data.length * chrom_data.mutation_rate
if mutation_rate is None:
mutation_rate = u_tot / L_tot
r = r_tot / L_tot
recomb_map = msprime.RateMap.uniform(length, r)
ret = stdpopsim.Contig(
recombination_map=recomb_map, mutation_rate=mutation_rate
)
else:
if length is not None:
raise ValueError("Cannot specify sequence length for named contig")
if inclusion_mask is not None and exclusion_mask is not None:
raise ValueError("Cannot specify both inclusion and exclusion masks")
chrom = self.genome.get_chromosome(chromosome)
if genetic_map is None:
logger.debug(f"Making flat chromosome {length_multiplier} * {chrom.id}")
gm = None
recomb_map = msprime.RateMap.uniform(
round(chrom.length * length_multiplier), chrom.recombination_rate
)
else:
if length_multiplier != 1:
raise ValueError("Cannot use length multiplier with empirical maps")
logger.debug(f"Getting map for {chrom.id} from {genetic_map}")
gm = self.get_genetic_map(genetic_map)
recomb_map = gm.get_chromosome_map(chrom.id)

inclusion_intervals = None
exclusion_intervals = None
if inclusion_mask is not None:
if length_multiplier != 1:
raise ValueError("Cannot use length multiplier with mask")
if isinstance(inclusion_mask, str):
inclusion_intervals = stdpopsim.utils.read_bed(
inclusion_mask, chromosome
)
else:
inclusion_intervals = inclusion_mask
if exclusion_mask is not None:
if length_multiplier != 1:
raise ValueError("Cannot use length multiplier with mask")
if isinstance(exclusion_mask, str):
exclusion_intervals = stdpopsim.utils.read_bed(
exclusion_mask, chromosome
)
else:
exclusion_intervals = exclusion_mask

if mutation_rate is None:
mutation_rate = chrom.mutation_rate

ret = stdpopsim.Contig(
recombination_map=recomb_map,
mutation_rate=mutation_rate,
genetic_map=gm,
inclusion_mask=inclusion_intervals,
exclusion_mask=exclusion_intervals,
)

return ret
return stdpopsim.Contig.species_contig(
species=self,
chromosome=chromosome,
genetic_map=genetic_map,
length_multiplier=length_multiplier,
length=length,
mutation_rate=mutation_rate,
inclusion_mask=inclusion_mask,
exclusion_mask=exclusion_mask,
)

def get_demographic_model(self, id):
"""
Expand Down

0 comments on commit de8c215

Please sign in to comment.