Skip to content

Commit

Permalink
Merge pull request #1002 from izabelcavassim/DFE_class
Browse files Browse the repository at this point in the history
Adding DFE class
  • Loading branch information
petrelharp committed Sep 23, 2021
2 parents 03b91b1 + cdfb99b commit 1cf5049
Show file tree
Hide file tree
Showing 11 changed files with 496 additions and 6 deletions.
8 changes: 7 additions & 1 deletion docs/development.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1081,7 +1081,7 @@ Genetic maps are named using a descriptive name and the assembly version accordi
to ``${CamelCaseDescriptiveName}_${Assembly}``. e.g., the HapMap phase 2 map on
the GRCh37 assembly becomes HapMapII_GRCh37.

Finally demographic models are named using a combination of a descriptive name,
Demographic models are named using a combination of a descriptive name,
information about the simulation, and information about the publication it was
presented in. Specifically we use
``${SomethingDescriptive}_${number_of_populations}${first_author_initial}${two_digit_date}``
Expand All @@ -1091,6 +1091,12 @@ is the number of populations implemented in the model (not necessarily the numbe
from which samples are drawn). For author initial we will use a single letter, the 1st,
until an ID collision, in which case we will include the 2nd letter, and so forth.

DFEs (Distributions of Fitness Effects) are similarly named using something descriptive
of the distribution, and information about the publication:
``${SomethingDescriptive}_${First_authors_last_name_first_letter}{two_digit_date}``.
For instance, if the distribution in question is a lognormal distribution,
then ``LogNormal`` might be the descriptive string.


**********
Unit tests
Expand Down
6 changes: 3 additions & 3 deletions stdpopsim/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@
from .engines import * # NOQA
from .warning_categories import * # NOQA

# Extensions.
from . import ext # NOQA

# We import catalog here, but the internal functions
# defined are not part of the external API.
from .catalog import * # NOQA

from . import qc # NOQA

from .slim_engine import * # NOQA

# Extensions.
from . import ext # NOQA
1 change: 1 addition & 0 deletions stdpopsim/catalog/HomSap/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
from . import genetic_maps # noqa: F401
from . import demographic_models # noqa: F401
from . import annotations # noqa: F401
from . import dfes # noqa: F401
47 changes: 47 additions & 0 deletions stdpopsim/catalog/HomSap/dfes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import stdpopsim

_species = stdpopsim.get_species("HomSap")

###########################################################
#
# DFEs
#
###########################################################


def _KimDFE():
id = "Gamma_K17"
description = "Deleterious Gamma DFE"
long_description = """
Return neutral and negative MutationType()s representing a human DFE.
Kim et al. (2017), https://doi.org/10.1534/genetics.116.197145
"""
citations = [
stdpopsim.Citation(
author="Kim et al.",
year=2017,
doi="https://doi.org/10.1534/genetics.116.197145",
reasons="to be defined", # include the dfe_model reason
)
]
neutral = stdpopsim.ext.MutationType()
gamma_shape = 0.186 # shape
gamma_mean = -0.01314833 # expected value
h = 0.5 # dominance coefficient
negative = stdpopsim.ext.MutationType(
dominance_coeff=h,
distribution_type="g", # gamma distribution
distribution_args=[gamma_mean, gamma_shape],
)

return stdpopsim.DFE(
id=id,
description=description,
long_description=long_description,
mutation_types=[neutral, negative],
proportions=[0.3, 0.7],
citations=citations,
)


_species.add_dfe(_KimDFE())
17 changes: 17 additions & 0 deletions stdpopsim/genomes.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,22 @@ def all_intervals_array(self):
all_intervals = stdpopsim.utils.build_intervals_array(all_intervals)
return all_intervals

def add_DFE(self, intervals, DFE):
"""
Adds the provided DFE to the intervals specified, by making a new genomic
element type with the mutation types and proportions specified by the DFE.
:param array intervals: A valid set of intervals.
:param DFE dfe: A DFE object.
"""
stdpopsim.utils.check_intervals_validity(intervals)
ge_type = stdpopsim.GenomicElementType(intervals=intervals)
self.genomic_element_types.append(ge_type)
getid = len(self.genomic_element_types) - 1
self.add_mutation_types(
DFE.mutation_types, DFE.proportions, genomic_element_type_id=getid
)

def add_mutation_types(self, mutation_types, proportions, genomic_element_type_id):
"""
Adds mutation types with their respective proportions to the genomic
Expand Down Expand Up @@ -395,6 +411,7 @@ def _is_index(y, X):
)

def add_genomic_element_type(self, intervals, mutation_types, proportions):
# TODO: deprecate, replaced by add_DFE??
stdpopsim.utils.check_intervals_validity(intervals)
ge_type = stdpopsim.GenomicElementType(intervals=intervals)
self.genomic_element_types.append(ge_type)
Expand Down
92 changes: 92 additions & 0 deletions stdpopsim/models.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
"""
Common infrastructure for specifying demographic models.
"""
import collections.abc
import copy
import numpy as np
import textwrap

import msprime

from . import ext


class Population:
# TODO deprecate this - we don't need any internal definition of what
Expand Down Expand Up @@ -299,3 +303,91 @@ def __init__(self, NA, N1, N2, T, M12, M21):
model=model,
generation_time=1,
)


class DFE:
"""
Class representing a DFE.
Instances of this class are constructed by dfe implementors, following the
:ref:`developer documentation <sec_development_def_model>`. To instead
obtain a pre-specified model as listed in the :ref:`sec_catalog`,
see :class:`Species.get_dfe`.
:ivar ~.mutation_types: a list of MutationTypes associated with the DFE.
:vartype ~.mutation_types: list
:ivar ~.proportions: a list of MutationTypes proportions
:vartype ~.proportions: list
:ivar ~.id: The unique identifier for this model. DFE IDs should be
short and memorable, and conform to the stdpopsim
:ref:`naming conventions <sec_development_naming_conventions>`
for DFE models.
:vartype ~.id: str
:ivar ~.description: A short description of this model as it would be used in
written text, e.g., "Lognormal DFE". This should
describe the DFE itself and not contain author or year information.
:vartype ~.description: str
:ivar long_description: A concise, but detailed, summary of the DFE model.
:vartype long_description: str
:ivar citations: A list of :class:`Citation`, that describe the primary
reference(s) for the DFE model.
:vartype citations: list of :class:`Citation`
"""

def __init__(
self,
*,
id,
description,
long_description,
mutation_types=None,
proportions=None,
citations=None,
# qc_model=None, # we may want to include qc of dfe in the future
):
self.id = id
self.description = description
self.long_description = long_description
self.citations = [] if citations is None else citations
self.mutation_types = mutation_types
self.proportions = (
[1.0] if (proportions is None and len(mutation_types) == 1) else proportions
)
# self.qc_model = qc_model

if not (
isinstance(self.proportions, collections.abc.Collection)
and isinstance(self.mutation_types, collections.abc.Collection)
and len(self.proportions) == len(self.mutation_types)
):
raise ValueError(
"proportions and mutation_types must be lists of the same length."
)

for p in self.proportions:
if not isinstance(p, (float, int)) or p < 0:
raise ValueError("proportions must be nonnegative numbers.")
sum_p = sum(self.proportions)
if not np.isclose(sum_p, 1):
raise ValueError("proportions must sum 1.0.")

for m in self.mutation_types:
if not isinstance(m, ext.MutationType):
raise ValueError(
"mutation_types must be a list of MutationType objects."
)

def __str__(self):
long_desc_lines = [
line.strip()
for line in textwrap.wrap(textwrap.dedent(self.long_description))
]
long_desc = "\n║ ".join(long_desc_lines)
s = (
"DFE:\n"
f"║ id = {self.id}\n"
f"║ description = {self.description}\n"
f"║ long_description = {long_desc}\n"
f"║ citations = {[cite.doi for cite in self.citations]}\n"
)
return s
34 changes: 33 additions & 1 deletion stdpopsim/species.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,12 @@ def all_demographic_models():
yield model


def all_dfes():
for species in all_species():
for dfe in species.dfes:
yield dfe


def all_annotations():
for species in all_species():
for an in species.annotations:
Expand Down Expand Up @@ -117,6 +123,9 @@ class Species:
:ivar demographic_models: This list of :class:`DemographicModel`
instances in the catalog for this species.
:vartype demographic_models: list
:ivar dfes: This list of :class:`DFE`
instances in the catalog for this species.
:vartype dfes: list
:ivar ensembl_id: The ensembl id for the species which is used by
maintenance scripts to query ensembl's database.
:vartype ensembl_id: str
Expand All @@ -129,11 +138,12 @@ class Species:
generation_time = attr.ib(default=0, kw_only=True)
population_size = attr.ib(default=0, kw_only=True)
demographic_models = attr.ib(factory=list, kw_only=True)
dfes = attr.ib(factory=list, kw_only=True)
ensembl_id = attr.ib(type=str, kw_only=True)
citations = attr.ib(factory=list, kw_only=True)

# A list of genetic maps. This is undocumented as the parameter is not
# intended to be used when the Species is initialsed.
# intended to be used when the Species is initialised.
# Use add_genetic_map() instead.
genetic_maps = attr.ib(factory=list, kw_only=True)
annotations = attr.ib(factory=list, kw_only=True)
Expand Down Expand Up @@ -219,6 +229,28 @@ def add_demographic_model(self, model):
)
self.demographic_models.append(model)

def get_dfe(self, id):
"""
Returns a DFE with the specified ``id``.
:param str id: The string identifier for the DFE.
A complete list of IDs for each species can be found in the
# TODO add that section to the species catalogo
"DFE" subsection for the species in the
:ref:`sec_catalog`.
:rtype: :class:`DFE`
:return: A :class:`DFE` that defines the requested model.
"""
for dfe in self.dfes:
if dfe.id == id:
return dfe
raise ValueError(f"DFE '{self.id}/{id}' not in catalog")

def add_dfe(self, dfe):
if dfe.id in [d.id for d in self.dfes]:
raise ValueError(f"DFE '{self.id}/{dfe.id}' already in catalog.")
self.dfes.append(dfe)

def add_genetic_map(self, genetic_map):
if genetic_map.id in [gm.id for gm in self.genetic_maps]:
raise ValueError(
Expand Down
11 changes: 11 additions & 0 deletions stdpopsim/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,17 @@ def is_valid_demographic_model_id(model_id):
return regex.fullmatch(model_id) is not None


def is_valid_dfe_id(model_id):
"""
Returns True if the specified string is a valid dfe model ID. This must
be a string with the following pattern:
{something descriptive of model}_{First author initial}{2 digit year}.
"""
regex = re.compile(r"[A-Z][a-z]*_[A-Z]*\d\d")
return regex.fullmatch(model_id) is not None


def is_valid_genetic_map_id(gmap_id):
"""
Returns True if the specified string is a valid genetic map ID. This must
Expand Down
8 changes: 8 additions & 0 deletions tests/test_citations.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,11 @@ def test_reason_for_model_citations(self):
f"No reason given for '{citation.author}' citation "
f"in model {model.id}"
)

def test_reason_for_dfe_citations(self):
for dfe in stdpopsim.all_dfes():
for citation in dfe.citations:
assert len(citation.reasons) > 0, (
f"No reason given for '{citation.author}' citation "
f"in dfe {dfe.id}"
)
Loading

0 comments on commit 1cf5049

Please sign in to comment.