Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert demographic models to Demes YAML files #1233

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
120 changes: 120 additions & 0 deletions maintenance/convert_to_demes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
#!/usr/bin/env python3
# Convert all stdpopsim models into demes YAML files.

import copy
import sys
import pathlib
import textwrap

import demes
import msprime
import stdpopsim


def change_units(
    graph: "demes.Graph", time_units: str, generation_time: float
) -> "demes.Graph":
    """Return a copy of ``graph`` with its times expressed in ``time_units``.

    The input graph must be in generations. Every deme start time, epoch
    start/end time, migration start/end time and pulse time is multiplied
    by ``generation_time``; sizes and rates are left as they are. The
    input graph itself is never modified.

    :param graph: The graph to convert; its ``time_units`` must be
        ``"generations"``.
    :param time_units: The time units to record on the returned graph.
    :param generation_time: The factor applied to every time value, also
        recorded as the returned graph's ``generation_time``. Must be 1
        when ``time_units`` is ``"generations"``.
    :return: A converted deep copy of ``graph``.
    :raises ValueError: If ``graph`` is not in generations, or if
        ``time_units`` is ``"generations"`` but ``generation_time`` is not 1.
    """
    # TODO: put a function like this in demes.

    # stdpopsim models are always in generations. Raise explicitly rather
    # than assert: asserts vanish under ``python -O``, which would let an
    # already-converted graph be silently rescaled a second time.
    if graph.time_units != "generations":
        raise ValueError(
            "graph must be in generations, "
            f"but its time_units are {graph.time_units!r}"
        )
    if time_units == "generations" and generation_time != 1:
        raise ValueError(
            "generation_time must be 1 when time_units is 'generations', "
            f"got {generation_time!r}"
        )

    # return a copy instead of modifying the original
    graph = copy.deepcopy(graph)

    if time_units == "generations":
        # Nothing to rescale.
        return graph

    for deme in graph.demes:
        deme.start_time *= generation_time
        for epoch in deme.epochs:
            epoch.start_time *= generation_time
            epoch.end_time *= generation_time
    for migration in graph.migrations:
        migration.start_time *= generation_time
        migration.end_time *= generation_time
    for pulse in graph.pulses:
        pulse.time *= generation_time
    graph.time_units = time_units
    graph.generation_time = generation_time

    # Sanity check: rebuild the graph from its dict form and confirm the
    # result matches the graph that was edited in place above.
    graph2 = demes.Graph.fromdict(graph.asdict())
    graph2.assert_close(graph)

    return graph


# Target (time_units, generation_time) for every model, keyed by model id.
# The values were collected by inspecting the stdpopsim model definitions.
_IN_GENERATIONS = ("generations", 1)
_IN_KILOYEARS = ("thousands of years", 25 / 1000)

time_conversion = {
    # AnaPla
    "MallardBlackDuck_2L19": ("years", 4),
    # AnoGam
    "GAS_1A17": _IN_GENERATIONS,
    # AraTha
    "SouthMiddleAtlas_1D17": _IN_GENERATIONS,
    "African2Epoch_1H18": _IN_GENERATIONS,
    "African3Epoch_1H18": _IN_GENERATIONS,
    # BosTau
    "HolsteinFriesian_1M13": _IN_GENERATIONS,
    # DroMel
    "African3Epoch_1S16": _IN_GENERATIONS,
    "OutOfAfrica_2L06": _IN_GENERATIONS,
    # HomSap
    "OutOfAfricaExtendedNeandertalAdmixturePulse_3I21": _IN_KILOYEARS,
    "OutOfAfrica_3G09": ("years", 25),
    "OutOfAfrica_2T12": ("years", 25),
    "Africa_1T12": ("years", 25),
    "AmericanAdmixture_4B11": _IN_GENERATIONS,
    "OutOfAfricaArchaicAdmixture_5R19": ("years", 29),
    "Zigzag_1S14": _IN_GENERATIONS,
    "AncientEurasia_9K19": ("years", 25),
    "PapuansOutOfAfrica_10J19": _IN_GENERATIONS,
    "AshkSub_7G19": _IN_GENERATIONS,
    "OutOfAfrica_4J17": ("years", 29),
    "Africa_1B08": _IN_GENERATIONS,
    # PanTro
    "BonoboGhost_4K19": _IN_KILOYEARS,
    # PonAbe
    "TwoSpecies_2L11": ("years", 20),
}

# Script entry point: write one Demes YAML file per stdpopsim demographic
# model to <output_folder>/<species id>/<model id>.yaml.
if __name__ == "__main__":
    if len(sys.argv) != 2:
        # sys.exit() with a string writes it to stderr and exits with
        # status 1; unlike the interactive ``exit`` helper it does not
        # depend on the ``site`` module being loaded.
        sys.exit(f"usage: {sys.argv[0]} output_folder/")

    output_folder = pathlib.Path(sys.argv[1])
    # parents=True so that a nested output path can be given directly.
    output_folder.mkdir(parents=True, exist_ok=True)

    for species in stdpopsim.all_species():
        species_folder = output_folder / species.id
        species_folder.mkdir(exist_ok=True)
        for model in species.demographic_models:
            graph = msprime.Demography.to_demes(model.model)

            # Change the time units.
            try:
                time_units, generation_time = time_conversion[model.id]
            except KeyError:
                # The table is maintained by hand, so say exactly which
                # model needs an entry instead of dying with a bare KeyError.
                sys.exit(
                    f"no time_conversion entry for model {model.id!r} "
                    f"of species {species.id!r}"
                )
            graph = change_units(graph, time_units, generation_time)

            # Add description: the short description on the first line,
            # then the long description collapsed onto a single line.
            long_description = " ".join(
                textwrap.wrap(textwrap.dedent(model.long_description))
            ).strip()
            graph.description = model.description + "\n" + long_description

            # Add citations.
            graph.doi.extend(str(citation) for citation in model.citations)

            # Add metadata.
            if model.mutation_rate is not None:
                graph.metadata["mutation_rate"] = model.mutation_rate

            demes.dump(graph, species_folder / f"{model.id}.yaml")
2 changes: 1 addition & 1 deletion stdpopsim/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

# We import catalog here, but the internal functions
# defined are not part of the external API.
from .catalog import * # NOQA
from . import catalog # NOQA

from . import qc # NOQA

Expand Down
1 change: 0 additions & 1 deletion stdpopsim/catalog/AnaPla/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,3 @@
Catalog definitions for AnaPla (Ensembl ID='anas_platyrhynchos')
"""
from . import species # noqa: F401
from . import demographic_models # noqa: F401
1 change: 0 additions & 1 deletion stdpopsim/catalog/AnoGam/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,3 @@
Catalog definitions for AnoGam (Ensembl ID='anopheles_gambiae')
"""
from . import species # noqa: F401
from . import demographic_models # noqa: F401
1 change: 0 additions & 1 deletion stdpopsim/catalog/AraTha/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,3 @@
"""
from . import species # noqa: F401
from . import genetic_maps # noqa: F401
from . import demographic_models # noqa: F401
1 change: 0 additions & 1 deletion stdpopsim/catalog/BosTau/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,3 @@
Catalog definitions for bos_taurus
"""
from . import species # noqa: F401
from . import demographic_models # noqa: F401
1 change: 0 additions & 1 deletion stdpopsim/catalog/DroMel/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,5 @@
"""
from . import species # noqa: F401
from . import genetic_maps # noqa: F401
from . import demographic_models # noqa: F401
from . import dfes # noqa: F401
from . import annotations # noqa: F401
1 change: 0 additions & 1 deletion stdpopsim/catalog/HomSap/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,5 @@
"""
from . import species # noqa: F401
from . import genetic_maps # noqa: F401
from . import demographic_models # noqa: F401
from . import annotations # noqa: F401
from . import dfes # noqa: F401
1 change: 0 additions & 1 deletion stdpopsim/catalog/PanTro/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,3 @@
Catalog definitions for Pan troglodytes
"""
from . import species # noqa: F401
from . import demographic_models # noqa: F401
1 change: 0 additions & 1 deletion stdpopsim/catalog/PonAbe/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,3 @@
"""
from . import species # noqa: F401
from . import genetic_maps # noqa: F401
from . import demographic_models # noqa: F401
11 changes: 9 additions & 2 deletions stdpopsim/catalog/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
import importlib
import pathlib

from . import ensembl_info # noqa: F401
import stdpopsim

# Import all species definitions in the catalog.
__all__ = []
# glob() yields entries in arbitrary, filesystem-dependent order; sorting
# keeps the import order and the contents of __all__ reproducible.
for path in sorted(pathlib.Path(__path__[0]).glob("*")):
    module_name = path.name
    # Species packages are the directories whose name starts with an
    # upper-case letter (e.g. "HomSap"). Lower-case entries such as
    # "demographic-models" and any plain files are skipped.
    if path.is_dir() and module_name[0].isupper():
        __all__.append(module_name)
        importlib.import_module("stdpopsim.catalog." + module_name)

# Attach to each species the demographic models stored as YAML files under
# demographic-models/<species id>/, in sorted filename order so that the
# registration order does not depend on the filesystem.
for species in stdpopsim.all_species():
    path = pathlib.Path(__path__[0]) / "demographic-models" / species.id
    for yaml_file in sorted(path.glob("*.yaml")):
        dm = stdpopsim.DemographicModel.from_yaml(yaml_file)
        species.add_demographic_model(dm)
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
description: 'North American Mallard/Black Duck split

This is a model fit to contemporary samples of wild North American mallard and black
duck, using the "split-migration" model of dadi. See Figure 6 of Lavretsky et al
2019.'
time_units: years
generation_time: 4
doi: ['Lavretsky et al., 2019: https://doi.org/10.1111/mec.15343']
metadata: {mutation_rate: 4.83e-09}
demes:
- name: Ancestral
description: Ancestral population
epochs:
- {end_time: 632305.0, start_size: 819535}
- name: Mallard
description: Wild North American mallards
ancestors: [Ancestral]
epochs:
- {end_time: 0, start_size: 1370000.0}
- name: Black_duck
description: Wild black ducks
ancestors: [Ancestral]
epochs:
- {end_time: 0, start_size: 1570000.0}
migrations:
- demes: [Mallard, Black_duck]
rate: 1.720487837615233e-06
125 changes: 125 additions & 0 deletions stdpopsim/catalog/demographic-models/AnoGam/GAS_1A17.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
description: 'Stairwayplot estimates of N(t) for Gabon sample

These estimates were done as part of the Ag1000G 2017 Consortium paper. Stairwayplot
was run with the addition of a misorientation parameter using SFS information from
each population.'
time_units: generations
generation_time: 1
doi: ['Ag1000G Consortium, 2017: https://doi.org/10.1038/nature24995']
metadata: {mutation_rate: 3.5e-09}
demes:
- name: GAS
description: Gabon gambiae population
epochs:
- {end_time: 18029.2683, start_size: 409527.12801416}
- {end_time: 11260.2248, start_size: 409527.12801416}
- {end_time: 10834.1379, start_size: 77334.77862527}
- {end_time: 9946.68728, start_size: 322144.56539297}
- {end_time: 7169.74182, start_size: 1680052.006021}
- {end_time: 6481.15044, start_size: 624896.67937685}
- {end_time: 6063.7642, start_size: 530289.21416049}
- {end_time: 5742.77111, start_size: 543762.2932594}
- {end_time: 4868.7804, start_size: 1903551.76041575}
- {end_time: 4144.16552, start_size: 1972764.03439268}
- {end_time: 3543.66638, start_size: 1998160.88128216}
- {end_time: 3043.25043, start_size: 1998160.88128216}
- {end_time: 2619.82155, start_size: 1998160.88128216}
- {end_time: 2256.73855, start_size: 1998953.44050118}
- {end_time: 1942.06662, start_size: 1998953.44050118}
- {end_time: 1667.62718, start_size: 1992430.35210815}
- {end_time: 1425.80467, start_size: 1989715.58702617}
- {end_time: 1211.87638, start_size: 1980227.21562606}
- {end_time: 1145.45402, start_size: 687172.58638815}
- {end_time: 1085.9595, start_size: 683889.43627665}
- {end_time: 1032.13113, start_size: 683889.43627665}
- {end_time: 983.196252, start_size: 683889.43627665}
- {end_time: 938.516578, start_size: 683889.43627665}
- {end_time: 897.519058, start_size: 684576.59330447}
- {end_time: 859.801339, start_size: 684576.59330447}
- {end_time: 824.963147, start_size: 685005.95081601}
- {end_time: 792.661043, start_size: 685951.34145025}
- {end_time: 762.657505, start_size: 686150.90910499}
- {end_time: 734.719738, start_size: 686235.36369922}
- {end_time: 708.609246, start_size: 687162.86783629}
- {end_time: 684.129309, start_size: 688681.83478372}
- {end_time: 661.129552, start_size: 690176.6983718}
- {end_time: 639.454764, start_size: 692379.41939462}
- {end_time: 619.04502, start_size: 692716.93526192}
- {end_time: 599.781759, start_size: 693429.24269629}
- {end_time: 581.588679, start_size: 693429.24269629}
- {end_time: 564.379009, start_size: 693429.24269629}
- {end_time: 548.045759, start_size: 694677.60526595}
- {end_time: 532.550112, start_size: 694677.60526595}
- {end_time: 517.829247, start_size: 694677.60526595}
- {end_time: 503.826473, start_size: 694677.60526595}
- {end_time: 490.490498, start_size: 694677.60526595}
- {end_time: 477.774801, start_size: 694677.60526595}
- {end_time: 465.620388, start_size: 695633.55941906}
- {end_time: 454.00617, start_size: 695633.55941906}
- {end_time: 442.896919, start_size: 695633.55941906}
- {end_time: 432.260401, start_size: 695633.55941906}
- {end_time: 422.067072, start_size: 695633.55941906}
- {end_time: 412.289797, start_size: 695633.55941906}
- {end_time: 402.903613, start_size: 695633.55941906}
- {end_time: 393.885515, start_size: 695633.55941906}
- {end_time: 385.214266, start_size: 695633.55941906}
- {end_time: 376.870235, start_size: 695633.55941906}
- {end_time: 368.835241, start_size: 695633.55941906}
- {end_time: 361.09243, start_size: 695633.55941906}
- {end_time: 353.626147, start_size: 695633.55941906}
- {end_time: 346.421839, start_size: 695633.55941906}
- {end_time: 339.465956, start_size: 695633.55941906}
- {end_time: 332.745865, start_size: 695633.55941906}
- {end_time: 326.24479, start_size: 696167.64460966}
- {end_time: 319.955507, start_size: 696317.93467906}
- {end_time: 313.86024, start_size: 697332.03584512}
- {end_time: 307.951917, start_size: 698106.79217618}
- {end_time: 302.218199, start_size: 699330.14877435}
- {end_time: 296.632655, start_size: 702884.83678823}
- {end_time: 291.187697, start_size: 706605.83914068}
- {end_time: 285.888999, start_size: 708782.9061541}
- {end_time: 280.730912, start_size: 710882.47527273}
- {end_time: 275.664382, start_size: 719107.79112942}
- {end_time: 270.57382, start_size: 743769.25832605}
- {end_time: 258.403062, start_size: 1829782.22978925}
- {end_time: 243.883726, start_size: 2245241.1128414}
- {end_time: 228.659684, start_size: 2420531.19687943}
- {end_time: 213.010501, start_size: 2557240.92209016}
- {end_time: 196.88442, start_size: 2707367.42825967}
- {end_time: 181.06966, start_size: 2726859.95574373}
- {end_time: 165.081438, start_size: 2830283.03259496}
- {end_time: 149.486416, start_size: 2833326.90480091}
- {end_time: 134.317538, start_size: 2827486.59107239}
- {end_time: 119.778311, start_size: 2779609.28386339}
- {end_time: 105.86718, start_size: 2726859.95574373}
- {end_time: 93.3624771, start_size: 2512451.15337624}
- {end_time: 83.3883683, start_size: 2053484.48904133}
- {end_time: 80.0932536, start_size: 694949.57011815}
- {end_time: 76.9788992, start_size: 672653.84034255}
- {end_time: 74.0546632, start_size: 646628.97921259}
- {end_time: 71.3468519, start_size: 612860.29930411}
- {end_time: 68.7139222, start_size: 609770.71698915}
- {end_time: 66.15229, start_size: 606896.78477367}
- {end_time: 63.6475829, start_size: 606896.78477367}
- {end_time: 61.1979243, start_size: 606896.78477367}
- {end_time: 58.8015192, start_size: 606896.78477367}
- {end_time: 56.4566497, start_size: 606896.78477367}
- {end_time: 54.161671, start_size: 606896.78477367}
- {end_time: 51.9150077, start_size: 606896.78477367}
- {end_time: 49.7151498, start_size: 606896.78477367}
- {end_time: 47.5606499, start_size: 606896.78477367}
- {end_time: 45.4501193, start_size: 606896.78477367}
- {end_time: 43.3822257, start_size: 606896.78477367}
- {end_time: 41.35569, start_size: 606896.78477367}
- {end_time: 39.3692837, start_size: 606896.78477367}
- {end_time: 37.4218265, start_size: 606896.78477367}
- {end_time: 35.503141, start_size: 609770.71698915}
- {end_time: 33.6213534, start_size: 609770.71698915}
- {end_time: 31.7590591, start_size: 615171.6634657}
- {end_time: 29.9059882, start_size: 623896.55036331}
- {end_time: 28.0715527, start_size: 629386.55333025}
- {end_time: 26.2194838, start_size: 647425.87204344}
- {end_time: 24.3957877, start_size: 649423.64089016}
- {end_time: 17.7815418, start_size: 2398970.46335773}
- {end_time: 10.8221472, start_size: 2570469.78880419}
- {end_time: 0, start_size: 4069863.0}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
description: 'South Middle Atlas African two epoch model

Model estimated from site frequency spectrum of synonymous SNPs from African South
Middle Atlas samples using Williamson et al. 2005 methodology. Values come from
supplementary table 1 of Huber et al 2018. Sizes change from N_A -> N_0 and t_1
is time of the second epoch.'
time_units: generations
generation_time: 1
doi: ['Huber et al., 2018: https://doi.org/10.1038/s41467-018-05281-7']
metadata: {mutation_rate: 7e-09}
demes:
- name: SouthMiddleAtlas
description: Arabidopsis thaliana South Middle Atlas population
epochs:
- {end_time: 568344, start_size: 746148}
- {end_time: 0, start_size: 100218.0}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
description: 'South Middle Atlas African three epoch model

Model estimated from site frequency spectrum of synonymous SNPs from African (South
Middle Atlas) samples using Williamson et al. 2005 methodology. Values come from
supplementary table 1 of Huber et al 2018. Sizes change from N_A -> N_2 -> N_3 and
t_2 is the time of the second epoch and t_3 is the time of the 3rd epoch.'
time_units: generations
generation_time: 1
doi: ['Huber et al., 2018: https://doi.org/10.1038/s41467-018-05281-7']
metadata: {mutation_rate: 7e-09}
demes:
- name: SouthMiddleAtlas
description: Arabidopsis thaliana South Middle Atlas population
epochs:
- {end_time: 21954, start_size: 161744}
- {end_time: 14534, start_size: 24076.0}
- {end_time: 0, start_size: 203077.0}