diff --git a/stdpopsim/catalog/DroSec/__init__.py b/stdpopsim/catalog/DroSec/__init__.py new file mode 100644 index 000000000..ff8827da5 --- /dev/null +++ b/stdpopsim/catalog/DroSec/__init__.py @@ -0,0 +1,4 @@ +""" +Catalog definitions for DroSec (Ensembl ID='drosophila_sechellia') +""" +from . import species # noqa: F401 diff --git a/stdpopsim/catalog/DroSec/genome_data.py b/stdpopsim/catalog/DroSec/genome_data.py new file mode 100644 index 000000000..8f3d8cf56 --- /dev/null +++ b/stdpopsim/catalog/DroSec/genome_data.py @@ -0,0 +1,13 @@ +# File created manually from https://www.ncbi.nlm.nih.gov/assembly/GCF_004382195.1 +data = { + "assembly_accession": "GCA_004382195.1", + "assembly_name": "ASM438219v1", + "chromosomes": { + "2L": {"length": 24956976, "synonyms": []}, + "2R": {"length": 21536224, "synonyms": []}, + "3L": {"length": 28131630, "synonyms": []}, + "3R": {"length": 30464902, "synonyms": []}, + "X": {"length": 22909512, "synonyms": []}, + "4": {"length": 1277805, "synonyms": []}, + }, +} diff --git a/stdpopsim/catalog/DroSec/species.py b/stdpopsim/catalog/DroSec/species.py new file mode 100644 index 000000000..7534ce7ab --- /dev/null +++ b/stdpopsim/catalog/DroSec/species.py @@ -0,0 +1,88 @@ +import stdpopsim + +from . import genome_data + +_ChakrabortyEtAl = stdpopsim.Citation( + doi="https://doi.org/10.1101/gr.263442.120", + year=2021, + author="Chakraborty et al.", + reasons={stdpopsim.CiteReason.ASSEMBLY}, +) + +_ComeronEtAl = stdpopsim.Citation( + doi="https://doi.org/10.1371/journal.pgen.1002905", + year=2012, + author="Comeron et al.", + reasons={stdpopsim.CiteReason.REC_RATE}, +) + +_LegrandEtAl = stdpopsim.Citation( + doi="https://doi.org/10.1534/genetics.108.092080", + year=2009, + author="Legrand et al.", + reasons={ + stdpopsim.CiteReason.GEN_TIME, + stdpopsim.CiteReason.MUT_RATE, + stdpopsim.CiteReason.POP_SIZE, + }, +) + +# We used recombination rates estimated in DroMel. +# Mean chromosomal rates, calculated from the Comeron 2012 map. +# Chromosome 4 isn't in this map, so the weighted mean of 2L, 2R, 3L and 3R +# was used instead. +_recombination_rate = { + "2L": 2.4125016027908946e-08, + "2R": 2.2366522822806982e-08, + "3L": 1.7985794693631893e-08, + "3R": 1.7165556232922828e-08, + "X": 2.9151053903465754e-08, + "4": 2.0085234464525437e-08, +} + +# Mutation rate is set to that used by +# Legrand et al. in an ABC selection of +# demographic scenarios (page 1200). +# They state this is an estimate taken +# from D. simulans (Wall et al. 2002). +_overall_rate = 1.5e-9 +_mutation_rate = { + "2L": _overall_rate, + "2R": _overall_rate, + "3L": _overall_rate, + "3R": _overall_rate, + "X": _overall_rate, + "4": _overall_rate, +} + +# We could not auto-pull the genome data from ensemble +# so instead we used the most up-to-date assembly +# currently available from NCBI. +_genome = stdpopsim.Genome.from_data( + genome_data.data, + recombination_rate=_recombination_rate, + mutation_rate=_mutation_rate, + citations=[ + _ChakrabortyEtAl, + _ComeronEtAl, + _LegrandEtAl, + ], +) + + +# Generation time was set to that used by +# by Legrand et al. in an ABC selection of demographic +# scenarios (page 1200). +# Population size was estimated in the same paper (page 1202). +_species = stdpopsim.Species( + id="DroSec", + ensembl_id="drosophila_sechellia", + name="Drosophila sechellia", + common_name="Drosophila sechellia", + genome=_genome, + generation_time=0.05, + population_size=100000, + citations=[_LegrandEtAl], +) + +stdpopsim.register_species(_species) diff --git a/tests/test_DroSec.py b/tests/test_DroSec.py new file mode 100644 index 000000000..536b925e3 --- /dev/null +++ b/tests/test_DroSec.py @@ -0,0 +1,45 @@ +import pytest + +import stdpopsim +from tests import test_species + + +class TestSpeciesData(test_species.SpeciesTestBase): + + species = stdpopsim.get_species("DroSec") + + def test_ensembl_id(self): + assert self.species.ensembl_id == "drosophila_sechellia" + + def test_name(self): + assert self.species.name == "Drosophila sechellia" + + def test_common_name(self): + assert self.species.common_name == "Drosophila sechellia" + + # QC Tests. These tests are performed by another contributor + # independently referring to the citations provided in the + # species definition, filling in the appropriate values + # and deleting the pytest "skip" annotations. + @pytest.mark.skip("Population size QC not done yet") + def test_qc_population_size(self): + assert self.species.population_size == -1 + + @pytest.mark.skip("Generation time QC not done yet") + def test_qc_generation_time(self): + assert self.species.generation_time == -1 + + +class TestGenomeData(test_species.GenomeTestBase): + + genome = stdpopsim.get_species("DroSec").genome + + @pytest.mark.skip("Recombination rate QC not done yet") + @pytest.mark.parametrize(["name", "rate"], {}.items()) + def test_recombination_rate(self, name, rate): + assert pytest.approx(rate, self.genome.get_chromosome(name).recombination_rate) + + @pytest.mark.skip("Mutation rate QC not done yet") + @pytest.mark.parametrize(["name", "rate"], {}.items()) + def test_mutation_rate(self, name, rate): + assert pytest.approx(rate, self.genome.get_chromosome(name).mutation_rate)