Skip to content

Commit

Permalink
Merge pull request #23 from LohmuellerLab/at_model
Browse files Browse the repository at this point in the history
Merging the Arabidopsis model.
  • Loading branch information
andrewkern committed Mar 9, 2019
2 parents baa8697 + 76e6b75 commit 2733de4
Show file tree
Hide file tree
Showing 3 changed files with 179 additions and 0 deletions.
22 changes: 22 additions & 0 deletions athaliana_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
"""
A. thaliana msprime example
"""
import msprime

from stdpopsim import arabidopsis_thaliana

chrom = arabidopsis_thaliana.genome.chromosomes["chr5"]
recomb_map = chrom.recombination_map()

model = arabidopsis_thaliana.Durvasula2017MSMC()
model.debug()

samples = [
msprime.Sample(population=0, time=0), msprime.Sample(population=0, time=0)]

ts = msprime.simulate(
samples=samples,
recombination_map=chrom.recombination_map(),
mutation_rate=chrom.mean_mutation_rate,
**model.asdict())
print("simulated:", ts.num_trees, ts.num_sites)
115 changes: 115 additions & 0 deletions stdpopsim/arabidopsis_thaliana.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
"""
Genome, genetic map and demographic model definitions for Arabidopsis thaliana.
"""
import msprime
import numpy as np

import stdpopsim.models as models
import stdpopsim.genomes as genomes
import stdpopsim.genetic_maps as genetic_maps


###########################################################
#
# Genetic maps
#
###########################################################


class Salome2012(genetic_maps.GeneticMap):
"""
Genetic map from Salome 2012 averaged across population crosses.
Please see this repo for details on how this was done: https://github.com/
LohmuellerLab/arabidopsis_recomb_maps
"""

url = ("http://www.eeb.ucla.edu/Faculty/Lohmueller/data/"
"uploads/salome2012_maps.tar.gz")
file_pattern = "arab_{name}_map_loess.txt"


genetic_maps.register_genetic_map(Salome2012())


###########################################################
#
# Genome definition
#
###########################################################

# Data for length information based on:
# https://www.arabidopsis.org/portals/genAnnotation/
# gene_structural_annotation/agicomplete.jsp
# Lengths from TAIR 10 although its unclear what reference the genetic map used
# -- follow up on this with Salome 2012 authors

_chromosome_data = """\
chr1 30427671
chr2 19698289
chr3 23459830
chr4 18585056
chr5 26975502
"""
# mutation rate from Ossowski 2010 Science
# recombination value from Huber et al 2014 MBE
# rho=200/Mb, assume Ne=124,000, rho=2*Ne*r
_chromosomes = []
for line in _chromosome_data.splitlines():
name, length = line.split()[:2]
_chromosomes.append(genomes.Chromosome(
name=name, length=int(length),
mean_mutation_rate=7e-9,
mean_recombination_rate=8.1e-9))

genome = genomes.Genome(
species="arabidopsis_thaliana",
chromosomes=_chromosomes,
default_genetic_map=Salome2012.name)


###########################################################
#
# Demographic models
#
###########################################################


class Durvasula2017MSMC(models.Model):
"""
Model estimated from two homozygous individuals from the South Middle Atlas
using MSMC
"""

def __init__(self):

# the size during the interval times[k] to times[k+1] = sizes[k]
self.times = np.array([
699, 2796, 6068, 9894, 14370, 19606, 25730, 32894, 41275,
51077, 62544, 75958, 91648, 110001, 131471, 156584, 185960, 220324,
260520, 307540, 362541, 426879, 502139, 590173, 693151, 813610,
954517, 1119341, 1312147, 1537686, 1801500, 2110100])
self.sizes = np.array([
42252426, 42252426, 60323, 72174, 40591, 21158, 21442,
39942, 78908, 111132, 110745, 96283, 87661, 83932, 83829, 91813,
111644, 143456, 181571, 217331, 241400, 246984, 238593, 228222,
217752, 198019, 165210, 121796, 121796, 73989, 73989, 73989])

# MSMC is accurate from 40Kya-1.6Mya for A.thaliana(Durvasula et al 2017)
# set the first 7 sizes
# equal to the size at 8 (~40Kya)
self.sizes[:8] = self.sizes[8]
# set the last 2 entries equal
# to the size at 30 (~1.6Mya)
self.sizes[30:32] = self.sizes[30]
self.demographic_events = []
for idx, t in enumerate(self.times):
self.demographic_events.append(
msprime.PopulationParametersChange(
time=t, initial_size=self.sizes[idx], population_id=0))

self.migration_matrix = [[0]]

self.population_configurations = [
msprime.PopulationConfiguration(initial_size=self.sizes[0])
]
42 changes: 42 additions & 0 deletions tests/test_athaliana.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import unittest
import io

from stdpopsim import arabidopsis_thaliana


class TestGenome(unittest.TestCase):
"""
Tests for the arabidopsis_thaliana genome.
"""

def test_basic_attributes(self):
genome = arabidopsis_thaliana.genome
self.assertEqual(genome.species, "arabidopsis_thaliana")
self.assertEqual(genome.default_genetic_map, "Salome2012")
self.assertEqual(len(genome.chromosomes), 5)

def test_str(self):
s = str(arabidopsis_thaliana.genome)
self.assertGreater(len(s), 0)
self.assertIsInstance(s, str)

def test_chromosome_lengths(self):
genome = arabidopsis_thaliana.genome
self.assertEqual(genome.chromosomes["chr1"].length, 30427671)
self.assertEqual(genome.chromosomes["chr2"].length, 19698289)
self.assertEqual(genome.chromosomes["chr3"].length, 23459830)
self.assertEqual(genome.chromosomes["chr4"].length, 18585056)
self.assertEqual(genome.chromosomes["chr5"].length, 26975502)


class TestDurvasula2017MSMC(unittest.TestCase):
"""
Basic tests for the Durvasula MSMC model.
"""

def test_debug_runs(self):
model = arabidopsis_thaliana.Durvasula2017MSMC()
output = io.StringIO()
model.debug(output)
s = output.getvalue()
self.assertGreater(len(s), 0)

0 comments on commit 2733de4

Please sign in to comment.