Skip to content

Commit

Permalink
Merge pull request #34 from jgallowa07/mean_genome_recombination_rate
Browse files Browse the repository at this point in the history
Added mean recombination rates to chromosome & genome
  • Loading branch information
petrelharp committed Mar 19, 2019
2 parents 0add546 + 86f66dc commit d01604a
Show file tree
Hide file tree
Showing 7 changed files with 136 additions and 74 deletions.
2 changes: 1 addition & 1 deletion athaliana_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,6 @@
ts = msprime.simulate(
samples=samples,
recombination_map=chrom.recombination_map(),
mutation_rate=chrom.mean_mutation_rate,
mutation_rate=chrom.default_mutation_rate,
**model.asdict())
print("simulated:", ts.num_trees, ts.num_sites)
4 changes: 2 additions & 2 deletions stdpopsim/arabidopsis_thaliana.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ class Salome2012(genetic_maps.GeneticMap):
name, length = line.split()[:2]
_chromosomes.append(genomes.Chromosome(
name=name, length=int(length),
mean_mutation_rate=7e-9,
mean_recombination_rate=8.1e-9))
default_mutation_rate=7e-9,
default_recombination_rate=8.1e-9))

genome = genomes.Genome(
species="arabidopsis_thaliana",
Expand Down
4 changes: 2 additions & 2 deletions stdpopsim/drosophila_melanogaster.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ class Comeron2012_dm6(genetic_maps.GeneticMap):
name, length = line.split()[:2]
_chromosomes.append(genomes.Chromosome(
name=name, length=int(length),
mean_mutation_rate=8.4e-9, # WRONG!, underestimate used in S&S
mean_recombination_rate=8.4e-9)) # WRONG, underestimate used in S&S!
default_mutation_rate=8.4e-9, # WRONG!, underestimate used in S&S
default_recombination_rate=8.4e-9)) # WRONG, underestimate used in S&S!


#: :class:`stdpopsim.Genome` definition for D. melanogaster. Chromosome length data is
Expand Down
4 changes: 2 additions & 2 deletions stdpopsim/e_coli.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
_chromosomes.append(genomes.Chromosome(
name="chr",
length=4641652,
mean_mutation_rate=1e-5+2e-4,
mean_recombination_rate=0.0))
default_mutation_rate=1e-5+2e-4,
default_recombination_rate=0.0))
# mean_conversion_rate=8.9e-11 # not implemented yet!
# mean_conversion_length=542 # not implemented yet!

Expand Down
30 changes: 23 additions & 7 deletions stdpopsim/genomes.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,12 @@ def __init__(self, species, chromosomes, default_genetic_map=None):
self.species = species
self.default_genetic_map = default_genetic_map
self.chromosomes = {}
self.length = 0
for chromosome in chromosomes:
self.chromosomes[chromosome.name] = chromosome
chromosome.default_genetic_map = default_genetic_map
chromosome.species = species
self.length += chromosome.length

def __str__(self):
s = "Genome for {}:\n".format(self.species)
Expand All @@ -31,28 +33,42 @@ def __str__(self):
s += "\t{}\n".format(chrom)
return s

@property
def mean_recombination_rate(self):
"""
This method return the weighted mean recombination rate
across all chomosomes in the genome.
:rtype: float
"""
mean_recombination_rate = 0
for chrom in self.chromosomes.values():
normalized_weight = chrom.length / self.length
cont = chrom.default_recombination_rate*normalized_weight
mean_recombination_rate += cont
return mean_recombination_rate


class Chromosome(object):
"""
Class representing a single chromosome for a species.
.. todo:: Define the facilities that this object provides.
"""
def __init__(self, name, length, mean_recombination_rate, mean_mutation_rate):
def __init__(self, name, length, default_recombination_rate, default_mutation_rate):
self.name = name
self.length = length
self.mean_recombination_rate = mean_recombination_rate
self.mean_mutation_rate = mean_mutation_rate
self.default_recombination_rate = default_recombination_rate
self.default_mutation_rate = default_mutation_rate
self.species = None
self.default_genetic_map = None

def __repr__(self):
return (
"{{'name': {}, 'length': {}, "
"'mean_recombination_rate': {}, "
"'mean_mutation_rate': {}}}".format(
self.name, self.length, self.mean_recombination_rate,
self.mean_mutation_rate))
"'default_recombination_rate': {}, "
"'default_mutation_rate': {}}}".format(
self.name, self.length, self.default_recombination_rate,
self.default_mutation_rate))

def __str__(self):
return repr(self)
Expand Down
69 changes: 37 additions & 32 deletions stdpopsim/homo_sapiens.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,48 +40,53 @@ class HapmapII_GRCh37(genetic_maps.GeneticMap):
#
###########################################################

# List of chromosomes. Data for length information based on GRCh38,
# https://www.ncbi.nlm.nih.gov/grc/human/data
# List of chromosomes.

# FIXME: add mean mutation and recombination rate data to this table.
# FIXME: add mean mutation rate data to this table.
# Name Length mean_recombination_rate mean_mutation_rate

# length information can be found here
# <http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/chromInfo.txt.gz>

# mean_recombination_rate was computed across all windows of the GRCh37 genetic map
# <ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/technical/working/20110106_recombination_hotspots>
_chromosome_data = """\
chr1 248956422
chr2 242193529
chr3 198295559
chr4 190214555
chr5 181538259
chr6 170805979
chr7 159345973
chr8 145138636
chr9 138394717
chr10 133797422
chr11 135086622
chr12 133275309
chr13 114364328
chr14 107043718
chr15 101991189
chr16 90338345
chr17 83257441
chr18 80373285
chr19 58617616
chr20 64444167
chr21 46709983
chr22 50818468
chrX 156040895
chrY 57227415
chr1 249250621 1.1485597641285933e-08
chr2 243199373 1.1054289277533446e-08
chr3 198022430 1.1279585624662551e-08
chr4 191154276 1.1231162636001008e-08
chr5 180915260 1.1280936570022824e-08
chr6 171115067 1.1222852661225285e-08
chr7 159138663 1.1764614397655721e-08
chr8 146364022 1.1478465778920576e-08
chr9 141213431 1.1780701596308656e-08
chr10 135534747 1.3365134257075317e-08
chr11 135006516 1.1719334320833283e-08
chr12 133851895 1.305017186986983e-08
chr13 115169878 1.0914860554958317e-08
chr14 107349540 1.119730771394731e-08
chr15 102531392 1.3835785893339787e-08
chr16 90354753 1.4834607113882717e-08
chr17 81195210 1.582489036239487e-08
chr18 78077248 1.5075956950023575e-08
chr19 59128983 1.8220141872466202e-08
chr20 63025520 1.7178269031631664e-08
chr21 48129895 1.3045214034879191e-08
chr22 51304566 1.4445022767788226e-08
chrX 155270560 1.164662223273842e-08
chrY 59373566 0.0
"""

_chromosomes = []
for line in _chromosome_data.splitlines():
name, length = line.split()[:2]
name, length, mean_rr = line.split()[:3]
_chromosomes.append(genomes.Chromosome(
name=name, length=int(length),
mean_mutation_rate=1e-8, # WRONG!,
mean_recombination_rate=1e-8)) # WRONG!
default_mutation_rate=1e-8, # WRONG!,
default_recombination_rate=float(mean_rr)))


#: :class:`stdpopsim.Genome` definition for humans. Chromosome length data is
#: based on `GRCh38 <https://www.ncbi.nlm.nih.gov/grc/human/data>`_.
#: :class:`stdpopsim.Genome` definition for humans.
genome = genomes.Genome(
species="homo_sapiens",
chromosomes=_chromosomes,
Expand Down
97 changes: 69 additions & 28 deletions tests/test_homo_sapiens.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import io

import msprime
import numpy as np

from stdpopsim import homo_sapiens
from stdpopsim import genetic_maps
Expand All @@ -27,36 +28,76 @@ def test_str(self):

def test_chromosome_lengths(self):
genome = homo_sapiens.genome
# Numbers from https://www.ncbi.nlm.nih.gov/grc/human/data
# GRCh38.p12
self.assertEqual(genome.chromosomes["chr1"].length, 248956422)
self.assertEqual(genome.chromosomes["chr2"].length, 242193529)
self.assertEqual(genome.chromosomes["chr3"].length, 198295559)
self.assertEqual(genome.chromosomes["chr4"].length, 190214555)
self.assertEqual(genome.chromosomes["chr5"].length, 181538259)
self.assertEqual(genome.chromosomes["chr6"].length, 170805979)
self.assertEqual(genome.chromosomes["chr7"].length, 159345973)
self.assertEqual(genome.chromosomes["chr8"].length, 145138636)
self.assertEqual(genome.chromosomes["chr9"].length, 138394717)
self.assertEqual(genome.chromosomes["chr10"].length, 133797422)
self.assertEqual(genome.chromosomes["chr11"].length, 135086622)
self.assertEqual(genome.chromosomes["chr12"].length, 133275309)
self.assertEqual(genome.chromosomes["chr13"].length, 114364328)
self.assertEqual(genome.chromosomes["chr14"].length, 107043718)
self.assertEqual(genome.chromosomes["chr15"].length, 101991189)
self.assertEqual(genome.chromosomes["chr16"].length, 90338345)
self.assertEqual(genome.chromosomes["chr17"].length, 83257441)
self.assertEqual(genome.chromosomes["chr18"].length, 80373285)
self.assertEqual(genome.chromosomes["chr19"].length, 58617616)
self.assertEqual(genome.chromosomes["chr20"].length, 64444167)
self.assertEqual(genome.chromosomes["chr21"].length, 46709983)
self.assertEqual(genome.chromosomes["chr22"].length, 50818468)
self.assertEqual(genome.chromosomes["chrX"].length, 156040895)
self.assertEqual(genome.chromosomes["chrY"].length, 57227415)
self.assertEqual(genome.chromosomes["chr1"].length, 249250621)
self.assertEqual(genome.chromosomes["chr2"].length, 243199373)
self.assertEqual(genome.chromosomes["chr3"].length, 198022430)
self.assertEqual(genome.chromosomes["chr4"].length, 191154276)
self.assertEqual(genome.chromosomes["chr5"].length, 180915260)
self.assertEqual(genome.chromosomes["chr6"].length, 171115067)
self.assertEqual(genome.chromosomes["chr7"].length, 159138663)
self.assertEqual(genome.chromosomes["chr8"].length, 146364022)
self.assertEqual(genome.chromosomes["chr9"].length, 141213431)
self.assertEqual(genome.chromosomes["chr10"].length, 135534747)
self.assertEqual(genome.chromosomes["chr11"].length, 135006516)
self.assertEqual(genome.chromosomes["chr12"].length, 133851895)
self.assertEqual(genome.chromosomes["chr13"].length, 115169878)
self.assertEqual(genome.chromosomes["chr14"].length, 107349540)
self.assertEqual(genome.chromosomes["chr15"].length, 102531392)
self.assertEqual(genome.chromosomes["chr16"].length, 90354753)
self.assertEqual(genome.chromosomes["chr17"].length, 81195210)
self.assertEqual(genome.chromosomes["chr18"].length, 78077248)
self.assertEqual(genome.chromosomes["chr19"].length, 59128983)
self.assertEqual(genome.chromosomes["chr20"].length, 63025520)
self.assertEqual(genome.chromosomes["chr21"].length, 48129895)
self.assertEqual(genome.chromosomes["chr22"].length, 51304566)
self.assertEqual(genome.chromosomes["chrX"].length, 155270560)
self.assertEqual(genome.chromosomes["chrY"].length, 59373566)

def get_mean_rr_numpy(self, chromosome):
chrom_length = chromosome.length
recombination_map = chromosome.recombination_map()
positions = np.array(recombination_map.get_positions())
positions_diff = recombination_map.get_positions()[1:]
positions_diff.append(chrom_length)
positions_diff = np.array(positions_diff)
window_sizes = positions_diff - positions
weights = window_sizes / chrom_length
rates = recombination_map.get_rates()
return np.average(rates, weights=weights)

def test_default_recombination_rates(self):
# recompute default recombination rates from maps found at
# "http://ftp-trace.ncbi.nih.gov/1000genomes/ftp/technical/working/"
# "20110106_recombination_hotspots/ then compare the results to
# the current default recombination rates for each chromosome"

# We catch the warning that will be thrown when we iterate over
# Y chromosome.
with self.assertWarns(Warning):
for chrom in homo_sapiens.genome.chromosomes.values():
default_rr = chrom.default_recombination_rate
numpy_rr = self.get_mean_rr_numpy(chrom)
# Assert that the difference in mean recombination rate
# is small when computed with numpy.
self.assertTrue(np.allclose(default_rr, numpy_rr))

def test_default_recombination_rate(self):
genome = homo_sapiens.genome
highest_rr = 0
lowest_rr = 10000
for chrom in genome.chromosomes.values():
rr = chrom.default_recombination_rate
lowest_rr = min(lowest_rr, rr)
highest_rr = max(highest_rr, rr)
mean_genome_rr = genome.mean_recombination_rate
self.assertGreater(mean_genome_rr, lowest_rr)
self.assertGreater(highest_rr, mean_genome_rr)

def test_warning_from_no_mapped_chromosome(self):
# Test that a known chromosome throws a warning
# if there is no recombination map associated
"""
Test that a known chromosome throws a warning
if there is no recombination map associated
"""
with self.assertWarns(Warning):
chrom = homo_sapiens.genome.chromosomes["chrY"]
cm = chrom.recombination_map()
Expand Down

0 comments on commit d01604a

Please sign in to comment.