In [1]:
import os
import time

import msprime
import stdpopsim
import tskit

# Report the installed version of every simulation library used below.
for library in (tskit, msprime, stdpopsim):
    print(f"{library.__name__} {library.__version__}")

tskit 0.4.1
msprime 1.1.1
stdpopsim 0.1.2


### Simulate ancestry, and mutations on that ancestry, under an Out-of-Africa demographic model with archaic admixture.

In [2]:
# Look up the species, the contig to simulate, and the demographic model
# by their stdpopsim catalog identifiers.
species = stdpopsim.get_species("HomSap")
contig = species.get_contig("chr20")
model = species.get_demographic_model("OutOfAfricaArchaicAdmixture_5R19")

# Summarise the populations the model defines.
population_ids = [pop.id for pop in model.populations]
print(f"Number of (sampling and non-sampling) populations is {model.num_populations}.")
print(f"Number of sampling populations is {model.num_sampling_populations}.")
print(f"Populations in this model are: {population_ids}.")

Number of (sampling and non-sampling) populations is 5.
Number of sampling populations is 3.
Populations in this model are: ['YRI', 'CEU', 'CHB', 'Neanderthal', 'ArchaicAFR'].


In [13]:
# Fixed random seed so that re-running this cell regenerates the same
# tree sequence (and therefore the same summary statistics below).
SEED = 42

num_samples_yri = 1_000 # Ne = 12,300
num_samples_ceu = 1_000 # Ne =  1,000
num_samples_chb = 1_000 # Ne =    510

# Each count is doubled before being handed to get_samples, giving
# 2 * 3 * 1_000 = 6_000 sampled genomes in total
# (presumably two genomes per diploid individual -- TODO confirm).
samples = model.get_samples(2 * num_samples_yri,
                            2 * num_samples_ceu,
                            2 * num_samples_chb)

engine = stdpopsim.get_engine('msprime')

# Simulate with a continuous genome (discrete_genome = False) and an
# explicit seed for reproducibility.
ts = engine.simulate(model,
                     contig,
                     samples,
                     seed = SEED,
                     discrete_genome = False)

In [14]:
# Check the tree sequence
print(f"Number of individuals is {ts.num_individuals}.")
print(f"Number of sample nodes is {ts.num_samples}.")
print(f"Number of all nodes is {ts.num_nodes}.")
# ts.num_nodes - ts.num_samples equals the number of non-sample nodes
print(f"Number of trees in the tree sequence is {ts.num_trees}.")

print(f"Number of mutations is {ts.num_mutations}.")
print(f"Number of sites with genetic variation is {ts.num_sites}.")
# ts.nbytes is divided by 1e6 here, so the value shown is in megabytes,
# not bytes; the label states the unit explicitly.
print(f"Size of the tree sequence is {ts.nbytes / 1e6} MB.")

Number of individuals of is 0.
Number of sample nodes is 6000.
Number of all nodes is 556652.
Number of trees in the tree sequence is 962356.
Number of mutations is 746811.
Number of sites with genetic variation is 746811.
Number of bytes to store the tree sequence is 214.119115.


In [8]:
# Store the simulated tree sequence
ts_file = "../data/simulated_tree_sequences/OutOfAfricaArchaicAdmixture_5R19.trees"
# Create the output directory first so that the dump does not fail (and the
# simulation result is not lost) when the directory does not exist yet.
os.makedirs(os.path.dirname(ts_file), exist_ok=True)
ts.dump(ts_file)