## Compare simulated individuals between our model and the Browning model

In [1]:
import json
import os

import stdpopsim
# Look up the species with id "HomSap" and take its "chr22" contig; both
# objects are reused by the simulation cells further down.
species = stdpopsim.get_species("HomSap")
contig = species.get_contig("chr22") # default is a flat genetic map

In [2]:
# Look up the 'AmericanAdmixture_4B11' demographic model for this species and
# print its population layout. The values in the comments below mirror the
# output recorded for this cell.
model = species.get_demographic_model('AmericanAdmixture_4B11')
print(model.num_populations)
# 4
print(model.num_sampling_populations)
# 4
print([pop.id for pop in model.populations])
# ['AFR', 'EUR', 'ASIA', 'ADMIX']

4
4
['AFR', 'EUR', 'ASIA', 'ADMIX']


In [3]:
# Number of samples requested from each population, passed to get_samples in
# the same order the model lists its populations (AFR, EUR, ASIA, ADMIX).
N_AFR = 108
N_EUR = 107
N_ASIA = 103
N_ADMIX = 64

# Fixed seed so that Restart & Run All regenerates the same tree sequence (and
# therefore the same VCF). Site counts recorded from earlier, unseeded runs
# will not match the seeded result.
SEED = 42

samples = model.get_samples(N_AFR, N_EUR, N_ASIA, N_ADMIX)
engine = stdpopsim.get_engine('msprime')
ts = engine.simulate(model, contig, samples, seed=SEED)
print(ts.num_sites)

247360


In [4]:
# Report how many sample nodes each population contributed, as a check that
# the tree sequence matches the sample sizes requested above.
for k, pop in enumerate(ts.populations()):
    # Population metadata is decoded as JSON here; its 'id' field holds the
    # population name used in the message.
    popdata = json.loads(pop.metadata)
    print(f"The tree sequence has {len(ts.samples(k))} samples from "
          f"population {k}, which is {popdata['id']}.")

The tree sequence has 108 samples from population 0, which is AFR.
The tree sequence has 107 samples from population 1, which is EUR.
The tree sequence has 103 samples from population 2, which is ASIA.
The tree sequence has 64 samples from population 3, which is ADMIX.


In [5]:
# One population label per sample node, built from the requested sample sizes.
# NOTE(review): this assumes the sample nodes are ordered AFR, EUR, ASIA, ADMIX,
# as in the per-population report; the YRI/IBS/CHB/MXL labels are presumably the
# matching reference-panel population codes -- confirm against the comparison data.
pops = ['YRI']*N_AFR + ['IBS']*N_EUR + ['CHB']*N_ASIA + ['MXL']*N_ADMIX

# Guard against label/sample misalignment before the labels are turned into
# VCF column names.
assert len(pops) == ts.num_samples, "expected one population label per sample node"

In [6]:
# Despite its name, n_dip_indv is the total number of sample nodes in the tree
# sequence (it is not halved): every sample node gets its own VCF column name.
n_dip_indv = int(ts.num_samples)

# Names look like "<population label>_<sample index>indv", e.g. "YRI_0indv".
indv_names = [
    f"{pops[idx]}_{idx}indv"
    for idx in range(n_dip_indv)
]

In [7]:
# Make sure the output directory exists first: open(..., "w") creates the file
# but not missing parent directories, so a fresh checkout would otherwise fail.
os.makedirs("data", exist_ok=True)

# Write the simulated variants as VCF, labelling the contig '22' and using the
# per-sample names built above as the column names.
with open("data/BrowningEtAl2011.vcf", "w") as vcf_file:
    ts.write_vcf(vcf_file, contig_id='22', individual_names=indv_names)