In [1]:
import pyslim
import tskit
import msprime

# Generating realistic genetic data

## Overlaying neutral diversity

We continue from the previous example,
now overlaying additional neutral mutations:

In [2]:
# Load the tree sequence written out by the previous example.
ts = tskit.load("../generating_diversity/final.trees")

# Newly added mutations are given SLiM-style IDs starting at `next_id`,
# so that they do not reuse IDs of mutations already present in `ts`.
next_id = pyslim.next_slim_mutation_id(ts)
mut_model = msprime.SLiMMutationModel(type=1, next_id=next_id)

# NOTE(review): the rate looks like the neutral share (99%) of a total
# per-bp mutation rate of 3e-8 used in the previous example -- confirm.
# keep=True retains the mutations already in `ts`; the fixed random_seed
# makes the overlaid mutations identical on every "Restart & Run All".
vts = msprime.sim_mutations(
    ts,
    rate=0.99 * 3e-8,
    model=mut_model,
    keep=True,
    random_seed=42,
)

To write out a (legal) VCF we need to replace the ancestral and derived states of the mutations with nucleotides:

In [3]:
# Randomly assign a nucleotide to each mutation; the fixed seed makes the
# assignment (and hence the VCF written below) reproducible across runs.
vts_nucleotides = pyslim.generate_nucleotides(vts, seed=42)
# Replace the allele states with the nucleotides assigned above, so the
# REF/ALT columns of the VCF contain A/C/G/T.
nts = pyslim.convert_alleles(vts_nucleotides)
with open('nucs.vcf', 'w') as f:
    nts.write_vcf(f)

Let's look at the first few lines of the resulting VCF (showing only the first 15 columns):

In [4]:
! head nucs.vcf | cut -f 1-15 

##fileformat=VCFv4.2
##source=tskit 0.6.2
##FILTER=<ID=PASS,Description="All filters passed">
##contig=<ID=1,length=90000000>
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	tsk_0	tsk_1	tsk_2	tsk_3	tsk_4	tsk_5
1	76	0	C	G	.	PASS	.	GT	0|0	0|0	0|0	0|0	1|0	0|0
1	1533	1	G	A	.	PASS	.	GT	0|0	0|0	0|0	0|0	0|0	0|0
1	2519	2	C	G	.	PASS	.	GT	0|0	0|1	0|0	0|0	0|0	0|1
1	4342	3	G	C	.	PASS	.	GT	0|0	0|0	0|0	0|0	0|0	0|0
