In [47]:
import msprime
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import math
import tskit

In [35]:
r_chrom = 1e-8 #Recombination rate
r_break = math.log(2) #Recombination rate needed to satisfy probability 2^-t inheritance of two chromsomes
chrom_positions = [0, 1e6, 2e6, 3e6] #1Mb chromosome sizes
map_positions = [
    chrom_positions[0],
    chrom_positions[1],
    chrom_positions[1] + 1,
    chrom_positions[2],
    chrom_positions[2] + 1,
    chrom_positions[3]
]
rates = [r_chrom, r_break, r_chrom, r_break, r_chrom] 
rate_map = msprime.RateMap(position=map_positions, rate=rates) #Rate map for separate chromosomes

In [40]:
alpha = np.random.uniform(low=1.05, high=2) #Draw alpha parameter from uniform distribution
Ne = np.random.uniform(low=1000, high=1000000)
ts = msprime.sim_ancestry(
    samples=38,
    population_size=10000,
    recombination_rate=rate_map,
    model=msprime.BetaCoalescent(alpha=alpha),
    random_seed=1234,
)
ts

Tree Sequence,Unnamed: 1
Trees,223
Sequence Length,3000000.0
Time Units,generations
Sample Nodes,76
Total Size,52.7 KiB
Metadata,No Metadata

Table,Rows,Size,Has Metadata
Edges,1064,33.3 KiB,
Individuals,38,1.1 KiB,
Migrations,0,8 Bytes,
Mutations,0,16 Bytes,
Nodes,307,8.4 KiB,
Populations,1,224 Bytes,✅
Provenances,1,1.4 KiB,
Sites,0,16 Bytes,


In [52]:
mts = msprime.sim_mutations(ts, rate=1e-8, random_seed=5678)


np.float64(1.4693918128654995e-05)

In [57]:
np.set_printoptions(legacy="1.25")
summary_statistics = [] #Initialize list of summary statistics
summary_statistics.append(1) #First column corresponds to model index
summary_statistics.append(10000) #Second column is Ne
summary_statistics.append(alpha) #Third column is alpha parameter
summary_statistics.append(1) #Fourth column is rho/theta
S = mts.get_num_mutations()
summary_statistics.append(S) #Fifth column is number of segregating sites
normalized_S = mts.segregating_sites(span_normalise=True)
summary_statistics.append(normalized_S) #Sixth column is span normalized S
pi = mts.diversity()
summary_statistics.append(pi) #Seventh column is nucleotide diversity
summary_statistics

[1,
 10000,
 1.3977804705446066,
 1,
 284,
 9.466666666666667e-05,
 1.4693918128654995e-05]

In [102]:
afs = mts.allele_frequency_spectrum(span_normalise=False, polarised=False)

afs_entries = []

for x in range(1, 40):
   num_mutations = afs[x]
   l = [x/76] * int(num_mutations)
   afs_entries.extend(l)
afs_entries = np.array(afs_entries)

array([0.01315789, 0.01315789, 0.01315789, 0.01315789, 0.01315789,
       0.01315789, 0.01315789, 0.01315789, 0.01315789, 0.01315789,
       0.01315789, 0.01315789, 0.01315789, 0.01315789, 0.01315789,
       0.01315789, 0.01315789, 0.01315789, 0.01315789, 0.01315789,
       0.01315789, 0.01315789, 0.01315789, 0.01315789, 0.01315789,
       0.01315789, 0.01315789, 0.01315789, 0.01315789, 0.01315789,
       0.01315789, 0.01315789, 0.01315789, 0.01315789, 0.01315789,
       0.01315789, 0.01315789, 0.01315789, 0.01315789, 0.01315789,
       0.01315789, 0.01315789, 0.01315789, 0.01315789, 0.01315789,
       0.01315789, 0.01315789, 0.01315789, 0.01315789, 0.01315789,
       0.01315789, 0.01315789, 0.01315789, 0.01315789, 0.01315789,
       0.01315789, 0.01315789, 0.01315789, 0.01315789, 0.01315789,
       0.01315789, 0.01315789, 0.01315789, 0.01315789, 0.01315789,
       0.01315789, 0.01315789, 0.01315789, 0.01315789, 0.01315789,
       0.01315789, 0.01315789, 0.01315789, 0.01315789, 0.01315

In [106]:
afs_quant = np.quantile(afs_entries, [0.1, 0.3, 0.5, 0.7, 0.9])
summary_statistics.append(afs_quant[0]) #8th column is AFS quantile 0.1
summary_statistics.append(afs_quant[1]) #9th column 0.3
summary_statistics.append(afs_quant[2]) #10th column 0.5
summary_statistics.append(afs_quant[3]) #11th column 0.7
summary_statistics.append(afs_quant[4]) #12th column 0.9
summary_statistics

[1,
 10000,
 1.3977804705446066,
 1,
 284,
 9.466666666666667e-05,
 1.4693918128654995e-05,
 0.013157894736842105,
 0.013157894736842105,
 0.039473684210526314,
 0.10657894736842097,
 0.32500000000000023,
 0.013157894736842105,
 0.013157894736842105,
 0.039473684210526314,
 0.10657894736842097,
 0.32500000000000023]

In [131]:
num_windows = 30
D_array = mts.Tajimas_D(windows=np.linspace(0, ts.sequence_length, num_windows + 1))
summary_statistics.append(np.mean(D_array))
summary_statistics.append(np.var(D_array))
summary_statistics

[1,
 10000,
 1.3977804705446066,
 1,
 284,
 9.466666666666667e-05,
 1.4693918128654995e-05,
 0.013157894736842105,
 0.013157894736842105,
 0.039473684210526314,
 0.10657894736842097,
 0.32500000000000023,
 0.013157894736842105,
 0.013157894736842105,
 0.039473684210526314,
 0.10657894736842097,
 0.32500000000000023,
 0.00017777777777777776,
 0.0007407407407407406,
 0.0031796502384737672,
 0.013310297664039157,
 0.11043354549177514,
 0.00017777777777777776,
 0.0007407407407407406,
 0.0031796502384737672,
 0.013310297664039157,
 0.11043354549177514,
 0.05417167691064831,
 0.027642286210542138,
 0.00017777777777777776,
 0.0007407407407407406,
 0.0031796502384737672,
 0.013310297664039157,
 0.11043354549177514,
 0.05417167691064831,
 0.027642286210542138,
 -0.6709081352399637,
 0.6869938335316393,
 -0.6709081352399637,
 0.6869938335316393]

In [109]:
ts_chroms = []
for j in range(len(chrom_positions) - 1):
    start, end = chrom_positions[j: j + 2]
    chrom_ts = mts.keep_intervals([[start, end]], simplify=False).trim()
    ts_chroms.append(chrom_ts)
    print(chrom_ts.sequence_length)

1000000.0
1000000.0
1000000.0


Tree Sequence,Unnamed: 1
Trees,73
Sequence Length,1000000.0
Time Units,generations
Sample Nodes,76
Total Size,31.5 KiB
Metadata,No Metadata

Table,Rows,Size,Has Metadata
Edges,344,10.8 KiB,
Individuals,38,1.1 KiB,
Migrations,0,8 Bytes,
Mutations,85,3.1 KiB,
Nodes,307,8.4 KiB,
Populations,1,224 Bytes,✅
Provenances,4,3.1 KiB,
Sites,85,2.1 KiB,


In [127]:

ld_calc = tskit.LdCalculator(ts_chroms[0])
r2_chrom1 = ld_calc.r2_matrix()
r2_chrom1 = np.matrix.flatten(r2_chrom1)
ld_calc = tskit.LdCalculator(ts_chroms[1])
r2_chrom2 = ld_calc.r2_matrix()
r2_chrom2 = np.matrix.flatten(r2_chrom2)
ld_calc = tskit.LdCalculator(ts_chroms[2])
r2_chrom3 = ld_calc.r2_matrix()
r2_chrom3 = np.matrix.flatten(r2_chrom3)
r2 = np.concatenate((r2_chrom1,r2_chrom2,r2_chrom3))
r2_quant = np.quantile(r2, [0.1,0.3,0.5,0.7,0.9])
r2_quant

0.05417167691064831

In [129]:
summary_statistics.append(r2_quant[0])
summary_statistics.append(r2_quant[1])
summary_statistics.append(r2_quant[2])
summary_statistics.append(r2_quant[3])
summary_statistics.append(r2_quant[4])
summary_statistics.append(np.mean(r2))
summary_statistics.append(np.var(r2))
summary_statistics

[1,
 10000,
 1.3977804705446066,
 1,
 284,
 9.466666666666667e-05,
 1.4693918128654995e-05,
 0.013157894736842105,
 0.013157894736842105,
 0.039473684210526314,
 0.10657894736842097,
 0.32500000000000023,
 0.013157894736842105,
 0.013157894736842105,
 0.039473684210526314,
 0.10657894736842097,
 0.32500000000000023,
 0.00017777777777777776,
 0.0007407407407407406,
 0.0031796502384737672,
 0.013310297664039157,
 0.11043354549177514,
 0.00017777777777777776,
 0.0007407407407407406,
 0.0031796502384737672,
 0.013310297664039157,
 0.11043354549177514,
 0.05417167691064831,
 0.027642286210542138,
 0.00017777777777777776,
 0.0007407407407407406,
 0.0031796502384737672,
 0.013310297664039157,
 0.11043354549177514,
 0.05417167691064831,
 0.027642286210542138]

In [132]:
for x in range(3):
    with open("output"+str(x+1)+".vcf", "w") as vcf_file:
        ts_chroms[x].write_vcf(vcf_file, contig_id=str(x+1))

In [None]:
from subprocess import call

