In [1061]:
import msprime
import numpy as np
import matplotlib.pyplot as plt
import math
import tskit
import scipy
import allel

In [1062]:
r_chrom = 1e-8 #Recombination rate
r_break = math.log(2) #Recombination rate needed to satisfy probability 2^-t inheritance of two chromsomes
chrom_positions = [0, 1e6, 2e6, 3e6] #1Mb chromosome sizes
map_positions = [
    chrom_positions[0],
    chrom_positions[1],
    chrom_positions[1] + 1,
    chrom_positions[2],
    chrom_positions[2] + 1,
    chrom_positions[3]
]
rates = [r_chrom, r_break, r_chrom, r_break, r_chrom] 
rate_map = msprime.RateMap(position=map_positions, rate=rates) #Rate map for separate chromosomes

In [1139]:
alpha = 1.1
Ne = 1e5
ts = msprime.sim_ancestry(
    samples = 38,
    population_size = Ne,
    recombination_rate = rate_map,
    model=msprime.BetaCoalescent(alpha = alpha),
    #random_seed=1234,
)
ts

Tree Sequence,Unnamed: 1
Trees,98
Sequence Length,3000000.0
Time Units,generations
Sample Nodes,76
Total Size,31.8 KiB
Metadata,No Metadata

Table,Rows,Size,Has Metadata
Edges,601,18.8 KiB,
Individuals,38,1.1 KiB,
Migrations,0,8 Bytes,
Mutations,0,16 Bytes,
Nodes,206,5.6 KiB,
Populations,1,224 Bytes,✅
Provenances,1,1.3 KiB,
Sites,0,16 Bytes,


In [1140]:
mts = msprime.sim_mutations(ts, rate=1e-8, random_seed=5678)


In [1141]:
mts

Tree Sequence,Unnamed: 1
Trees,98
Sequence Length,3000000.0
Time Units,generations
Sample Nodes,76
Total Size,42.1 KiB
Metadata,No Metadata

Table,Rows,Size,Has Metadata
Edges,601,18.8 KiB,
Individuals,38,1.1 KiB,
Migrations,0,8 Bytes,
Mutations,158,5.7 KiB,
Nodes,206,5.6 KiB,
Populations,1,224 Bytes,✅
Provenances,2,2.1 KiB,
Sites,158,3.9 KiB,


In [1180]:
sample_size = 38
1/(sample_size*2)

0.013157894736842105

In [1142]:
np.set_printoptions(legacy="1.21")
summary_statistics = [] #Initialize list of summary statistics
summary_statistics.append(1) #First column corresponds to model index
summary_statistics.append(Ne) #Second column is Ne
summary_statistics.append(alpha) #Third column is alpha parameter
summary_statistics.append(1) #Fourth column is rho/theta
S = mts.get_num_mutations()
summary_statistics.append(S) #Fifth column is number of segregating sites
normalized_S = mts.segregating_sites(span_normalise=True)
summary_statistics.append(normalized_S) #Sixth column is span normalized S
pi = mts.diversity()
summary_statistics.append(pi) #Seventh column is nucleotide diversity
summary_statistics

[1, 100000.0, 1.1, 1, 158, 5.266666666666667e-05, 6.0176608187134685e-06]

In [1166]:
afs = mts.allele_frequency_spectrum(span_normalise=False, polarised=False)

afs_entries = []

for x in range(1, 40):
   num_mutations = afs[x]
   l = [x/76] * int(num_mutations)
   afs_entries.extend(l)
afs_entries = np.array(afs_entries)
len(afs_entries)

158

In [1178]:
afs_entries

array([0.01315789, 0.01315789, 0.01315789, 0.01315789, 0.01315789,
       0.01315789, 0.01315789, 0.01315789, 0.01315789, 0.01315789,
       0.01315789, 0.01315789, 0.01315789, 0.01315789, 0.01315789,
       0.01315789, 0.01315789, 0.01315789, 0.01315789, 0.01315789,
       0.01315789, 0.01315789, 0.01315789, 0.01315789, 0.01315789,
       0.01315789, 0.01315789, 0.01315789, 0.01315789, 0.01315789,
       0.01315789, 0.01315789, 0.01315789, 0.01315789, 0.01315789,
       0.01315789, 0.01315789, 0.01315789, 0.01315789, 0.01315789,
       0.01315789, 0.01315789, 0.01315789, 0.01315789, 0.01315789,
       0.01315789, 0.01315789, 0.01315789, 0.01315789, 0.01315789,
       0.01315789, 0.01315789, 0.01315789, 0.01315789, 0.01315789,
       0.01315789, 0.01315789, 0.01315789, 0.01315789, 0.01315789,
       0.01315789, 0.01315789, 0.01315789, 0.01315789, 0.01315789,
       0.01315789, 0.01315789, 0.01315789, 0.01315789, 0.01315789,
       0.01315789, 0.01315789, 0.01315789, 0.01315789, 0.01315

In [1144]:
afs_quant = np.quantile(afs_entries, [0.1, 0.3, 0.5, 0.7, 0.9])
summary_statistics.append(afs_quant[0]) #8th column is AFS quantile 0.1
summary_statistics.append(afs_quant[1]) #9th column 0.3
summary_statistics.append(afs_quant[2]) #10th column 0.5
summary_statistics.append(afs_quant[3]) #11th column 0.7
summary_statistics.append(afs_quant[4]) #12th column 0.9
summary_statistics

[1,
 100000.0,
 1.1,
 1,
 158,
 5.266666666666667e-05,
 6.0176608187134685e-06,
 0.013157894736842105,
 0.013157894736842105,
 0.013157894736842105,
 0.039473684210526314,
 0.3171052631578958]

In [1145]:
num_windows = 30
D_array = mts.Tajimas_D(windows=np.linspace(0, ts.sequence_length, num_windows + 1))
summary_statistics.append(np.nanmean(D_array))
summary_statistics.append(np.nanvar(D_array))
summary_statistics

[1,
 100000.0,
 1.1,
 1,
 158,
 5.266666666666667e-05,
 6.0176608187134685e-06,
 0.013157894736842105,
 0.013157894736842105,
 0.013157894736842105,
 0.039473684210526314,
 0.3171052631578958,
 -1.0296638690416244,
 0.5515323673370985]

In [1146]:
ts_chroms = []
for j in range(len(chrom_positions) - 1):
    start, end = chrom_positions[j: j + 2]
    chrom_ts = mts.keep_intervals([[start, end]], simplify=False).trim()
    ts_chroms.append(chrom_ts)
    print(chrom_ts.sequence_length)

1000000.0
1000000.0
1000000.0


In [1151]:
gn = mts.genotype_matrix()
print("Converted to genotype matrix...")
r = allel.rogers_huff_r(gn)
print("Calculated r...")
s = scipy.spatial.distance.squareform(r ** 2)
arr = mts.sites_position
result = abs(arr[:, None] - arr)


Converted to genotype matrix...
Calculated r...


In [1152]:
scaled_ld = result * s

chrom1_mut_num = ts_chroms[0].get_num_mutations()
chrom1_scaled_ld = scaled_ld[:chrom1_mut_num,:chrom1_mut_num]

chrom2_mut_num = ts_chroms[1].get_num_mutations()
chrom1and2_mut_num = chrom1_mut_num + chrom2_mut_num
chrom2_scaled_ld = scaled_ld[chrom1_mut_num:chrom1and2_mut_num, chrom1_mut_num:chrom1and2_mut_num]

chrom3_mut_num = ts_chroms[2].get_num_mutations()
total_mut_num = chrom1and2_mut_num + chrom3_mut_num
chrom3_scaled_ld = scaled_ld[chrom1and2_mut_num:total_mut_num,chrom1and2_mut_num:total_mut_num]



chrom1_scaled_ld = chrom1_scaled_ld[np.triu_indices_from(chrom1_scaled_ld)]
chrom2_scaled_ld = chrom2_scaled_ld[np.triu_indices_from(chrom2_scaled_ld)]
chrom3_scaled_ld = chrom3_scaled_ld[np.triu_indices_from(chrom3_scaled_ld)]



scaled_r2 = np.concatenate((chrom1_scaled_ld, chrom2_scaled_ld, chrom3_scaled_ld))
scaled_r2_quant = np.nanquantile(scaled_r2, [0.1,0.3,0.5,0.7,0.9])
scaled_r2_quant


summary_statistics.append(scaled_r2_quant[0])
summary_statistics.append(scaled_r2_quant[1])
summary_statistics.append(scaled_r2_quant[2])
summary_statistics.append(scaled_r2_quant[3])
summary_statistics.append(scaled_r2_quant[4])
summary_statistics.append(np.nanmean(scaled_r2))
summary_statistics.append(np.nanvar(scaled_r2))

In [1164]:

scaled_r2[::-1].sort()
np.std(scaled_r2)

66389.7463969021

In [1072]:
chrom1_mut_num = ts_chroms[0].get_num_mutations()
chrom1_ld = s[:chrom1_mut_num,:chrom1_mut_num]

chrom2_mut_num = ts_chroms[1].get_num_mutations()
chrom1and2_mut_num = chrom1_mut_num + chrom2_mut_num
chrom2_ld = s[chrom1_mut_num:chrom1and2_mut_num, chrom1_mut_num:chrom1and2_mut_num]

chrom3_mut_num = ts_chroms[2].get_num_mutations()
total_mut_num = chrom1and2_mut_num + chrom3_mut_num
chrom3_ld = s[chrom1and2_mut_num:total_mut_num,chrom1and2_mut_num:total_mut_num]

print(chrom3_ld.shape[0], chrom3_ld.shape[0])


85 85


In [1073]:
chrom1_ld = chrom1_ld[np.triu_indices_from(chrom1_ld)]
chrom2_ld = chrom2_ld[np.triu_indices_from(chrom2_ld)]
chrom3_ld = chrom3_ld[np.triu_indices_from(chrom3_ld)]

In [1074]:
np.nanmean(chrom3_ld)

0.066769905

In [1075]:

r2 = np.concatenate((chrom1_ld, chrom2_ld, chrom3_ld))
r2_quant = np.nanquantile(r2, [0.1,0.3,0.5,0.7,0.9])
r2_quant


array([0.00036036, 0.00228311, 0.00777778, 0.03166023, 0.18933332])

In [1076]:
summary_statistics.append(r2_quant[0])
summary_statistics.append(r2_quant[1])
summary_statistics.append(r2_quant[2])
summary_statistics.append(r2_quant[3])
summary_statistics.append(r2_quant[4])
summary_statistics.append(np.nanmean(r2))
summary_statistics.append(np.nanvar(r2))
summary_statistics

[1,
 10000.0,
 1.99,
 1,
 297,
 9.9e-05,
 1.8691578947368464e-05,
 0.013157894736842105,
 0.02631578947368421,
 0.06578947368421052,
 0.17105263157894737,
 0.37368421052631623,
 -0.19293021355510756,
 0.8171358392065822,
 0.0003603606310207397,
 0.002283105393871665,
 0.0077777765691280365,
 0.031660228967666626,
 0.18933331966400146,
 0.07040439,
 0.028821439]

In [1077]:
chrom1_ild = s[:chrom1_mut_num,chrom1_mut_num:]
chrom1_ild = np.matrix.flatten(chrom1_ild)
chrom2_ild_a = s[chrom1_mut_num:chrom1and2_mut_num,chrom1and2_mut_num:]
chrom2_ild_b = s[chrom1_mut_num:chrom1and2_mut_num,chrom1_mut_num:chrom1and2_mut_num]
chrom2_ild_a = np.matrix.flatten(chrom2_ild_a)
chrom2_ild_b = np.matrix.flatten(chrom2_ild_b)
chrom2_ild = np.concatenate((chrom2_ild_a, chrom2_ild_b))
chrom3_ild = s[chrom1and2_mut_num:total_mut_num,chrom1and2_mut_num:chrom3_mut_num]
chrom3_ild = np.matrix.flatten(chrom3_ild)
np.nanmean(chrom2_ild)

0.038438816

In [1078]:
ild_all = np.concatenate((chrom1_ild, chrom2_ild, chrom3_ild))
ild_quant = np.nanquantile(ild_all, [0.1,0.3,0.5,0.7,0.9])

In [None]:
summary_statistics.append(ild_quant[0])
summary_statistics.append(ild_quant[1])
summary_statistics.append(ild_quant[2])
summary_statistics.append(ild_quant[3])
summary_statistics.append(ild_quant[4])
summary_statistics.append(np.nanmean(ild_all))
summary_statistics.append(np.nanvar(ild_all))
summary_statistics

In [1085]:
import pandas as pd

In [1084]:
data = []


[[1,
  10000.0,
  1.99,
  1,
  297,
  9.9e-05,
  1.8691578947368464e-05,
  0.013157894736842105,
  0.02631578947368421,
  0.06578947368421052,
  0.17105263157894737,
  0.37368421052631623,
  -0.19293021355510756,
  0.8171358392065822,
  0.0003603606310207397,
  0.002283105393871665,
  0.0077777765691280365,
  0.031660228967666626,
  0.18933331966400146,
  0.07040439,
  0.028821439,
  0.000360360398190096,
  0.0013526568654924631,
  0.0036304970271885395,
  0.010372655186802136,
  0.0447138287127018,
  0.02452883,
  0.0074033057,
  0.000360360398190096,
  0.0013526568654924631,
  0.0036304970271885395,
  0.010372655186802136,
  0.0447138287127018,
  0.02452883,
  0.0074033057]]

In [1087]:
data.append((summary_statistics))
data

[[1,
  10000.0,
  1.99,
  1,
  297,
  9.9e-05,
  1.8691578947368464e-05,
  0.013157894736842105,
  0.02631578947368421,
  0.06578947368421052,
  0.17105263157894737,
  0.37368421052631623,
  -0.19293021355510756,
  0.8171358392065822,
  0.0003603606310207397,
  0.002283105393871665,
  0.0077777765691280365,
  0.031660228967666626,
  0.18933331966400146,
  0.07040439,
  0.028821439,
  0.000360360398190096,
  0.0013526568654924631,
  0.0036304970271885395,
  0.010372655186802136,
  0.0447138287127018,
  0.02452883,
  0.0074033057,
  0.000360360398190096,
  0.0013526568654924631,
  0.0036304970271885395,
  0.010372655186802136,
  0.0447138287127018,
  0.02452883,
  0.0074033057],
 [1,
  10000.0,
  1.99,
  1,
  297,
  9.9e-05,
  1.8691578947368464e-05,
  0.013157894736842105,
  0.02631578947368421,
  0.06578947368421052,
  0.17105263157894737,
  0.37368421052631623,
  -0.19293021355510756,
  0.8171358392065822,
  0.0003603606310207397,
  0.002283105393871665,
  0.0077777765691280365,
  0.0

In [1088]:
x = pd.DataFrame(data)
x

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,25,26,27,28,29,30,31,32,33,34
0,1,10000.0,1.99,1,297,9.9e-05,1.9e-05,0.013158,0.026316,0.065789,...,0.044714,0.024529,0.007403,0.00036,0.001353,0.00363,0.010373,0.044714,0.024529,0.007403
1,1,10000.0,1.99,1,297,9.9e-05,1.9e-05,0.013158,0.026316,0.065789,...,0.044714,0.024529,0.007403,0.00036,0.001353,0.00363,0.010373,0.044714,0.024529,0.007403


In [1100]:
alphas_list = [1.99, 1.9, 1.7, 1.5, 1.3, 1.1]
var = 0
for i in range(1500):
    if (i+1) % 250 == 0:
        var += 1
    alpha = alphas_list[var]
    print(alpha)

1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99
1.99


IndexError: list index out of range

In [1101]:
1%250

1