In [1]:
# statistics
import numpy as np
from scipy import stats

# population genetics
import tskit
import msprime

In [2]:
n = 5 # sample size
N = 100 # constant population size
ploidy = 2
rho = 1e-4 # recombination rate
mu = 2e-4 # mutation rate
L = 1_000 # sequence length
m = ploidy * n

In [3]:
# simulate the tree
trees = msprime.sim_ancestry(samples=n,
                             population_size=N,
                             ploidy=ploidy,
                             recombination_rate=rho,
                             sequence_length=L, 
                             )
trees = msprime.sim_mutations(trees, rate=mu)

# genotype matrix
genotype_matrix = trees.genotype_matrix()

In [27]:
K = 100

for k in range(K):

    # simulate the tree
    trees = msprime.sim_ancestry(samples=n,
                                 population_size=N,
                                 ploidy=ploidy,
                                 recombination_rate=rho,
                                 sequence_length=L, 
                                 )
    trees = msprime.sim_mutations(trees, rate=mu)

    # genotype matrix
    genotype_matrix = trees.genotype_matrix()

    # get the biallelic variant positions
    # and biallelic variants
    variant_positions = np.zeros(genotype_matrix.shape[0])
    biallelic = np.ones(genotype_matrix.shape[0],dtype=bool)
    itr = 0
    for variant in trees.variants():
        variant_positions[itr] = variant.position
        # check if biallelic
        if len(variant.alleles) > 2:
            biallelic[itr] = 0
        itr += 1

    # standardize the positions
    # and frequencies
    # to be between 0 and 1
    biallelic_matrix = genotype_matrix[biallelic]
    biallelic_positions = variant_positions[biallelic]
    biallelic_positions -= biallelic_positions.min()
    biallelic_positions /= biallelic_positions.max()

    # compute allele frequencies
    biallelic_frequencies = biallelic_matrix.mean(axis=1)

    # write numpy arrays to file
    np.savetxt("data/biallelic_positions_" + str(k) + ".csv", biallelic_positions, delimiter=",", fmt="%.8f")
    np.savetxt("data/biallelic_frequencies_" + str(k) + ".csv", biallelic_frequencies, delimiter=",",fmt="%.6f")
    np.savetxt("data/biallelic_matrix_" + str(k) + ".csv", biallelic_matrix, delimiter=",", fmt="%d")

In [24]:
biallelic_positions

array([0.        , 0.00202429, 0.00708502, 0.00809717, 0.01417004,
       0.01619433, 0.02327935, 0.02834008, 0.03238866, 0.05769231,
       0.06072874, 0.06174089, 0.06376518, 0.06578947, 0.06882591,
       0.08097166, 0.08704453, 0.09615385, 0.10222672, 0.11437247,
       0.11538462, 0.11842105, 0.12651822, 0.12854251, 0.1417004 ,
       0.1659919 , 0.17307692, 0.18117409, 0.18319838, 0.1902834 ,
       0.19129555, 0.20445344, 0.20850202, 0.21255061, 0.21356275,
       0.2145749 , 0.21659919, 0.22469636, 0.23380567, 0.23684211,
       0.24089069, 0.24595142, 0.24797571, 0.25      , 0.25101215,
       0.25202429, 0.25910931, 0.26518219, 0.26720648, 0.26821862,
       0.27024291, 0.27226721, 0.27834008, 0.28036437, 0.28137652,
       0.28340081, 0.28744939, 0.29251012, 0.29352227, 0.298583  ,
       0.30161943, 0.30566802, 0.30769231, 0.30870445, 0.31376518,
       0.31477733, 0.31578947, 0.31781377, 0.32894737, 0.33097166,
       0.33198381, 0.33603239, 0.34109312, 0.34210526, 0.34615

In [12]:
variant_positions

array([  2.,   3.,   4.,   5.,   6.,   9.,  15.,  22.,  23.,  25.,  30.,
        48.,  50.,  56.,  66.,  75.,  84.,  89.,  90., 113., 143., 146.,
       156., 164., 169., 177., 193., 196., 199., 204., 209., 210., 218.,
       220., 221., 224., 225., 232., 241., 243., 253., 265., 271., 274.,
       295., 299., 305., 307., 320., 322., 336., 337., 341., 344., 351.,
       364., 365., 367., 375., 380., 385., 391., 397., 411., 414., 421.,
       422., 463., 466., 472., 491., 509., 512., 513., 526., 532., 555.,
       557., 562., 563., 565., 567., 577., 580., 582., 584., 589., 605.,
       607., 609., 613., 621., 646., 648., 652., 654., 656., 658., 659.,
       660., 661., 662., 663., 667., 670., 671., 675., 676., 678., 681.,
       682., 683., 684., 685., 693., 697., 704., 710., 711., 712., 716.,
       718., 722., 738., 741., 749., 759., 771., 772., 776., 778., 786.,
       789., 793., 799., 808., 809., 812., 816., 820., 824., 825., 827.,
       834., 844., 845., 860., 862., 871., 872., 87

In [9]:
trees.num_variants

AttributeError: 'TreeSequence' object has no attribute 'num_variants'

In [8]:
genotype_matrix.mean(axis=1)

array([0.1, 0.5, 0.3, 0.1, 0.7, 0.1, 0.1, 0.2, 0.2, 0.2, 0.3, 0.3, 1.3,
       0.3, 0.3, 0.7, 0.4, 0.7, 0.1, 0.3, 0.2, 0.5, 0.7, 0.7, 0.3, 0.1,
       0.3, 0.3, 0.8, 0.7, 0.2, 0.3, 0.7, 0.5, 0.3, 0.1, 0.6, 0.6, 0.8,
       0.6, 0.3, 0.1, 0.4, 0.5, 0.4, 0.6, 0.4, 0.4, 0.1, 0.3, 0.6, 0.3,
       0.5, 0.6, 0.1, 0.1, 0.7, 0.2, 1.5, 0.7, 0. , 0.1, 0.4, 0.7, 0.1,
       0.9, 0.5, 0.1, 0.3, 0.6, 0.9, 0.1, 0.1, 0.5, 0.5, 0.1, 0.5, 0.2,
       0.4, 0.5, 0.5, 0.1, 0.2, 0.5, 0.5, 0.2, 0.5, 0.5, 0.5, 0.3, 0.3,
       0.3, 1. , 0.7, 1.7, 0.3, 0.7, 0.7, 0.3, 0.7, 0.3, 0.3, 0.2, 0.3,
       0.3, 0.7, 0.3, 0.3, 0.3, 0.6, 0.2, 0.2, 0.6, 0.7, 0.3, 0.3, 0.1,
       0.7, 0.6, 0.3, 0.1, 0.6, 0.1, 0.4, 0.2, 0.1, 0. , 0.7, 0.1, 1.4,
       0.7, 1.7, 0.3, 0.3, 0.3, 0.7, 0.7, 0.3, 0.1, 0.3, 0.8, 0.8, 0.6,
       0.4, 0.1, 0.6, 0.6, 0.1, 0.2, 0.5, 0.1, 0.5, 0.2, 0.5, 0.1, 0.1,
       0.3, 0.1, 0.1, 0.1, 0.6, 0.2, 0.1, 0.1, 0.2, 0.1, 0.3, 0.3, 0.1,
       0.2, 0.1, 0.4, 0.2, 0.1, 0.4, 0.1, 0.8, 0.8, 0.9, 0.7, 0.

In [10]:
# Initialize an empty list to store the variant positions
variant_positions = []

# Iterate over the variants
for variant in trees.variants():
    # Get the position of the variant
    position = variant.position
    
    # Append the position to the list
    variant_positions.append(position)

In [14]:
len(variant_positions)

202

In [13]:
variant_positions

[3.0,
 10.0,
 18.0,
 19.0,
 21.0,
 22.0,
 25.0,
 32.0,
 34.0,
 37.0,
 40.0,
 46.0,
 48.0,
 51.0,
 59.0,
 63.0,
 64.0,
 67.0,
 70.0,
 71.0,
 73.0,
 74.0,
 75.0,
 82.0,
 86.0,
 90.0,
 94.0,
 97.0,
 99.0,
 103.0,
 115.0,
 120.0,
 123.0,
 127.0,
 129.0,
 131.0,
 137.0,
 138.0,
 141.0,
 148.0,
 150.0,
 155.0,
 156.0,
 158.0,
 159.0,
 162.0,
 164.0,
 168.0,
 170.0,
 171.0,
 173.0,
 174.0,
 175.0,
 176.0,
 186.0,
 190.0,
 200.0,
 203.0,
 207.0,
 211.0,
 216.0,
 220.0,
 222.0,
 223.0,
 226.0,
 234.0,
 236.0,
 240.0,
 241.0,
 244.0,
 247.0,
 248.0,
 250.0,
 252.0,
 253.0,
 254.0,
 258.0,
 259.0,
 260.0,
 262.0,
 263.0,
 270.0,
 275.0,
 276.0,
 279.0,
 283.0,
 286.0,
 289.0,
 290.0,
 294.0,
 307.0,
 308.0,
 310.0,
 320.0,
 326.0,
 327.0,
 331.0,
 333.0,
 337.0,
 339.0,
 341.0,
 367.0,
 391.0,
 397.0,
 402.0,
 407.0,
 408.0,
 421.0,
 440.0,
 446.0,
 448.0,
 449.0,
 467.0,
 472.0,
 478.0,
 479.0,
 483.0,
 491.0,
 496.0,
 498.0,
 520.0,
 531.0,
 544.0,
 546.0,
 551.0,
 559.0,
 588.0,
 593.0,
 596.0