In [1]:
import msprime
import numpy as np
from collections import Counter
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from dismal.preprocess import s_matrix_from_dicts
from dismal.demography import DemographicModel
from dismal import generator_matrices
from dismal.likelihood import *
from numpy import linalg

In [2]:
def simulate_msprime(theta0, theta1, theta2, theta1_prime, theta2_prime, t1, v, m1_star, m2_star, m1_prime_star, m2_prime_star, Ne, block_len, num_replicates):
    """Simulate pairwise difference counts under a five-population msprime model.

    The demography has an ancestral population ``a`` that splits into ``b1``
    and ``b2``, which in turn become the sampled populations ``c1`` and ``c2``.
    Population ``b1`` (size ``Ne``, scaled mutation rate ``theta1``) is the
    reference used to convert every other theta into a population size.

    Args:
        theta0, theta2, theta1_prime, theta2_prime: per-block scaled mutation
            rates used for the sizes of ``a``, ``b2``, ``c1`` and ``c2``.
        theta1: per-block scaled mutation rate of the reference population
            ``b1``; also used to rescale ``t1`` and ``v``.
        t1: split time of ``c1``/``c2`` from ``b1``/``b2``; divided by
            ``theta1`` and multiplied by ``2 * Ne`` to obtain generations.
        v: additional time between the two splits, on the same scale as ``t1``.
        m1_star, m2_star: migration parameters between ``b1`` and ``b2``.
        m1_prime_star, m2_prime_star: migration parameters between ``c1`` and
            ``c2``.
        Ne: size given to population ``b1``.
        block_len: sequence length of each simulated block.
        num_replicates: number of independent blocks simulated per state.

    Returns:
        A list of three dicts, in the order (both samples from ``c1``, both
        from ``c2``, one from each). Each dict maps an observed divergence
        value to the number of replicates showing it, sorted by value.

    No random seed is passed to msprime, so results differ between calls.
    """

    # Time parameter conversions
    t0_coal_time = (t1+v)/theta1
    t0_gen_time = 2*Ne*t0_coal_time
    t1_coal_time = t1/theta1
    t1_gen_time = 2*Ne*t1_coal_time

    # Convert theta values from per block to per base
    ms_theta0, ms_theta1, ms_theta2, ms_theta1_prime, ms_theta2_prime = [theta/block_len for theta in [theta0, theta1, theta2, theta1_prime, theta2_prime]]

    # Mutation rate
    mu_per_bp = (ms_theta1/(4*Ne))

    # Convert thetas to Ne
    Ne_a, Ne_b, Ne_c1, Ne_c2 = [theta/(4*mu_per_bp) for theta in [ms_theta0, ms_theta2, ms_theta1_prime, ms_theta2_prime]]

    demography = msprime.Demography()
    demography.add_population(name="a", initial_size=Ne_a)
    # b1 is the reference population: ms_theta1/(4*mu_per_bp) equals Ne.
    demography.add_population(name="b1", initial_size=Ne)
    demography.add_population(name="b2", initial_size=Ne_b)
    demography.add_population(name="c1", initial_size=Ne_c1)
    demography.add_population(name="c2", initial_size=Ne_c2)
    demography.add_population_split(time=t0_gen_time, derived=["b1", "b2"], ancestral="a")
    demography.add_population_split(time=t1_gen_time, derived=["c1"], ancestral="b1")
    demography.add_population_split(time=t1_gen_time, derived=["c2"], ancestral="b2")
    # NOTE(review): the rates below are passed as m/2 with no Ne scaling, while
    # times are converted to generations above — confirm the intended units.
    demography.set_migration_rate("b2", "b1", m2_star/2) # NB: backwards in time in msprime
    demography.set_migration_rate("b1", "b2", m1_star/2)
    demography.set_migration_rate("c2", "c1", m2_prime_star/2)
    demography.set_migration_rate("c1", "c2", m1_prime_star/2)
    demography.sort_events()

    ts_state1 = msprime.sim_ancestry(samples={'c1':2, 'c2':0}, demography=demography, sequence_length=block_len, num_replicates=num_replicates, ploidy=2)
    ts_state2 = msprime.sim_ancestry(samples={'c1':0, 'c2':2}, demography=demography, sequence_length=block_len, num_replicates=num_replicates, ploidy=2)
    ts_state3 = msprime.sim_ancestry(samples={'c1':1, 'c2':1}, demography=demography, sequence_length=block_len, num_replicates=num_replicates, ploidy=2)

    s = []
    for state_replicates in [ts_state1, ts_state2, ts_state3]:
        # One slot per replicate. Sizing by num_replicates (rather than a fixed
        # 2000) avoids padding zeros for smaller runs and IndexError for larger.
        sim = np.zeros(num_replicates)
        for replicate_index, tree_seq in enumerate(state_replicates):
            ts_muts = msprime.sim_mutations(tree_seq, rate=mu_per_bp, discrete_genome=False)
            # Compare sample nodes 0 and 2; presumably these belong to the two
            # different diploid individuals sampled above — TODO confirm.
            sim[replicate_index] = ts_muts.divergence(sample_sets=[[0],[2]], span_normalise=False)

        s_counts_dict = Counter(sim)
        s.append(dict(sorted(s_counts_dict.items())))


    return s


### Isolation model, identical population sizes

In [3]:
# Isolation scenario: all five thetas equal and every migration parameter zero.
# NOTE(review): Ne=1e-6 is a population size far below one — confirm that
# 1e6 was not intended.
sim_iso1 = simulate_msprime(
    theta0=2.4,
    theta1=2.4,
    theta2=2.4,
    theta1_prime=2.4,
    theta2_prime=2.4,
    t1=1,
    v=2,
    m1_star=0,
    m2_star=0,
    m1_prime_star=0,
    m2_prime_star=0,
    Ne=1e-6,
    block_len=200,
    num_replicates=2000,
)

In [4]:
# Display the three per-state {difference count: number of replicates} dicts.
sim_iso1

[{0.0: 601,
  1.0: 440,
  2.0: 268,
  3.0: 207,
  4.0: 147,
  5.0: 105,
  6.0: 53,
  7.0: 46,
  8.0: 38,
  9.0: 24,
  10.0: 22,
  11.0: 10,
  12.0: 16,
  13.0: 4,
  14.0: 2,
  15.0: 4,
  16.0: 4,
  17.0: 1,
  18.0: 4,
  19.0: 2,
  21.0: 1,
  26.0: 1},
 {0.0: 572,
  1.0: 432,
  2.0: 296,
  3.0: 195,
  4.0: 139,
  5.0: 104,
  6.0: 77,
  7.0: 54,
  8.0: 34,
  9.0: 20,
  10.0: 20,
  11.0: 19,
  12.0: 12,
  13.0: 8,
  14.0: 5,
  15.0: 6,
  16.0: 3,
  17.0: 2,
  18.0: 1,
  21.0: 1},
 {0.0: 22,
  1.0: 105,
  2.0: 224,
  3.0: 294,
  4.0: 289,
  5.0: 261,
  6.0: 193,
  7.0: 167,
  8.0: 120,
  9.0: 102,
  10.0: 65,
  11.0: 47,
  12.0: 37,
  13.0: 26,
  14.0: 15,
  15.0: 8,
  16.0: 7,
  17.0: 8,
  18.0: 3,
  19.0: 2,
  20.0: 1,
  21.0: 1,
  23.0: 1,
  24.0: 2}]

In [12]:
# Count-weighted mean of the difference values in the second dict of sim_iso1.
state2_diffs = np.array(list(sim_iso1[1].keys()))
state2_counts = np.array(list(sim_iso1[1].values()))
np.sum(state2_diffs * state2_counts / np.sum(state2_counts))

2.4574999999999996

In [4]:
# Convert the list of three count dicts into the S matrix consumed by DemographicModel.
s_mat = s_matrix_from_dicts(sim_iso1)

In [8]:
# Fit the "gim" model to the simulated S matrix and show the inferred parameters.
# NOTE(review): the data were simulated with all thetas = 2.4 and no migration;
# compare the estimates displayed below against those inputs.
DemographicModel(S=s_mat, model="gim").infer_parameters(verbose=False)

iv: [5, 5, 5, 5, 5, 5, 5, 0.3, 0.3, 0.3, 0.3], lb: [0.01, 0.01, 0.01, 0.01, 0.01, 0, 0, 0, 0, 0, 0], ub: [None, None, None, None, None, None, None, None, None, None, None]


  gammas.append((gamma/(gamma+rel_mu)) * ((rel_mu/(gamma+rel_mu))**s) * np.exp(gamma*t0) * poisson.cdf(s, (t0*(gamma+rel_mu))))
  gammas.append((gamma/(gamma+rel_mu)) * ((rel_mu/(gamma+rel_mu))**s) * np.exp(gamma*t0) * poisson.cdf(s, (t0*(gamma+rel_mu))))


{'theta0': 2.3663663947784817, 'theta1': 7.579156347830399, 'theta2': 9.622917713055912, 'theta1_prime': 17.610124972374027, 'theta2_prime': 17.72978387959142, 't1': 3.1605020262040555, 'v': 11.851214870728397, 'm1_star': 34.30703623341593, 'm2_star': 27.4761422396686, 'm1_prime_star': 0.06383717494791225, 'm2_prime_star': 0.027745338886504215, '-lnL': 13150.916582914175, 'aic': 26323.83316582835}

In [50]:
# Unpack three objects from GeneratorMatrix.from_params, called with seven 1s
# followed by four 0s.
# NOTE(review): GeneratorMatrix is not imported by name in the import cell;
# presumably it arrives via `from dismal.likelihood import *` — confirm.
q1, q2, q3 = GeneratorMatrix.from_params([1,1,1,1,1,1,1,0,0,0,0])
# Negate and exponentiate the likelihood_matrix result, then read entry [2][0].
# Presumably the matrix holds negative log-probabilities, so this recovers a
# probability for the third state at the first count — TODO confirm.
np.exp(-np.array(likelihood_matrix(q1, q2, q3, 1, 1, s_mat)))[2][0]

0.06766764161830634

In [54]:
# Call p_matrix with no arguments and display its result.
# NOTE(review): p_matrix is not defined in the visible cells; presumably it
# comes from `from dismal.likelihood import *` — confirm.
p_matrix()

array([[0.43233236],
       [0.43233236],
       [0.        ]])

In [57]:
def expect_state3(k, theta, tau):
    """Return ``1/(theta+1)`` times the Poisson pmf of ``k`` with mean ``2*tau``.

    Because the Poisson pmf sums to one over ``k``, these values sum to
    ``1/(theta+1)`` rather than to one.
    """
    weight = 1/(theta+1)
    poisson_term = poisson.pmf(k, 2*tau)
    return weight * poisson_term

# Tabulate expect_state3 for k = 0..9 with theta = 1 and tau = 1.
[expect_state3(k, 1, 1) for k in range(10)]

[0.06766764161830635,
 0.1353352832366127,
 0.1353352832366127,
 0.09022352215774178,
 0.04511176107887089,
 0.01804470443154836,
 0.0060149014771827825,
 0.0017185432791950808,
 0.0004296358197987701,
 9.547462662194911e-05]

In [58]:
import math

In [76]:
def expect_state3_cwh(k, theta, tau, a):
    """Return the closed-form probability term for ``k`` given ``theta``, ``tau`` and ``a``.

    The value is
    ``exp(-theta*tau) * (a*theta)**k / (1 + a*theta)**(k+1)``
    multiplied by the partial sum over ``l = 0..k`` of
    ``((1/a + theta)**l * tau**l) / l!``.
    """
    prefactor = math.exp(-theta*tau) * ((a*theta)**k) / ((1+a*theta)**(k+1))

    # Partial exponential-type series, truncated at order k.
    series_terms = []
    for order in range(k + 1):
        numerator = ((1/a+theta)**order) * (tau**order)
        series_terms.append(numerator/math.factorial(order))

    return prefactor * np.sum(series_terms)


In [84]:
# Evaluate the closed form at k=0, theta=1, tau=2, a=1.
expect_state3_cwh(0, 1, 2, 1)

0.06766764161830635

In [87]:
# Sanity check of the summation indices used for a given k (0 through k inclusive).
k = 1
list(range(k + 1))

[0, 1]