In [1]:
import sys 
import os

# Make the project's ../python directory importable so the local modules
# imported further down in this cell can be resolved.
module_path = os.path.abspath(os.path.join('..'))
python_dir = module_path + "/python"
if python_dir not in sys.path:
    sys.path.append(python_dir)

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import observation as obs
import simulation as sim
import intervals as ci
from datetime import datetime

In [2]:
def within(interval, pop_val):
    """Return 1 if ``pop_val`` lies inside ``interval``, otherwise 0.

    The interval is treated as closed: ``interval[0]`` is the lower bound and
    ``interval[1]`` the upper bound, and a value equal to either bound counts
    as inside.
    """
    if interval[0] <= pop_val <= interval[1]:
        return 1
    return 0

In [7]:
# Simulate one population with the project's `sim` helper, then print the mean
# of its per-site diversity. The coverage cells compare every confidence
# interval against this value.
# NOTE(review): pop_ts is presumably a tskit tree sequence — confirm in simulation.py.
pop_ts = sim.sim_one_population(
    diploid_size=200,
    seq_len=1e9,
    mut_rate=1e-8,
    rec_rate=1e-8,
)
pop_ts_diversity = pop_ts.diversity(windows='sites', span_normalise=False).mean()
print('Population site diversity:', pop_ts_diversity)

Population site diversity: 0.14947346149597798


In [8]:
# Build one small observation from the simulated population (num_ind=50,
# max_sites=1000) and display the mean of its site diversity values.
# NOTE(review): Observation1 is project code; presumably it subsamples
# individuals and sites from pop_ts — confirm in observation.py.
obs_ts = obs.Observation1(pop_ts, num_ind=50, max_sites=1000)
obs_ts_diversity = np.mean(obs_ts.site_diversity)
obs_ts_diversity

0.1453767676767677

## 50K sites, 99% CI

In [11]:
%%time
# Coverage experiment: max_sites=50000 per observation, confidence level 0.99.
# Each replicate builds a new observation from pop_ts, computes one interval
# per resampling scheme, and records (via `within`) whether pop_ts_diversity
# falls inside that interval.
nrep = 10
conf_level = 0.99
result = {
    'pop_ts_diversity': [pop_ts_diversity] * nrep,
    'obs_ts_diversity': [],
    'bt_ci': [], 'bt_cov': [],
    'jk_one_ci': [], 'jk_one_cov': [],
    'jk_mj_ci': [], 'jk_mj_cov': [],
}

for i in range(nrep):
    obs_ts = obs.Observation1(pop_ts, num_ind=50, max_sites=50000)
    obs_ts_diversity = np.mean(obs_ts.site_diversity)

    # All resampling calls run before any interval call, in this fixed order,
    # in case they consume random state.
    bt_vals = obs_ts.bootstrap_ind_diversity()
    jk_one_vals = obs_ts.jackknife_one_ind_diversity()
    jk_mj_vals = obs_ts.jackknife_mj_ind_diversity()

    # (column prefix, interval) pairs; the prefix selects the result columns.
    replicate_cis = (
        ('bt', ci.bt_standard(bt_vals, conf_level, obs_ts_diversity)),
        ('jk_one', ci.jk_delete_one(jk_one_vals, conf_level, obs_ts_diversity)),
        ('jk_mj', ci.jk_delete_mj(jk_mj_vals, conf_level, obs_ts_diversity)),
    )

    result['obs_ts_diversity'].append(obs_ts_diversity)
    for prefix, bounds in replicate_cis:
        result[prefix + '_ci'].append(bounds)
        result[prefix + '_cov'].append(within(bounds, pop_ts_diversity))

df = pd.DataFrame(result)
df

CPU times: user 47.8 s, sys: 12 ms, total: 47.8 s
Wall time: 47.8 s


Unnamed: 0,pop_ts_diversity,obs_ts_diversity,bt_ci,bt_cov,jk_one_ci,jk_one_cov,jk_mj_ci,jk_mj_cov
0,0.149473,0.149881,"(0.14869518403816404, 0.1510672563658764)",1,"(0.14895732599272002, 0.1508051144113204)",1,"(0.14899579711714836, 0.15076664328689207)",1
1,0.149473,0.149368,"(0.14824137289009062, 0.15049553216041442)",1,"(0.148569916223736, 0.15016698882676904)",1,"(0.14843104221338388, 0.15030586283712116)",1
2,0.149473,0.149514,"(0.1483578957513038, 0.15067076687495887)",1,"(0.1487029039079165, 0.15032575871834616)",1,"(0.14885543172351273, 0.15017323090274992)",1
3,0.149473,0.149696,"(0.14845904779888996, 0.15093235422131202)",1,"(0.14872143808422864, 0.15066996393597334)",1,"(0.14878856163567242, 0.15060284038452956)",1
4,0.149473,0.14988,"(0.1487945879952486, 0.15096531503505436)",1,"(0.1491377241131388, 0.15062217891716417)",1,"(0.14914514623035585, 0.15061475679994712)",1
5,0.149473,0.149609,"(0.1484617627141005, 0.1507572191040813)",1,"(0.14870162305814782, 0.15051735876003397)",1,"(0.14860110686839706, 0.15061787494978474)",1
6,0.149473,0.149104,"(0.1478456040831817, 0.15036289692691934)",1,"(0.14809208236892643, 0.15011641864117461)",1,"(0.14825886400449886, 0.14994963700560218)",1
7,0.149473,0.149228,"(0.14814257099941835, 0.150313065364218)",1,"(0.148534204058922, 0.14992143230471436)",1,"(0.14818803164634417, 0.1502676047172922)",1
8,0.149473,0.150484,"(0.14937979186342187, 0.15158878995476)",1,"(0.1497347560464035, 0.15123382577177838)",0,"(0.14983925912013868, 0.1511293226980432)",0
9,0.149473,0.149336,"(0.14827926358966093, 0.15039330206690477)",1,"(0.14860231206239113, 0.15007025359417456)",1,"(0.1482271129862259, 0.1504454526703398)",1


## 50K sites, 95% CI

In [13]:
%%time
# Coverage experiment: max_sites=50000 per observation, confidence level 0.95.
# Each replicate builds a new observation from pop_ts, computes one interval
# per resampling scheme, and records (via `within`) whether pop_ts_diversity
# falls inside that interval.
nrep = 10
conf_level = 0.95
result = {
    'pop_ts_diversity': [pop_ts_diversity] * nrep,
    'obs_ts_diversity': [],
    'bt_ci': [], 'bt_cov': [],
    'jk_one_ci': [], 'jk_one_cov': [],
    'jk_mj_ci': [], 'jk_mj_cov': [],
}

for i in range(nrep):
    obs_ts = obs.Observation1(pop_ts, num_ind=50, max_sites=50000)
    obs_ts_diversity = np.mean(obs_ts.site_diversity)

    # All resampling calls run before any interval call, in this fixed order,
    # in case they consume random state.
    bt_vals = obs_ts.bootstrap_ind_diversity()
    jk_one_vals = obs_ts.jackknife_one_ind_diversity()
    jk_mj_vals = obs_ts.jackknife_mj_ind_diversity()

    # (column prefix, interval) pairs; the prefix selects the result columns.
    replicate_cis = (
        ('bt', ci.bt_standard(bt_vals, conf_level, obs_ts_diversity)),
        ('jk_one', ci.jk_delete_one(jk_one_vals, conf_level, obs_ts_diversity)),
        ('jk_mj', ci.jk_delete_mj(jk_mj_vals, conf_level, obs_ts_diversity)),
    )

    result['obs_ts_diversity'].append(obs_ts_diversity)
    for prefix, bounds in replicate_cis:
        result[prefix + '_ci'].append(bounds)
        result[prefix + '_cov'].append(within(bounds, pop_ts_diversity))

df = pd.DataFrame(result)
df

CPU times: user 54.3 s, sys: 47.9 ms, total: 54.3 s
Wall time: 54.5 s


Unnamed: 0,pop_ts_diversity,obs_ts_diversity,bt_ci,bt_cov,jk_one_ci,jk_one_cov,jk_mj_ci,jk_mj_cov
0,0.149473,0.149091,"(0.14823027880712275, 0.1499514262433823)",1,"(0.14840475640649997, 0.1497769486440051)",1,"(0.14840234513215536, 0.1497793599183497)",1
1,0.149473,0.149137,"(0.14826459262021696, 0.15001033263230834)",1,"(0.14847945185431635, 0.14979547339820895)",1,"(0.14856208935975665, 0.14971283589276865)",1
2,0.149473,0.149006,"(0.14812981043005555, 0.14988155926691418)",1,"(0.14844464939185392, 0.1495667203051158)",1,"(0.148580131915061, 0.14943123778190873)",0
3,0.149473,0.149453,"(0.14859995646869142, 0.1503061607030258)",1,"(0.14879660496630864, 0.15010951220540858)",1,"(0.1490648270407803, 0.14984129013093692)",1
4,0.149473,0.148706,"(0.14773092835491303, 0.14968119285720816)",1,"(0.14793250077516576, 0.14947962043695542)",1,"(0.14800409609514154, 0.14940802511697965)",0
5,0.149473,0.149452,"(0.14844423850195876, 0.15045887260915236)",1,"(0.14870016029586122, 0.1502029508152499)",1,"(0.14859424517823802, 0.1503088659328731)",1
6,0.149473,0.148712,"(0.14770558888115878, 0.1497185565733867)",1,"(0.14794596186156736, 0.14947818359297813)",1,"(0.14801358200880255, 0.14941056344574294)",0
7,0.149473,0.149731,"(0.14883049218896638, 0.1506316815484074)",1,"(0.1490182263930639, 0.15044394734430988)",1,"(0.1486729802488905, 0.15078919348848327)",1
8,0.149473,0.149537,"(0.14853496600465343, 0.15053897338928596)",1,"(0.14877582089072083, 0.15029811850321856)",1,"(0.14831729419243483, 0.15075664520150456)",1
9,0.149473,0.149901,"(0.14900998163084822, 0.1507927173590507)",1,"(0.1491650817968927, 0.15063761719300622)",1,"(0.14935012475741552, 0.1504525742324834)",1


## 20K sites, 99% CI

In [12]:
%%time
# Coverage experiment: max_sites=20000 per observation, confidence level 0.99.
# Each replicate builds a new observation from pop_ts, computes one interval
# per resampling scheme, and records (via `within`) whether pop_ts_diversity
# falls inside that interval.
nrep = 10
conf_level = 0.99
result = {
    'pop_ts_diversity': [pop_ts_diversity] * nrep,
    'obs_ts_diversity': [],
    'bt_ci': [], 'bt_cov': [],
    'jk_one_ci': [], 'jk_one_cov': [],
    'jk_mj_ci': [], 'jk_mj_cov': [],
}

for i in range(nrep):
    obs_ts = obs.Observation1(pop_ts, num_ind=50, max_sites=20000)
    obs_ts_diversity = np.mean(obs_ts.site_diversity)

    # All resampling calls run before any interval call, in this fixed order,
    # in case they consume random state.
    bt_vals = obs_ts.bootstrap_ind_diversity()
    jk_one_vals = obs_ts.jackknife_one_ind_diversity()
    jk_mj_vals = obs_ts.jackknife_mj_ind_diversity()

    # (column prefix, interval) pairs; the prefix selects the result columns.
    replicate_cis = (
        ('bt', ci.bt_standard(bt_vals, conf_level, obs_ts_diversity)),
        ('jk_one', ci.jk_delete_one(jk_one_vals, conf_level, obs_ts_diversity)),
        ('jk_mj', ci.jk_delete_mj(jk_mj_vals, conf_level, obs_ts_diversity)),
    )

    result['obs_ts_diversity'].append(obs_ts_diversity)
    for prefix, bounds in replicate_cis:
        result[prefix + '_ci'].append(bounds)
        result[prefix + '_cov'].append(within(bounds, pop_ts_diversity))

df = pd.DataFrame(result)
df

CPU times: user 18.9 s, sys: 0 ns, total: 18.9 s
Wall time: 18.9 s


Unnamed: 0,pop_ts_diversity,obs_ts_diversity,bt_ci,bt_cov,jk_one_ci,jk_one_cov,jk_mj_ci,jk_mj_cov
0,0.149473,0.149669,"(0.14832648072291438, 0.15101093341849975)",1,"(0.14873168417162774, 0.15060572996978638)",1,"(0.14893351455554438, 0.15040389958586975)",1
1,0.149473,0.149238,"(0.1479234322084867, 0.1505534970844426)",1,"(0.14814963471202916, 0.15032729458090013)",1,"(0.14806023880718783, 0.15041669048574147)",1
2,0.149473,0.150945,"(0.14976250291378432, 0.15212786072257936)",0,"(0.15011641617693156, 0.15177394745943212)",0,"(0.15052752448426196, 0.15136283915210172)",0
3,0.149473,0.150119,"(0.14891264680308047, 0.1513243834999498)",1,"(0.14923246687010983, 0.15100456343292046)",1,"(0.14934168131615083, 0.15089534898687945)",1
4,0.149473,0.147847,"(0.14684598834003323, 0.1488471227710779)",0,"(0.14715440985925232, 0.1485387012518588)",0,"(0.1470159571882164, 0.14867715392289474)",0
5,0.149473,0.150183,"(0.14878334474646254, 0.15158318050606273)",1,"(0.1491572507504887, 0.15120927450203658)",1,"(0.14923011708243605, 0.15113640817008922)",1
6,0.149473,0.14948,"(0.1482444680768112, 0.15071530970096664)",1,"(0.1484697255766371, 0.15049005220114073)",1,"(0.14838249156502095, 0.15057728621275687)",1
7,0.149473,0.148009,"(0.14690953732720224, 0.14910789701623212)",0,"(0.14718441830805012, 0.14883301603538424)",0,"(0.14747306841617125, 0.1485443659272631)",0
8,0.149473,0.149563,"(0.1483486117259819, 0.15077732766795748)",1,"(0.1486391860981937, 0.15048675329574568)",1,"(0.14882352053705253, 0.15030241885688686)",1
9,0.149473,0.150767,"(0.1495912850262056, 0.1519423109333904)",0,"(0.14985799590919827, 0.15167560005039774)",0,"(0.149468162734563, 0.152065433225033)",1


## 20K sites, 95% CI

In [15]:
%%time
# Coverage experiment: max_sites=20000 per observation, confidence level 0.95.
# Each replicate builds a new observation from pop_ts, computes one interval
# per resampling scheme, and records (via `within`) whether pop_ts_diversity
# falls inside that interval.
nrep = 10
conf_level = 0.95
result = {
    'pop_ts_diversity': [pop_ts_diversity] * nrep,
    'obs_ts_diversity': [],
    'bt_ci': [], 'bt_cov': [],
    'jk_one_ci': [], 'jk_one_cov': [],
    'jk_mj_ci': [], 'jk_mj_cov': [],
}

for i in range(nrep):
    obs_ts = obs.Observation1(pop_ts, num_ind=50, max_sites=20000)
    obs_ts_diversity = np.mean(obs_ts.site_diversity)

    # All resampling calls run before any interval call, in this fixed order,
    # in case they consume random state.
    bt_vals = obs_ts.bootstrap_ind_diversity()
    jk_one_vals = obs_ts.jackknife_one_ind_diversity()
    jk_mj_vals = obs_ts.jackknife_mj_ind_diversity()

    # (column prefix, interval) pairs; the prefix selects the result columns.
    replicate_cis = (
        ('bt', ci.bt_standard(bt_vals, conf_level, obs_ts_diversity)),
        ('jk_one', ci.jk_delete_one(jk_one_vals, conf_level, obs_ts_diversity)),
        ('jk_mj', ci.jk_delete_mj(jk_mj_vals, conf_level, obs_ts_diversity)),
    )

    result['obs_ts_diversity'].append(obs_ts_diversity)
    for prefix, bounds in replicate_cis:
        result[prefix + '_ci'].append(bounds)
        result[prefix + '_cov'].append(within(bounds, pop_ts_diversity))

df = pd.DataFrame(result)
df

CPU times: user 19.4 s, sys: 0 ns, total: 19.4 s
Wall time: 19.4 s


Unnamed: 0,pop_ts_diversity,obs_ts_diversity,bt_ci,bt_cov,jk_one_ci,jk_one_cov,jk_mj_ci,jk_mj_cov
0,0.149473,0.150947,"(0.1500186872703281, 0.1518745652549244)",0,"(0.15025191283253136, 0.15164133969272114)",0,"(0.15036656870458198, 0.15152668382067053)",0
1,0.149473,0.150482,"(0.14961362697385822, 0.15134994878371755)",0,"(0.1498560082752007, 0.15110756748237505)",0,"(0.14986659084039541, 0.15109698491718035)",0
2,0.149473,0.148815,"(0.14790220796830952, 0.14972835768825618)",1,"(0.14808029930902158, 0.14955026634754412)",1,"(0.14789180553004008, 0.14973876012652562)",1
3,0.149473,0.149998,"(0.14896669102595847, 0.1510285210952537)",1,"(0.14913334744525047, 0.1508618646759617)",1,"(0.14884898257546797, 0.1511462295457442)",1
4,0.149473,0.151305,"(0.15031084720971247, 0.15229953662867138)",0,"(0.150515328251053, 0.15209505558733086)",0,"(0.15048771827264057, 0.15212266556574328)",0
5,0.149473,0.149218,"(0.14836726318226476, 0.15006932267632114)",1,"(0.1485377915876921, 0.1498987942708938)",1,"(0.14875570033938873, 0.14968088551919717)",1
6,0.149473,0.149949,"(0.148991684019857, 0.15090609375792083)",1,"(0.1492046119440798, 0.15069316583369802)",1,"(0.14902070864210454, 0.15087706913567328)",1
7,0.149473,0.148956,"(0.14802332274872915, 0.1498877479583416)",1,"(0.14818018768273603, 0.1497308830243347)",1,"(0.14813846115330603, 0.1497726095537647)",1
8,0.149473,0.150165,"(0.1492611354865797, 0.15106868269523843)",1,"(0.14947756779800866, 0.15085225038380948)",0,"(0.14964054145906577, 0.15068927672275237)",0
9,0.149473,0.147959,"(0.14705032109237345, 0.1488682041601518)",0,"(0.14733420729189184, 0.14858431796063343)",0,"(0.1471152349393965, 0.14880329031312878)",0
