In [1]:
import sys 
import os

module_path = os.path.abspath(os.path.join('..'))
if module_path+"/python" not in sys.path:
    sys.path.append(module_path+"/python")

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import observation as obs
import simulation as sim
import intervals as ci
from datetime import datetime

In [3]:
# Simulate one population with the project's `sim` helper and reduce its
# per-site diversity to a single reference value used by the later cells.
# NOTE(review): `pop_ts` is presumably a tskit TreeSequence -- confirm in simulation.py.
pop_ts = sim.sim_one_population(
    diploid_size=200,
    seq_len=1e9,
    rec_rate=1e-8,
    mut_rate=1e-8,
)
# Windowed by site and not span-normalised; the mean collapses the windows
# into one number.
pop_ts_diversity = pop_ts.diversity(windows='sites', span_normalise=False).mean()
print('Population site diversity:', pop_ts_diversity)

Population site diversity: 0.1498461476286317


In [4]:
# Build a single observation from the simulated population and summarise it
# by the mean of its `site_diversity` values.
obs_ts = obs.Observation1(pop_ts, max_sites=1000, num_ind=50)
obs_ts_diversity = np.mean(obs_ts.site_diversity)
# Bare last expression so the notebook displays the value.
obs_ts_diversity

0.1557090909090909

In [82]:
%%time
# Coverage experiment: build repeated observations from the same simulated
# population and record, for each interval method, the interval itself and
# whether it contains the population site diversity.
n_reps = 10   # number of observations; also sizes the constant column below
level = 0.99  # second argument handed to every ci.* helper (presumably the confidence level -- confirm)

result = {'pop_ts_diversity': [pop_ts_diversity] * n_reps,
          'obs_ts_diversity': [],
          'bt_ci': [], 'bt_cov': [],
          'jk_one_ci':[], 'jk_one_cov': [],
          'jk_mj_ci':[], 'jk_mj_cov': []}
for i in range(n_reps):
    obs_ts = obs.Observation1(pop_ts, num_ind=50, max_sites=1000)
    obs_ts_diversity = np.mean(obs_ts.site_diversity)

    # Resampled diversity values, one set per method.
    bt_ind_vals = obs_ts.bootstrap_ind_diversity()
    jk_one_ind_vals = obs_ts.jackknife_one_ind_diversity()
    jk_mj_ind_vals = obs_ts.jackknife_mj_ind_diversity()

    bt_ci = ci.bt_standard(bt_ind_vals, level, obs_ts_diversity)
    jk_one_ci = ci.jk_delete_one(jk_one_ind_vals, level, obs_ts_diversity)
    jk_mj_ci = ci.jk_delete_mj(jk_mj_ind_vals, level, obs_ts_diversity)
    result['obs_ts_diversity'].append(obs_ts_diversity)

    # Coverage flags (1 = interval contains the population value, bounds
    # inclusive) are computed inline rather than via `within`: that helper is
    # only defined in a later cell, so calling it here raises NameError when
    # the notebook is run top to bottom on a fresh kernel.
    result['bt_ci'].append(bt_ci)
    result['bt_cov'].append(int(bt_ci[0] <= pop_ts_diversity <= bt_ci[1]))

    result['jk_one_ci'].append(jk_one_ci)
    result['jk_one_cov'].append(int(jk_one_ci[0] <= pop_ts_diversity <= jk_one_ci[1]))

    result['jk_mj_ci'].append(jk_mj_ci)
    result['jk_mj_cov'].append(int(jk_mj_ci[0] <= pop_ts_diversity <= jk_mj_ci[1]))

CPU times: user 3.61 s, sys: 314 ms, total: 3.92 s
Wall time: 4.1 s


In [83]:
# One row per replicate, one column per key of `result`.
df = pd.DataFrame.from_dict(result)
df

Unnamed: 0,pop_ts_diversity,obs_ts_diversity,bt_ci,bt_cov,jk_one_ci,jk_one_cov,jk_mj_ci,jk_mj_cov
0,0.149846,0.1505,"(0.1470721539335382, 0.1539270379856537)",1,"(0.1470694183116347, 0.1539297736075572)",1,"(0.14750209451021137, 0.15349709740898052)",1
1,0.149846,0.154286,"(0.15182343324510064, 0.15674787988621244)",0,"(0.15190443559676042, 0.15666687753455266)",0,"(0.1507361961950526, 0.15783511693626048)",0
2,0.149846,0.143866,"(0.14128363031174235, 0.14644889494078295)",0,"(0.14119052235290733, 0.14654200289961797)",0,"(0.13955844998386815, 0.14817407526865714)",0
3,0.149846,0.137756,"(0.13525444715022575, 0.14025787608209747)",0,"(0.1353517086507148, 0.14016061458160842)",0,"(0.1351768102047879, 0.14033551302753533)",0
4,0.149846,0.151601,"(0.14923769734192777, 0.1539647269004965)",1,"(0.14929671190495236, 0.1539057123374719)",1,"(0.14975276640487828, 0.153449657837546)",1
5,0.149846,0.15338,"(0.1500248790370978, 0.1567347169224981)",0,"(0.15002461742813988, 0.15673497853145602)",0,"(0.15029840122510146, 0.15646119473449444)",0
6,0.149846,0.145781,"(0.1425104756123152, 0.1490519486301091)",0,"(0.1425057601259686, 0.14905666411645568)",0,"(0.14351114475575755, 0.14805127948666674)",0
7,0.149846,0.15005,"(0.14663090481476143, 0.15346929720544056)",1,"(0.1467498112039207, 0.1533503908162813)",1,"(0.14631624748182753, 0.15378395453837446)",1
8,0.149846,0.145073,"(0.14185762897248005, 0.14828822961337856)",0,"(0.14189880586333453, 0.1482470527225241)",0,"(0.14310835027930222, 0.1470375083065564)",0
9,0.149846,0.153371,"(0.15075418446699612, 0.15598682563401395)",0,"(0.15080060775872153, 0.15594040234228854)",0,"(0.1517834112017547, 0.15495759889925537)",0


In [12]:
def plot(data, ci, obs_val=None, pop_val=None):
    """Histogram of ``data`` with reference lines for the estimates and interval.

    Parameters
    ----------
    data
        Values passed straight to ``sns.histplot``.
    ci
        Interval indexed as ``ci[0]`` and ``ci[1]``; both bounds are drawn in
        yellow. Inside this function the name shadows the imported ``ci``
        module, which is not used here.
    obs_val, pop_val
        Positions of the red ("obs") and green ("pop") lines. When omitted
        they fall back to the notebook-level ``obs_ts_diversity`` and
        ``pop_ts_diversity`` so existing ``plot(data, ci)`` calls behave as
        before.
    """
    if obs_val is None:
        obs_val = obs_ts_diversity
    if pop_val is None:
        pop_val = pop_ts_diversity

    # histplot draws on the current axes and returns them.
    ax = sns.histplot(data)
    ax.axvline(obs_val, color='red', label='obs')
    ax.axvline(pop_val, color='green', label='pop')
    ax.axvline(ci[0], color='yellow', label='ci')
    # Only one bound is labelled so the legend shows a single "ci" entry.
    ax.axvline(ci[1], color='yellow')
    ax.legend()
    plt.show()

In [72]:
def within(interval, pop_val):
    """Return 1 if ``pop_val`` lies inside ``interval`` (bounds inclusive), else 0.

    ``interval`` is read as ``interval[0]`` (lower bound) and ``interval[1]``
    (upper bound). The upper bound is only looked at when the lower-bound
    check passes.
    """
    if not interval[0] <= pop_val:
        return 0
    if not pop_val <= interval[1]:
        return 0
    return 1

In [80]:
%%time
# Same coverage experiment as the earlier run, with larger observations
# (num_ind=200, max_sites=10000) and 0.95 passed to the ci.* helpers.
result = dict(
    pop_ts_diversity=[pop_ts_diversity] * 10,
    obs_ts_diversity=[],
    bt_ci=[], bt_cov=[],
    jk_one_ci=[], jk_one_cov=[],
    jk_mj_ci=[], jk_mj_cov=[],
)
for i in range(10):
    # New observation of the population and its mean site diversity.
    obs_ts = obs.Observation1(pop_ts, max_sites=10000, num_ind=200)
    obs_ts_diversity = np.mean(obs_ts.site_diversity)

    # Resampled diversity values, one set per method.
    bt_ind_vals = obs_ts.bootstrap_ind_diversity()
    jk_one_ind_vals = obs_ts.jackknife_one_ind_diversity()
    jk_mj_ind_vals = obs_ts.jackknife_mj_ind_diversity()

    # Intervals, computed in the same order as the resampling above.
    bt_ci = ci.bt_standard(bt_ind_vals, 0.95, obs_ts_diversity)
    jk_one_ci = ci.jk_delete_one(jk_one_ind_vals, 0.95, obs_ts_diversity)
    jk_mj_ci = ci.jk_delete_mj(jk_mj_ind_vals, 0.95, obs_ts_diversity)

    result['obs_ts_diversity'].append(obs_ts_diversity)
    # Store each interval together with its 0/1 coverage flag.
    for method, interval in (('bt', bt_ci), ('jk_one', jk_one_ci), ('jk_mj', jk_mj_ci)):
        result[method + '_ci'].append(interval)
        result[method + '_cov'].append(within(interval, pop_ts_diversity))

CPU times: user 1min 59s, sys: 4.01 s, total: 2min 3s
Wall time: 2min 16s


In [81]:
# Display the current `result` dict as a table (one column per key).
pd.DataFrame.from_dict(result)

Unnamed: 0,pop_ts_diversity,obs_ts_diversity,bt_ci,bt_cov,jk_one_ci,jk_one_cov,jk_mj_ci,jk_mj_cov
0,0.149846,0.150749,"(0.15030789601630254, 0.15118958017417364)",0,"(0.15029933503417656, 0.15119814115629962)",0,"(0.15041467848119208, 0.1510827977092841)",0
1,0.149846,0.146701,"(0.14621974946678326, 0.1471813633151716)",0,"(0.1462200896972209, 0.14718102308473396)",0,"(0.1462338541993706, 0.14716725858258425)",0
2,0.149846,0.147314,"(0.14685025414320255, 0.14777781352597033)",0,"(0.14688549936114176, 0.14774256830803112)",0,"(0.14675562011093518, 0.1478724475582377)",0
3,0.149846,0.152028,"(0.15155589025686567, 0.15250020999376085)",0,"(0.15156136895097894, 0.15249473129964758)",0,"(0.15146812007224017, 0.15258798017838635)",0
4,0.149846,0.152405,"(0.15196991784180922, 0.1528399618574389)",0,"(0.1519775835862201, 0.152832296113028)",0,"(0.15199513840265044, 0.15281474129659767)",0
5,0.149846,0.149749,"(0.14926728265996939, 0.15023151934504317)",1,"(0.1492818997325032, 0.15021690227250936)",1,"(0.14929422469079845, 0.1502045773142141)",1
6,0.149846,0.151363,"(0.1509062124102779, 0.1518194291937321)",0,"(0.15092350456584533, 0.15180213703816467)",0,"(0.15100606862045857, 0.15171957298355143)",0
7,0.149846,0.150864,"(0.15038943742172065, 0.15133877561086076)",0,"(0.15040041457276304, 0.15132779845981836)",0,"(0.150401846196063, 0.1513263668365184)",0
8,0.149846,0.150582,"(0.1501087339190538, 0.1510553412689161)",0,"(0.15012670201352907, 0.15103737317444083)",0,"(0.1500194099839334, 0.1511446652040365)",0
9,0.149846,0.148288,"(0.14782121809071996, 0.14875533078146047)",0,"(0.1478170259954386, 0.14875952287674182)",0,"(0.1478345265087419, 0.14874202236343853)",0


In [78]:
# Show the `num_sites` attribute of the simulated population for reference
# next to the `max_sites` values used when building observations.
pop_ts.num_sites

53922