In [1]:
import os
import sys

import numpy as np
import pandas as pd

from bokeh.plotting import show
from bokeh.io import output_notebook
from bokeh.models.widgets import RangeSlider

import msprime

# Make the project-local modules importable. The relative entry "../src" is
# resolved against the current working directory, so the two imports below
# must come after this line.
sys.path.append("../src")
import util
import vis

# Direct Bokeh output into notebook cells.
output_notebook()


In [2]:
# Bokeh consults BOKEH_ALLOW_WS_ORIGIN for the origin(s) that are allowed to
# open websocket connections to an embedded Bokeh server application.
# NOTE(review): the value below looks specific to one hosting environment --
# confirm it matches the host this notebook is actually served from.
allowed_ws_origin = '0aaf0agotd3etfja916liv2etcl4ul9j3fk8kav1m1a16m18da6b'
os.environ["BOKEH_ALLOW_WS_ORIGIN"] = allowed_ws_origin


In [3]:
# Two-step simulation: first the ancestry of 100 diploid individuals
# (200 sample nodes), then mutations overlaid on the resulting tree sequence.
# Both steps use the same fixed seed so the notebook is reproducible.
simulation_seed = 1234

ancestry_ts = msprime.sim_ancestry(
    100,
    ploidy=2,
    sequence_length=1e7,
    recombination_rate=1e-8,
    random_seed=simulation_seed,
)
ts = msprime.sim_mutations(
    ancestry_ts,
    rate=1e-7,
    random_seed=simulation_seed,
)
ts


Tree Sequence,Unnamed: 1
Trees,5
Sequence Length,10000000.0
Time Units,generations
Sample Nodes,200
Total Size,33.8 KiB
Metadata,No Metadata

Table,Rows,Size,Has Metadata
Edges,414,12.9 KiB,
Individuals,100,2.8 KiB,
Migrations,0,8 Bytes,
Mutations,31,1.1 KiB,
Nodes,403,11.0 KiB,
Populations,1,224 Bytes,✅
Provenances,2,1.7 KiB,
Sites,31,791 Bytes,


In [4]:
# Hand-written example path made of six constant segments. Each segment is a
# node id repeated over a run of consecutive sites; the run lengths add up to
# 31, matching the number of sites in `ts`.
segment_nodes = [80, 100, 240, 350, 70, 260]
segment_lengths = [5, 6, 6, 5, 4, 5]

path = util.SamplePath(
    individual="test",
    # Expand each node id by its run length to get one node id per site.
    nodes=np.repeat(segment_nodes, segment_lengths),
    site_positions=ts.sites_position,
)
path


SamplePath(individual='test', nodes=array([ 80,  80,  80,  80,  80, 100, 100, 100, 100, 100, 100, 240, 240,
       240, 240, 240, 240, 350, 350, 350, 350, 350,  70,  70,  70,  70,
       260, 260, 260, 260, 260]), site_positions=array([ 378546.,  762087., 1121394., 1252798., 1794978., 2144190.,
       3137811., 3220575., 3517468., 3575760., 3578269., 3811182.,
       3943932., 4080679., 4477739., 5174622., 5390107., 5823286.,
       6892356., 6905343., 7247899., 7462474., 7913459., 7974395.,
       8187035., 8338579., 8988312., 9046731., 9588017., 9763050.,
       9790388.]), metadata=None, is_valid=True)

In [5]:
# Random (num_nodes x num_sites) matrix of uniform values, plus a long-format
# table with one row per (node, site) pair in row-major order.
# NOTE(review): presumably a stand-in for real forward probabilities, judging
# only by the variable names -- confirm.
# The legacy global seed is kept on purpose: it fixes the values shown here
# and also the state of the global NumPy generator for later cells that draw
# from np.random.
np.random.seed(1234)
fwd_prob_mat_ar = np.random.uniform(size=(ts.num_nodes, ts.num_sites))
fwd_prob_mat_df = pd.DataFrame({
    # Each node id repeated once per site: 0,0,...,0,1,1,...
    'node_id' : np.repeat(np.arange(ts.num_nodes), ts.num_sites),
    # The full run of site ids repeated once per node: 0,1,...,0,1,...
    'site_id' : np.tile(np.arange(ts.num_sites), ts.num_nodes),
    # Site positions laid out in the same order as site_id.
    'site_pos': np.tile(ts.sites_position, ts.num_nodes),
    # Row-major flattening, so entry (node, site) lines up with the ids above.
    'prob'    : fwd_prob_mat_ar.flatten(),
})
fwd_prob_mat_df


Unnamed: 0,node_id,site_id,site_pos,prob
0,0,0,378546.0,0.191519
1,0,1,762087.0,0.622109
2,0,2,1121394.0,0.437728
3,0,3,1252798.0,0.785359
4,0,4,1794978.0,0.779976
...,...,...,...,...
12488,402,26,8988312.0,0.659749
12489,402,27,9046731.0,0.648086
12490,402,28,9588017.0,0.148121
12491,402,29,9763050.0,0.119325


In [6]:
# TODO: Use toggle buttons for the different views.
# Range slider for the interval to display, initialised to the full extent.
# NOTE(review): the slider covers 0..len(path) inclusive in steps of 1 --
# confirm that the vis code expects len(path) itself as a valid upper value.
path_length = len(path)
interval_slider = RangeSlider(
    title="Genomic interval",
    start=0,
    end=path_length,
    step=1,
    value=(0, path_length),
)
controls = {"interval": interval_slider}

# Randomly generated demo tracks, one per legend entry. The first three draw
# from the codes 0..3; the last draws from the codes -2 and -1.
# NOTE(review): the meaning of the codes (especially -2 / -1) is defined by
# the vis module -- confirm there.
# Draw order matters: the tracks consume the global NumPy random state in
# list order, so the order of the code sets must not change.
track_code_sets = [np.arange(4), np.arange(4), np.arange(4), [-2, -1]]

tracks = [
    {
        'site_pos': path.site_positions,
        'site_base': np.random.choice(codes, size=len(path)),
    }
    for codes in track_code_sets
]

# One legend label per entry in `tracks`, in the same order.
track_labels = ['BEAGLE', 'tskit', 'truth', 'chip']

# Build the sample-path visualisation app and render it in the notebook.
app = vis.create_sample_path_vis_app(
    path,
    ts,
    tracks=tracks,
    legend_labels=track_labels,
    controls=controls,
    # Disabled keyword arguments, kept for reference:
    #range_node_id=(0, 99),
    #range_site_pos=(0, 4e6),
)

show(app)
