# Showcase for phasic

In [None]:
from phasic import Graph
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
sns.set_style('ticks')
from matplotlib_inline.backend_inline import set_matplotlib_formats
set_matplotlib_formats('retina', 'png')
from graphviz import Digraph

In [None]:
def plot_graph(states, ipv, sim, constrained=True, size='10'):
    """Render a state graph as a graphviz ``Digraph``.

    Parameters
    ----------
    states
        Sequence of state vectors, one per vertex. Entry 0 is drawn as the
        node ``'S'``; entries 1 and up become nodes labelled with their state.
    ipv
        Initial probability vector. Every non-zero entry ``k`` produces an
        edge from ``'S'`` to node ``k + 1`` labelled with that value.
    sim
        Square sub-intensity matrix (must support ``.sum(axis=1)``). Positive
        off-diagonal entries become labelled edges between the nodes.
    constrained
        Truthy to pass ``constraint='true'`` on every edge, otherwise
        ``'false'``.
    size
        Value stored in the graph's ``size`` attribute.

    Returns
    -------
    graphviz.Digraph
        The assembled graph.
    """
    constraint_flag = 'true' if constrained else 'false'
    states = np.array(states)
    node_ids = range(1, len(states))

    # Extend SIM with a column holding the negated row sums and an all-zero
    # bottom row, so the last node gets incoming rates but no outgoing ones.
    exit_column = -sim.sum(axis=1)
    zero_row = np.zeros((1, len(sim) + 1))
    rates = np.r_[np.c_[sim, exit_column], zero_row]

    dot = Digraph()
    dot.node('S', 'S')
    for node, state in zip(node_ids, states[1:]):
        dot.node(str(node), str(state))

    # Edges out of the start node, one per non-zero initial probability.
    for offset, prob in enumerate(ipv):
        if not prob:
            continue
        dot.edge('S', str(offset + 1), constraint=constraint_flag, label=str(prob))

    # Edges between the remaining nodes, one per positive off-diagonal rate.
    for src in node_ids:
        for dst in node_ids:
            if src == dst:
                continue
            rate = rates[src - 1, dst - 1]
            if rate > 0:
                dot.edge(str(src), str(dst), constraint=constraint_flag, label=str(rate))

    dot.graph_attr['size'] = size
    return dot

## Standard coalescent

If you already have the subintensity matrix and initial probability vector:

In [None]:
sim = np.array([[-6, 6, 0, 0], 
                [0, -3, 1, 2],
                [0, 0, -1, 0],
                [0, 0, 0, -1]], dtype=float)
ipv = np.array([1, 0, 0, 0], dtype=float)
sim

In [None]:
graph = Graph.from_matrix(ipv, sim)

Note that the state vectors are of course undefined (all zero) when constructing the graph this way:

In [None]:
result = graph.to_matrix()
print("States:")
print(result['states'])
print("\nSIM:")
print(result['sim'])
print("\nIPV:")
print(result['ipv'])

In [None]:
# NOTE(review): this graph was built from a sub-intensity matrix, and 0.9 is
# not an integer, yet `dph_pmf` reads like a discrete phase-type PMF. A
# continuous density may be what is intended here (`.pdf(x)` is called on the
# result of `graph.reward_transform(...)` further down) — confirm against the
# phasic API.
graph.dph_pmf(0.9)

If you want to generate the state space:

In [None]:
# Build the state space of the standard coalescent for n samples.
n = 6

# State vector: entry k (0-based) counts the lineages that subtend k + 1
# samples, so entry 0 holds singletons, entry 1 doubletons, and so on.
state_vector_length = n
graph = Graph(state_length=state_vector_length)
starting_vertex = graph.starting_vertex()
initial_state = np.zeros(n, dtype=int)
initial_state[0] = n

starting_vertex.add_edge(
    graph.find_or_create_vertex(initial_state),
    1
)
index = 1

# Visit vertices in creation order; find_or_create_vertex appends new ones,
# so the loop ends once no unseen state is reachable.
while index < graph.vertices_length():
    vertex = graph.vertex_at(index)
    # The vertex state does not change while its edges are added, so copy once.
    state = vertex.state().copy()

    # loop over all (unordered) pairs of lineage classes
    for i in range(n):
        for j in range(i, n):
            if i == j:
                # same class: there need to be at least two to coalesce
                if state[i] < 2:
                    continue
                # number of pairs within the class
                rate = state[i] * (state[i] - 1) / 2
            else:
                # different classes: at least one lineage in each
                if state[i] < 1 or state[j] < 1:
                    continue
                # number of combinations across the two classes
                rate = state[i] * state[j]

            child_state = state.copy()
            child_state[i] -= 1
            child_state[j] -= 1
            # Classes i and j hold lineages with i + 1 and j + 1 descendants;
            # the merged lineage has i + j + 2 descendants and therefore
            # belongs at 0-based index i + j + 1 (not i + j).
            child_state[i + j + 1] += 1

            vertex.add_edge(
                graph.find_or_create_vertex(child_state),
                rate
            )

    index = index + 1

states = np.array([graph.vertex_at(i).state() for i in range(graph.vertices_length())])
result = graph.to_matrix()
ipv = result['ipv']
sim = result['sim']

graph.vertices_length()

In [None]:
plot_graph(states, ipv, sim)

As matrices:

In [None]:
matrices = graph.to_matrix()
states = matrices['states']
sim = matrices['sim']
print("States:")
print(states)
print("\nSIM:")
print(sim)
print("\nIPV:")
print(matrices['ipv'])

Invert subintensity matrix to get Green matrix:

In [None]:
U = -np.linalg.inv(matrices['sim'])
U

Compute expectation from matrices:

In [None]:
matrices['ipv'] @ U @ np.ones(len(matrices['ipv']))

Compute moments:

In [None]:
graph.expectation()

In [None]:
graph.expected_waiting_time()

In [None]:
graph.variance()

In [None]:
r_prime = graph.expected_waiting_time()
2 * graph.expected_waiting_time(r_prime) - r_prime * r_prime

In [None]:
graph.moments(4)

Marginal expectations using rewards:

In [None]:
state_matrix = np.array([graph.vertex_at(i).state() for i in range(graph.vertices_length())]).T
state_matrix

The rows happen to be our reward vectors, e.g. the singleton rewards:

In [None]:
rewards = np.array([graph.vertex_at(i).state() for i in range(graph.vertices_length())]).T
singletons = rewards[0, :]
doubletons = rewards[1, :]
tripletons = rewards[2, :]
singletons

Expected tripleton branch length:

In [None]:
graph.expectation(tripletons)

SFS:

In [None]:
sfs = np.array([graph.expectation(rewards[i, :]) for i in range(rewards.shape[0]-1)])
sfs

In [None]:
sns.barplot(x=list(range(1, len(sfs)+1)), y=sfs)
sns.despine()

Covariance:

In [None]:
graph.covariance(singletons, doubletons)

In [None]:
graph.covariance(singletons, tripletons)

Covariance between "ton" branch length:

In [None]:
cov_mat = np.zeros((n-1, n-1))

for i in range(n-1):
    for j in range(n-1):
        cov_mat[i, j] = graph.covariance(rewards[i, :], rewards[j, :])

In [None]:
plt.subplots(1, 1, figsize=(7, 5))
ticks = list(range(1, int(n)))
ax = sns.heatmap(cov_mat, cmap="PiYG", 
                annot=True,
                center=0,
                yticklabels=ticks,
                xticklabels=ticks
                )
ax.invert_yaxis()

Distributions of each "ton" branch length:

In [None]:
# Evaluate the density of every "ton" branch length on one shared time grid.
x = np.arange(0, 5, 0.01)

# Collect the per-class frames and concatenate once: growing a DataFrame with
# pd.concat inside the loop copies all earlier rows on every iteration and
# starts from an empty frame, which recent pandas versions warn about.
frames = []
for i in range(rewards.shape[0]-1):
    pdf = graph.reward_transform(rewards[i, :]).pdf(x)
    frames.append(pd.DataFrame({'prob': pdf, 't': x, 'ton': i+1}))

# pd.concat raises on an empty list, so keep the original empty-frame result.
result = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

In [None]:
sns.lineplot(data=result, y='prob', x='t', hue='ton')
sns.despine()

Compute expectations from the distributions:

In [None]:
# NOTE(review): 1000 is presumably the granularity of the context — confirm.
ctx = graph.distribution_context(1000)

# Step forward until the reported CDF reaches 0.999.
while ctx.state()['cdf'] < 0.999:
    ctx.step()

# Weight the accumulated visiting times by each reward row; the last row is
# left out, as in the other SFS computations in this notebook.
expected_visits = ctx.accumulated_visiting_time()
sfs = np.array([np.sum(expected_visits * rewards[i, :]) for i in range(rewards.shape[0]-1)])
sfs

In [None]:
sns.barplot(x=list(range(1, len(sfs)+1)), y=sfs)
sns.despine()

Changing rates (edge weights) using parametrization:

In [None]:
# Note: This requires the graph to be built with parameterized edges
# graph.update_weights_parameterized(np.array([2.0]))

# states = np.array([graph.vertex_at(i).state() for i in range(graph.vertices_length())])
# result = graph.to_matrix()
# ipv = result['ipv']
# sim = result['sim']

Change rates back to normal:

In [None]:
# plot_graph(states, ipv, sim)

## Time-inhomogeneous coalescent

The distribution context is always made on the graph without rewards transformation. You can then:

- Get the distribution (and from that all moments) of the time to absorption. Note that you cannot use `dph` for this, because it assumes the current edge weights rather than the sequentially updated ones used in the distribution context.
- Get the marginal expectations by summing over the products of the expected accumulated visiting times and rewards.

In [None]:
def cdf_timeinhom(graph, epoques, sizes):
    """Record the CDF of a distribution context while stepping it forward.

    Parameters
    ----------
    graph
        Object providing ``distribution_context(1000)``; the returned context
        must offer ``state()`` (a mapping with ``'cdf'`` and ``'time'``) and
        ``step()``.
    epoques
        Sorted epoch start times, searched with ``np.searchsorted``.
    sizes
        Population size per epoch, indexed by the epoch found for the current
        time.

    Returns
    -------
    pandas.DataFrame
        Columns ``'prob'`` (CDF value) and ``'t'`` (time), one row per step
        taken before the CDF reached 0.999.

    Notes
    -----
    The weight update is currently disabled, so ``epoques`` and ``sizes`` are
    looked up but do not influence the result.
    """
    ctx = graph.distribution_context(1000)
    cdf_values, time_points = [], []
    last_size = 0

    while True:
        snapshot = ctx.state()
        if not snapshot['cdf'] < 0.999:
            break
        cdf_values.append(snapshot['cdf'])
        time_points.append(snapshot['time'])

        # Epoch whose start time is the latest one not after the current time.
        epoch = np.searchsorted(epoques, snapshot['time'], side='right') - 1
        size = sizes[epoch]
        if size != last_size:
            # Note: This would require parameterized graph
            # graph.update_weights_parameterized(np.array([1/size]))
            pass
        last_size = size
        ctx.step()

    return pd.DataFrame({'prob': cdf_values, 't': time_points})

epoques = np.array([0, 1, 2, 3, 4])
# Note: Requires parameterized graph for this to work properly
# cdf1 = cdf_timeinhom(graph, epoques, np.array([1, 1, 1, 1, 1]))
# cdf2 = cdf_timeinhom(graph, epoques, np.array([1, 2, 4, 8, 16]))
# cdf3 = cdf_timeinhom(graph, epoques, np.array([16, 8, 4, 2, 1]))

In [None]:
# fig, axes = plt.subplots(1, 3, figsize=(12, 4))
# axes[0].plot(cdf1['t'], cdf1['prob'])
# axes[1].plot(cdf2['t'], cdf2['prob'])
# axes[2].plot(cdf3['t'], cdf3['prob'])
# sns.despine()

In [None]:
def sfs_timeinhom(graph, epoques, sizes, rewards):
    """Compute reward-weighted sums of accumulated visiting times.

    A distribution context is stepped forward until its CDF reaches 0.999;
    the accumulated visiting times are then multiplied by each reward row and
    summed.

    Parameters
    ----------
    graph
        Object providing ``distribution_context(1000)``; the returned context
        must offer ``state()`` (a mapping with ``'cdf'`` and ``'time'``),
        ``step()`` and ``accumulated_visiting_time()``.
    epoques
        Sorted epoch start times, searched with ``np.searchsorted``.
    sizes
        Population size per epoch, indexed by the epoch found for the current
        time.
    rewards
        2-D array with one reward vector per row. The last row is skipped.

    Returns
    -------
    numpy.ndarray
        One value per reward row except the last.

    Notes
    -----
    The weight update is currently disabled, so ``epoques`` and ``sizes`` are
    looked up but do not influence the result.
    """
    ctx = graph.distribution_context(1000)
    prev_size = 0
    while ctx.state()['cdf'] < 0.999:
        size = sizes[np.searchsorted(epoques, ctx.state()['time'], side='right') - 1]
        if size != prev_size:
            # Note: This would require parameterized graph
            # graph.update_weights_parameterized(np.array([1/size]))
            pass
        prev_size = size
        ctx.step()
    expected_visits = ctx.accumulated_visiting_time()

    return np.array([np.sum(expected_visits * row) for row in rewards[:-1]])

# epoques = np.array([0, 1, 2, 3, 4])
# sfs1 = sfs_timeinhom(graph, epoques, np.array([1, 1, 1, 1, 1]), rewards)
# sfs2 = sfs_timeinhom(graph, epoques, np.array([1, 2, 4, 8, 16]), rewards)
# sfs3 = sfs_timeinhom(graph, epoques, np.array([16, 8, 4, 2, 1]), rewards)

In [None]:
# fig, axes = plt.subplots(1, 3, figsize=(12, 4))
# sns.barplot(x=list(range(1, len(sfs1)+1)), y=sfs1, ax=axes[0]).set_title("Constant 1")
# sns.barplot(x=list(range(1, len(sfs2)+1)), y=sfs2, ax=axes[1]).set_title("Exp growth from 1")
# sns.barplot(x=list(range(1, len(sfs3)+1)), y=sfs3, ax=axes[2]).set_title("Exp decline to 1")
# sns.despine()

# Simple model in Keynote presentation

In [None]:
graph = Graph(state_length=1)

# Four vertices, identified by the one-element states 1..4.
A, B, C, D = (
    graph.find_or_create_vertex(np.array([label], dtype=int))
    for label in (1, 2, 3, 4)
)

# The start vertex leads to A; D has no outgoing edges.
graph.starting_vertex().add_edge(A, 1)
for source, target, weight in (
    (A, B, 0.12),
    (A, D, 0.08),
    (B, C, 0.25),
    (B, D, 0.25),
    (C, A, 0.2),
):
    source.add_edge(target, weight)

n_vertices = graph.vertices_length()
states = np.array([graph.vertex_at(k).state() for k in range(n_vertices)])
result = graph.to_matrix()
ipv, sim = result['ipv'], result['sim']

graph.vertices_length()

In [None]:
plot_graph(states, ipv, sim)

Expected waiting times:

In [None]:
graph.expected_waiting_time()

Convert graph to matrix:

In [None]:
result = graph.to_matrix()
print("States:")
print(result['states'])
print("\nSIM:")
print(result['sim'])
print("\nIPV:")
print(result['ipv'])

Invert subintensity matrix to get Green matrix:

In [None]:
U = -np.linalg.inv(result['sim'])
U

Sum the rows of the Green matrix; the first entry (the row of the initial state) is the expectation:

In [None]:
U.sum(axis=1)

In [None]:
graph.variance()

In [None]:
r_prime = graph.expected_waiting_time()
r_prime

In [None]:
2 * graph.expected_waiting_time(r_prime) - r_prime * r_prime

## Super simple model in Keynote presentation

Simplest possible model as example of how higher order moments are computed.

In [None]:
graph = Graph(state_length=1)

# Four vertices, identified by the one-element states 1..4.
A, B, C, D = (
    graph.find_or_create_vertex(np.array([label], dtype=int))
    for label in (1, 2, 3, 4)
)

# Linear chain start -> A -> B -> C -> D, every edge with weight 1.
previous = graph.starting_vertex()
for following in (A, B, C, D):
    previous.add_edge(following, 1)
    previous = following

n_vertices = graph.vertices_length()
states = np.array([graph.vertex_at(k).state() for k in range(n_vertices)])
result = graph.to_matrix()
ipv, sim = result['ipv'], result['sim']

In [None]:
plot_graph(states, ipv, sim, constrained=False)

In [None]:
graph.expected_waiting_time()

In [None]:
r = graph.expected_waiting_time()
graph.expected_waiting_time(r)

In [None]:
graph.moments(3)

In [None]:
r_prime = graph.expected_waiting_time()
2 * graph.expected_waiting_time(r_prime) - r_prime * r_prime

In [None]:
graph.variance()

# IM model

Note: The IM model construction requires external C++ code that is not part of the standard phasic Python API. This section is included for reference but may not run without additional setup.

# Rabbit model

Note: The full rabbit model construction also relies on external C++ code; see the R notebook for the full implementation. The cell below builds a small example (`L = 2`) directly with the Python API.

In [None]:
# Rabbit model example: the state is (rabbits on the left island, rabbits on
# the right island), starting with all L rabbits on the left.
L = 2

state_vector_length = 2
graph = Graph(state_length=state_vector_length)
starting_vertex = graph.starting_vertex()
initial_state = np.array([L, 0], dtype=int)

starting_vertex.add_edge(graph.find_or_create_vertex(initial_state), 1)

# Visit vertices in creation order; new states are appended as they are found.
index = 1
while index < graph.vertices_length():
    vertex = graph.vertex_at(index)
    state = vertex.state().copy()
    left, right = state

    # (child state, rate) pairs, listed in the order the edges are added.
    transitions = []
    if left > 0:
        # Rabbit jump left to right
        transitions.append(((left - 1, right + 1), 1))
        # Island flooding
        transitions.append(((0, right), 2))
    if right > 0:
        # Rabbit jump right to left
        transitions.append(((left + 1, right - 1), 1))
        # Island flooding
        transitions.append(((left, 0), 4))

    for target, weight in transitions:
        child_state = np.array(target, dtype=int)
        vertex.add_edge(graph.find_or_create_vertex(child_state), weight)

    index += 1

n_vertices = graph.vertices_length()
states = np.array([graph.vertex_at(k).state() for k in range(n_vertices)])
result = graph.to_matrix()
ipv, sim = result['ipv'], result['sim']

graph.vertices_length()

In [None]:
plot_graph(states, ipv, sim)

In [None]:
graph.expectation()