# Showcase for phasic

Python setup:

In [None]:
# Notebook setup: imports, RNG seeding and plotting defaults.
# The statement order matters here, so keep it as is.
# Always import phasic first to set jax backend correctly
import phasic
import numpy as np
np.random.seed(42)  # fixed NumPy seed so any random draws repeat across runs
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib_inline.backend_inline import set_matplotlib_formats
set_matplotlib_formats('retina', 'png')  # inline figures are emitted in these formats
import matplotlib
matplotlib.rcParams['figure.figsize'] = (5, 3.7)  # default figure size (width, height)
sns.set_context('paper', font_scale=0.9)  # seaborn "paper" context with slightly smaller fonts
# Optional: uncomment the two lines below to silence warnings coming from seaborn.
# import warnings
# warnings.filterwarnings(action='ignore', category=Warning, module='seaborn')
phasic.set_theme('dark')  # NOTE(review): presumably selects phasic's dark plot theme — confirm

## Standard coalescent

If you want to generate the state space:

In [None]:
# Build the state space of the standard coalescent for a sample of size n.
#
# A state is a vector of length n. Entry k (0-indexed) counts the lineages
# that currently have k + 1 descendants in the sample, so the initial state
# has all n lineages in entry 0 (singletons).
n = 4

state_vector_length = n
graph = phasic.Graph(state_vector_length)
starting_vertex = graph.vertex_at(0)  # 0-indexed in Python
initial_state = np.zeros(n, dtype=int)
initial_state[0] = n

# The starting vertex leads to the initial state with weight 1.
starting_vertex.add_edge(
    graph.find_or_create_vertex(initial_state),
    1.0
)

index = 1  # 0-indexed, start from second vertex (skip the starting vertex)

# The loop relies on find_or_create_vertex adding unseen states to the graph,
# so vertices_length() grows until every reachable state has been expanded.
while index < graph.vertices_length():
    vertex = graph.vertex_at(index)
    state = vertex.state()  # does not change while this vertex is expanded

    # loop over all unordered pairs of lineage classes
    for i in range(n):
        for j in range(i, n):
            if i == j:
                # same class: there need to be at least two to coalesce
                if state[i] < 2:
                    continue
                # number of pairs within the class
                rate = state[i] * (state[i] - 1) / 2
            else:
                # different classes: at least one lineage in each
                if state[i] < 1 or state[j] < 1:
                    continue
                # number of combinations across the two classes
                rate = state[i] * state[j]

            child_state = state.copy()
            # remove the two coalescing lineages (both from class i when i == j)
            child_state[i] -= 1
            child_state[j] -= 1
            # The merged lineage has (i + 1) + (j + 1) descendants, which is
            # 0-indexed entry i + j + 1. (Using i + j is only right with
            # 1-indexed classes.)
            child_state[i + j + 1] += 1

            vertex.add_edge(
                graph.find_or_create_vertex(child_state),
                rate
            )

    index += 1

print(f"Number of vertices: {graph.vertices_length()}")

Inspect the state vectors of the graph's vertices:

In [None]:
# Gather the state vector of every vertex and show them as a table
# (one row per vertex, one column per lineage class).
states = [graph.vertex_at(k).state() for k in range(graph.vertices_length())]

column_names = [f'n_{k+1}' for k in range(n)]
states_df = pd.DataFrame(states, columns=column_names)
print("State vectors:")
print(states_df)

Compute moments:

In [None]:
# First moment (expectation) of the total waiting time.
# The graph is reward-transformed first and the moment is taken on the result.
# A reward of one on every vertex is used here.
num_vertices = graph.vertices_length()
rewards = np.full(num_vertices, 1.0)
reward_graph = graph.reward_transform(rewards)
expectation = reward_graph.phase_type_moment(1)
print(f"Expected total coalescence time: {expectation}")

In [None]:
# Variance from the first two raw moments: Var = E[T^2] - (E[T])^2
second_moment = reward_graph.phase_type_moment(2)
squared_mean = expectation**2
variance = second_moment - squared_mean
print(f"Variance: {variance}")

In [None]:
# Raw moments of order 1 through 4 of the reward-transformed graph
moment_orders = range(1, 5)
moments = list(map(reward_graph.phase_type_moment, moment_orders))
print(f"Moments (k=1,2,3,4): {moments}")

Marginal expectations using rewards:

In [None]:
# Build state matrix: state_matrix[v, k] is entry k of the state of vertex v
state_matrix = np.array([graph.vertex_at(i).state() for i in range(graph.vertices_length())])
# The matrix is printed transposed, so the label describes the printed
# orientation (one row per lineage class, one column per vertex).
print("State matrix (rows are lineage classes, columns are vertices):")
print(state_matrix.T)  # Transpose to match R output

The rows of the printed (transposed) matrix, i.e. the columns of `state_matrix`, are our reward vectors. E.g., the singleton rewards:

In [None]:
# Each column of state_matrix is the reward vector of one lineage class:
# column 0 for singletons, column 1 for doubletons, column 2 for tripletons.
singletons, doubletons, tripletons = (state_matrix[:, k] for k in range(3))
print(f"Singleton rewards: {singletons}")

Expected tripleton branch length:

In [None]:
# Reward-transform the graph with the tripleton counts, then take the first
# moment of the transformed graph.
tripleton_graph = graph.reward_transform(tripletons)
tripleton_expectation = tripleton_graph.phase_type_moment(1)
print(
    f"Expected tripleton branch length: {tripleton_expectation}"
)

SFS (Site Frequency Spectrum):

In [None]:
# Site frequency spectrum: the first moment of the graph reward-transformed by
# each lineage class in turn. The last class (the n-pleton, all coalesced) is
# left out.
branch_lengths = []
for i in range(n - 1):
    rewards = state_matrix[:, i]
    reward_graph = graph.reward_transform(rewards)
    expectation = reward_graph.phase_type_moment(1)
    branch_lengths.append(expectation)

sfs = np.array(branch_lengths)
print(f"Site Frequency Spectrum: {sfs}")

In [None]:
# Bar chart of the SFS, with bars positioned at 1, 2, ..., len(sfs)
fig, ax = plt.subplots(figsize=(6, 4))
x = 1 + np.arange(len(sfs))
ax.bar(x, sfs)
ax.set(
    xlabel='Number of lineages',
    ylabel='Expected branch length',
    title='Site Frequency Spectrum',
)
sns.despine()
plt.tight_layout()
plt.show()

## Discrete phase-type distribution

In [None]:
# Build the state space of the standard coalescent for a sample of size n.
#
# A state is a vector of length n. Entry k (0-indexed) counts the lineages
# that currently have k + 1 descendants in the sample, so the initial state
# has all n lineages in entry 0 (singletons).
n = 14

state_vector_length = n
graph = phasic.Graph(state_vector_length)
starting_vertex = graph.vertex_at(0)
initial_state = np.zeros(n, dtype=int)
initial_state[0] = n

# The starting vertex leads to the initial state with weight 1.
starting_vertex.add_edge(
    graph.find_or_create_vertex(initial_state),
    1.0
)

index = 1  # skip the starting vertex

# The loop relies on find_or_create_vertex adding unseen states to the graph,
# so vertices_length() grows until every reachable state has been expanded.
while index < graph.vertices_length():
    vertex = graph.vertex_at(index)
    state = vertex.state()  # does not change while this vertex is expanded

    # loop over all unordered pairs of lineage classes
    for i in range(n):
        for j in range(i, n):
            if i == j:
                # same class: there need to be at least two to coalesce
                if state[i] < 2:
                    continue
                # number of pairs within the class
                rate = state[i] * (state[i] - 1) / 2
            else:
                # different classes: at least one lineage in each
                if state[i] < 1 or state[j] < 1:
                    continue
                # number of combinations across the two classes
                rate = state[i] * state[j]

            child_state = state.copy()
            # remove the two coalescing lineages (both from class i when i == j)
            child_state[i] -= 1
            child_state[j] -= 1
            # The merged lineage has (i + 1) + (j + 1) descendants, which is
            # 0-indexed entry i + j + 1. (Using i + j is only right with
            # 1-indexed classes.)
            child_state[i + j + 1] += 1

            vertex.add_edge(
                graph.find_or_create_vertex(child_state),
                rate
            )

    index += 1

print(f"Number of vertices: {graph.vertices_length()}")

In [None]:
def make_discrete(mutation_graph, mutation_rate):
    """Convert a continuous-time graph into a discrete mutation model, in place.

    For every non-absorbing vertex except the starting vertex, and for every
    state-vector entry ``j`` with a positive count, a new auxiliary vertex is
    created. The vertex gets an edge to that auxiliary vertex with weight
    ``mutation_rate * state[j]``, and the auxiliary vertex gets an edge back
    with weight 1.0. The graph is normalized afterwards.

    Args:
        mutation_graph: Graph to modify in place.
        mutation_rate: Rate multiplied by the count in each state entry to
            give the weight of the edge into the auxiliary vertex.

    Returns:
        Array of shape ``(vertices after modification, state vector length)``.
        ``rewards[v, j]`` is 1.0 when ``v`` is the auxiliary vertex created
        for entry ``j`` of some state, and 0.0 otherwise.
    """
    # Number of vertices before any auxiliary vertex is added
    vlength = mutation_graph.vertices_length()

    # Number of fields in state vector
    state_vector_length = len(mutation_graph.vertex_at(0).state())

    # Maps the index of each auxiliary vertex to the state entry it rewards
    rewarded_index_by_vertex = {}

    # Loop all but starting node; vertices appended below are not revisited
    for i in range(1, vlength):
        vertex = mutation_graph.vertex_at(i)

        # Skip absorbing vertices. Testing for a non-zero state is not enough:
        # an absorbing state can have positive entries, and giving it outgoing
        # edges would stop it from being absorbing.
        # NOTE(review): relies on Vertex.rate() being the total outgoing rate
        # and on Graph.create_vertex() in the phasic API — confirm.
        if not vertex.rate() > 0:
            continue

        for j, val in enumerate(vertex.state()):
            if val <= 0:  # only entries we may reward
                continue

            # A fresh vertex per (vertex, entry) pair. find_or_create_vertex
            # would hand back one shared all-zero vertex for every pair.
            aux_state = np.zeros(state_vector_length, dtype=int)
            mutation_vertex = mutation_graph.create_vertex(aux_state)
            mutation_vertex.add_edge(vertex, 1.0)
            vertex.add_edge(mutation_vertex, mutation_rate * val)

            rewarded_index_by_vertex[mutation_vertex.index()] = j

    # Normalize graph
    mutation_graph.normalize()

    # Build reward matrix, sized for the graph including auxiliary vertices
    rewards = np.zeros((mutation_graph.vertices_length(), state_vector_length))
    for vertex_idx, state_idx in rewarded_index_by_vertex.items():
        rewards[vertex_idx, state_idx] = 1.0

    return rewards

In [None]:
mutation_rate = 1.0

# Work on a copy so the continuous-time graph stays untouched, then turn the
# copy into the discrete mutation model and get its reward matrix.
mutation_graph = graph.clone()
rewards = make_discrete(mutation_graph, mutation_rate)

# One SFS entry per lineage class (the last class is left out): the first
# moment of the graph reward-transformed by that class's reward column.
expected_counts = []
for i in range(n - 1):
    class_rewards = rewards[:, i]
    reward_graph = mutation_graph.reward_transform(class_rewards)
    expectation = reward_graph.phase_type_moment(1)
    expected_counts.append(expectation)

sfs = np.array(expected_counts)
print(f"Site Frequency Spectrum (n={n}): {sfs}")

In [None]:
# Bar chart of the discrete SFS, with bars positioned at 1, 2, ..., len(sfs)
fig, ax = plt.subplots(figsize=(7, 4))
x = 1 + np.arange(len(sfs))
ax.bar(x, sfs)
ax.set(
    xlabel='Number of lineages',
    ylabel='Expected number of mutations',
    title=f'Site Frequency Spectrum (n={n}, mutation rate={mutation_rate})',
)
sns.despine()
plt.tight_layout()
plt.show()