# Guide to tHMM

In [1]:
import numpy as np
import scipy.stats as sp

### Synthesizing Cells (not required by the user)

In [2]:
from lineage.CellVar import CellVar as c
from lineage.CellVar import _double

In [3]:
 T = np.array([[1.0, 0.0],
               [0.0, 1.0]])
parent_state = 1
parent_cell = c(state=parent_state, left=None, right=None, parent=None, gen=1)
parent_cell._divide(T)

left_state, right_state = _double(parent_state, T)
left = c(state=left_state, left=None, right=None, parent=parent_cell, gen=parent_cell.gen + 1)
print(left.state)

[1]


In [14]:
# Show the hand-built left daughter next to the one stored on the parent.
print(left_cell, parent_cell.left, sep=' \n ')

Generation: 2, Observation: This cell has no observations to report. 
 Generation: 2, Observation: This cell has no observations to report.


In [15]:
# Show the hand-built right daughter next to the one stored on the parent.
print(right_cell, parent_cell.right, sep=' \n ')

Generation: 2, Observation: This cell has no observations to report. 
 Generation: 2, Observation: This cell has no observations to report.


### Creating a synthetic lineage (required by the user)

In [4]:
from lineage.LineageTree import LineageTree
from lineage.StateDistribution import StateDistribution

In [5]:
# Configuration for a two-state model.

# Parameters of the first state.
state1, bern_p1 = 1, 0.99
expon_scale_beta1 = 40
gamma_a1, gamma_scale1 = 10.0, 2.0

# Parameters of the second state.
state2, bern_p2 = 0, 0.7
expon_scale_beta2 = 20
gamma_a2, gamma_scale2 = 5.0, 1.0

# Lineage arguments.
# pi is the initial probability vector.
pi = np.array((0.8, 0.2))

# T is the transition probability matrix.
T = np.array(((0.95, 0.05),
              (0.05, 0.95)))

In [6]:
# One StateDistribution per state, built from the parameters defined earlier in
# the notebook (first state first, then the second).
state_obj1, state_obj2 = (
    StateDistribution(*params)
    for params in (
        (state1, bern_p1, expon_scale_beta1, gamma_a1, gamma_scale1),
        (state2, bern_p2, expon_scale_beta2, gamma_a2, gamma_scale2),
    )
)

In [7]:
# Inputs handed to the lineage constructor.
prune_boolean = False  # False requests the full tree
desired_num_cells = 50
E = [state_obj1, state_obj2]  # list of emission objects, one per state

In [8]:
# Build the synthetic lineage from the initial probability vector, transition
# matrix, emission objects, requested cell count, and pruning flag.
lineage = LineageTree(
    pi,
    T,
    E,
    desired_num_cells,
    prune_boolean,
)

ValueError: object too deep for desired array

In [None]:
from lineage.CellNode import CellNode
from lineage.BaumWelch import fit
from lineage.DownwardRecursion import get_root_gammas, get_nonroot_gammas
from lineage.Viterbi import get_leaf_deltas, get_nonleaf_deltas, Viterbi
from lineage.UpwardRecursion import get_leaf_Normalizing_Factors, get_leaf_betas, get_nonleaf_NF_and_betas
from lineage.tHMM import tHMM
from lineage.tHMM_utils import max_gen, get_gen, get_parents_for_level, getAccuracy, getAIC
from lineage.Lineage_utils import get_numLineages, init_Population, remove_singleton_lineages, remove_unfinished_cells
from lineage.Lineage_utils import generatePopulationWithTime as gpt
from lineage.plotting_utils import make_colormap_graph
from lineage.Analyze import Analyze
from lineage.Depth_Two_State_Lineage import Depth_Two_State_Lineage

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import networkx as nx
from networkx.drawing.nx_agraph import graphviz_layout

## Creating and analyzing our synthetic lineage

Variables subscripted or prefixed with `MAS` refer to the first (master) lineage in this heterogeneous lineage, while variables subscripted with `2` refer to the second lineage. Together, these two lineages make up the _depth two state_ lineage.

In [None]:
# --- Experiment durations ---
T_MAS = 400.  # run time of the first synthetic lineage
T_2 = 250.  # run time of the second synthetic lineage
experimentTime = T_MAS + T_2

# --- Master (first) lineage ---
MASinitCells = [1]  # number of initial cells in the first lineage (has to be one)
MASlocBern = [0.999]  # Bernoulli parameter: chance of dividing rather than dying
MASbeta = [80]  # exponential parameter for how long a cell lives [hours]

# --- Second lineage ---
initCells2 = [1]  # number of initial cells in the second lineage (has to be one)
locBern2 = [0.8]  # Bernoulli parameter: chance of dividing rather than dying
beta2 = [20]  # exponential parameter for how long a cell lives [hours]

# The parameters above are lists because a population can be described by
# several parameter groups. For example, initCells = [10, 50, 3],
# locBern = [0.8, 0.9, 0.75], beta = [80, 20, 90] creates three distinct
# populations: 10+50+3=63 lineages in total, each tree following the
# distribution parameters found at the same index of the other lists.

# Depth-heterogeneous lineages are produced by the function called below, which
# joins one lineage to another and performs the house-keeping needed for a
# proper join: the correct parent-daughter links exist, the cells of the second
# lineage recognize the root cell of the master lineage tree, etc.

# For visualization purposes, restrict the maximum and minimum length of the
# generated lineage.
max_lin_length = 300
min_lin_length = 5
FOM = 'E'  # tells the code to use an exponential Force of Mortality
X, newLineage, masterLineage, subLineage2 = Depth_Two_State_Lineage(
    T_MAS,
    MASinitCells,
    MASlocBern,
    T_2,
    initCells2,
    locBern2,
    FOM=FOM,
    betaExp=MASbeta,
    betaExp2=beta2,
)

# Single-cell lineages are removed because they seem to break under the analysis.
X1 = remove_singleton_lineages(X)
x_new, end_time = remove_unfinished_cells(X1)

numStates = 2  # how many states the analysis should expect to see
# Analyze contains the bulk of the analysis code, running its steps in order.
_, _, all_states, tHMMobj, _, _ = Analyze(x_new, numStates)

# Visualizing results

In [None]:
import matplotlib as mpl
from matplotlib import ticker

# Run the state comparison and report the accuracy.
getAccuracy(tHMMobj, all_states, verbose=True)

# rcParams are consulted when text artists are created, so the font size has to
# be set before the figure, title, and colorbar label exist. Setting it at the
# end of the cell (as before) left this figure at the default size on a fresh
# kernel.
plt.rcParams.update({'font.size': 64})

G, cmap, _ = make_colormap_graph(x_new)
edge_weights = [d for (u, v, d) in G.edges.data('weight')]
# Layout `prog` options that work here: neato, dot, twopi (avoid the other
# graphviz programs).
pos = graphviz_layout(G, prog='dot', root=0)
# The discarded `plt.figaspect(1)` call was dropped: it only returns a
# (width, height) pair and never affected this figure.
plt.figure(figsize=(40, 31))
node_size = 200
nodes = nx.draw_networkx_nodes(G, pos, node_size=node_size, node_color=cmap, alpha=1)
edges = nx.draw_networkx_edges(G, pos, node_size=node_size, edge_color=edge_weights, edge_cmap=plt.cm.plasma, width=2)

ax = plt.gca()
ax.set_axis_off()
cb = plt.colorbar(edges, ticks=edge_weights)
# Replace the per-edge ticks with at most five evenly chosen ones.
tick_locator = ticker.MaxNLocator(nbins=5)
cb.locator = tick_locator
cb.update_ticks()
cb.set_label(label=r'Experiment Time [hrs]', labelpad=90)
cb.ax.invert_yaxis()
plt.title('Simulated Heterogeneous (by Depth) Lineage')
plt.savefig('true.svg')
plt.show()

In [None]:
# Set the font size before any text artist is created so this cell renders the
# same way regardless of whether an earlier cell already changed rcParams.
plt.rcParams.update({'font.size': 64})

# Same graph as the true-state plot, but nodes are colored using the states
# stored on the fitted model (tHMMobj.states[0]).
G, cmap, _ = make_colormap_graph(x_new, tHMMobj.states[0])
edge_weights = [d for (u, v, d) in G.edges.data('weight')]
# Layout `prog` options that work here: neato, dot, twopi (avoid the other
# graphviz programs).
pos = graphviz_layout(G, prog='dot', root=0)
# The discarded `plt.figaspect(1)` call was dropped: it only returns a
# (width, height) pair and never affected this figure.
plt.figure(figsize=(40, 31))
node_size = 200
nodes = nx.draw_networkx_nodes(G, pos, node_size=node_size, node_color=cmap, alpha=1)
edges = nx.draw_networkx_edges(G, pos, node_size=node_size, edge_color=edge_weights, edge_cmap=plt.cm.plasma, width=2)

ax = plt.gca()
ax.set_axis_off()
cb = plt.colorbar(edges, ticks=edge_weights)
# Replace the per-edge ticks with at most five evenly chosen ones.
tick_locator = ticker.MaxNLocator(nbins=5)
cb.locator = tick_locator
cb.update_ticks()
cb.set_label(label=r'Experiment Time [hrs]', labelpad=90)
cb.ax.invert_yaxis()
plt.title('Estimated Subpopulation Classification')
plt.savefig('estimated.svg')
plt.show()