In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import biolqm
import numpy as np
import boolean
import colomoto.minibn as minibn
import scipy as sp

### Building STG table

##### First, loading the model as a minibn

In [3]:
# Load the Boolean network from its .bnet file with bioLQM ("bnet" is the format name).
bnet_model = biolqm.load("model_files/krasmodel15vars.bnet", "bnet")

In [4]:
# Convert the bioLQM model to a minibn network; it is indexed by node name below
# (model[node]) to obtain each node's update formula.
model = biolqm.to_minibn(bnet_model)

In [5]:
# The order of the model's keys differs from the node order used in Mihaly's list,
# so the order is written out explicitly here to match his.
# This order defines the column order of every state/update array built below.
nodes = [
    "cc", "KRAS", "DSB", "CHEK1", "MAPKAPK2", "FAHRR", "HRR2", "CDC25B", "g2m_trans", "cell_death", 
    "ATM", "ATR", "FAcore", "FANCD2I", "NHEJ"
]
nodes

['cc',
 'KRAS',
 'DSB',
 'CHEK1',
 'MAPKAPK2',
 'FAHRR',
 'HRR2',
 'CDC25B',
 'g2m_trans',
 'cell_death',
 'ATM',
 'ATR',
 'FAcore',
 'FANCD2I',
 'NHEJ']

In [6]:
# Number of nodes in the network; the state space enumerated below has 2**n states.
n=len(nodes)
n

15

##### Then, building the list of binary states

In [7]:
# Column vector holding every state index 0 .. 2**n - 1, shape (2**n, 1).
x = np.arange(2 ** n).reshape(-1, 1)
x

array([[    0],
       [    1],
       [    2],
       ...,
       [32765],
       [32766],
       [32767]])

In [8]:
# Row vector of the weights 2**0, 2**-1, ..., 2**-(n-1), shape (1, n).
# Multiplying a state index by 2**-k shifts bit k down to the units position.
y = np.power(2.0, np.arange(0, -n, -1))[np.newaxis, :]
y

array([[1.00000000e+00, 5.00000000e-01, 2.50000000e-01, 1.25000000e-01,
        6.25000000e-02, 3.12500000e-02, 1.56250000e-02, 7.81250000e-03,
        3.90625000e-03, 1.95312500e-03, 9.76562500e-04, 4.88281250e-04,
        2.44140625e-04, 1.22070312e-04, 6.10351562e-05]])

In [9]:
# Row i is the binary expansion of state index i, least-significant bit first:
# column k is floor(i * 2**-k) mod 2, i.e. the value of node k in state i.
list_binary_states = (np.floor(x * y) % 2).astype(bool)

In [10]:
list_binary_states.shape

(32768, 15)

##### Then, building the update matrix

In [11]:
def gen_array(formula, list_binary_states, nodes):
    """Evaluate a boolean formula on every state at once.

    Parameters
    ----------
    formula : boolean.py expression
        A ``Symbol``, ``NOT``, ``OR`` or ``AND`` node (``minibn.NOT`` is
        accepted as well). Operands are read from ``formula.args``.
    list_binary_states : ndarray of bool
        One row per state, one column per node.
    nodes : list of str
        Node names; the position of a name is its column in
        ``list_binary_states``.

    Returns
    -------
    ndarray of bool
        One value per row of ``list_binary_states``.

    Raises
    ------
    TypeError
        If ``formula`` is of an unsupported type. Previously this case only
        printed a message and returned ``None``, which the caller would have
        packed silently into the update matrix.
    ValueError
        If a symbol's name is not in ``nodes`` (raised by ``list.index``).
    """
    if isinstance(formula, boolean.boolean.Symbol):
        # A symbol is simply the column of the corresponding node.
        return list_binary_states[:, nodes.index(str(formula))]

    if isinstance(formula, (boolean.boolean.NOT, minibn.NOT)):
        return np.logical_not(
            gen_array(formula.args[0], list_binary_states, nodes)
        )

    # OR and AND are n-ary: fold the operands with the matching numpy ufunc.
    if isinstance(formula, boolean.boolean.OR):
        combine = np.logical_or
    elif isinstance(formula, boolean.boolean.AND):
        combine = np.logical_and
    else:
        raise TypeError("Unknown boolean operator : %s" % type(formula))

    result = gen_array(formula.args[0], list_binary_states, nodes)
    for operand in formula.args[1:]:
        result = combine(
            result, gen_array(operand, list_binary_states, nodes)
        )
    return result

In [12]:
# Column k holds, for every state, the value node k would take after its update.
update_matrix = np.array(
    [gen_array(model[name], list_binary_states, nodes) for name in nodes]
).T

In [13]:
update_matrix.shape

(32768, 15)

This should eventually use SciPy sparse matrices, but a dense array is fine for now.

##### Build up/down trans source/target

In [14]:
def fcn_states_inds(yes_no, n_series_exp, n_isl_exp):
    """Return the indices of all states in which one node has a given value.

    A state index is read as a binary number whose bit ``k`` (least
    significant first) is the value of node ``k``. The result lists, in
    increasing order, every index in ``0 .. 2**n_series_exp - 1`` whose bit
    ``n_isl_exp`` equals ``yes_no``.

    This is a 0-based port of a MATLAB routine: ``n_isl_exp`` is a 0-based
    node index and the returned state indices are 0-based as well.

    Parameters
    ----------
    yes_no : int
        Value of the selected node, 0 or 1.
    n_series_exp : int
        Total number of nodes.
    n_isl_exp : int
        0-based index of the selected node; ``0 <= n_isl_exp < n_series_exp``.

    Returns
    -------
    ndarray of int, shape (1, 2**(n_series_exp - 1))
        The matching state indices (callers take row ``[0, :]``).

    Raises
    ------
    ValueError
        If ``n_isl_exp`` is outside ``0 .. n_series_exp - 1``.
    """
    if not 0 <= n_isl_exp < n_series_exp:
        raise ValueError(
            "n_isl_exp must satisfy 0 <= n_isl_exp < n_series_exp, got %r and %r"
            % (n_isl_exp, n_series_exp)
        )

    block = 2 ** n_isl_exp
    # Enumerate the states of the other n_series_exp - 1 nodes ...
    compact = np.arange(2 ** (n_series_exp - 1))
    # ... then open a gap at bit n_isl_exp: the bits above it move up by one
    # position (adding (compact // block) * block) and the gap is filled with
    # yes_no (adding yes_no * block).
    #
    # The two terms are added with ordinary broadcasting. The previous
    # np.sum([t_last, t_mult]) combined arrays of shapes (N,) and (1, N) in a
    # list, which relies on ragged-array creation and fails on NumPy >= 1.24.
    return (compact + (compact // block + yes_no) * block)[np.newaxis, :]

In [15]:
# For each node: the states where the node is currently 0 (fcn_states_inds)
# and its update value is 1 (nonzero column of update_matrix), i.e. the source
# states of the "up" transitions.
up_trans_source = [
    np.intersect1d(
        fcn_states_inds(0, n, node_idx)[0, :],
        np.nonzero(update_matrix[:, node_idx])[0],
    )
    for node_idx in range(n)
]
up_trans_source

[array([], dtype=int64),
 array([], dtype=int64),
 array([    2,     3,    10, ..., 16275, 16282, 16283]),
 array([ 1024,  1025,  1026, ..., 32757, 32758, 32759]),
 array([ 1026,  1027,  1030, ..., 32747, 32750, 32751]),
 array([    4,     5,     6, ..., 15837, 15838, 15839]),
 array([    4,     5,     6, ..., 15773, 15774, 15775]),
 array([   17,    18,    19, ..., 32117, 32118, 32119]),
 array([  128,   129,   130, ..., 32509, 32510, 32511]),
 array([  260,   261,   262, ..., 32253, 32254, 32255]),
 array([    4,     5,     6, ..., 31741, 31742, 31743]),
 array([ 1024,  1025,  1026, ..., 30717, 30718, 30719]),
 array([ 1024,  1025,  1026, ..., 28669, 28670, 28671]),
 array([    4,     5,     6, ..., 24573, 24574, 24575]),
 array([    4,     5,     6, ..., 15773, 15774, 15775])]

In [16]:
# For each node: the states where the node is currently 1 (fcn_states_inds)
# and its update value is 0, i.e. the source states of the "down" transitions.
down_trans_source = [
    np.intersect1d(
        fcn_states_inds(1, n, node_idx)[0, :],
        np.nonzero(np.logical_not(update_matrix[:, node_idx]))[0],
    )
    for node_idx in range(n)
]
down_trans_source

[array([], dtype=int64),
 array([], dtype=int64),
 array([   36,    37,    38, ..., 32765, 32766, 32767]),
 array([    8,     9,    10, ..., 29693, 29694, 29695]),
 array([   16,    17,    18, ..., 32761, 32764, 32765]),
 array([  544,   545,   546, ..., 32765, 32766, 32767]),
 array([   96,    97,    98, ..., 32765, 32766, 32767]),
 array([  128,   129,   130, ..., 32765, 32766, 32767]),
 array([], dtype=int64),
 array([], dtype=int64),
 array([ 1024,  1025,  1026, ..., 32761, 32762, 32763]),
 array([ 2048,  2049,  2050, ..., 31741, 31742, 31743]),
 array([ 4096,  4097,  4098, ..., 29693, 29694, 29695]),
 array([ 8192,  8193,  8194, ..., 25593, 25594, 25595]),
 array([16416, 16417, 16418, ..., 32765, 32766, 32767])]

In [17]:
# Each row is [target state, node index, direction flag].
# A down transition clears bit `node_idx`, so the target is source - 2**node_idx.
# The last column is used as an index (it selects the row of the transition
# rates table), not as a label. The MATLAB original apparently used [x 2] with
# 1-based indexing; it is decreased by one here, so "down" is 1.
down_trans_target = [
    np.concatenate(
        (
            (sources - 2 ** node_idx)[:, np.newaxis],
            np.repeat(np.array([[node_idx, 1]]), len(sources), axis=0),
        ),
        axis=1,
    )
    for node_idx, sources in enumerate(down_trans_source)
]
down_trans_target

[array([], shape=(0, 3), dtype=int64),
 array([], shape=(0, 3), dtype=int64),
 array([[   32,     2,     1],
        [   33,     2,     1],
        [   34,     2,     1],
        ...,
        [32761,     2,     1],
        [32762,     2,     1],
        [32763,     2,     1]]),
 array([[    0,     3,     1],
        [    1,     3,     1],
        [    2,     3,     1],
        ...,
        [29685,     3,     1],
        [29686,     3,     1],
        [29687,     3,     1]]),
 array([[    0,     4,     1],
        [    1,     4,     1],
        [    2,     4,     1],
        ...,
        [32745,     4,     1],
        [32748,     4,     1],
        [32749,     4,     1]]),
 array([[  512,     5,     1],
        [  513,     5,     1],
        [  514,     5,     1],
        ...,
        [32733,     5,     1],
        [32734,     5,     1],
        [32735,     5,     1]]),
 array([[   32,     6,     1],
        [   33,     6,     1],
        [   34,     6,     1],
        ...,
        [327

In [18]:
# Each row is [target state, node index, direction flag].
# An up transition sets bit `node_idx`, so the target is source + 2**node_idx.
# As for the down transitions, the last column is used as an index; the MATLAB
# original apparently used [x 1] with 1-based indexing, so "up" is 0 here.
up_trans_target = [
    np.concatenate(
        (
            (sources + 2 ** node_idx)[:, np.newaxis],
            np.repeat(np.array([[node_idx, 0]]), len(sources), axis=0),
        ),
        axis=1,
    )
    for node_idx, sources in enumerate(up_trans_source)
]
up_trans_target

[array([], shape=(0, 3), dtype=int64),
 array([], shape=(0, 3), dtype=int64),
 array([[    6,     2,     0],
        [    7,     2,     0],
        [   14,     2,     0],
        ...,
        [16279,     2,     0],
        [16286,     2,     0],
        [16287,     2,     0]]),
 array([[ 1032,     3,     0],
        [ 1033,     3,     0],
        [ 1034,     3,     0],
        ...,
        [32765,     3,     0],
        [32766,     3,     0],
        [32767,     3,     0]]),
 array([[ 1042,     4,     0],
        [ 1043,     4,     0],
        [ 1046,     4,     0],
        ...,
        [32763,     4,     0],
        [32766,     4,     0],
        [32767,     4,     0]]),
 array([[   36,     5,     0],
        [   37,     5,     0],
        [   38,     5,     0],
        ...,
        [15869,     5,     0],
        [15870,     5,     0],
        [15871,     5,     0]]),
 array([[   68,     6,     0],
        [   69,     6,     0],
        [   70,     6,     0],
        ...,
        [158

##### Assembling them in the STG table

In [19]:
# Down transitions are listed first, then up transitions. Sources and targets
# are concatenated in the same order so that their rows stay aligned.
source = np.concatenate(down_trans_source + up_trans_source)

target = np.concatenate(down_trans_target + up_trans_target, axis=0)

In [20]:
# One row per transition: [source state, target state, node index, direction flag].
stg_table = np.column_stack((source, target))

In [21]:
# One row per transition; four columns: source, target, node index, direction flag.
stg_table.shape

(188416, 4)

In [22]:
# Memory taken by the table's data buffer (number of elements times item size).
print("%d bytes" % stg_table.nbytes)

6029312 bytes


In [23]:
# First ten rows; the down transitions were concatenated first.
stg_table[0:10, :]

array([[36, 32,  2,  1],
       [37, 33,  2,  1],
       [38, 34,  2,  1],
       [39, 35,  2,  1],
       [44, 40,  2,  1],
       [45, 41,  2,  1],
       [46, 42,  2,  1],
       [47, 43,  2,  1],
       [52, 48,  2,  1],
       [53, 49,  2,  1]])

In [24]:
# Ten rows from the later part of the table, where the up transitions were concatenated.
stg_table[188000:188010, :]

array([[ 9476, 25860,    14,     0],
       [ 9477, 25861,    14,     0],
       [ 9478, 25862,    14,     0],
       [ 9479, 25863,    14,     0],
       [ 9484, 25868,    14,     0],
       [ 9485, 25869,    14,     0],
       [ 9486, 25870,    14,     0],
       [ 9487, 25871,    14,     0],
       [ 9492, 25876,    14,     0],
       [ 9493, 25877,    14,     0]])

##### Building transition rates table

In [25]:
# When defining transition rates, selected rates can be given values other than 1
# (or other than the randomly drawn ones).
# Rates are named 'u_nodename' (up) or 'd_nodename' (down).
# MATLAB example: chosen_rates={'u_ERBB1','u_ERBB2','u_ERBB3'}; chosen_rates_vals=zeros(size(chosen_rates));
# Alternatively, leave both lists empty:
chosen_rates = []
chosen_rates_vals = []

In [26]:
# Then we generate the table of transition rates: the first row holds the 'up' rates, the second row the 'down' rates,
# both in the order of 'nodes'.
# ARGUMENTS
# distr_type={'uniform','random'}; % <uniform> assigns a value of 1 to all params. other option: <random>
# meanval=[]; sd_val=[]; % if 'random' is chosen, the mean and standard dev of a normal distrib has to be defined 
# transition_rates_table=fcn_trans_rates_table(nodes,distr_type{1},meanval,sd_val,chosen_rates,chosen_rates_vals);

In [27]:
# Available distribution types: 'uniform' assigns a value of 1 to all rates; the other option is 'random'.
distr_type = ['uniform', 'random'] # <uniform> assigns a value of 1 to all params. other option: <random>
# For 'random', the mean and standard deviation of a normal distribution have to be defined; left empty here.
meanval = [] # if 'random' is chosen, the mean and standard dev of a normal distrib has to be defined
sd_val = []

In [28]:
# function transition_rates_table=fcn_trans_rates_table(nodes,uniform_or_rand,meanval,sd_val,chosen_rates,chosen_rates_vals)
def fcn_trans_rates_table(nodes, uniform_or_rand, meanval, sd_val, chosen_rates, chosen_rates_vals):
    """Build the table of transition rates for ``nodes``.

    Only the 'uniform' case of the MATLAB original is ported so far: every
    rate is 1. The remaining MATLAB code is kept as comments at the end of
    the function as a reference for the port.

    Parameters
    ----------
    nodes : sequence
        Node names; only its length is used.
    uniform_or_rand : str
        'uniform' or 'random'. 'random' is not implemented; a warning is
        issued and uniform rates are returned.
    meanval, sd_val
        Parameters of the normal distribution for 'random'. Currently unused.
    chosen_rates, chosen_rates_vals
        Names ('u_nodename' / 'd_nodename') and values of rates to override.
        Currently unused; a warning is issued if ``chosen_rates`` is non-empty.

    Returns
    -------
    ndarray of int64, shape (2, len(nodes))
        First row: 'up' rates, second row: 'down' rates, in the order of
        ``nodes``.
    """
    import warnings

    # The table is sized from the `nodes` argument rather than from the
    # notebook-level variable `n`, so the function does not depend on hidden state.
    n_nodes = len(nodes)

    has_overrides = chosen_rates is not None and len(chosen_rates) > 0
    if uniform_or_rand != "uniform" or has_overrides:
        # Make the gap in the port visible instead of silently returning ones.
        warnings.warn(
            "fcn_trans_rates_table only implements uniform rates; "
            "uniform_or_rand=%r and chosen_rates are ignored" % (uniform_or_rand,),
            stacklevel=2,
        )

    # MATLAB: rate_vals_num=ones(1,2*length(nodes));
    #         transition_rates_table = transpose(reshape(rate_vals_num,length(nodes),2));
    return np.ones((2, n_nodes), dtype=np.int64)

    # ---- MATLAB reference for the parts that are not ported yet ----
    # n=numel(nodes);
    #
    # if strcmp(uniform_or_rand,'uniform')
    #     rate_vals_num=ones(1,2*length(nodes)); % abs(ones(1,2*length(nodes)) + normrnd(0,0.5,1,2*length(nodes)));
    # elseif strcmp(uniform_or_rand,'random')
    #     % meanval,sd_val
    #     rate_vals_num=normrnd(meanval,sd_val,1,2*length(nodes));
    #     % don't let negative values occur!!
    #     if any(rate_vals_num<0)
    #         neg_cnt=0;
    #         while any(rate_vals_num<0)
    #             disp('negative value, reassigning')
    #             rate_vals_num=normrnd(meanval,sd_val,1,2*length(nodes));
    #             neg_cnt=neg_cnt+1;
    #             if neg_cnt>100
    #                 break
    #             end
    #         end
    #     end
    # else
    #     disp('choose "uniform" or "random" to generate transition rates')
    #     rate_vals_num=[];
    # end
    #
    # if ~isempty(rate_vals_num)
    #
    # % changing individual transition rate values
    # for k=1:numel(chosen_rates)
    #     split_rate = strsplit(chosen_rates{k},'_');
    #     if numel(split_rate)>2
    #         node_mod_ind=strjoin(split_rate(2:end),'_');
    #     else
    #         node_mod_ind=split_rate{2};
    #     end
    #
    #     if strcmp(split_rate{1},'d')
    #         rate_vals_num(find(strcmp(nodes,node_mod_ind))+n)=chosen_rates_vals(k);
    #     elseif strcmp(split_rate{1},'u')
    #         % disp(k)
    #         rate_vals_num(strcmp(nodes,node_mod_ind))=chosen_rates_vals(k);
    #     else
    #         disp('wrong name for transition rate, has to be "u_nodename" or "d_nodename"')
    #     end
    # end
    #
    # % rate_vals_cell=num2cell(rate_vals_num);
    # % rate_names (create by strcat)
    # % [u_cc,u_kras,u_dna_dam,u_chek1,u_mk2,u_atm_atr,u_hr,u_cdc25b,u_g2m_trans,u_cell_death, ...
    # %     d_cc,d_kras,d_dna_dam,d_chek1,d_mk2,d_atm_atr,d_hr,d_cdc25b,d_g2m_trans,d_cell_death]=deal(rate_vals_cell{:});
    # transition_rates_table = transpose(reshape(rate_vals_num,length(nodes),2));
    # end

In [29]:
# MATLAB's distr_type{1} is 1-based and selects 'uniform'; the 0-based equivalent is index 0.
# (meanval and sd_val are empty, which is only meaningful for the uniform case.)
transition_rates_table = fcn_trans_rates_table(nodes, distr_type[0], meanval, sd_val, chosen_rates, chosen_rates_vals)

##### Building the (sparse) transition matrix

In [87]:
# Number of states, i.e. the side length of the square transition matrix.
# MATLAB: dim_matr=2^size(transition_rates_table,2);
dim_matr = pow(2, transition_rates_table.shape[1])

# stg_table columns are [source state, target state, node index, direction flag].
# MATLAB: state_transitions_inds=[trans_source_states_mat, trans_target_states_mat, cell2mat(node_inds), up_down_inds_arr];
# MATLAB: trans_source_states_mat=stg_table(:,1); trans_target_states_mat=stg_table(:,2); up_down_inds_arr=stg_table(:,4);

# Position of each transition's rate in the column-major flattening of the
# two-row rates table: 2 * node index + direction flag.
# MATLAB (1-based): rate_inds=(stg_table(:,3)-1)*2+stg_table(:,4);
rate_inds = ((stg_table[:, 2])*2)+stg_table[:, 3]

# Column-major flattening, matching MATLAB's linear indexing.
# ndarray.ravel replaces np.reshape(..., np.product(shape), order="F")[0, :]
# because np.product was removed in NumPy 2.0.
reshaped_trt = transition_rates_table.ravel(order="F")

# MATLAB: sub2ind(size(transition_rates_table),up_down_inds_arr, stg_table(:,3));
# MATLAB: B=sparse(stg_table(:,1), stg_table(:,2), transition_rates_table(rate_inds)/sum(transition_rates_table(:)),dim_matr,dim_matr);
# Off-diagonal part: entry (source, target) is the rate of that transition
# divided by the sum of all rates.
B = sp.sparse.csr_matrix(
    (
        reshaped_trt[rate_inds] / np.sum(transition_rates_table),
        (stg_table[:, 0], stg_table[:, 1]),
    ),
    shape=(dim_matr, dim_matr),
)

# MATLAB alternative that builds the diagonal together with the off-diagonal part:
# diag_vals=1-sum(A_sparse_fast,2); % dim_matr_arr=(1:dim_matr)';
# rows = [stg_table(:,1); (1:dim_matr)'];
# cols=[stg_table(:,2); (1:dim_matr)'];
# vals=[transition_rates_table(rate_inds)/sum(transition_rates_table(:));diag_vals];
# A_sparse_fast = sparse(rows,cols,vals,dim_matr,dim_matr);

# Add the diagonal 1 - (row sum of B), so that every row of A_sparse_fast sums to 1.
# MATLAB: A_sparse_fast = B + (speye(size(B)) - diag(sum(B,2)));
A_sparse_fast = B + (
    sp.sparse.eye(B.shape[0])
    - sp.sparse.diags(np.asarray(B.sum(axis=1)).ravel())
)

# Not ported yet (kinetic matrix):
# if ~isempty(kin_matr_flag)
#     K_sparse=(transpose(A_sparse_fast) - speye(size(A_sparse_fast)) )*sum(transition_rates_table(:));
# else
#     K_sparse=[];
# end


In [88]:
# Display the sparse matrix summary (shape, dtype, number of stored elements, storage format).
B

<32768x32768 sparse matrix of type '<class 'numpy.float64'>'
	with 188416 stored elements in Compressed Sparse Row format>