# 1 Notebook Setup

In [1]:
# Standard-library imports, then third-party ones, used throughout the notebook.
import logging
import os
import pickle
import sys

import pandas as pd

# Raise the root logger to INFO so INFO-level log messages are shown in cell outputs.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

In [2]:
# Put the sibling '../src' directory first on the module search path so the
# project modules can be imported from this notebook.
src_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir, 'src/'))
sys.path.insert(0, src_dir)

# 2 Load Baseline Model & Extracted Events, Parse Events

In [3]:
# Inputs are read from the relative 'input/' folder, which is created if it
# does not exist yet, so the repository root stays untouched.
in_dir = 'input/'
if not os.path.exists(in_dir):
    os.makedirs(in_dir)

# Supply the file name of your baseline model here.
model_file = 'BooleanTcell.xlsx'
baseline = os.path.join(in_dir, model_file)

In [4]:
from runAccordion import get_model

# get_model() loads the baseline model file into two dictionaries:
# 'model_dict' and 'regulators'.
model_dict, regulators = get_model(baseline)

# Preview only the key columns of the baseline spreadsheet.
key_columns = [
    'Element name',
    'Element IDs',
    'Positive regulators',
    'Negative regulators',
    'Levels',
    'Initial 0',
]
df_model = pd.read_excel(baseline, index_col=0)
df_model[key_columns]

Unnamed: 0_level_0,Element name,Element IDs,Positive regulators,Negative regulators,Levels,Initial 0
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,AKT,P31749,"(PDK1,MTORC2)",AKT_OFF,2,0
2,AKT_OFF,P31749,,,2,0
3,AP1,"P05412,P01100","(FOS_DD,JUN)",,2,0
4,CA,,TCR,,2,0
5,CD122,P14784,,,2,1
...,...,...,...,...,...,...
58,TCR_HIGH,"P01850,P01848",,,2,0
59,TCR_LOW,"P01850,P01848",,,2,0
60,TGFBETA,P01137,,,2,0
61,TSC,"Q92574,P49815",,AKT,2,1


In [5]:
# 'model_dict' holds the key information for each baseline model element.
# 'regulators' maps each baseline model element to the set of elements that regulate it.
# Evaluate 'model_dict' instead of 'regulators' below to inspect the per-element details.
regulators

{'AKT': {'AKT_OFF', 'MTORC2', 'PDK1'},
 'AKT_OFF': set(),
 'AP1': {'FOS_DD', 'JUN'},
 'CA': {'TCR'},
 'CD122': set(),
 'CD132': set(),
 'CD25': {'AP1', 'FOXP3', 'NFAT', 'NFKAPPAB', 'STAT5'},
 'CD28': set(),
 'ERK': {'MEK2'},
 'FOS': {'ERK'},
 'FOS_D': {'FOS'},
 'FOS_DD': {'FOS_D'},
 'FOS_DDD': set(),
 'FOXP3': {'MTOR_DD', 'NFAT', 'SMAD3', 'STAT5'},
 'IL2': {'AP1', 'FOXP3', 'IL2', 'NFAT', 'NFKAPPAB'},
 'IL2_EX': {'IL2', 'IL2_EX'},
 'IL2R': {'CD122', 'CD132', 'CD25'},
 'JAK3': {'IL2R', 'IL2_EX'},
 'JNK': {'MKK7'},
 'JUN': {'JNK'},
 'MEK2': {'RAF'},
 'MKK7': {'TAK1'},
 'MTOR': {'MTORC1_D', 'MTORC2_D'},
 'MTOR_D': {'MTOR'},
 'MTOR_DD': {'MTOR_D'},
 'MTOR_DDD': set(),
 'MTOR_DDDD': set(),
 'MTORC1': {'MTORC1_OFF', 'RHEB'},
 'MTORC1_D': {'MTORC1'},
 'MTORC1_OFF': set(),
 'MTORC2': {'PI3K_HIGH', 'PI3K_LOW', 'S6K1'},
 'MTORC2_D': {'MTORC2'},
 'MTORC2_DD': set(),
 'NFAT': {'CA', 'NFAT_OFF'},
 'NFAT_OFF': set(),
 'NFKAPPAB': {'AKT', 'PKCTHETA'},
 'PDK1': {'PIP3'},
 'PI3K': {'PI3K_HIGH', 'PI3K_LO

In [6]:
# Supply the file name of the machine-reading output (extracted events) here.
reading_file = 'MachineReadingOutput.csv'
# Build the path from in_dir rather than a second hard-coded 'input/' literal,
# so the events file always follows the configured input folder.
reading = in_dir + reading_file

In [7]:
# Load the extracted events and preview only the key columns of each event.
event_columns = ['regulator_name', 'ID1', 'regulated_name', 'ID2', 'interaction', 'PaperID']
df_ro = pd.read_csv(reading)
df_ro[event_columns]

Unnamed: 0,regulator_name,ID1,regulated_name,ID2,interaction,PaperID
0,AKT,P31749,CD4,P01730,decreases,PMC2275380
1,AKT,P31749,CTRL,P40313,decreases,PMC2275380
2,TGFBETA,P01137,AKT,P31749,increases,PMC2275380
3,Foxp3,Q9BZS1,Ctla4,P16410,increases,PMC2275380
4,Foxp3,Q9BZS1,Gpr83,Q9NYM4,increases,PMC2275380
...,...,...,...,...,...,...
112,MTORC2,Q6R328,MTORC2,Q6R327,increases,PMC4418530
113,CD28,P10747,MTORC2,Q6R328,increases,PMC4418530
114,IL2_EX,P60568,MTORC2,Q6R329,increases,PMC4418530
115,IL2_R,P01589,MTORC2,Q6R330,increases,PMC4418530


In [8]:
from runAccordion import parseExtension

# Parse the reading output file (extracted events) into a set whose members have
# the form (regulator element, regulated element, interaction sign '+' or '-').
exttt = parseExtension(model_dict, reading)
exttt

{('AKT', 'CD4_ext', '-'),
 ('AKT', 'CTRL_ext', '-'),
 ('AKT', 'FOXO1', '-'),
 ('AKT', 'FoxO3_ext', '-'),
 ('AKT', 'Itk_ext', '-'),
 ('AKT', 'MAGI1_ext', '-'),
 ('AKT', 'MEK1_ext', '-'),
 ('AKT', 'MTORC2', '-'),
 ('AKT', 'TBK1_ext', '+'),
 ('BAM32_ext', 'PAK1_ext', '+'),
 ('BIM_ext', 'PAK1_ext', '+'),
 ('Bcl2_ext', 'CD3_ext', '+'),
 ('Bcl2l11_ext', 'CD4_ext', '+'),
 ('CD25', 'MTORC2', '+'),
 ('CD28', 'MTORC2', '+'),
 ('CD4_ext', 'IL17A_ext', '-'),
 ('CD5_ext', 'CK2_ext', '+'),
 ('CD5_ext', 'P38_ext', '+'),
 ('CHK1_ext', 'AKT', '+'),
 ('CK2_ext', 'AKT', '+'),
 ('CK2_ext', 'CD4_ext', '+'),
 ('CK2_ext', 'CDC37_ext', '+'),
 ('CK2_ext', 'PTEN', '-'),
 ('CTLA4_ext', 'AKT', '-'),
 ('ERK', 'CK2_ext', '+'),
 ('ERK', 'PAK1_ext', '+'),
 ('ERK', 'S5B_ext', '+'),
 ('FASL_ext', 'FAS_ext', '+'),
 ('FOXO1', 'Foxo3a_ext', '-'),
 ('FOXO1', 'PTEN', '+'),
 ('FOXO3_ext', 'FOXO3_ext', '+'),
 ('FOXP3', 'Ctla4_ext', '+'),
 ('FOXP3', 'Gpr83_ext', '+'),
 ('FOXP3', 'Itk_ext', '+'),
 ('HIF1alpha_ext', 'IL17A_ext',

# 3 Clustering, Generate Extension Candidates, Merging

In [9]:
# Outputs are written to the relative 'output/' folder so the repository root
# stays untouched.
out_dir = 'output/'

# Create the folder when it is missing. The path must be passed explicitly:
# a bare os.makedirs() call raises TypeError. exist_ok=True makes the cell safe
# to re-run when the folder already exists.
os.makedirs(out_dir, exist_ok=True)

In [10]:
import subprocess

# Build the non-Python dependency (MCL) needed by runMarkovCluster().
# The steps are chained with '&&' so a failing step stops the remaining ones.
build_steps = [
    "cd ../dependencies/mcl-14-137",
    "./configure",
    "make",
    "make install",
    "make clean",
    "make distclean",
]
command = " && ".join(build_steps)
process = subprocess.Popen(command, shell=True)
# wait() blocks until the shell exits; its return code is shown as the cell output.
process.wait()

checking for a BSD-compatible install... /usr/bin/install -c
checking whether build environment is sane... yes
checking for a thread-safe mkdir -p... autofoo/install-sh -c -d
checking for gawk... no
checking for mawk... no
checking for nawk... no
checking for awk... awk
checking whether make sets $(MAKE)... yes
checking whether to enable maintainer-specific portions of Makefiles... no
checking for style of include used by make... GNU
checking for gcc... gcc
checking whether the C compiler works... yes
checking for C compiler default output file name... a.out
checking for suffix of executables... 
checking whether we are cross compiling... no
checking for suffix of object files... o
checking whether we are using the GNU C compiler... yes
checking whether gcc accepts -g... yes
checking for gcc option to accept ISO C89... none needed
checking dependency style of gcc... gcc3
checking  _____ weak test for C void* <=> unsigned int conversion... failed
  /\  custom check: num <-> void* failed

      ;  if (hk->n_at_most > hk->n_at_most)
                           ^


mv -f .deps/opt.Tpo .deps/opt.Po
gcc -DHAVE_CONFIG_H -I. -I.. -I..    -g -O2 -D_THREAD_SAFE  -MT io.o -MD -MP -MF .deps/io.Tpo -c -o io.o io.c
mv -f .deps/io.Tpo .deps/io.Po
gcc -DHAVE_CONFIG_H -I. -I.. -I..    -g -O2 -D_THREAD_SAFE  -MT err.o -MD -MP -MF .deps/err.Tpo -c -o err.o err.c
mv -f .deps/err.Tpo .deps/err.Po
gcc -DHAVE_CONFIG_H -I. -I.. -I..    -g -O2 -D_THREAD_SAFE  -MT hash.o -MD -MP -MF .deps/hash.Tpo -c -o hash.o hash.c
mv -f .deps/hash.Tpo .deps/hash.Po
gcc -DHAVE_CONFIG_H -I. -I.. -I..    -g -O2 -D_THREAD_SAFE  -MT heap.o -MD -MP -MF .deps/heap.Tpo -c -o heap.o heap.c
mv -f .deps/heap.Tpo .deps/heap.Po
gcc -DHAVE_CONFIG_H -I. -I.. -I..    -g -O2 -D_THREAD_SAFE  -MT let.o -MD -MP -MF .deps/let.Tpo -c -o let.o let.c
mv -f .deps/let.Tpo .deps/let.Po
gcc -DHAVE_CONFIG_H -I. -I.. -I..    -g -O2 -D_THREAD_SAFE  -MT list.o -MD -MP -MF .deps/list.Tpo -c -o list.o list.c
mv -f .deps/list.Tpo .deps/list.Po
gcc -DHAVE_CONFIG_H -I. -I.. -I..    -g -O2 -D_THREAD_SAFE  -MT gralloc.o

mv -f .deps/clminfo.Tpo .deps/clminfo.Po
gcc -DHAVE_CONFIG_H -I. -I../.. -I../../src -I../..    -g -O2 -D_THREAD_SAFE  -MT clmimac.o -MD -MP -MF .deps/clmimac.Tpo -c -o clmimac.o clmimac.c
mv -f .deps/clmimac.Tpo .deps/clmimac.Po
gcc -DHAVE_CONFIG_H -I. -I../.. -I../../src -I../..    -g -O2 -D_THREAD_SAFE  -MT clmadjust.o -MD -MP -MF .deps/clmadjust.Tpo -c -o clmadjust.o clmadjust.c
mv -f .deps/clmadjust.Tpo .deps/clmadjust.Po
gcc -DHAVE_CONFIG_H -I. -I../.. -I../../src -I../..    -g -O2 -D_THREAD_SAFE  -MT clmresidue.o -MD -MP -MF .deps/clmresidue.Tpo -c -o clmresidue.o clmresidue.c
mv -f .deps/clmresidue.Tpo .deps/clmresidue.Po
gcc -DHAVE_CONFIG_H -I. -I../.. -I../../src -I../..    -g -O2 -D_THREAD_SAFE  -MT clmclose.o -MD -MP -MF .deps/clmclose.Tpo -c -o clmclose.o clmclose.c
mv -f .deps/clmclose.Tpo .deps/clmclose.Po
gcc -DHAVE_CONFIG_H -I. -I../.. -I../../src -I../..    -g -O2 -D_THREAD_SAFE  -MT clmmate.o -MD -MP -MF .deps/clmmate.Tpo -c -o clmmate.o clmmate.c
mv -f .deps/clmmate

mv -f .deps/mcxtest2.Tpo .deps/mcxtest2.Po
gcc  -g -O2 -D_THREAD_SAFE  -lm  -o mcxtest2 mcxtest2.o ../mcl/libmcl.a ../clew/libclew.a  ../gryphon/libgryphon.a ../impala/libimpala.a ../../util/libutil.a -lm 
gcc -DHAVE_CONFIG_H -I. -I../.. -I../../src -I../..    -g -O2 -D_THREAD_SAFE  -MT mcxtest.o -MD -MP -MF .deps/mcxtest.Tpo -c -o mcxtest.o mcxtest.c
mv -f .deps/mcxtest.Tpo .deps/mcxtest.Po
gcc  -g -O2 -D_THREAD_SAFE  -lm  -o mcxtest mcxtest.o ../mcl/libmcl.a ../clew/libclew.a  ../gryphon/libgryphon.a ../impala/libimpala.a ../../util/libutil.a -lm 
gcc -DHAVE_CONFIG_H -I. -I../.. -I../../src -I../..    -g -O2 -D_THREAD_SAFE  -MT mcxminusmeet.o -MD -MP -MF .deps/mcxminusmeet.Tpo -c -o mcxminusmeet.o mcxminusmeet.c
mv -f .deps/mcxminusmeet.Tpo .deps/mcxminusmeet.Po
gcc  -g -O2 -D_THREAD_SAFE  -lm  -o mcxminusmeet mcxminusmeet.o ../mcl/libmcl.a ../clew/libclew.a  ../gryphon/libgryphon.a ../impala/libimpala.a ../../util/libutil.a -lm 
gcc -DHAVE_CONFIG_H -I. -I../.. -I../../src -I../..   

test -z "/usr/local/bin" || ../../autofoo/install-sh -c -d "/usr/local/bin"
  /usr/bin/install -c mcx mcxsubs mcxmap mcxassemble mcxarray mcxdump mcxload mcxrand '/usr/local/bin'
make[3]: Nothing to be done for `install-data-am'.
Making install in shmcx
test -z "/usr/local/bin" || ../../autofoo/install-sh -c -d "/usr/local/bin"
  /usr/bin/install -c mcxi '/usr/local/bin'
make[3]: Nothing to be done for `install-data-am'.
Making install in shmcxquery
make[3]: Nothing to be done for `install-exec-am'.
make[3]: Nothing to be done for `install-data-am'.
Making install in alien
make[4]: Nothing to be done for `install-exec-am'.
make[4]: Nothing to be done for `install-data-am'.
Making install in include
Making install in .
make[3]: Nothing to be done for `install-exec-am'.
make[3]: Nothing to be done for `install-data-am'.
Making install in scripts
test -z "/usr/local/bin" || ../autofoo/install-sh -c -d "/usr/local/bin"
 /usr/bin/install -c clxdo '/usr/local/bin'
make[2]: Nothing to be done

0

In [11]:
# Inflation parameter of the Markov Clustering Algorithm (MCL);
# it is passed to runMarkovCluster() in the next cell.
inflation = 2

In [12]:
from markovCluster import runMarkovCluster

# runMarkovCluster() prepares the inputs for the Markov clustering algorithm (MCL):
#   1. it creates a pickle file for the output clusters with interaction information
#   2. it returns a modified baseline model (without introducing new nodes)
res, new_base_model = runMarkovCluster(out_dir, exttt, model_dict, inflation)

INFO:root:Running the following command through MCL algorithm:
mcl output/abc_model --abc -I 2 -o output/markov_cluster

[mcl] new tab created
[mcl] pid 73975
 ite -----------------  chaos  time hom(avg,lo,hi) m-ie m-ex i-ex fmv
  1  .................  11.98  0.00 1.01/0.05/1.70 4.57 4.57 4.57  71
  2  .................  19.01  0.00 0.70/0.30/1.05 3.64 2.70 12.35  94
  3  .................  11.78  0.00 0.73/0.32/1.14 1.71 0.44 5.46  94
  4  .................   6.19  0.00 0.87/0.39/1.21 3.05 0.36 1.97  94
  5  .................   1.98  0.00 0.92/0.48/1.35 2.51 0.38 0.76  77
  6  .................   1.07  0.00 0.97/0.53/1.02 1.45 0.59 0.45  29
  7  .................   0.57  0.00 0.98/0.60/1.00 1.15 0.68 0.31   9
  8  .................   0.36  0.00 0.99/0.75/1.03 1.00 0.89 0.27   0
  9  .................   0.16  0.00 1.00/0.88/1.16 0.99 0.97 0.27   0
 10  .................   0.31  0.00 1.00/0.81/1.00 0.99 0.98 0.27   0
 11  .................   0.24  0.00 0.99/0.64/1.00 0.99 0.94 0.25   0


In [13]:
# Take a look at the clusters found by MCL, printing the cluster file line by line.
with open(out_dir + 'markov_cluster', 'r') as f:
    for line in f:
        # Each line read from the file already ends with a newline; strip it so
        # print() does not add a second one and double-space the listing.
        print(line.rstrip('\n'))

AKT_OFF	AKT	CHK1_ext	CTLA4_ext	IFNgamma_ext	MEK1_ext	PD1_ext	PDK1	SHIP1_ext	TBK1_ext	TGFBETA	TIL_ext	CD4_ext	Bcl2l11_ext	CTRL_ext	FOXO1	FoxO3_ext	Foxo3a_ext	MAGI1_ext

PIP3	PTEN	TCR	HSC_ext	PI3K_HIGH	PI3K_LOW	NEDD4_ext	TCR_HIGH	PIP3_HIGH	PIP3_LOW	P53_ext

CD25	FOXP3	Ctla4_ext	SMAD3	Gpr83_ext	IL2

MTOR	MTOR_DD	MTORC2_D	MTOR_D	Pak1_ext

CD122	IL2R	CD132	IL2_EX	JAK3

MTORC1_D	MTORC1_OFF	MTORC1	RHEB	TSC

JUN	MKK7	JNK	TAK1

PDL1_ext	CD8_ext	Treg_ext	gp120_ext

MEK2	IQGAP1_ext	RAF	RAS

BAM32_ext	PAK1_ext	BIM_ext	siPAK1_ext

MTORC2	NFKAPPAB	CD28

FOS_DD	FOS	FOS_D

CA	NFAT	NFAT_OFF

Itk_ext	Myc_ext	Th17_ext

CK2_ext	CDC37_ext

PI3K	SHIP_ext

STAT5	STAT5_D

Bcl2_ext	CD3_ext

P38_ext	CD5_ext

SP_ext	CD69_ext

ERK	S5B_ext

FASL_ext	FAS_ext

IL17A_ext	HIF1alpha_ext

S6K1	PS6

PKCTHETA	TCR_LOW

AP1



In [14]:
# Show the clusters with interaction information added; this is the first value
# returned by runMarkovCluster().
res

[[1,
  ['AKT', 'FoxO3_ext', '-'],
  ['MTORC2', 'AKT', '+'],
  ['MEK1_ext', 'PTEN', '+'],
  ['TBK1_ext', 'CD4_ext', '+'],
  ['AKT', 'FOXO1', '-'],
  ['AKT', 'MEK1_ext', '-'],
  ['SHIP1_ext', 'AKT', '-'],
  ['PIP3', 'AKT', '+'],
  ['CK2_ext', 'AKT', '+'],
  ['MTOR', 'AKT', '+'],
  ['AKT', 'TBK1_ext', '+'],
  ['Itk_ext', 'CD4_ext', '+'],
  ['TCR', 'AKT', '-'],
  ['FOXO1', 'Foxo3a_ext', '-'],
  ['CHK1_ext', 'AKT', '+'],
  ['Itk_ext', 'CD4_ext', '-'],
  ['PDK1', 'AKT', '+'],
  ['CK2_ext', 'CD4_ext', '+'],
  ['AKT', 'MTORC2', '-'],
  ['CTLA4_ext', 'AKT', '-'],
  ['TIL_ext', 'AKT', '-'],
  ['MEK1_ext', 'AKT', '-'],
  ['PI3K', 'AKT', '+'],
  ['MTOR', 'TBK1_ext', '+'],
  ['PI3K', 'AKT', '-'],
  ['TGFBETA', 'AKT', '+'],
  ['TBK1_ext', 'FOXO1', '+'],
  ['TCR', 'CD4_ext', '-'],
  ['TCR', 'MEK1_ext', '+'],
  ['FOXO1', 'PTEN', '+'],
  ['IFNgamma_ext', 'AKT', '+'],
  ['TBK1_ext', 'AKT', '-'],
  ['PTEN', 'AKT', '+'],
  ['AKT', 'CTRL_ext', '-'],
  ['AKT', 'CD4_ext', '-'],
  ['AKT', 'Itk_ext', '-'],
  [

In [15]:
# Show the modified baseline model (without introducing new nodes); this is the
# second value returned by runMarkovCluster().
new_base_model

{'AKT': {'AKT_OFF', 'MTORC2', 'PDK1'},
 'AKT_OFF': set(),
 'AP1': {'FOS_DD', 'JUN'},
 'CA': {'TCR'},
 'CD122': set(),
 'CD132': set(),
 'CD25': {'AP1', 'FOXP3', 'NFAT', 'NFKAPPAB', 'STAT5'},
 'CD28': set(),
 'ERK': {'MEK2'},
 'FOS': {'ERK'},
 'FOS_D': {'FOS'},
 'FOS_DD': {'FOS_D'},
 'FOS_DDD': set(),
 'FOXP3': {'MTOR_DD', 'NFAT', 'SMAD3', 'STAT5'},
 'IL2': {'AP1', 'FOXP3', 'IL2', 'NFAT', 'NFKAPPAB'},
 'IL2_EX': {'IL2', 'IL2_EX'},
 'IL2R': {'CD122', 'CD132', 'CD25'},
 'JAK3': {'IL2R', 'IL2_EX'},
 'JNK': {'MKK7'},
 'JUN': {'JNK'},
 'MEK2': {'RAF'},
 'MKK7': {'TAK1'},
 'MTOR': {'MTORC1_D', 'MTORC2_D'},
 'MTOR_D': {'MTOR'},
 'MTOR_DD': {'MTOR_D'},
 'MTOR_DDD': set(),
 'MTOR_DDDD': set(),
 'MTORC1': {'MTORC1_OFF', 'RHEB'},
 'MTORC1_D': {'MTORC1'},
 'MTORC1_OFF': set(),
 'MTORC2': {'PI3K_HIGH', 'PI3K_LOW', 'S6K1'},
 'MTORC2_D': {'MTORC2'},
 'MTORC2_DD': set(),
 'NFAT': {'CA', 'NFAT_OFF'},
 'NFAT_OFF': set(),
 'NFKAPPAB': {'AKT', 'PKCTHETA'},
 'PDK1': {'PIP3'},
 'PI3K': {'PI3K_HIGH', 'PI3K_LO

In [16]:
# Threshold passed to merge_clusters() in the next cell.
# NOTE(review): presumably the return-path threshold used to decide which
# clusters get merged — confirm against runAccordion.merge_clusters.
return_path_TH = 5

In [17]:
# merge_clusters() records the indices of clusters to be merged based on the existence of return paths.
# It generates the 'grouped_ext_Merged' pickle file, which contains the merged clusters.

from runAccordion import merge_clusters
merge_clusters(regulators, out_dir, return_path_TH)

INFO:root:Merge clusters NO.1 and NO.2


# 4 Extend Model to Create New Spreadsheet

In [18]:
# Load the merged clusters to prepare them for the spreadsheet modification.
# The context manager guarantees the file handle is closed after loading.
# NOTE: pickle.load can execute arbitrary code — only load files produced by
# this workflow, never pickles from an untrusted source.
with open(out_dir + 'grouped_ext_Merged', 'rb') as p_file:
    merged_res = pickle.load(p_file)

In [19]:
# Build the file names used when writing the extended models.
# Output names start with out_dir followed by '<model name>_Extension_'.
original_model = in_dir + model_file
# os.path.splitext removes only the final extension, so a model file name that
# itself contains dots is kept intact (split('.')[0] would cut at the first dot).
extended_name = out_dir + os.path.splitext(model_file)[0] + '_Extension_'

In [20]:
# Choose the extension candidates: either the merged clusters ('merged_res', used here)
# or the unmerged clusters ('res', the commented-out alternative below).
candidate = merged_res
#candidate = res

In [21]:
from runAccordion import extend_model

# Call extend_model() once per candidate; output file names are numbered from 1.
for cluster_no, cluster in enumerate(candidate, start=1):
    extend_model(original_model, cluster, extended_name + str(cluster_no) + '.xlsx')