### Download data and install required libraries 

In [1]:
!git clone https://github.com/stasaki/DEcode.git

Cloning into 'DEcode'...
remote: Enumerating objects: 77, done.[K
remote: Counting objects: 100% (77/77), done.[K
remote: Compressing objects: 100% (66/66), done.[K
remote: Total 83 (delta 15), reused 49 (delta 5), pack-reused 6[K
Unpacking objects: 100% (83/83), done.


In [2]:
%cd DEcode

/content/DEcode


In [3]:
!pip install --upgrade setuptools
!pip2 install shap==0.27.0
!Rscript functions/install_package.R

Requirement already up-to-date: setuptools in /usr/local/lib/python2.7/dist-packages (41.2.0)
Collecting shap==0.27.0
[?25l  Downloading https://files.pythonhosted.org/packages/fe/93/9d41296314fe7ac42f1f117695d6d17b65b2c00790a88555522e52dbdfbc/shap-0.27.0.tar.gz (216kB)
[K     |████████████████████████████████| 225kB 48.3MB/s 
Building wheels for collected packages: shap
  Building wheel for shap (setup.py) ... [?25l[?25hdone
  Created wheel for shap: filename=shap-0.27.0-cp27-cp27mu-linux_x86_64.whl size=316528 sha256=70f401d43e1f21bc064a1868424694c48aa3904f50ed167439485ab40d88ac5f
  Stored in directory: /root/.cache/pip/wheels/64/d5/0a/e3b56a63e961ee9afe3c72f0b7cb9bae172c86bd63763c6655
Successfully built shap
Installing collected packages: shap
Successfully installed shap-0.27.0
Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)
trying URL 'https://cran.rstudio.com/src/contrib/rjson_0.2.20.tar.gz'
Content type 'application/x-gzip' length 99600 bytes

### Set location of input data

In [0]:
# Set input data

# Gene expression matrix
deg_data_file = "./data/toy/Transcriptome/expdata.txt"

# Location of RNA features
mRNA_data_loc = "./data/toy/RNA_features/"
mRNA_annotation_data = ["POSTAR","TargetScan"]

# Location of promoter features
promoter_data_loc = "./data/toy/Promoter_features/"
promoter_annotation_data = ["GTRD"]

# Genes used for traning, validation, and testing
train_genes = "./data/toy/Gene_splits/train.txt.gz"
validate_genes = "./data/toy/Gene_splits/validate.txt.gz"
test_genes = "./data/toy/Gene_splits/test.txt.gz"    

# Location of hyper-parameter
params_loc='./pretrained/Tissue_gene_params.json'

# Output directory
outloc='./train_out/full_model/'

### Define main function

In [0]:
import os
import sys
sys.path.append('./functions/')
import data
import model_utils
import pandas as pd
import numpy as np
import json

#os.environ["CUDA_VISIBLE_DEVICES"]="0"

def main(params):
    from datetime import datetime
    from keras.callbacks import EarlyStopping, ModelCheckpoint
    from keras.models import load_model
    import numpy as np
    import metrics
    import layer_utils
    import network
    
    print(params)
    just_return_model=False
    
    # model parameters and learning parameters
    max_epoch = 100
    batch_size = 128
    
    # batch initialization
    train_steps, train_batches = data.batch_iter(X_mRNA_train.values[:,1],
                                                 X_promoter_train.values[:,1],
                                                 Y_train.values[:,1:],
                                                 batch_size,
                                                 shuffle=True)
    valid_steps, valid_batches = data.batch_iter(X_mRNA_validate.values[:,1],
                                                 X_promoter_validate.values[:,1],
                                                 Y_validate.values[:,1:],
                                                 batch_size,
                                                 shuffle=True)
    test_steps, test_batches = data.batch_iter(X_mRNA_test.values[:,1],
                                               X_promoter_test.values[:,1],
                                               Y_test.values[:,1:],
                                               batch_size,
                                               shuffle=True)

    # Paramters for network structure
    params['n_feature_mRNA']=X_mRNA_train.values[:,1][0].shape[0]
    params['n_feature_promoter']=X_promoter_train.values[:,1][0].shape[0]
    params['n_out'] = Y_train.values[:,1:].shape[1]
    
    # Define network structure
    model = network.define_network(params)
    
    # If you don't need to traning model and just want to have model structure
    if just_return_model:
        return model
    
    # Set callback functions to early stop training and save the best model so far
    time_stamp=datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    
    callbacks = [EarlyStopping(monitor='val_loss', patience=10),
                    ModelCheckpoint(outloc+time_stamp+'_model.h5', monitor='val_loss', verbose=0,
                    save_best_only=True,
                    save_weights_only=False,
                    mode='min', period=1)]
    
    # Optimizing model
    result = model.fit_generator(train_batches, train_steps, 
                                 epochs=max_epoch,
                                 validation_data=valid_batches,
                                 validation_steps=valid_steps,
                                 callbacks=callbacks,
                                 max_queue_size=10,
                                 verbose=0)
    
    # Test performance
    # Load best model
    model = load_model(outloc+time_stamp+'_model.h5',
                   custom_objects={'pcor': metrics.pcor,
                                  'GlobalSumPooling1D': layer_utils.GlobalSumPooling1D})
    test_performance= np.array(model.evaluate_generator(test_batches,test_steps))
    np.savetxt(outloc+time_stamp+'_test_performance.txt',
               test_performance,delimiter="\t")
    
    # Saving optimization history
    with open(outloc+time_stamp+'_history.json', 'w') as f:
        json.dump(result.history, f)
    
    # Saving model and learning paramters
    with open(outloc+time_stamp+'_params.json', 'w') as f:
        json.dump(params, f)
    
    # Return validation loss for model selection
    validation_loss = np.amin(result.history['val_loss']) 
    
    return {'loss': validation_loss, 'status': STATUS_OK, 'model': model}

### Training model

In [6]:
! mkdir -p "$outloc"
shuffle="None"
# Prepare learning data
Y_train, Y_validate, Y_test, X_mRNA_train, X_mRNA_validate, X_mRNA_test, X_promoter_train, X_promoter_validate, X_promoter_test = data.prep_ml_data_split(
    deg_data_file=deg_data_file,
    mRNA_data_loc=mRNA_data_loc,
    mRNA_annotation_data=mRNA_annotation_data,
    promoter_data_loc=promoter_data_loc,
    promoter_annotation_data=promoter_annotation_data,
    train_genes=train_genes,
    validate_genes=validate_genes,
    test_genes=test_genes,
    outloc=outloc,
    shuffle=shuffle)

# Obtain hyper parameters
with open(params_loc) as f:
    params=json.load(f)  

# Training model
from hyperopt import STATUS_OK
for i in range(10):
    main(params)

# Summarizing the result
! Rscript functions/find_best_model.R "$outloc" &> /dev/null 
with open(outloc+"/summary/best_model.txt") as f:
    best_model=f.readline().rstrip()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item_labels[indexer[info_axis]]] = value
Using TensorFlow backend.
W0826 19:20:53.835948 140285664106368 deprecation_wrapper.py:119] From /usr/local/lib/python2.7/dist-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0826 19:20:53.864257 140285664106368 deprecation_wrapper.py:119] From /usr/local/lib/python2.7/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf

{u'n_feature_mRNA': 372, u'DNA_n_channel_1st': 120.0, u'Last_fullConLayer': 2.0, u'DNA_conv_strides': 1.0, u'ConvRelu': u'No', u'FullRelu': u'Yes', u'n_feature_mRNA_basic': 1, u'n_feature_promoter': 726, u'lr': 0.001, u'RNA_n_channel_1st': 160.0, u'Last_n_channel': 160.0, u'RNA_conv_strides': 2.0, u'DNA_n_ConvLayer': 4.0, u'RNA_n_ConvLayer': 1.0, u'n_out': 54, u'Add_basic_info': u'No'}


W0826 19:20:54.049827 140285664106368 deprecation_wrapper.py:119] From /usr/local/lib/python2.7/dist-packages/keras/backend/tensorflow_backend.py:133: The name tf.placeholder_with_default is deprecated. Please use tf.compat.v1.placeholder_with_default instead.

W0826 19:20:54.149852 140285664106368 deprecation_wrapper.py:119] From /usr/local/lib/python2.7/dist-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

W0826 19:20:54.603009 140285664106368 deprecation_wrapper.py:119] From /usr/local/lib/python2.7/dist-packages/keras/backend/tensorflow_backend.py:986: The name tf.assign_add is deprecated. Please use tf.compat.v1.assign_add instead.



{u'n_feature_mRNA': 271, u'DNA_n_channel_1st': 120.0, u'Last_fullConLayer': 2.0, u'DNA_conv_strides': 1.0, u'ConvRelu': u'No', u'FullRelu': u'Yes', u'n_feature_mRNA_basic': 1, u'n_feature_promoter': 601, u'lr': 0.001, u'RNA_n_channel_1st': 160.0, u'Last_n_channel': 160.0, u'RNA_conv_strides': 2.0, u'DNA_n_ConvLayer': 4.0, u'RNA_n_ConvLayer': 1.0, u'n_out': 54, u'Add_basic_info': u'No'}
{u'n_feature_mRNA': 271, u'DNA_n_channel_1st': 120.0, u'Last_fullConLayer': 2.0, u'DNA_conv_strides': 1.0, u'ConvRelu': u'No', u'FullRelu': u'Yes', u'n_feature_mRNA_basic': 1, u'n_feature_promoter': 601, u'lr': 0.001, u'RNA_n_channel_1st': 160.0, u'Last_n_channel': 160.0, u'RNA_conv_strides': 2.0, u'DNA_n_ConvLayer': 4.0, u'RNA_n_ConvLayer': 1.0, u'n_out': 54, u'Add_basic_info': u'No'}
{u'n_feature_mRNA': 271, u'DNA_n_channel_1st': 120.0, u'Last_fullConLayer': 2.0, u'DNA_conv_strides': 1.0, u'ConvRelu': u'No', u'FullRelu': u'Yes', u'n_feature_mRNA_basic': 1, u'n_feature_promoter': 601, u'lr': 0.001, u'RN

In [0]:
with open(outloc+"/summary/best_model.txt") as f:
    best_model=f.readline().rstrip()


### Compute Spearman's correlation between actual and predicted expression for each sample

In [0]:
# Prediction for test samples with the best model
model_utils.test_prediction(outloc,
                            best_model,
                            X_mRNA_test,
                            X_promoter_test,
                            Y_test)

In [9]:
! Rscript --vanilla --slave functions/calc_performance.R "$outloc$best_model" &> /dev/null 
pd.read_csv(outloc+best_model+'/test_data/cor_tbl.txt',sep="\t")

Unnamed: 0,sample,estimate,statistic,p.value,method,alternative
0,Adipose_Subcutaneous,0.207047,2601248.0,0.0006181839,Spearman's rank correlation rho,two.sided
1,Adipose_Visceral_Omentum,0.051037,3113030.0,0.403561,Spearman's rank correlation rho,two.sided
2,Adrenal_Gland,-0.007165,3303958.0,0.9067157,Spearman's rank correlation rho,two.sided
3,Artery_Aorta,0.204438,2609805.0,0.0007263359,Spearman's rank correlation rho,two.sided
4,Artery_Coronary,0.082329,3010378.0,0.177394,Spearman's rank correlation rho,two.sided
5,Artery_Tibial,0.304999,2279920.0,3.21005e-07,Spearman's rank correlation rho,two.sided
6,Bladder,0.258763,2431595.0,1.664295e-05,Spearman's rank correlation rho,two.sided
7,Brain_Amygdala,0.411786,1929609.0,1.786476e-12,Spearman's rank correlation rho,two.sided
8,Brain_Anterior_cingulate_cortex_BA24,0.388998,2004365.0,3.472624e-11,Spearman's rank correlation rho,two.sided
9,Brain_Caudate_basal_ganglia,0.368323,2072187.0,4.23777e-10,Spearman's rank correlation rho,two.sided


### Compute average DeepLIFT score for each regulator

In [10]:
# Estimate variable imporance using test samples
model_utils.compute_DeepLIFT(outloc,
                             best_model,
                             X_mRNA_test,
                             X_promoter_test,
                             Y_test)

0


W0826 19:27:07.657358 140285664106368 deprecation.py:323] From /usr/local/lib/python2.7/dist-packages/shap/explainers/deep/deep_tf.py:450: where (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53


In [11]:
! Rscript --vanilla --slave functions/summarize_DeepLIFT.R "$outloc$best_model" &> /dev/null 
pd.read_csv(outloc+best_model+'/DeepLIFT/RNA_importance_mean.txt',sep="\t")
pd.read_csv(outloc+best_model+'/DeepLIFT/promoter_importance_mean.txt',sep="\t")

Unnamed: 0,sample_name,AEBP2,AHR,AHRR,APC,AR,ARID1A,ARID1B,ARID2,ARID3A,ARID4B,ARNT,ARNT2,ARNTL,ASCL1,ASCL2,ATF1,ATF2,ATF3,ATF4,ATF5,ATF6,ATF7,ATOH1,ATRX,BACH1,BACH2,BARHL1,BARX1,BARX2,BATF,BATF3,BCL11A,BCL11B,BCL3,BCL6,BHLHE40,BRD4,CDC5L,CDX2,...,ZNF623,ZNF629,ZNF639,ZNF644,ZNF652,ZNF654,ZNF664,ZNF680,ZNF687,ZNF692,ZNF7,ZNF701,ZNF711,ZNF740,ZNF75A,ZNF76,ZNF766,ZNF768,ZNF770,ZNF778,ZNF784,ZNF792,ZNF8,ZNF816,ZNF83,ZNF84,ZNF85,ZNF92,ZSCAN16,ZSCAN2,ZSCAN21,ZSCAN22,ZSCAN29,ZSCAN31,ZSCAN4,ZSCAN5A,ZSCAN5DP,ZSCAN9,ZXDB,ZXDC
0,Adipose_Subcutaneous,0.0003346075,-0.00419,-0.001368,0.002606,-0.062573,7.5e-05,0.007642,-0.000335,-0.000493,-0.001375,-0.005749,-0.000183086,0.000345,-0.020718,-0.008279,0.004546,0.002894,-0.00835,0.000932,-0.000491,0.000658,-0.000438,3.1e-05,9.6e-05,-0.003111,0.001163,-7.1e-05,0.000517,0.003132,0.005774,-3.43259e-07,-0.00129,-0.004309,0.010666,-0.013139,-0.000942,-0.002903,3.5e-05,-0.00618,...,-0.000194,0.00313,-0.000694,0.000835,0.000603,0.0002442221,-0.000174,0.000722,0.002802,-0.001888,-0.001804,-0.002768,0.004027,0.00077,0.001181,-0.005704,-0.000201,3.5e-05,-0.008549,0.005569,-0.00117,0.003127,0.000216,-0.000632,-7.1e-05,-5.9e-05,-0.001268,-7.5e-05,0.000177,0.000506,0.000527,0.005083,-0.002429,0.000544,0.003949,0.002405,0.001795,-0.001066,0.001416,-0.00546
1,Adipose_Visceral_Omentum,0.0005258322,-0.005644,-0.001788,0.002077,-0.049528,0.000107,0.007555,-0.000276,-0.001579,-0.002271,-0.00186,-0.0003674158,0.000844,-0.015808,-0.002019,0.001558,-0.001019,-0.012159,0.000908,-0.000238,0.000994,-0.000678,8.7e-05,-0.000832,-0.003887,0.001083,0.000188,0.001712,0.002195,0.006735,-0.0002544933,-0.002472,-0.001576,0.0052,-0.01196,-0.000952,-0.0027,5.2e-05,-0.001121,...,9.8e-05,0.000595,-0.002171,0.000691,0.002948,0.0003053344,0.000147,0.000578,0.002627,-0.002198,-0.002049,-0.003501,0.00337,0.001063,0.001513,-0.004702,-0.000113,0.00023,-0.007775,0.005897,-0.001424,0.005519,0.000307,-0.000712,0.002219,-0.000285,-0.001299,-6.1e-05,-0.001082,0.000617,0.000572,-0.002927,-0.00307,8.6e-05,0.002724,0.002127,0.001135,-0.002844,0.00062,-0.004052
2,Adrenal_Gland,0.0001616744,-0.001091,0.000101,0.000941,-0.015609,0.000336,-0.001759,0.000812,-0.000471,-0.000373,-0.00026,-2.32127e-05,0.000503,0.010186,0.003824,0.003171,-0.001237,0.002315,-0.000263,0.000619,-0.000421,-0.00063,-0.000114,-0.000363,-0.000169,0.001511,0.000112,0.000876,-0.000568,-0.000618,-0.0003551392,0.001604,0.003532,-0.008504,0.003524,0.002026,0.000428,-6.7e-05,0.004585,...,0.000255,-0.005726,7.9e-05,0.000374,0.002368,-0.0001357475,1e-06,-0.000833,-0.000514,0.000258,0.000381,0.000358,-0.00147,0.000568,-0.00022,0.001746,2.7e-05,0.000369,-0.000815,-0.000752,2e-05,0.004251,-0.000145,-3e-05,-0.001555,-8e-06,0.000431,-0.000262,-0.000191,-0.000639,-0.000142,-0.004132,-1.2e-05,2.9e-05,-0.001197,-0.000962,-0.001244,0.004285,0.001105,0.000742
3,Artery_Aorta,3.575876e-07,-0.000303,-0.001413,0.002678,-0.02091,0.000138,0.004456,-0.000874,5.2e-05,-0.003106,-0.001607,8.057584e-05,7.4e-05,-0.006889,-0.007726,-0.000274,0.0024,0.004322,0.000792,-0.000488,0.000508,-0.000214,2.8e-05,-0.000123,-0.000366,0.000372,-6.4e-05,-4.2e-05,0.002343,-6.8e-05,0.0003239651,-0.005569,-0.004989,0.010829,-0.002231,0.001946,-0.001602,2.8e-05,5.7e-05,...,-0.000316,0.004338,-0.003658,0.002804,-0.001246,-1.374252e-05,-0.000493,0.001356,0.000158,-0.000518,-0.001514,-0.004576,0.004982,-0.000258,0.000939,-0.00375,-0.000151,0.000219,-0.008283,0.002297,-0.00057,0.003007,-6.7e-05,-0.000522,0.003586,-0.000414,-0.000347,5.1e-05,0.000828,0.001428,0.000329,0.00034,-0.001405,0.000834,0.001912,0.001723,-0.000524,-0.000886,0.000283,-0.002274
4,Artery_Coronary,-0.0002994203,0.000762,-0.001242,0.00235,-0.028607,0.001075,0.00431,-0.000891,-0.000326,-0.001023,-0.003893,-9.579547e-05,0.00089,-0.010784,-0.008674,-0.001377,0.002305,-0.003959,0.000547,-0.000381,-0.000296,-0.001948,0.000184,6.9e-05,-0.000681,0.000796,-6.8e-05,0.00091,0.001072,0.001799,0.0002871171,-0.002102,-0.002613,0.008845,-0.011279,0.001932,-0.00238,3.5e-05,-0.005141,...,-0.000209,0.00167,-0.001265,0.001117,-0.001741,0.0001547758,-0.00011,0.001061,-0.000862,-0.001075,-0.001607,-0.004712,0.003626,-0.000199,0.000791,-0.002338,-0.000167,0.000403,-0.008189,0.002802,-0.000507,0.000636,-0.000165,-0.000597,-0.00144,-0.000164,-0.000546,9.8e-05,0.000517,0.000862,0.000276,0.001337,-0.001551,0.00051,0.00277,0.002669,-4.6e-05,-0.004633,0.001628,-0.00645
5,Artery_Tibial,-0.0007869059,0.001349,-0.000852,0.003234,0.000871,0.001048,0.004975,-0.000723,-0.00077,-0.002827,-0.004729,-0.0002442245,0.000237,-0.009641,-0.007109,-0.003596,0.002198,-0.006301,3.7e-05,-0.000669,-0.000364,-0.001321,0.00013,1.7e-05,-0.000931,0.000477,-0.000119,0.000653,0.000632,0.002616,0.0004313657,-0.001094,-0.004006,0.008977,-0.01496,0.001796,-0.001132,-4.2e-05,-0.001989,...,-0.000295,-0.00035,-0.001538,0.001043,-0.000873,0.0001903548,-0.00017,0.00077,-0.002083,-0.00143,-0.001461,-0.00415,0.003454,0.000355,0.000518,-0.001034,-0.000133,0.000256,-0.005319,0.003511,-0.000539,-0.002284,-8.2e-05,-0.000724,-0.001599,-0.000621,-0.000572,1e-05,0.001384,-0.000178,0.000169,0.002332,-0.001314,-0.000151,0.003059,0.002439,0.0001,-0.005217,-3e-05,-0.003652
6,Bladder,0.0003787277,-0.005539,-0.001071,0.002355,0.041396,-0.001656,0.007459,-0.000884,-0.001419,-0.002765,0.007403,-0.0001467931,-0.001175,-0.00464,0.007013,-0.005017,-0.00586,-0.012359,-0.000524,-0.000178,0.001147,0.001145,-2.7e-05,-0.001478,-0.002438,-0.002822,0.000333,0.000488,0.000966,0.002466,-0.0004126969,-0.00904,-0.007863,0.001831,-0.006385,-0.002653,0.000478,0.000258,0.00943,...,0.000295,0.001864,-0.004969,0.001352,0.006113,0.0005591933,0.001163,0.00066,0.00232,-0.001735,-0.001358,0.001739,0.004531,0.001118,0.001426,-0.000463,7.1e-05,-0.000415,0.001414,0.003559,-0.001433,0.001672,0.000636,-0.000155,0.007956,-0.000971,-0.000907,-8.8e-05,-0.00111,0.000949,0.000267,0.000415,-0.001209,-0.00055,-0.000493,-0.000405,0.001111,-0.000175,-0.003288,0.008086
7,Brain_Amygdala,-0.002451743,0.01659,0.004662,-0.004015,0.073268,0.000729,-0.022341,0.001348,0.003361,0.010994,-0.005024,0.0004292027,-1.9e-05,0.042254,-0.001966,0.000994,0.005178,0.02299,-0.002646,0.000687,-0.002542,0.000758,0.000516,0.002625,0.009303,0.001158,-0.000273,-0.002989,-0.005726,-0.010491,0.0003941879,0.017654,0.010462,-0.017005,0.019553,0.002266,0.003916,-0.000535,-0.000947,...,-0.000266,-0.000204,0.005593,-0.000995,-0.01093,-0.001258139,0.00068,-0.001313,-0.007082,0.004676,0.004987,0.006785,-0.0075,-0.002656,-0.003372,0.009057,0.000278,-0.000343,0.018585,-0.011746,0.003392,-0.013181,-0.001272,0.001399,-0.007349,4.3e-05,0.0027,0.000181,0.002154,-0.00168,-0.001384,0.005261,0.005308,-0.000675,-0.007352,-0.002516,-0.003122,-0.00045,0.001319,0.003035
8,Brain_Anterior_cingulate_cortex_BA24,-0.002210161,0.015099,0.005166,-0.005686,0.08877,-0.000828,-0.022046,0.001474,0.004975,0.008627,-0.000419,0.0005829565,-0.000216,0.046404,-0.000795,-0.00203,0.008751,0.028505,-0.001817,0.000446,-0.002553,0.002047,0.000389,0.002953,0.010573,0.001314,-8.4e-05,-0.004016,-0.007463,-0.010762,0.000876249,0.017948,0.009462,-0.011137,0.025507,0.002068,0.005472,-0.000563,0.001782,...,-0.000394,0.001668,0.005121,-0.001088,-0.014086,-0.00147243,-9e-06,-0.000871,-0.008956,0.004815,0.005392,0.005291,-0.006255,-0.00292,-0.004533,0.010441,0.000329,-0.000241,0.016754,-0.012766,0.003871,-0.01773,-0.000959,0.001206,-0.006341,0.000874,0.002838,0.000429,0.003143,-0.001478,-0.001575,0.009191,0.007324,0.00024,-0.007556,-0.003043,-0.003183,0.000665,0.002266,0.001264
9,Brain_Caudate_basal_ganglia,-0.002278672,0.012482,0.004231,-0.004744,0.091918,-0.000414,-0.021047,0.001558,0.00357,0.007244,0.00254,0.0005521202,0.000123,0.04274,-0.000877,-0.003464,0.008042,0.028503,-0.001876,0.000648,-0.002018,0.001951,0.000368,0.002994,0.009452,0.00165,6.5e-05,-0.003462,-0.005972,-0.008737,0.0007638694,0.015572,0.009133,-0.013554,0.026158,0.003082,0.005,-0.000441,0.003486,...,-0.000377,0.000794,0.002748,0.000102,-0.013618,-0.001257414,8.5e-05,-0.000853,-0.006904,0.004858,0.004517,0.004725,-0.006579,-0.002737,-0.003811,0.009035,0.000209,-0.000226,0.015245,-0.011803,0.003666,-0.012771,-0.000909,0.001383,-0.00474,0.000253,0.002731,0.000359,0.001905,-0.001725,-0.001378,0.004972,0.005848,4e-06,-0.007609,-0.002401,-0.003597,0.001352,0.001614,0.002608


### Simulate the concequence of regulator knockout

In [0]:
# Simulate the concequence of regulator knockout
genes=["ENSG00000268903","ENSG00000239906"]
model_utils.coexpression_with_KO(genes,
                                 outloc,
                                 best_model,
                                 X_mRNA_test,
                                 X_promoter_test,
                                 Y_test)    

In [13]:
! Rscript --vanilla --slave functions/test_KO.R "$outloc$best_model" &> /dev/null 
pd.read_csv(outloc+best_model+'/regulator_KO/regression_res.txt',sep="\t")

Unnamed: 0,term,estimate,std.error,statistic,p.value
0,EGR1,0.019071,0.000549,34.754318,6.349288e-250
1,ESR1,0.013733,0.000548,25.042667,2.7787169999999996e-134
2,WT1,-0.010967,0.000548,-20.001986,2.684291e-87
3,CEBPA,-0.010542,0.000549,-19.219337,7.304835e-81
4,ZNF18,-0.010246,0.000548,-18.683391,1.358906e-76
5,ZBTB33,0.007998,0.000549,14.573703,1.271898e-47
6,MYCN,0.007575,0.000548,13.816717,5.0260770000000004e-43
7,ESRRA,0.006425,0.000548,11.715854,1.699357e-31
8,SP1,0.006124,0.000548,11.168537,8.610233e-29
9,ZNF592,0.006016,0.000549,10.959262,8.637257000000001e-28


### Simulate the concequence of binding site removals

In [0]:
# Simulate the concequence of binding site removals
genes=["ENSG00000268903","ENSG00000239906"]
model_utils.coexpression_with_binding_site_removal(genes,
                                                   outloc,
                                                   best_model,
                                                   X_mRNA_test,
                                                   X_promoter_test,
                                                   Y_test)

In [15]:
! Rscript --vanilla --slave functions/test_interval.R "$outloc$best_model" &> /dev/null 
pd.read_csv(outloc+best_model+'/binding_site_removal/regression_res.txt',sep="\t")

Unnamed: 0,term,estimate,std.error,statistic,p.value,Gene
0,promoter_interval_27,1.404362e-02,2.082604e-04,6.743297e+01,0.000000e+00,ENSG00000268903
1,promoter_interval_28,1.434568e-02,2.083366e-04,6.885817e+01,0.000000e+00,ENSG00000268903
2,promoter_interval_30,6.450572e-04,6.514633e-18,9.901667e+13,0.000000e+00,ENSG00000239906
3,promoter_interval_14,7.401378e-03,2.083081e-04,3.553093e+01,1.834235e-260,ENSG00000268903
4,promoter_interval_11,5.935169e-03,2.082483e-04,2.850044e+01,7.945986e-172,ENSG00000268903
5,promoter_interval_10,-5.225048e-03,2.084236e-04,-2.506937e+01,1.494389e-134,ENSG00000268903
6,promoter_interval_13,4.137921e-03,2.083907e-04,1.985656e+01,4.407078e-86,ENSG00000268903
7,promoter_interval_12,-3.136259e-03,2.082211e-04,-1.506216e+01,1.036337e-50,ENSG00000268903
8,promoter_interval_16,2.590693e-03,2.082527e-04,1.244014e+01,2.889767e-35,ENSG00000268903
9,promoter_interval_15,-2.217819e-03,2.083146e-04,-1.064649e+01,2.507232e-26,ENSG00000268903
