### Download data and install required libraries 

In [1]:
# Fetch the DEcode repository; later cells use its functions/, data/ and pretrained/ directories.
!git clone https://github.com/stasaki/DEcode.git

Cloning into 'DEcode'...
remote: Enumerating objects: 71, done.[K
remote: Counting objects: 100% (71/71), done.[K
remote: Compressing objects: 100% (60/60), done.[K
remote: Total 77 (delta 11), reused 47 (delta 5), pack-reused 6[K
Unpacking objects: 100% (77/77), done.


In [2]:
# Work from inside the cloned repository so the relative paths used below resolve.
%cd DEcode

/content/DEcode


In [16]:
# Python side: upgrade setuptools, then install shap pinned to 0.27.0 with the Python 2 pip.
!pip install --upgrade setuptools
!pip2 install shap==0.27.0
# R side: run the repository's package installer (the saved log shows rjson being built).
# NOTE(review): the training cell below already calls Rscript, so this cell has to be run
# before it on a fresh kernel -- the saved execution counts (16 here, 6 there) suggest it was not.
!Rscript functions/install_package.R

Requirement already up-to-date: setuptools in /usr/local/lib/python2.7/dist-packages (41.2.0)
Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)
trying URL 'https://cran.rstudio.com/src/contrib/rjson_0.2.20.tar.gz'
Content type 'application/x-gzip' length 99600 bytes (97 KB)
downloaded 97 KB

* installing *source* package ‘rjson’ ...
** package ‘rjson’ successfully unpacked and MD5 sums checked
** using staged installation
** libs
g++ -std=gnu++11 -I"/usr/share/R/include" -DNDEBUG     -fpic  -g -O2 -fdebug-prefix-map=/build/r-base-uuRxut/r-base-3.6.1=. -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -g  -c dump.cpp -o dump.o
gcc -std=gnu99 -I"/usr/share/R/include" -DNDEBUG     -fpic  -g -O2 -fdebug-prefix-map=/build/r-base-uuRxut/r-base-3.6.1=. -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -g  -c parser.c -o parser.o
gcc -std=gnu99 -I"/usr/share/R/include" -DNDEBUG     -

### Set location of input data

In [0]:
# Set input data
# All paths are relative to the repository root (the notebook has cd'ed into DEcode).

# Gene expression matrix
deg_data_file = "./data/toy/Transcriptome/expdata.txt"

# Location of RNA features and the annotation sources to use from it
mRNA_data_loc = "./data/toy/RNA_features/"
mRNA_annotation_data = ["POSTAR","TargetScan"]

# Location of promoter features and the annotation sources to use from it
promoter_data_loc = "./data/toy/Promoter_features/"
promoter_annotation_data = ["GTRD"]

# Genes used for training, validation, and testing
train_genes = "./data/toy/Gene_splits/train.txt.gz"
validate_genes = "./data/toy/Gene_splits/validate.txt.gz"
test_genes = "./data/toy/Gene_splits/test.txt.gz"    

# Location of the hyper-parameter JSON file (loaded with json.load in the training cell)
params_loc='./pretrained/Tissue_gene_params.json'

# Output directory; keep the trailing slash, main() builds file names by
# concatenating directly onto this string
outloc='./train_out/full_model/'

### Define main function

In [0]:
import os
import sys
sys.path.append('./functions/')
import data
import model_utils
import pandas as pd
import numpy as np
import json

#os.environ["CUDA_VISIBLE_DEVICES"]="0"

def main(params, just_return_model=False):
    """Train one model on the prepared splits and score it on the test genes.

    Reads the notebook-level variables ``X_mRNA_*``, ``X_promoter_*`` and
    ``Y_*`` (train / validate / test) as well as ``outloc``, so the data
    preparation cell must have been run first.  All artifacts are written to
    ``outloc`` under a per-call time-stamp prefix: ``_model.h5`` (best
    checkpoint), ``_test_performance.txt``, ``_history.json`` and
    ``_params.json``.

    Parameters
    ----------
    params : dict
        Hyper-parameters forwarded to ``network.define_network``.  The dict is
        updated in place with ``n_feature_mRNA``, ``n_feature_promoter`` and
        ``n_out`` derived from the training data.
    just_return_model : bool, optional
        When True, build the network and return it without training or
        writing anything.  Defaults to False (train and evaluate).

    Returns
    -------
    dict
        ``{'loss': <lowest validation loss>, 'status': STATUS_OK,
        'model': <best checkpoint reloaded from disk>}``.  If
        ``just_return_model`` is True the untrained model is returned instead.
    """
    from datetime import datetime
    from keras.callbacks import EarlyStopping, ModelCheckpoint
    from keras.models import load_model
    import metrics
    import layer_utils
    import network

    print(params)

    # model parameters and learning parameters
    max_epoch = 100
    batch_size = 128

    def _make_batches(X_mRNA, X_promoter, Y):
        # One (steps, generator) pair per split.  Column 1 of the feature
        # frames holds the per-gene feature arrays; the targets are every
        # column of Y from index 1 on (column 0 is presumably the gene id).
        return data.batch_iter(X_mRNA.values[:,1],
                               X_promoter.values[:,1],
                               Y.values[:,1:],
                               batch_size,
                               shuffle=True)

    # batch initialization
    train_steps, train_batches = _make_batches(X_mRNA_train, X_promoter_train, Y_train)
    valid_steps, valid_batches = _make_batches(X_mRNA_validate, X_promoter_validate, Y_validate)
    test_steps, test_batches = _make_batches(X_mRNA_test, X_promoter_test, Y_test)

    # Parameters for network structure, taken from the first training gene
    # and from the width of the target matrix
    params['n_feature_mRNA']=X_mRNA_train.values[:,1][0].shape[0]
    params['n_feature_promoter']=X_promoter_train.values[:,1][0].shape[0]
    params['n_out'] = Y_train.values[:,1:].shape[1]

    # Define network structure
    model = network.define_network(params)

    # If you don't need to train the model and just want the model structure
    if just_return_model:
        return model

    # Imported here instead of relying on a later notebook cell having put
    # STATUS_OK into the global namespace; doing it before training means a
    # missing hyperopt fails fast rather than after a full optimization run.
    from hyperopt import STATUS_OK

    # The checkpoint callback does not create missing directories
    if not os.path.isdir(outloc):
        os.makedirs(outloc)

    # Set callback functions to early stop training and save the best model so far
    time_stamp=datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    model_file = outloc+time_stamp+'_model.h5'

    callbacks = [EarlyStopping(monitor='val_loss', patience=10),
                 ModelCheckpoint(model_file, monitor='val_loss', verbose=0,
                                 save_best_only=True,
                                 save_weights_only=False,
                                 mode='min', period=1)]

    # Optimizing model
    result = model.fit_generator(train_batches, train_steps,
                                 epochs=max_epoch,
                                 validation_data=valid_batches,
                                 validation_steps=valid_steps,
                                 callbacks=callbacks,
                                 max_queue_size=10,
                                 verbose=0)

    # Test performance
    # Reload the best checkpoint: the in-memory model holds the weights of
    # the last epoch, not necessarily those with the lowest validation loss
    model = load_model(model_file,
                       custom_objects={'pcor': metrics.pcor,
                                       'GlobalSumPooling1D': layer_utils.GlobalSumPooling1D})
    test_performance= np.array(model.evaluate_generator(test_batches,test_steps))
    np.savetxt(outloc+time_stamp+'_test_performance.txt',
               test_performance,delimiter="\t")

    # Saving optimization history
    with open(outloc+time_stamp+'_history.json', 'w') as f:
        json.dump(result.history, f)

    # Saving model and learning parameters
    with open(outloc+time_stamp+'_params.json', 'w') as f:
        json.dump(params, f)

    # Return validation loss for model selection
    validation_loss = np.amin(result.history['val_loss'])

    return {'loss': validation_loss, 'status': STATUS_OK, 'model': model}

### Training model

In [6]:
! mkdir -p "$outloc"
shuffle="None"
# Prepare learning data
Y_train, Y_validate, Y_test, X_mRNA_train, X_mRNA_validate, X_mRNA_test, X_promoter_train, X_promoter_validate, X_promoter_test = data.prep_ml_data_split(
    deg_data_file=deg_data_file,
    mRNA_data_loc=mRNA_data_loc,
    mRNA_annotation_data=mRNA_annotation_data,
    promoter_data_loc=promoter_data_loc,
    promoter_annotation_data=promoter_annotation_data,
    train_genes=train_genes,
    validate_genes=validate_genes,
    test_genes=test_genes,
    outloc=outloc,
    shuffle=shuffle)

# Obtain hyper parameters
with open(params_loc) as f:
    params=json.load(f)  

# Training model
from hyperopt import STATUS_OK
for i in range(10):
    main(params)

# Summarizing the result
! Rscript functions/find_best_model.R "$outloc" &> /dev/null 
with open(outloc+"/summary/best_model.txt") as f:
    best_model=f.readline().rstrip()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item_labels[indexer[info_axis]]] = value
Using TensorFlow backend.
W0826 18:43:00.965044 140387272288128 deprecation_wrapper.py:119] From /usr/local/lib/python2.7/dist-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0826 18:43:00.996310 140387272288128 deprecation_wrapper.py:119] From /usr/local/lib/python2.7/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf

{u'n_feature_mRNA': 372, u'DNA_n_channel_1st': 120.0, u'Last_fullConLayer': 2.0, u'DNA_conv_strides': 1.0, u'ConvRelu': u'No', u'FullRelu': u'Yes', u'n_feature_mRNA_basic': 1, u'n_feature_promoter': 726, u'lr': 0.001, u'RNA_n_channel_1st': 160.0, u'Last_n_channel': 160.0, u'RNA_conv_strides': 2.0, u'DNA_n_ConvLayer': 4.0, u'RNA_n_ConvLayer': 1.0, u'n_out': 54, u'Add_basic_info': u'No'}


W0826 18:43:01.198169 140387272288128 deprecation_wrapper.py:119] From /usr/local/lib/python2.7/dist-packages/keras/backend/tensorflow_backend.py:133: The name tf.placeholder_with_default is deprecated. Please use tf.compat.v1.placeholder_with_default instead.

W0826 18:43:01.309458 140387272288128 deprecation_wrapper.py:119] From /usr/local/lib/python2.7/dist-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

W0826 18:43:01.817775 140387272288128 deprecation_wrapper.py:119] From /usr/local/lib/python2.7/dist-packages/keras/backend/tensorflow_backend.py:986: The name tf.assign_add is deprecated. Please use tf.compat.v1.assign_add instead.



{u'n_feature_mRNA': 271, u'DNA_n_channel_1st': 120.0, u'Last_fullConLayer': 2.0, u'DNA_conv_strides': 1.0, u'ConvRelu': u'No', u'FullRelu': u'Yes', u'n_feature_mRNA_basic': 1, u'n_feature_promoter': 601, u'lr': 0.001, u'RNA_n_channel_1st': 160.0, u'Last_n_channel': 160.0, u'RNA_conv_strides': 2.0, u'DNA_n_ConvLayer': 4.0, u'RNA_n_ConvLayer': 1.0, u'n_out': 54, u'Add_basic_info': u'No'}
{u'n_feature_mRNA': 271, u'DNA_n_channel_1st': 120.0, u'Last_fullConLayer': 2.0, u'DNA_conv_strides': 1.0, u'ConvRelu': u'No', u'FullRelu': u'Yes', u'n_feature_mRNA_basic': 1, u'n_feature_promoter': 601, u'lr': 0.001, u'RNA_n_channel_1st': 160.0, u'Last_n_channel': 160.0, u'RNA_conv_strides': 2.0, u'DNA_n_ConvLayer': 4.0, u'RNA_n_ConvLayer': 1.0, u'n_out': 54, u'Add_basic_info': u'No'}
{u'n_feature_mRNA': 271, u'DNA_n_channel_1st': 120.0, u'Last_fullConLayer': 2.0, u'DNA_conv_strides': 1.0, u'ConvRelu': u'No', u'FullRelu': u'Yes', u'n_feature_mRNA_basic': 1, u'n_feature_promoter': 601, u'lr': 0.001, u'RN

IOError: ignored

In [0]:
# Read the name of the selected model: first line of <outloc>/summary/best_model.txt,
# with trailing whitespace stripped.
# NOTE(review): this repeats the last step of the training cell, whose saved output ends in
# "IOError: ignored" -- presumably it was re-run once the summary file existed; confirm.
with open(outloc+"/summary/best_model.txt") as f:
    best_model=f.readline().rstrip()


### Compute Spearman's correlation between actual and predicted expression for each sample

In [0]:
# Prediction for test samples with the best model
model_utils.test_prediction(outloc,
                            best_model,
                            X_mRNA_test,
                            X_promoter_test,
                            Y_test)

In [20]:
# Run functions/calc_performance.R on the best model's directory (all R output is discarded,
# so a failure only shows up as a missing file in the read below) ...
! Rscript --vanilla --slave functions/calc_performance.R "$outloc$best_model" &> /dev/null 
# ... then display the per-sample correlation table from test_data/cor_tbl.txt
pd.read_csv(outloc+best_model+'/test_data/cor_tbl.txt',sep="\t")

Unnamed: 0,sample,estimate,statistic,p.value,method,alternative
0,Adipose_Subcutaneous,0.139464,2822948.0,0.02189321,Spearman's rank correlation rho,two.sided
1,Adipose_Visceral_Omentum,0.156453,2767217.0,0.01003164,Spearman's rank correlation rho,two.sided
2,Adrenal_Gland,0.17514,2705916.0,0.003891355,Spearman's rank correlation rho,two.sided
3,Artery_Aorta,-0.124397,3688535.0,0.04110041,Spearman's rank correlation rho,two.sided
4,Artery_Coronary,0.005646,3261934.0,0.9264274,Spearman's rank correlation rho,two.sided
5,Artery_Tibial,0.278379,2367244.0,3.39804e-06,Spearman's rank correlation rho,two.sided
6,Bladder,0.191963,2650729.0,0.001528781,Spearman's rank correlation rho,two.sided
7,Brain_Amygdala,0.400801,1965646.0,7.68207e-12,Spearman's rank correlation rho,two.sided
8,Brain_Anterior_cingulate_cortex_BA24,0.396349,1980249.0,1.366575e-11,Spearman's rank correlation rho,two.sided
9,Brain_Caudate_basal_ganglia,0.347193,2141503.0,4.577209e-09,Spearman's rank correlation rho,two.sided


### Compute average DeepLIFT score for each regulator

In [21]:
# Estimate variable imporance using test samples
model_utils.compute_DeepLIFT(outloc,
                             best_model,
                             X_mRNA_test,
                             X_promoter_test,
                             Y_test)

0


W0826 18:57:08.636358 140387272288128 deprecation.py:323] From /usr/local/lib/python2.7/dist-packages/shap/explainers/deep/deep_tf.py:450: where (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53


In [22]:
# Run functions/summarize_DeepLIFT.R on the best model's directory (all R output is discarded).
! Rscript --vanilla --slave functions/summarize_DeepLIFT.R "$outloc$best_model" &> /dev/null 
# NOTE(review): a cell only displays its last expression, so the RNA table on the next line
# is read but never shown; only the promoter table appears in the saved output.
pd.read_csv(outloc+best_model+'/DeepLIFT/RNA_importance_mean.txt',sep="\t")
pd.read_csv(outloc+best_model+'/DeepLIFT/promoter_importance_mean.txt',sep="\t")

Unnamed: 0,sample_name,AEBP2,AHR,AHRR,APC,AR,ARID1A,ARID1B,ARID2,ARID3A,ARID4B,ARNT,ARNT2,ARNTL,ASCL1,ASCL2,ATF1,ATF2,ATF3,ATF4,ATF5,ATF6,ATF7,ATOH1,ATRX,BACH1,BACH2,BARHL1,BARX1,BARX2,BATF,BATF3,BCL11A,BCL11B,BCL3,BCL6,BHLHE40,BRD4,CDC5L,CDX2,...,ZNF623,ZNF629,ZNF639,ZNF644,ZNF652,ZNF654,ZNF664,ZNF680,ZNF687,ZNF692,ZNF7,ZNF701,ZNF711,ZNF740,ZNF75A,ZNF76,ZNF766,ZNF768,ZNF770,ZNF778,ZNF784,ZNF792,ZNF8,ZNF816,ZNF83,ZNF84,ZNF85,ZNF92,ZSCAN16,ZSCAN2,ZSCAN21,ZSCAN22,ZSCAN29,ZSCAN31,ZSCAN4,ZSCAN5A,ZSCAN5DP,ZSCAN9,ZXDB,ZXDC
0,Adipose_Subcutaneous,-0.000363,6e-06,-0.000423,0.000858,-0.003265,-0.000553,0.00346,-0.000184,-0.001252,-0.003966,0.000239,-8.5e-05,-0.000257,-0.008232,-0.000301,-0.006879,-0.000121,-0.002783,0.000175,6.2e-05,0.0001976943,-0.002148,0.0001423825,-0.000286,-0.00055,-0.000612,0.000112,0.000159,0.00024,0.002345,-4.2e-05,0.001864,-0.000866,-6.3e-05,-0.002421,0.000912,-0.000846,2.758076e-05,2.9e-05,...,-9.396449e-05,-0.000763,-0.000891,-0.000763,0.00127,-3.8e-05,-0.000108,-0.000215,0.000496,-0.000584,-0.000477,-0.000605,-0.000519,0.000254,0.000411,-0.000203,-3.950635e-05,0.00014,-0.004199,0.001846,-0.000437,-0.000741,0.0001752792,1.095512e-05,0.00326,-7.355281e-05,-0.00029,0.000106,0.000325,9.6e-05,0.000139,0.001557,-0.001071,0.000227,0.000557,0.000622,0.000684,-0.002433,-0.000661,8.4e-05
1,Adipose_Visceral_Omentum,-0.000449,0.000815,-0.000519,0.001636,-0.045263,-0.000742,0.005242,-0.000572,-0.002177,-0.005168,-0.002727,-0.000141,-0.000523,-0.016472,0.001079,-0.007843,-0.000306,-0.001342,0.000476,4.5e-05,-7.914809e-05,-0.002528,5.938452e-06,-0.000712,-0.001639,-0.000808,0.000263,0.000679,4e-06,0.003842,-2.4e-05,0.001725,-0.003723,0.001056,0.002788,0.000241,7e-05,5.800007e-05,-0.002227,...,-2.307161e-05,-0.000903,-0.00134,-0.000873,0.002049,-0.000216,-4.5e-05,-0.000398,0.00065,-0.00083,-0.00075,-0.001926,-0.000422,0.000604,0.00115,0.000175,3.338502e-06,0.000347,-0.005985,0.004392,-0.000875,-0.000134,0.0001213426,-3.327525e-05,0.005641,0.0003335936,-0.000496,0.000298,-0.000327,0.000155,0.000179,-0.003927,-0.00171,0.000143,0.000684,0.000961,0.001147,-0.001658,-0.001541,-0.001554
2,Adrenal_Gland,0.000105,0.001729,0.000204,0.000223,-0.02092,-0.00024,-0.002994,0.000127,0.000375,0.000761,2.6e-05,1.5e-05,-0.000286,0.007269,0.001598,0.002153,0.002533,-0.000271,0.000391,2.7e-05,-0.0002435988,0.000756,2.199082e-05,3.9e-05,-0.000274,8.2e-05,-9.5e-05,-9e-06,-0.000459,-0.000117,0.000162,0.000734,0.001341,-0.003868,0.001864,0.000858,-0.000236,-9.533766e-07,0.000271,...,-8.314948e-05,-0.000865,0.000884,0.000901,-0.001252,0.000116,-8.5e-05,0.00012,-6.1e-05,7.5e-05,0.00011,0.00043,-0.000345,8.9e-05,-0.000502,-6e-05,-8.169672e-06,-2.9e-05,0.001999,-0.000203,-9.3e-05,-0.000631,0.0002606687,-9.849179e-05,-0.001557,0.000203507,2.8e-05,-0.000221,-0.0004,0.000199,-0.000101,-0.001964,-8.4e-05,3e-05,0.000443,-0.000252,-0.000666,-0.000237,3.7e-05,-0.001543
3,Artery_Aorta,-0.000115,-0.000508,-0.000241,0.00175,-0.017843,-0.000542,0.003975,-0.000293,-0.002374,-0.006113,-8.4e-05,-0.000155,-5.4e-05,-0.006948,-9e-05,-0.004004,0.000221,-0.003665,0.000252,-2.6e-05,-2.786772e-05,-0.001585,0.0001482681,-0.001254,0.000252,-0.000429,-3.3e-05,0.000215,0.000208,0.001437,-0.000158,-0.00071,-0.002867,-0.001536,-0.003335,-0.001455,-2.8e-05,1.412855e-05,-0.001194,...,-9.7351e-05,-0.000466,-0.000113,-0.001041,0.003179,9.2e-05,-0.000272,-0.000198,0.001865,-0.001,-0.000689,-0.00018,-0.000933,0.000135,0.000641,-0.0006,1.461708e-06,0.000179,-0.003677,0.002689,-0.000474,-0.001812,0.0001573903,-8.566207e-06,0.004337,-9.630659e-05,-0.000394,0.000167,-0.000104,0.000117,0.000132,-0.001758,-0.001262,0.000349,0.000784,0.000444,0.000792,-0.002403,-0.000756,0.000149
4,Artery_Coronary,0.000127,-2.8e-05,-0.000708,0.001644,-0.028052,-0.000725,0.005915,-0.000248,-0.002104,-0.005386,-0.003058,-0.000167,-1.6e-05,-0.009692,-0.00229,-0.009357,-0.001318,0.000393,0.000199,4.7e-05,0.0001405708,-0.00272,6.828061e-05,-0.000822,-0.000381,-0.000531,-3.9e-05,0.00051,0.001679,0.003241,4.7e-05,0.000892,-0.002559,0.001909,-0.005018,-0.000669,-0.000891,-1.688533e-05,-0.002337,...,-6.067435e-05,0.00037,-0.001286,-0.000852,0.002324,-0.000249,6.7e-05,-0.000144,0.002019,-0.00091,-0.000704,-0.001742,5.1e-05,0.000767,0.001261,0.000255,2.350107e-05,0.000285,-0.004316,0.003943,-0.001289,-0.001978,0.0001816871,-2.825503e-05,0.003179,7.579197e-05,-0.000523,0.000243,0.000286,-0.000215,0.000268,0.000502,-0.001942,0.000397,-0.000508,0.000454,0.002374,-0.003286,-0.001277,0.000563
5,Artery_Tibial,-0.000435,-0.000512,-0.000139,0.000517,0.036324,0.000881,0.001112,0.000245,0.000281,3.6e-05,0.003442,9e-06,-0.000284,-0.001192,6.6e-05,-0.000677,-0.002129,-0.000384,-0.000723,-0.000137,0.0002827656,-6e-05,-7.172602e-05,-0.000658,-0.000159,-0.000637,4.5e-05,9.4e-05,-0.00077,-0.003187,-0.000147,-0.001155,-0.002518,0.006861,-0.006423,-0.001863,0.000442,3.782931e-05,0.00241,...,-1.562807e-07,0.00222,-0.002473,-0.00116,0.001497,-2.5e-05,-0.000207,4e-05,0.00014,3.8e-05,-0.000207,0.001685,0.000885,0.000118,0.000331,0.000134,-3.016349e-05,-0.000145,-0.000363,0.000984,-7.6e-05,0.001167,-0.0003338964,0.0001286846,0.00225,-0.0001036148,7.8e-05,5.8e-05,0.000255,0.000143,-7.9e-05,0.002314,0.000653,0.000197,0.000263,0.000694,9.1e-05,0.000562,7.8e-05,0.002966
6,Bladder,-0.000743,-0.001647,-0.000618,0.002068,0.03325,0.000573,0.005777,-0.000482,-0.001923,-0.005787,0.00307,-0.000133,-0.000729,-0.011777,-0.004036,-0.009571,-0.003338,0.000632,-0.000493,5e-06,0.0004473703,-0.002002,-9.515345e-05,-0.001513,-0.000836,-0.000985,9.9e-05,0.000524,-0.000507,-0.001333,-0.00041,-0.00105,-0.004998,0.008863,-0.007193,-0.002533,0.001243,4.257757e-05,0.001328,...,-0.000142133,0.003078,-0.000839,-0.002994,0.002819,-2.7e-05,0.000251,-8.7e-05,5e-06,-0.001148,-0.000704,0.000151,0.001164,0.00046,0.001402,-0.000982,-4.190789e-05,0.000127,-0.004412,0.004266,-0.000885,0.001628,-0.0005748796,0.0001597138,0.005357,-0.0001143726,-0.000298,0.00015,0.001135,-0.000285,0.000116,0.001683,-0.000704,0.000274,0.000605,0.001779,0.001272,0.000793,-0.002436,0.004332
7,Brain_Amygdala,-8.3e-05,0.004286,0.001175,-0.00357,0.065932,0.000763,-0.011594,0.000661,0.005723,0.010743,0.006611,0.000202,0.000761,0.035296,0.00041,0.010337,0.00287,0.00534,-0.001141,-8e-05,0.0003318697,0.00643,-8.111545e-05,0.001829,0.002617,0.002077,-0.000271,-0.001701,-0.002175,-0.006192,0.000278,-0.001861,0.002206,-0.007118,0.002175,0.002593,0.000805,-0.0002373438,0.003955,...,0.0002714675,-0.000377,0.0026,0.002067,-0.004468,0.000179,0.000349,0.000706,-0.003903,0.002297,0.00223,0.002493,-0.000106,-0.001068,-0.002825,0.00084,4.078452e-05,-0.000713,0.011214,-0.008396,0.001633,-0.001149,-1.181628e-05,0.0001800783,-0.010662,-0.0005810354,0.000991,-0.000466,0.001338,2.9e-05,-0.000452,0.002731,0.003013,-0.000631,-0.000996,-0.001983,-0.003476,0.004873,0.003132,0.000829
8,Brain_Anterior_cingulate_cortex_BA24,8.5e-05,0.003091,0.000994,-0.002385,0.051919,0.000719,-0.009321,0.000568,0.004823,0.010474,0.005337,0.000102,0.000745,0.033565,0.002721,0.012898,0.003726,0.001732,-0.000778,-8.8e-05,0.000292793,0.004895,-1.53582e-06,0.001475,0.002338,0.001698,-0.000197,-0.001327,-0.001172,-0.005224,0.000293,-0.001904,0.002895,-0.004969,0.002546,0.001953,0.000497,-0.0002246099,0.005112,...,0.0002162839,-0.001317,0.00101,0.002487,-0.003763,0.000141,-2e-06,0.000754,-0.002747,0.00221,0.001907,0.003286,-0.000226,-0.000677,-0.002369,0.000799,3.180332e-05,-0.000625,0.01011,-0.006964,0.00098,-0.000882,0.0001606316,4.470566e-05,-0.008188,-0.0003400072,0.000895,-0.000382,0.000308,0.000186,-0.000431,0.003408,0.002384,-0.000462,-0.000693,-0.002215,-0.002946,0.003022,0.002909,0.000234
9,Brain_Caudate_basal_ganglia,1.2e-05,0.000523,0.000769,-0.004408,0.095639,0.000627,-0.008349,0.00046,0.00495,0.011525,0.006021,0.000152,0.000891,0.034016,0.003478,0.009458,0.001861,-0.003254,-0.001147,-0.000184,0.0003177466,0.005277,-9.667806e-05,0.001683,0.002951,0.001501,-0.000121,-0.001506,-0.000944,-0.004772,0.000166,-0.000569,-0.001521,-0.004623,0.002616,0.001655,0.00114,-0.0002871651,0.005472,...,0.0003437493,-0.000491,0.0038,0.002294,-0.001796,9.2e-05,2.2e-05,0.000681,-0.003441,0.00243,0.002065,0.002,-0.000877,-0.001213,-0.002651,0.002637,5.544614e-05,-0.000758,0.008961,-0.007024,0.001664,-0.002692,-0.000151788,0.0001752249,-0.006396,-0.0003717273,0.001039,-0.000209,0.001031,0.000375,-0.000658,-0.000937,0.00298,-0.000663,-8.8e-05,-0.002081,-0.003101,0.002177,0.002379,0.000769


### Simulate the consequence of regulator knockout

In [0]:
# Simulate the consequence of regulator knockout
# Genes to examine, given as ENSG identifiers
genes=["ENSG00000268903","ENSG00000239906"]
model_utils.coexpression_with_KO(genes,
                                 outloc,
                                 best_model,
                                 X_mRNA_test,
                                 X_promoter_test,
                                 Y_test)    

In [24]:
# Run functions/test_KO.R on the best model's directory (all R output is discarded) ...
! Rscript --vanilla --slave functions/test_KO.R "$outloc$best_model" &> /dev/null 
# ... then display the regression table from regulator_KO/regression_res.txt
pd.read_csv(outloc+best_model+'/regulator_KO/regression_res.txt',sep="\t")

Unnamed: 0,term,estimate,std.error,statistic,p.value
0,NR2F6,0.005235,0.000191,27.474442,2.954167e-160
1,CEBPA,0.004362,0.000191,22.893958,4.2386e-113
2,EGR1,-0.004215,0.000191,-22.115296,7.657080000000001e-106
3,ESR1,0.003547,0.00019,18.621147,4.186045e-76
4,SP5,-0.003146,0.000191,-16.50936,1.981776e-60
5,ZNF18,0.001565,0.00019,8.218735,2.312262e-16
6,RARA,0.001405,0.000191,7.373096,1.799603e-13
7,RUNX1,0.001354,0.00019,7.108905,1.249829e-12
8,ESRRA,0.001165,0.00019,6.1149,1.002391e-09
9,MYCN,-0.001102,0.00019,-5.78682,7.389296e-09


### Simulate the consequence of binding site removals

In [0]:
# Simulate the consequence of binding site removals
# Genes to examine, given as ENSG identifiers (same pair as in the knockout cell)
genes=["ENSG00000268903","ENSG00000239906"]
model_utils.coexpression_with_binding_site_removal(genes,
                                                   outloc,
                                                   best_model,
                                                   X_mRNA_test,
                                                   X_promoter_test,
                                                   Y_test)

In [26]:
# Run functions/test_interval.R on the best model's directory (all R output is discarded) ...
! Rscript --vanilla --slave functions/test_interval.R "$outloc$best_model" &> /dev/null 
# ... then display the regression table from binding_site_removal/regression_res.txt
pd.read_csv(outloc+best_model+'/binding_site_removal/regression_res.txt',sep="\t")

Unnamed: 0,term,estimate,std.error,statistic,p.value,Gene
0,promoter_interval_10,3.881948e-03,5.611975e-05,6.917257e+01,0.000000e+00,ENSG00000268903
1,promoter_interval_12,3.592989e-03,5.606523e-05,6.408587e+01,0.000000e+00,ENSG00000268903
2,promoter_interval_15,6.851069e-03,5.609040e-05,1.221433e+02,0.000000e+00,ENSG00000268903
3,promoter_interval_30,-7.256894e-04,1.711670e-16,-4.239657e+12,0.000000e+00,ENSG00000239906
4,promoter_interval_14,-1.938777e-03,5.608864e-05,-3.456630e+01,2.216499e-247,ENSG00000268903
5,promoter_interval_28,1.410729e-03,5.609633e-05,2.514833e+01,2.308738e-135,ENSG00000268903
6,promoter_interval_27,1.057172e-03,5.607582e-05,1.885255e+01,6.297272e-78,ENSG00000268903
7,promoter_interval_11,5.054476e-04,5.607256e-05,9.014170e+00,2.348348e-19,ENSG00000268903
8,promoter_interval_16,4.797265e-04,5.607375e-05,8.555279e+00,1.348987e-17,ENSG00000268903
9,promoter_interval_29,3.466486e-04,5.608066e-05,6.181250e+00,6.608706e-10,ENSG00000268903
