In [3]:
import process_data
import constants



Using TensorFlow backend.


In [19]:
'''
Train the CNN with data preprocessing.

Set the parameters below, then run the cell to launch the training.

:param HISTONE_TARGET: str - name of the target histone modification to train on
:param HELPERS: list of str - additional histone modifications used to improve the prediction quality
:param CHROM_TRAIN: str - name of the chromosome used for training
:param N_TRAIN_2: int - the amount of data for training
:param MODEL_NAME_2: str - the output name for the model (as built below it already ends in '.h5')

:result: the model MODEL_NAME_2 in the 'CODA_PATH/models' directory
'''

# NOTE(review): NAME_EXP and train_w_data_preprocessing are not defined in the
# visible cells (the import cell only does `import process_data` and
# `import constants`); presumably they come from an earlier star-import.
# Confirm, otherwise this cell raises NameError on a fresh kernel.
N_TRAIN_2 = 10000
CHROM_TRAIN = 'chr3'
HISTONE_TARGET = 'k36me3'
HELPERS = ['k27ac', 'k4me3']

# Model file name layout: <NAME_EXP>.<histone>.<chromosome>.n_train_<N>.h5
MODEL_NAME_2 = ''.join([
    NAME_EXP, '.', HISTONE_TARGET, '.', CHROM_TRAIN,
    '.n_train_', str(N_TRAIN_2), '.h5',
])

train_w_data_preprocessing(
    HISTONE_TARGET,
    HELPERS,
    CHROM_TRAIN,
    N_TRAIN_2,
    MODEL_NAME_2,
)


In [20]:
'''
Train the CNN without data preprocessing.

Set the parameters below, then run the cell to launch the training.

:param X_FILES: list of str - paths to the input data files (.bedgraph); the first one is the target for quality improvement, the others are helpers
:param Y_FILE: str - path to the data file (.bedgraph) with the good quality track used for CNN training
:param N_TRAIN_1: int - the amount of data for training
:param MODEL_NAME_1: str - the output name for the model (as built below it already ends in '.h5')

:result: the model MODEL_NAME_1 in the 'CODA_PATH/models' directory
'''

# NOTE(review): DATA_PATH and train_wout_data_preprocessing are not defined in
# the visible cells (the import cell only does `import process_data` and
# `import constants`); presumably they come from an earlier star-import.
# Confirm, otherwise this cell raises NameError on a fresh kernel.
N_TRAIN_1 = 10000

# Low quality input tracks: the target comes first, followed by the helpers.
X_FILES = [
    DATA_PATH + track_file
    for track_file in (
        'OD8_k36me3.chr3.bad_quality_tr.b25.bedgraph',
        'OD8_k27ac.chr3.bad_quality_tr.b25.bedgraph',
        'OD8_k4me3.chr3.bad_quality_tr.b25.bedgraph',
    )
]
Y_FILE = DATA_PATH + 'OD8_k36me3.chr3.good_quality_tr.b25.bedgraph'

MODEL_NAME_1 = ''.join(['OD8.k36me3.chr3.n_train_', str(N_TRAIN_1), '.h5'])

train_wout_data_preprocessing(
    X_FILES,
    Y_FILE,
    N_TRAIN_1,
    MODEL_NAME_1,
)



In [4]:
'''
Apply the CNN without data preprocessing.

Set the parameters below, then run the cell to launch the prediction.

:param BOUNDS_IMPL_1: dict, for example BOUNDS_IMPL_1 = {'start': int, 'end': int}, or BOUNDS_IMPL_1 = None - base-pair bounds for the implementation
:param X_FILES_IMPL: list of str - paths to the input data files (.bedgraph); the first one is the target for quality improvement, the others are helpers
:param Y_FILE_CHECK: str - path to the data file (.bedgraph) with the good quality track, used for comparison with the result
:param MODEL_IMPL_NAME_1: str - pre-trained model
:param OUT_NAME_1: str - the output name for the bedgraph and bigwig files

:result: files 'OUT_NAME_1.bedgraph' and 'OUT_NAME_1.bw' in the 'CODA_PATH/output' directory
'''

# NOTE(review): DATA_PATH and apply_wout_data_preprocessing are not defined in
# the visible cells (the import cell only does `import process_data` and
# `import constants`); presumably they come from an earlier star-import.
# Confirm, otherwise this cell raises NameError on a fresh kernel.
MODEL_IMPL_NAME_1 = 'OD8.k36me3.chr3.n_train_10000.h5'
OUT_NAME_1 = 'OD8_k36me3.chr3.b25.prediction'
BOUNDS_IMPL_1 = {
    'start': 4700000,
    'end': 4700300,
}

# Low quality input tracks: the target comes first, followed by the helpers.
X_FILES_IMPL = [
    DATA_PATH + track_file
    for track_file in (
        'OD8_k36me3.chr3.bad_quality_impl.b25.bedgraph',
        'OD8_k27ac.chr3.bad_quality_impl.b25.bedgraph',
        'OD8_k4me3.chr3.bad_quality_impl.b25.bedgraph',
    )
]
Y_FILE_CHECK = DATA_PATH + 'OD8_k36me3.chr3.good_quality_check.b25.bedgraph'

apply_wout_data_preprocessing(
    X_FILES_IMPL,
    Y_FILE_CHECK,
    MODEL_IMPL_NAME_1,
    OUT_NAME_1,
    bounds=BOUNDS_IMPL_1,
)


!! OD8_k36me3.chr3.b25.prediction
/Users/dashabalashova/0_python_projects/DCNN_3/data/OD8_k36me3.chr3.bad_quality_impl.b25.sub_4700000_4700300.bedgraph
/Users/dashabalashova/0_python_projects/DCNN_3/data/OD8_k27ac.chr3.bad_quality_impl.b25.bedgraph
/Users/dashabalashova/0_python_projects/DCNN_3/data/OD8_k4me3.chr3.bad_quality_impl.b25.bedgraph


100% (9 of 9) |##########################| Elapsed Time: 0:00:01 Time:  0:00:01


/Users/dashabalashova/0_python_projects/DCNN_3/data/OD8_k36me3.chr3.good_quality_check.b25.bedgraph


100% (9 of 9) |##########################| Elapsed Time: 0:00:00 Time:  0:00:00


Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.


 11% (1 of 9) |##                        | Elapsed Time: 0:00:00 ETA:  00:00:00

OD8_k36me3.chr3.b25.prediction
OD8_k36me3.chr3.b25.prediction
called
/Users/dashabalashova/0_python_projects/DCNN_3/data/OD8_k36me3.chr3.bad_quality_impl.b25.sub_4700000_4700300.bedgraph


100% (9 of 9) |##########################| Elapsed Time: 0:00:00 Time:  0:00:00


low quality signal/noise = 2.285714285714286
good quality signal/noise = 2.7837837837837838
prediction signal/noise = 1.1562970139434308


In [22]:
'''
Apply the CNN with data preprocessing.

Set the parameters below, then run the cell to launch the prediction.

:param BOUNDS_IMPL_2: dict, for example BOUNDS_IMPL_2 = {'start': int, 'end': int}, or BOUNDS_IMPL_2 = None - base-pair bounds for the implementation
:param HISTONE_IMPL: str - the target histone modification the model is applied to
:param HELPERS_IMPL: list of str - histone modifications (same as in the trained model) used to improve the prediction quality
:param CHROM_IMPL: str - name of the chromosome for the implementation
:param MODEL_IMPL_NAME_2: str - pre-trained model
:param OUT_NAME_2: str - the output name for the bedgraph and bigwig files

:result: files 'OUT_NAME_2.bedgraph' and 'OUT_NAME_2.bw' in the 'CODA_PATH/output' directory
'''

# NOTE(review): apply_w_data_preprocessing is not defined in the visible cells
# (the import cell only does `import process_data` and `import constants`);
# presumably it comes from an earlier star-import. Confirm, otherwise this
# cell raises NameError on a fresh kernel.
MODEL_IMPL_NAME_2 = 'OD8.k36me3.chr3.n_train_10000.h5'

# NOTE(review): same value as OUT_NAME_1 in the cell that applies the CNN
# without preprocessing; if both write to the same output directory, one run
# presumably overwrites the files of the other. Confirm this is intended.
OUT_NAME_2 = 'OD8_k36me3.chr3.b25.prediction'

CHROM_IMPL = 'chr3'
HISTONE_IMPL = 'k36me3'
HELPERS_IMPL = ['k27ac', 'k4me3']
BOUNDS_IMPL_2 = {
    'start': 4700000,
    'end': 4800000,
}

apply_w_data_preprocessing(
    HISTONE_IMPL,
    HELPERS_IMPL,
    CHROM_IMPL,
    MODEL_IMPL_NAME_2,
    OUT_NAME_2,
    bounds=BOUNDS_IMPL_2,
)
