In [17]:
import importlib

import preprocess_data
importlib.reload(preprocess_data)
from preprocess_data import *

import process_data
importlib.reload(process_data)
from process_data import *

import constants
importlib.reload(constants)
from constants import *

import cnn_model
importlib.reload(cnn_model)
from cnn_model import create_cnn


# ---------------------------------------------------------------------------
# Training from explicit bedgraph files (no preprocessing step)
# ---------------------------------------------------------------------------
X_FILES = [
    DATA_PATH + 'OD8_k36me3.chr3.bad_quality_tr.b25.bedgraph',
    DATA_PATH + 'OD8_k27ac.chr3.bad_quality_tr.b25.bedgraph',
    DATA_PATH + 'OD8_k4me3.chr3.bad_quality_tr.b25.bedgraph',
]
Y_FILE = DATA_PATH + 'OD8_k36me3.chr3.good_quality_tr.b25.bedgraph'
N_TRAIN_1 = 10000
MODEL_NAME_1 = 'OD8.k36me3.chr3.n_train_' + str(N_TRAIN_1) + '.h5'


# ---------------------------------------------------------------------------
# Training with preprocessing (input files are derived from these settings)
# ---------------------------------------------------------------------------
HISTONE_TARGET = 'k36me3'
HELPERS = ['k27ac', 'k4me3']
CHROM_TRAIN = 'chr3'
N_TRAIN_2 = 10000
MODEL_NAME_2 = (
    NAME_EXP + '.' + HISTONE_TARGET + '.' + CHROM_TRAIN
    + '.n_train_' + str(N_TRAIN_2) + '.h5'
)


# ---------------------------------------------------------------------------
# Applying a model to explicit bedgraph files (no preprocessing step)
# ---------------------------------------------------------------------------
BOUNDS_IMPL_1 = {
    'start': 4700000,
    'end': 4800000,
}
# apply_cnn passes the first entry separately from the remaining entries,
# so the order of this list matters.
X_FILES_IMPL = [
    DATA_PATH + 'OD8_k36me3.chr3.bad_quality_impl.b25.bedgraph',
    DATA_PATH + 'OD8_k27ac.chr3.bad_quality_impl.b25.bedgraph',
    DATA_PATH + 'OD8_k4me3.chr3.bad_quality_impl.b25.bedgraph',
]
Y_FILE_CHECK = DATA_PATH + 'OD8_k36me3.chr3.good_quality_check.b25.bedgraph'
# Hard-coded; currently equal to MODEL_NAME_1. Update by hand if N_TRAIN_1 changes.
MODEL_IMPL_NAME_1 = 'OD8.k36me3.chr3.n_train_10000.h5'
OUT_BW_NAME_1 = 'OD8_k36me3.chr3.b25.prediction.bw'


# ---------------------------------------------------------------------------
# Applying a model with preprocessing
# ---------------------------------------------------------------------------
BOUNDS_IMPL_2 = {
    'start': 4700000,
    'end': 4800000,
}
HISTONE_IMPL = 'k36me3'
HELPERS_IMPL = ['k27ac', 'k4me3']
CHROM_IMPL = 'chr3'
# Hard-coded model file name; not derived from MODEL_NAME_2.
MODEL_IMPL_NAME_2 = 'OD8.k36me3.chr3.n_train_10000.h5'
OUT_BW_NAME_2 = 'OD8_k36me3.chr3.b25.prediction.bw'

  
#training CNN steps
def train_cnn(preprocessing = False,
              X_files = None, y_file = None,
              histone_target = None, helpers = None, chrom_train = None, name_exp = NAME_EXP, quality_percent = 0.2,
              w = W, n_train = None, model_output_name = 'CNN.h5'):
    """Build the training frames and hand them to ``create_cnn``.

    Two input modes are supported:

    * ``preprocessing`` falsy: ``X_files`` and ``y_file`` must be supplied
      by the caller and are used as-is.
    * ``preprocessing`` truthy: ``X_files`` / ``y_file`` are ignored and
      replaced by the pair returned from
      ``create_subs_data(histone_target, helpers, chrom_train, name_exp,
      'train', quality_percent)``. The SNR of the first X file and of the
      y file is printed for inspection.

    Parameters
    ----------
    preprocessing : bool
        Selects the input mode described above.
    X_files, y_file
        Input files and target file (used only without preprocessing).
    histone_target, helpers, chrom_train, name_exp, quality_percent
        Forwarded to ``create_subs_data`` (used only with preprocessing).
    w, n_train
        Forwarded to ``process_model_df``.
    model_output_name : str
        Forwarded to ``create_cnn``.

    Returns
    -------
    Whatever ``create_cnn`` returns (presumably the trained model --
    confirm in ``cnn_model``). Previously this value was discarded.

    Raises
    ------
    ValueError
        If ``preprocessing`` is falsy and ``X_files`` or ``y_file`` is None.
    """
    if preprocessing:
        X_files, y_file = create_subs_data(histone_target, helpers, chrom_train, name_exp, 'train', quality_percent)
        print(X_files[0], SNR(X_files[0]))
        print(y_file, SNR(y_file))
    elif X_files is None or y_file is None:
        # Fail early with a clear message instead of passing None downstream.
        raise ValueError(
            "train_cnn: X_files and y_file are required when preprocessing is False"
        )

    X_df, y_df = process_model_df(X_files, y_file, w, n_train)
    return create_cnn(X_df, y_df, model_output_name)

    
#applying CNN steps
def apply_cnn(preprocessing = False,
              bounds_impl = None,
              X_files_impl = None, y_file_impl = None,
              histone_impl = None, helpers = None, chrom_impl = None, name_exp = NAME_EXP, quality_percent = 0.2,
              model_name = 'CNN.h5', output_bw_name = 'prediction.bw'):
    """Resolve the input files and run ``model_implementation`` on them.

    Two input modes are supported:

    * ``preprocessing`` falsy: ``X_files_impl`` (non-empty sequence) and
      ``y_file_impl`` are supplied by the caller and used as-is.
    * ``preprocessing`` truthy: they are ignored and replaced by the pair
      returned from ``create_subs_data(histone_impl, helpers, chrom_impl,
      name_exp, 'impl', quality_percent)``. The SNR of the first X file
      and of the y file is printed for inspection.

    The first element of ``X_files_impl`` is passed to
    ``model_implementation`` separately from the remaining elements, so
    the order of the sequence matters.

    Parameters
    ----------
    preprocessing : bool
        Selects the input mode described above.
    bounds_impl
        Forwarded to ``model_implementation`` unchanged (the notebook
        passes a dict with 'start' and 'end' keys).
    X_files_impl, y_file_impl
        Input files and check file (used only without preprocessing).
    histone_impl, helpers, chrom_impl, name_exp, quality_percent
        Forwarded to ``create_subs_data`` (used only with preprocessing).
    model_name, output_bw_name
        Forwarded to ``model_implementation``.

    Raises
    ------
    ValueError
        If ``preprocessing`` is falsy and ``X_files_impl`` is None or empty.
    """
    if preprocessing:
        X_files_impl, y_file_impl = create_subs_data(histone_impl, helpers, chrom_impl, name_exp, 'impl', quality_percent)
        print(X_files_impl[0], SNR(X_files_impl[0]))
        print(y_file_impl, SNR(y_file_impl))
    elif X_files_impl is None or len(X_files_impl) == 0:
        # Otherwise X_files_impl[0] below fails with an opaque
        # TypeError / IndexError.
        raise ValueError(
            "apply_cnn: X_files_impl must be a non-empty sequence when preprocessing is False"
        )

    primary_file = X_files_impl[0]
    remaining_files = X_files_impl[1:]
    model_implementation(model_name, primary_file, remaining_files, bounds_impl, y_file_impl, output_bw_name)
    

#training CNN without data preprocessing
def train_wout_data_preprocessing(X_FILES_IMPL, Y_FILE, N_TRAIN_1, MODEL_NAME_1):
    """Call ``train_cnn`` with ``preprocessing=False`` on explicit files.

    Note: despite its name, ``X_FILES_IMPL`` is forwarded as the training
    input list (``X_files``); the parameter shadows the module-level
    constant of the same name inside this function.

    Arguments map to ``train_cnn`` as: ``X_FILES_IMPL`` -> ``X_files``,
    ``Y_FILE`` -> ``y_file``, ``N_TRAIN_1`` -> ``n_train``,
    ``MODEL_NAME_1`` -> ``model_output_name``. Returns None.
    """
    forwarded = dict(
        preprocessing=False,
        X_files=X_FILES_IMPL,
        y_file=Y_FILE,
        n_train=N_TRAIN_1,
        model_output_name=MODEL_NAME_1,
    )
    train_cnn(**forwarded)

#training CNN with data preprocessing
def train_w_data_preprocessing(HISTONE_TARGET, HELPERS, CHROM_TRAIN, N_TRAIN_2, MODEL_NAME_2):
    """Call ``train_cnn`` with ``preprocessing=True``.

    Arguments map to ``train_cnn`` as: ``HISTONE_TARGET`` ->
    ``histone_target``, ``HELPERS`` -> ``helpers``, ``CHROM_TRAIN`` ->
    ``chrom_train``, ``N_TRAIN_2`` -> ``n_train``, ``MODEL_NAME_2`` ->
    ``model_output_name``. All other ``train_cnn`` options keep their
    defaults. Returns None.
    """
    forwarded = dict(
        preprocessing=True,
        histone_target=HISTONE_TARGET,
        helpers=HELPERS,
        chrom_train=CHROM_TRAIN,
        n_train=N_TRAIN_2,
        model_output_name=MODEL_NAME_2,
    )
    train_cnn(**forwarded)
    
#applying CNN without data preprocessing    
def apply_wout_data_preprocessing(X_FILES_IMPL, Y_FILE_CHECK, MODEL_IMPL_NAME_1, OUT_BW_NAME_1, bounds = None):
    """Call ``apply_cnn`` with ``preprocessing=False`` on explicit files.

    Arguments map to ``apply_cnn`` as: ``X_FILES_IMPL`` ->
    ``X_files_impl``, ``Y_FILE_CHECK`` -> ``y_file_impl``,
    ``MODEL_IMPL_NAME_1`` -> ``model_name``, ``OUT_BW_NAME_1`` ->
    ``output_bw_name``, ``bounds`` -> ``bounds_impl``. Returns None.
    """
    forwarded = dict(
        preprocessing=False,
        bounds_impl=bounds,
        X_files_impl=X_FILES_IMPL,
        y_file_impl=Y_FILE_CHECK,
        model_name=MODEL_IMPL_NAME_1,
        output_bw_name=OUT_BW_NAME_1,
    )
    apply_cnn(**forwarded)

#applying CNN with data preprocessing        
def apply_w_data_preprocessing(HISTONE_IMPL, HELPERS_IMPL, CHROM_IMPL, MODEL_IMPL_NAME_2, OUT_BW_NAME_2, bounds = None):
    """Call ``apply_cnn`` with ``preprocessing=True``.

    Arguments map to ``apply_cnn`` as: ``HISTONE_IMPL`` ->
    ``histone_impl``, ``HELPERS_IMPL`` -> ``helpers``, ``CHROM_IMPL`` ->
    ``chrom_impl``, ``MODEL_IMPL_NAME_2`` -> ``model_name``,
    ``OUT_BW_NAME_2`` -> ``output_bw_name``, ``bounds`` ->
    ``bounds_impl``. Returns None.
    """
    forwarded = dict(
        preprocessing=True,
        bounds_impl=bounds,
        histone_impl=HISTONE_IMPL,
        helpers=HELPERS_IMPL,
        chrom_impl=CHROM_IMPL,
        model_name=MODEL_IMPL_NAME_2,
        output_bw_name=OUT_BW_NAME_2,
    )
    apply_cnn(**forwarded)
    

In [19]:
# Train with preprocessing: the input files are produced inside train_cnn
# (via create_subs_data) from the histone/helpers/chromosome settings.
train_w_data_preprocessing(HISTONE_TARGET, HELPERS, CHROM_TRAIN, N_TRAIN_2, MODEL_NAME_2)


In [20]:
# Train directly on the explicit X_FILES / Y_FILE paths (no preprocessing).
# X_FILES binds positionally to the wrapper's first parameter, which is
# named X_FILES_IMPL but is used as the training input list.
train_wout_data_preprocessing(X_FILES, Y_FILE, N_TRAIN_1, MODEL_NAME_1)


In [21]:
# Apply the model file MODEL_IMPL_NAME_1 to the explicit X_FILES_IMPL paths
# with BOUNDS_IMPL_1; OUT_BW_NAME_1 is passed as the output name.
apply_wout_data_preprocessing(X_FILES_IMPL, Y_FILE_CHECK, 
                              MODEL_IMPL_NAME_1, OUT_BW_NAME_1, 
                              bounds = BOUNDS_IMPL_1)


In [22]:
# Apply the model file MODEL_IMPL_NAME_2 with preprocessing: the input files
# are produced inside apply_cnn (via create_subs_data) from the
# histone/helpers/chromosome settings; BOUNDS_IMPL_2 is passed as bounds.
apply_w_data_preprocessing(HISTONE_IMPL, HELPERS_IMPL, CHROM_IMPL, 
                           MODEL_IMPL_NAME_2, OUT_BW_NAME_2, 
                           bounds = BOUNDS_IMPL_2)
