In [2]:
# Colab-only setup: mount Google Drive at /gdrive and make it the working directory.
from google.colab import drive
drive.mount('/gdrive')
%cd /gdrive 

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).
/gdrive


In [3]:
cd /gdrive/My\ Drive/nn 

/gdrive/My Drive/nn


In [4]:
# Colab magic: select the TensorFlow 1.x runtime before TensorFlow is imported.
%tensorflow_version 1.x
import argparse
import os
import pickle
import sys

import numpy as np

np.random.seed(7)  # for reproducibility

import tensorflow as tf
# Graph-level TensorFlow seed (TF 1.x API); applies to the current default graph.
tf.random.set_random_seed(5005)

from sklearn.model_selection import train_test_split, KFold

# tf.python.control_flow_ops = tf


# NOTE(review): the imports below mix `tensorflow.python.keras` (an internal module path)
# with the public `tensorflow.keras` namespace; consider using `tensorflow.keras` throughout.
from tensorflow.python.keras.models import Model, load_model
from tensorflow.python.keras.layers import Input
from tensorflow.python.keras.layers import Dense, Flatten, Dropout
from tensorflow.python.keras.layers.convolutional import Conv1D
from tensorflow.python.keras.layers.pooling import MaxPooling1D
from tensorflow.keras.optimizers import Adam
from tensorflow.python.keras.callbacks import ModelCheckpoint, EarlyStopping
import tensorflow.python.keras.backend as K
from tensorflow.keras.utils import to_categorical

import matplotlib as mpl

# Select the non-interactive Agg backend.
mpl.use('Agg')
# NOTE(review): `import utils` runs before the sys.path.append(".") on the next line, so the
# append does not influence this import; `utils` is presumably resolved from the working
# directory set by the earlier %cd cell -- confirm.
import utils
sys.path.append(".")
from utils import precision, recall, load_data_merged


TensorFlow 1.x selected.


Using TensorFlow backend.


In [0]:
def train_diff_model(data_path, res_path, model_name, input_len,
                     num_epoch, batchsize, model_path="./weights.hdf5",
                     number_of_folds=1, save=True):
    """
    Build, train, optionally save, and evaluate the sequence classifier.

    The model is created by ``create_seq_model(input_len)``, compiled with Adam and
    binary cross-entropy, fit once per fold returned by ``load_data_merged``, and
    finally evaluated on the test split.

    :param data_path: path to the data file (consists of train, valid and test data);
        passed to ``load_data_merged``
    :param res_path: directory the final model is written to when ``save`` is true;
        created if it does not exist
    :param model_name: base file name of the saved model; ".h5" is appended
    :param input_len: input length, passed to ``create_seq_model`` and ``load_data_merged``
    :param num_epoch: maximum number of epochs for each ``fit`` call
    :param batchsize: batch size used for fitting
    :param model_path: file path the ``ModelCheckpoint`` callback writes to (only when the
        monitored value improves); its parent directory is created if missing
    :param number_of_folds: forwarded to ``load_data_merged`` as ``kfold``
    :param save: when true, save the in-memory model after the last ``fit`` call to
        ``<res_path>/<model_name>.h5``
    :return: the result of ``model.evaluate`` on the test data (ordered like
        ``model.metrics_names``)
    """
    print('creating model')
    # NOTE(review): create_seq_model is not defined in the part of the notebook visible
    # here; it has to exist in the kernel before this function is called.
    model = create_seq_model(input_len)
    print('compiling model')
    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=1e-6)
    model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])

    # Create the output directories up front so a missing folder fails (or is fixed)
    # before any training time is spent, instead of when the first file is written.
    checkpoint_dir = os.path.dirname(model_path)
    if checkpoint_dir:
        os.makedirs(checkpoint_dir, exist_ok=True)
    if save and res_path:
        os.makedirs(res_path, exist_ok=True)

    checkpointer = ModelCheckpoint(filepath=model_path, verbose=1, save_best_only=True)
    earlystopper = EarlyStopping(monitor='val_loss', patience=5, verbose=1)

    print('loading data')
    (x_train_list, y_train_list, x_valid_list, y_valid_list,
     x_test_seq, y_test) = load_data_merged(data_path, input_len, kfold=number_of_folds)

    print('fitting the model')
    # NOTE(review): the same model instance and the same callback objects are reused for
    # every fold, so weights carry over from one fold to the next. Confirm this is
    # intended rather than training a fresh model per fold.
    for i, x_train_seq in enumerate(x_train_list):
        print("Using fold %s/%s" % (i + 1, number_of_folds))
        model.fit(x_train_seq, y_train_list[i], epochs=num_epoch, batch_size=batchsize,
                  validation_data=(x_valid_list[i], y_valid_list[i]), verbose=2,
                  callbacks=[checkpointer, earlystopper])

    if save:
        print('saving the model')
        model.save(os.path.join(res_path, model_name + ".h5"))

    print('testing the model')
    score = model.evaluate(x_test_seq, y_test)

    for metric_name, metric_value in zip(model.metrics_names, score):
        print(str(metric_name) + ": " + str(metric_value))

    return score

In [0]:
# Train on the ccpg1 classifier dataset with 150-long inputs over 10 folds;
# checkpoints go to ./models/temp/1 and the final model to ./models/150cpg.h5.
train_diff_model(
    data_path=r"dataset/classifier_data_ccpg1.pkl",
    res_path="./models",
    model_name="150cpg",
    model_path="./models/temp/1",
    input_len=150,
    num_epoch=20,
    batchsize=128,
    number_of_folds=10,
    save=True,
)

**Models**

In [0]:
def deepripe_model(input_len):
    """
    Build an uncompiled 1D-CNN binary classifier with a 250-unit hidden layer.

    Architecture: Conv1D(90, kernel 7) -> MaxPooling1D(4, stride 2) -> Dropout(0.25)
    -> Conv1D(100, kernel 5) -> MaxPooling1D(10, stride 5) -> Dropout(0.25) -> Flatten
    -> Dense(250, relu) -> Dense(1, sigmoid).

    Side effects: clears the Keras session, re-applies the TensorFlow graph-level
    seed, and prints the model summary.

    :param input_len: length of the input sequence; the model input has shape
        ``(input_len, 4)``
    :return: the constructed Keras ``Model``
    """
    # Reset Keras/TF state first, then seed: the seed is set after the session is
    # cleared so that it applies to the graph the layers below are built in.
    K.clear_session()
    tf.random.set_random_seed(5005)

    input_node = Input(shape=(input_len, 4), name="input")

    # First convolution block.
    conv1 = Conv1D(filters=90, kernel_size=7, padding='valid', activation="relu", name="conv1")(input_node)
    pool1 = MaxPooling1D(pool_size=4, strides=2, name="pool1")(conv1)
    drop1 = Dropout(0.25, name="drop1")(pool1)

    # Second convolution block.
    conv2 = Conv1D(filters=100, kernel_size=5, padding='valid', activation="relu", name="conv2")(drop1)
    pool2 = MaxPooling1D(pool_size=10, strides=5)(conv2)
    drop2 = Dropout(0.25)(pool2)
    flat = Flatten()(drop2)

    # Dense head with a single sigmoid output.
    hidden1 = Dense(250, activation='relu', name="hidden1")(flat)
    output = Dense(1, activation='sigmoid', name="output")(hidden1)
    model = Model(inputs=[input_node], outputs=output)

    # summary() prints the table itself and returns None, so it must not be wrapped
    # in print() (that would emit an extra "None" line).
    model.summary()
    return model



In [0]:
def model_baseline(input_len):
    """
    Build an uncompiled 1D-CNN binary classifier with a 500-unit hidden layer.

    Architecture: Conv1D(90, kernel 7) -> MaxPooling1D(4, stride 2) -> Dropout(0.25)
    -> Conv1D(100, kernel 5) -> MaxPooling1D(10, stride 5) -> Dropout(0.25) -> Flatten
    -> Dense(500, relu) -> Dense(1, sigmoid).

    Side effects: clears the Keras session, re-applies the TensorFlow graph-level
    seed, and prints the model summary.

    :param input_len: length of the input sequence; the model input has shape
        ``(input_len, 4)``
    :return: the constructed Keras ``Model``
    """
    # Reset Keras/TF state first, then seed: the seed is set after the session is
    # cleared so that it applies to the graph the layers below are built in.
    K.clear_session()
    tf.random.set_random_seed(5005)

    input_node = Input(shape=(input_len, 4), name="input")

    # First convolution block.
    conv1 = Conv1D(filters=90, kernel_size=7, padding='valid', activation="relu", name="conv1")(input_node)
    pool1 = MaxPooling1D(pool_size=4, strides=2, name="pool1")(conv1)
    drop1 = Dropout(0.25, name="drop1")(pool1)

    # Second convolution block.
    conv2 = Conv1D(filters=100, kernel_size=5, padding='valid', activation="relu", name="conv2")(drop1)
    pool2 = MaxPooling1D(pool_size=10, strides=5)(conv2)
    drop2 = Dropout(0.25)(pool2)
    flat = Flatten()(drop2)

    # Dense head with a single sigmoid output.
    hidden1 = Dense(500, activation='relu', name="hidden1")(flat)
    output = Dense(1, activation='sigmoid', name="output")(hidden1)
    model = Model(inputs=[input_node], outputs=output)

    # summary() prints the table itself and returns None, so it must not be wrapped
    # in print() (that would emit an extra "None" line).
    model.summary()
    return model