In [8]:
import numpy as np
import pandas as pd
import torch
import torch
import torch.nn as nn
import torch.nn.functional as F

In [9]:
from keras.models import Model
from keras.layers import Input, GRU, Bidirectional
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution1D, MaxPooling1D
from keras.layers import Multiply


In [10]:
def grna_preprocess(lines, length=23):
    """One-hot encode guide-RNA nucleotide sequences.

    Parameters
    ----------
    lines : sequence of str
        Nucleotide strings. Only the first ``length`` characters of each
        string are encoded.
    length : int, optional
        Number of positions to encode per sequence. Defaults to 23, the
        value that used to be hard-coded here.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(len(lines), length, 4)``. The last axis is
        ordered A, C, G, T (upper or lower case). Any other character leaves
        its position all zeros.

    Raises
    ------
    IndexError
        If a sequence has fewer than ``length`` characters.
    """
    # Nucleotide -> channel index; characters missing from the table stay all-zero.
    channel_of = {
        "A": 0, "a": 0,
        "C": 1, "c": 1,
        "G": 2, "g": 2,
        "T": 3, "t": 3,
    }
    seq = np.zeros((len(lines), length, 4), dtype=int)
    for row, sequence in enumerate(lines):
        for pos in range(length):
            # Index (rather than slice) so that a too-short sequence still raises.
            channel = channel_of.get(sequence[pos])
            if channel is not None:
                seq[row, pos, channel] = 1
    return seq


def epi_preprocess(lines, length=23):
    """Binary-encode per-position epigenetic annotation strings.

    Parameters
    ----------
    lines : sequence of str
        Annotation strings. Only the first ``length`` characters of each
        string are encoded.
    length : int, optional
        Number of positions to encode per string. Defaults to 23, the value
        that used to be hard-coded here.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(len(lines), length)`` holding 1 where the
        character is ``"A"`` and 0 for every other character (``"N"``
        included).

    Raises
    ------
    IndexError
        If a string has fewer than ``length`` characters.
    """
    epi = np.zeros((len(lines), length), dtype=int)
    for row, marks in enumerate(lines):
        for pos in range(length):
            # The array starts at zero, so only the "A" case needs a write.
            if marks[pos] == "A":
                epi[row, pos] = 1
    return epi

def preprocess(file_path, usecols):
    """Load four epigenetic annotation columns and stack them per position.

    Parameters
    ----------
    file_path : str
        CSV file to read.
    usecols : list
        Column selection forwarded to ``pandas.read_csv``. The first four
        selected columns are encoded with ``epi_preprocess``.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(n_rows, 23, 4)``; ``out[i, p, k]`` is the
        encoded value of the k-th selected column at position ``p`` of row
        ``i``.
    """
    data = np.array(pd.read_csv(file_path, usecols=usecols))
    tracks = [epi_preprocess(data[:, col]) for col in range(4)]
    # Stacking on a new last axis gives the same (row, position, track) layout
    # the old per-row DataFrame concat produced, without building a DataFrame
    # for every sample.
    return np.stack(tracks, axis=-1)

def load_data(test_file):
    """Read the evaluation CSV and return model inputs plus targets.

    Columns 4 and 9 of ``test_file`` supply the guide sequence and the target
    value; columns 5-8 are handed to ``preprocess`` for the epigenetic input.

    Returns
    -------
    tuple
        ``(x_test, epi_test, y_test)`` where ``x_test`` comes from
        ``grna_preprocess``, ``epi_test`` from ``preprocess`` and ``y_test``
        is the target column reshaped to one row per sample.
    """
    table = np.array(pd.read_csv(test_file, usecols=[4, 9]))
    guides = table[:, 0]
    targets = table[:, 1]

    x_test = grna_preprocess(guides)
    epi_test = preprocess(test_file, [5, 6, 7, 8])
    y_test = targets.reshape(len(targets), -1)
    return x_test, epi_test, y_test

In [11]:
# Input CSV handed to load_data() in the prediction cell.
test_file = "data/input_example.csv"
# Destination CSV for the y_test / y_pred table written by the prediction cell.
result_file = "result/output_example.csv"

In [None]:
# original keras implementation
# seq_input = Input(shape=(23, 4))
# seq_conv1 = Convolution1D(256, 5, kernel_initializer='random_uniform', name='seq_conv1')(seq_input)
# seq_act1 = Activation('relu')(seq_conv1)
# seq_pool1 = MaxPooling1D(2)(seq_act1)
# seq_drop1 = Dropout(0.2)(seq_pool1)
# gru1 = Bidirectional(GRU(256, kernel_initializer='he_normal', dropout=0.3, recurrent_dropout=0.2), name='gru1')(seq_drop1)
# seq_dense1 = Dense(256, name='seq_dense1')(gru1)
# seq_act2 = Activation('relu')(seq_dense1)
# seq_drop2 = Dropout(0.3)(seq_act2)
# seq_dense2 = Dense(128, name='seq_dense2')(seq_drop2)
# seq_act3 = Activation('relu')(seq_dense2)
# seq_drop3 = Dropout(0.2)(seq_act3)
# seq_dense3 = Dense(64, name='seq_dense3')(seq_drop3)
# seq_act4 = Activation('relu')(seq_dense3)
# seq_drop4 = Dropout(0.2)(seq_act4)
# seq_dense4 = Dense(40, name='seq_dense4')(seq_drop4)
# seq_act5 = Activation('relu')(seq_dense4)
# seq_drop5 = Dropout(0.2)(seq_act5)

# epi_input = Input(shape=(23, 4))
# epi_conv1 = Convolution1D(256, 5, name='epi_conv1')(epi_input)
# epi_act1 = Activation('relu')(epi_conv1)
# epi_pool1 = MaxPooling1D(2)(epi_act1)
# epi_drop1 = Dropout(0.3)(epi_pool1)
# epi_dense1 = Dense(256, name='epi_dense1')(epi_drop1)
# epi_act2 = Activation('relu')(epi_dense1)
# epi_drop2 = Dropout(0.2)(epi_act2)
# epi_dense2 = Dense(128, name='epi_dense2')(epi_drop2)
# epi_act3 = Activation('relu')(epi_dense2)
# epi_drop3 = Dropout(0.3)(epi_act3)
# epi_dense3 = Dense(64, name='epi_dense3')(epi_drop3)
# epi_act4 = Activation('relu')(epi_dense3)
# epi_drop4 = Dropout(0.3)(epi_act4)
# epi_act5 = Dense(40, name='epi_dense4')(epi_drop4)
# epi_out = Activation('relu')(epi_act5)

# seq_epi_m = Multiply()([seq_drop5, epi_out])
# seq_epi_drop = Dropout(0.2)(seq_epi_m)
# seq_epi_flat = Flatten()(seq_epi_drop)
# seq_epi_output = Dense(1, activation='linear')(seq_epi_flat)
# model = Model(inputs=[seq_input, epi_input], outputs=[seq_epi_output])

In [None]:
class EPIModel(nn.Module):
    """Epigenetic-feature branch (PyTorch port of the Keras ``epi_*`` layers).

    Input  : float tensor of shape ``(batch, length, 4)`` (channels last, the
             layout produced by ``preprocess``).
    Output : tensor of shape ``(batch, steps, 40)`` where
             ``steps = (length - 4) // 2``; for ``length == 23`` this is
             ``(batch, 9, 40)``.
    """

    def __init__(self):
        super(EPIModel, self).__init__()

        self.epi_conv1 = nn.Conv1d(in_channels=4, out_channels=256, kernel_size=5)
        self.epi_act1 = nn.ReLU()
        self.epi_pool1 = nn.MaxPool1d(kernel_size=2)
        self.epi_drop1 = nn.Dropout(p=0.3)

        self.epi_dense1 = nn.Linear(256, 256)
        self.epi_act2 = nn.ReLU()
        self.epi_drop2 = nn.Dropout(p=0.2)

        self.epi_dense2 = nn.Linear(256, 128)
        self.epi_act3 = nn.ReLU()
        self.epi_drop3 = nn.Dropout(p=0.3)

        self.epi_dense3 = nn.Linear(128, 64)
        self.epi_act4 = nn.ReLU()
        self.epi_drop4 = nn.Dropout(p=0.3)

        self.epi_dense4 = nn.Linear(64, 40)
        self.epi_act5 = nn.ReLU()

    def forward(self, x):
        """Run the branch on a ``(batch, length, 4)`` tensor."""
        # nn.Conv1d / nn.MaxPool1d work on (batch, channels, length), whereas
        # the data (like the Keras original) is channels-last.
        x = x.transpose(1, 2)
        x = self.epi_conv1(x)
        x = self.epi_act1(x)
        x = self.epi_pool1(x)
        x = self.epi_drop1(x)

        # nn.Linear acts on the last axis, so the 256 conv channels have to be
        # moved back there before the dense stack (one dense pass per position).
        x = x.transpose(1, 2)
        x = self.epi_dense1(x)
        x = self.epi_act2(x)
        x = self.epi_drop2(x)
        x = self.epi_dense2(x)
        x = self.epi_act3(x)
        x = self.epi_drop3(x)
        x = self.epi_dense3(x)
        x = self.epi_act4(x)
        x = self.epi_drop4(x)
        x = self.epi_dense4(x)
        x = self.epi_act5(x)

        return x

class SeqModel(nn.Module):
    """Sequence branch (PyTorch port of the Keras ``seq_*`` / ``gru1`` layers).

    Input  : float tensor of shape ``(batch, length, 4)`` (one-hot, channels
             last, the layout produced by ``grna_preprocess``).
    Output : tensor of shape ``(batch, 40)``. As in the Keras reference, whose
             bidirectional GRU does not return sequences, only the final
             forward and backward GRU states feed the dense stack.
    """

    def __init__(self):
        super(SeqModel, self).__init__()
        self.seq_conv1 = nn.Conv1d(4, 256, kernel_size=5)
        nn.init.uniform_(self.seq_conv1.weight, -0.05, 0.05)

        self.seq_pool1 = nn.MaxPool1d(kernel_size=2)

        # `dropout=` on nn.GRU only applies between stacked layers; with a
        # single layer it does nothing except emit a warning, so it is omitted.
        self.gru1 = nn.GRU(256, 256, bidirectional=True, batch_first=True)
        for suffix in ("", "_reverse"):
            nn.init.kaiming_uniform_(getattr(self.gru1, "weight_ih_l0" + suffix))
            nn.init.kaiming_uniform_(getattr(self.gru1, "weight_hh_l0" + suffix))
            nn.init.constant_(getattr(self.gru1, "bias_ih_l0" + suffix), 0)
            nn.init.constant_(getattr(self.gru1, "bias_hh_l0" + suffix), 0)

        self.seq_dense1 = nn.Linear(512, 256)
        self.seq_dense2 = nn.Linear(256, 128)
        self.seq_dense3 = nn.Linear(128, 64)
        self.seq_dense4 = nn.Linear(64, 40)
        self.dropout1 = nn.Dropout(0.2)
        self.dropout2 = nn.Dropout(0.3)
        self.dropout3 = nn.Dropout(0.2)
        self.dropout4 = nn.Dropout(0.2)
        self.dropout5 = nn.Dropout(0.2)

    def forward(self, x):
        """Run the branch on a ``(batch, length, 4)`` tensor."""
        # nn.Conv1d / nn.MaxPool1d expect (batch, channels, length).
        x = x.transpose(1, 2)
        x = self.seq_conv1(x)
        x = F.relu(x)
        x = self.seq_pool1(x)
        x = self.dropout1(x)

        # The batch_first GRU expects (batch, steps, features): put the 256
        # conv channels back on the last axis.
        x = x.transpose(1, 2)
        _, h_n = self.gru1(x)
        # h_n is (2, batch, 256): index 0 is the forward direction's final
        # state, index 1 the backward direction's. Concatenate to (batch, 512).
        x = torch.cat((h_n[0], h_n[1]), dim=1)

        x = self.seq_dense1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.seq_dense2(x)
        x = F.relu(x)
        x = self.dropout3(x)
        x = self.seq_dense3(x)
        x = F.relu(x)
        x = self.dropout4(x)
        x = self.seq_dense4(x)
        x = F.relu(x)
        x = self.dropout5(x)
        return x

class MyModel(nn.Module):
    """Two-branch regressor: sequence features gated by epigenetic features.

    Mirrors the Keras reference head: element-wise product of the two branch
    outputs -> dropout -> flatten -> single linear output unit.
    """

    def __init__(self):
        super(MyModel, self).__init__()
        self.seq_model = SeqModel()
        self.epi_model = EPIModel()
        self.dropout = nn.Dropout(0.2)
        self.flatten = nn.Flatten()
        # For 23-long inputs the conv (kernel 5) leaves 19 positions and the
        # pool (size 2) leaves 9, each carrying 40 features: 9 * 40 = 360.
        self.linear = nn.Linear(360, 1)

    def forward(self, x, epi=None):
        """Predict one value per sample.

        Parameters
        ----------
        x : Tensor or (Tensor, Tensor)
            Sequence input, or a ``[seq, epi]`` pair (list or tuple) when
            ``epi`` is not given separately.
        epi : Tensor, optional
            Epigenetic input. If omitted and ``x`` is a single tensor, that
            tensor is fed to both branches (the original single-input
            behaviour).

        Returns
        -------
        Tensor
            Shape ``(batch, 1)``.
        """
        if epi is None and isinstance(x, (list, tuple)):
            x, epi = x
        if epi is None:
            epi = x

        seq_out = self.seq_model(x)
        epi_out = self.epi_model(epi)
        # A per-sample sequence vector (batch, 40) has to be broadcast over the
        # position axis of a (batch, steps, 40) epigenetic output.
        if seq_out.dim() + 1 == epi_out.dim():
            seq_out = seq_out.unsqueeze(1)

        seq_epi_m = seq_out * epi_out
        seq_epi_drop = self.dropout(seq_epi_m)
        seq_epi_flat = self.flatten(seq_epi_drop)
        seq_epi_output = self.linear(seq_epi_flat)
        return seq_epi_output



In [None]:
# Build the PyTorch model; no trained weights are loaded at this point.
model = MyModel()

In [None]:
print("Loading weights for the models")
# NOTE(review): `load_weights` is the Keras Model API and the file below looks
# like a Keras HDF5 checkpoint. A torch.nn.Module has no `load_weights`, and
# `load_state_dict` cannot read HDF5, so the checkpoint needs a one-off
# layer-by-layer conversion to a PyTorch state_dict. Until that exists, stop
# here with an explicit message instead of an AttributeError.
weights_file = 'weights/C_RNNCrispr_weights.h5'
if not hasattr(model, "load_weights"):
    raise NotImplementedError(
        f"Cannot load {weights_file!r} into {type(model).__name__}: convert the "
        "Keras weights to a PyTorch state_dict and use model.load_state_dict()."
    )
model.load_weights(weights_file)

print("Loading test data")
x_test, epi_test, y_test = load_data(test_file)

print("Predicting on test data")
y_test = pd.DataFrame(y_test)

# The preprocessing helpers return integer NumPy arrays; the model needs float tensors.
seq_tensor = torch.as_tensor(x_test, dtype=torch.float32)
epi_tensor = torch.as_tensor(epi_test, dtype=torch.float32)

batch_size = 256
model.eval()  # switch dropout off for inference
with torch.no_grad():
    batch_preds = [
        model([seq_tensor[start:start + batch_size], epi_tensor[start:start + batch_size]])
        for start in range(0, len(seq_tensor), batch_size)
    ]
y_pred = pd.DataFrame(torch.cat(batch_preds).numpy())

result = pd.concat([y_test, y_pred], axis=1)
result.to_csv(result_file, index=False, sep=',', header=['y_test', 'y_pred'])
