In [2]:
import argparse
import os
import sys

import h5py
import keras.backend as K
import numpy as np
from keras.models import model_from_json

sys.path.insert(0, os.path.abspath('../../Evolutron'))
from evolutron.engine import DeepTrainer
from evolutron.tools import load_dataset, Handle, shape, data_it
from evolutron.motifs import motif_extraction
from evolutron.networks import custom_layers

In [39]:
# First load model architecture
filename = '../models/mycoplasma/200_30_29_1_2_DeepCoFAM.model'

# Open explicitly read-only (older h5py versions default to a writable mode that
# can create or modify the file) and let the context manager close the handle
# even if the attribute lookup raises.
with h5py.File(filename, 'r') as hf:
    model_config = hf.attrs['model_config']

# Depending on the h5py version and how the attribute was stored, the value
# comes back as bytes or as str; only bytes need decoding.
if isinstance(model_config, bytes):
    model_config = model_config.decode('utf8')

In [40]:
# Rebuild the Keras model from its JSON config and wrap it in a DeepTrainer.
# `custom_layers` is passed as `custom_objects` so that layer classes that are
# not built into Keras can be resolved during deserialisation.
net = DeepTrainer(model_from_json(model_config, custom_objects=custom_layers))

In [45]:
# NOTE(review): this path lacks the leading '../' that `filename` has. It is only
# handed to Handle.from_filename here — presumably parsed for run metadata rather
# than opened from disk; confirm before unifying the two paths.
filename2 = 'models/mycoplasma/200_30_29_1_2_DeepCoFAM.model'
handle = Handle.from_filename(filename2)

# Then load model parameters
net.load_all_param_values(filename)

# Dataset identifier taken from the handle built above
data_id = handle.dataset

# NOTE(review): padded=True presumably pads sequences to a common length — confirm
x_data, y_data = load_dataset(data_id, padded=True)

Dataset size: 76291


In [41]:
# Collect the convolutional layers exposed by the trainer
conv_layers = net.get_conv_layers()

# Use the first convolutional layer (the index was changed from -1 to 0)
conv_layer = conv_layers[0]

# Symbolic output of the selected layer
conv_scores = conv_layer.output

# Mask that is true wherever at least one entry along the input's last axis is
# non-zero; multiplying by it zeroes the scores at positions whose input vector
# is entirely zero (presumably padding — confirm).
# NOTE(review): the multiplication assumes the layer output broadcasts against
# the input-shaped mask, i.e. the convolution keeps the sequence length — confirm.
boolean_mask = K.any(K.not_equal(net.input, 0.0), axis=-1, keepdims=True)
conv_scores = conv_scores * K.cast(boolean_mask, K.floatx())

# Compile function that spits out the outputs of the correct convolutional layer
motif_fun = K.function([net.input], [conv_scores])

In [42]:
# Geometry of the selected convolutional layer.
filters = conv_layer.filters
kernel_size = conv_layer.kernel_size[0]  # first (only used) kernel dimension

# `depth` scales the extra span added on top of a single kernel; with depth = 0
# the resulting `vf` is simply `kernel_size`.
# NOTE(review): `vf` presumably stands for the filter's visual/receptive field — confirm.
depth = 0
vf = depth * (kernel_size - 1) + kernel_size

In [49]:
# Score the dataset in batches of 5000 inputs and, per batch, keep only the
# per-filter maximum activation and the position where it occurs.
max_seq_scores = []
for x_part in data_it(x_data, 5000):
    # motif_fun returns a one-element list; unpacking enforces exactly one output.
    (seq_scores,) = motif_fun([x_part])
    seq_scores = np.asarray(seq_scores)

    # For every filter, keep max and argmax for each input protein.
    # Reducing over axis 1 (sequence position) for the whole batch at once
    # replaces the per-protein Python loop; stacking on axis 1 yields an array
    # of shape (batch, 2, filters), the layout the per-protein vstack produced.
    max_seq_scores.append(
        np.stack((seq_scores.max(axis=1), seq_scores.argmax(axis=1)), axis=1)
    )

    # Release the full activation tensor before the next batch is computed.
    del seq_scores

KeyboardInterrupt: 

In [52]:
# Merge the per-batch (batch, 2, filters) arrays along the batch axis and
# reorder the axes to (filters, sequences, 2).
# The name is reused for the result, so guard on the type: re-running this cell
# after the conversion would otherwise concatenate the finished array again and
# fail in transpose.
if isinstance(max_seq_scores, list):
    max_seq_scores = np.concatenate(max_seq_scores).transpose((2, 0, 1))

In [53]:
# Sanity check of the result layout: (filter, sequence, [max score, argmax position]).
# The sequence axis only covers the batches that were actually processed.
max_seq_scores.shape

(200, 10000, 2)