In [1]:
import tensorflow as tf

import numpy as np
import requests as rq
import io, h5py

import matplotlib.pyplot as plt

In [2]:
# Download the synthetic dataset (an HDF5 file) into memory.
# The timeout is (connect, read) in seconds, so an unreachable or stalled host
# raises promptly instead of blocking the kernel indefinitely.
data = rq.get(
    'https://www.dropbox.com/s/c3umbo5y13sqcfp/synthetic_dataset.h5?raw=true',
    timeout=(10, 60),
)
data.raise_for_status()

# Read every split from the in-memory file. Each X array has its last two axes
# swapped by the transpose below.
# NOTE(review): presumably stored as (N, A, L) and converted to (N, L, A) — confirm.
with h5py.File(io.BytesIO(data.content), 'r') as dataset:
    x_train = np.array(dataset['X_train']).astype(np.float32).transpose([0, 2, 1])
    # NOTE(review): Y_train is cast to float32 while Y_valid / Y_test are cast
    # to int32 — confirm this difference is intentional.
    y_train = np.array(dataset['Y_train']).astype(np.float32)
    x_valid = np.array(dataset['X_valid']).astype(np.float32).transpose([0, 2, 1])
    y_valid = np.array(dataset['Y_valid']).astype(np.int32)
    x_test = np.array(dataset['X_test']).astype(np.float32).transpose([0, 2, 1])
    y_test = np.array(dataset['Y_test']).astype(np.int32)

ConnectionError: HTTPSConnectionPool(host='www.dropbox.com', port=443): Max retries exceeded with url: /s/c3umbo5y13sqcfp/synthetic_dataset.h5?raw=true (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x00000129FD958190>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))

In [3]:
# Load the trained Keras model from its HDF5 file. The handle is opened in a
# `with` block so it is closed as soon as loading finishes instead of being
# left open for the lifetime of the kernel.
with h5py.File(r'models/model.h5', 'r') as model_file:
    model = tf.keras.models.load_model(model_file)

In [4]:
# Evaluate the loaded model on the validation split, then print a
# layer-by-layer description of its architecture.
model.evaluate(x=x_valid, y=y_valid)
model.summary()

NameError: name 'x_valid' is not defined

In [None]:
layer = 3        # activation layer for 1st convolutional layer
threshold = 0.5  # threshold for significant activations
window = 20      # window size of alignment

# Get the feature maps produced by the chosen layer for every test sequence.
# The code below indexes fmap as (sequence, position, filter).
intermediate = tf.keras.Model(inputs=model.inputs, outputs=model.layers[layer].output)
fmap = intermediate.predict(x_test)
num_filters = fmap.shape[-1]

# set the left and right window sizes
window_left = window // 2
window_right = window - window_left

N, L, A = x_test.shape

ppms = []
for filter_index in range(num_filters):
    activations = fmap[:, :, filter_index]

    # find positions whose activation exceeds `threshold` times this filter's max
    seq_hits, pos_hits = np.where(activations > np.max(activations) * threshold)

    # order the hits from strongest to weakest activation
    order = np.argsort(activations[seq_hits, pos_hits])[::-1]
    data_index = seq_hits[order]
    pos_index = pos_hits[order]

    # make a sequence alignment centered about each activation (above threshold)
    # NOTE(review): the feature-map position is used directly as an index into
    # x_test — this assumes the layer output is position-aligned with the
    # input (e.g. 'same' padding); confirm against the model.
    seq_align = []
    for seq_id, center in zip(data_index, pos_index):
        start_window = center - window_left
        end_window = center + window_right

        # Keep only windows that lie fully inside the sequence. The slice end
        # is exclusive, so start == 0 and end == L are both valid windows.
        if start_window >= 0 and end_window <= L:
            seq_align.append(x_test[seq_id, start_window:end_window, :])

    # calculate position probability matrix
    if seq_align:
        ppms.append(np.mean(seq_align, axis=0))
    else:
        # No usable window for this filter (e.g. the filter never activates):
        # fall back to a uniform matrix so `ppms` stays a regular, NaN-free
        # (num_filters, window, A) array.
        ppms.append(np.full((window, A), 1.0 / A, dtype=x_test.dtype))
ppms = np.array(ppms)

# `ppms` is 3-D, so np.where returns one index array per axis
# (filter, position, letter). Print how many entries exceed 0.5.
filter_hits, position_hits, letter_hits = np.where(ppms > 0.5)
print(position_hits.shape)

In [19]:
# Write each filter's position probability matrix to its own text file in
# MEME motif format. The output directory must already exist: open() does not
# create missing directories.
for i in range(ppms.shape[0]):
    motif = ppms[i]

    out = [
        "MEME version 4\n\n",
        "ALPHABET= ACGT\n\n",
        "strands: + -\n\n",
        "Background letter frequencies\n",
        "A 0.25 C 0.25 G 0.25 T 0.25\n\n",
        "MOTIF test-motif\n",
        # The width is taken from the matrix itself so the header stays
        # correct if the alignment window size is changed.
        "letter-probability matrix: alength= 4 w= %d\n" % motif.shape[0],
    ]

    # one line per motif position: the four letter probabilities
    out.extend(
        "%.4f %.4f %.4f %.4f\n" % (row[0], row[1], row[2], row[3])
        for row in motif
    )

    # Mode 'w' creates or truncates the file, so no separate pre-creation
    # step is needed.
    with open(f'motifs/model-test/filter-{i+1}.txt', 'w') as file:
        file.writelines(out)