In [6]:
!pip install https://github.com/p-koo/tfomics/tarball/master

Collecting https://github.com/p-koo/tfomics/tarball/master
  Using cached https://github.com/p-koo/tfomics/tarball/master


# Imports

In [7]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model

import numpy as np
import requests as rq
import os, io, h5py
import pickle as pk

from tfomics import moana
from tfomics.layers import MultiHeadAttention

# Retrieve Dataset

In [3]:
# Download the synthetic dataset (an HDF5 file) fully into memory.
# The timeout makes a stalled connection raise instead of hanging the kernel
# forever; requests applies no timeout by default.
data = rq.get(
    'https://www.dropbox.com/s/c3umbo5y13sqcfp/synthetic_dataset.h5?raw=true',
    timeout=60,
)
data.raise_for_status()  # fail loudly on HTTP errors rather than parsing an error page

# Read every split from the in-memory file. Inputs are cast to float32 and
# their last two axes are swapped with transpose([0, 2, 1]).
# NOTE(review): y_train is cast to float32 while y_valid / y_test are cast to
# int32 — confirm this difference is intended.
with h5py.File(io.BytesIO(data.content), 'r') as dataset:
    x_train = np.array(dataset['X_train']).astype(np.float32).transpose([0, 2, 1])
    y_train = np.array(dataset['Y_train']).astype(np.float32)
    x_valid = np.array(dataset['X_valid']).astype(np.float32).transpose([0, 2, 1])
    y_valid = np.array(dataset['Y_valid']).astype(np.int32)
    x_test = np.array(dataset['X_test']).astype(np.float32).transpose([0, 2, 1])
    y_test = np.array(dataset['Y_test']).astype(np.int32)

# Define & Train Models

In [22]:
# Sweep configuration: `category` names the hyperparameter being varied (used
# for the output sub-directories) and `variants` lists the values to try.
category = "filters"
variants = [1, 16, 32, 64, 128, 256]

# Zero-pad each variant to the width of the largest one so that every model
# name has the same length.
places = len(str(max(variants)))
names = [f"model-{str(variant).zfill(places)}" for variant in variants]
print(names)

# Output directories for the trained weights and the exported motifs.
os.makedirs(f'models/{category}', exist_ok=True)
os.makedirs(f'motifs/{category}', exist_ok=True)

# Build, train, save and analyse one model per variant. (A leftover
# unconditional `break` previously stopped the sweep after the first variant.)
for num_filters, model_name in zip(variants, names):
    # Input
    inputs = layers.Input(shape=(200, 4))

    # Convolutional Block
    nn = layers.Conv1D(filters=num_filters, kernel_size=19, use_bias=False, padding='same')(inputs)
    nn = layers.Activation('relu', name='conv_activation')(nn)
    nn = layers.MaxPool1D(pool_size=25)(nn)
    nn = layers.Dropout(0.1)(nn)

    # Positional Encoding: an embedding with one vector per pooled position,
    # sized to match the feature dimension, added to the pooled features.
    positions = tf.range(nn.shape[1])
    context = layers.Embedding(input_dim=nn.shape[1], output_dim=nn.shape[2])(positions)
    nn = tf.add(nn, context)

    # Multi-Head Attention (self-attention: query, key and value are all `nn`).
    # The attention weights are returned but not used below.
    nn, weights = MultiHeadAttention(num_heads=16, d_model=64)(nn, nn, nn)
    nn = layers.Dropout(0.1)(nn)

    nn = layers.Flatten()(nn)

    # Feed Forward
    nn = layers.Dense(512, use_bias=False)(nn)
    nn = layers.BatchNormalization()(nn)
    nn = layers.Activation('relu')(nn)
    nn = layers.Dropout(0.5)(nn)

    # Output
    outputs = layers.Dense(12, activation='sigmoid')(nn)

    # Compile model
    model = Model(inputs=inputs, outputs=outputs, name=model_name)
    print('\n' + model.name)

    auroc = tf.keras.metrics.AUC(curve='ROC', name='auroc')
    aupr = tf.keras.metrics.AUC(curve='PR', name='aupr')
    model.compile(tf.keras.optimizers.Adam(0.0005), loss='binary_crossentropy', metrics=[auroc, aupr])

    # Train Model
    # The keyword is `patience`; the former misspelling `patient` was silently
    # ignored by Keras, leaving the default patience of 10 in effect.
    lr_decay = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_aupr', factor=0.2, patience=5, verbose=1, min_lr=1e-7, mode='max')
    model.fit(x_train, y_train, epochs=10, validation_data=(x_valid, y_valid), callbacks=[lr_decay], verbose=2, shuffle=True)

    # Save Model (weights only)
    save_path = os.path.join('models', category, model_name + '.h5')
    model.save_weights(save_path)

    # Extract PPMs
    # Index of the first Activation layer in the model, i.e. 'conv_activation',
    # which is created before the feed-forward Activation.
    index = [type(layer) for layer in model.layers].index(tf.keras.layers.Activation)

    ppms = moana.filter_activations(x_test, model, layer=index, window=20, threshold=0.5)
    ppms = moana.clip_filters(ppms, threshold=0.5, pad=3)

    # Write the motifs for this model to its own text file.
    moana.meme_generate(ppms, output_file=f'motifs/{category}/{model_name}.txt', prefix='filter')


['model-016', 'model-016', 'model-032', 'model-064', 'model-128', 'model-256']

model-016
Train on 21000 samples, validate on 3000 samples
Epoch 1/10
21000/21000 - 4s - loss: 0.4656 - auroc: 0.5317 - aupr: 0.1626 - val_loss: 0.3974 - val_auroc: 0.6145 - val_aupr: 0.2465
Epoch 2/10
21000/21000 - 3s - loss: 0.3792 - auroc: 0.6726 - aupr: 0.3547 - val_loss: 0.3416 - val_auroc: 0.7487 - val_aupr: 0.4414
Epoch 3/10
21000/21000 - 3s - loss: 0.3182 - auroc: 0.7928 - aupr: 0.5378 - val_loss: 0.2667 - val_auroc: 0.8697 - val_aupr: 0.6508
Epoch 4/10
21000/21000 - 3s - loss: 0.2756 - auroc: 0.8563 - aupr: 0.6480 - val_loss: 0.2554 - val_auroc: 0.8811 - val_aupr: 0.6865
Epoch 5/10
21000/21000 - 3s - loss: 0.2556 - auroc: 0.8821 - aupr: 0.6947 - val_loss: 0.2582 - val_auroc: 0.8809 - val_aupr: 0.6933
Epoch 6/10
21000/21000 - 3s - loss: 0.2401 - auroc: 0.8993 - aupr: 0.7290 - val_loss: 0.2183 - val_auroc: 0.9205 - val_aupr: 0.7614
Epoch 7/10
21000/21000 - 3s - loss: 0.2340 - auroc: 0.9046 - aupr: 0.