In [None]:
!pip install https://github.com/p-koo/tfomics/tarball/master

# Imports

In [None]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model

import numpy as np
import requests as rq
import io, h5py
import pickle as pk

from tfomics import moana

# Retrieve Dataset

In [None]:
# Download the synthetic dataset (an HDF5 file) into memory.
# The explicit timeout keeps the cell from hanging forever when the server
# stops responding; raise_for_status() surfaces an HTTP error here instead of
# letting h5py fail later on bytes that are not an HDF5 file.
DATASET_URL = 'https://www.dropbox.com/s/c3umbo5y13sqcfp/synthetic_dataset.h5?raw=true'
DOWNLOAD_TIMEOUT_S = 60

data = rq.get(DATASET_URL, timeout=DOWNLOAD_TIMEOUT_S)
data.raise_for_status()


def _read_inputs(h5_file, key):
    """Read dataset `key` as float32 and swap its last two axes.

    The model below takes inputs of shape (200, 4); presumably the file stores
    them as (N, 4, 200) -- TODO confirm against the dataset.
    """
    return np.array(h5_file[key]).astype(np.float32).transpose([0, 2, 1])


def _read_labels(h5_file, key, dtype):
    """Read dataset `key` fully into memory and cast it to `dtype`."""
    return np.array(h5_file[key]).astype(dtype)


# Everything is copied into NumPy arrays inside the `with`, so the arrays stay
# valid after the HDF5 file is closed.
with h5py.File(io.BytesIO(data.content), 'r') as dataset:
    x_train = _read_inputs(dataset, 'X_train')
    x_valid = _read_inputs(dataset, 'X_valid')
    x_test = _read_inputs(dataset, 'X_test')

    # NOTE(review): y_train is float32 while y_valid / y_test are int32. The
    # dtypes are kept exactly as they were; confirm whether the difference is
    # intentional.
    y_train = _read_labels(dataset, 'Y_train', np.float32)
    y_valid = _read_labels(dataset, 'Y_valid', np.int32)
    y_test = _read_labels(dataset, 'Y_test', np.int32)

# Connect to Drive

In [None]:
from google.colab import drive

# Google Drive is attached at this path; the training and motif cells below
# write their outputs underneath '/content/drive/MyDrive/...'.
DRIVE_MOUNT_POINT = '/content/drive'

# force_remount=True requests a fresh mount even when Drive is already attached.
drive.mount(DRIVE_MOUNT_POINT, force_remount=True)

# Define Models

In [None]:
# `category` names the output sub-folder used by the training / motif cells;
# `variants` lists the max-pool window sizes to compare (one model per size).
category = "pools"
variants = [1, 5, 15, 25, 50, 100]


def build_model(pool_size):
    """Build and compile one convolution + self-attention classifier.

    All variants share the same architecture; only the window of the
    max-pooling layer that follows the first convolution differs.

    Args:
        pool_size: Window size passed to ``MaxPool1D``; it is also embedded in
            the model name (``model-<pool_size>``).

    Returns:
        A compiled Keras ``Model`` taking inputs of shape (200, 4) and emitting
        12 sigmoid outputs, trained with binary cross-entropy and tracked with
        the ``auroc`` and ``aupr`` metrics.
    """
    sequence = layers.Input(shape=(200, 4))

    # Convolutional block. The activation is given an explicit name; the motif
    # extraction cell later looks up the first Activation layer of each model.
    x = layers.Conv1D(filters=32, kernel_size=19, use_bias=False, padding='same')(sequence)
    x = layers.Activation('relu', name='conv_activation')(x)
    x = layers.MaxPool1D(pool_size=pool_size)(x)
    x = layers.Dropout(0.1)(x)

    # Positional encoding: one embedding vector per pooled position, with the
    # same width as the feature map so the two can be added element-wise.
    # NOTE(review): the Embedding is applied to a constant position tensor and
    # not to anything derived from the Input -- confirm (e.g. with
    # model.summary()) that its weights are tracked and trained by the model.
    pooled_length, n_features = x.shape[1], x.shape[2]
    positions = tf.range(pooled_length)
    position_embedding = layers.Embedding(
        input_dim=positions.shape[0],
        output_dim=n_features,
    )(positions)
    x = tf.add(x, position_embedding)

    # Multi-head self-attention (query and value are the same tensor). The
    # attention scores are requested but not used further in this cell.
    x, _attention_scores = layers.MultiHeadAttention(num_heads=16, key_dim=64)(
        x, x, return_attention_scores=True
    )
    x = layers.Dropout(0.1)(x)
    x = layers.LayerNormalization()(x)

    x = layers.Flatten()(x)

    # Feed-forward block; the Dense layer has no bias because batch
    # normalisation follows it directly.
    x = layers.Dense(512, use_bias=False)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    x = layers.Dropout(0.5)(x)

    # Output layer: 12 independent sigmoid units.
    predictions = layers.Dense(12, activation='sigmoid')(x)

    compiled = Model(inputs=sequence, outputs=predictions, name=f"model-{pool_size}")

    # Fresh metric objects per model so no metric state is shared between them.
    metrics = [
        tf.keras.metrics.AUC(curve='ROC', name='auroc'),
        tf.keras.metrics.AUC(curve='PR', name='aupr'),
    ]
    compiled.compile(tf.keras.optimizers.Adam(0.0005), loss='binary_crossentropy', metrics=metrics)

    return compiled


# One compiled model per pooling size, in the same order as `variants`.
models = [build_model(pool_size) for pool_size in variants]

# Train Models

In [None]:
# Folder on the mounted Drive that receives one .h5 checkpoint per model.
checkpoint_dir = f'/content/drive/MyDrive/Colab Notebooks/ConvAttData/models/{category}'

# Train every model variant on the same data with the same schedule.
for model in models:
    # Callbacks are created inside the loop so each model gets its own
    # plateau-tracking state.
    # The keyword is `patience`; the original spelling `patient` is not the
    # real argument, so the intended patience of 5 epochs never took effect.
    lr_decay = tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_aupr',
        factor=0.2,
        patience=5,
        verbose=1,
        min_lr=1e-7,
        mode='max',
    )

    # NOTE(review): save_best_only is left at its default, so the checkpoint
    # file is rewritten as training proceeds and `monitor` does not choose
    # which epoch is kept. Pass save_best_only=True, mode='max' if the weights
    # with the best val_aupr are what should end up on disk.
    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        filepath=f'{checkpoint_dir}/{model.name}.h5',
        monitor='val_aupr',
    )

    model.fit(
        x_train,
        y_train,
        epochs=75,
        validation_data=(x_valid, y_valid),
        callbacks=[lr_decay, checkpoint],
        verbose=1,
    )

# Extract PPMs

In [None]:
# Folder on the mounted Drive that receives one .meme file per model.
motif_dir = f'/content/drive/MyDrive/Colab Notebooks/ConvAttData/motifs/{category}'

# Create the output folder (and any missing parents) up front so the first
# write does not fail on a fresh Drive; this is a no-op when it already exists.
tf.io.gfile.makedirs(motif_dir)

for model in models:
    # Position of the first Activation layer in the model -- the ReLU named
    # 'conv_activation' that directly follows the first convolution.
    layer_types = [type(layer) for layer in model.layers]
    index = layer_types.index(tf.keras.layers.Activation)

    # Derive filter motifs from that layer's activations on the test set, clip
    # them, and write them out in MEME format with a 'filter' name prefix.
    ppms = moana.filter_activations(x_test, model, layer=index, window=20, threshold=0.5)
    ppms = moana.clip_filters(ppms, threshold=0.5, pad=3)

    moana.meme_generate(ppms, output_file=f'{motif_dir}/{model.name}.meme', prefix='filter')