In [None]:
import numpy as np

In [None]:
import tensorflow as tf
from tensorflow.keras import layers

In [None]:
from igrads import integrated_gradients

In [None]:
# Column index of each nucleotide in the one-hot encoding.
base2int = {'A': 0, 'C': 1, 'G': 2, 'T': 3}


def _encode_record(header, chunks):
    """One-hot encode the sequence lines collected for a single FASTA record.

    Raises:
        ValueError: if the record contains no sequence characters.
    """
    sequence = ''.join(chunks)
    if not sequence:
        raise ValueError('No sequence found for record %r.' % header)
    # Characters missing from base2int get the out-of-range index 999.
    # NOTE(review): tf.one_hot is expected to emit an all-zero row for such
    # indices -- confirm against the TensorFlow documentation.
    # The explicit integer dtype keeps the index array integral on every platform.
    indices = np.array([base2int.get(b, 999) for b in sequence], dtype=np.int64)
    return tf.one_hot(indices, depth=4)


def load_fasta(fasta):
    """Lazily yield one one-hot encoded sequence per record of a FASTA file.

    Each record is a header line starting with '>' followed by one or more
    sequence lines; wrapped sequence lines are joined before encoding. Blank
    lines (including trailing ones at the end of the file) are ignored.

    Args:
        fasta: path of the FASTA file to read.

    Yields:
        The result of ``tf.one_hot(indices, depth=4)`` for each record, with
        one row per sequence character and columns ordered as in ``base2int``.

    Raises:
        ValueError: if sequence data appears before any header line
            ('Header not found.') or a record has no sequence.
    """
    with open(fasta) as f:
        header = None  # stays None until the first '>' line is seen
        chunks = []
        for line in f:
            line = line.strip()
            if not line:
                continue
            if line.startswith('>'):
                # A new header closes the previous record, if there was one.
                if header is not None:
                    yield _encode_record(header, chunks)
                header, chunks = line[1:], []
            elif header is None:
                raise ValueError('Header not found.')
            else:
                chunks.append(line)
        # Flush the final record; no further header arrives to close it.
        if header is not None:
            yield _encode_record(header, chunks)

In [None]:
# Materialise both generators so the records can be counted and concatenated.
positives = list(load_fasta('./data/positive.fasta'))
negatives = list(load_fasta('./data/negative.fasta'))

In [None]:
# Positives first, then negatives, stacked along a new leading axis.
X = np.stack([*positives, *negatives])

In [None]:
# Inspect the dimensions of the stacked array.
# NOTE(review): the model defined later declares Input((101, 4)), so this
# presumably shows (n_sequences, 101, 4) -- confirm.
X.shape

In [None]:
# Labels in the same order as X: 1.0 for every positive, then 0.0 for every negative.
Y = np.concatenate([np.ones(len(positives)), np.zeros(len(negatives))])

In [None]:
# Inspect the label vector's dimensions; it holds one entry per item in
# `positives` followed by one per item in `negatives`.
Y.shape

In [None]:
# Three Conv1D + MaxPool1D stages (kernel sizes 10, 3, 3), each with 64
# filters, 'same' padding and pooling by 2, followed by a small dense head
# with a single sigmoid output.
conv_stack = []
for kernel_size in (10, 3, 3):
    conv_stack.append(layers.Conv1D(64, kernel_size, activation='relu', padding='same'))
    conv_stack.append(layers.MaxPool1D(2))

model = tf.keras.models.Sequential([
    layers.Input((101, 4)),
    *conv_stack,
    layers.Flatten(),
    layers.Dense(32, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

# No optimizer is passed, so Keras falls back to its default.
model.compile(loss='binary_crossentropy', metrics=["accuracy"])
model.summary()

In [None]:
# Reuse the saved model when it exists; otherwise train the freshly compiled
# model on X / Y and save it, so the notebook still runs end to end from a
# clean checkout that has no model file yet.
MODEL_PATH = 'model.h5'

if tf.io.gfile.exists(MODEL_PATH):
    model = tf.keras.models.load_model(MODEL_PATH)
else:
    history = model.fit(x=X, y=Y, epochs=2, shuffle=True, batch_size=128)
    model.save(MODEL_PATH)

In [None]:
# Sequences to explain with integrated gradients.
# NOTE(review): this rebinds both `X` and `positives`, replacing the training
# array and the positive set with this list -- re-running earlier cells
# afterwards will see the new values.
X = positives = list(load_fasta('./data/sequences.fasta'))

In [None]:
# Compute attributions for the first five sequences only.
attributions = []
for one_hot_sequence in X[:5]:
    # Each sequence is handed to integrated_gradients as a float32 tensor; the
    # returned value is converted with .numpy() before being stored.
    inputs = tf.constant(one_hot_sequence, dtype=tf.float32)
    attribution = integrated_gradients(inputs, model).numpy()
    attributions += [attribution]

In [None]:
#

In [None]:
from igrads import plot_sequence_attribution

In [None]:
# Call the igrads plotting helper once per stored attribution (presumably one
# figure each -- confirm against igrads).
# The loop variable `attribution` stays bound to the last item afterwards, and
# later cells read it.
for attribution in attributions:
    plot_sequence_attribution(attribution)

In [None]:
# Display the value `attribution` was last bound to by the preceding loops
# (the final item of `attributions`).
attribution

In [None]:
import pandas as pd
import logomaker

In [None]:
def plot_dna_attribution(attribution_matrix):
    """Draw a logomaker sequence logo from a four-column attribution matrix.

    Args:
        attribution_matrix: 2-D data with four columns, labelled A, C, G and T
            in that order when wrapped in a DataFrame. Rows are presumably
            sequence positions -- confirm against the caller.

    Returns:
        None. The logo is drawn on the axes that logomaker creates.
    """
    bases = ['A', 'C', 'G', 'T']
    scores = pd.DataFrame(attribution_matrix, columns=bases)

    # Build the Logo object.
    logo_options = dict(
        shade_below=.5,
        fade_below=.5,
        font_name='Arial Rounded MT Bold',
    )
    logo = logomaker.Logo(scores, **logo_options)

    # Logo-level styling: hide every spine, then re-enable left and bottom.
    logo.style_spines(visible=False)
    logo.style_spines(spines=['left', 'bottom'], visible=True)
    logo.style_xticks(rotation=90, fmt='%d', anchor=0)

    # Axes-level styling: label the y axis and switch off the x tick marks
    # and their labels.
    axes = logo.ax
    axes.set_ylabel("IG Attribution", labelpad=-1)
    axes.xaxis.set_tick_params(which='both', bottom=False, top=False, labelbottom=False)

In [None]:
# The items of `attributions` were already converted with .numpy() when they
# were computed, so calling .numpy() again here fails on a NumPy array.
# np.asarray accepts the value whether it is an array or a tensor.
plot_dna_attribution(np.asarray(attribution))