In [1]:
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam

In [2]:
ATCG = { 'A' : 0, 'T' : 1, 'C' : 2, 'G' : 3}
xdim = len(ATCG)

In [3]:
def atcg(x) : return ATCG[x]

In [4]:
def readfile(fname, todig) :
    f = open(fname, "r")
    data = np.array([ list(line[:-1]) for line in list(f) ])
    np.random.shuffle(data)
    X = to_categorical(np.vectorize(todig)(data[:,:-1]))
    Y = tf.strings.to_number(data[:,-1])
    return X, Y

In [5]:
start_pos = 18

In [6]:
X, Y = readfile('data/promoters-48+1.txt', atcg)

In [7]:
X.shape

(16455, 50, 4)

In [8]:
Y.shape

TensorShape([16455])

In [9]:
Y

<tf.Tensor: shape=(16455,), dtype=float32, numpy=array([0., 0., 0., ..., 0., 1., 0.], dtype=float32)>

In [10]:
timesteps = X.shape[1]

In [11]:
opt = Adam(lr = 0.0001)

In [12]:
model = Sequential()
model.add(LSTM(64, input_dim = xdim))
model.add(Dense(1, activation = 'sigmoid'))
model.compile(loss = 'binary_crossentropy', optimizer = opt, metrics = ['accuracy'])

In [13]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 64)                17664     
_________________________________________________________________
dense (Dense)                (None, 1)                 65        
Total params: 17,729
Trainable params: 17,729
Non-trainable params: 0
_________________________________________________________________


In [14]:
model.fit(X[:,start_pos:], Y, epochs = 100, batch_size = 64, verbose = 0)

<tensorflow.python.keras.callbacks.History at 0x7f87ba361b10>

In [15]:
model.evaluate(X[:,start_pos:])



[0.0, 0.0]

In [16]:
keras.models.save_model(model, 'promoters_50_start_18_LSTM_64.h5')

In [17]:
load_model = keras.models.load_model('promoters_50_start_18_LSTM_64.h5')

In [18]:
load_model.evaluate(X[:,start_pos:])



[0.0, 0.0]