In [1]:
import numpy as np
from keras.models import Model
from keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization, Input
from keras.layers import Conv1D, MaxPooling1D, Conv2D, MaxPool2D
from keras.callbacks import LearningRateScheduler, EarlyStopping

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## Data

In [2]:
# Read the pre-split training and validation arrays (X and y) from the
# current working directory, in the same order as the names on the left.
X_train, X_validate, y_train, y_validate = map(
    np.load,
    (
        './dataset-n20-X-reshaped-train.npy',
        './dataset-n20-X-reshaped-validate.npy',
        './dataset-n20-y-reshaped-train.npy',
        './dataset-n20-y-reshaped-validate.npy',
    ),
)

In [4]:
# Show the first training sample followed by its label row, one per line.
print(X_train[0], y_train[0], sep='\n')

[[[ 0  0  0 58  1]]

 [[59  0  0  0  0]]

 [[ 2  0 54  0  3]]

 [[ 0  1  0  0 58]]

 [[ 6  1  7  2 43]]

 [[56  0  0  1  2]]

 [[ 0  0  6  0 53]]

 [[ 1  0  0  0 58]]

 [[ 0  2  0  0 57]]

 [[ 2  0 53  0  4]]

 [[58  0  0  0  1]]

 [[ 0  1  9  0 49]]

 [[ 0  4 54  0  1]]

 [[ 0 57  1  0  1]]

 [[12  1  0  0 46]]

 [[50  0  0  9  0]]

 [[ 0  0  1 52  6]]

 [[ 0  6  0  4 49]]

 [[ 0  2  0 53  4]]

 [[ 0  0  1  0 58]]

 [[ 0 56  0  1  2]]

 [[ 0  0 11  0 48]]

 [[ 0  6 53  0  0]]

 [[ 0 58  0  0  1]]

 [[56  0  0  0  3]]

 [[ 2  0 58  0  0]]

 [[ 0  2  4  0 54]]

 [[ 0 58  1  0  1]]

 [[ 0  0 48  1 11]]

 [[ 1  0 59  0  0]]

 [[ 0  7 13  0 40]]

 [[ 6  0 53  0  1]]

 [[52  0  5  0  3]]

 [[ 0  1  9  0 50]]

 [[58  1  0  1  0]]

 [[ 9 17  0  0 34]]

 [[ 0  1  0  1 58]]

 [[ 1 49  1  0  9]]

 [[ 0 58  0  0  2]]

 [[ 3  0 11  0 46]]

 [[ 1 58  0  0  1]]]
[0. 0. 0. 0. 0. 0.]


In [5]:
# Report the dimensions of the training inputs and labels, one per line.
print(X_train.shape, y_train.shape, sep='\n')

(27981697, 41, 1, 5)
(27981697, 6)


In [7]:
# Preview the first ten label rows of the training set.
print(y_train[:10])

[[0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]]


## Investigate pileups

In [15]:
# Load the X and y pileup arrays that the dataset is compared against.
X_pileups, y_pileups = map(
    np.load,
    (
        '/home/diplomski-rad/blade/pb/racon-hax-pileups/e-coli-NCTC86-racon-MSA-test-bug-fix/pileups-X-0.npy',
        '/home/diplomski-rad/blade/pb/racon-hax-pileups/e-coli-NCTC86-racon-MSA-test-bug-fix/pileups-y-0.npy',
    ),
)

# One shape per line, then the first ten label rows.
print(X_pileups.shape, y_pileups.shape, sep='\n')
print(y_pileups[:10])

(8429809, 5)
(8429809, 7)
[[1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0.]]


In [16]:
# Inspect the first row of the pileup input array.
print(X_pileups[0])

[36  0  0  0  0]


In [17]:
# Inspect the second row of the pileup input array for comparison.
print(X_pileups[1])

[ 0  0 39  0  0]


## Model

In [3]:
# Build, compile and train a small convolutional classifier.
#
# Input and output sizes are read from the loaded arrays, so this cell
# follows whatever shapes the data-loading cell produced.
input_shape = X_train.shape[1:]
num_output_classes = y_train.shape[1]

# Two stacked same-padded 3x3 convolutions (10 filters each, ReLU).
# NOTE(review): the recorded summary shows the second spatial axis has size 1,
# so with 'same' padding the outer kernel columns only ever see zero padding.
# kernel_size=(3, 1) would presumably compute the same function with fewer
# weights -- confirm before changing.
input_layer = Input(shape=input_shape)
conv_1 = Conv2D(filters=10, kernel_size=3, padding='same', activation='relu')(input_layer)
conv_2 = Conv2D(filters=10, kernel_size=3, padding='same', activation='relu')(conv_1)

# Flatten the feature maps and map them to one softmax score per class.
flatten = Flatten()(conv_2)
predictions = Dense(num_output_classes, activation='softmax')(flatten)

model = Model(input_layer, predictions)

# NOTE(review): the label preview elsewhere in this notebook shows all-zero
# rows in y_train; categorical_crossentropy presumably expects one-hot rows --
# confirm the labels before trusting the reported loss.
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# summary() prints the layer table itself and returns None; wrapping it in
# print() emitted a stray "None" line after the table.
model.summary()

batch_size = 10000
epochs = 100
# Stop training once val_loss has failed to improve for 3 epochs.
callbacks = [EarlyStopping(monitor='val_loss', patience=3)]

model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(X_validate, y_validate), callbacks=callbacks)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 41, 1, 5)          0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 41, 1, 10)         460       
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 41, 1, 10)         910       
_________________________________________________________________
flatten_1 (Flatten)          (None, 410)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 6)                 2466      
Total params: 3,836
Trainable params: 3,836
Non-trainable params: 0
_________________________________________________________________
None
Train on 27981697 samples, validate on 3109078 samples
Epoch 1/100
 3070000/27981697 [==>...........................] - ETA: 3:39 - los

KeyboardInterrupt: 