In [1]:
import numpy as np
import progressbar
from keras.models import Model
from keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization, Input
from keras.layers import Conv1D, MaxPooling1D, Conv2D, MaxPool2D
from keras.callbacks import LearningRateScheduler, EarlyStopping

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## Conv1D


In [2]:
data = np.array([
    [20, 0, 12, 6, 5],
    [10, 0, 10, 4, 6],
    [21, 2, 11, 4, 4],
    [22, 0, 10, 3, 6],
    [20, 1, 10, 4, 5],
    [40, 0, 9, 4, 5],
    [41, 0, 10, 4, 4],
    [42, 4, 10, 6, 5],
    [39, 0, 10, 4, 5],
    [38, 9, 13, 3, 5],
])

labels = np.array([
    [1, 0],
    [1, 0],
    [1, 0],
    [1, 0],
    [1, 0],
    [0, 1],
    [0, 1],
    [0, 1],
    [0, 1],
    [0, 1],
])

input_shape = data.shape[0:]
# input_shape = (None, 5)
num_output_classes = labels.shape[1]

print('input_shape:', input_shape)
print('num_output_classes:', num_output_classes)

input_layer = Input(shape=input_shape)
conv_1 = Conv1D(filters=2, kernel_size=3, padding='same', activation='relu')(input_layer)
# pool_1 = MaxPool2D(pool_size=(2, 1))(conv_1)
# conv_2 = Conv2D(filters=40, kernel_size=3, padding='same', activation='relu')(pool_1)

flatten = Flatten()(conv_1)
predictions = Dense(num_output_classes, activation='softmax')(flatten)

model = Model(input_layer, predictions)

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])


input_shape: (10, 5)
num_output_classes: 2
Instructions for updating:
`NHWC` for data_format is deprecated, use `NWC` instead


In [3]:
X = np.load('/home/ajuric/faks/sem4/diplomski/consensus-net/experiments/blade/morganela-morgani-NCTC235/pysam-dataset-n3-X.npy')
y = np.load('/home/ajuric/faks/sem4/diplomski/consensus-net/experiments/blade/morganela-morgani-NCTC235/pysam-dataset-n3-y.npy')

In [4]:
print(X.shape)
print(y.shape)

(3573946, 7, 4)
(3573946, 4)


In [5]:
input_shape = X.shape[1:]
# input_shape = (None, 5)
num_output_classes = y.shape[1]

print('input_shape:', input_shape)
print('num_output_classes:', num_output_classes)

input_layer = Input(shape=input_shape)
conv_1 = Conv1D(filters=2, kernel_size=2, padding='same', activation='relu')(input_layer)
# pool_1 = MaxPool2D(pool_size=(2, 1))(conv_1)
# conv_2 = Conv2D(filters=40, kernel_size=3, padding='same', activation='relu')(pool_1)

flatten = Flatten()(conv_1)
predictions = Dense(num_output_classes, activation='softmax')(flatten)

model = Model(input_layer, predictions)

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
print(model.summary())

input_shape: (7, 4)
num_output_classes: 4
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 7, 4)              0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 7, 2)              18        
_________________________________________________________________
flatten_2 (Flatten)          (None, 14)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 4)                 60        
Total params: 78
Trainable params: 78
Non-trainable params: 0
_________________________________________________________________
None


In [6]:
batch_size = 1000
epochs = 5

In [7]:
model.fit(X, y, batch_size=batch_size, epochs=epochs)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f4e2afc1b00>

## Conv2D

In [8]:
input_shape = X.shape[1:]
# input_shape = (None, 5)
num_output_classes = y.shape[1]

print('input_shape:', input_shape)
print('num_output_classes:', num_output_classes)

input_layer = Input(shape=input_shape)
conv_1 = Conv2D(filters=2, kernel_size=2, padding='same', activation='relu')(input_layer)
# pool_1 = MaxPool2D(pool_size=(2, 1))(conv_1)
# conv_2 = Conv2D(filters=40, kernel_size=3, padding='same', activation='relu')(pool_1)

flatten = Flatten()(conv_1)
predictions = Dense(num_output_classes, activation='softmax')(flatten)

model = Model(input_layer, predictions)

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
print(model.summary())

input_shape: (7, 4)
num_output_classes: 4


ValueError: Input 0 is incompatible with layer conv2d_1: expected ndim=4, found ndim=3

In [29]:
batch_size = 1000
epochs = 5

In [30]:
model.fit(X, y, batch_size=batch_size, epochs=epochs)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7fa10cb6fc50>

## Reshaping input

In [9]:
X_list, y_list = [X], [y
                      ]
reshaped_X_list, reshaped_y_list = list(), list()
for X, y in zip(X_list, y_list):
    print('X shape before reshaping:', X.shape)
    print('y shape before reshaping:', y.shape)

    new_X = list()
    neighbourhood_size = X[0].shape[0]
    # Number of columns is equal to the number of letters in dataset (A, C,
    # G, T, I, D, ...).
    num_columns = X[0].shape[1]
    num_data = X.shape[0]
    with progressbar.ProgressBar(max_value=num_data) as progress_bar:
        for i, xi in enumerate(X):
            new_xi = np.dstack(
                [xi[:, col_index].reshape(neighbourhood_size, 1)
                 for col_index in range(num_columns)]
            )
            new_X.append(new_xi)
            progress_bar.update(i)

    new_X = np.array(new_X)
    X = new_X
    print('X shape after reshaping:', X.shape)
    print('y shape after reshaping:', y.shape)

    reshaped_X_list.append(X), reshaped_y_list.append(y)

  0% (13519 of 3573946) |                 | Elapsed Time: 0:00:00 ETA:  0:00:53

X shape before reshaping: (3573946, 7, 4)
y shape before reshaping: (3573946, 4)


100% (3573946 of 3573946) |###############| Elapsed Time: 0:00:54 Time: 0:00:54


X shape after reshaping: (3573946, 7, 1, 4)
y shape after reshaping: (3573946, 4)


In [11]:
reshaped_X = reshaped_X_list[0]
reshaped_y = reshaped_y_list[0]

input_shape = reshaped_X.shape[1:]
# input_shape = (None, 5)
num_output_classes = reshaped_y.shape[1]

print('input_shape:', input_shape)
print('num_output_classes:', num_output_classes)

input_layer = Input(shape=input_shape)
conv_1 = Conv2D(filters=2, kernel_size=2, padding='same', activation='relu')(input_layer)
# pool_1 = MaxPool2D(pool_size=(2, 1))(conv_1)
# conv_2 = Conv2D(filters=40, kernel_size=3, padding='same', activation='relu')(pool_1)

flatten = Flatten()(conv_1)
predictions = Dense(num_output_classes, activation='softmax')(flatten)

model = Model(input_layer, predictions)

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
print(model.summary())

input_shape: (7, 1, 4)
num_output_classes: 4
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         (None, 7, 1, 4)           0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 7, 1, 2)           34        
_________________________________________________________________
flatten_4 (Flatten)          (None, 14)                0         
_________________________________________________________________
dense_4 (Dense)              (None, 4)                 60        
Total params: 94
Trainable params: 94
Non-trainable params: 0
_________________________________________________________________
None


In [12]:
batch_size = 1000
epochs = 5

In [13]:
model.fit(X, y, batch_size=batch_size, epochs=epochs)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f4dec16efd0>

In [None]:
input_shape = X_train.shape[1:]
num_output_classes = y_train.shape[1]

input_layer = Input(shape=input_shape)
conv_1 = Conv2D(filters=40, kernel_size=3, padding='same', activation='relu')(input_layer)
pool_1 = MaxPool2D(pool_size=(2, 1))(conv_1)
conv_2 = Conv2D(filters=40, kernel_size=3, padding='same', activation='relu')(pool_1)

flatten = Flatten()(conv_2)
predictions = Dense(num_output_classes, activation='softmax')(flatten)

model = Model(input_layer, predictions)

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])