In [2]:
# from comet_ml import Experiment

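# NOTE: the API key that used to be hard-coded here was redacted (rotate it if it was ever shared);
# experiment = Experiment(api_key=os.environ["COMET_API_KEY"], project_name="consensusnet")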

import numpy as np
from keras.models import Model
from keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization, Input
from keras.layers import Conv1D, MaxPooling1D, Conv2D, MaxPooling2D, Dropout
from keras.callbacks import EarlyStopping, TensorBoard

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## Using Sequence generator

In [2]:
import numpy as np
from keras.utils import Sequence


class PileupSequence(Sequence):
    """
    Class for creating batches while training.

    Examples are read from disk lazily: every path in ``X_paths`` belonging
    to the requested batch is loaded with ``np.load`` and the matching slice
    of ``classes`` is one-hot encoded.

    Instance of this class is provided in fit_generator() method for training.
    """

    def __init__(self, X_paths, classes, batch_size, num_classes):
        """
        :param X_paths: list of paths to data
        :type X_paths: list of str
        :param classes: labels for data, matched to ``X_paths`` by position
        :type classes: np.ndarray
        :param batch_size: size of training batch
        :type batch_size: int
        :param num_classes: number of labels classes
        :type num_classes: int
        :raises ValueError: if ``X_paths`` and ``classes`` differ in length,
            or if ``batch_size`` is smaller than 1 or larger than the number
            of paths.
        """
        if len(X_paths) != len(classes):
            # The first placeholder used to be written as '{]', which made
            # str.format() itself fail instead of reporting the two counts.
            raise ValueError('You must provide same number of Xs and ys! '
                             'Number of Xs given is {}, and number of ys '
                             'given is {}'.format(len(X_paths), len(classes)))
        if batch_size < 1 or batch_size > len(X_paths):
            raise ValueError('Batch size must be positive number less than '
                             'number of Xs, but {} given.'.format(batch_size))

        self.X_paths, self.classes = X_paths, classes
        self.batch_size = batch_size
        self.num_classes = num_classes

    def __len__(self):
        """
        :return: number of batches per epoch; the last batch may be smaller
            than ``batch_size``.
        :rtype: int
        """
        return int(np.ceil(len(self.X_paths) / float(self.batch_size)))

    def __getitem__(self, idx):
        """
        Builds the batch with the given index.

        :param idx: index of the batch, in ``[0, len(self))``
        :type idx: int
        :return: tuple ``(X, y)`` where ``X`` is the array of loaded examples
            and ``y`` holds their one-hot encoded labels
        :rtype: tuple of np.ndarray
        :raises IndexError: if ``idx`` is outside ``[0, len(self))``
        """
        # Slicing past the end would silently yield an empty batch, so reject
        # out-of-range indices explicitly.
        if not 0 <= idx < len(self):
            raise IndexError('Batch index {} is out of range for {} '
                             'batches.'.format(idx, len(self)))

        start = idx * self.batch_size
        stop = start + self.batch_size
        batch_x = self.X_paths[start:stop]
        # np.asarray is a no-op for ndarray labels and lets plain sequences
        # of ints work as well.
        batch_y = np.asarray(self.classes[start:stop])

        return np.array([np.load(file_name) for file_name in batch_x]), \
            _to_categorical(batch_y, num_classes=self.num_classes)


def _to_categorical(y, num_classes=None):
    """
    Converts given labels to one-hot encoding (i.e. categorical).

    :param y:
    :type y: np.ndarray
    :param num_classes:
    :type num_classes: int
    :return: Labels one-hot encoded.
    :rtype: np.ndarray
    """
    if num_classes < 1:
        raise ValueError('Number of classes must be positive int, but {} '
                         'given.'.format(num_classes))
    if not len(y.shape) == 1:
        raise ValueError('y must be 1-D numpy array, but {} shape '
                         'given.'.format(y.shape))

    n = y.shape[0]

    if num_classes is None:
        num_classes = np.max(y)

    categorical = np.zeros((n, num_classes), dtype=np.uint8)
    categorical[np.arange(n), y] = 1
    return categorical

In [None]:
import os

# Labels for all examples, loaded into memory in one go.
classes = np.load('./dataset-classes-y.npy')
# os.listdir() returns entries in arbitrary order; sort them so that the
# positional pairing of files with `classes` is the same on every run.
# NOTE(review): labels are matched to files purely by position - confirm that
# the sorted file names follow the order in which the labels were saved.
X_paths = [os.path.join('./X/', xi) for xi in sorted(os.listdir('./X/'))]
batch_size = 1
num_classes = 4

print('done!')

In [10]:
# Stream batches from disk instead of holding the whole dataset in memory.
batch_size = 100
generator = PileupSequence(X_paths, classes, batch_size, num_classes)

# Shape of a single example as stored on disk (without the batch dimension).
example_shape = (21, 1, 6)
input_layer = Input(shape=example_shape)

conv_1 = Conv2D(filters=16, kernel_size=3, padding='same', activation='relu')(input_layer)

conv_2 = Conv2D(filters=32, kernel_size=3, padding='same', activation='relu')(conv_1)

flatten = Flatten()(conv_2)
# NOTE(review): no activation is passed, so this layer is linear - confirm
# that is intended (and that 1042 is not a typo for 1024).
dense_1 = Dense(1042)(flatten)
dropout_1 = Dropout(0.25)(dense_1)
# One output unit per class, kept in sync with the generator's one-hot width.
predictions = Dense(num_classes, activation='softmax')(dropout_1)

model = Model(input_layer, predictions)

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that would emit a stray "None").
model.summary()

epochs = 3

model.fit_generator(generator, epochs=epochs, use_multiprocessing=True, workers=12)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_8 (InputLayer)         (None, 21, 1, 6)          0         
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 21, 1, 16)         880       
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 21, 1, 32)         4640      
_________________________________________________________________
flatten_8 (Flatten)          (None, 672)               0         
_________________________________________________________________
dense_15 (Dense)             (None, 1042)              701266    
_________________________________________________________________
dropout_8 (Dropout)          (None, 1042)              0         
_________________________________________________________________
dense_16 (Dense)             (None, 4)                 4172      
Total para

Process ForkPoolWorker-67:
Traceback (most recent call last):
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.5/multiprocessing/pool.py", line 119, in worker
    result = (True, func(*args, **kwds))
Process ForkPoolWorker-69:
  File "/usr/local/lib/python3.5/dist-packages/keras/utils/data_utils.py", line 401, in get_index
    return _SHARED_SEQUENCES[uid][i]


KeyboardInterrupt: 

  File "<ipython-input-2-f8c5d0676194>", line 44, in __getitem__
    return np.array([np.load(file_name) for file_name in batch_x]),             _to_categorical(batch_y, num_classes=self.num_classes)
Process ForkPoolWorker-66:
  File "<ipython-input-2-f8c5d0676194>", line 44, in <listcomp>
    return np.array([np.load(file_name) for file_name in batch_x]),             _to_categorical(batch_y, num_classes=self.num_classes)
Traceback (most recent call last):
  File "/usr/local/lib/python3.5/dist-packages/numpy/lib/npyio.py", line 404, in load
    magic = fid.read(N)
KeyboardInterrupt
Traceback (most recent call last):
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in 

  File "/usr/local/lib/python3.5/dist-packages/keras/utils/data_utils.py", line 401, in get_index
    return _SHARED_SEQUENCES[uid][i]
KeyboardInterrupt
  File "<ipython-input-2-f8c5d0676194>", line 44, in __getitem__
    return np.array([np.load(file_name) for file_name in batch_x]),             _to_categorical(batch_y, num_classes=self.num_classes)
Traceback (most recent call last):
  File "<ipython-input-2-f8c5d0676194>", line 44, in <listcomp>
    return np.array([np.load(file_name) for file_name in batch_x]),             _to_categorical(batch_y, num_classes=self.num_classes)
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/local/lib/python3.5/dist-packages/numpy/lib/npyio.py", line 404, in load
    magic = fid.read(N)
  File "/usr/lib/python3.5/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
KeyboardInterrupt
  File "/usr/lib/python3.5/multiprocessing/pool.py", line 119, in worker

## Read all in RAM

In [3]:
# Training split: examples and their labels, fully loaded into memory.
X_train, y_train = (np.load('./dataset-n10-X-reshaped-train.npy'),
                    np.load('./dataset-n10-y-reshaped-train.npy'))
# Validation split, loaded the same way.
X_validate, y_validate = (np.load('./dataset-n10-X-reshaped-validate.npy'),
                          np.load('./dataset-n10-y-reshaped-validate.npy'))

In [6]:
# Per-example input shape is taken from the data (batch dimension dropped).
example_shape = X_train.shape[1:]
input_layer = Input(shape=example_shape)

conv_1 = Conv2D(filters=5, kernel_size=3, padding='same', activation='relu')(input_layer)

conv_2 = Conv2D(filters=5, kernel_size=3, padding='same', activation='relu')(conv_1)

flatten = Flatten()(conv_2)
# NOTE(review): no activation is passed, so this layer is linear - confirm
# that is intended.
dense_1 = Dense(10)(flatten)
dropout_1 = Dropout(0.25)(dense_1)
predictions = Dense(4, activation='softmax')(dropout_1)

model = Model(input_layer, predictions)

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that would emit a stray "None").
model.summary()

batch_size = 50000
epochs = 5

model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(X_validate, y_validate))

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         (None, 21, 1, 6)          0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 21, 1, 5)          275       
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 21, 1, 5)          230       
_________________________________________________________________
flatten_4 (Flatten)          (None, 105)               0         
_________________________________________________________________
dense_7 (Dense)              (None, 10)                1060      
_________________________________________________________________
dropout_4 (Dropout)          (None, 10)                0         
_________________________________________________________________
dense_8 (Dense)              (None, 4)                 44        
Total para

<keras.callbacks.History at 0x7fd4433e36a0>

In [7]:
# Persist the model trained above to disk so a later session can reload it.
model.save('./model-slim.h5')

In [4]:
from keras.models import load_model

# Pick up the previously saved model and train it for a few more epochs.
model = load_model('./model-slim.h5')

batch_size, epochs = 50000, 5

model.fit(x=X_train,
          y=y_train,
          batch_size=batch_size,
          epochs=epochs,
          validation_data=(X_validate, y_validate))

Train on 15258174 samples, validate on 1695353 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f2cfa706ef0>

In [5]:
# Another round of training on the same in-memory model.
batch_size, epochs = 50000, 5

model.fit(x=X_train,
          y=y_train,
          batch_size=batch_size,
          epochs=epochs,
          validation_data=(X_validate, y_validate))

Train on 15258174 samples, validate on 1695353 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f2ca855a710>

In [6]:
# Another round of training on the same in-memory model.
batch_size, epochs = 50000, 5

model.fit(x=X_train,
          y=y_train,
          batch_size=batch_size,
          epochs=epochs,
          validation_data=(X_validate, y_validate))

Train on 15258174 samples, validate on 1695353 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f31ca786b38>

In [7]:
# Another round of training on the same in-memory model.
batch_size, epochs = 50000, 5

model.fit(x=X_train,
          y=y_train,
          batch_size=batch_size,
          epochs=epochs,
          validation_data=(X_validate, y_validate))

Train on 15258174 samples, validate on 1695353 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f311572b0f0>

In [8]:
# Save the further-trained model under a new name, keeping the earlier file intact.
model.save('./model-slim-better.h5')