From [https://machinelearningmastery.com/reproducible-results-neural-networks-keras/](https://machinelearningmastery.com/reproducible-results-neural-networks-keras/)

In [1]:
from pandas import DataFrame, concat
from keras.models import Sequential
from keras.layers import Dense, Activation

import numpy as np

Using TensorFlow backend.


In [2]:
# Toy series: 0.0, 0.1, ..., 1.0 (one value per step in range(length))
length = 11
sequence = [step / 10.0 for step in range(length)]

# Supervised framing: the previous value sits in the first column and the
# current value in the second. The first row has no predecessor, so its NaN
# input is dropped.
df = DataFrame(sequence)
df = concat([df.shift(1), df], axis=1).dropna()

# Split the two columns into MLP-friendly 1-D arrays
values = df.values
X = values[:, 0]
y = values[:, 1]
X, y

(array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]),
 array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ]))

In [3]:
from sklearn.metrics import mean_squared_error

# design network: a 10-unit Dense layer on a single input feature, followed
# by a 1-unit Dense output (no activation argument is passed to either layer)
model = Sequential([
    Dense(10, input_dim=1),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')
# In-memory baseline from the original article, left disabled: the network is
# trained later in the notebook from a data generator instead.
# fit network
#model.fit(X, y, epochs=1000, batch_size=len(X), verbose=0)
# forecast
#yhat = model.predict(X, verbose=0)
#print(mean_squared_error(y, yhat[:,0]))
#yhat

## Using `keras.utils.Sequence`

From [https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly](https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly)

In [4]:
# Sample IDs are the input values rendered as strings with the '0.2' format
# spec. 'validation' is listed first so the printed dict keeps its key order.
partition = {
    'validation': [f'{(l/10.0):0.2}' for l in range(length - 1, length + 5)],
    'train': [f'{l:0.2}' for l in X],
}
print(partition)

{'validation': ['1.0', '1.1', '1.2', '1.3', '1.4', '1.5'], 'train': ['0.0', '0.1', '0.2', '0.3', '0.4', '0.5', '0.6', '0.7', '0.8', '0.9']}


In [5]:
# Target for every sample ID (validation first, then train): the numeric
# value of the ID plus 0.1
labels = {
    sample_id: float(sample_id) + 0.1
    for subset in ('validation', 'train')
    for sample_id in partition[subset]
}
print(labels)

{'1.0': 1.1, '1.1': 1.2000000000000002, '1.2': 1.3, '1.3': 1.4000000000000001, '1.4': 1.5, '1.5': 1.6, '0.0': 0.1, '0.1': 0.2, '0.2': 0.30000000000000004, '0.3': 0.4, '0.4': 0.5, '0.5': 0.6, '0.6': 0.7, '0.7': 0.7999999999999999, '0.8': 0.9, '0.9': 1.0}


In [6]:
from keras.utils import Sequence
import numpy as np


class DataGenerator(Sequence):
    """Batch provider that builds (X, y) arrays from string sample IDs.

    Each ID is converted to a feature with ``float(ID)`` and paired with the
    target stored for that ID in ``labels``.
    """

    def __init__(self, data_ids, labels, shuffle=True, batch_size=5):
        """Store the dataset description and build the first epoch's ordering.

        Args:
            data_ids: list of sample identifiers; each must be accepted by
                ``float()`` and be a key of ``labels``.
            labels: mapping from sample identifier to its target value.
            shuffle: when true, the sample order is re-drawn in
                ``on_epoch_end``.
            batch_size: number of samples per batch. Defaults to 5, the value
                that was previously hard-coded.

        Raises:
            ValueError: if ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError(f'batch_size must be >= 1, got {batch_size}')
        # NOTE(review): newer Keras releases ask subclasses to run the base
        # initializer; on older releases this is a harmless no-op.
        super().__init__()
        self.labels = labels
        self.batch_size = batch_size
        self.data_ids = data_ids
        self.shuffle = shuffle
        self.on_epoch_end()

    def on_epoch_end(self):
        """Rebuild the index order, shuffling it when ``shuffle`` is set."""
        self.indexes = np.arange(len(self.data_ids))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, temporary_list_id):
        """Build the feature and target arrays for the given sample IDs.

        The arrays are sized by the number of IDs received rather than by
        ``batch_size``, so a short batch never contains uninitialized slots.

        Returns:
            Tuple ``(X, y)`` of 1-D float arrays, one entry per ID.
        """
        X = np.array([self.__get_datapoint(ID) for ID in temporary_list_id],
                     dtype=float)
        y = np.array([self.labels[ID] for ID in temporary_list_id],
                     dtype=float)
        return X, y

    def __get_datapoint(self, identifier):
        """Return the feature for one ID; placeholder for real loading logic."""
        return float(identifier)

    def __len__(self):
        """Number of full batches per epoch; a trailing partial batch is not counted."""
        return len(self.data_ids) // self.batch_size

    def __getitem__(self, index):
        """Return the ``(X, y)`` arrays for batch number ``index``."""
        # Positions of this batch inside the (possibly shuffled) index order
        start = index * self.batch_size
        batch_indexes = self.indexes[start:start + self.batch_size]

        # Translate positions to sample IDs, then materialize the arrays
        temp_ids = [self.data_ids[k] for k in batch_indexes]
        return self.__data_generation(temp_ids)


In [7]:
# One generator per split; shuffle is left at its default for both.
training_generator = DataGenerator(data_ids=partition['train'], labels=labels)
validation_generator = DataGenerator(data_ids=partition['validation'], labels=labels)

In [9]:
# Train from the generators rather than from the in-memory arrays.
# NOTE(review): newer Keras/TensorFlow releases deprecate fit_generator in
# favour of fit — confirm against the installed version before changing.
model.fit_generator(
    generator=training_generator,
    validation_data=validation_generator,
    epochs=1000,
    verbose=0,
)

# Score the trained model on the original inputs
yhat = model.predict(X, verbose=0)
print(mean_squared_error(y, yhat[:, 0]))
yhat

7.960299093427971e-16


array([[0.1       ],
       [0.19999999],
       [0.3       ],
       [0.40000004],
       [0.5       ],
       [0.59999996],
       [0.70000005],
       [0.79999995],
       [0.9       ],
       [1.        ]], dtype=float32)