# Custom Keras Generator
This notebook outlines how to build custom Keras generators. These are used to feed more complex data into models, potentially with complex preprocessing such as image augmentation. They also allow multiple datasets to be fed into the model for training and validation.

In [22]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import tensorflow

# Build the whole toy dataset up front: 10,000 evenly spaced points
# on [0, 100] and the sine of each point as the regression target
X = np.linspace(start=0, stop=100, num=10000)
y = np.sin(X)

### Single Input/Output Dataset

In [28]:
def data_generator(X,y, batch_size = 64):

    """
    Overview
    --------
    Endless Keras-style batch generator that samples random rows
    (with replacement) from one input array and its targets.


    Inputs
    ------
    X (numpy.ndarray): NumPy array of input data, indexed by sample
        along the first dimension.
    y (numpy.ndarray): NumPy array of target data, indexed by sample
        along the first dimension.
    batch_size (int, optional): Number of samples drawn per batch.


    Yields
    ------
    Tuple (batch_x, batch_y) holding the same randomly chosen rows
        of X and y.
    """

    n_samples = len(X)

    while True:
        # Draw batch_size row positions uniformly from [0, n_samples)
        rows = np.random.randint(low=0, high=n_samples, size=batch_size)

        # Indexing both arrays with the same rows keeps inputs and targets aligned
        yield np.array(X[rows]), np.array(y[rows])

In [27]:
# Small fully-connected regression network: a 10-unit hidden layer
# followed by a single output unit, trained on mean squared error
model = Sequential([
    Dense(10),
    Dense(1),
])
model.compile(loss='mean_squared_error')

In [25]:
# Train on random batches produced by data_generator (defined above).
# The previous call referenced `image_generator`, which is not defined in
# this notebook and raised a NameError on a fresh kernel.
# NOTE(review): batches hold 32 samples while steps_per_epoch is computed
# with 64, so one "epoch" draws roughly len(X)/2 samples.
# NOTE: `fit_generator` is deprecated in newer TensorFlow releases, where
# `Model.fit` accepts Python generators directly.
model.fit_generator(data_generator(X, y, 32), steps_per_epoch = len(X)//64, epochs = 10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10

KeyboardInterrupt: 

### Multiple Input/Output Datasets

In [29]:
def multi_input_data_generator(X,y, batch_size = 64):

    """
    Overview
    --------
    Endless Keras-style batch generator for a model with two inputs.
    Each batch is a random sample of rows (with replacement), and the
    same input batch is supplied to both model inputs.


    Inputs
    ------
    X (numpy.ndarray): NumPy array of input data, indexed by sample
        along the first dimension.
    y (numpy.ndarray): NumPy array of target data, indexed by sample
        along the first dimension.
    batch_size (int, optional): Number of samples drawn per batch.


    Yields
    ------
    Tuple ([batch_x, batch_x], batch_y): a two-element list of inputs
        (the same array twice) and the matching targets.
    """

    total = len(X)

    while True:
        # Random row positions in [0, total), one per sample in the batch
        picked = np.random.randint(0, total, size=batch_size)

        features = np.array(X[picked])
        labels = np.array(y[picked])

        # Keras expects one entry per model input; both get the same batch here
        yield [features, features], labels

In [32]:
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model

# Symbolic input tensors: one scalar feature per sample for each branch
inputs_main = Input(shape=(1,))
inputs_aux = Input(shape=(1,))

# Main branch: two stacked single-unit ReLU layers
output_1_main = Dense(units=1, activation='relu')(inputs_main)
output_2_main = Dense(units=1, activation='relu')(output_1_main)

# Auxiliary branch: same layout as the main branch, with its own weights
output_1_aux = Dense(units=1, activation='relu')(inputs_aux)
output_2_aux = Dense(units=1, activation='relu')(output_1_aux)

# Merge the two branches by element-wise addition
added = tensorflow.keras.layers.Add()([output_2_main, output_2_aux])

# Linear output layer producing a single regression value
predictions = Dense(units=1)(added)

model = Model(inputs=[inputs_main, inputs_aux], outputs=predictions)

model.compile(loss='mean_squared_error', optimizer='rmsprop')

# The generator yields a two-element input list matching the two model inputs
model.fit_generator(
    multi_input_data_generator(X, y, 32),
    steps_per_epoch=len(X) // 64,
    epochs=5,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f439c2d5470>

We assume that the raw data has the shape (time_step, feature).

After processing, each batch fed to the model has the shape (sample, time_step, feature).

In [34]:
def time_series_data_generator(X,y, batch_size = 64):

    """
    Overview
    --------
    Endless Keras-style batch generator that yields a random sample
    of rows (with replacement) as a two-input batch. Rows are drawn
    independently; no lag windows are built here.


    Inputs
    ------
    X (numpy.ndarray): NumPy array of input data, indexed by sample
        along the first dimension.
    y (numpy.ndarray): NumPy array of target data, indexed by sample
        along the first dimension.
    batch_size (int, optional): Number of samples drawn per batch.


    Yields
    ------
    Tuple ([batch_x, batch_x], batch_y): the sampled inputs repeated
        twice in a list, and the matching targets.
    """

    row_count = len(X)

    while True:
        # One uniformly random row position per sample in the batch
        chosen = np.random.randint(0, row_count, batch_size)

        sampled_inputs = np.array(X[chosen])
        sampled_targets = np.array(y[chosen])

        # The same input array fills both slots of the input list
        yield [sampled_inputs, sampled_inputs], sampled_targets
        
    
def generate_lags(data, lags, horizon, batch_size):

    """
    Overview
    --------
    Draws one random (history, future) window pair from a series.


    Inputs
    ------
    data (numpy.ndarray): Series to slice, with time steps in the
        first dimension.
    lags (int): Number of time steps in the history window.
    horizon (int): Number of time steps in the future window.
    batch_size (int): Accepted but not used by this function.


    Returns
    -------
    Tuple (data_lags, targets): the `lags` steps immediately before a
        randomly chosen split point, and the `horizon` steps starting
        at that split point.
    """

    # The split point is sampled so that `lags` steps exist before it
    # and `horizon` steps exist from it onwards
    split = np.random.randint(lags, len(data) - horizon)

    history = data[split - lags: split]
    future = data[split:split + horizon]

    return history, future
    

In [139]:
class time_series_generator:
    
    """
    Generates batches of time series data suitable for Keras.
    
    Keras requires time series data to be fed in with the format 
    (batch_size, time_lags, features). This generator automatically
    reshapes tabular data. It also adds sinusoidal waves to give the
    neural net some concepts of where in the period we are located,
    as neural networks do not model periodicity very well.
    
    Handles multivariable inputs and outputs.
    """
    
    def __init__(self, X, y, lags, horizon, batch_size, period):
        
        """
        Stores the data and windowing settings and precomputes the
        sinusoidal time features.
        
        
        Inputs
        ------
        X (numpy.ndarray): Input series, with time steps in the first
            dimension.
        y (numpy.ndarray): Target series, with time steps in the first
            dimension.
        lags (int): Number of past time steps in each input window.
        horizon (int): Number of future time steps in each target.
        batch_size (int): Number of windows per batch.
        period (int): Periodicity of the data, in time steps.
        """
        
        self.lags = lags
        self.horizon = horizon
        self.batch_size = batch_size
        self.X = X
        self.y = y
        self.period = period
        
        # One sine and one cosine value per time step of X
        self.sin_period, self.cos_period = self.calc_sinusoidal_time(self.period)
        
    def calc_sinusoidal_time(self, period):
        
        """
        Encodes periodicity of the data into sinusoidal features.
        
        
        Inputs
        ------
        period (int): Periodicity of the data.
        
        
        Returns
        -------
        A pair of sine and cosine wave with periodicity equal to
            the passed value of period, one value per time step of
            the stored X.
        """
        
        # Position of every time step inside its period
        t = np.arange(len(self.X)) % period
        # np.max leaves a scalar period unchanged
        wave_arg = 2*np.pi*t/np.max(period)
        
        return np.sin(wave_arg), np.cos(wave_arg)
    
    def flow(self, X=None, y=None):
        
        """
        Generates a list of input data and target data.
        
        The input data list contains the reshaped time series data
        and another dataset containing sinusoidal features.
        
        
        Inputs
        ------
        X (numpy.ndarray, optional): Input series to window. Defaults
            to the X passed to the constructor.
        y (numpy.ndarray, optional): Target series to window. Defaults
            to the y passed to the constructor.
        
        
        Yields
        ------
        Tuple ([input_batch, sinusoidal_batch], targets_batch) where
            input_batch has shape (batch_size, lags, -1),
            sinusoidal_batch has shape (batch_size, lags * 2, -1) and
            targets_batch has shape (batch_size, -1).
        """
        
        if X is None:
            X = self.X
        if y is None:
            y = self.y
        
        while True:
        
            input_batch_1 = []
            sinusoidal_batch = []
            targets_batch = []
    
            for _ in range(self.batch_size):
                # Need to start indexing from lags so a full window fits before data_idx
                data_idx = np.random.randint(self.lags, len(X) - self.horizon)
                
                # Use the instance's lag count (not a module-level `lags`) so the
                # window length always matches the reshape below
                window = slice(data_idx - self.lags, data_idx)
    
                input_batch_1.append(X[window])
            
                targets_batch.append(y[data_idx:data_idx + self.horizon])
        
                # Sine values for the window followed by cosine values for the window
                sinusoidal_feature = np.concatenate([self.sin_period[window],
                                                     self.cos_period[window]])
            
                sinusoidal_batch.append(sinusoidal_feature)
            
            input_batch_1 = np.reshape(input_batch_1, (self.batch_size, self.lags, -1))
            sinusoidal_batch = np.reshape(sinusoidal_batch, (self.batch_size, self.lags * 2, -1))
            targets_batch = np.reshape(targets_batch, (self.batch_size, -1))
    
            yield [input_batch_1, sinusoidal_batch], targets_batch

In [151]:
# Windowing and batching configuration
lags = 5          # past time steps per input window
horizon = 3       # future time steps per target
batch_size = 32   # windows per batch
period = 12       # time steps per cycle for the sinusoidal features
features = 2      # columns in the input series

# Synthetic data: standard-normal inputs, and a target built from the sine
# of standard-normal draws plus Gaussian noise with standard deviation 0.4
X = np.random.normal(loc=0, scale=1, size=(10000, features))
y = np.sin(np.random.normal(loc=0, scale=1, size=10000)) + np.random.normal(loc=0, scale=0.4, size=10000)

data_gen = time_series_generator(
    X=X,
    y=y,
    lags=lags,
    horizon=horizon,
    batch_size=batch_size,
    period=period,
)

In [152]:
from tensorflow.keras.layers import Input, Flatten
from tensorflow.keras.models import Model

# Symbolic inputs: a (lags, features) window of the series, and a
# (lags * 2, 1) column of sine values followed by cosine values
inputs_main = Input(shape=(lags, features))
inputs_aux = Input(shape=(lags * 2, 1))

# Main branch: two stacked single-unit ReLU layers
output_1_main = Dense(units=1, activation='relu')(inputs_main)
output_2_main = Dense(units=1, activation='relu')(output_1_main)

# Auxiliary branch: same layout as the main branch, with its own weights
output_1_aux = Dense(units=1, activation='relu')(inputs_aux)
output_2_aux = Dense(units=1, activation='relu')(output_1_aux)

# Join the two branches along axis 1, then flatten for the dense head
concat = tensorflow.keras.layers.Concatenate(axis=1)([output_2_main, output_2_aux])
flatten = Flatten()(concat)

dense1 = Dense(units=5, activation='relu')(flatten)

# One linear output per forecast step
predictions = Dense(units=horizon)(dense1)

model = Model(inputs=[inputs_main, inputs_aux], outputs=predictions)

model.compile(loss='mean_squared_error', optimizer='rmsprop')

model.fit_generator(
    data_gen.flow(X, y),
    steps_per_epoch=len(X) // 64,
    epochs=5,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f425cbc5fd0>